net/mlx5e: Fix mapping of ct_label zero
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_CT_STATE_REPLY_BIT BIT(4)

#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

#define ct_dbg(fmt, args...)\
        netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

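/* Central conntrack offload state for a device. Offloaded flows traverse a
 * per-zone pre_ct table, then the ct/ct_nat tables that hold the offloaded
 * conntrack entries, and finally the post_ct table where the original tc
 * chain resumes (see tc_ct_pre_ct_add_rules() below).
 */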
struct mlx5_tc_ct_priv {
        struct mlx5_core_dev *dev;
        const struct net_device *netdev;
        struct mod_hdr_tbl *mod_hdr_tbl;
        struct idr fte_ids;
        struct xarray tuple_ids;
        struct rhashtable zone_ht;
        struct rhashtable ct_tuples_ht;
        struct rhashtable ct_tuples_nat_ht;
        struct mlx5_flow_table *ct;
        struct mlx5_flow_table *ct_nat;
        struct mlx5_flow_table *post_ct;
        struct mutex control_lock; /* guards parallel adds/dels */
        struct mapping_ctx *zone_mapping;
        struct mapping_ctx *labels_mapping;
        enum mlx5_flow_namespace_type ns_type;
        struct mlx5_fs_chains *chains;
        spinlock_t ht_lock; /* protects ft entries */
};

struct mlx5_ct_flow {
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_flow_attr *post_ct_attr;
        struct mlx5_flow_handle *pre_ct_rule;
        struct mlx5_flow_handle *post_ct_rule;
        struct mlx5_ct_ft *ft;
        u32 fte_id;
        u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
        struct mlx5_flow_handle *rule;
        struct mlx5e_mod_hdr_handle *mh;
        struct mlx5_flow_attr *attr;
        bool nat;
};

struct mlx5_tc_ct_pre {
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *flow_grp;
        struct mlx5_flow_group *miss_grp;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_handle *miss_rule;
        struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
        struct rhash_head node;
        u16 zone;
        u32 zone_restore_id;
        refcount_t refcount;
        struct nf_flowtable *nf_ft;
        struct mlx5_tc_ct_priv *ct_priv;
        struct rhashtable ct_entries_ht;
        struct mlx5_tc_ct_pre pre_ct;
        struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
        u16 addr_type;
        __be16 n_proto;
        u8 ip_proto;
        struct {
                union {
                        __be32 src_v4;
                        struct in6_addr src_v6;
                };
                union {
                        __be32 dst_v4;
                        struct in6_addr dst_v6;
                };
        } ip;
        struct {
                __be16 src;
                __be16 dst;
        } port;

        u16 zone;
};

struct mlx5_ct_counter {
        struct mlx5_fc *counter;
        refcount_t refcount;
        bool is_shared;
};

enum {
        MLX5_CT_ENTRY_FLAG_VALID,
};

struct mlx5_ct_entry {
        struct rhash_head node;
        struct rhash_head tuple_node;
        struct rhash_head tuple_nat_node;
        struct mlx5_ct_counter *counter;
        unsigned long cookie;
        unsigned long restore_cookie;
        struct mlx5_ct_tuple tuple;
        struct mlx5_ct_tuple tuple_nat;
        struct mlx5_ct_zone_rule zone_rules[2];

        struct mlx5_tc_ct_priv *ct_priv;
        struct work_struct work;

        refcount_t refcnt;
        unsigned long flags;
};

static const struct rhashtable_params cts_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, node),
        .key_offset = offsetof(struct mlx5_ct_entry, cookie),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
        .head_offset = offsetof(struct mlx5_ct_ft, node),
        .key_offset = offsetof(struct mlx5_ct_ft, zone),
        .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
        .automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

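/* The NAT tuple node is only hashed into ct_tuples_nat_ht when the NAT
 * tuple differs from the original one; a populated rhash node next pointer
 * is what records that insertion.
 */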
static bool
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
{
        return !!(entry->tuple_nat_node.next);
}

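/* An all-zero ct_label is the common default and must not consume a mapping
 * entry: map it to the reserved id 0 without touching labels_mapping, and
 * have mlx5_put_label_mapping() skip releasing id 0 in turn. Non-zero
 * labels get a mapped id so the 128-bit label fits into a register.
 */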
static int
mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
                       u32 *labels, u32 *id)
{
        if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
                *id = 0;
                return 0;
        }

        if (mapping_add(ct_priv->labels_mapping, labels, id))
                return -EOPNOTSUPP;

        return 0;
}

static void
mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
{
        if (id)
                mapping_remove(ct_priv->labels_mapping, id);
}

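/* Extract the offloaded connection's tuple (L3/L4 keys; the caller fills in
 * the zone) from a conntrack flow_rule. Only TCP and UDP are supported.
 */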
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
        struct flow_match_control control;
        struct flow_match_basic basic;

        flow_rule_match_basic(rule, &basic);
        flow_rule_match_control(rule, &control);

        tuple->n_proto = basic.key->n_proto;
        tuple->ip_proto = basic.key->ip_proto;
        tuple->addr_type = control.key->addr_type;

        if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                tuple->ip.src_v4 = match.key->src;
                tuple->ip.dst_v4 = match.key->dst;
        } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                tuple->ip.src_v6 = match.key->src;
                tuple->ip.dst_v6 = match.key->dst;
        } else {
                return -EOPNOTSUPP;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (tuple->ip_proto) {
                case IPPROTO_TCP:
                case IPPROTO_UDP:
                        tuple->port.src = match.key->src;
                        tuple->port.dst = match.key->dst;
                        break;
                default:
                        return -EOPNOTSUPP;
                }
        } else {
                return -EOPNOTSUPP;
        }

        return 0;
}

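/* Derive the post-NAT tuple from the rule's mangle actions: each
 * FLOW_ACTION_MANGLE entry rewrites one header field, identified by its
 * htype and offset, on top of a copy of the original tuple.
 */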
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
                             struct flow_rule *rule)
{
        struct flow_action *flow_action = &rule->action;
        struct flow_action_entry *act;
        u32 offset, val, ip6_offset;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id != FLOW_ACTION_MANGLE)
                        continue;

                offset = act->mangle.offset;
                val = act->mangle.val;
                switch (act->mangle.htype) {
                case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                        if (offset == offsetof(struct iphdr, saddr))
                                tuple->ip.src_v4 = cpu_to_be32(val);
                        else if (offset == offsetof(struct iphdr, daddr))
                                tuple->ip.dst_v4 = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                        ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
                        ip6_offset /= 4;
                        if (ip6_offset < 4)
                                tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
                        else if (ip6_offset < 8)
                                tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                        if (offset == offsetof(struct tcphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct tcphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                        if (offset == offsetof(struct udphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct udphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

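/* Translate the tuple keys of a conntrack flow_rule into an mlx5 flow_spec
 * matching the outer headers: ethertype, IP protocol, addresses, L4 ports
 * and TCP flags.
 */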
static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
                           struct flow_rule *rule)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        u16 addr_type = 0;
        u8 ip_proto = 0;

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_match_basic match;

                flow_rule_match_basic(rule, &match);

                mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
                                       headers_v);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         match.mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         match.key->ip_proto);

                ip_proto = match.key->ip_proto;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_match_control match;

                flow_rule_match_control(rule, &match);
                addr_type = match.key->addr_type;
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.key->src, sizeof(match.key->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.key->src, sizeof(match.key->src));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (ip_proto) {
                case IPPROTO_TCP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_dport, ntohs(match.key->dst));
                        break;

                case IPPROTO_UDP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_dport, ntohs(match.key->dst));
                        break;
                default:
                        break;
                }
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
                struct flow_match_tcp match;

                flow_rule_match_tcp(rule, &match);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(match.mask->flags));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
                         ntohs(match.key->flags));
        }

        return 0;
}

static void
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
        if (entry->counter->is_shared &&
            !refcount_dec_and_test(&entry->counter->refcount))
                return;

        mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
        kfree(entry->counter);
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct mlx5_ct_entry *entry,
                          bool nat)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5_flow_attr *attr = zone_rule->attr;

        ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

        mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
        kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct flow_action_entry *act;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id == FLOW_ACTION_CT_METADATA)
                        return act;
        }

        return NULL;
}

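/* Write the conntrack metadata of an established connection (ct state bits,
 * mark, label mapping id, zone restore id) into metadata registers via
 * modify-header actions, so the information survives the hardware ct lookup
 * and can be restored to the skb if a later table misses to software.
 */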
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
                               struct mlx5e_tc_mod_hdr_acts *mod_acts,
                               u8 ct_state,
                               u32 mark,
                               u32 labels_id,
                               u8 zone_restore_id)
{
        enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
        struct mlx5_core_dev *dev = ct_priv->dev;
        int err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        CTSTATE_TO_REG, ct_state);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        MARK_TO_REG, mark);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        LABELS_TO_REG, labels_id);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        ZONE_RESTORE_TO_REG, zone_restore_id);
        if (err)
                return err;

        /* Make another copy of zone id in reg_b for
         * NIC rx flows since we don't copy reg_c1 to
         * reg_b upon miss.
         */
        if (ns != MLX5_FLOW_NAMESPACE_FDB) {
                err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                                NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
                if (err)
                        return err;
        }
        return 0;
}

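/* Convert a single pedit mangle action into an mlx5 set_action_in
 * modify-header action; a length of 0 selects the field's full width.
 * IPv6 address words are identified by their offset within saddr/daddr.
 */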
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
                                   char *modact)
{
        u32 offset = act->mangle.offset, field;

        switch (act->mangle.htype) {
        case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct iphdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
                else if (offset == offsetof(struct iphdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct ipv6hdr, saddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct tcphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
                else if (offset == offsetof(struct tcphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct udphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
                else if (offset == offsetof(struct udphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        default:
                return -EOPNOTSUPP;
        }

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, offset, 0);
        MLX5_SET(set_action_in, modact, field, field);
        MLX5_SET(set_action_in, modact, data, act->mangle.val);

        return 0;
}

static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
                            struct flow_rule *flow_rule,
                            struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct mlx5_core_dev *mdev = ct_priv->dev;
        struct flow_action_entry *act;
        size_t action_size;
        char *modact;
        int err, i;

        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_MANGLE: {
                        err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
                                                    mod_acts);
                        if (err)
                                return err;

                        modact = mod_acts->actions +
                                 mod_acts->num_actions * action_size;

                        err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
                        if (err)
                                return err;

                        mod_acts->num_actions++;
                }
                break;

                case FLOW_ACTION_CT_METADATA:
                        /* Handled earlier */
                        continue;
                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

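/* Build the modify-header for a ct entry rule: map the ct labels to an id,
 * append the NAT rewrites when requested, then set the metadata registers
 * for restore on miss.
 */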
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
                                struct mlx5_flow_attr *attr,
                                struct flow_rule *flow_rule,
                                struct mlx5e_mod_hdr_handle **mh,
                                u8 zone_restore_id, bool nat)
{
        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
        struct flow_action_entry *meta;
        u16 ct_state = 0;
        int err;

        meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta)
                return -EOPNOTSUPP;

        err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
                                     &attr->ct_attr.ct_labels_id);
        if (err)
                return -EOPNOTSUPP;
        if (nat) {
                err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
                                                  &mod_acts);
                if (err)
                        goto err_mapping;

                ct_state |= MLX5_CT_STATE_NAT_BIT;
        }

        ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
        ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
        err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
                                             ct_state,
                                             meta->ct_metadata.mark,
                                             attr->ct_attr.ct_labels_id,
                                             zone_restore_id);
        if (err)
                goto err_mapping;

        *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
                                   ct_priv->mod_hdr_tbl,
                                   ct_priv->ns_type,
                                   &mod_acts);
        if (IS_ERR(*mh)) {
                err = PTR_ERR(*mh);
                goto err_mapping;
        }
        attr->modify_hdr = mlx5e_mod_hdr_get(*mh);

        dealloc_mod_hdr_actions(&mod_acts);
        return 0;

err_mapping:
        dealloc_mod_hdr_actions(&mod_acts);
        mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
        return err;
}

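/* Offload a single direction of a ct entry: install a rule in the ct (or
 * ct_nat) table that matches the tuple and zone, applies the modify-header,
 * counts, and forwards to post_ct.
 */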
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct flow_rule *flow_rule,
                          struct mlx5_ct_entry *entry,
                          bool nat, u8 zone_restore_id)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        struct mlx5_flow_spec *spec = NULL;
        struct mlx5_flow_attr *attr;
        int err;

        zone_rule->nat = nat;

        spec = kzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!attr) {
                err = -ENOMEM;
                goto err_attr;
        }

        err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
                                              &zone_rule->mh,
                                              zone_restore_id, nat);
        if (err) {
                ct_dbg("Failed to create ct entry mod hdr");
                goto err_mod_hdr;
        }

        attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
        attr->dest_chain = 0;
        attr->dest_ft = ct_priv->post_ct;
        attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        attr->outer_match_level = MLX5_MATCH_L4;
        attr->counter = entry->counter->counter;
        attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
        if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
                attr->esw_attr->in_mdev = priv->mdev;

        mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);

        zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
        if (IS_ERR(zone_rule->rule)) {
                err = PTR_ERR(zone_rule->rule);
                ct_dbg("Failed to add ct entry rule, nat: %d", nat);
                goto err_rule;
        }

        zone_rule->attr = attr;

        kfree(spec);
        ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

        return 0;

err_rule:
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
err_mod_hdr:
        kfree(attr);
err_attr:
        kfree(spec);
        return err;
}

static bool
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
{
        return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
}

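/* Look up an entry by tuple, first in the original-direction table and then
 * in the NAT table. Only a valid entry whose refcount could be taken is
 * returned; a dying entry yields ERR_PTR(-EINVAL).
 */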
static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
        struct mlx5_ct_entry *entry;

        entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
                                       tuples_ht_params);
        if (entry && mlx5_tc_ct_entry_valid(entry) &&
            refcount_inc_not_zero(&entry->refcnt)) {
                return entry;
        } else if (!entry) {
                entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
                                               tuple, tuples_nat_ht_params);
                if (entry && mlx5_tc_ct_entry_valid(entry) &&
                    refcount_inc_not_zero(&entry->refcnt))
                        return entry;
        }

        return entry ? ERR_PTR(-EINVAL) : NULL;
}

static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
        struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

        rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                               &entry->tuple_nat_node,
                               tuples_nat_ht_params);
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
                               tuples_ht_params);
}

static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
        struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

        mlx5_tc_ct_entry_del_rules(ct_priv, entry);

        spin_lock_bh(&ct_priv->ht_lock);
        mlx5_tc_ct_entry_remove_from_tuples(entry);
        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_tc_ct_counter_put(ct_priv, entry);
        kfree(entry);
}

static void
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
        if (!refcount_dec_and_test(&entry->refcnt))
                return;

        mlx5_tc_ct_entry_del(entry);
}

static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
{
        struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);

        mlx5_tc_ct_entry_del(entry);
}

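/* Final-put variant for contexts that must not block: rule teardown sleeps,
 * so defer the actual delete to the driver workqueue.
 */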
static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
        struct mlx5e_priv *priv;

        if (!refcount_dec_and_test(&entry->refcnt))
                return;

        priv = netdev_priv(entry->ct_priv->netdev);
        INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
        queue_work(priv->wq, &entry->work);
}

static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
        struct mlx5_ct_counter *counter;
        int ret;

        counter = kzalloc(sizeof(*counter), GFP_KERNEL);
        if (!counter)
                return ERR_PTR(-ENOMEM);

        counter->is_shared = false;
        counter->counter = mlx5_fc_create(ct_priv->dev, true);
        if (IS_ERR(counter->counter)) {
                ct_dbg("Failed to create counter for ct entry");
                ret = PTR_ERR(counter->counter);
                kfree(counter);
                return ERR_PTR(ret);
        }

        return counter;
}

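/* Both directions of a connection can share one hardware counter when
 * per-flow accounting is off: reuse the counter of the reverse-tuple entry
 * if one exists, otherwise create a fresh counter marked as shared.
 */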
static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
                              struct mlx5_ct_entry *entry)
{
        struct mlx5_ct_tuple rev_tuple = entry->tuple;
        struct mlx5_ct_counter *shared_counter;
        struct mlx5_ct_entry *rev_entry;
        __be16 tmp_port;

        /* get the reversed tuple */
        tmp_port = rev_tuple.port.src;
        rev_tuple.port.src = rev_tuple.port.dst;
        rev_tuple.port.dst = tmp_port;

        if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                __be32 tmp_addr = rev_tuple.ip.src_v4;

                rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
                rev_tuple.ip.dst_v4 = tmp_addr;
        } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

                rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
                rev_tuple.ip.dst_v6 = tmp_addr;
        } else {
                return ERR_PTR(-EOPNOTSUPP);
        }

        /* Use the same counter as the reverse direction */
        spin_lock_bh(&ct_priv->ht_lock);
        rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);

        if (IS_ERR(rev_entry)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                goto create_counter;
        }

        if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
                ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
                shared_counter = rev_entry->counter;
                spin_unlock_bh(&ct_priv->ht_lock);

                mlx5_tc_ct_entry_put(rev_entry);
                return shared_counter;
        }

        spin_unlock_bh(&ct_priv->ht_lock);

create_counter:

        shared_counter = mlx5_tc_ct_counter_create(ct_priv);
        if (IS_ERR(shared_counter))
                return shared_counter;

        shared_counter->is_shared = true;
        refcount_set(&shared_counter->refcount, 1);
        return shared_counter;
}

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct flow_rule *flow_rule,
                           struct mlx5_ct_entry *entry,
                           u8 zone_restore_id)
{
        int err;

        if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
                entry->counter = mlx5_tc_ct_counter_create(ct_priv);
        else
                entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

        if (IS_ERR(entry->counter)) {
                err = PTR_ERR(entry->counter);
                return err;
        }

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
                                        zone_restore_id);
        if (err)
                goto err_orig;

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
                                        zone_restore_id);
        if (err)
                goto err_nat;

        return 0;

err_nat:
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
        mlx5_tc_ct_counter_put(ct_priv, entry);
        return err;
}

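/* FLOW_CLS_REPLACE handler: allocate a ct entry for the connection being
 * offloaded, hash it by cookie and by tuple(s), then install its ct and
 * ct_nat rules. The refcount starts at 2: one reference for the hashtables
 * and one for this function, dropped once the entry is marked valid.
 */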
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        struct flow_action_entry *meta_action;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;
        int err;

        meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta_action)
                return -EOPNOTSUPP;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (entry && refcount_inc_not_zero(&entry->refcnt)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                mlx5_tc_ct_entry_put(entry);
                return -EEXIST;
        }
        spin_unlock_bh(&ct_priv->ht_lock);

        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return -ENOMEM;

        entry->tuple.zone = ft->zone;
        entry->cookie = flow->cookie;
        entry->restore_cookie = meta_action->ct_metadata.cookie;
        refcount_set(&entry->refcnt, 2);
        entry->ct_priv = ct_priv;

        err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
        if (err)
                goto err_set;

        memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
        err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
        if (err)
                goto err_set;

        spin_lock_bh(&ct_priv->ht_lock);

        err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
                                            cts_ht_params);
        if (err)
                goto err_entries;

        err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
                                            &entry->tuple_node,
                                            tuples_ht_params);
        if (err)
                goto err_tuple;

        if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
                err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
                                                    &entry->tuple_nat_node,
                                                    tuples_nat_ht_params);
                if (err)
                        goto err_tuple_nat;
        }
        spin_unlock_bh(&ct_priv->ht_lock);

        err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
                                         ft->zone_restore_id);
        if (err)
                goto err_rules;

        set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
        mlx5_tc_ct_entry_put(entry); /* this function reference */

        return 0;

err_rules:
        spin_lock_bh(&ct_priv->ht_lock);
        if (mlx5_tc_ct_entry_has_nat(entry))
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
                               &entry->tuple_node,
                               tuples_ht_params);
err_tuple:
        rhashtable_remove_fast(&ft->ct_entries_ht,
                               &entry->node,
                               cts_ht_params);
err_entries:
        spin_unlock_bh(&ct_priv->ht_lock);
err_set:
        kfree(entry);
        if (err != -EEXIST)
                netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
        return err;
}

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (!entry) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -ENOENT;
        }

        if (!mlx5_tc_ct_entry_valid(entry)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -EINVAL;
        }

        rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
        mlx5_tc_ct_entry_remove_from_tuples(entry);
        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_tc_ct_entry_put(entry);

        return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
                                    struct flow_cls_offload *f)
{
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        unsigned long cookie = f->cookie;
        struct mlx5_ct_entry *entry;
        u64 lastuse, packets, bytes;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (!entry) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -ENOENT;
        }

        if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -EINVAL;
        }

        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
                          FLOW_ACTION_HW_STATS_DELAYED);

        mlx5_tc_ct_entry_put(entry);
        return 0;
}

static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
                              void *cb_priv)
{
        struct flow_cls_offload *f = type_data;
        struct mlx5_ct_ft *ft = cb_priv;

        if (type != TC_SETUP_CLSFLOWER)
                return -EOPNOTSUPP;

        switch (f->command) {
        case FLOW_CLS_REPLACE:
                return mlx5_tc_ct_block_flow_offload_add(ft, f);
        case FLOW_CLS_DESTROY:
                return mlx5_tc_ct_block_flow_offload_del(ft, f);
        case FLOW_CLS_STATS:
                return mlx5_tc_ct_block_flow_offload_stats(ft, f);
        default:
                break;
        }

        return -EOPNOTSUPP;
}

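/* Dissect an skb into a ct tuple (TCP/UDP over IPv4/IPv6 only) so the
 * conntrack entry it belongs to can be looked up by tuple.
 */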
static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
                        u16 zone)
{
        struct flow_keys flow_keys;

        skb_reset_network_header(skb);
        skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

        tuple->zone = zone;

        if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
            flow_keys.basic.ip_proto != IPPROTO_UDP)
                return false;

        tuple->port.src = flow_keys.ports.src;
        tuple->port.dst = flow_keys.ports.dst;
        tuple->n_proto = flow_keys.basic.n_proto;
        tuple->ip_proto = flow_keys.basic.ip_proto;

        switch (flow_keys.basic.n_proto) {
        case htons(ETH_P_IP):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
                tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
                break;

        case htons(ETH_P_IPV6):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
                tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
                break;
        default:
                goto out;
        }

        return true;

out:
        return false;
}

int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
        u32 ctstate = 0, ctstate_mask = 0;

        mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
                                        &ctstate, &ctstate_mask);

        if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
                return -EOPNOTSUPP;

        ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                    ctstate, ctstate_mask);

        return 0;
}

void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
        if (!priv || !ct_attr->ct_labels_id)
                return;

        mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
}

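/* Translate a tc flower ct_state/ct_zone/ct_mark/ct_labels match into
 * register matches. Only the trk, est and rpl bits are representable,
 * set or cleared; matching ct_state +new is rejected.
 */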
int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
                     struct mlx5_flow_spec *spec,
                     struct flow_cls_offload *f,
                     struct mlx5_ct_attr *ct_attr,
                     struct netlink_ext_ack *extack)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        bool trk, est, untrk, unest, new, rpl, unrpl;
        struct flow_dissector_key_ct *mask, *key;
        u32 ctstate = 0, ctstate_mask = 0;
        u16 ct_state_on, ct_state_off;
        u16 ct_state, ct_state_mask;
        struct flow_match_ct match;
        u32 ct_labels[4];

        if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
                return 0;

        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct matching isn't available");
                return -EOPNOTSUPP;
        }

        flow_rule_match_ct(rule, &match);

        key = match.key;
        mask = match.mask;

        ct_state = key->ct_state;
        ct_state_mask = mask->ct_state;

        if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
                              TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
                              TCA_FLOWER_KEY_CT_FLAGS_NEW |
                              TCA_FLOWER_KEY_CT_FLAGS_REPLY)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only ct_state trk, est, new and rpl are supported for offload");
                return -EOPNOTSUPP;
        }

        ct_state_on = ct_state & ct_state_mask;
        ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
        trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
        est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
        untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;

        ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
        ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;

        if (new) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "matching on ct_state +new isn't supported");
                return -EOPNOTSUPP;
        }

        if (mask->ct_zone)
                mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                            key->ct_zone, MLX5_CT_ZONE_MASK);
        if (ctstate_mask)
                mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                            ctstate, ctstate_mask);
        if (mask->ct_mark)
                mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
                                            key->ct_mark, mask->ct_mark);
        if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
            mask->ct_labels[3]) {
                ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
                ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
                ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
                ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
                if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
                        return -EOPNOTSUPP;
                mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
                                            MLX5_CT_LABELS_MASK);
        }

        return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
{
        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct action isn't available");
                return -EOPNOTSUPP;
        }

        attr->ct_attr.zone = act->ct.zone;
        attr->ct_attr.ct_action = act->ct.action;
        attr->ct_attr.nf_ft = act->ct.flow_table;

        return 0;
}

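/* Populate a per-zone pre_ct table: the first rule sends packets already
 * tracked in this zone (and, in the NAT variant, marked for NAT) straight
 * to post_ct, while the miss rule sends everything else to the ct lookup
 * table. Both rules also write the zone into its register.
 */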
1333 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1334                                   struct mlx5_tc_ct_pre *pre_ct,
1335                                   bool nat)
1336 {
1337         struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1338         struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1339         struct mlx5_core_dev *dev = ct_priv->dev;
1340         struct mlx5_flow_table *ft = pre_ct->ft;
1341         struct mlx5_flow_destination dest = {};
1342         struct mlx5_flow_act flow_act = {};
1343         struct mlx5_modify_hdr *mod_hdr;
1344         struct mlx5_flow_handle *rule;
1345         struct mlx5_flow_spec *spec;
1346         u32 ctstate;
1347         u16 zone;
1348         int err;
1349
1350         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1351         if (!spec)
1352                 return -ENOMEM;
1353
1354         zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1355         err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1356                                         ZONE_TO_REG, zone);
1357         if (err) {
1358                 ct_dbg("Failed to set zone register mapping");
1359                 goto err_mapping;
1360         }
1361
1362         mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1363                                            pre_mod_acts.num_actions,
1364                                            pre_mod_acts.actions);
1365
1366         if (IS_ERR(mod_hdr)) {
1367                 err = PTR_ERR(mod_hdr);
1368                 ct_dbg("Failed to create pre ct mod hdr");
1369                 goto err_mapping;
1370         }
1371         pre_ct->modify_hdr = mod_hdr;
1372
1373         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1374                           MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1375         flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1376         flow_act.modify_hdr = mod_hdr;
1377         dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1378
1379         /* add flow rule */
1380         mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1381                                     zone, MLX5_CT_ZONE_MASK);
1382         ctstate = MLX5_CT_STATE_TRK_BIT;
1383         if (nat)
1384                 ctstate |= MLX5_CT_STATE_NAT_BIT;
1385         mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1386
1387         dest.ft = ct_priv->post_ct;
1388         rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1389         if (IS_ERR(rule)) {
1390                 err = PTR_ERR(rule);
1391                 ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1392                 goto err_flow_rule;
1393         }
1394         pre_ct->flow_rule = rule;
1395
1396         /* add miss rule */
1397         dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1398         rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1399         if (IS_ERR(rule)) {
1400                 err = PTR_ERR(rule);
1401                 ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1402                 goto err_miss_rule;
1403         }
1404         pre_ct->miss_rule = rule;
1405
1406         dealloc_mod_hdr_actions(&pre_mod_acts);
1407         kvfree(spec);
1408         return 0;
1409
1410 err_miss_rule:
1411         mlx5_del_flow_rules(pre_ct->flow_rule);
1412 err_flow_rule:
1413         mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1414 err_mapping:
1415         dealloc_mod_hdr_actions(&pre_mod_acts);
1416         kvfree(spec);
1417         return err;
1418 }
1419
1420 static void
1421 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1422                        struct mlx5_tc_ct_pre *pre_ct)
1423 {
1424         struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1425         struct mlx5_core_dev *dev = ct_priv->dev;
1426
1427         mlx5_del_flow_rules(pre_ct->flow_rule);
1428         mlx5_del_flow_rules(pre_ct->miss_rule);
1429         mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1430 }
1431
1432 static int
1433 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1434                         struct mlx5_tc_ct_pre *pre_ct,
1435                         bool nat)
1436 {
1437         int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1438         struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1439         struct mlx5_core_dev *dev = ct_priv->dev;
1440         struct mlx5_flow_table_attr ft_attr = {};
1441         struct mlx5_flow_namespace *ns;
1442         struct mlx5_flow_table *ft;
1443         struct mlx5_flow_group *g;
1444         u32 metadata_reg_c_2_mask;
1445         u32 *flow_group_in;
1446         void *misc;
1447         int err;
1448
1449         ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1450         if (!ns) {
1451                 err = -EOPNOTSUPP;
1452                 ct_dbg("Failed to get flow namespace");
1453                 return err;
1454         }
1455
1456         flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1457         if (!flow_group_in)
1458                 return -ENOMEM;
1459
1460         ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1461         ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1462                         FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1463         ft_attr.max_fte = 2;
1464         ft_attr.level = 1;
1465         ft = mlx5_create_flow_table(ns, &ft_attr);
1466         if (IS_ERR(ft)) {
1467                 err = PTR_ERR(ft);
1468                 ct_dbg("Failed to create pre ct table");
1469                 goto out_free;
1470         }
1471         pre_ct->ft = ft;
1472
1473         /* create flow group */
1474         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1475         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1476         MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1477                  MLX5_MATCH_MISC_PARAMETERS_2);
1478
1479         misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1480                             match_criteria.misc_parameters_2);
1481
1482         metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1483         metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1484         if (nat)
1485                 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1486
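        /* reg_c_2 layout implied by the masks above: the low bits hold the
         * zone (MLX5_CT_ZONE_MASK) and the ctstate bits (+trk, plus +nat
         * for the NAT instance) sit above bit 16.
         */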
1487         MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1488                  metadata_reg_c_2_mask);
1489
1490         g = mlx5_create_flow_group(ft, flow_group_in);
1491         if (IS_ERR(g)) {
1492                 err = PTR_ERR(g);
1493                 ct_dbg("Failed to create pre ct group");
1494                 goto err_flow_grp;
1495         }
1496         pre_ct->flow_grp = g;
1497
1498         /* create miss group */
1499         memset(flow_group_in, 0, inlen);
1500         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1501         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1502         g = mlx5_create_flow_group(ft, flow_group_in);
1503         if (IS_ERR(g)) {
1504                 err = PTR_ERR(g);
1505                 ct_dbg("Failed to create pre ct miss group");
1506                 goto err_miss_grp;
1507         }
1508         pre_ct->miss_grp = g;
1509
1510         err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1511         if (err)
1512                 goto err_add_rules;
1513
1514         kvfree(flow_group_in);
1515         return 0;
1516
1517 err_add_rules:
1518         mlx5_destroy_flow_group(pre_ct->miss_grp);
1519 err_miss_grp:
1520         mlx5_destroy_flow_group(pre_ct->flow_grp);
1521 err_flow_grp:
1522         mlx5_destroy_flow_table(ft);
1523 out_free:
1524         kvfree(flow_group_in);
1525         return err;
1526 }
1527
1528 static void
1529 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1530                        struct mlx5_tc_ct_pre *pre_ct)
1531 {
1532         tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1533         mlx5_destroy_flow_group(pre_ct->miss_grp);
1534         mlx5_destroy_flow_group(pre_ct->flow_grp);
1535         mlx5_destroy_flow_table(pre_ct->ft);
1536 }
1537
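/* Allocate the per-zone pair of pre_ct tables: one for plain ct and one
 * for ct nat. They differ only in the NAT bit they match and in which ct
 * table their miss rule points to.
 */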
1538 static int
1539 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1540 {
1541         int err;
1542
1543         err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1544         if (err)
1545                 return err;
1546
1547         err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1548         if (err)
1549                 goto err_pre_ct_nat;
1550
1551         return 0;
1552
1553 err_pre_ct_nat:
1554         mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1555         return err;
1556 }
1557
1558 static void
1559 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1560 {
1561         mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1562         mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1563 }
1564
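/* Get or create the per-zone ct flow table state. Instances are
 * refcounted and cached in zone_ht, so tc flows that share a zone also
 * share the pre_ct/pre_ct_nat tables and a single nf flowtable offload
 * callback registration.
 */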
1565 static struct mlx5_ct_ft *
1566 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1567                      struct nf_flowtable *nf_ft)
1568 {
1569         struct mlx5_ct_ft *ft;
1570         int err;
1571
1572         ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1573         if (ft) {
1574                 refcount_inc(&ft->refcount);
1575                 return ft;
1576         }
1577
1578         ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1579         if (!ft)
1580                 return ERR_PTR(-ENOMEM);
1581
1582         err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1583         if (err)
1584                 goto err_mapping;
1585
1586         ft->zone = zone;
1587         ft->nf_ft = nf_ft;
1588         ft->ct_priv = ct_priv;
1589         refcount_set(&ft->refcount, 1);
1590
1591         err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1592         if (err)
1593                 goto err_alloc_pre_ct;
1594
1595         err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1596         if (err)
1597                 goto err_init;
1598
1599         err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1600                                      zone_params);
1601         if (err)
1602                 goto err_insert;
1603
1604         err = nf_flow_table_offload_add_cb(ft->nf_ft,
1605                                            mlx5_tc_ct_block_flow_offload, ft);
1606         if (err)
1607                 goto err_add_cb;
1608
1609         return ft;
1610
1611 err_add_cb:
1612         rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1613 err_insert:
1614         rhashtable_destroy(&ft->ct_entries_ht);
1615 err_init:
1616         mlx5_tc_ct_free_pre_ct_tables(ft);
1617 err_alloc_pre_ct:
1618         mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1619 err_mapping:
1620         kfree(ft);
1621         return ERR_PTR(err);
1622 }
1623
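/* rhashtable_free_and_destroy() callback: drop the reference of every
 * entry still present when a zone's flow table goes away.
 */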
1624 static void
1625 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1626 {
1627         struct mlx5_ct_entry *entry = ptr;
1628
1629         mlx5_tc_ct_entry_put(entry);
1630 }
1631
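/* Release one reference on a zone's flow table; the last put unregisters
 * the nf flowtable callback, flushes the remaining entries, frees the
 * pre_ct tables and removes the zone restore mapping.
 */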
1632 static void
1633 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1634 {
1635         if (!refcount_dec_and_test(&ft->refcount))
1636                 return;
1637
1638         nf_flow_table_offload_del_cb(ft->nf_ft,
1639                                      mlx5_tc_ct_block_flow_offload, ft);
1640         rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1641         rhashtable_free_and_destroy(&ft->ct_entries_ht,
1642                                     mlx5_tc_ct_flush_ft_entry,
1643                                     ct_priv);
1644         mlx5_tc_ct_free_pre_ct_tables(ft);
1645         mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1646         kfree(ft);
1647 }
1648
1649 /* We translate the tc filter with CT action to the following HW model:
1650  *
1651  * +---------------------+
1652  * + ft prio (tc chain)  +
1653  * + original match      +
1654  * +---------------------+
1655  *      | set chain miss mapping
1656  *      | set fte_id
1657  *      | set tunnel_id
1658  *      | do decap
1659  *      v
1660  * +---------------------+
1661  * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1662  * + zone+nat match      +---------------->+ post_ct (see below) +
1663  * +---------------------+  set zone       +---------------------+
1664  *      | set zone
1665  *      v
1666  * +--------------------+
1667  * + CT (nat or no nat) +
1668  * + tuple + zone match +
1669  * +--------------------+
1670  *      | set mark
1671  *      | set labels_id
1672  *      | set established
1673  *      | set zone_restore
1674  *      | do nat (if needed)
1675  *      v
1676  * +--------------+
1677  * + post_ct      + original filter actions
1678  * + fte_id match +------------------------>
1679  * +--------------+
1680  */
1681 static struct mlx5_flow_handle *
1682 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1683                           struct mlx5e_tc_flow *flow,
1684                           struct mlx5_flow_spec *orig_spec,
1685                           struct mlx5_flow_attr *attr)
1686 {
1687         bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1688         struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1689         struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1690         u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1691         struct mlx5_flow_spec *post_ct_spec = NULL;
1692         struct mlx5_flow_attr *pre_ct_attr;
1693         struct mlx5_modify_hdr *mod_hdr;
1694         struct mlx5_flow_handle *rule;
1695         struct mlx5_ct_flow *ct_flow;
1696         int chain_mapping = 0, err;
1697         struct mlx5_ct_ft *ft;
1698         u32 fte_id = 1;
1699
1700         post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1701         ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1702         if (!post_ct_spec || !ct_flow) {
1703                 kfree(post_ct_spec);
1704                 kfree(ct_flow);
1705                 return ERR_PTR(-ENOMEM);
1706         }
1707
1708         /* Register for CT established events */
1709         ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1710                                   attr->ct_attr.nf_ft);
1711         if (IS_ERR(ft)) {
1712                 err = PTR_ERR(ft);
1713                 ct_dbg("Failed to register to ft callback");
1714                 goto err_ft;
1715         }
1716         ct_flow->ft = ft;
1717
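        /* Allocate an fte_id unique to this ct_flow; the pre ct rule writes
         * it to a register and the post ct rule matches on it to recover
         * the flow after the ct tables.
         */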
1718         err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1719                             MLX5_FTE_ID_MAX, GFP_KERNEL);
1720         if (err) {
1721                 netdev_warn(priv->netdev,
1722                             "Failed to allocate fte id, err: %d\n", err);
1723                 goto err_idr;
1724         }
1725         ct_flow->fte_id = fte_id;
1726
1727         /* Base flow attributes of both rules on original rule attribute */
1728         ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1729         if (!ct_flow->pre_ct_attr) {
1730                 err = -ENOMEM;
1731                 goto err_alloc_pre;
1732         }
1733
1734         ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1735         if (!ct_flow->post_ct_attr) {
1736                 err = -ENOMEM;
1737                 goto err_alloc_post;
1738         }
1739
1740         pre_ct_attr = ct_flow->pre_ct_attr;
1741         memcpy(pre_ct_attr, attr, attr_sz);
1742         memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1743
1744         /* Modify the original rule's action to fwd and modify, leave decap */
1745         pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1746         pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1747                                MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1748
1749         /* Write the chain miss tag for misses in the ct table, since we
1750          * don't go through all the prios of this chain the way normal tc
1751          * rules do on a miss.
1752          */
1753         err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1754                                             &chain_mapping);
1755         if (err) {
1756                 ct_dbg("Failed to get chain register mapping for chain");
1757                 goto err_get_chain;
1758         }
1759         ct_flow->chain_mapping = chain_mapping;
1760
1761         err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1762                                         CHAIN_TO_REG, chain_mapping);
1763         if (err) {
1764                 ct_dbg("Failed to set chain register mapping");
1765                 goto err_mapping;
1766         }
1767
1768         err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1769                                         FTEID_TO_REG, fte_id);
1770         if (err) {
1771                 ct_dbg("Failed to set fte_id register mapping");
1772                 goto err_mapping;
1773         }
1774
1775         /* If the original flow is decap, we do it before going into the ct
1776          * table, so add a rewrite that carries the tunnel match_id along.
1777          */
1778         if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1779             attr->chain == 0) {
1780                 u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1781
1782                 err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1783                                                 ct_priv->ns_type,
1784                                                 TUNNEL_TO_REG,
1785                                                 tun_id);
1786                 if (err) {
1787                         ct_dbg("Failed to set tunnel register mapping");
1788                         goto err_mapping;
1789                 }
1790         }
1791
1792         mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1793                                            pre_mod_acts.num_actions,
1794                                            pre_mod_acts.actions);
1795         if (IS_ERR(mod_hdr)) {
1796                 err = PTR_ERR(mod_hdr);
1797                 ct_dbg("Failed to create pre ct mod hdr");
1798                 goto err_mapping;
1799         }
1800         pre_ct_attr->modify_hdr = mod_hdr;
1801
1802         /* The post ct rule matches on fte_id and executes the original
1803          * rule's tc actions.
1804          */
1805         mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
1806                                     fte_id, MLX5_FTE_ID_MASK);
1807
1808         /* Put post_ct rule on post_ct flow table */
1809         ct_flow->post_ct_attr->chain = 0;
1810         ct_flow->post_ct_attr->prio = 0;
1811         ct_flow->post_ct_attr->ft = ct_priv->post_ct;
1812
1813         ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
1814         ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
1815         ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
1816         rule = mlx5_tc_rule_insert(priv, post_ct_spec,
1817                                    ct_flow->post_ct_attr);
1818         ct_flow->post_ct_rule = rule;
1819         if (IS_ERR(ct_flow->post_ct_rule)) {
1820                 err = PTR_ERR(ct_flow->post_ct_rule);
1821                 ct_dbg("Failed to add post ct rule");
1822                 goto err_insert_post_ct;
1823         }
1824
1825         /* Change the original rule to point to the ct table */
1826         pre_ct_attr->dest_chain = 0;
1827         pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1828         ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1829                                                    pre_ct_attr);
1830         if (IS_ERR(ct_flow->pre_ct_rule)) {
1831                 err = PTR_ERR(ct_flow->pre_ct_rule);
1832                 ct_dbg("Failed to add pre ct rule");
1833                 goto err_insert_orig;
1834         }
1835
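        /* Both rules are in place; publish ct_flow through attr so that
         * mlx5_tc_ct_delete_flow() can find it on teardown.
         */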
1836         attr->ct_attr.ct_flow = ct_flow;
1837         dealloc_mod_hdr_actions(&pre_mod_acts);
1838         kfree(post_ct_spec);
1839
1840         return rule;
1841
1842 err_insert_orig:
1843         mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1844                             ct_flow->post_ct_attr);
1845 err_insert_post_ct:
1846         mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1847 err_mapping:
1848         dealloc_mod_hdr_actions(&pre_mod_acts);
1849         mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1850 err_get_chain:
1851         kfree(ct_flow->post_ct_attr);
1852 err_alloc_post:
1853         kfree(ct_flow->pre_ct_attr);
1854 err_alloc_pre:
1855         idr_remove(&ct_priv->fte_ids, fte_id);
1856 err_idr:
1857         mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1858 err_ft:
1859         kfree(post_ct_spec);
1860         kfree(ct_flow);
1861         netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1862         return ERR_PTR(err);
1863 }
1864
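/* ct clear bypasses the ct tables entirely: it only adds a modify header
 * that zeroes the ct state registers (all four set to 0 below) on the
 * original rule, which is otherwise inserted unchanged.
 */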
1865 static struct mlx5_flow_handle *
1866 __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
1867                                 struct mlx5_flow_spec *orig_spec,
1868                                 struct mlx5_flow_attr *attr,
1869                                 struct mlx5e_tc_mod_hdr_acts *mod_acts)
1870 {
1871         struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1872         u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1873         struct mlx5_flow_attr *pre_ct_attr;
1874         struct mlx5_modify_hdr *mod_hdr;
1875         struct mlx5_flow_handle *rule;
1876         struct mlx5_ct_flow *ct_flow;
1877         int err;
1878
1879         ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1880         if (!ct_flow)
1881                 return ERR_PTR(-ENOMEM);
1882
1883         /* Base esw attributes on original rule attribute */
1884         pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1885         if (!pre_ct_attr) {
1886                 err = -ENOMEM;
1887                 goto err_attr;
1888         }
1889
1890         memcpy(pre_ct_attr, attr, attr_sz);
1891
1892         err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1893         if (err) {
1894                 ct_dbg("Failed to set register for ct clear");
1895                 goto err_set_registers;
1896         }
1897
1898         mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1899                                            mod_acts->num_actions,
1900                                            mod_acts->actions);
1901         if (IS_ERR(mod_hdr)) {
1902                 err = PTR_ERR(mod_hdr);
1903                 ct_dbg("Failed to add create ct clear mod hdr");
1904                 goto err_set_registers;
1905         }
1906
1907         pre_ct_attr->modify_hdr = mod_hdr;
1908         pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1909
1910         rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
1911         if (IS_ERR(rule)) {
1912                 err = PTR_ERR(rule);
1913                 ct_dbg("Failed to add ct clear rule");
1914                 goto err_insert;
1915         }
1916
1917         attr->ct_attr.ct_flow = ct_flow;
1918         ct_flow->pre_ct_attr = pre_ct_attr;
1919         ct_flow->pre_ct_rule = rule;
1920         return rule;
1921
1922 err_insert:
1923         mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1924 err_set_registers:
1925         netdev_warn(priv->netdev,
1926                     "Failed to offload ct clear flow, err %d\n", err);
1927         kfree(pre_ct_attr);
1928 err_attr:
1929         kfree(ct_flow);
1930
1931         return ERR_PTR(err);
1932 }
1933
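/* Entry point for offloading a tc flow with a ct action: serialize under
 * control_lock and dispatch to either the ct clear or the full ct
 * offload path.
 */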
1934 struct mlx5_flow_handle *
1935 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1936                         struct mlx5e_tc_flow *flow,
1937                         struct mlx5_flow_spec *spec,
1938                         struct mlx5_flow_attr *attr,
1939                         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1940 {
1941         bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1942         struct mlx5_flow_handle *rule;
1943
1944         if (!priv)
1945                 return ERR_PTR(-EOPNOTSUPP);
1946
1947         mutex_lock(&priv->control_lock);
1948
1949         if (clear_action)
1950                 rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1951         else
1952                 rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1953         mutex_unlock(&priv->control_lock);
1954
1955         return rule;
1956 }
1957
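/* Tear down a ct flow in reverse order of its creation. A ct clear flow
 * has no post_ct rule, so its fte_id, chain mapping and zone ft were
 * never taken and are skipped here.
 */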
1958 static void
1959 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1960                          struct mlx5e_tc_flow *flow,
1961                          struct mlx5_ct_flow *ct_flow)
1962 {
1963         struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1964         struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1965
1966         mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
1967                             pre_ct_attr);
1968         mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1969
1970         if (ct_flow->post_ct_rule) {
1971                 mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1972                                     ct_flow->post_ct_attr);
1973                 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1974                 idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
1975                 mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1976         }
1977
1978         kfree(ct_flow->pre_ct_attr);
1979         kfree(ct_flow->post_ct_attr);
1980         kfree(ct_flow);
1981 }
1982
1983 void
1984 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1985                        struct mlx5e_tc_flow *flow,
1986                        struct mlx5_flow_attr *attr)
1987 {
1988         struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1989
1990         /* We are also called on error to clean up after flow parsing;
1991          * in that case there is nothing to delete yet.
1992          */
1993         if (!ct_flow)
1994                 return;
1995
1996         mutex_lock(&priv->control_lock);
1997         __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
1998         mutex_unlock(&priv->control_lock);
1999 }
2000
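/* Probe the FDB capabilities the ct offload pipeline depends on; a
 * missing capability disables ct offload entirely rather than degrading
 * it.
 */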
2001 static int
2002 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
2003                                   const char **err_msg)
2004 {
2005         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
2006                 *err_msg = "firmware level support is missing";
2007                 return -EOPNOTSUPP;
2008         }
2009
2010         if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
2011                 /* The vlan workaround must be avoided for multi-chain rules.
2012                  * This is just a sanity check, as the pop vlan action should
2013                  * be supported by any FW that supports ignore_flow_level.
2014                  */
2015
2016                 *err_msg = "firmware vlan actions support is missing";
2017                 return -EOPNOTSUPP;
2018         }
2019
2020         if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
2021                                     fdb_modify_header_fwd_to_table)) {
2022                 /* CT always writes to registers, which are mod header actions.
2023                  * Therefore, both mod header and forward to table are required.
2024                  */
2025
2026                 *err_msg = "firmware fwd and modify support is missing";
2027                 return -EOPNOTSUPP;
2028         }
2029
2030         if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2031                 *err_msg = "register loopback isn't supported";
2032                 return -EOPNOTSUPP;
2033         }
2034
2035         return 0;
2036 }
2037
2038 static int
2039 mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
2040                                   const char **err_msg)
2041 {
2042         if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
2043                 *err_msg = "firmware level support is missing";
2044                 return -EOPNOTSUPP;
2045         }
2046
2047         return 0;
2048 }
2049
2050 static int
2051 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
2052                               enum mlx5_flow_namespace_type ns_type,
2053                               const char **err_msg)
2054 {
2055         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2056
2057 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
2058         /* cannot restore chain ID on HW miss */
2059
2060         *err_msg = "tc skb extension missing";
2061         return -EOPNOTSUPP;
2062 #endif
2063         if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
2064                 return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
2065         else
2066                 return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
2067 }
2068
2069 #define INIT_ERR_PREFIX "tc ct offload init failed"
2070
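/* Set up the global ct offload state: the shared ct, ct nat and post_ct
 * tables, plus the mapping contexts that translate the 16-bit zone and
 * the 128-bit ct labels into compact ids that fit the restore registers.
 */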
2071 struct mlx5_tc_ct_priv *
2072 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
2073                 struct mod_hdr_tbl *mod_hdr,
2074                 enum mlx5_flow_namespace_type ns_type)
2075 {
2076         struct mlx5_tc_ct_priv *ct_priv;
2077         struct mlx5_core_dev *dev;
2078         const char *msg;
2079         int err;
2080
2081         dev = priv->mdev;
2082         err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
2083         if (err) {
2084                 mlx5_core_warn(dev,
2085                                "tc ct offload not supported, %s\n",
2086                                msg);
2087                 goto err_support;
2088         }
2089
2090         ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
2091         if (!ct_priv)
2092                 goto err_alloc;
2093
2094         ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
2095         if (IS_ERR(ct_priv->zone_mapping)) {
2096                 err = PTR_ERR(ct_priv->zone_mapping);
2097                 goto err_mapping_zone;
2098         }
2099
2100         ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
2101         if (IS_ERR(ct_priv->labels_mapping)) {
2102                 err = PTR_ERR(ct_priv->labels_mapping);
2103                 goto err_mapping_labels;
2104         }
2105
2106         spin_lock_init(&ct_priv->ht_lock);
2107         ct_priv->ns_type = ns_type;
2108         ct_priv->chains = chains;
2109         ct_priv->netdev = priv->netdev;
2110         ct_priv->dev = priv->mdev;
2111         ct_priv->mod_hdr_tbl = mod_hdr;
2112         ct_priv->ct = mlx5_chains_create_global_table(chains);
2113         if (IS_ERR(ct_priv->ct)) {
2114                 err = PTR_ERR(ct_priv->ct);
2115                 mlx5_core_warn(dev,
2116                                "%s, failed to create ct table err: %d\n",
2117                                INIT_ERR_PREFIX, err);
2118                 goto err_ct_tbl;
2119         }
2120
2121         ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
2122         if (IS_ERR(ct_priv->ct_nat)) {
2123                 err = PTR_ERR(ct_priv->ct_nat);
2124                 mlx5_core_warn(dev,
2125                                "%s, failed to create ct nat table err: %d\n",
2126                                INIT_ERR_PREFIX, err);
2127                 goto err_ct_nat_tbl;
2128         }
2129
2130         ct_priv->post_ct = mlx5_chains_create_global_table(chains);
2131         if (IS_ERR(ct_priv->post_ct)) {
2132                 err = PTR_ERR(ct_priv->post_ct);
2133                 mlx5_core_warn(dev,
2134                                "%s, failed to create post ct table err: %d\n",
2135                                INIT_ERR_PREFIX, err);
2136                 goto err_post_ct_tbl;
2137         }
2138
2139         idr_init(&ct_priv->fte_ids);
2140         mutex_init(&ct_priv->control_lock);
2141         rhashtable_init(&ct_priv->zone_ht, &zone_params);
2142         rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
2143         rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
2144
2145         return ct_priv;
2146
2147 err_post_ct_tbl:
2148         mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2149 err_ct_nat_tbl:
2150         mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2151 err_ct_tbl:
2152         mapping_destroy(ct_priv->labels_mapping);
2153 err_mapping_labels:
2154         mapping_destroy(ct_priv->zone_mapping);
2155 err_mapping_zone:
2156         kfree(ct_priv);
2157 err_alloc:
2158 err_support:
2159
2160         return NULL;
2161 }
2162
2163 void
2164 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2165 {
2166         struct mlx5_fs_chains *chains;
2167
2168         if (!ct_priv)
2169                 return;
2170
2171         chains = ct_priv->chains;
2172
2173         mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
2174         mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2175         mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2176         mapping_destroy(ct_priv->zone_mapping);
2177         mapping_destroy(ct_priv->labels_mapping);
2178
2179         rhashtable_destroy(&ct_priv->ct_tuples_ht);
2180         rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2181         rhashtable_destroy(&ct_priv->zone_ht);
2182         mutex_destroy(&ct_priv->control_lock);
2183         idr_destroy(&ct_priv->fte_ids);
2184         kfree(ct_priv);
2185 }
2186
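/* RX restore path: recover the zone from zone_restore_id, rebuild the
 * tuple from the skb, and if a matching offloaded entry exists, reattach
 * the conntrack state via the entry's restore cookie.
 */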
2187 bool
2188 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2189                          struct sk_buff *skb, u8 zone_restore_id)
2190 {
2191         struct mlx5_ct_tuple tuple = {};
2192         struct mlx5_ct_entry *entry;
2193         u16 zone;
2194
2195         if (!ct_priv || !zone_restore_id)
2196                 return true;
2197
2198         if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2199                 return false;
2200
2201         if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2202                 return false;
2203
2204         spin_lock(&ct_priv->ht_lock);
2205
2206         entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
2207         if (!entry || IS_ERR(entry)) {
2208                 spin_unlock(&ct_priv->ht_lock);
2209                 return false;
2210         }
2211         spin_unlock(&ct_priv->ht_lock);
2217
2218         tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2219         __mlx5_tc_ct_entry_put(entry);
2220
2221         return true;
2222 }