2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <net/flow_dissector.h>
34 #include <net/sch_generic.h>
35 #include <net/pkt_cls.h>
36 #include <net/tc_act/tc_gact.h>
37 #include <net/tc_act/tc_skbedit.h>
38 #include <linux/mlx5/fs.h>
39 #include <linux/mlx5/device.h>
40 #include <linux/rhashtable.h>
41 #include <net/switchdev.h>
42 #include <net/tc_act/tc_mirred.h>
43 #include <net/tc_act/tc_vlan.h>
44 #include <net/tc_act/tc_tunnel_key.h>
45 #include <net/tc_act/tc_pedit.h>
46 #include <net/vxlan.h>
/* Attributes of a NIC-mode offloaded flow.
 * The member list falls on lines not shown in this excerpt; code in this
 * file reads ->action, ->flow_tag and ->mod_hdr_id through this type.
 */
52 struct mlx5_nic_flow_attr {
/* Flow type flags tested against the ->flags of a mlx5e_tc_flow; for
 * example mlx5e_tc_del_flow() checks MLX5E_TC_FLOW_ESWITCH to select the
 * FDB (e-switch) teardown path.
 */
59 MLX5E_TC_FLOW_ESWITCH = BIT(0),
60 MLX5E_TC_FLOW_NIC = BIT(1),
/* Driver state kept for one offloaded TC flow.
 * NOTE(review): some members (for example ->flags, which other code in this
 * file reads) fall on lines not shown in this excerpt.
 */
63 struct mlx5e_tc_flow {
/* hash table linkage */
64 struct rhash_head node;
/* handle of the installed steering rule; handed to the rule delete helpers */
67 struct mlx5_flow_handle *rule;
68 struct list_head encap; /* flows sharing the same encap */
/* Zero-length trailing arrays: they add no storage to the struct itself.
 * NOTE(review): presumably the attribute storage is allocated past the end
 * of the struct and only the array matching the flow type is valid for a
 * given flow - confirm against the allocation site.
 */
70 struct mlx5_esw_flow_attr esw_attr[0];
71 struct mlx5_nic_flow_attr nic_attr[0];
/* Intermediate results of parsing one TC rule before it is installed. */
75 struct mlx5e_tc_flow_parse_attr {
/* match criteria and values handed to the rule add helpers */
76 struct mlx5_flow_spec spec;
/* set to the buffer capacity by alloc_mod_hdr_actions() and then to the
 * number of actions actually emitted by offload_pedit_fields()
 */
77 int num_mod_hdr_actions;
/* kcalloc-ed buffer of HW modify-header actions; kfree-d by the flow add
 * paths after the modify-header id has been allocated
 */
78 void *mod_hdr_actions;
/* Encapsulation header type values; the tunnel_type of an encap entry is
 * switched on these in mlx5e_create_encap_header_ipv4().
 */
82 MLX5_HEADER_TYPE_VXLAN = 0x0,
83 MLX5_HEADER_TYPE_NVGRE = 0x1,
/* Sizing arguments passed to mlx5_create_auto_grouped_flow_table() when the
 * NIC TC offload flow table is created in mlx5e_tc_add_nic_flow().
 */
86 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
87 #define MLX5E_TC_TABLE_NUM_GROUPS 4
/* Install a NIC-mode steering rule for an already parsed TC flow.
 *
 * Steps visible in this excerpt:
 *  - choose the destination: the vlan flow table when FWD_DEST is set,
 *    otherwise a newly created flow counter when COUNT is set;
 *  - when MOD_HDR is set, allocate a modify-header id from the parsed
 *    actions and free the parsed action buffer;
 *  - create the auto-grouped TC flow table when priv->fs.tc.t does not hold
 *    a usable table yet;
 *  - add the rule with outer-header match criteria enabled.
 * The trailing statements form the unwind path: destroy the flow table,
 * release the modify-header id and destroy the counter.
 *
 * NOTE(review): several lines of this function (declarations, error checks,
 * return statements, braces) are missing from this excerpt, so the exact
 * control flow, including when the table is destroyed (presumably only when
 * table_created is set), has to be confirmed against the full source.
 */
89 static struct mlx5_flow_handle *
90 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
91 struct mlx5e_tc_flow_parse_attr *parse_attr,
92 struct mlx5e_tc_flow *flow)
94 struct mlx5_nic_flow_attr *attr = flow->nic_attr;
95 struct mlx5_core_dev *dev = priv->mdev;
96 struct mlx5_flow_destination dest = {};
97 struct mlx5_flow_act flow_act = {
98 .action = attr->action,
99 .flow_tag = attr->flow_tag,
102 struct mlx5_fc *counter = NULL;
103 struct mlx5_flow_handle *rule;
104 bool table_created = false;
/* destination selection: forward to the vlan table, or count */
107 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
108 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
109 dest.ft = priv->fs.vlan.ft.t;
110 } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
111 counter = mlx5_fc_create(dev, true);
113 return ERR_CAST(counter);
115 dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
116 dest.counter = counter;
/* header rewrite: turn the parsed actions into a modify-header id */
119 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
120 err = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL,
121 parse_attr->num_mod_hdr_actions,
122 parse_attr->mod_hdr_actions,
124 flow_act.modify_id = attr->mod_hdr_id;
125 kfree(parse_attr->mod_hdr_actions);
128 goto err_create_mod_hdr_id;
/* the TC flow table is created on demand */
132 if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
134 mlx5_create_auto_grouped_flow_table(priv->fs.ns,
136 MLX5E_TC_TABLE_NUM_ENTRIES,
137 MLX5E_TC_TABLE_NUM_GROUPS,
139 if (IS_ERR(priv->fs.tc.t)) {
140 netdev_err(priv->netdev,
141 "Failed to create tc offload table\n");
142 rule = ERR_CAST(priv->fs.tc.t);
146 table_created = true;
149 parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
150 rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
151 &flow_act, &dest, 1);
160 mlx5_destroy_flow_table(priv->fs.tc.t);
161 priv->fs.tc.t = NULL;
164 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
165 mlx5_modify_header_dealloc(priv->mdev,
167 err_create_mod_hdr_id:
168 mlx5_fc_destroy(dev, counter);
/* Tear down a NIC-mode flow.
 *
 * Fetches the counter attached to the rule, deletes the rule and destroys
 * the counter; destroys the TC flow table once no filters remain and a
 * table exists; finally releases the modify-header id when the flow used
 * the MOD_HDR action.
 */
173 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
174 struct mlx5e_tc_flow *flow)
176 struct mlx5_fc *counter = NULL;
/* the counter is looked up before the rule that references it is deleted */
178 counter = mlx5_flow_rule_counter(flow->rule);
179 mlx5_del_flow_rules(flow->rule);
180 mlx5_fc_destroy(priv->mdev, counter);
182 if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
183 mlx5_destroy_flow_table(priv->fs.tc.t);
184 priv->fs.tc.t = NULL;
187 if (flow->nic_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
188 mlx5_modify_header_dealloc(priv->mdev,
189 flow->nic_attr->mod_hdr_id);
/* Forward declaration: mlx5e_tc_add_fdb_flow() calls this helper before its
 * definition further down in the file.
 */
192 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
193 struct mlx5e_tc_flow *flow);
/* Install an e-switch (FDB) rule for an already parsed TC flow.
 *
 * Steps visible in this excerpt: add the vlan action, allocate a
 * modify-header id in the FDB namespace when MOD_HDR is set and free the
 * parsed action buffer, then add the offloaded rule. The trailing
 * statements undo those steps: release the modify-header id, delete the
 * vlan action and detach the encap entry when the ENCAP action is set.
 *
 * NOTE(review): the error checks, labels and return statements fall on
 * lines not shown here; confirm the exact unwind order against the full
 * source.
 */
195 static struct mlx5_flow_handle *
196 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
197 struct mlx5e_tc_flow_parse_attr *parse_attr,
198 struct mlx5e_tc_flow *flow)
200 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
201 struct mlx5_esw_flow_attr *attr = flow->esw_attr;
202 struct mlx5_flow_handle *rule;
205 err = mlx5_eswitch_add_vlan_action(esw, attr);
211 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
212 err = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_FDB,
213 parse_attr->num_mod_hdr_actions,
214 parse_attr->mod_hdr_actions,
216 kfree(parse_attr->mod_hdr_actions);
223 rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
230 if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
231 mlx5_modify_header_dealloc(priv->mdev,
234 mlx5_eswitch_del_vlan_action(esw, attr);
236 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
237 mlx5e_detach_encap(priv, flow);
/* Tear down an e-switch (FDB) flow.
 *
 * Deletes the offloaded rule and the vlan action, detaches the flow from
 * its encap entry when the ENCAP action is set, and releases the
 * modify-header id when the MOD_HDR action is set.
 */
241 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
242 struct mlx5e_tc_flow *flow)
244 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
245 struct mlx5_esw_flow_attr *attr = flow->esw_attr;
247 mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr);
249 mlx5_eswitch_del_vlan_action(esw, flow->esw_attr);
251 if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
252 mlx5e_detach_encap(priv, flow);
254 if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
255 mlx5_modify_header_dealloc(priv->mdev,
/* Unlink a flow from the encap entry it shares and release the entry when
 * this was its last flow.
 *
 * The neighbouring list node is sampled before list_del(). If that node is
 * an empty list afterwards, it is treated as the list head embedded in the
 * encap entry (its ->flows member) and the entry is torn down: the encap id
 * is released and the entry is removed from the encap hash list.
 * NOTE(review): any freeing of the entry itself falls on lines not shown in
 * this excerpt.
 */
259 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
260 struct mlx5e_tc_flow *flow)
262 struct list_head *next = flow->encap.next;
264 list_del(&flow->encap);
265 if (list_empty(next)) {
266 struct mlx5_encap_entry *e;
268 e = list_entry(next, struct mlx5_encap_entry, flows);
270 mlx5_encap_dealloc(priv->mdev, e->encap_id);
273 hlist_del_rcu(&e->encap_hlist);
/* Remove an offloaded flow, dispatching on the flow type flag: flows marked
 * MLX5E_TC_FLOW_ESWITCH use the FDB teardown, the NIC teardown is the other
 * path (the line joining the two calls is not shown in this excerpt).
 */
278 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
279 struct mlx5e_tc_flow *flow)
281 if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
282 mlx5e_tc_del_fdb_flow(priv, flow);
284 mlx5e_tc_del_nic_flow(priv, flow);
/* Add the VXLAN specific part of a tunnel match to the flow spec.
 *
 * Always requires ip_protocol == UDP (mask fully set). When the flower rule
 * uses the ENC_KEYID dissector key, the key id and its mask are converted
 * from big endian and written to the vxlan_vni field of the misc match set.
 *
 * NOTE(review): the last argument of each MLX5_ADDR_OF() initializer and of
 * the skb_flow_dissector_target() calls is on lines not shown here, so which
 * header set and which of f->key / f->mask each pointer refers to cannot be
 * confirmed from this excerpt.
 */
287 static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
288 struct tc_cls_flower_offload *f)
290 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
292 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
294 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
296 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
299 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
300 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
302 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
303 struct flow_dissector_key_keyid *key =
304 skb_flow_dissector_target(f->dissector,
305 FLOW_DISSECTOR_KEY_ENC_KEYID,
307 struct flow_dissector_key_keyid *mask =
308 skb_flow_dissector_target(f->dissector,
309 FLOW_DISSECTOR_KEY_ENC_KEYID,
311 MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
312 be32_to_cpu(mask->keyid));
313 MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
314 be32_to_cpu(key->keyid));
/* Fill the flow spec with the match on the tunnel (encapsulation) headers
 * of a decap flow.
 *
 * Behavior visible in this excerpt:
 *  - The ENC_PORTS key has to be present with a fully set UDP dst port
 *    mask. The dst port must be a VXLAN port known to the uplink device and
 *    the device must report the vxlan_encap_decap e-switch capability; then
 *    parse_vxlan_attr() adds the VXLAN match. Otherwise a warning is
 *    logged. UDP dst and src ports are matched with the given masks.
 *  - Depending on the ENC_CONTROL address type, the IPv4 or IPv6 tunnel
 *    addresses are matched and the ethertype is pinned to ETH_P_IP or
 *    ETH_P_IPV6.
 *  - The destination MAC is matched against the netdev address and IP
 *    fragments are excluded (frag mask 1, value 0).
 *
 * NOTE(review): the return statements, several braces and some argument
 * lines are missing from this excerpt; the caller treats a non-zero return
 * as failure, but the exact error codes cannot be confirmed from here.
 */
318 static int parse_tunnel_attr(struct mlx5e_priv *priv,
319 struct mlx5_flow_spec *spec,
320 struct tc_cls_flower_offload *f)
322 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
324 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
327 struct flow_dissector_key_control *enc_control =
328 skb_flow_dissector_target(f->dissector,
329 FLOW_DISSECTOR_KEY_ENC_CONTROL,
332 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
333 struct flow_dissector_key_ports *key =
334 skb_flow_dissector_target(f->dissector,
335 FLOW_DISSECTOR_KEY_ENC_PORTS,
337 struct flow_dissector_key_ports *mask =
338 skb_flow_dissector_target(f->dissector,
339 FLOW_DISSECTOR_KEY_ENC_PORTS,
/* the VXLAN port lookup is done on the uplink device of the e-switch */
341 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
342 struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
343 struct mlx5e_priv *up_priv = netdev_priv(up_dev);
345 /* Full udp dst port must be given */
346 if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
347 goto vxlan_match_offload_err;
349 if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
350 MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
351 parse_vxlan_attr(spec, f);
353 netdev_warn(priv->netdev,
354 "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
358 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
359 udp_dport, ntohs(mask->dst));
360 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
361 udp_dport, ntohs(key->dst));
363 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
364 udp_sport, ntohs(mask->src));
365 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
366 udp_sport, ntohs(key->src));
367 } else { /* udp dst port must be given */
368 vxlan_match_offload_err:
369 netdev_warn(priv->netdev,
370 "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
/* tunnel IP addresses, chosen by the ENC_CONTROL address type */
374 if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
375 struct flow_dissector_key_ipv4_addrs *key =
376 skb_flow_dissector_target(f->dissector,
377 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
379 struct flow_dissector_key_ipv4_addrs *mask =
380 skb_flow_dissector_target(f->dissector,
381 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
383 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
384 src_ipv4_src_ipv6.ipv4_layout.ipv4,
386 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
387 src_ipv4_src_ipv6.ipv4_layout.ipv4,
390 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
391 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
393 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
394 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
397 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
398 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
399 } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
400 struct flow_dissector_key_ipv6_addrs *key =
401 skb_flow_dissector_target(f->dissector,
402 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
404 struct flow_dissector_key_ipv6_addrs *mask =
405 skb_flow_dissector_target(f->dissector,
406 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
409 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
410 src_ipv4_src_ipv6.ipv6_layout.ipv6),
411 &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
412 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
413 src_ipv4_src_ipv6.ipv6_layout.ipv6),
414 &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
416 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
417 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
418 &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
419 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
420 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
421 &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
423 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
424 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
427 /* Enforce DMAC when offloading incoming tunneled flows.
428 * Flow counters require a match on the DMAC.
430 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
431 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
432 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
433 dmac_47_16), priv->netdev->dev_addr);
435 /* let software handle IP fragments */
436 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
437 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
/* Translate a flower classifier into a device flow spec and report the
 * minimal inline mode the match requires.
 *
 * Behavior visible in this excerpt:
 *  - *min_inline starts at MLX5_INLINE_MODE_L2 and is raised to
 *    MLX5_INLINE_MODE_IP or MLX5_INLINE_MODE_TCP_UDP as L3 / L4 fields are
 *    matched with a non-zero mask.
 *  - A rule using any dissector key outside the listed set is rejected
 *    with a warning.
 *  - When tunnel keys (ENC_*) are used together with ENC_CONTROL, the
 *    tunnel headers are filled by parse_tunnel_attr() and the header
 *    pointers are re-targeted; per the original comment they then point to
 *    the inner headers.
 *  - CONTROL, BASIC, ETH_ADDRS, VLAN, IPV4/IPV6 address and PORTS keys are
 *    then copied into the match criteria (mask) and match value (key).
 *    For PORTS only TCP and UDP are supported.
 *
 * NOTE(review): the min_inline parameter declaration, local declarations,
 * return statements, switch labels and several argument lines are missing
 * from this excerpt; error codes and exact branch structure must be
 * confirmed against the full source.
 */
442 static int __parse_cls_flower(struct mlx5e_priv *priv,
443 struct mlx5_flow_spec *spec,
444 struct tc_cls_flower_offload *f,
447 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
449 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
454 *min_inline = MLX5_INLINE_MODE_L2;
/* reject rules that use a dissector key this parser does not handle */
456 if (f->dissector->used_keys &
457 ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
458 BIT(FLOW_DISSECTOR_KEY_BASIC) |
459 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
460 BIT(FLOW_DISSECTOR_KEY_VLAN) |
461 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
462 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
463 BIT(FLOW_DISSECTOR_KEY_PORTS) |
464 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
465 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
466 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
467 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
468 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
469 netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
470 f->dissector->used_keys);
/* tunnel (decap) match: needs ENC_CONTROL plus at least one other ENC key */
474 if ((dissector_uses_key(f->dissector,
475 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
476 dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
477 dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
478 dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
479 struct flow_dissector_key_control *key =
480 skb_flow_dissector_target(f->dissector,
481 FLOW_DISSECTOR_KEY_ENC_CONTROL,
483 switch (key->addr_type) {
484 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
485 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
486 if (parse_tunnel_attr(priv, spec, f))
493 /* In decap flow, header pointers should point to the inner
494 * headers, outer header were already set by parse_tunnel_attr
496 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
498 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
502 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
503 struct flow_dissector_key_control *key =
504 skb_flow_dissector_target(f->dissector,
505 FLOW_DISSECTOR_KEY_CONTROL,
508 struct flow_dissector_key_control *mask =
509 skb_flow_dissector_target(f->dissector,
510 FLOW_DISSECTOR_KEY_CONTROL,
512 addr_type = key->addr_type;
514 if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
515 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
516 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
517 key->flags & FLOW_DIS_IS_FRAGMENT);
519 /* the HW doesn't need L3 inline to match on frag=no */
520 if (key->flags & FLOW_DIS_IS_FRAGMENT)
521 *min_inline = MLX5_INLINE_MODE_IP;
/* BASIC key: ethertype and IP protocol; ip_proto is reused for the PORTS key */
525 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
526 struct flow_dissector_key_basic *key =
527 skb_flow_dissector_target(f->dissector,
528 FLOW_DISSECTOR_KEY_BASIC,
530 struct flow_dissector_key_basic *mask =
531 skb_flow_dissector_target(f->dissector,
532 FLOW_DISSECTOR_KEY_BASIC,
534 ip_proto = key->ip_proto;
536 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
537 ntohs(mask->n_proto));
538 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
539 ntohs(key->n_proto));
541 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
543 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
547 *min_inline = MLX5_INLINE_MODE_IP;
/* ETH_ADDRS key: MAC addresses, mask into criteria and key into value */
550 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
551 struct flow_dissector_key_eth_addrs *key =
552 skb_flow_dissector_target(f->dissector,
553 FLOW_DISSECTOR_KEY_ETH_ADDRS,
555 struct flow_dissector_key_eth_addrs *mask =
556 skb_flow_dissector_target(f->dissector,
557 FLOW_DISSECTOR_KEY_ETH_ADDRS,
560 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
563 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
567 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
570 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
/* VLAN key: only programmed when the vlan id or priority mask is non-zero */
575 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
576 struct flow_dissector_key_vlan *key =
577 skb_flow_dissector_target(f->dissector,
578 FLOW_DISSECTOR_KEY_VLAN,
580 struct flow_dissector_key_vlan *mask =
581 skb_flow_dissector_target(f->dissector,
582 FLOW_DISSECTOR_KEY_VLAN,
584 if (mask->vlan_id || mask->vlan_priority) {
585 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
586 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
588 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
589 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
591 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
592 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
/* IPv4 addresses are copied as raw bytes; any non-zero mask requires IP inline */
596 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
597 struct flow_dissector_key_ipv4_addrs *key =
598 skb_flow_dissector_target(f->dissector,
599 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
601 struct flow_dissector_key_ipv4_addrs *mask =
602 skb_flow_dissector_target(f->dissector,
603 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
606 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
607 src_ipv4_src_ipv6.ipv4_layout.ipv4),
608 &mask->src, sizeof(mask->src));
609 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
610 src_ipv4_src_ipv6.ipv4_layout.ipv4),
611 &key->src, sizeof(key->src));
612 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
613 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
614 &mask->dst, sizeof(mask->dst));
615 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
616 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
617 &key->dst, sizeof(key->dst));
619 if (mask->src || mask->dst)
620 *min_inline = MLX5_INLINE_MODE_IP;
/* IPv6 addresses, same scheme as IPv4 above */
623 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
624 struct flow_dissector_key_ipv6_addrs *key =
625 skb_flow_dissector_target(f->dissector,
626 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
628 struct flow_dissector_key_ipv6_addrs *mask =
629 skb_flow_dissector_target(f->dissector,
630 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
633 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
634 src_ipv4_src_ipv6.ipv6_layout.ipv6),
635 &mask->src, sizeof(mask->src));
636 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
637 src_ipv4_src_ipv6.ipv6_layout.ipv6),
638 &key->src, sizeof(key->src));
640 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
641 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
642 &mask->dst, sizeof(mask->dst));
643 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
644 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
645 &key->dst, sizeof(key->dst));
647 if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
648 ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
649 *min_inline = MLX5_INLINE_MODE_IP;
/* PORTS key: written to the tcp_* or udp_* fields; other protocols are an error */
652 if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
653 struct flow_dissector_key_ports *key =
654 skb_flow_dissector_target(f->dissector,
655 FLOW_DISSECTOR_KEY_PORTS,
657 struct flow_dissector_key_ports *mask =
658 skb_flow_dissector_target(f->dissector,
659 FLOW_DISSECTOR_KEY_PORTS,
663 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
664 tcp_sport, ntohs(mask->src));
665 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
666 tcp_sport, ntohs(key->src));
668 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
669 tcp_dport, ntohs(mask->dst));
670 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
671 tcp_dport, ntohs(key->dst));
675 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
676 udp_sport, ntohs(mask->src));
677 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
678 udp_sport, ntohs(key->src));
680 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
681 udp_dport, ntohs(mask->dst));
682 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
683 udp_dport, ntohs(key->dst));
686 netdev_err(priv->netdev,
687 "Only UDP and TCP transport are supported\n");
691 if (mask->src || mask->dst)
692 *min_inline = MLX5_INLINE_MODE_TCP_UDP;
/* Parse a flower classifier into @spec and validate the inline mode.
 *
 * Runs __parse_cls_flower(); when that succeeds for an e-switch flow on a
 * representor whose vport is not the uplink, the minimal inline mode the
 * match needs is compared with the inline mode configured on the e-switch
 * and a warning is logged when the configured mode is too low.
 *
 * NOTE(review): the declarations of err and min_inline and the return
 * statements are on lines not shown here; the returned error code for the
 * inline mismatch must be confirmed against the full source.
 */
698 static int parse_cls_flower(struct mlx5e_priv *priv,
699 struct mlx5e_tc_flow *flow,
700 struct mlx5_flow_spec *spec,
701 struct tc_cls_flower_offload *f)
703 struct mlx5_core_dev *dev = priv->mdev;
704 struct mlx5_eswitch *esw = dev->priv.eswitch;
705 struct mlx5_eswitch_rep *rep = priv->ppriv;
709 err = __parse_cls_flower(priv, spec, f, &min_inline);
711 if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
712 rep->vport != FDB_UPLINK_VPORT) {
713 if (min_inline > esw->offloads.inline_mode) {
714 netdev_warn(priv->netdev,
715 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
716 min_inline, esw->offloads.inline_mode);
/* Scratch image of the packet headers a pedit action may touch.
 * The member list is on lines not shown in this excerpt; the offsetof()
 * uses in this file reference members named eth, ip4, ip6, tcp and udp.
 */
724 struct pedit_headers {
/* Maps a TC pedit header type to the byte offset of the corresponding
 * sub-header inside struct pedit_headers.
 */
732 static int pedit_header_offsets[] = {
733 [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
734 [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
735 [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
736 [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
737 [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
/* Address of the sub-header of type _htype inside the pedit_headers object
 * _ph, computed through pedit_header_offsets[]. The macro does no bounds
 * check on _htype.
 */
740 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
/* Accumulate one 32-bit pedit key into the per-header mask/value images.
 *
 * @hdr_type: pedit header type, checked against __PEDIT_HDR_TYPE_MAX
 * @mask:     bits of the 32-bit word to be modified
 * @val:      new value; only the bits selected by @mask are recorded
 * @offset:   byte offset of the word inside the selected header
 * @masks:    accumulated masks, updated in place
 * @vals:     accumulated values, updated in place
 *
 * A key that overlaps bits already recorded in the mask image is refused.
 * NOTE(review): the return statements, labels and the statement updating
 * the mask image are on lines not shown in this excerpt.
 */
742 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
743 struct pedit_headers *masks,
744 struct pedit_headers *vals)
746 u32 *curr_pmask, *curr_pval;
748 if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
751 curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
752 curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset);
754 if (*curr_pmask & mask) /* disallow acting twice on the same location */
758 *curr_pval |= (val & mask);
/* Table of header fields the HW can rewrite. Each entry gives the HW field
 * id, the field size in bytes and the offset of the field inside struct
 * pedit_headers; offload_pedit_fields() reads these as ->field, ->size and
 * ->offset. A 6-byte MAC address is covered by two entries (4 + 2 bytes)
 * and each IPv6 address by four 32-bit entries.
 */
772 static struct mlx5_fields fields[] = {
773 {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])},
774 {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_dest[4])},
775 {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])},
776 {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_source[4])},
777 {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 2, offsetof(struct pedit_headers, eth.h_proto)},
779 {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)},
780 {MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 1, offsetof(struct pedit_headers, ip4.ttl)},
781 {MLX5_ACTION_IN_FIELD_OUT_SIPV4, 4, offsetof(struct pedit_headers, ip4.saddr)},
782 {MLX5_ACTION_IN_FIELD_OUT_DIPV4, 4, offsetof(struct pedit_headers, ip4.daddr)},
784 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])},
785 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])},
786 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])},
787 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[3])},
788 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])},
789 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])},
790 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])},
791 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])},
793 {MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)},
794 {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)},
/* NOTE(review): ack_seq + 5 is presumably the byte holding the TCP flags - confirm against struct tcphdr */
795 {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5},
797 {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)},
798 {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)},
/* Convert the accumulated pedit masks/values into HW modify-header actions.
 *
 * For every entry of fields[] the set and add masks of that field are read.
 * A field with neither mask set is skipped; a field with both set is
 * refused, as is any field once nactions has reached the buffer capacity
 * (parse_attr->num_mod_hdr_actions on entry). The consumed mask bytes are
 * cleared so that the caller can detect masks left over for unsupported
 * fields. Only whole-field rewrites are accepted: the first set mask bit
 * must be bit 0 and the last one the top bit of the field. On completion
 * parse_attr->num_mod_hdr_actions holds the number of actions emitted.
 *
 * NOTE(review): in this excerpt the original comment below has lost its
 * closing lines, and several statements (declarations, branch conditions,
 * return statements) are not shown; confirm details against the full
 * source.
 */
801 /* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at
802 * max from the SW pedit action. On success, it says how many HW actions were
805 static int offload_pedit_fields(struct pedit_headers *masks,
806 struct pedit_headers *vals,
807 struct mlx5e_tc_flow_parse_attr *parse_attr)
809 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
810 int i, action_size, nactions, max_actions, first, last;
811 void *s_masks_p, *a_masks_p, *vals_p;
812 u32 s_mask, a_mask, val;
813 struct mlx5_fields *f;
818 set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
819 add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
820 set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
821 add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];
823 action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
824 action = parse_attr->mod_hdr_actions;
825 max_actions = parse_attr->num_mod_hdr_actions;
828 for (i = 0; i < ARRAY_SIZE(fields); i++) {
830 /* avoid seeing bits set from previous iterations */
831 s_mask = a_mask = mask = val = 0;
833 s_masks_p = (void *)set_masks + f->offset;
834 a_masks_p = (void *)add_masks + f->offset;
/* only f->size bytes are copied, the rest of each word stays zero */
836 memcpy(&s_mask, s_masks_p, f->size);
837 memcpy(&a_mask, a_masks_p, f->size);
839 if (!s_mask && !a_mask) /* nothing to offload here */
842 if (s_mask && a_mask) {
843 printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
847 if (nactions == max_actions) {
848 printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
853 cmd = MLX5_ACTION_TYPE_SET;
855 vals_p = (void *)set_vals + f->offset;
856 /* clear to denote we consumed this field */
857 memset(s_masks_p, 0, f->size);
859 cmd = MLX5_ACTION_TYPE_ADD;
861 vals_p = (void *)add_vals + f->offset;
862 /* clear to denote we consumed this field */
863 memset(a_masks_p, 0, f->size);
866 memcpy(&val, vals_p, f->size);
/* require the mask to cover the field from bit 0 to its top bit */
868 field_bsize = f->size * BITS_PER_BYTE;
869 first = find_first_bit(&mask, field_bsize);
870 last = find_last_bit(&mask, field_bsize);
871 if (first > 0 || last != (field_bsize - 1)) {
872 printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
877 MLX5_SET(set_action_in, action, action_type, cmd);
878 MLX5_SET(set_action_in, action, field, f->field);
880 if (cmd == MLX5_ACTION_TYPE_SET) {
881 MLX5_SET(set_action_in, action, offset, 0);
882 /* length is num of bits to be written, zero means length of 32 */
883 MLX5_SET(set_action_in, action, length, field_bsize);
/* the value is converted from network byte order according to the field width */
886 if (field_bsize == 32)
887 MLX5_SET(set_action_in, action, data, ntohl(val));
888 else if (field_bsize == 16)
889 MLX5_SET(set_action_in, action, data, ntohs(val));
890 else if (field_bsize == 8)
891 MLX5_SET(set_action_in, action, data, val);
/* advance to the next action slot in the buffer */
893 action += action_size;
897 parse_attr->num_mod_hdr_actions = nactions;
/* Allocate the zeroed buffer that will hold the HW modify-header actions
 * of one pedit action.
 *
 * The capacity is the device limit for the given namespace (FDB or NIC RX),
 * capped at 16 HW actions per pedit key. The buffer and its capacity are
 * stored in @parse_attr; ownership of the buffer passes to the caller via
 * parse_attr->mod_hdr_actions.
 * NOTE(review): the return statements are on lines not shown in this
 * excerpt.
 */
901 static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
902 const struct tc_action *a, int namespace,
903 struct mlx5e_tc_flow_parse_attr *parse_attr)
905 int nkeys, action_size, max_actions;
907 nkeys = tcf_pedit_nkeys(a);
908 action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
910 if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
911 max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
912 else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
913 max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
915 /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
916 max_actions = min(max_actions, nkeys * 16);
918 parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
919 if (!parse_attr->mod_hdr_actions)
922 parse_attr->num_mod_hdr_actions = max_actions;
/* All-zero reference image; parse_tc_pedit_action() compares the leftover
 * masks against it to detect pedit fields that were not consumed.
 */
926 static const struct pedit_headers zero_masks = {};
/* Translate a TC pedit action into HW modify-header actions.
 *
 * Steps visible in this excerpt:
 *  1. Walk the pedit keys; legacy keys (header type NETWORK) and commands
 *     other than SET / ADD are refused with a warning. Each accepted key is
 *     accumulated with set_pedit_val() into the per-command mask/value
 *     images, using the inverted pedit mask.
 *  2. Allocate the HW action buffer with alloc_mod_hdr_actions().
 *  3. Convert the images with offload_pedit_fields().
 *  4. Any mask bits still set afterwards belong to a field the HW table
 *     does not cover; this is reported with a hex dump and treated as
 *     failure.
 * The out_dealloc_parsed_actions label frees the HW action buffer.
 *
 * NOTE(review): error checks, goto targets and return statements are
 * partly on lines not shown here.
 */
928 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
929 const struct tc_action *a, int namespace,
930 struct mlx5e_tc_flow_parse_attr *parse_attr)
932 struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
933 int nkeys, i, err = -EOPNOTSUPP;
934 u32 mask, val, offset;
937 nkeys = tcf_pedit_nkeys(a);
939 memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
940 memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
942 for (i = 0; i < nkeys; i++) {
943 htype = tcf_pedit_htype(a, i);
944 cmd = tcf_pedit_cmd(a, i);
945 err = -EOPNOTSUPP; /* can't be all optimistic */
947 if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
948 printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n");
952 if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
953 printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd);
957 mask = tcf_pedit_mask(a, i);
958 val = tcf_pedit_val(a, i);
959 offset = tcf_pedit_offset(a, i);
/* the pedit mask is passed inverted to set_pedit_val() */
961 err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
966 err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
970 err = offload_pedit_fields(masks, vals, parse_attr);
972 goto out_dealloc_parsed_actions;
/* offload_pedit_fields() cleared every mask it consumed; leftovers are unsupported */
974 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
975 cmd_masks = &masks[cmd];
976 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
977 printk(KERN_WARNING "mlx5: attempt to offload an unsupported field (cmd %d)\n",
979 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
980 16, 1, cmd_masks, sizeof(zero_masks), true);
982 goto out_dealloc_parsed_actions;
988 out_dealloc_parsed_actions:
989 kfree(parse_attr->mod_hdr_actions);
/* Translate the TC actions of a rule into NIC flow attributes.
 *
 * The flow tag starts as MLX5_FS_DEFAULT_FLOW_TAG. Actions handled in this
 * excerpt:
 *  - gact shot:    DROP, plus COUNT when the device reports the NIC receive
 *                  flow_counter capability;
 *  - pedit:        parsed with parse_tc_pedit_action() in the kernel (NIC)
 *                  namespace, then MOD_HDR and FWD_DEST are set;
 *  - skbedit mark: the mark must fit MLX5E_TC_FLOW_ID_MASK, otherwise a
 *                  warning is logged; it becomes the flow tag and FWD_DEST
 *                  is set.
 *
 * NOTE(review): return statements, the single-action check announced by the
 * original comment and the handling of other action types are on lines not
 * shown here.
 */
994 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
995 struct mlx5e_tc_flow_parse_attr *parse_attr,
996 struct mlx5e_tc_flow *flow)
998 struct mlx5_nic_flow_attr *attr = flow->nic_attr;
999 const struct tc_action *a;
1003 if (tc_no_actions(exts))
1006 attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
1009 tcf_exts_to_list(exts, &actions);
1010 list_for_each_entry(a, &actions, list) {
1011 /* Only support a single action per rule */
1015 if (is_tcf_gact_shot(a)) {
1016 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
1017 if (MLX5_CAP_FLOWTABLE(priv->mdev,
1018 flow_table_properties_nic_receive.flow_counter))
1019 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1023 if (is_tcf_pedit(a)) {
1024 err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
1029 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
1030 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1034 if (is_tcf_skbedit_mark(a)) {
1035 u32 mark = tcf_skbedit_mark(a);
1037 if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
1038 netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
1043 attr->flow_tag = mark;
1044 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
/* Byte-wise comparison of two tunnel keys; returns the memcmp() result, so
 * zero means the keys are identical over the whole structure.
 */
1054 static inline int cmp_encap_info(struct ip_tunnel_key *a,
1055 struct ip_tunnel_key *b)
1057 return memcmp(a, b, sizeof(*a));
/* Hash of a tunnel key: jhash over all bytes of the structure with initial
 * value 0.
 */
1060 static inline int hash_encap_info(struct ip_tunnel_key *key)
1062 return jhash(key, sizeof(*key), 0);
/* Resolve the IPv4 route and neighbour used to reach a tunnel endpoint.
 *
 * With CONFIG_INET enabled, the route is looked up in the network namespace
 * of @mirred_dev. The egress device reported through @out_dev is the uplink
 * netdev when the route's device is not on the same HW e-switch, and the
 * route's own device is the other visible assignment. The hop limit of the
 * route is stored through out_ttl and the neighbour for the flow
 * destination address is looked up.
 *
 * NOTE(review): the fl4 and out_ttl parameter declarations, the handling of
 * the disabled-CONFIG_INET case, route release, storing the neighbour into
 * @out_n and the return statements are on lines not shown here.
 */
1065 static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
1066 struct net_device *mirred_dev,
1067 struct net_device **out_dev,
1069 struct neighbour **out_n,
1072 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1074 struct neighbour *n = NULL;
1076 #if IS_ENABLED(CONFIG_INET)
1079 rt = ip_route_output_key(dev_net(mirred_dev), fl4);
1080 ret = PTR_ERR_OR_ZERO(rt);
1086 /* if the egress device isn't on the same HW e-switch, we use the uplink */
1087 if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
1088 *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
1090 *out_dev = rt->dst.dev;
1092 *out_ttl = ip4_dst_hoplimit(&rt->dst);
1093 n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
/* Resolve the IPv6 route and neighbour used to reach a tunnel endpoint.
 *
 * With CONFIG_INET and CONFIG_IPV6 enabled, the route is looked up in the
 * network namespace of @mirred_dev. The hop limit of the route is stored
 * through out_ttl; the egress device reported through @out_dev is the
 * uplink netdev when the route's device is not on the same HW e-switch, and
 * the route's own device is the other visible assignment. Finally the
 * neighbour for the flow destination address is looked up.
 *
 * NOTE(review): the fl6 and out_ttl parameter declarations, error handling,
 * route release, storing the neighbour into @out_n and the return
 * statements are on lines not shown here.
 */
1102 static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
1103 struct net_device *mirred_dev,
1104 struct net_device **out_dev,
1106 struct neighbour **out_n,
1109 struct neighbour *n = NULL;
1110 struct dst_entry *dst;
1112 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
1113 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1116 dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
1123 *out_ttl = ip6_dst_hoplimit(dst);
1125 /* if the egress device isn't on the same HW e-switch, we use the uplink */
1126 if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
1127 *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
1129 *out_dev = dst->dev;
1134 n = dst_neigh_lookup(dst, &fl6->daddr);
/* Build the Ethernet / IPv4 / UDP / VXLAN encapsulation header in buf.
 *
 * The header layers are laid out back to back starting at buf, and the
 * first encap_size bytes are zeroed before being filled. Visible fields:
 * Ethernet destination (@h_dest), Ethernet source (address of @out_dev),
 * ethertype ETH_P_IP, IP protocol UDP, UDP destination port, and the VXLAN
 * flags and VNI.
 *
 * NOTE(review): several parameter declarations (including buf and vx_vni),
 * the remaining IPv4 field assignments and the return statement are on
 * lines not shown here.
 */
1143 static int gen_vxlan_header_ipv4(struct net_device *out_dev,
1145 unsigned char h_dest[ETH_ALEN],
1149 __be16 udp_dst_port,
1152 int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
1153 struct ethhdr *eth = (struct ethhdr *)buf;
1154 struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
1155 struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
1156 struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
1158 memset(buf, 0, encap_size);
1160 ether_addr_copy(eth->h_dest, h_dest);
1161 ether_addr_copy(eth->h_source, out_dev->dev_addr);
1162 eth->h_proto = htons(ETH_P_IP);
1168 ip->protocol = IPPROTO_UDP;
1172 udp->dest = udp_dst_port;
1173 vxh->vx_flags = VXLAN_HF_VNI;
1174 vxh->vx_vni = vxlan_vni_field(vx_vni);
/*
 * Write an Ethernet + IPv6 + UDP + VXLAN encapsulation header into buf.
 *
 * The four header pointers are obtained by stepping through buf in that
 * order. The first encap_size bytes (VXLAN_HLEN + sizeof(struct ipv6hdr) +
 * ETH_HLEN) are zeroed before any field is written, so every field not
 * set explicitly is left as 0.
 *
 * Visible field assignments: destination MAC from h_dest, source MAC from
 * out_dev->dev_addr, EtherType ETH_P_IPV6, IPv6 flow header initialised
 * via ip6_flow_hdr(ip6h, 0, 0), next header UDP, hop limit from ttl,
 * addresses copied from *daddr / *saddr, UDP destination port from
 * udp_dst_port, and the VXLAN flags/VNI words.
 *
 * NOTE(review): part of the parameter list and the return statement are
 * not visible in this listing; the caller stores the return value in a
 * variable named encap_size, so the function presumably returns
 * encap_size - confirm.
 */
1179 static int gen_vxlan_header_ipv6(struct net_device *out_dev,
1181 unsigned char h_dest[ETH_ALEN],
1183 struct in6_addr *daddr,
1184 struct in6_addr *saddr,
1185 __be16 udp_dst_port,
1188 int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
1189 struct ethhdr *eth = (struct ethhdr *)buf;
1190 struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
1191 struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
1192 struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
1194 memset(buf, 0, encap_size);
1196 ether_addr_copy(eth->h_dest, h_dest);
1197 ether_addr_copy(eth->h_source, out_dev->dev_addr);
1198 eth->h_proto = htons(ETH_P_IPV6);
1200 ip6_flow_hdr(ip6h, 0, 0);
1201 /* the HW fills up ipv6 payload len */
1202 ip6h->nexthdr = IPPROTO_UDP;
1203 ip6h->hop_limit = ttl;
1204 ip6h->daddr = *daddr;
1205 ip6h->saddr = *saddr;
/* only the UDP destination port is written here */
1207 udp->dest = udp_dst_port;
1208 vxh->vx_flags = VXLAN_HF_VNI;
1209 vxh->vx_vni = vxlan_vni_field(vx_vni);
/*
 * Build the IPv4 encapsulation header for encap entry e and allocate a
 * matching encap object on the device.
 *
 * Steps visible in this listing:
 *  - allocate a zeroed scratch buffer sized by the device's
 *    max_encap_header_size e-switch capability;
 *  - fill a flowi4 from the tunnel key (UDP protocol and destination port
 *    for VXLAN, then TOS, destination and source address);
 *  - resolve the route and neighbour via mlx5e_route_lookup_ipv4();
 *  - warn when the neighbour state has none of the NUD_VALID bits set;
 *  - record the egress device in e->out_dev and snapshot the neighbour's
 *    hardware address into e->h_dest;
 *  - generate the VXLAN header into the scratch buffer;
 *  - call mlx5_encap_alloc(), which receives &e->encap_id;
 *  - free the scratch buffer.
 *
 * NOTE(review): the allocation-failure check, the default switch cases,
 * the error/exit labels and the return statement are not visible in this
 * listing - confirm the exact error paths against the full source.
 */
1214 static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
1215 struct net_device *mirred_dev,
1216 struct mlx5_encap_entry *e,
1217 struct net_device **out_dev)
1219 int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
1220 struct ip_tunnel_key *tun_key = &e->tun_info.key;
1221 int encap_size, ttl, err;
1222 struct neighbour *n = NULL;
1223 struct flowi4 fl4 = {};
1226 encap_header = kzalloc(max_encap_size, GFP_KERNEL);
/* tunnel-type specific part of the flow key used for the route lookup */
1230 switch (e->tunnel_type) {
1231 case MLX5_HEADER_TYPE_VXLAN:
1232 fl4.flowi4_proto = IPPROTO_UDP;
1233 fl4.fl4_dport = tun_key->tp_dst;
1239 fl4.flowi4_tos = tun_key->tos;
1240 fl4.daddr = tun_key->u.ipv4.dst;
1241 fl4.saddr = tun_key->u.ipv4.src;
1243 err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
1248 if (!(n->nud_state & NUD_VALID)) {
1249 pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
1255 e->out_dev = *out_dev;
1257 neigh_ha_snapshot(e->h_dest, n, *out_dev);
/* write the tunnel header into the scratch buffer */
1259 switch (e->tunnel_type) {
1260 case MLX5_HEADER_TYPE_VXLAN:
1261 encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
1264 fl4.saddr, tun_key->tp_dst,
1265 tunnel_id_to_key32(tun_key->tun_id));
1272 err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
1273 encap_size, encap_header, &e->encap_id);
1277 kfree(encap_header);
/*
 * Build the IPv6 encapsulation header for encap entry e and allocate a
 * matching encap object on the device.
 *
 * Steps visible in this listing:
 *  - allocate a zeroed scratch buffer sized by the device's
 *    max_encap_header_size e-switch capability;
 *  - fill a flowi6 from the tunnel key (UDP protocol and destination port
 *    for VXLAN, then flow label built from the key's TOS and label,
 *    destination and source address);
 *  - resolve the route and neighbour via mlx5e_route_lookup_ipv6();
 *  - warn when the neighbour state has none of the NUD_VALID bits set;
 *  - record the egress device in e->out_dev and snapshot the neighbour's
 *    hardware address into e->h_dest;
 *  - generate the VXLAN header into the scratch buffer;
 *  - call mlx5_encap_alloc(), which receives &e->encap_id;
 *  - free the scratch buffer.
 *
 * NOTE(review): the allocation-failure check, the default switch cases,
 * the error/exit labels and the return statement are not visible in this
 * listing - confirm the exact error paths against the full source.
 */
1281 static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
1282 struct net_device *mirred_dev,
1283 struct mlx5_encap_entry *e,
1284 struct net_device **out_dev)
1287 int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
1288 struct ip_tunnel_key *tun_key = &e->tun_info.key;
1289 int encap_size, err, ttl = 0;
1290 struct neighbour *n = NULL;
1291 struct flowi6 fl6 = {};
1294 encap_header = kzalloc(max_encap_size, GFP_KERNEL);
/* tunnel-type specific part of the flow key used for the route lookup */
1298 switch (e->tunnel_type) {
1299 case MLX5_HEADER_TYPE_VXLAN:
1300 fl6.flowi6_proto = IPPROTO_UDP;
1301 fl6.fl6_dport = tun_key->tp_dst;
1308 fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
1309 fl6.daddr = tun_key->u.ipv6.dst;
1310 fl6.saddr = tun_key->u.ipv6.src;
1312 err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev,
1317 if (!(n->nud_state & NUD_VALID)) {
1318 pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr);
1324 e->out_dev = *out_dev;
1326 neigh_ha_snapshot(e->h_dest, n, *out_dev);
/* write the tunnel header into the scratch buffer */
1328 switch (e->tunnel_type) {
1329 case MLX5_HEADER_TYPE_VXLAN:
1330 encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
1333 &fl6.saddr, tun_key->tp_dst,
1334 tunnel_id_to_key32(tun_key->tun_id));
1341 err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
1342 encap_size, encap_header, &e->encap_id);
1346 kfree(encap_header);
/*
 * Find or create the encap entry matching tun_info in the e-switch encap
 * hash table (esw->offloads.encap_tbl).
 *
 * Steps visible in this listing:
 *  - validate the tunnel key: the UDP destination port must be non-zero
 *    and the UDP source port must be zero, otherwise a warning is logged;
 *  - accept the tunnel as VXLAN only when the destination port is found
 *    by mlx5e_vxlan_lookup_port() on the uplink device's priv and the
 *    vxlan_encap_decap e-switch capability is set; otherwise warn;
 *  - hash the key and scan the matching bucket for an entry whose key
 *    compares equal;
 *  - allocate a new entry, copy *tun_info into it, set its tunnel type
 *    and initialise its flow list;
 *  - build the encap header with the IPv4 or IPv6 helper according to the
 *    tunnel's address family;
 *  - insert the new entry into the hash table.
 *
 * err starts as -EOPNOTSUPP.
 *
 * NOTE(review): the handling of a hash-table hit, the allocation-failure
 * check, the error cleanup and all return statements are not visible in
 * this listing - confirm them against the full source.
 */
1350 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
1351 struct ip_tunnel_info *tun_info,
1352 struct net_device *mirred_dev,
1353 struct mlx5_esw_flow_attr *attr)
1355 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1356 struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
1357 struct mlx5e_priv *up_priv = netdev_priv(up_dev);
1358 unsigned short family = ip_tunnel_info_af(tun_info);
1359 struct ip_tunnel_key *key = &tun_info->key;
1360 struct mlx5_encap_entry *e;
1361 struct net_device *out_dev;
1362 int tunnel_type, err = -EOPNOTSUPP;
1366 /* udp dst port must be set */
1367 if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
1368 goto vxlan_encap_offload_err;
1370 /* setting udp src port isn't supported */
1371 if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
/* shared warning path: the goto above jumps into this if-body */
1372 vxlan_encap_offload_err:
1373 netdev_warn(priv->netdev,
1374 "must set udp dst port and not set udp src port\n");
1378 if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
1379 MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
1380 tunnel_type = MLX5_HEADER_TYPE_VXLAN;
1382 netdev_warn(priv->netdev,
1383 "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
1387 hash_key = hash_encap_info(key);
/* look for an existing entry with an identical tunnel key */
1389 hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
1390 encap_hlist, hash_key) {
1391 if (!cmp_encap_info(&e->tun_info.key, key)) {
/* set up a new encap entry */
1402 e = kzalloc(sizeof(*e), GFP_KERNEL);
1406 e->tun_info = *tun_info;
1407 e->tunnel_type = tunnel_type;
1408 INIT_LIST_HEAD(&e->flows);
1410 if (family == AF_INET)
1411 err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
1412 else if (family == AF_INET6)
1413 err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev);
1419 hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
/*
 * Translate the TC actions attached to a flower rule into e-switch (FDB)
 * flow attributes stored in flow->esw_attr.
 *
 * The attribute structure is cleared, its in_rep is set from priv->ppriv,
 * and each action in exts is then examined in turn:
 *  - gact "shot": DROP | COUNT;
 *  - pedit: parsed by parse_tc_pedit_action() for the FDB namespace, then
 *    MOD_HDR;
 *  - mirred egress redirect: FWD_DEST | COUNT towards the target device's
 *    representor when it shares a switchdev parent ID with priv->netdev;
 *    otherwise an encap path that attaches an encap entry, links the flow
 *    onto that entry's flow list and sets ENCAP | FWD_DEST | COUNT; a
 *    pr_err() reports devices that are not on the same switch HW;
 *  - tunnel set: remembers the tunnel info for a later redirect;
 *  - vlan: VLAN_POP, or VLAN_PUSH with the pushed VID (the push protocol
 *    is compared against ETH_P_8021Q);
 *  - tunnel release: DECAP.
 *
 * NOTE(review): the listing omits the branch conditions separating the
 * redirect cases, the per-action continue/return lines, the handling of
 * the VLAN modify case and the final return - confirm against the full
 * source.
 */
1428 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
1429 struct mlx5e_tc_flow_parse_attr *parse_attr,
1430 struct mlx5e_tc_flow *flow)
1432 struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1433 struct ip_tunnel_info *info = NULL;
1434 const struct tc_action *a;
1439 if (tc_no_actions(exts))
1442 memset(attr, 0, sizeof(*attr));
1443 attr->in_rep = priv->ppriv;
1445 tcf_exts_to_list(exts, &actions);
1446 list_for_each_entry(a, &actions, list) {
/* drop */
1447 if (is_tcf_gact_shot(a)) {
1448 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
1449 MLX5_FLOW_CONTEXT_ACTION_COUNT;
/* header rewrite */
1453 if (is_tcf_pedit(a)) {
1454 err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
1459 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
/* redirect to another device, optionally through an encap tunnel */
1463 if (is_tcf_mirred_egress_redirect(a)) {
1464 int ifindex = tcf_mirred_ifindex(a);
1465 struct net_device *out_dev;
1466 struct mlx5e_priv *out_priv;
1468 out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
1470 if (switchdev_port_same_parent_id(priv->netdev,
1472 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1473 MLX5_FLOW_CONTEXT_ACTION_COUNT;
1474 out_priv = netdev_priv(out_dev);
1475 attr->out_rep = out_priv->ppriv;
1477 err = mlx5e_attach_encap(priv, info,
1481 list_add(&flow->encap, &attr->encap->flows);
1482 attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
1483 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1484 MLX5_FLOW_CONTEXT_ACTION_COUNT;
1485 out_priv = netdev_priv(attr->encap->out_dev);
1486 attr->out_rep = out_priv->ppriv;
1488 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
1489 priv->netdev->name, out_dev->name);
/* tunnel key set: keep the tunnel info for the redirect handling above */
1495 if (is_tcf_tunnel_set(a)) {
1496 info = tcf_tunnel_info(a);
/* vlan push/pop */
1504 if (is_tcf_vlan(a)) {
1505 if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
1506 attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
1507 } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
1508 if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
1511 attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
1512 attr->vlan = tcf_vlan_push_vid(a);
1513 } else { /* action is TCA_VLAN_ACT_MODIFY */
/* tunnel key release */
1519 if (is_tcf_tunnel_release(a)) {
1520 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
/*
 * Offload a flower classifier rule.
 *
 * Steps visible in this listing:
 *  - choose the flow kind: an e-switch flow when an e-switch exists and
 *    is in SRIOV_OFFLOADS mode, otherwise a NIC flow; attr_size is the
 *    size of the matching attribute structure;
 *  - allocate the flow with attr_size extra bytes appended, plus a
 *    separate parse_attr scratch structure;
 *  - record the rule's cookie and the flow flags;
 *  - parse the match via parse_cls_flower();
 *  - parse the actions and add the rule through the FDB or NIC helpers,
 *    depending on the flow flags;
 *  - turn an error-pointer rule into an errno via PTR_ERR();
 *  - insert the flow into the tc hash table.
 *
 * NOTE(review): the error labels, the conditions guarding the
 * mlx5e_tc_del_flow() call, the release of parse_attr/flow and the return
 * are not visible in this listing - confirm against the full source.
 */
1529 int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
1530 struct tc_cls_flower_offload *f)
1532 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1533 struct mlx5e_tc_flow_parse_attr *parse_attr;
1534 struct mlx5e_tc_table *tc = &priv->fs.tc;
1535 struct mlx5e_tc_flow *flow;
1536 int attr_size, err = 0;
1539 if (esw && esw->mode == SRIOV_OFFLOADS) {
1540 flow_flags = MLX5E_TC_FLOW_ESWITCH;
1541 attr_size = sizeof(struct mlx5_esw_flow_attr);
1543 flow_flags = MLX5E_TC_FLOW_NIC;
1544 attr_size = sizeof(struct mlx5_nic_flow_attr);
/* both allocations are attempted before either result is checked */
1547 flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
1548 parse_attr = mlx5_vzalloc(sizeof(*parse_attr));
1549 if (!parse_attr || !flow) {
1554 flow->cookie = f->cookie;
1555 flow->flags = flow_flags;
1557 err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
1561 if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
1562 err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
1565 flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
1567 err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
1570 flow->rule = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
1573 if (IS_ERR(flow->rule)) {
1574 err = PTR_ERR(flow->rule);
1578 err = rhashtable_insert_fast(&tc->ht, &flow->node,
1586 mlx5e_tc_del_flow(priv, flow);
/*
 * Remove an offloaded flower rule: look the flow up in the tc hash table
 * by the rule's cookie, unlink it from the table and delete it through
 * mlx5e_tc_del_flow().
 *
 * NOTE(review): the not-found check, any freeing of the flow and the
 * return statement are not visible in this listing.
 */
1595 int mlx5e_delete_flower(struct mlx5e_priv *priv,
1596 struct tc_cls_flower_offload *f)
1598 struct mlx5e_tc_flow *flow;
1599 struct mlx5e_tc_table *tc = &priv->fs.tc;
1601 flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
1606 rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
1608 mlx5e_tc_del_flow(priv, flow);
/*
 * Report hardware counters for an offloaded flower rule.
 *
 * The flow is looked up in the tc hash table by the rule's cookie, the
 * counter attached to its rule is read through mlx5_fc_query_cached()
 * (bytes, packets, last-use time), and those values are passed to
 * tcf_action_stats_update() for every action of the rule.
 *
 * NOTE(review): the not-found / no-counter checks and the return
 * statement are not visible in this listing.
 */
1616 int mlx5e_stats_flower(struct mlx5e_priv *priv,
1617 struct tc_cls_flower_offload *f)
1619 struct mlx5e_tc_table *tc = &priv->fs.tc;
1620 struct mlx5e_tc_flow *flow;
1621 struct tc_action *a;
1622 struct mlx5_fc *counter;
1628 flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
1633 counter = mlx5_flow_rule_counter(flow->rule);
1637 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
/* push the same counter values to every action of the rule */
1641 tcf_exts_to_list(f->exts, &actions);
1642 list_for_each_entry(a, &actions, list)
1643 tcf_action_stats_update(a, bytes, packets, lastuse);
/*
 * Hash table parameters for the per-device table of offloaded flows:
 * entries are struct mlx5e_tc_flow, linked through their node member and
 * keyed by their cookie member; automatic shrinking is enabled.
 */
1650 static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
1651 .head_offset = offsetof(struct mlx5e_tc_flow, node),
1652 .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
1653 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
1654 .automatic_shrinking = true,
/*
 * Initialise the device's tc flow table: copy the shared hash table
 * parameters into the table and initialise the rhashtable with them.
 * Returns the result of rhashtable_init().
 */
1657 int mlx5e_tc_init(struct mlx5e_priv *priv)
1659 struct mlx5e_tc_table *tc = &priv->fs.tc;
1661 tc->ht_params = mlx5e_tc_flow_ht_params;
1662 return rhashtable_init(&tc->ht, &tc->ht_params);
/*
 * Adapter with untyped pointer arguments around mlx5e_tc_del_flow():
 * ptr is the flow to delete and arg is the owning mlx5e_priv. It is
 * passed as the per-element callback to rhashtable_free_and_destroy()
 * in mlx5e_tc_cleanup().
 *
 * NOTE(review): any statement after the delete call is not visible in
 * this listing.
 */
1665 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
1667 struct mlx5e_tc_flow *flow = ptr;
1668 struct mlx5e_priv *priv = arg;
1670 mlx5e_tc_del_flow(priv, flow);
/*
 * Tear down the device's tc offload state: destroy the flow hash table,
 * handing every remaining flow to _mlx5e_tc_del_flow with priv as its
 * argument, then destroy the flow table tc->t when it holds a valid
 * (neither NULL nor error-encoded) pointer.
 *
 * NOTE(review): the end of this function is not visible in this listing.
 */
1674 void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
1676 struct mlx5e_tc_table *tc = &priv->fs.tc;
1678 rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);
1680 if (!IS_ERR_OR_NULL(tc->t)) {
1681 mlx5_destroy_flow_table(tc->t);