/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/hash.h>
#include <linux/mlx5/fs.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include "en.h"

struct arfs_tuple {
	__be16 etype;
	u8     ip_proto;
	union {
		__be32 src_ipv4;
		struct in6_addr src_ipv6;
	};
	union {
		__be32 dst_ipv4;
		struct in6_addr dst_ipv6;
	};
	__be16 src_port;
	__be16 dst_port;
};

struct arfs_rule {
	struct mlx5e_priv	*priv;
	struct work_struct	arfs_work;
	struct mlx5_flow_handle	*rule;
	struct hlist_node	hlist;
	int			rxq;
	/* Flow ID passed to ndo_rx_flow_steer */
	int			flow_id;
	/* Filter ID returned by ndo_rx_flow_steer */
	int			filter_id;
	struct arfs_tuple	tuple;
};
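
/* Iterate over every aRFS rule across all tables and hash buckets; the
 * _safe hlist variant allows the current entry to be unlinked while
 * walking.
 */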
#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
	for (i = 0; i < ARFS_NUM_TYPES; i++) \
		mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)

#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
	for (j = 0; j < ARFS_HASH_SIZE; j++) \
		hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
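
/* Map an aRFS table type to the TTC traffic type that feeds it. */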
static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
{
	switch (type) {
	case ARFS_IPV4_TCP:
		return MLX5E_TT_IPV4_TCP;
	case ARFS_IPV4_UDP:
		return MLX5E_TT_IPV4_UDP;
	case ARFS_IPV6_TCP:
		return MLX5E_TT_IPV6_TCP;
	case ARFS_IPV6_UDP:
		return MLX5E_TT_IPV6_UDP;
	default:
		return -EINVAL;
	}
}
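
/* Re-point the TTC steering rules at the default TIRs so that traffic
 * bypasses the aRFS flow tables.
 */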
static int arfs_disable(struct mlx5e_priv *priv)
{
	struct mlx5_flow_destination dest = {};
	struct mlx5e_tir *tir = priv->indir_tir;
	int err = 0;
	int tt;
	int i;

	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	for (i = 0; i < ARFS_NUM_TYPES; i++) {
		dest.tir_num = tir[i].tirn;
		tt = arfs_get_tt(i);
		/* Modify ttc rules destination to bypass the aRFS tables */
		err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
						   &dest, NULL);
		if (err) {
			netdev_err(priv->netdev,
				   "%s: modify ttc destination failed\n",
				   __func__);
			return err;
		}
	}
	return 0;
}

static void arfs_del_rules(struct mlx5e_priv *priv);

int mlx5e_arfs_disable(struct mlx5e_priv *priv)
{
	arfs_del_rules(priv);

	return arfs_disable(priv);
}
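
/* Redirect the TTC steering rules to the per-type aRFS flow tables.
 * On failure, fall back to the default TIRs via arfs_disable().
 */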
int mlx5e_arfs_enable(struct mlx5e_priv *priv)
{
	struct mlx5_flow_destination dest = {};
	int err = 0;
	int tt;
	int i;

	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	for (i = 0; i < ARFS_NUM_TYPES; i++) {
		dest.ft = priv->fs.arfs.arfs_tables[i].ft.t;
		tt = arfs_get_tt(i);
		/* Modify ttc rules destination to point to the aRFS FTs */
		err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
						   &dest, NULL);
		if (err) {
			netdev_err(priv->netdev,
				   "%s: modify ttc destination failed err=%d\n",
				   __func__, err);
			arfs_disable(priv);
			return err;
		}
	}
	return 0;
}

static void arfs_destroy_table(struct arfs_table *arfs_t)
{
	mlx5_del_flow_rules(arfs_t->default_rule);
	mlx5e_destroy_flow_table(&arfs_t->ft);
}

void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
{
	int i;

	if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
		return;

	arfs_del_rules(priv);
	destroy_workqueue(priv->fs.arfs.wq);
	for (i = 0; i < ARFS_NUM_TYPES; i++) {
		if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
			arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
	}
}
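
/* Install the table's catch-all rule: an empty match spec forwards any
 * packet that missed the exact 5-tuple rules to the indirection (RSS)
 * TIR of the corresponding traffic type.
 */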
static int arfs_add_default_rule(struct mlx5e_priv *priv,
				 enum arfs_type type)
{
	struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
	struct mlx5e_tir *tir = priv->indir_tir;
	struct mlx5_flow_destination dest = {};
	MLX5_DECLARE_FLOW_ACT(flow_act);
	struct mlx5_flow_spec *spec;
	enum mlx5e_traffic_types tt;
	int err = 0;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		err = -ENOMEM;
		goto out;
	}

	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	tt = arfs_get_tt(type);
	if (tt == -EINVAL) {
		netdev_err(priv->netdev, "%s: bad arfs_type: %d\n",
			   __func__, type);
		err = -EINVAL;
		goto out;
	}

	dest.tir_num = tir[tt].tirn;

	arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec,
						   &flow_act,
						   &dest, 1);
	if (IS_ERR(arfs_t->default_rule)) {
		err = PTR_ERR(arfs_t->default_rule);
		arfs_t->default_rule = NULL;
		netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
			   __func__, type);
	}
out:
	kvfree(spec);
	return err;
}
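
/* Each aRFS table has two flow groups: group 1 holds the exact 5-tuple
 * rules (up to 64K - 1 entries), group 2 holds the single catch-all
 * default rule with no match criteria.
 */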
#define MLX5E_ARFS_NUM_GROUPS	2
#define MLX5E_ARFS_GROUP1_SIZE	(BIT(16) - 1)
#define MLX5E_ARFS_GROUP2_SIZE	BIT(0)
#define MLX5E_ARFS_TABLE_SIZE	(MLX5E_ARFS_GROUP1_SIZE +\
				 MLX5E_ARFS_GROUP2_SIZE)
static int arfs_create_groups(struct mlx5e_flow_table *ft,
			      enum arfs_type type)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	void *outer_headers_c;
	int ix = 0;
	u32 *in;
	int err;
	u8 *mc;

	ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
			sizeof(*ft->g), GFP_KERNEL);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in || !ft->g) {
		kvfree(ft->g);
		kvfree(in);
		return -ENOMEM;
	}

	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
	outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc,
				       outer_headers);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
	switch (type) {
	case ARFS_IPV4_TCP:
	case ARFS_IPV6_TCP:
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
		break;
	case ARFS_IPV4_UDP:
	case ARFS_IPV6_UDP:
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport);
		break;
	default:
		err = -EINVAL;
		goto out;
	}

	switch (type) {
	case ARFS_IPV4_TCP:
	case ARFS_IPV4_UDP:
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		break;
	case ARFS_IPV6_TCP:
	case ARFS_IPV6_UDP:
		memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       0xff, 16);
		memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, 16);
		break;
	default:
		err = -EINVAL;
		goto out;
	}

	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
	MLX5_SET_CFG(in, start_flow_index, ix);
	ix += MLX5E_ARFS_GROUP1_SIZE;
	MLX5_SET_CFG(in, end_flow_index, ix - 1);
	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
	if (IS_ERR(ft->g[ft->num_groups]))
		goto err;
	ft->num_groups++;

	memset(in, 0, inlen);
	MLX5_SET_CFG(in, start_flow_index, ix);
	ix += MLX5E_ARFS_GROUP2_SIZE;
	MLX5_SET_CFG(in, end_flow_index, ix - 1);
	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
	if (IS_ERR(ft->g[ft->num_groups]))
		goto err;
	ft->num_groups++;

	kvfree(in);
	return 0;

err:
	err = PTR_ERR(ft->g[ft->num_groups]);
	ft->g[ft->num_groups] = NULL;
out:
	kvfree(in);

	return err;
}

static int arfs_create_table(struct mlx5e_priv *priv,
			     enum arfs_type type)
{
	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
	struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
	struct mlx5_flow_table_attr ft_attr = {};
	int err;

	ft->num_groups = 0;

	ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE;
	ft_attr.level = MLX5E_ARFS_FT_LEVEL;
	ft_attr.prio = MLX5E_NIC_PRIO;

	ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
	if (IS_ERR(ft->t)) {
		err = PTR_ERR(ft->t);
		ft->t = NULL;
		return err;
	}

	err = arfs_create_groups(ft, type);
	if (err)
		goto err;

	err = arfs_add_default_rule(priv, type);
	if (err)
		goto err;

	return 0;
err:
	mlx5e_destroy_flow_table(ft);
	return err;
}
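
/* Create one flow table per aRFS type (IPv4/IPv6 x TCP/UDP) and the
 * workqueue that runs the rule add/modify work items.
 */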
int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
{
	int err = 0;
	int i;

	if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
		return 0;

	spin_lock_init(&priv->fs.arfs.arfs_lock);
	INIT_LIST_HEAD(&priv->fs.arfs.rules);
	priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
	if (!priv->fs.arfs.wq)
		return -ENOMEM;

	for (i = 0; i < ARFS_NUM_TYPES; i++) {
		err = arfs_create_table(priv, i);
		if (err)
			goto err;
	}
	return 0;
err:
	mlx5e_arfs_destroy_tables(priv);
	return err;
}

#define MLX5E_ARFS_EXPIRY_QUOTA 60
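
/* Walk all rules and move those the RFS core reports as expired
 * (rps_may_expire_flow()) onto a private list, then free them outside
 * the lock. The quota bounds how long the spinlock is held per scan.
 */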
static void arfs_may_expire_flow(struct mlx5e_priv *priv)
{
	struct arfs_rule *arfs_rule;
	struct hlist_node *htmp;
	int quota = 0;
	int i;
	int j;

	HLIST_HEAD(del_list);
	spin_lock_bh(&priv->fs.arfs.arfs_lock);
	mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
		if (!work_pending(&arfs_rule->arfs_work) &&
		    rps_may_expire_flow(priv->netdev,
					arfs_rule->rxq, arfs_rule->flow_id,
					arfs_rule->filter_id)) {
			hlist_del_init(&arfs_rule->hlist);
			hlist_add_head(&arfs_rule->hlist, &del_list);
			if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
				break;
		}
	}
	spin_unlock_bh(&priv->fs.arfs.arfs_lock);
	hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
		if (arfs_rule->rule)
			mlx5_del_flow_rules(arfs_rule->rule);
		hlist_del(&arfs_rule->hlist);
		kfree(arfs_rule);
	}
}

static void arfs_del_rules(struct mlx5e_priv *priv)
{
	struct hlist_node *htmp;
	struct arfs_rule *rule;
	int i;
	int j;

	HLIST_HEAD(del_list);
	spin_lock_bh(&priv->fs.arfs.arfs_lock);
	mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
		hlist_del_init(&rule->hlist);
		hlist_add_head(&rule->hlist, &del_list);
	}
	spin_unlock_bh(&priv->fs.arfs.arfs_lock);

	hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
		cancel_work_sync(&rule->arfs_work);
		if (rule->rule)
			mlx5_del_flow_rules(rule->rule);
		hlist_del(&rule->hlist);
		kfree(rule);
	}
}
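
/* Hash on the L4 ports only; IP addresses are compared on lookup by
 * arfs_cmp_ips(), so colliding flows simply share a bucket.
 */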
static struct hlist_head *
arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
		 __be16 dst_port)
{
	unsigned long l;
	int bucket_idx;

	l = (__force unsigned long)src_port |
	    ((__force unsigned long)dst_port << 2);

	bucket_idx = hash_long(l, ARFS_HASH_SHIFT);

	return &arfs_t->rules_hash[bucket_idx];
}

static u8 arfs_get_ip_proto(const struct sk_buff *skb)
{
	return (skb->protocol == htons(ETH_P_IP)) ?
		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
}

static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
					 u8 ip_proto, __be16 etype)
{
	if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
		return &arfs->arfs_tables[ARFS_IPV4_TCP];
	if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
		return &arfs->arfs_tables[ARFS_IPV4_UDP];
	if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
		return &arfs->arfs_tables[ARFS_IPV6_TCP];
	if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
		return &arfs->arfs_tables[ARFS_IPV6_UDP];

	return NULL;
}
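
/* Build a match spec for the rule's 5-tuple and insert a flow rule that
 * steers matching packets to the direct TIR of the requested RQ.
 */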
static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
					      struct arfs_rule *arfs_rule)
{
	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
	struct arfs_tuple *tuple = &arfs_rule->tuple;
	struct mlx5_flow_handle *rule = NULL;
	struct mlx5_flow_destination dest = {};
	MLX5_DECLARE_FLOW_ACT(flow_act);
	struct arfs_table *arfs_table;
	struct mlx5_flow_spec *spec;
	struct mlx5_flow_table *ft;
	int err = 0;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		err = -ENOMEM;
		goto out;
	}
	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ethertype);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype,
		 ntohs(tuple->etype));
	arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
	if (!arfs_table) {
		err = -EINVAL;
		goto out;
	}

	ft = arfs_table->ft.t;
	if (tuple->ip_proto == IPPROTO_TCP) {
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
				 outer_headers.tcp_dport);
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
				 outer_headers.tcp_sport);
		MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
			 ntohs(tuple->dst_port));
		MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
			 ntohs(tuple->src_port));
	} else {
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
				 outer_headers.udp_dport);
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
				 outer_headers.udp_sport);
		MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport,
			 ntohs(tuple->dst_port));
		MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport,
			 ntohs(tuple->src_port));
	}
	if (tuple->etype == htons(ETH_P_IP)) {
		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &tuple->src_ipv4,
		       4);
		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &tuple->dst_ipv4,
		       4);
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
				 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else {
		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &tuple->src_ipv6,
		       16);
		memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &tuple->dst_ipv6,
		       16);
		memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       0xff,
		       16);
		memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff,
		       16);
	}
	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		priv->channel_stats[arfs_rule->rxq].rq.arfs_err++;
		mlx5e_dbg(HW, priv,
			  "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n",
			  __func__, arfs_rule->filter_id, arfs_rule->rxq,
			  tuple->ip_proto, err);
	}

out:
	kvfree(spec);
	return err ? ERR_PTR(err) : rule;
}
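
/* Re-point an existing HW rule at a different RQ's direct TIR; used
 * when the RFS core re-steers a known flow to a new CPU/queue.
 */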
static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
				struct mlx5_flow_handle *rule, u16 rxq)
{
	struct mlx5_flow_destination dst = {};
	int err = 0;

	dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dst.tir_num = priv->direct_tir[rxq].tirn;
	err = mlx5_modify_rule_destination(rule, &dst, NULL);
	if (err)
		netdev_warn(priv->netdev,
			    "Failed to modify aRFS rule destination to rq=%d\n", rxq);
}
static void arfs_handle_work(struct work_struct *work)
{
	struct arfs_rule *arfs_rule = container_of(work,
						   struct arfs_rule,
						   arfs_work);
	struct mlx5e_priv *priv = arfs_rule->priv;
	struct mlx5_flow_handle *rule;

	mutex_lock(&priv->state_lock);
	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
		spin_lock_bh(&priv->fs.arfs.arfs_lock);
		hlist_del(&arfs_rule->hlist);
		spin_unlock_bh(&priv->fs.arfs.arfs_lock);

		mutex_unlock(&priv->state_lock);
		kfree(arfs_rule);
		goto out;
	}
	mutex_unlock(&priv->state_lock);

	if (!arfs_rule->rule) {
		rule = arfs_add_rule(priv, arfs_rule);
		if (IS_ERR(rule))
			goto out;
		arfs_rule->rule = rule;
	} else {
		arfs_modify_rule_rq(priv, arfs_rule->rule,
				    arfs_rule->rxq);
	}
out:
	arfs_may_expire_flow(priv);
}

/* return L4 destination port from ip4/6 packets */
static __be16 arfs_get_dst_port(const struct sk_buff *skb)
{
	char *transport_header;

	transport_header = skb_transport_header(skb);
	if (arfs_get_ip_proto(skb) == IPPROTO_TCP)
		return ((struct tcphdr *)transport_header)->dest;
	return ((struct udphdr *)transport_header)->dest;
}

/* return L4 source port from ip4/6 packets */
static __be16 arfs_get_src_port(const struct sk_buff *skb)
{
	char *transport_header;

	transport_header = skb_transport_header(skb);
	if (arfs_get_ip_proto(skb) == IPPROTO_TCP)
		return ((struct tcphdr *)transport_header)->source;
	return ((struct udphdr *)transport_header)->source;
}
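
/* Allocate a rule entry for a new flow, fill its tuple from the skb,
 * assign the next filter ID, and link it into the hash bucket. Called
 * under arfs_lock from atomic context, hence GFP_ATOMIC.
 */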
static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
					 struct arfs_table *arfs_t,
					 const struct sk_buff *skb,
					 u16 rxq, u32 flow_id)
{
	struct arfs_rule *rule;
	struct arfs_tuple *tuple;

	rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
	if (!rule)
		return NULL;

	rule->priv = priv;
	rule->rxq = rxq;
	INIT_WORK(&rule->arfs_work, arfs_handle_work);

	tuple = &rule->tuple;
	tuple->etype = skb->protocol;
	if (tuple->etype == htons(ETH_P_IP)) {
		tuple->src_ipv4 = ip_hdr(skb)->saddr;
		tuple->dst_ipv4 = ip_hdr(skb)->daddr;
	} else {
		memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr,
		       sizeof(struct in6_addr));
		memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr,
		       sizeof(struct in6_addr));
	}
	tuple->ip_proto = arfs_get_ip_proto(skb);
	tuple->src_port = arfs_get_src_port(skb);
	tuple->dst_port = arfs_get_dst_port(skb);

	rule->flow_id = flow_id;
	rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;

	hlist_add_head(&rule->hlist,
		       arfs_hash_bucket(arfs_t, tuple->src_port,
					tuple->dst_port));
	return rule;
}

static bool arfs_cmp_ips(struct arfs_tuple *tuple,
			 const struct sk_buff *skb)
{
	if (tuple->etype == htons(ETH_P_IP) &&
	    tuple->src_ipv4 == ip_hdr(skb)->saddr &&
	    tuple->dst_ipv4 == ip_hdr(skb)->daddr)
		return true;
	if (tuple->etype == htons(ETH_P_IPV6) &&
	    (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr,
		     sizeof(struct in6_addr))) &&
	    (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr,
		     sizeof(struct in6_addr))))
		return true;
	return false;
}

static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
					const struct sk_buff *skb)
{
	struct arfs_rule *arfs_rule;
	struct hlist_head *head;
	__be16 src_port = arfs_get_src_port(skb);
	__be16 dst_port = arfs_get_dst_port(skb);

	head = arfs_hash_bucket(arfs_t, src_port, dst_port);
	hlist_for_each_entry(arfs_rule, head, hlist) {
		if (arfs_rule->tuple.src_port == src_port &&
		    arfs_rule->tuple.dst_port == dst_port &&
		    arfs_cmp_ips(&arfs_rule->tuple, skb)) {
			return arfs_rule;
		}
	}

	return NULL;
}
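
/* .ndo_rx_flow_steer entry point: map the skb to an aRFS table, reuse
 * or allocate a rule for its 5-tuple, and defer the actual HW rule
 * add/modify to the aRFS workqueue. Returns the filter ID that the RFS
 * core later passes back to rps_may_expire_flow().
 */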
int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
			u16 rxq_index, u32 flow_id)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
	struct arfs_table *arfs_t;
	struct arfs_rule *arfs_rule;

	if (skb->protocol != htons(ETH_P_IP) &&
	    skb->protocol != htons(ETH_P_IPV6))
		return -EPROTONOSUPPORT;

	if (skb->encapsulation)
		return -EPROTONOSUPPORT;

	arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol);
	if (!arfs_t)
		return -EPROTONOSUPPORT;

	spin_lock_bh(&arfs->arfs_lock);
	arfs_rule = arfs_find_rule(arfs_t, skb);
	if (arfs_rule) {
		if (arfs_rule->rxq == rxq_index) {
			spin_unlock_bh(&arfs->arfs_lock);
			return arfs_rule->filter_id;
		}
		arfs_rule->rxq = rxq_index;
	} else {
		arfs_rule = arfs_alloc_rule(priv, arfs_t, skb,
					    rxq_index, flow_id);
		if (!arfs_rule) {
			spin_unlock_bh(&arfs->arfs_lock);
			return -ENOMEM;
		}
	}
	queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
	spin_unlock_bh(&arfs->arfs_lock);
	return arfs_rule->filter_id;
}