2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/hash.h>
34 #include <linux/mlx5/fs.h>
36 #include <linux/ipv6.h>
/* Geometry of the per-table rule hash: bucket indices are ARFS_HASH_SHIFT
 * bits wide (see arfs_hash_bucket()), and each table holds
 * 2^BITS_PER_BYTE buckets.
 */
39 #define ARFS_HASH_SHIFT BITS_PER_BYTE
40 #define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
/* Members of the per-traffic-type aRFS table (struct arfs_table). */
/* Flow table that holds the steering rules of this type */
43 struct mlx5e_flow_table ft;
/* Rule added with no match spec by arfs_add_default_rule(); NULL if absent */
44 struct mlx5_flow_handle *default_rule;
/* Buckets of arfs_rule entries, indexed via arfs_hash_bucket() */
45 struct hlist_head rules_hash[ARFS_HASH_SIZE];
/* aRFS context reachable through priv->fs.arfs: one arfs_table per aRFS
 * type plus the workqueue on which rule updates are run.
 * NOTE(review): further members used elsewhere in this file (arfs_lock,
 * last_filter_id) are declared on lines not shown in this listing.
 */
56 struct mlx5e_arfs_tables {
57 struct arfs_table arfs_tables[ARFS_NUM_TYPES];
58 /* Protect aRFS rules list */
/* Initialised with INIT_LIST_HEAD() in mlx5e_arfs_create_tables() */
60 struct list_head rules;
/* Single-threaded workqueue; mlx5e_rx_flow_steer() queues arfs_work on it */
62 struct workqueue_struct *wq;
/* IPv6 source address of the flow; filled by arfs_alloc_rule() when the
 * ethertype is not ETH_P_IP and compared by arfs_cmp() for ETH_P_IPV6.
 */
70 struct in6_addr src_ipv6;
/* IPv6 destination address of the flow (same usage as src_ipv6) */
74 struct in6_addr dst_ipv6;
/* Members of one aRFS steering rule (struct arfs_rule). */
/* Owning device context; read back by arfs_handle_work() */
81 struct mlx5e_priv *priv;
/* Deferred work item, handled by arfs_handle_work() */
82 struct work_struct arfs_work;
/* Hardware flow rule; NULL until arfs_handle_work() installs it */
83 struct mlx5_flow_handle *rule;
/* Link in a table's hash bucket, or in a temporary deletion list */
84 struct hlist_node hlist;
86 /* Flow ID passed to ndo_rx_flow_steer */
88 /* Filter ID returned by ndo_rx_flow_steer */
/* Flow key this rule matches; compared against packets by arfs_cmp() */
90 struct arfs_tuple tuple;
/* Visit every rule of every aRFS table. i walks the table types and j the
 * hash buckets; hn is the current entry and tmp the look-ahead node that
 * makes it safe to unlink hn inside the body. The expansion is a nest of
 * for loops, so a break statement in the body leaves only the innermost
 * (per-bucket) loop.
 */
93 #define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
94 for (i = 0; i < ARFS_NUM_TYPES; i++) \
95 mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)
/* Visit every rule in one hash array: j walks the ARFS_HASH_SIZE buckets and
 * each bucket is traversed with the removal-safe hlist iterator, linking
 * entries through their hlist member.
 */
97 #define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
98 for (j = 0; j < ARFS_HASH_SIZE; j++) \
99 hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
/* Translate an aRFS table type into the mlx5e traffic type used to address
 * the matching TTC rule and TIR (IPv4/IPv6 combined with TCP/UDP).
 * NOTE(review): the case labels and the fallback for an unknown type are on
 * lines not shown in this listing - confirm the mapping in the full source.
 */
101 static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
105 return MLX5E_TT_IPV4_TCP;
107 return MLX5E_TT_IPV4_UDP;
109 return MLX5E_TT_IPV6_TCP;
111 return MLX5E_TT_IPV6_UDP;
/* Restore the default destination of the TTC rule of every aRFS traffic
 * type. A failing mlx5e_ttc_fwd_default_dest() call is logged together with
 * the traffic type and the error code.
 * NOTE(review): whether the loop stops on the first error, and the value
 * returned, are on lines not shown in this listing.
 */
117 static int arfs_disable(struct mlx5e_priv *priv)
121 for (i = 0; i < ARFS_NUM_TYPES; i++) {
122 /* Modify ttc rules destination back to their default */
123 err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i));
125 netdev_err(priv->netdev,
126 "%s: modify ttc[%d] default destination failed, err(%d)\n",
127 __func__, arfs_get_tt(i), err);
/* Forward declaration: arfs_del_rules() is defined further down the file. */
134 static void arfs_del_rules(struct mlx5e_priv *priv);
/* Disable aRFS steering: remove every installed aRFS rule, then point the
 * TTC rules back at their default destinations.
 * Returns the result of arfs_disable().
 */
136 int mlx5e_arfs_disable(struct mlx5e_priv *priv)
138 arfs_del_rules(priv);
140 return arfs_disable(priv);
/* Enable aRFS steering: for every aRFS type, retarget the TTC rule of the
 * corresponding traffic type so that it forwards to that type's aRFS flow
 * table. A failing mlx5e_ttc_fwd_dest() call is logged.
 * NOTE(review): the error unwinding and the return statements are on lines
 * not shown in this listing.
 */
143 int mlx5e_arfs_enable(struct mlx5e_priv *priv)
145 struct mlx5_flow_destination dest = {};
/* The destination is always a flow table; only dest.ft changes per type */
148 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
149 for (i = 0; i < ARFS_NUM_TYPES; i++) {
150 dest.ft = priv->fs.arfs->arfs_tables[i].ft.t;
151 /* Modify ttc rules destination to point on the aRFS FTs */
152 err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest);
154 netdev_err(priv->netdev,
155 "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
156 __func__, arfs_get_tt(i), err);
/* Tear down one aRFS table: delete its default rule first, then destroy the
 * flow table itself.
 */
164 static void arfs_destroy_table(struct arfs_table *arfs_t)
166 mlx5_del_flow_rules(arfs_t->default_rule);
167 mlx5e_destroy_flow_table(&arfs_t->ft);
/* Release everything hanging off priv->fs.arfs except the allocation itself:
 * all steering rules, the workqueue, and every table whose flow table
 * pointer is neither NULL nor an error value. The pointer check lets this
 * run on a partially initialised context, which is how the error path of
 * mlx5e_arfs_create_tables() uses it.
 */
170 static void _mlx5e_cleanup_tables(struct mlx5e_priv *priv)
174 arfs_del_rules(priv);
175 destroy_workqueue(priv->fs.arfs->wq);
176 for (i = 0; i < ARFS_NUM_TYPES; i++) {
177 if (!IS_ERR_OR_NULL(priv->fs.arfs->arfs_tables[i].ft.t))
178 arfs_destroy_table(&priv->fs.arfs->arfs_tables[i]);
/* Destroy the aRFS context of a device and free priv->fs.arfs. The
 * NETIF_F_NTUPLE test is the same one mlx5e_arfs_create_tables() performs
 * before allocating the context.
 * NOTE(review): the statement guarded by the feature test is not shown in
 * this listing (presumably an early return).
 */
182 void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
184 if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
187 _mlx5e_cleanup_tables(priv);
188 kvfree(priv->fs.arfs);
/* Add the default rule of the aRFS table selected by type. The rule is
 * created without a match spec and forwards to the TIR taken from
 * priv->indir_tir for the table's traffic type. A traffic type rejected by
 * the check after arfs_get_tt() is reported as a bad arfs_type. If
 * mlx5_add_flow_rules() fails, default_rule is reset to NULL so it never
 * holds an error pointer, and the failure is logged.
 * NOTE(review): the second parameter, some declarations and the return
 * statements are on lines not shown in this listing.
 */
191 static int arfs_add_default_rule(struct mlx5e_priv *priv,
194 struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type];
195 struct mlx5e_tir *tir = priv->indir_tir;
196 struct mlx5_flow_destination dest = {};
197 MLX5_DECLARE_FLOW_ACT(flow_act);
198 enum mlx5e_traffic_types tt;
201 dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
202 tt = arfs_get_tt(type);
204 netdev_err(priv->netdev, "%s: bad arfs_type: %d\n",
209 /* FIXME: Must use mlx5e_ttc_get_default_dest(),
210 * but can't since TTC default is not setup yet !
212 dest.tir_num = tir[tt].tirn;
213 arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
216 if (IS_ERR(arfs_t->default_rule)) {
217 err = PTR_ERR(arfs_t->default_rule);
218 arfs_t->default_rule = NULL;
219 netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
/* Layout of every aRFS flow table: two flow groups, the first with
 * 2^16 - 1 entries and the second with a single entry, 2^16 in total.
 * arfs_create_groups() gives the first group the tuple match criteria and
 * creates the second one with no match criteria.
 */
226 #define MLX5E_ARFS_NUM_GROUPS 2
227 #define MLX5E_ARFS_GROUP1_SIZE (BIT(16) - 1)
228 #define MLX5E_ARFS_GROUP2_SIZE BIT(0)
229 #define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\
230 MLX5E_ARFS_GROUP2_SIZE)
/* Create the two flow groups of an aRFS flow table. The first group spans
 * MLX5E_ARFS_GROUP1_SIZE entries and matches on outer-header fields
 * (ethertype, L4 ports, IP addresses); the second spans
 * MLX5E_ARFS_GROUP2_SIZE entries and is created from a zeroed command
 * buffer, i.e. with no match criteria.
 * NOTE(review): the per-type selection of port and address fields, the
 * allocation-failure handling and the cleanup labels are on lines not shown
 * in this listing. Confirm that ft->g is not freed both here and by the
 * mlx5e_destroy_flow_table() call in arfs_create_table()'s error path.
 */
231 static int arfs_create_groups(struct mlx5e_flow_table *ft,
234 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
235 void *outer_headers_c;
/* Group handle array and the scratch command buffer used for both groups */
241 ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
242 sizeof(*ft->g), GFP_KERNEL);
243 in = kvzalloc(inlen, GFP_KERNEL);
/* Locate the match-criteria area inside the command buffer */
250 mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
251 outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc,
/* Every rule of the first group matches the ethertype exactly */
253 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
/* L4 port criteria: TCP fields, then UDP fields */
257 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
258 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
262 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
263 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport);
/* IP address criteria: IPv4 layout, then IPv6 layout */
273 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
274 src_ipv4_src_ipv6.ipv4_layout.ipv4);
275 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
276 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
280 memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
281 src_ipv4_src_ipv6.ipv6_layout.ipv6),
283 memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
284 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
/* First group: flow indices [ix, ix + MLX5E_ARFS_GROUP1_SIZE - 1] */
292 MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
293 MLX5_SET_CFG(in, start_flow_index, ix);
294 ix += MLX5E_ARFS_GROUP1_SIZE;
295 MLX5_SET_CFG(in, end_flow_index, ix - 1);
296 ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
297 if (IS_ERR(ft->g[ft->num_groups]))
/* Second group: clear the buffer so no match criteria remain */
301 memset(in, 0, inlen);
302 MLX5_SET_CFG(in, start_flow_index, ix);
303 ix += MLX5E_ARFS_GROUP2_SIZE;
304 MLX5_SET_CFG(in, end_flow_index, ix - 1);
305 ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
306 if (IS_ERR(ft->g[ft->num_groups]))
/* Failure: keep the error code and leave NULL, not an error pointer, in
 * the group slot.
 */
314 err = PTR_ERR(ft->g[ft->num_groups]);
315 ft->g[ft->num_groups] = NULL;
/* Create the aRFS flow table for one type: the table itself, its two flow
 * groups and its default rule, in that order. The table is destroyed again
 * with mlx5e_destroy_flow_table() on the cleanup path.
 * NOTE(review): the second parameter, the error checks between the steps and
 * the return statements are on lines not shown in this listing.
 */
322 static int arfs_create_table(struct mlx5e_priv *priv,
325 struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
326 struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
327 struct mlx5_flow_table_attr ft_attr = {};
/* Table sized for both groups, at the aRFS level of the NIC priority */
332 ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE;
333 ft_attr.level = MLX5E_ARFS_FT_LEVEL;
334 ft_attr.prio = MLX5E_NIC_PRIO;
336 ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
338 err = PTR_ERR(ft->t);
343 err = arfs_create_groups(ft, type);
347 err = arfs_add_default_rule(priv, type);
353 mlx5e_destroy_flow_table(ft);
/* Allocate and initialise the aRFS context of a device: the zeroed context
 * structure, its lock and rule list, a single-threaded workqueue named
 * "mlx5e_arfs", and one flow table per aRFS type. The cleanup path runs
 * _mlx5e_cleanup_tables() and frees the context.
 * NOTE(review): the action taken when NETIF_F_NTUPLE is not set, the error
 * checks and labels, and the return values are on lines not shown here.
 */
357 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
362 if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
365 priv->fs.arfs = kvzalloc(sizeof(*priv->fs.arfs), GFP_KERNEL);
369 spin_lock_init(&priv->fs.arfs->arfs_lock);
370 INIT_LIST_HEAD(&priv->fs.arfs->rules);
371 priv->fs.arfs->wq = create_singlethread_workqueue("mlx5e_arfs");
372 if (!priv->fs.arfs->wq)
/* One flow table (with groups and default rule) per aRFS type */
375 for (i = 0; i < ARFS_NUM_TYPES; i++) {
376 err = arfs_create_table(priv, i);
/* Cleanup: tolerates tables that were never created */
383 _mlx5e_cleanup_tables(priv);
385 kvfree(priv->fs.arfs);
/* Threshold compared against the count of rules collected in one call of
 * arfs_may_expire_flow().
 */
389 #define MLX5E_ARFS_EXPIRY_QUOTA 60
/* Expire aRFS rules in two phases. Under arfs_lock, every rule that has no
 * pending work and that rps_may_expire_flow() allows to expire is moved from
 * its hash bucket to a private list. After the lock is dropped, the hardware
 * rules of the collected entries are deleted and the entries unlinked.
 * NOTE(review): the statement executed once the quota is exceeded is not
 * shown in this listing; if it is a break, it leaves only the innermost
 * (per-bucket) loop of mlx5e_for_each_arfs_rule, so the scan continues with
 * the next bucket. The guard around mlx5_del_flow_rules() and the release of
 * the entry are not shown either.
 */
391 static void arfs_may_expire_flow(struct mlx5e_priv *priv)
393 struct arfs_rule *arfs_rule;
394 struct hlist_node *htmp;
395 HLIST_HEAD(del_list);
/* Phase 1: collect expirable rules while holding the lock */
400 spin_lock_bh(&priv->fs.arfs->arfs_lock);
401 mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs->arfs_tables, i, j) {
402 if (!work_pending(&arfs_rule->arfs_work) &&
403 rps_may_expire_flow(priv->netdev,
404 arfs_rule->rxq, arfs_rule->flow_id,
405 arfs_rule->filter_id)) {
406 hlist_del_init(&arfs_rule->hlist);
407 hlist_add_head(&arfs_rule->hlist, &del_list);
408 if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
/* Phase 2: hardware teardown happens outside the spinlock */
412 spin_unlock_bh(&priv->fs.arfs->arfs_lock);
413 hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
415 mlx5_del_flow_rules(arfs_rule->rule);
416 hlist_del(&arfs_rule->hlist);
/* Remove every aRFS rule of the device. Under arfs_lock all rules are moved
 * from the hash buckets to a private list; afterwards, without the lock,
 * each rule's work item is cancelled, its hardware rule deleted and the
 * entry unlinked.
 * NOTE(review): the guard around mlx5_del_flow_rules() and the release of
 * the entry are on lines not shown in this listing.
 */
421 static void arfs_del_rules(struct mlx5e_priv *priv)
423 struct hlist_node *htmp;
424 struct arfs_rule *rule;
425 HLIST_HEAD(del_list);
/* Detach all rules from the hash tables while holding the lock */
429 spin_lock_bh(&priv->fs.arfs->arfs_lock);
430 mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs->arfs_tables, i, j) {
431 hlist_del_init(&rule->hlist);
432 hlist_add_head(&rule->hlist, &del_list);
434 spin_unlock_bh(&priv->fs.arfs->arfs_lock);
/* cancel_work_sync() waits for a running handler, so it is called only
 * after the spinlock has been released.
 */
436 hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
437 cancel_work_sync(&rule->arfs_work);
439 mlx5_del_flow_rules(rule->rule);
440 hlist_del(&rule->hlist);
/* Return the hash bucket of an aRFS table for a source/destination port
 * pair. The two ports are combined into one key which hash_long() reduces to
 * an ARFS_HASH_SHIFT-bit bucket index.
 * NOTE(review): dst_port is shifted left by only 2 bits, so the ports
 * overlap in the key. Insertion (arfs_alloc_rule) and lookup
 * (arfs_find_rule) both go through this function, so the result is
 * consistent; confirm whether a wider shift was intended for better
 * distribution.
 */
445 static struct hlist_head *
446 arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
452 l = (__force unsigned long)src_port |
453 ((__force unsigned long)dst_port << 2);
455 bucket_idx = hash_long(l, ARFS_HASH_SHIFT);
457 return &arfs_t->rules_hash[bucket_idx];
/* Select the aRFS table for a flow from its L4 protocol and its ethertype
 * (network byte order): IPv4 or IPv6 combined with TCP or UDP.
 * NOTE(review): the fallback for any other combination is not shown in this
 * listing; mlx5e_rx_flow_steer() has an -EPROTONOSUPPORT return right after
 * calling this function.
 */
460 static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
461 u8 ip_proto, __be16 etype)
463 if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
464 return &arfs->arfs_tables[ARFS_IPV4_TCP];
465 if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
466 return &arfs->arfs_tables[ARFS_IPV4_UDP];
467 if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
468 return &arfs->arfs_tables[ARFS_IPV6_TCP];
469 if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
470 return &arfs->arfs_tables[ARFS_IPV6_UDP];
/* Install the hardware steering rule for one aRFS rule. A flow spec is built
 * that matches the rule's tuple exactly (ethertype, L4 source/destination
 * ports, IP source/destination addresses) and forwards matching packets to
 * the direct TIR of the rule's RX queue.
 * Returns the new flow handle, or ERR_PTR(err) when err is non-zero.
 * NOTE(review): the allocation check, the handling of a missing table, the
 * else branches, the trailing memcpy/memset arguments and the release of
 * spec are on lines not shown in this listing.
 */
475 static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
476 struct arfs_rule *arfs_rule)
478 struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
479 struct arfs_tuple *tuple = &arfs_rule->tuple;
480 struct mlx5_flow_handle *rule = NULL;
481 struct mlx5_flow_destination dest = {};
482 MLX5_DECLARE_FLOW_ACT(flow_act);
483 struct arfs_table *arfs_table;
484 struct mlx5_flow_spec *spec;
485 struct mlx5_flow_table *ft;
488 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
/* Match on outer headers, starting with an exact ethertype match */
493 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
494 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
495 outer_headers.ethertype);
496 MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype,
497 ntohs(tuple->etype));
/* Pick the flow table that belongs to this protocol/ethertype pair */
498 arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
504 ft = arfs_table->ft.t;
/* L4 ports: TCP fields for TCP flows; the UDP fields are set below */
505 if (tuple->ip_proto == IPPROTO_TCP) {
506 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
507 outer_headers.tcp_dport);
508 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
509 outer_headers.tcp_sport);
510 MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
511 ntohs(tuple->dst_port));
512 MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
513 ntohs(tuple->src_port));
515 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
516 outer_headers.udp_dport);
517 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
518 outer_headers.udp_sport);
519 MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport,
520 ntohs(tuple->dst_port));
521 MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport,
522 ntohs(tuple->src_port));
/* IP addresses: copy the values and enable the matching criteria, using
 * the IPv4 layout for ETH_P_IP and the IPv6 layout in the lines after it.
 */
524 if (tuple->etype == htons(ETH_P_IP)) {
525 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
526 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
529 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
530 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
533 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
534 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
535 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
536 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
538 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
539 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
542 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
543 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
546 memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
547 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
550 memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
551 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
/* Destination: the direct TIR of the RX queue the flow should land on */
555 dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
556 dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
557 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
/* Failure accounting: per-channel arfs_err counter plus a log message */
560 priv->channel_stats[arfs_rule->rxq].rq.arfs_err++;
562 "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n",
563 __func__, arfs_rule->filter_id, arfs_rule->rxq,
564 tuple->ip_proto, err);
569 return err ? ERR_PTR(err) : rule;
/* Re-point an already installed aRFS rule at the direct TIR of another RX
 * queue. The function returns nothing; a failure is only reported through a
 * warning carrying the target queue index.
 */
572 static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
573 struct mlx5_flow_handle *rule, u16 rxq)
575 struct mlx5_flow_destination dst = {};
578 dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
579 dst.tir_num = priv->direct_tir[rxq].tirn;
580 err = mlx5_modify_rule_destination(rule, &dst, NULL);
582 netdev_warn(priv->netdev,
583 "Failed to modify aRFS rule destination to rq=%d\n", rxq);
/* Work handler of an aRFS rule. If the device is not in the OPENED state the
 * rule is unlinked from its hash bucket under arfs_lock. Otherwise a rule
 * without a hardware flow handle gets one through arfs_add_rule(), and a
 * rule that already has one is passed to arfs_modify_rule_rq(). Expired
 * flows are reaped at the end through arfs_may_expire_flow().
 * NOTE(review): the remainder of the not-opened path, the else branch, the
 * error check after arfs_add_rule() and the labels are on lines not shown
 * in this listing.
 */
586 static void arfs_handle_work(struct work_struct *work)
588 struct arfs_rule *arfs_rule = container_of(work,
591 struct mlx5e_priv *priv = arfs_rule->priv;
592 struct mlx5_flow_handle *rule;
/* state_lock is held while the OPENED bit is tested */
594 mutex_lock(&priv->state_lock);
595 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
596 spin_lock_bh(&priv->fs.arfs->arfs_lock);
597 hlist_del(&arfs_rule->hlist);
598 spin_unlock_bh(&priv->fs.arfs->arfs_lock);
600 mutex_unlock(&priv->state_lock);
604 mutex_unlock(&priv->state_lock);
/* No flow handle yet: create one; otherwise update the existing rule */
606 if (!arfs_rule->rule) {
607 rule = arfs_add_rule(priv, arfs_rule);
610 arfs_rule->rule = rule;
612 arfs_modify_rule_rq(priv, arfs_rule->rule,
/* Use the work context to also expire rules that are no longer needed */
616 arfs_may_expire_flow(priv);
/* Allocate a new aRFS rule for a dissected flow and insert it into the hash
 * bucket of arfs_t selected by the flow's ports. The tuple is copied from
 * the flow keys (ethertype, L4 protocol, addresses, ports), the work item is
 * initialised to run arfs_handle_work(), and a fresh filter id is assigned.
 * The allocation uses GFP_ATOMIC; the caller in this file,
 * mlx5e_rx_flow_steer(), invokes it with arfs_lock held.
 * NOTE(review): the allocation check, the assignments of priv and rxq, and
 * the return statement are on lines not shown in this listing.
 */
619 static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
620 struct arfs_table *arfs_t,
621 const struct flow_keys *fk,
622 u16 rxq, u32 flow_id)
624 struct arfs_rule *rule;
625 struct arfs_tuple *tuple;
627 rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
633 INIT_WORK(&rule->arfs_work, arfs_handle_work);
/* Record the flow key the rule will match on */
635 tuple = &rule->tuple;
636 tuple->etype = fk->basic.n_proto;
637 tuple->ip_proto = fk->basic.ip_proto;
638 if (tuple->etype == htons(ETH_P_IP)) {
639 tuple->src_ipv4 = fk->addrs.v4addrs.src;
640 tuple->dst_ipv4 = fk->addrs.v4addrs.dst;
642 memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
643 sizeof(struct in6_addr));
644 memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
645 sizeof(struct in6_addr));
647 tuple->src_port = fk->ports.src;
648 tuple->dst_port = fk->ports.dst;
650 rule->flow_id = flow_id;
/* The modulo keeps the id below RPS_NO_FILTER */
651 rule->filter_id = priv->fs.arfs->last_filter_id++ % RPS_NO_FILTER;
653 hlist_add_head(&rule->hlist,
654 arfs_hash_bucket(arfs_t, tuple->src_port,
/* Tell whether a stored tuple describes the same flow as the dissected flow
 * keys: both ports and the ethertype must be equal, and then the IPv4 or the
 * IPv6 source and destination addresses, depending on the ethertype. The L4
 * protocol is not compared here; mlx5e_rx_flow_steer() picks the table by
 * ethertype and protocol before the lookup.
 * NOTE(review): the return statements of the mismatch cases and the final
 * fallback are on lines not shown in this listing.
 */
659 static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk)
661 if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst)
663 if (tuple->etype != fk->basic.n_proto)
665 if (tuple->etype == htons(ETH_P_IP))
666 return tuple->src_ipv4 == fk->addrs.v4addrs.src &&
667 tuple->dst_ipv4 == fk->addrs.v4addrs.dst;
668 if (tuple->etype == htons(ETH_P_IPV6))
669 return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
670 sizeof(struct in6_addr)) &&
671 !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
672 sizeof(struct in6_addr));
/* Look up the rule of arfs_t that matches the dissected flow keys. Only the
 * bucket selected by the flow's ports is scanned, and candidates are
 * compared with arfs_cmp().
 * NOTE(review): the return statements (match found / nothing found) are on
 * lines not shown in this listing.
 */
676 static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
677 const struct flow_keys *fk)
679 struct arfs_rule *arfs_rule;
680 struct hlist_head *head;
682 head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst);
683 hlist_for_each_entry(arfs_rule, head, hlist) {
684 if (arfs_cmp(&arfs_rule->tuple, fk))
/* Flow steering entry point: request that the flow of skb be delivered to RX
 * queue rxq_index. Flows the dissector cannot parse, flows that are neither
 * IPv4 nor IPv6, and encapsulated packets are rejected with
 * -EPROTONOSUPPORT. For other flows the matching rule is looked up under
 * arfs_lock: if its queue already equals rxq_index its filter id is
 * returned at once, otherwise its queue is updated (or a new rule is
 * allocated) and the rule's work item is queued on the aRFS workqueue.
 * Returns the rule's filter id on success.
 * NOTE(review): filter_id is read from arfs_rule after arfs_lock has been
 * released (both return sites) - confirm the rule cannot be released by the
 * worker or by arfs_del_rules() in between, or read the id before unlocking.
 * NOTE(review): the declaration of fk, the NULL checks and the value
 * returned on allocation failure are on lines not shown in this listing.
 */
691 int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
692 u16 rxq_index, u32 flow_id)
694 struct mlx5e_priv *priv = netdev_priv(dev);
695 struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
696 struct arfs_table *arfs_t;
697 struct arfs_rule *arfs_rule;
/* Only plain (non-encapsulated) IPv4/IPv6 flows are steered */
700 if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
701 return -EPROTONOSUPPORT;
703 if (fk.basic.n_proto != htons(ETH_P_IP) &&
704 fk.basic.n_proto != htons(ETH_P_IPV6))
705 return -EPROTONOSUPPORT;
707 if (skb->encapsulation)
708 return -EPROTONOSUPPORT;
710 arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto);
712 return -EPROTONOSUPPORT;
/* Lookup, update or creation, and queuing all happen under arfs_lock */
714 spin_lock_bh(&arfs->arfs_lock);
715 arfs_rule = arfs_find_rule(arfs_t, &fk);
717 if (arfs_rule->rxq == rxq_index) {
718 spin_unlock_bh(&arfs->arfs_lock);
719 return arfs_rule->filter_id;
721 arfs_rule->rxq = rxq_index;
723 arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id);
725 spin_unlock_bh(&arfs->arfs_lock);
/* The hardware update itself is deferred to arfs_handle_work() */
729 queue_work(priv->fs.arfs->wq, &arfs_rule->arfs_work);
730 spin_unlock_bh(&arfs->arfs_lock);
731 return arfs_rule->filter_id;