2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/hash.h>
34 #include <linux/mlx5/fs.h>
36 #include <linux/ipv6.h>
/* IPv6 source address of the flow. arfs_alloc_rule copies it from the
 * dissected flow keys and arfs_cmp compares it against them.
 * NOTE(review): the enclosing struct header is not visible in this listing.
 */
44 struct in6_addr src_ipv6;
/* IPv6 destination address of the flow, filled and compared the same way. */
48 struct in6_addr dst_ipv6;
/* Members of one aRFS steering rule.
 * NOTE(review): the enclosing struct header and some members are not visible
 * in this listing.
 */
/* Device private data; arfs_handle_work reads it back from the rule. */
55 struct mlx5e_priv *priv;
/* Work item initialised with arfs_handle_work and queued on the aRFS
 * workqueue by mlx5e_rx_flow_steer.
 */
56 struct work_struct arfs_work;
/* Flow handle of the steering rule; arfs_handle_work calls arfs_add_rule
 * while this is still NULL.
 */
57 struct mlx5_flow_handle *rule;
/* Links the rule into a rules_hash bucket or a temporary deletion list. */
58 struct hlist_node hlist;
60 /* Flow ID passed to ndo_rx_flow_steer */
62 /* Filter ID returned by ndo_rx_flow_steer */
/* Ethertype, IP protocol, addresses and ports identifying the flow. */
64 struct arfs_tuple tuple;
/* Visit every rule of every aRFS table: i walks the ARFS_NUM_TYPES tables and
 * the inner macro walks the hash buckets of each table with j.
 * The expansion is a set of nested loops, so a break in the loop body leaves
 * only the innermost one.
 */
67 #define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
68 for (i = 0; i < ARFS_NUM_TYPES; i++) \
69 mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)
/* Visit every rule of one rules hash: j walks the ARFS_HASH_SIZE buckets and
 * hn/tmp walk each bucket through hlist_for_each_entry_safe, linked by the
 * hlist member of the rule.
 */
71 #define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
72 for (j = 0; j < ARFS_HASH_SIZE; j++) \
73 hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
/* Translate an aRFS table type into an mlx5e traffic type.
 * The visible results are the IPv4/IPv6 TCP/UDP traffic types.
 * NOTE(review): the statements selecting between the returns, and any
 * fallback value, are not visible in this listing.
 */
75 static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
79 return MLX5E_TT_IPV4_TCP;
81 return MLX5E_TT_IPV4_UDP;
83 return MLX5E_TT_IPV6_TCP;
85 return MLX5E_TT_IPV6_UDP;
/* Walk all aRFS types and forward the matching TTC rule back to its default
 * destination, logging the traffic type and error code on failure.
 * NOTE(review): the failure condition and the return statements are not
 * visible in this listing.
 */
91 static int arfs_disable(struct mlx5e_priv *priv)
95 for (i = 0; i < ARFS_NUM_TYPES; i++) {
96 /* Modify ttc rules destination back to their default */
97 err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i));
99 netdev_err(priv->netdev,
100 "%s: modify ttc[%d] default destination failed, err(%d)\n",
101 __func__, arfs_get_tt(i), err);
/* Forward declaration: arfs_del_rules is called by functions that appear
 * before its definition.
 */
108 static void arfs_del_rules(struct mlx5e_priv *priv);
/* Remove every aRFS rule, then restore the default TTC destinations.
 * Returns the result of arfs_disable.
 */
110 int mlx5e_arfs_disable(struct mlx5e_priv *priv)
112 arfs_del_rules(priv);
114 return arfs_disable(priv);
/* Redirect the TTC rule of every aRFS type to the flow table of that type,
 * logging the traffic type and error code on failure.
 * NOTE(review): the failure condition and the return statements are not
 * visible in this listing.
 */
117 int mlx5e_arfs_enable(struct mlx5e_priv *priv)
119 struct mlx5_flow_destination dest = {};
/* The destination is a flow table; only dest.ft changes per iteration. */
122 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
123 for (i = 0; i < ARFS_NUM_TYPES; i++) {
124 dest.ft = priv->fs.arfs.arfs_tables[i].ft.t;
125 /* Modify ttc rules destination to point on the aRFS FTs */
126 err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest);
128 netdev_err(priv->netdev,
129 "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
130 __func__, arfs_get_tt(i), err);
/* Tear down one aRFS table: delete its default rule, then destroy its flow
 * table.
 */
138 static void arfs_destroy_table(struct arfs_table *arfs_t)
140 mlx5_del_flow_rules(arfs_t->default_rule);
141 mlx5e_destroy_flow_table(&arfs_t->ft);
/* Release all aRFS resources of the device: delete the steering rules,
 * destroy the aRFS workqueue, and destroy every table that was created.
 */
144 void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
/* Tests the NETIF_F_NTUPLE hardware feature bit.
 * NOTE(review): the statement guarded by this test is not visible here.
 */
148 if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
151 arfs_del_rules(priv);
152 destroy_workqueue(priv->fs.arfs.wq);
153 for (i = 0; i < ARFS_NUM_TYPES; i++) {
/* Skip tables whose flow table pointer is NULL or an error pointer. */
154 if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
155 arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
/* Install the default rule of one aRFS table.
 * The rule is added without a match spec and its TIR destination is taken
 * from priv->indir_tir, indexed by the traffic type that arfs_get_tt returns
 * for the table type. If adding the rule fails, the error is taken from the
 * returned pointer, default_rule is reset to NULL and the failure is logged.
 * NOTE(review): several lines of this function, including the traffic type
 * validity check and the return statements, are not visible in this listing.
 */
159 static int arfs_add_default_rule(struct mlx5e_priv *priv,
162 struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
163 struct mlx5e_tir *tir = priv->indir_tir;
164 struct mlx5_flow_destination dest = {};
165 MLX5_DECLARE_FLOW_ACT(flow_act);
166 enum mlx5e_traffic_types tt;
169 dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
170 tt = arfs_get_tt(type);
172 netdev_err(priv->netdev, "%s: bad arfs_type: %d\n",
177 /* FIXME: Must use mlx5e_ttc_get_default_dest(),
178 * but can't since TTC default is not setup yet !
180 dest.tir_num = tir[tt].tirn;
181 arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
184 if (IS_ERR(arfs_t->default_rule)) {
185 err = PTR_ERR(arfs_t->default_rule);
186 arfs_t->default_rule = NULL;
187 netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
/* Layout of an aRFS flow table: two groups, the first with BIT(16) - 1
 * entries and the second with a single entry, giving BIT(16) entries in
 * total.
 */
194 #define MLX5E_ARFS_NUM_GROUPS 2
195 #define MLX5E_ARFS_GROUP1_SIZE (BIT(16) - 1)
196 #define MLX5E_ARFS_GROUP2_SIZE BIT(0)
197 #define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\
198 MLX5E_ARFS_GROUP2_SIZE)
/* Create the flow groups of one aRFS flow table.
 * The first group matches on outer headers and covers MLX5E_ARFS_GROUP1_SIZE
 * entries; the second is created from a cleared command buffer and covers
 * MLX5E_ARFS_GROUP2_SIZE entries.
 * NOTE(review): the per-type selection of match fields, the allocation
 * checks, the num_groups bookkeeping and the return paths are not visible in
 * this listing.
 */
199 static int arfs_create_groups(struct mlx5e_flow_table *ft,
202 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
203 void *outer_headers_c;
/* Zeroed group array and zeroed create_flow_group_in command buffer. */
209 ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
210 sizeof(*ft->g), GFP_KERNEL);
211 in = kvzalloc(inlen, GFP_KERNEL);
/* Locate the match criteria inside the command buffer and mask the
 * ethertype.
 */
218 mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
219 outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc,
221 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
/* TCP port masks. */
225 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
226 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
/* UDP port masks. */
230 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
231 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport);
/* IPv4 source and destination address masks. */
241 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
242 src_ipv4_src_ipv6.ipv4_layout.ipv4);
243 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
244 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
/* IPv6 source and destination address masks, written with memset. */
248 memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
249 src_ipv4_src_ipv6.ipv6_layout.ipv6),
251 memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
252 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
/* First group: outer-header matching over the index range
 * [ix, ix + MLX5E_ARFS_GROUP1_SIZE - 1].
 */
260 MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
261 MLX5_SET_CFG(in, start_flow_index, ix);
262 ix += MLX5E_ARFS_GROUP1_SIZE;
263 MLX5_SET_CFG(in, end_flow_index, ix - 1);
264 ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
265 if (IS_ERR(ft->g[ft->num_groups]))
/* Second group: the command buffer is cleared first, so only the index
 * range set below is programmed.
 */
269 memset(in, 0, inlen);
270 MLX5_SET_CFG(in, start_flow_index, ix);
271 ix += MLX5E_ARFS_GROUP2_SIZE;
272 MLX5_SET_CFG(in, end_flow_index, ix - 1);
273 ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
274 if (IS_ERR(ft->g[ft->num_groups]))
/* Take the error code from the error pointer and clear the slot. */
282 err = PTR_ERR(ft->g[ft->num_groups]);
283 ft->g[ft->num_groups] = NULL;
/* Create the flow table of one aRFS type, then its groups and default rule.
 * NOTE(review): the error checks between the steps and the return statements
 * are not visible in this listing; the final mlx5e_destroy_flow_table call
 * presumably belongs to the failure path - confirm.
 */
290 static int arfs_create_table(struct mlx5e_priv *priv,
293 struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
294 struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
295 struct mlx5_flow_table_attr ft_attr = {};
/* Table attributes: MLX5E_ARFS_TABLE_SIZE entries at the aRFS level of the
 * NIC priority.
 */
300 ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE;
301 ft_attr.level = MLX5E_ARFS_FT_LEVEL;
302 ft_attr.prio = MLX5E_NIC_PRIO;
304 ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
306 err = PTR_ERR(ft->t);
311 err = arfs_create_groups(ft, type);
315 err = arfs_add_default_rule(priv, type);
321 mlx5e_destroy_flow_table(ft);
/* Set up aRFS for the device: initialise the lock and rule list, create the
 * single-threaded "mlx5e_arfs" workqueue and create one table per aRFS type.
 * NOTE(review): the error checks and return statements are not visible in
 * this listing; the mlx5e_arfs_destroy_tables call presumably belongs to the
 * failure path - confirm.
 */
325 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
/* Tests the NETIF_F_NTUPLE hardware feature bit.
 * NOTE(review): the statement guarded by this test is not visible here.
 */
330 if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
333 spin_lock_init(&priv->fs.arfs.arfs_lock);
334 INIT_LIST_HEAD(&priv->fs.arfs.rules);
335 priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
336 if (!priv->fs.arfs.wq)
339 for (i = 0; i < ARFS_NUM_TYPES; i++) {
340 err = arfs_create_table(priv, i);
346 mlx5e_arfs_destroy_tables(priv);
/* Limit compared against the running count of rules collected in one
 * expiry pass.
 */
350 #define MLX5E_ARFS_EXPIRY_QUOTA 60

/* Collect expirable rules and delete them.
 * With arfs_lock held, every rule that has no pending work and that
 * rps_may_expire_flow reports as expirable is moved from its hash bucket to
 * a local list. After the lock is dropped, the flow rules on that list are
 * deleted and the entries unlinked.
 * NOTE(review): some statements of this function are not visible in this
 * listing.
 */
352 static void arfs_may_expire_flow(struct mlx5e_priv *priv)
354 struct arfs_rule *arfs_rule;
355 struct hlist_node *htmp;
360 HLIST_HEAD(del_list);
361 spin_lock_bh(&priv->fs.arfs.arfs_lock);
362 mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
363 if (!work_pending(&arfs_rule->arfs_work) &&
364 rps_may_expire_flow(priv->netdev,
365 arfs_rule->rxq, arfs_rule->flow_id,
366 arfs_rule->filter_id)) {
367 hlist_del_init(&arfs_rule->hlist);
368 hlist_add_head(&arfs_rule->hlist, &del_list);
/* NOTE(review): the statement guarded by this check is not visible here.
 * If it is a break, it leaves only the innermost of the nested loops the
 * iteration macro expands to, so collection would continue with the next
 * bucket - confirm the intended behaviour.
 */
369 if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
373 spin_unlock_bh(&priv->fs.arfs.arfs_lock);
/* The hardware rules are deleted only after the spinlock is released. */
374 hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
376 mlx5_del_flow_rules(arfs_rule->rule);
377 hlist_del(&arfs_rule->hlist);
/* Remove every aRFS rule of the device.
 * With arfs_lock held, all rules are moved from the hash buckets to a local
 * list. After the lock is dropped, each rule's work is cancelled
 * synchronously, its flow rule is deleted and the entry is unlinked.
 * NOTE(review): some statements of this function are not visible in this
 * listing.
 */
382 static void arfs_del_rules(struct mlx5e_priv *priv)
384 struct hlist_node *htmp;
385 struct arfs_rule *rule;
389 HLIST_HEAD(del_list);
390 spin_lock_bh(&priv->fs.arfs.arfs_lock);
391 mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
392 hlist_del_init(&rule->hlist);
393 hlist_add_head(&rule->hlist, &del_list);
395 spin_unlock_bh(&priv->fs.arfs.arfs_lock);
397 hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
/* cancel_work_sync is called only after the spinlock is released. */
398 cancel_work_sync(&rule->arfs_work);
400 mlx5_del_flow_rules(rule->rule);
401 hlist_del(&rule->hlist);
/* Select the hash bucket of a table for a source/destination port pair.
 * Both ports are combined into one value, hashed with hash_long down to
 * ARFS_HASH_SHIFT bits, and the result indexes rules_hash.
 */
406 static struct hlist_head *
407 arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
/* NOTE(review): the destination port is shifted by only 2 bits before it is
 * OR-ed with the source port, so the two values overlap in the hash input.
 * Insertion and lookup both go through this function, so this affects only
 * how rules spread over the buckets; a wider shift may have been intended -
 * confirm.
 */
413 l = (__force unsigned long)src_port |
414 ((__force unsigned long)dst_port << 2);
416 bucket_idx = hash_long(l, ARFS_HASH_SHIFT);
418 return &arfs_t->rules_hash[bucket_idx];
/* Pick the aRFS table for an ethertype / IP protocol pair.
 * etype is compared against network-order ETH_P_IP and ETH_P_IPV6, ip_proto
 * against IPPROTO_TCP and IPPROTO_UDP; each of the four combinations maps to
 * its own table.
 * NOTE(review): the result for any other combination is not visible in this
 * listing.
 */
421 static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
422 u8 ip_proto, __be16 etype)
424 if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
425 return &arfs->arfs_tables[ARFS_IPV4_TCP];
426 if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
427 return &arfs->arfs_tables[ARFS_IPV4_UDP];
428 if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
429 return &arfs->arfs_tables[ARFS_IPV6_TCP];
430 if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
431 return &arfs->arfs_tables[ARFS_IPV6_UDP];
/* Build and install the steering rule for one flow.
 * The match spec covers the outer-header ethertype, the TCP or UDP ports and
 * the IPv4 or IPv6 addresses of the rule's tuple. The rule is added to the
 * flow table of the matching aRFS table, with the direct TIR of the rule's
 * rx queue as destination.
 * Returns the new flow handle, or ERR_PTR(err) when err is non-zero.
 * NOTE(review): allocation and lookup checks, the memcpy/memset source and
 * length arguments, the failure condition and the cleanup of spec are not
 * visible in this listing.
 */
436 static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
437 struct arfs_rule *arfs_rule)
439 struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
440 struct arfs_tuple *tuple = &arfs_rule->tuple;
441 struct mlx5_flow_handle *rule = NULL;
442 struct mlx5_flow_destination dest = {};
443 MLX5_DECLARE_FLOW_ACT(flow_act);
444 struct arfs_table *arfs_table;
445 struct mlx5_flow_spec *spec;
446 struct mlx5_flow_table *ft;
449 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
/* Ethertype: full mask; the value is the tuple's ethertype converted to
 * host order.
 */
454 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
455 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
456 outer_headers.ethertype);
457 MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype,
458 ntohs(tuple->etype));
459 arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
465 ft = arfs_table->ft.t;
/* Ports: TCP fields when the tuple is TCP, UDP fields below. */
466 if (tuple->ip_proto == IPPROTO_TCP) {
467 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
468 outer_headers.tcp_dport);
469 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
470 outer_headers.tcp_sport);
471 MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
472 ntohs(tuple->dst_port));
473 MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
474 ntohs(tuple->src_port));
476 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
477 outer_headers.udp_dport);
478 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
479 outer_headers.udp_sport);
480 MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport,
481 ntohs(tuple->dst_port));
482 MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport,
483 ntohs(tuple->src_port));
/* Addresses: IPv4 layout when the ethertype is ETH_P_IP, IPv6 layout below. */
485 if (tuple->etype == htons(ETH_P_IP)) {
486 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
487 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
490 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
491 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
494 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
495 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
496 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
497 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
499 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
500 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
503 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
504 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
507 memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
508 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
511 memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
512 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
/* Destination: the direct TIR of the rx queue recorded in the rule. */
516 dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
517 dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
518 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
/* Failure accounting: bump the arfs_err counter of the rule's rx queue and
 * log the filter id, queue index, IP protocol and error.
 */
521 priv->channel_stats[arfs_rule->rxq].rq.arfs_err++;
523 "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n",
524 __func__, arfs_rule->filter_id, arfs_rule->rxq,
525 tuple->ip_proto, err);
530 return err ? ERR_PTR(err) : rule;
/* Re-target an existing rule to the direct TIR of rx queue rxq.
 * A warning naming the queue is logged on failure.
 * NOTE(review): the failure condition is not visible in this listing.
 */
533 static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
534 struct mlx5_flow_handle *rule, u16 rxq)
536 struct mlx5_flow_destination dst = {};
539 dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
540 dst.tir_num = priv->direct_tir[rxq].tirn;
541 err = mlx5_modify_rule_destination(rule, &dst, NULL);
543 netdev_warn(priv->netdev,
544 "Failed to modify aRFS rule destination to rq=%d\n", rxq);
/* Work handler for one aRFS rule.
 * Under state_lock, checks whether the device is in the OPENED state; when
 * it is not, the rule is unlinked from its hash bucket under arfs_lock.
 * Otherwise a rule without a flow handle gets one from arfs_add_rule, and
 * arfs_modify_rule_rq updates the destination of an existing handle.
 * arfs_may_expire_flow is called afterwards.
 * NOTE(review): the branch structure, the error handling of arfs_add_rule
 * and the early-exit path are not fully visible in this listing.
 */
547 static void arfs_handle_work(struct work_struct *work)
549 struct arfs_rule *arfs_rule = container_of(work,
552 struct mlx5e_priv *priv = arfs_rule->priv;
553 struct mlx5_flow_handle *rule;
555 mutex_lock(&priv->state_lock);
556 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
/* Device not opened: drop the rule from the hash under the spinlock. */
557 spin_lock_bh(&priv->fs.arfs.arfs_lock);
558 hlist_del(&arfs_rule->hlist);
559 spin_unlock_bh(&priv->fs.arfs.arfs_lock);
561 mutex_unlock(&priv->state_lock);
565 mutex_unlock(&priv->state_lock);
567 if (!arfs_rule->rule) {
568 rule = arfs_add_rule(priv, arfs_rule);
571 arfs_rule->rule = rule;
573 arfs_modify_rule_rq(priv, arfs_rule->rule,
577 arfs_may_expire_flow(priv);
/* Allocate a rule for a new flow and insert it into the table's hash.
 * The rule is zero-allocated with GFP_ATOMIC (mlx5e_rx_flow_steer calls this
 * with arfs_lock held), its work item is bound to arfs_handle_work, and its
 * tuple is filled from the dissected flow keys.
 * NOTE(review): the allocation check, the remaining member assignments and
 * the return statement are not visible in this listing.
 */
580 static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
581 struct arfs_table *arfs_t,
582 const struct flow_keys *fk,
583 u16 rxq, u32 flow_id)
585 struct arfs_rule *rule;
586 struct arfs_tuple *tuple;
588 rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
594 INIT_WORK(&rule->arfs_work, arfs_handle_work);
/* Copy ethertype, IP protocol, addresses and ports from the flow keys. */
596 tuple = &rule->tuple;
597 tuple->etype = fk->basic.n_proto;
598 tuple->ip_proto = fk->basic.ip_proto;
599 if (tuple->etype == htons(ETH_P_IP)) {
600 tuple->src_ipv4 = fk->addrs.v4addrs.src;
601 tuple->dst_ipv4 = fk->addrs.v4addrs.dst;
603 memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
604 sizeof(struct in6_addr));
605 memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
606 sizeof(struct in6_addr));
608 tuple->src_port = fk->ports.src;
609 tuple->dst_port = fk->ports.dst;
611 rule->flow_id = flow_id;
/* Filter ids come from a running counter taken modulo RPS_NO_FILTER, so
 * the id never equals RPS_NO_FILTER.
 */
612 rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;
/* Insert at the head of the bucket selected by the port pair. */
614 hlist_add_head(&rule->hlist,
615 arfs_hash_bucket(arfs_t, tuple->src_port,
/* Compare a stored tuple with dissected flow keys.
 * Ports and ethertype are checked first. For ETH_P_IP the result is whether
 * both IPv4 addresses are equal; for ETH_P_IPV6 it is whether both 16-byte
 * addresses compare equal with memcmp.
 * NOTE(review): the values returned on a port or ethertype mismatch, and for
 * other ethertypes, are not visible in this listing.
 */
620 static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk)
622 if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst)
624 if (tuple->etype != fk->basic.n_proto)
626 if (tuple->etype == htons(ETH_P_IP))
627 return tuple->src_ipv4 == fk->addrs.v4addrs.src &&
628 tuple->dst_ipv4 == fk->addrs.v4addrs.dst;
629 if (tuple->etype == htons(ETH_P_IPV6))
630 return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
631 sizeof(struct in6_addr)) &&
632 !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
633 sizeof(struct in6_addr));
/* Look up the rule matching the given flow keys in one aRFS table.
 * Only the bucket selected by the flow's port pair is searched, and each
 * entry is tested with arfs_cmp.
 * NOTE(review): the return statements are not visible in this listing.
 */
637 static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
638 const struct flow_keys *fk)
640 struct arfs_rule *arfs_rule;
641 struct hlist_head *head;
643 head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst);
644 hlist_for_each_entry(arfs_rule, head, hlist) {
645 if (arfs_cmp(&arfs_rule->tuple, fk))
/* Steer the flow of skb to rx queue rxq_index.
 * Returns -EPROTONOSUPPORT when the flow keys cannot be dissected, when the
 * ethertype is neither ETH_P_IP nor ETH_P_IPV6, or when the skb is marked as
 * encapsulated. Otherwise, under arfs_lock, an existing rule is looked up.
 * If that rule already targets rxq_index, its filter id is returned at once.
 * Otherwise the target queue is updated, or a new rule is allocated, the
 * rule's work is queued on the aRFS workqueue and the filter id is returned.
 * NOTE(review): the branch structure, the check on the table lookup result
 * and the allocation failure path are not fully visible in this listing.
 */
652 int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
653 u16 rxq_index, u32 flow_id)
655 struct mlx5e_priv *priv = netdev_priv(dev);
656 struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
657 struct arfs_table *arfs_t;
658 struct arfs_rule *arfs_rule;
661 if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
662 return -EPROTONOSUPPORT;
664 if (fk.basic.n_proto != htons(ETH_P_IP) &&
665 fk.basic.n_proto != htons(ETH_P_IPV6))
666 return -EPROTONOSUPPORT;
668 if (skb->encapsulation)
669 return -EPROTONOSUPPORT;
671 arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto);
673 return -EPROTONOSUPPORT;
675 spin_lock_bh(&arfs->arfs_lock);
676 arfs_rule = arfs_find_rule(arfs_t, &fk);
/* The rule already points at the requested queue: nothing to reprogram. */
678 if (arfs_rule->rxq == rxq_index) {
679 spin_unlock_bh(&arfs->arfs_lock);
680 return arfs_rule->filter_id;
682 arfs_rule->rxq = rxq_index;
684 arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id);
686 spin_unlock_bh(&arfs->arfs_lock);
/* Hand the hardware update to the workqueue; it is not done here. */
690 queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
691 spin_unlock_bh(&arfs->arfs_lock);
/* NOTE(review): filter_id is read after the lock is released and after the
 * work has been queued; confirm the rule cannot be released by the work
 * handler or an expiry pass before this read.
 */
692 return arfs_rule->filter_id;