// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <linux/if_macvlan.h>
#include <linux/refcount.h>
#include <linux/jhash.h>
#include <linux/net_namespace.h>
#include <linux/mutex.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/nexthop.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>
#include <net/switchdev.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
#include "spectrum_span.h"
struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev; /* NULL for underlay RIF */
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	refcount_t ref_count;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
	u16 ul_rif_id; /* Reserved for Spectrum. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
					 struct netlink_ext_ack *extack);
	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};

static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev);
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree);
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib,
				     u8 tree_id);
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib);

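/* RIF counters: a counter from the RIF sub-pool can be bound to the ingress
 * or egress direction of a RIF. Binding is a read-modify-write of the RIF
 * entry through the RITR register; values are read and cleared through RICNT.
 */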
static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

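/* Allocation order: reserve a counter index from the RIF sub-pool, clear the
 * counter so that no stale value is ever reported, and only then bind it to
 * the RIF. Freeing proceeds in the reverse order.
 */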
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}

static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

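/* Prefix usage is a bitmap with one bit per possible prefix length, 0 through
 * 128 (hence the + 1 below). It records which prefix lengths a FIB uses and
 * therefore which bins its LPM tree must provide.
 */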
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};

struct mlxsw_sp_nexthop_group_info;
struct mlxsw_sp_nexthop_group;
struct mlxsw_sp_fib_entry;

struct mlxsw_sp_fib_node {
	struct mlxsw_sp_fib_entry *fib_entry;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

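/* Low-level ops may need per-FIB-entry private data of
 * ll_ops->fib_entry_priv_size bytes. The priv object is reference counted,
 * as it can still be held by a queued operation context after the FIB entry
 * itself is gone.
 */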
static struct mlxsw_sp_fib_entry_priv *
mlxsw_sp_fib_entry_priv_create(const struct mlxsw_sp_router_ll_ops *ll_ops)
{
	struct mlxsw_sp_fib_entry_priv *priv;

	if (!ll_ops->fib_entry_priv_size)
		/* No need to have priv */
		return NULL;

	priv = kzalloc(sizeof(*priv) + ll_ops->fib_entry_priv_size, GFP_KERNEL);
	if (!priv)
		return ERR_PTR(-ENOMEM);
	refcount_set(&priv->refcnt, 1);
	return priv;
}

static void
mlxsw_sp_fib_entry_priv_destroy(struct mlxsw_sp_fib_entry_priv *priv)
{
	kfree(priv);
}

static void mlxsw_sp_fib_entry_priv_hold(struct mlxsw_sp_fib_entry_priv *priv)
{
	refcount_inc(&priv->refcnt);
}

static void mlxsw_sp_fib_entry_priv_put(struct mlxsw_sp_fib_entry_priv *priv)
{
	if (!priv || !refcount_dec_and_test(&priv->refcnt))
		return;
	mlxsw_sp_fib_entry_priv_destroy(priv);
}

static void mlxsw_sp_fib_entry_op_ctx_priv_hold(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
						struct mlxsw_sp_fib_entry_priv *priv)
{
	if (!priv)
		return;
	mlxsw_sp_fib_entry_priv_hold(priv);
	list_add(&priv->list, &op_ctx->fib_entry_priv_list);
}

static void mlxsw_sp_fib_entry_op_ctx_priv_put_all(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
{
	struct mlxsw_sp_fib_entry_priv *priv, *tmp;

	list_for_each_entry_safe(priv, tmp, &op_ctx->fib_entry_priv_list, list)
		mlxsw_sp_fib_entry_priv_put(priv);
	INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list);
}

struct mlxsw_sp_fib_entry {
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
	struct mlxsw_sp_fib_entry_priv *priv;
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	struct fib_info *fi;
	u32 tb_id;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct fib6_info *rt;
};

struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
	const struct mlxsw_sp_router_ll_ops *ll_ops;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
	struct mlxsw_sp_rif *ul_rif;
	refcount_t ul_rif_refcnt;
};

static int mlxsw_sp_router_ll_basic_init(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
					 enum mlxsw_sp_l3proto proto)
{
	return 0;
}

static int mlxsw_sp_router_ll_basic_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl)
{
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta),
			       xralta_pl + MLXSW_REG_XRALTA_RALTA_OFFSET);
}

static int mlxsw_sp_router_ll_basic_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl)
{
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst),
			       xralst_pl + MLXSW_REG_XRALST_RALST_OFFSET);
}

static int mlxsw_sp_router_ll_basic_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl)
{
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
			       xraltb_pl + MLXSW_REG_XRALTB_RALTB_OFFSET);
}

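/* In the basic (non-XM) low-level ops above, the legacy RALTA/RALST/RALTB
 * register images live at a fixed offset inside the extended XRALTA/XRALST/
 * XRALTB payloads, so only the embedded register is written to the device.
 */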
static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	err = ll_ops->init(mlxsw_sp, vr->id, proto);
	if (err)
		return ERR_PTR(err);

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	fib->ll_ops = ll_ops;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   const struct mlxsw_sp_router_ll_ops *ll_ops,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char xralta_pl[MLXSW_REG_XRALTA_LEN];

	mlxsw_reg_xralta_pack(xralta_pl, true,
			      (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			      lpm_tree->id);
	return ll_ops->ralta_write(mlxsw_sp, xralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   const struct mlxsw_sp_router_ll_ops *ll_ops,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char xralta_pl[MLXSW_REG_XRALTA_LEN];

	mlxsw_reg_xralta_pack(xralta_pl, false,
			      (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			      lpm_tree->id);
	ll_ops->ralta_write(mlxsw_sp, xralta_pl);
}

static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  const struct mlxsw_sp_router_ll_ops *ll_ops,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char xralst_pl[MLXSW_REG_XRALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_xralst_pack(xralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_xralst_bin_pack(xralst_pl, prefix, last_prefix,
					  MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return ll_ops->ralst_write(mlxsw_sp, xralst_pl);
}

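/* The structure set above chains the used prefix lengths into a linear tree:
 * each programmed bin records the previously programmed (shorter) prefix
 * length, and the longest used prefix length becomes the root bin.
 */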
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 const struct mlxsw_sp_router_ll_ops *ll_ops,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, ll_ops, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, ll_ops, prefix_usage, lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
	return ERR_PTR(err);
}

static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      const struct mlxsw_sp_router_ll_ops *ll_ops,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage)) {
			mlxsw_sp_lpm_tree_hold(lpm_tree);
			return lpm_tree;
		}
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, ll_ops, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops =
				mlxsw_sp->router->proto_ll_ops[lpm_tree->proto];

	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, ll_ops, lpm_tree);
}

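/* LPM trees are shared: mlxsw_sp_lpm_tree_get() first looks for an existing
 * tree of the same protocol with an identical prefix usage and takes a
 * reference to it; only otherwise is a new tree carved out of the device's
 * limited tree pool.
 */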
#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					      sizeof(struct mlxsw_sp_lpm_tree),
					      GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	kfree(mlxsw_sp->router->lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6 ||
	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char xraltb_pl[MLXSW_REG_XRALTB_LEN];

	mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
			      (enum mlxsw_reg_ralxx_protocol) fib->proto,
			      tree_id);
	return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char xraltb_pl[MLXSW_REG_XRALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
			      (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
}

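/* A VR is bound to an LPM tree per protocol through RALTB. There is no
 * explicit unbind operation; re-binding the VR to the default tree 0 serves
 * that purpose.
 */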
static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main, default and local tables into one */
	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				u16 *vr_id)
{
	struct mlxsw_sp_vr *vr;
	int err = 0;

	mutex_lock(&mlxsw_sp->router->lock);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr) {
		err = -ESRCH;
		goto out;
	}
	*vr_id = vr->id;
out:
	mutex_unlock(&mlxsw_sp->router->lock);
	return err;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr4_table_create;
	}
	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(mr6_table)) {
		err = PTR_ERR(mr6_table);
		goto err_mr6_table_create;
	}

	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
	vr->tb_id = tb_id;
	return vr;

err_mr6_table_create:
	mlxsw_sp_mr_table_destroy(mr4_table);
err_mr4_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
					   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}

static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree->id == tree_id)
		return true;
	return false;
}

static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}

static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	enum mlxsw_sp_l3proto proto = fib->proto;
	struct mlxsw_sp_lpm_tree *old_tree;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	old_id = old_tree->id;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
	       sizeof(new_tree->prefix_ref_count));
	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);

	return 0;

err_tree_replace:
	for (i--; i >= 0; i--) {
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
					GFP_KERNEL);
	if (!mlxsw_sp->router->vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}

static struct net_device *
__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);
	struct net *net = dev_net(ol_dev);

	return dev_get_by_index_rcu(net, tun->parms.link);
}

u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
	struct net_device *d;
	u32 tb_id;

	rcu_read_lock();
	d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
	if (d)
		tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
	else
		tb_id = RT_TABLE_MAIN;
	rcu_read_unlock();

	return tb_id;
}

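/* The underlay device of a tunnel is resolved from tun->parms.link under
 * RCU, and the underlay table ID is the FIB table of its L3 master device,
 * falling back to the main table when there is none.
 */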
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);

static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		break;
	}

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}

static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
				  const enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr saddr,
				  u32 ul_tb_id,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	union mlxsw_sp_l3addr tun_saddr;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	return tun_ul_tb_id == ul_tb_id &&
	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
}

static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				  1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}

static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this node from the IPIP entry that it's the decap entry of. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
			   1, fib_entry->decap.tunnel_index);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

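/* Decap route lifecycle: a local route matching a tunnel's underlay source
 * address is promoted from a TRAP entry to an IPIP_DECAP entry, and demoted
 * back to TRAP when the tunnel goes away or the update fails.
 */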
static struct mlxsw_sp_fib_entry *
mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				     enum mlxsw_sp_l3proto proto,
				     const union mlxsw_sp_l3addr *addr,
				     enum mlxsw_sp_fib_entry_type type)
{
	struct mlxsw_sp_fib_node *fib_node;
	unsigned char addr_prefix_len;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	const void *addrp;
	size_t addr_len;
	u32 addr4;

	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, proto);

	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		addr4 = be32_to_cpu(addr->addr4);
		addrp = &addr4;
		addr_len = 4;
		addr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
	default:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
					    addr_prefix_len);
	if (!fib_node || fib_node->fib_entry->type != type)
		return NULL;

	return fib_node->fib_entry;
}

/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_node *fib_node;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	unsigned char saddr_prefix_len;
	union mlxsw_sp_l3addr saddr;
	struct mlxsw_sp_fib *ul_fib;
	struct mlxsw_sp_vr *ul_vr;
	const void *saddrp;
	size_t saddr_len;
	u32 ul_tb_id;
	u32 saddr4;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
	if (!ul_vr)
		return NULL;

	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
					   ipip_entry->ol_dev);

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(saddr.addr4);
		saddrp = &saddr4;
		saddr_len = 4;
		saddr_prefix_len = 32;
		break;
	default:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
					    saddr_prefix_len);
	if (!fib_node ||
	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		return NULL;

	return fib_node->fib_entry;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
			   enum mlxsw_sp_ipip_type ipipt,
			   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
	if (IS_ERR(ipip_entry))
		return ipip_entry;

	list_add_tail(&ipip_entry->ipip_list_node,
		      &mlxsw_sp->router->ipip_list);

	return ipip_entry;
}

static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry);
}

/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	struct net_device *ul_dev;

	rcu_read_lock();

	ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
	if (!ul_dev)
		goto out_unlock;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			goto out_unlock;

	rcu_read_unlock();

	return NULL;

out_unlock:
	rcu_read_unlock();
	return ipip_entry;
}

static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
				      const struct net_device *dev,
				      enum mlxsw_sp_ipip_type *p_type)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	enum mlxsw_sp_ipip_type ipipt;

	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
		ipip_ops = router->ipip_ops_arr[ipipt];
		if (dev->type == ipip_ops->dev_type) {
			if (p_type)
				*p_type = ipipt;
			return true;
		}
	}
	return false;
}

bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (ipip_entry->ol_dev == ol_dev)
			return ipip_entry;

	return NULL;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ol_dev = ipip_entry->ol_dev;
		struct net_device *ipip_ul_dev;

		rcu_read_lock();
		ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
		rcu_read_unlock();

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}

bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	bool is_ipip_ul;

	mutex_lock(&mlxsw_sp->router->lock);
	is_ipip_ul = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
	mutex_unlock(&mlxsw_sp->router->lock);

	return is_ipip_ul;
}

static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
						const struct net_device *ol_dev,
						enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ops
		= mlxsw_sp->router->ipip_ops_arr[ipipt];

	return ops->can_offload(mlxsw_sp, ol_dev);
}

static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}

static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
			u16 ul_rif_id, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
						 struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif_ipip_lb *lb_rif;
	int err = 0;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry) {
		lb_rif = ipip_entry->ol_lb;
		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
					      lb_rif->ul_rif_id, true);
		if (err)
			goto out;
		lb_rif->common.mtu = ol_dev->mtu;
	}

out:
	return err;
}

static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif);
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}

static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif);

/**
 * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
 * @mlxsw_sp: mlxsw_sp.
 * @ipip_entry: IPIP entry.
 * @recreate_loopback: Recreates the associated loopback RIF.
 * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
 *              relevant when recreate_loopback is true.
 * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
 *                   is only relevant when recreate_loopback is false.
 * @extack: extack.
 *
 * Return: Non-zero value on failure.
 */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}

static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	if (!ipip_entry)
		return 0;

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     bool *demote_this,
				     struct netlink_ext_ack *extack)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;

	/* Moving underlay to a different VRF might cause local address
	 * conflict, and the conflicting tunnels need to be demoted.
	 */
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		*demote_this = true;
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
					struct net_device *ol_dev,
					struct netlink_ext_ack *extack)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		/* A change might make a tunnel eligible for offloading, but
		 * that is currently not implemented. What falls to slow path
		 * stays there.
		 */
		return 0;

	/* A change might make a tunnel not eligible for offloading. */
	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
						 ipip_entry->ipipt)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
	return err;
}

void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in via the argument
 * `except'.
 */
static bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
				     enum mlxsw_sp_l3proto ul_proto,
				     union mlxsw_sp_l3addr saddr,
				     u32 ul_tb_id,
				     const struct mlxsw_sp_ipip_entry *except)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		if (ipip_entry != except &&
		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
						      ul_tb_id, ipip_entry)) {
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
			return true;
		}
	}

	return false;
}

static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
						     struct net_device *ul_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		struct net_device *ol_dev = ipip_entry->ol_dev;
		struct net_device *ipip_ul_dev;

		rcu_read_lock();
		ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
		rcu_read_unlock();
		if (ipip_ul_dev == ul_dev)
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
	}
}

int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;
	int err = 0;

	mutex_lock(&mlxsw_sp->router->lock);
	switch (event) {
	case NETDEV_REGISTER:
		err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
		break;
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		break;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		break;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		break;
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								   ol_dev,
								   extack);
		break;
	case NETDEV_CHANGE:
		extack = info->extack;
		err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							      ol_dev, extack);
		break;
	case NETDEV_CHANGEMTU:
		err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
		break;
	}
	mutex_unlock(&mlxsw_sp->router->lock);
	return err;
}

static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   bool *demote_this,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    demote_this,
								    extack);
		break;

	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}

int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *ul_dev,
				 unsigned long event,
				 struct netdev_notifier_info *info)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	int err = 0;

	mutex_lock(&mlxsw_sp->router->lock);
	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
								ul_dev,
								ipip_entry))) {
		struct mlxsw_sp_ipip_entry *prev;
		bool demote_this = false;

		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
							 ul_dev, &demote_this,
							 event, info);
		if (err) {
			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
								 ul_dev);
			break;
		}

		if (demote_this) {
			if (list_is_first(&ipip_entry->ipip_list_node,
					  &mlxsw_sp->router->ipip_list))
				prev = NULL;
			else
				/* This can't be cached from previous iteration,
				 * because that entry could be gone now.
				 */
				prev = list_prev_entry(ipip_entry,
						       ipip_list_node);
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
			ipip_entry = prev;
		}
	}
	mutex_unlock(&mlxsw_sp->router->lock);

	return err;
}

int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
				      enum mlxsw_sp_l3proto ul_proto,
				      const union mlxsw_sp_l3addr *ul_sip,
				      u32 tunnel_index)
{
	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err = 0;

	mutex_lock(&mlxsw_sp->router->lock);

	if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
		err = -EINVAL;
		goto out;
	}

	router->nve_decap_config.ul_tb_id = ul_tb_id;
	router->nve_decap_config.tunnel_index = tunnel_index;
	router->nve_decap_config.ul_proto = ul_proto;
	router->nve_decap_config.ul_sip = *ul_sip;
	router->nve_decap_config.valid = true;

	/* It is valid to create a tunnel with a local IP and only later
	 * assign this IP address to a local interface
	 */
	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
							 ul_proto, ul_sip,
							 type);
	if (!fib_entry)
		goto out;

	fib_entry->decap.tunnel_index = tunnel_index;
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;

	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
	if (err)
		goto err_fib_entry_update;

	goto out;

err_fib_entry_update:
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
out:
	mutex_unlock(&mlxsw_sp->router->lock);
	return err;
}

void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
				      enum mlxsw_sp_l3proto ul_proto,
				      const union mlxsw_sp_l3addr *ul_sip)
{
	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	struct mlxsw_sp_fib_entry *fib_entry;

	mutex_lock(&mlxsw_sp->router->lock);

	if (WARN_ON_ONCE(!router->nve_decap_config.valid))
		goto out;

	router->nve_decap_config.valid = false;

	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
							 ul_proto, ul_sip,
							 type);
	if (!fib_entry)
		goto out;

	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
out:
	mutex_unlock(&mlxsw_sp->router->lock);
}

static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
					 u32 ul_tb_id,
					 enum mlxsw_sp_l3proto ul_proto,
					 const union mlxsw_sp_l3addr *ul_sip)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;

	return router->nve_decap_config.valid &&
	       router->nve_decap_config.ul_tb_id == ul_tb_id &&
	       router->nve_decap_config.ul_proto == ul_proto &&
	       !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
		       sizeof(*ul_sip));
}

struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	bool connected;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;
	bool counter_valid;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

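/* Neighbour entries mirror the kernel's neighbour tables and are keyed by
 * the kernel's struct neighbour pointer itself, so netevent handlers can
 * look them up with a single hash probe.
 */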
struct mlxsw_sp_neigh_entry *
mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
			struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry) {
		if (list_empty(&rif->neigh_list))
			return NULL;
		else
			return list_first_entry(&rif->neigh_list,
						typeof(*neigh_entry),
						rif_list_node);
	}
	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
		return NULL;
	return list_next_entry(neigh_entry, rif_list_node);
}

int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}

unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}

u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return ntohl(*((__be32 *) n->primary_key));
}

struct in6_addr *
mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return (struct in6_addr *) &n->primary_key;
}

int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
			   u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
	if (!neigh_entry)
		return NULL;

	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);

	return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static bool
mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct devlink *devlink;
	const char *table_name;

	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
	case AF_INET:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
		break;
	case AF_INET6:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
		break;
	default:
		WARN_ON(1);
		return false;
	}

	devlink = priv_to_devlink(mlxsw_sp->core);
	return devlink_dpipe_table_counter_enabled(devlink, table_name);
}

static void
mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
		return;

	neigh_entry->counter_valid = true;
}

static void
mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry->counter_valid)
		return;
	mlxsw_sp_flow_counter_free(mlxsw_sp,
				   neigh_entry->counter_index);
	neigh_entry->counter_valid = false;
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_key key;

	key.n = n;
	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}

2277 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2281 struct net_device *dev;
2282 struct neighbour *n;
2287 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2289 if (!mlxsw_sp->router->rifs[rif]) {
2290 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2291 return;
2292 }
2294 dipn = htonl(dip);
2295 dev = mlxsw_sp->router->rifs[rif]->dev;
2296 n = neigh_lookup(&arp_tbl, &dipn, dev);
2297 if (!n)
2298 return;
2300 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2301 neigh_event_send(n, NULL);
2302 neigh_release(n);
2305 #if IS_ENABLED(CONFIG_IPV6)
2306 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2310 struct net_device *dev;
2311 struct neighbour *n;
2312 struct in6_addr dip;
2313 u16 rif;
2315 mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2316 &dip);
2318 if (!mlxsw_sp->router->rifs[rif]) {
2319 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2320 return;
2321 }
2323 dev = mlxsw_sp->router->rifs[rif]->dev;
2324 n = neigh_lookup(&nd_tbl, &dip, dev);
2325 if (!n)
2326 return;
2328 netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2329 neigh_event_send(n, NULL);
2330 neigh_release(n);
2332 #else
2333 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2334 char *rauhtd_pl,
2335 int rec_index)
2336 {
2337 }
2338 #endif
2340 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2347 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2348 rec_index);
2349 /* Hardware starts counting at 0, so add 1. */
2350 num_entries++;
2352 /* Each record consists of several neighbour entries. */
2353 for (i = 0; i < num_entries; i++) {
2356 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2357 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2358 ent_index);
2363 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2367 /* One record contains one entry. */
2368 mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2369 rec_index);
2372 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2373 char *rauhtd_pl, int rec_index)
2375 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2376 case MLXSW_REG_RAUHTD_TYPE_IPV4:
2377 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2378 rec_index);
2379 break;
2380 case MLXSW_REG_RAUHTD_TYPE_IPV6:
2381 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2382 rec_index);
2383 break;
2384 }
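/* The helper below decides whether a RAUHTD dump was full and is worth
 * re-issuing: the response must hold the maximum number of records and
 * the last record must itself be full. An IPv6 record always carries a
 * single entry, so it counts as full; an IPv4 record is full only when
 * every per-record entry slot is used.
 */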
2387 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2389 u8 num_rec, last_rec_index, num_entries;
2391 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2392 last_rec_index = num_rec - 1;
2394 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2395 return false;
2396 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2397 MLXSW_REG_RAUHTD_TYPE_IPV6)
2398 return true;
2400 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2401 last_rec_index);
2402 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2403 return true;
2405 return false;
2407 static int
2408 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2409 char *rauhtd_pl,
2410 enum mlxsw_reg_rauhtd_type type)
2412 int i, num_rec;
2413 int err;
2415 /* Ensure the RIF we read from the device does not change mid-dump. */
2416 mutex_lock(&mlxsw_sp->router->lock);
2417 do {
2418 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2419 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2420 rauhtd_pl);
2421 if (err) {
2422 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2423 break;
2424 }
2425 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2426 for (i = 0; i < num_rec; i++)
2427 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2428 i);
2429 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2430 mutex_unlock(&mlxsw_sp->router->lock);
2432 return err;
2435 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2437 enum mlxsw_reg_rauhtd_type type;
2438 char *rauhtd_pl;
2439 int err;
2441 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2442 if (!rauhtd_pl)
2443 return -ENOMEM;
2445 type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2446 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2447 if (err)
2448 goto out;
2450 type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2451 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2453 out:
2454 kfree(rauhtd_pl);
2455 return err;
2457 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2459 struct mlxsw_sp_neigh_entry *neigh_entry;
2461 mutex_lock(&mlxsw_sp->router->lock);
2462 list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2463 nexthop_neighs_list_node)
2464 /* If this neigh has nexthops, make the kernel think it is active
2465 * regardless of the traffic.
2466 */
2467 neigh_event_send(neigh_entry->key.n, NULL);
2468 mutex_unlock(&mlxsw_sp->router->lock);
2472 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2474 unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2476 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2477 msecs_to_jiffies(interval));
2480 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2482 struct mlxsw_sp_router *router;
2483 int err;
2485 router = container_of(work, struct mlxsw_sp_router,
2486 neighs_update.dw.work);
2487 err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2489 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
2491 mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2493 mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2496 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2498 struct mlxsw_sp_neigh_entry *neigh_entry;
2499 struct mlxsw_sp_router *router;
2501 router = container_of(work, struct mlxsw_sp_router,
2502 nexthop_probe_dw.work);
2503 /* Iterate over nexthop neighbours, find those that are unresolved and
2504 * send ARP on them. This solves the chicken-and-egg problem: a
2505 * nexthop is not offloaded until its neighbour is resolved, but the
2506 * neighbour is never resolved if the traffic is already flowing in
2507 * HW using a different nexthop.
2508 */
2509 mutex_lock(&router->lock);
2510 list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2511 nexthop_neighs_list_node)
2512 if (!neigh_entry->connected)
2513 neigh_event_send(neigh_entry->key.n, NULL);
2514 mutex_unlock(&router->lock);
2516 mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2517 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2521 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2522 struct mlxsw_sp_neigh_entry *neigh_entry,
2523 bool removing, bool dead);
2525 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2527 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2528 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2532 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2533 struct mlxsw_sp_neigh_entry *neigh_entry,
2534 enum mlxsw_reg_rauht_op op)
2536 struct neighbour *n = neigh_entry->key.n;
2537 u32 dip = ntohl(*((__be32 *) n->primary_key));
2538 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2540 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2541 dip);
2542 if (neigh_entry->counter_valid)
2543 mlxsw_reg_rauht_pack_counter(rauht_pl,
2544 neigh_entry->counter_index);
2545 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2549 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2550 struct mlxsw_sp_neigh_entry *neigh_entry,
2551 enum mlxsw_reg_rauht_op op)
2553 struct neighbour *n = neigh_entry->key.n;
2554 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2555 const char *dip = n->primary_key;
2557 mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2558 dip);
2559 if (neigh_entry->counter_valid)
2560 mlxsw_reg_rauht_pack_counter(rauht_pl,
2561 neigh_entry->counter_index);
2562 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2565 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2567 struct neighbour *n = neigh_entry->key.n;
2569 /* Packets with a link-local destination address are trapped
2570 * after LPM lookup and never reach the neighbour table, so
2571 * there is no need to program such neighbours to the device.
2572 */
2573 if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2574 IPV6_ADDR_LINKLOCAL)
2575 return true;
2577 return false;
2580 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2581 struct mlxsw_sp_neigh_entry *neigh_entry,
2582 bool adding)
2584 enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2585 int err;
2587 if (!adding && !neigh_entry->connected)
2588 return;
2589 neigh_entry->connected = adding;
2590 if (neigh_entry->key.n->tbl->family == AF_INET) {
2591 err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2592 op);
2593 if (err)
2594 return;
2595 } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2596 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2597 return;
2598 err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2599 op);
2600 if (err)
2601 return;
2607 if (adding)
2608 neigh_entry->key.n->flags |= NTF_OFFLOADED;
2609 else
2610 neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2614 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2615 struct mlxsw_sp_neigh_entry *neigh_entry,
2616 bool adding)
2618 if (adding)
2619 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2620 else
2621 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2622 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2625 struct mlxsw_sp_netevent_work {
2626 struct work_struct work;
2627 struct mlxsw_sp *mlxsw_sp;
2628 struct neighbour *n;
2631 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2633 struct mlxsw_sp_netevent_work *net_work =
2634 container_of(work, struct mlxsw_sp_netevent_work, work);
2635 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2636 struct mlxsw_sp_neigh_entry *neigh_entry;
2637 struct neighbour *n = net_work->n;
2638 unsigned char ha[ETH_ALEN];
2639 bool entry_connected;
2640 u8 nud_state, dead;
2642 /* If these parameters are changed after we release the lock,
2643 * then we are guaranteed to receive another event letting us
2644 * know about it.
2645 */
2646 read_lock_bh(&n->lock);
2647 memcpy(ha, n->ha, ETH_ALEN);
2648 nud_state = n->nud_state;
2649 dead = n->dead;
2650 read_unlock_bh(&n->lock);
2652 mutex_lock(&mlxsw_sp->router->lock);
2653 mlxsw_sp_span_respin(mlxsw_sp);
2655 entry_connected = nud_state & NUD_VALID && !dead;
2656 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2657 if (!entry_connected && !neigh_entry)
2658 goto out;
2659 if (!neigh_entry) {
2660 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2661 if (IS_ERR(neigh_entry))
2662 goto out;
2663 }
2665 memcpy(neigh_entry->ha, ha, ETH_ALEN);
2666 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2667 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2668 dead);
2670 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2671 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2673 out:
2674 mutex_unlock(&mlxsw_sp->router->lock);
2675 neigh_release(n);
2676 kfree(net_work);
2679 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2681 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2683 struct mlxsw_sp_netevent_work *net_work =
2684 container_of(work, struct mlxsw_sp_netevent_work, work);
2685 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2687 mlxsw_sp_mp_hash_init(mlxsw_sp);
2688 kfree(net_work);
2691 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2693 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2695 struct mlxsw_sp_netevent_work *net_work =
2696 container_of(work, struct mlxsw_sp_netevent_work, work);
2697 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2699 __mlxsw_sp_router_init(mlxsw_sp);
2700 kfree(net_work);
2703 static int mlxsw_sp_router_schedule_work(struct net *net,
2704 struct notifier_block *nb,
2705 void (*cb)(struct work_struct *))
2707 struct mlxsw_sp_netevent_work *net_work;
2708 struct mlxsw_sp_router *router;
2710 router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2711 if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2712 return NOTIFY_DONE;
2714 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2715 if (!net_work)
2716 return NOTIFY_BAD;
2718 INIT_WORK(&net_work->work, cb);
2719 net_work->mlxsw_sp = router->mlxsw_sp;
2720 mlxsw_core_schedule_work(&net_work->work);
2721 return NOTIFY_DONE;
2724 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2725 unsigned long event, void *ptr)
2727 struct mlxsw_sp_netevent_work *net_work;
2728 struct mlxsw_sp_port *mlxsw_sp_port;
2729 struct mlxsw_sp *mlxsw_sp;
2730 unsigned long interval;
2731 struct neigh_parms *p;
2732 struct neighbour *n;
2734 switch (event) {
2735 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2736 p = ptr;
2738 /* We don't care about changes in the default table. */
2739 if (!p->dev || (p->tbl->family != AF_INET &&
2740 p->tbl->family != AF_INET6))
2741 return NOTIFY_DONE;
2743 /* We are in atomic context and can't take RTNL mutex,
2744 * so use RCU variant to walk the device chain.
2745 */
2746 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2747 if (!mlxsw_sp_port)
2748 return NOTIFY_DONE;
2750 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2751 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2752 mlxsw_sp->router->neighs_update.interval = interval;
2754 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2755 break;
2756 case NETEVENT_NEIGH_UPDATE:
2757 n = ptr;
2759 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2760 return NOTIFY_DONE;
2762 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2763 if (!mlxsw_sp_port)
2764 return NOTIFY_DONE;
2766 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2767 if (!net_work) {
2768 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2769 return NOTIFY_BAD;
2770 }
2772 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2773 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2774 net_work->n = n;
2776 /* Take a reference to ensure the neighbour won't be
2777 * destructed until we drop the reference in delayed
2778 * work.
2779 */
2780 neigh_clone(n);
2781 mlxsw_core_schedule_work(&net_work->work);
2782 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2783 break;
2784 case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2785 case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2786 return mlxsw_sp_router_schedule_work(ptr, nb,
2787 mlxsw_sp_router_mp_hash_event_work);
2789 case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2790 return mlxsw_sp_router_schedule_work(ptr, nb,
2791 mlxsw_sp_router_update_priority_work);
2792 }
2794 return NOTIFY_DONE;
2797 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2799 int err;
2801 err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2802 &mlxsw_sp_neigh_ht_params);
2803 if (err)
2804 return err;
2806 /* Initialize the polling interval according to the default
2807 * table.
2808 */
2809 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2811 /* Create the delayed works for the activity_update */
2812 INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2813 mlxsw_sp_router_neighs_update_work);
2814 INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2815 mlxsw_sp_router_probe_unresolved_nexthops);
2816 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2817 mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2818 return 0;
2821 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2823 cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2824 cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2825 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2828 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2829 struct mlxsw_sp_rif *rif)
2831 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2833 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2834 rif_list_node) {
2835 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2836 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2840 enum mlxsw_sp_nexthop_type {
2841 MLXSW_SP_NEXTHOP_TYPE_ETH,
2842 MLXSW_SP_NEXTHOP_TYPE_IPIP,
2845 struct mlxsw_sp_nexthop_key {
2846 struct fib_nh *fib_nh;
2849 struct mlxsw_sp_nexthop {
2850 struct list_head neigh_list_node; /* member of neigh entry list */
2851 struct list_head rif_list_node;
2852 struct list_head router_list_node;
2853 struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
2854 * this nexthop belongs to
2855 */
2856 struct rhash_head ht_node;
2857 struct neigh_table *neigh_tbl;
2858 struct mlxsw_sp_nexthop_key key;
2859 unsigned char gw_addr[sizeof(struct in6_addr)];
2860 int ifindex;
2861 int nh_weight;
2862 int norm_nh_weight;
2863 int num_adj_entries;
2864 struct mlxsw_sp_rif *rif;
2865 u8 should_offload:1, /* set indicates this neigh is connected and
2866 * should be put to KVD linear area of this group.
2867 */
2868 offloaded:1, /* set in case the neigh is actually put into
2869 * KVD linear area of this group.
2870 */
2871 update:1, /* set indicates that MAC of this neigh should be
2872 * updated in HW
2873 */
2874 discard:1; /* nexthop is programmed to discard packets */
2875 enum mlxsw_sp_nexthop_type type;
2876 union {
2877 struct mlxsw_sp_neigh_entry *neigh_entry;
2878 struct mlxsw_sp_ipip_entry *ipip_entry;
2879 };
2880 unsigned int counter_index;
2881 bool counter_valid;
2884 enum mlxsw_sp_nexthop_group_type {
2885 MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
2886 MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
2887 MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
2890 struct mlxsw_sp_nexthop_group_info {
2891 struct mlxsw_sp_nexthop_group *nh_grp;
2892 u32 adj_index;
2893 u16 ecmp_size;
2894 u16 count;
2895 int sum_norm_weight;
2896 u8 adj_index_valid:1,
2897 gateway:1; /* routes using the group use a gateway */
2898 struct mlxsw_sp_nexthop nexthops[];
2899 #define nh_rif nexthops[0].rif
2902 struct mlxsw_sp_nexthop_group_vr_key {
2903 u16 vr_id;
2904 enum mlxsw_sp_l3proto proto;
2907 struct mlxsw_sp_nexthop_group_vr_entry {
2908 struct list_head list; /* member in vr_list */
2909 struct rhash_head ht_node; /* member in vr_ht */
2910 refcount_t ref_count;
2911 struct mlxsw_sp_nexthop_group_vr_key key;
2914 struct mlxsw_sp_nexthop_group {
2915 struct rhash_head ht_node;
2916 struct list_head fib_list; /* list of fib entries that use this group */
2917 union {
2918 struct {
2919 struct fib_info *fi;
2920 } ipv4;
2921 struct {
2922 u32 id;
2923 } obj;
2924 };
2925 struct mlxsw_sp_nexthop_group_info *nhgi;
2926 struct list_head vr_list;
2927 struct rhashtable vr_ht;
2928 enum mlxsw_sp_nexthop_group_type type;
2929 bool can_destroy;
2932 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2933 struct mlxsw_sp_nexthop *nh)
2935 struct devlink *devlink;
2937 devlink = priv_to_devlink(mlxsw_sp->core);
2938 if (!devlink_dpipe_table_counter_enabled(devlink,
2939 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2940 return;
2942 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2943 return;
2945 nh->counter_valid = true;
2948 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2949 struct mlxsw_sp_nexthop *nh)
2951 if (!nh->counter_valid)
2952 return;
2953 mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2954 nh->counter_valid = false;
2957 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2958 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2960 if (!nh->counter_valid)
2961 return -EINVAL;
2963 return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2964 p_counter, NULL);
2967 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2968 struct mlxsw_sp_nexthop *nh)
2970 if (!nh) {
2971 if (list_empty(&router->nexthop_list))
2972 return NULL;
2973 else
2974 return list_first_entry(&router->nexthop_list,
2975 typeof(*nh), router_list_node);
2976 }
2977 if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2978 return NULL;
2979 return list_next_entry(nh, router_list_node);
2982 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2984 return nh->offloaded;
2987 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2989 if (!nh->neigh_entry)
2990 return NULL;
2991 return nh->neigh_entry->ha;
2994 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2995 u32 *p_adj_size, u32 *p_adj_hash_index)
2997 struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
2998 u32 adj_hash_index = 0;
3001 if (!nh->offloaded || !nhgi->adj_index_valid)
3002 return -EINVAL;
3004 *p_adj_index = nhgi->adj_index;
3005 *p_adj_size = nhgi->ecmp_size;
3007 for (i = 0; i < nhgi->count; i++) {
3008 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3010 if (nh_iter == nh)
3011 break;
3012 if (nh_iter->offloaded)
3013 adj_hash_index += nh_iter->num_adj_entries;
3016 *p_adj_hash_index = adj_hash_index;
3017 return 0;
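/* Illustrative example for the computation above: in a group based at
 * adjacency index 1000 whose first two nexthops are offloaded with 2
 * and 1 adjacency entries respectively, the third nexthop reports
 * adj_index 1000, the group's adj_size and adj_hash_index 3 (2 + 1
 * entries preceding it).
 */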
3020 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
3022 return nh->rif;
3025 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3027 struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3030 for (i = 0; i < nhgi->count; i++) {
3031 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3033 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
3034 return true;
3037 return false;
3039 bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh)
3041 return nh->discard;
3044 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
3045 .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
3046 .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
3047 .key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
3048 .automatic_shrinking = true,
3051 static struct mlxsw_sp_nexthop_group_vr_entry *
3052 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3053 const struct mlxsw_sp_fib *fib)
3055 struct mlxsw_sp_nexthop_group_vr_key key;
3057 memset(&key, 0, sizeof(key));
3058 key.vr_id = fib->vr->id;
3059 key.proto = fib->proto;
3060 return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3061 mlxsw_sp_nexthop_group_vr_ht_params);
3065 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3066 const struct mlxsw_sp_fib *fib)
3068 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3071 vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3072 if (!vr_entry)
3073 return -ENOMEM;
3075 vr_entry->key.vr_id = fib->vr->id;
3076 vr_entry->key.proto = fib->proto;
3077 refcount_set(&vr_entry->ref_count, 1);
3079 err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3080 mlxsw_sp_nexthop_group_vr_ht_params);
3081 if (err)
3082 goto err_hashtable_insert;
3084 list_add(&vr_entry->list, &nh_grp->vr_list);
3086 return 0;
3088 err_hashtable_insert:
3089 kfree(vr_entry);
3090 return err;
3094 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3095 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3097 list_del(&vr_entry->list);
3098 rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3099 mlxsw_sp_nexthop_group_vr_ht_params);
3100 kfree(vr_entry);
3104 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3105 const struct mlxsw_sp_fib *fib)
3107 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3109 vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3110 if (vr_entry) {
3111 refcount_inc(&vr_entry->ref_count);
3112 return 0;
3113 }
3115 return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
3119 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3120 const struct mlxsw_sp_fib *fib)
3122 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3124 vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3125 if (WARN_ON_ONCE(!vr_entry))
3126 return;
3128 if (!refcount_dec_and_test(&vr_entry->ref_count))
3129 return;
3131 mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
3134 struct mlxsw_sp_nexthop_group_cmp_arg {
3135 enum mlxsw_sp_nexthop_group_type type;
3136 union {
3137 struct fib_info *fi;
3138 struct mlxsw_sp_fib6_entry *fib6_entry;
3139 u32 id;
3140 };
3144 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3145 const struct in6_addr *gw, int ifindex,
3150 for (i = 0; i < nh_grp->nhgi->count; i++) {
3151 const struct mlxsw_sp_nexthop *nh;
3153 nh = &nh_grp->nhgi->nexthops[i];
3154 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3155 ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
3156 return true;
3159 return false;
3163 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3164 const struct mlxsw_sp_fib6_entry *fib6_entry)
3166 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3168 if (nh_grp->nhgi->count != fib6_entry->nrt6)
3169 return false;
3171 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3172 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3173 struct in6_addr *gw;
3174 int ifindex, weight;
3176 ifindex = fib6_nh->fib_nh_dev->ifindex;
3177 weight = fib6_nh->fib_nh_weight;
3178 gw = &fib6_nh->fib_nh_gw6;
3179 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
3180 weight))
3181 return false;
3184 return true;
3188 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3190 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3191 const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3193 if (nh_grp->type != cmp_arg->type)
3194 return 1;
3196 switch (cmp_arg->type) {
3197 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3198 return cmp_arg->fi != nh_grp->ipv4.fi;
3199 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3200 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3201 cmp_arg->fib6_entry);
3202 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3203 return cmp_arg->id != nh_grp->obj.id;
3204 default:
3205 return 1;
3210 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3212 const struct mlxsw_sp_nexthop_group *nh_grp = data;
3213 const struct mlxsw_sp_nexthop *nh;
3214 struct fib_info *fi;
3215 unsigned int val;
3216 int i;
3218 switch (nh_grp->type) {
3219 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3220 fi = nh_grp->ipv4.fi;
3221 return jhash(&fi, sizeof(fi), seed);
3222 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3223 val = nh_grp->nhgi->count;
3224 for (i = 0; i < nh_grp->nhgi->count; i++) {
3225 nh = &nh_grp->nhgi->nexthops[i];
3226 val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3227 val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3229 return jhash(&val, sizeof(val), seed);
3230 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3231 return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
3232 default:
3233 return 0;
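/* Note: the IPv6 hashes here fold the per-nexthop jhashes together
 * with XOR, which is commutative, so the resulting group hash does not
 * depend on the order in which the nexthops appear in the group.
 */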
3239 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3241 unsigned int val = fib6_entry->nrt6;
3242 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3244 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3245 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3246 struct net_device *dev = fib6_nh->fib_nh_dev;
3247 struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3249 val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3250 val ^= jhash(gw, sizeof(*gw), seed);
3253 return jhash(&val, sizeof(val), seed);
3257 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3259 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3261 switch (cmp_arg->type) {
3262 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3263 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3264 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3265 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3266 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3267 return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
3268 default:
3269 return 0;
3274 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3275 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3276 .hashfn = mlxsw_sp_nexthop_group_hash,
3277 .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
3278 .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
3281 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3282 struct mlxsw_sp_nexthop_group *nh_grp)
3284 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3285 !nh_grp->nhgi->gateway)
3286 return 0;
3288 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3289 &nh_grp->ht_node,
3290 mlxsw_sp_nexthop_group_ht_params);
3293 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3294 struct mlxsw_sp_nexthop_group *nh_grp)
3296 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3297 !nh_grp->nhgi->gateway)
3298 return;
3300 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3301 &nh_grp->ht_node,
3302 mlxsw_sp_nexthop_group_ht_params);
3305 static struct mlxsw_sp_nexthop_group *
3306 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3307 struct fib_info *fi)
3309 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3311 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3312 cmp_arg.fi = fi;
3313 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3314 &cmp_arg,
3315 mlxsw_sp_nexthop_group_ht_params);
3318 static struct mlxsw_sp_nexthop_group *
3319 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3320 struct mlxsw_sp_fib6_entry *fib6_entry)
3322 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3324 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3325 cmp_arg.fib6_entry = fib6_entry;
3326 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3327 &cmp_arg,
3328 mlxsw_sp_nexthop_group_ht_params);
3331 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3332 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3333 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3334 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
3337 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3338 struct mlxsw_sp_nexthop *nh)
3340 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3341 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3344 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3345 struct mlxsw_sp_nexthop *nh)
3347 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3348 mlxsw_sp_nexthop_ht_params);
3351 static struct mlxsw_sp_nexthop *
3352 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3353 struct mlxsw_sp_nexthop_key key)
3355 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3356 mlxsw_sp_nexthop_ht_params);
3359 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3360 enum mlxsw_sp_l3proto proto,
3361 u16 vr_id,
3362 u32 adj_index, u16 ecmp_size,
3363 u32 new_adj_index,
3364 u16 new_ecmp_size)
3366 char raleu_pl[MLXSW_REG_RALEU_LEN];
3368 mlxsw_reg_raleu_pack(raleu_pl,
3369 (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3370 adj_index, ecmp_size, new_adj_index,
3371 new_ecmp_size);
3372 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3375 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3376 struct mlxsw_sp_nexthop_group *nh_grp,
3377 u32 old_adj_index, u16 old_ecmp_size)
3379 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3380 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3381 int err;
3383 list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3384 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3385 vr_entry->key.proto,
3386 vr_entry->key.vr_id,
3387 old_adj_index,
3388 old_ecmp_size,
3389 nhgi->adj_index,
3390 nhgi->ecmp_size);
3391 if (err)
3392 goto err_mass_update_vr;
3393 }
3394 return 0;
3396 err_mass_update_vr:
3397 list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3398 mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3399 vr_entry->key.vr_id,
3400 nhgi->adj_index,
3401 nhgi->ecmp_size,
3402 old_adj_index, old_ecmp_size);
3403 return err;
3406 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3407 struct mlxsw_sp_nexthop *nh)
3409 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3410 char ratr_pl[MLXSW_REG_RATR_LEN];
3412 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3413 true, MLXSW_REG_RATR_TYPE_ETHERNET,
3414 adj_index, nh->rif->rif_index);
3415 if (nh->discard)
3416 mlxsw_reg_ratr_trap_action_set(ratr_pl,
3417 MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3418 else
3419 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3420 if (nh->counter_valid)
3421 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3422 else
3423 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3425 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3428 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3429 struct mlxsw_sp_nexthop *nh)
3431 int i;
3433 for (i = 0; i < nh->num_adj_entries; i++) {
3434 int err;
3436 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3437 if (err)
3438 return err;
3439 }
3441 return 0;
3444 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3445 u32 adj_index,
3446 struct mlxsw_sp_nexthop *nh)
3448 const struct mlxsw_sp_ipip_ops *ipip_ops;
3450 ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3451 return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3454 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3455 u32 adj_index,
3456 struct mlxsw_sp_nexthop *nh)
3458 int i;
3460 for (i = 0; i < nh->num_adj_entries; i++) {
3461 int err;
3463 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3464 nh);
3465 if (err)
3466 return err;
3467 }
3469 return 0;
3473 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3474 struct mlxsw_sp_nexthop_group_info *nhgi,
3475 bool reallocate)
3477 u32 adj_index = nhgi->adj_index; /* base */
3478 struct mlxsw_sp_nexthop *nh;
3479 int i;
3481 for (i = 0; i < nhgi->count; i++) {
3482 nh = &nhgi->nexthops[i];
3484 if (!nh->should_offload) {
3485 nh->offloaded = 0;
3486 continue;
3487 }
3489 if (nh->update || reallocate) {
3490 int err = 0;
3492 switch (nh->type) {
3493 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3494 err = mlxsw_sp_nexthop_update
3495 (mlxsw_sp, adj_index, nh);
3496 break;
3497 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3498 err = mlxsw_sp_nexthop_ipip_update
3499 (mlxsw_sp, adj_index, nh);
3500 break;
3501 }
3502 if (err)
3503 return err;
3504 nh->update = 0;
3505 nh->offloaded = 1;
3506 }
3507 adj_index += nh->num_adj_entries;
3509 return 0;
3513 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3514 struct mlxsw_sp_nexthop_group *nh_grp)
3516 struct mlxsw_sp_fib_entry *fib_entry;
3517 int err;
3519 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3520 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3521 if (err)
3522 return err;
3523 }
3525 return 0;
3527 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3529 /* Valid sizes for an adjacency group are:
3530 * 1-64, 512, 1024, 2048 and 4096.
3531 */
3532 if (*p_adj_grp_size <= 64)
3533 return;
3534 else if (*p_adj_grp_size <= 512)
3535 *p_adj_grp_size = 512;
3536 else if (*p_adj_grp_size <= 1024)
3537 *p_adj_grp_size = 1024;
3538 else if (*p_adj_grp_size <= 2048)
3539 *p_adj_grp_size = 2048;
3541 *p_adj_grp_size = 4096;
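/* Illustrative examples: a requested size of 70 is rounded up to 512
 * and a requested size of 1500 up to 2048, while sizes of 64 or less
 * are already valid and left unchanged.
 */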
3544 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3545 unsigned int alloc_size)
3547 if (alloc_size >= 4096)
3548 *p_adj_grp_size = 4096;
3549 else if (alloc_size >= 2048)
3550 *p_adj_grp_size = 2048;
3551 else if (alloc_size >= 1024)
3552 *p_adj_grp_size = 1024;
3553 else if (alloc_size >= 512)
3554 *p_adj_grp_size = 512;
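/* Illustrative example: if 2048 entries were requested but the
 * allocator can only provide 1200, the group size is rounded down to
 * 1024; allocations below 512 leave the requested size untouched.
 */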
3557 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3558 u16 *p_adj_grp_size)
3560 unsigned int alloc_size;
3563 /* Round up the requested group size to the next size supported
3564 * by the device and make sure the request can be satisfied.
3565 */
3566 mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3567 err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3568 MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3569 *p_adj_grp_size, &alloc_size);
3570 if (err)
3571 return err;
3572 /* It is possible the allocation results in more allocated
3573 * entries than requested. Try to use as many of them as
3574 * possible.
3575 */
3576 mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3578 return 0;
3582 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3584 int i, g = 0, sum_norm_weight = 0;
3585 struct mlxsw_sp_nexthop *nh;
3587 for (i = 0; i < nhgi->count; i++) {
3588 nh = &nhgi->nexthops[i];
3590 if (!nh->should_offload)
3591 continue;
3592 if (g > 0)
3593 g = gcd(nh->nh_weight, g);
3594 else
3595 g = nh->nh_weight;
3598 for (i = 0; i < nhgi->count; i++) {
3599 nh = &nhgi->nexthops[i];
3601 if (!nh->should_offload)
3602 continue;
3603 nh->norm_nh_weight = nh->nh_weight / g;
3604 sum_norm_weight += nh->norm_nh_weight;
3607 nhgi->sum_norm_weight = sum_norm_weight;
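/* Illustrative example: offloadable nexthops with weights 20 and 40
 * have a gcd of 20, so their normalized weights become 1 and 2 and
 * sum_norm_weight is 3.
 */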
3611 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3613 int i, weight = 0, lower_bound = 0;
3614 int total = nhgi->sum_norm_weight;
3615 u16 ecmp_size = nhgi->ecmp_size;
3617 for (i = 0; i < nhgi->count; i++) {
3618 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3619 int upper_bound;
3621 if (!nh->should_offload)
3622 continue;
3623 weight += nh->norm_nh_weight;
3624 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3625 nh->num_adj_entries = upper_bound - lower_bound;
3626 lower_bound = upper_bound;
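/* Illustrative example: with normalized weights 1 and 2 (total 3) and
 * ecmp_size 3, the first nexthop is assigned entries [0, 1) and the
 * second entries [1, 3), i.e. one and two adjacency entries, which
 * preserves the 1:2 ratio.
 */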
3630 static struct mlxsw_sp_nexthop *
3631 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3632 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3635 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3636 struct mlxsw_sp_nexthop_group *nh_grp)
3640 for (i = 0; i < nh_grp->nhgi->count; i++) {
3641 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3643 if (nh->offloaded)
3644 nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3645 else
3646 nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3651 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3652 struct mlxsw_sp_fib6_entry *fib6_entry)
3654 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3656 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3657 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3658 struct mlxsw_sp_nexthop *nh;
3660 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3661 if (nh && nh->offloaded)
3662 fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3663 else
3664 fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3669 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3670 struct mlxsw_sp_nexthop_group *nh_grp)
3672 struct mlxsw_sp_fib6_entry *fib6_entry;
3674 /* Unfortunately, in IPv6 the route and the nexthop are described by
3675 * the same struct, so we need to iterate over all the routes using the
3676 * nexthop group and set / clear the offload indication for them.
3677 */
3678 list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3679 common.nexthop_group_node)
3680 __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3684 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3685 struct mlxsw_sp_nexthop_group *nh_grp)
3687 /* Do not update the flags if the nexthop group is being destroyed
3688 * since:
3689 * 1. The nexthop object is being deleted, in which case the flags are
3690 * irrelevant.
3691 * 2. The nexthop group was replaced by a newer group, in which case
3692 * the flags of the nexthop object were already updated based on the
3693 * new group.
3694 */
3695 if (nh_grp->can_destroy)
3696 return;
3698 nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3699 nh_grp->nhgi->adj_index_valid, false);
3703 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3704 struct mlxsw_sp_nexthop_group *nh_grp)
3706 switch (nh_grp->type) {
3707 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3708 mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3709 break;
3710 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3711 mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3712 break;
3713 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3714 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
3715 break;
3716 }
3720 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3721 struct mlxsw_sp_nexthop_group *nh_grp)
3723 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3724 u16 ecmp_size, old_ecmp_size;
3725 struct mlxsw_sp_nexthop *nh;
3726 bool offload_change = false;
3727 u32 adj_index, old_adj_index;
3728 bool old_adj_index_valid;
3729 int i, err2, err = 0;
3732 if (!nhgi->gateway)
3733 return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3735 for (i = 0; i < nhgi->count; i++) {
3736 nh = &nhgi->nexthops[i];
3738 if (nh->should_offload != nh->offloaded) {
3739 offload_change = true;
3740 if (nh->should_offload)
3741 nh->update = 1;
3744 if (!offload_change) {
3745 /* Nothing was added or removed, so no need to reallocate. Just
3746 * update MAC on existing adjacency indexes.
3748 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
3750 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3755 mlxsw_sp_nexthop_group_normalize(nhgi);
3756 if (!nhgi->sum_norm_weight)
3757 /* No neigh of this group is connected so we just set
3758 * the trap and let everything flow through kernel.
3759 */
3760 goto set_trap;
3762 ecmp_size = nhgi->sum_norm_weight;
3763 err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3764 if (err)
3765 /* No valid allocation size available. */
3766 goto set_trap;
3768 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3769 ecmp_size, &adj_index);
3770 if (err) {
3771 /* We ran out of KVD linear space, just set the
3772 * trap and let everything flow through kernel.
3773 */
3774 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3775 goto set_trap;
3776 }
3777 old_adj_index_valid = nhgi->adj_index_valid;
3778 old_adj_index = nhgi->adj_index;
3779 old_ecmp_size = nhgi->ecmp_size;
3780 nhgi->adj_index_valid = 1;
3781 nhgi->adj_index = adj_index;
3782 nhgi->ecmp_size = ecmp_size;
3783 mlxsw_sp_nexthop_group_rebalance(nhgi);
3784 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
3786 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3790 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3792 if (!old_adj_index_valid) {
3793 /* The trap was set for fib entries, so we have to call
3794 * fib entry update to unset it and use adjacency index.
3795 */
3796 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3798 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3804 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3805 old_adj_index, old_ecmp_size);
3806 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3807 old_ecmp_size, old_adj_index);
3809 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3816 old_adj_index_valid = nhgi->adj_index_valid;
3817 nhgi->adj_index_valid = 0;
3818 for (i = 0; i < nhgi->count; i++) {
3819 nh = &nhgi->nexthops[i];
3820 nh->offloaded = 0;
3821 }
3822 err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3824 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3825 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3826 if (old_adj_index_valid)
3827 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3828 nhgi->ecmp_size, nhgi->adj_index);
3829 return err;
3832 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3833 bool removing)
3835 if (!removing)
3836 nh->should_offload = 1;
3837 else
3838 nh->should_offload = 0;
3839 nh->update = 1;
3843 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3844 struct mlxsw_sp_neigh_entry *neigh_entry)
3846 struct neighbour *n, *old_n = neigh_entry->key.n;
3847 struct mlxsw_sp_nexthop *nh;
3848 bool entry_connected;
3849 u8 nud_state, dead;
3850 int err;
3852 nh = list_first_entry(&neigh_entry->nexthop_list,
3853 struct mlxsw_sp_nexthop, neigh_list_node);
3855 n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3856 if (!n) {
3857 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3858 if (IS_ERR(n))
3859 return PTR_ERR(n);
3860 neigh_event_send(n, NULL);
3861 }
3863 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3864 neigh_entry->key.n = n;
3865 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3866 if (err)
3867 goto err_neigh_entry_insert;
3869 read_lock_bh(&n->lock);
3870 nud_state = n->nud_state;
3871 dead = n->dead;
3872 read_unlock_bh(&n->lock);
3873 entry_connected = nud_state & NUD_VALID && !dead;
3875 list_for_each_entry(nh, &neigh_entry->nexthop_list,
3876 neigh_list_node) {
3877 neigh_release(old_n);
3878 neigh_clone(n);
3879 __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
3880 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
3881 }
3882 neigh_release(n);
3884 return 0;
3887 err_neigh_entry_insert:
3888 neigh_entry->key.n = old_n;
3889 mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3890 neigh_release(n);
3891 return err;
3895 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3896 struct mlxsw_sp_neigh_entry *neigh_entry,
3897 bool removing, bool dead)
3899 struct mlxsw_sp_nexthop *nh;
3901 if (list_empty(&neigh_entry->nexthop_list))
3902 return;
3904 if (removing && dead) {
3905 int err;
3907 err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
3908 neigh_entry);
3909 if (err)
3910 dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
3911 return;
3912 }
3914 list_for_each_entry(nh, &neigh_entry->nexthop_list,
3915 neigh_list_node) {
3916 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3917 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
3921 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3922 struct mlxsw_sp_rif *rif)
3924 if (!rif)
3925 return;
3927 nh->rif = rif;
3928 list_add(&nh->rif_list_node, &rif->nexthop_list);
3931 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3933 if (!nh->rif)
3934 return;
3936 list_del(&nh->rif_list_node);
3937 nh->rif = NULL;
3940 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3941 struct mlxsw_sp_nexthop *nh)
3943 struct mlxsw_sp_neigh_entry *neigh_entry;
3944 struct neighbour *n;
3945 u8 nud_state, dead;
3946 int err;
3948 if (!nh->nhgi->gateway || nh->neigh_entry)
3949 return 0;
3951 /* Take a reference of neigh here ensuring that neigh would
3952 * not be destructed before the nexthop entry is finished.
3953 * The reference is taken either in neigh_lookup() or
3954 * in neigh_create() in case n is not found.
3955 */
3956 n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3957 if (!n) {
3958 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3959 if (IS_ERR(n))
3960 return PTR_ERR(n);
3961 neigh_event_send(n, NULL);
3962 }
3963 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3964 if (!neigh_entry) {
3965 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3966 if (IS_ERR(neigh_entry)) {
3967 err = -EINVAL;
3968 goto err_neigh_entry_create;
3969 }
3970 }
3972 /* If that is the first nexthop connected to that neigh, add to
3973 * nexthop_neighs_list
3974 */
3975 if (list_empty(&neigh_entry->nexthop_list))
3976 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3977 &mlxsw_sp->router->nexthop_neighs_list);
3979 nh->neigh_entry = neigh_entry;
3980 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3981 read_lock_bh(&n->lock);
3982 nud_state = n->nud_state;
3983 dead = n->dead;
3984 read_unlock_bh(&n->lock);
3985 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3987 return 0;
3989 err_neigh_entry_create:
3990 neigh_release(n);
3991 return err;
3994 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3995 struct mlxsw_sp_nexthop *nh)
3997 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3998 struct neighbour *n;
4000 if (!neigh_entry)
4001 return;
4002 n = neigh_entry->key.n;
4004 __mlxsw_sp_nexthop_neigh_update(nh, true);
4005 list_del(&nh->neigh_list_node);
4006 nh->neigh_entry = NULL;
4008 /* If that is the last nexthop connected to that neigh, remove from
4009 * nexthop_neighs_list
4010 */
4011 if (list_empty(&neigh_entry->nexthop_list))
4012 list_del(&neigh_entry->nexthop_neighs_list_node);
4014 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4015 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
4017 neigh_release(n);
4020 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4022 struct net_device *ul_dev;
4023 bool is_up;
4025 rcu_read_lock();
4026 ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4027 is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
4028 rcu_read_unlock();
4030 return is_up;
4033 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4034 struct mlxsw_sp_nexthop *nh,
4035 struct mlxsw_sp_ipip_entry *ipip_entry)
4039 if (!nh->nhgi->gateway || nh->ipip_entry)
4040 return;
4042 nh->ipip_entry = ipip_entry;
4043 removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4044 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4045 mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
4048 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4049 struct mlxsw_sp_nexthop *nh)
4051 struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4053 if (!ipip_entry)
4054 return;
4056 __mlxsw_sp_nexthop_neigh_update(nh, true);
4057 nh->ipip_entry = NULL;
4060 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4061 const struct fib_nh *fib_nh,
4062 enum mlxsw_sp_ipip_type *p_ipipt)
4064 struct net_device *dev = fib_nh->fib_nh_dev;
4066 return dev &&
4067 fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4068 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
4071 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4072 struct mlxsw_sp_nexthop *nh,
4073 const struct net_device *dev)
4075 const struct mlxsw_sp_ipip_ops *ipip_ops;
4076 struct mlxsw_sp_ipip_entry *ipip_entry;
4077 struct mlxsw_sp_rif *rif;
4078 int err;
4080 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4081 if (ipip_entry) {
4082 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4083 if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4084 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4085 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4086 return 0;
4087 }
4088 }
4090 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4091 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4092 if (!rif)
4093 return 0;
4095 mlxsw_sp_nexthop_rif_init(nh, rif);
4096 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4098 goto err_neigh_init;
4100 return 0;
4102 err_neigh_init:
4103 mlxsw_sp_nexthop_rif_fini(nh);
4104 return err;
4107 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
4108 struct mlxsw_sp_nexthop *nh)
4110 switch (nh->type) {
4111 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4112 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4113 mlxsw_sp_nexthop_rif_fini(nh);
4114 break;
4115 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4116 mlxsw_sp_nexthop_rif_fini(nh);
4117 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
4118 break;
4119 }
4122 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4123 struct mlxsw_sp_nexthop_group *nh_grp,
4124 struct mlxsw_sp_nexthop *nh,
4125 struct fib_nh *fib_nh)
4127 struct net_device *dev = fib_nh->fib_nh_dev;
4128 struct in_device *in_dev;
4129 int err;
4131 nh->nhgi = nh_grp->nhgi;
4132 nh->key.fib_nh = fib_nh;
4133 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4134 nh->nh_weight = fib_nh->fib_nh_weight;
4135 #else
4136 nh->nh_weight = 1;
4137 #endif
4138 memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4139 nh->neigh_tbl = &arp_tbl;
4140 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4141 if (err)
4142 return err;
4144 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4145 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4147 if (!dev)
4148 return 0;
4149 nh->ifindex = dev->ifindex;
4151 rcu_read_lock();
4152 in_dev = __in_dev_get_rcu(dev);
4153 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4154 fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4155 rcu_read_unlock();
4156 return 0;
4157 }
4158 rcu_read_unlock();
4160 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4162 goto err_nexthop_neigh_init;
4164 return 0;
4166 err_nexthop_neigh_init:
4167 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4168 return err;
4171 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
4172 struct mlxsw_sp_nexthop *nh)
4174 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4175 list_del(&nh->router_list_node);
4176 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4177 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4180 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4181 unsigned long event, struct fib_nh *fib_nh)
4183 struct mlxsw_sp_nexthop_key key;
4184 struct mlxsw_sp_nexthop *nh;
4186 if (mlxsw_sp->router->aborted)
4187 return;
4189 key.fib_nh = fib_nh;
4190 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4191 if (!nh)
4192 return;
4194 switch (event) {
4195 case FIB_EVENT_NH_ADD:
4196 mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4197 break;
4198 case FIB_EVENT_NH_DEL:
4199 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4200 break;
4201 }
4203 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4206 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4207 struct mlxsw_sp_rif *rif)
4209 struct mlxsw_sp_nexthop *nh;
4210 bool removing;
4212 list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
4213 switch (nh->type) {
4214 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4215 removing = false;
4216 break;
4217 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4218 removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
4219 break;
4220 default:
4221 WARN_ON(1);
4222 continue;
4223 }
4225 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4226 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4230 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
4231 struct mlxsw_sp_rif *old_rif,
4232 struct mlxsw_sp_rif *new_rif)
4234 struct mlxsw_sp_nexthop *nh;
4236 list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
4237 list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
4238 nh->rif = new_rif;
4239 mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
4242 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4243 struct mlxsw_sp_rif *rif)
4245 struct mlxsw_sp_nexthop *nh, *tmp;
4247 list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
4248 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4249 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4254 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4255 const struct nh_notifier_single_info *nh,
4256 struct netlink_ext_ack *extack)
4261 NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4262 else if (nh->has_encap)
4263 NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
4271 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4272 const struct nh_notifier_grp_info *nh_grp,
4273 struct netlink_ext_ack *extack)
4277 if (nh_grp->is_fdb) {
4278 NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4282 for (i = 0; i < nh_grp->num_nh; i++) {
4283 const struct nh_notifier_single_info *nh;
4284 int err;
4286 nh = &nh_grp->nh_entries[i].nh;
4287 err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh,
4288 extack);
4289 if (err)
4290 return err;
4292 /* Device-only nexthops with an IPIP device are programmed as
4293 * encapsulating adjacency entries.
4294 */
4295 if (!nh->gw_family && !nh->is_reject &&
4296 !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4297 NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
4305 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4306 unsigned long event,
4307 struct nh_notifier_info *info)
4309 if (event != NEXTHOP_EVENT_REPLACE)
4310 return 0;
4312 if (!info->is_grp)
4313 return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4314 info->extack);
4315 return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, info->nh_grp,
4316 info->extack);
4319 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4320 const struct nh_notifier_info *info)
4322 const struct net_device *dev;
4324 if (info->is_grp)
4325 /* Already validated earlier. */
4326 return true;
4328 dev = info->nh->dev;
4329 return info->nh->gw_family || info->nh->is_reject ||
4330 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4333 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4334 struct mlxsw_sp_nexthop *nh)
4336 u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
4338 nh->discard = 1;
4339 nh->should_offload = 1;
4340 /* While nexthops that discard packets do not forward packets
4341 * via an egress RIF, they still need to be programmed using a
4342 * valid RIF, so use the loopback RIF created during init.
4344 nh->rif = mlxsw_sp->router->rifs[lb_rif_index];
4347 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4348 struct mlxsw_sp_nexthop *nh)
4350 nh->discard = 0;
4351 nh->should_offload = 0;
4355 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
4356 struct mlxsw_sp_nexthop_group *nh_grp,
4357 struct mlxsw_sp_nexthop *nh,
4358 struct nh_notifier_single_info *nh_obj, int weight)
4360 struct net_device *dev = nh_obj->dev;
4361 int err;
4363 nh->nhgi = nh_grp->nhgi;
4364 nh->nh_weight = weight;
4366 switch (nh_obj->gw_family) {
4367 case AF_INET:
4368 memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
4369 nh->neigh_tbl = &arp_tbl;
4370 break;
4371 case AF_INET6:
4372 memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
4373 #if IS_ENABLED(CONFIG_IPV6)
4374 nh->neigh_tbl = &nd_tbl;
4375 #endif
4376 break;
4377 }
4379 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4380 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4381 nh->ifindex = dev->ifindex;
4383 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4384 if (err)
4385 goto err_type_init;
4387 if (nh_obj->is_reject)
4388 mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
4390 return 0;
4392 err_type_init:
4393 list_del(&nh->router_list_node);
4394 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4395 return err;
4398 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
4399 struct mlxsw_sp_nexthop *nh)
4401 if (nh->discard)
4402 mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
4403 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4404 list_del(&nh->router_list_node);
4405 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4409 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
4410 struct mlxsw_sp_nexthop_group *nh_grp,
4411 struct nh_notifier_info *info)
4413 unsigned int nhs = info->is_grp ? info->nh_grp->num_nh : 1;
4414 struct mlxsw_sp_nexthop_group_info *nhgi;
4415 struct mlxsw_sp_nexthop *nh;
4416 int err, i;
4418 nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
4419 if (!nhgi)
4420 return -ENOMEM;
4421 nh_grp->nhgi = nhgi;
4422 nhgi->nh_grp = nh_grp;
4423 nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
4424 nhgi->count = nhs;
4425 for (i = 0; i < nhgi->count; i++) {
4426 struct nh_notifier_single_info *nh_obj;
4427 int weight;
4429 nh = &nhgi->nexthops[i];
4430 if (info->is_grp) {
4431 nh_obj = &info->nh_grp->nh_entries[i].nh;
4432 weight = info->nh_grp->nh_entries[i].weight;
4433 } else {
4434 nh_obj = info->nh;
4435 weight = 1;
4436 }
4437 err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
4438 weight);
4439 if (err)
4440 goto err_nexthop_obj_init;
4441 }
4442 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4443 if (err) {
4444 NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
4445 goto err_group_refresh;
4446 }
4448 return 0;
4450 err_group_refresh:
4451 i = nhgi->count;
4452 err_nexthop_obj_init:
4453 for (i--; i >= 0; i--) {
4454 nh = &nhgi->nexthops[i];
4455 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
4456 }
4457 kfree(nhgi);
4458 return err;
4462 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
4463 struct mlxsw_sp_nexthop_group *nh_grp)
4465 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
4468 for (i = nhgi->count - 1; i >= 0; i--) {
4469 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
4471 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
4473 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4474 WARN_ON_ONCE(nhgi->adj_index_valid);
4475 kfree(nhgi);
4478 static struct mlxsw_sp_nexthop_group *
4479 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
4480 struct nh_notifier_info *info)
4482 struct mlxsw_sp_nexthop_group *nh_grp;
4483 int err;
4485 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
4486 if (!nh_grp)
4487 return ERR_PTR(-ENOMEM);
4488 INIT_LIST_HEAD(&nh_grp->vr_list);
4489 err = rhashtable_init(&nh_grp->vr_ht,
4490 &mlxsw_sp_nexthop_group_vr_ht_params);
4491 if (err)
4492 goto err_nexthop_group_vr_ht_init;
4493 INIT_LIST_HEAD(&nh_grp->fib_list);
4494 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4495 nh_grp->obj.id = info->id;
4497 err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
4499 goto err_nexthop_group_info_init;
4501 nh_grp->can_destroy = false;
4503 return nh_grp;
4505 err_nexthop_group_info_init:
4506 rhashtable_destroy(&nh_grp->vr_ht);
4507 err_nexthop_group_vr_ht_init:
4508 kfree(nh_grp);
4509 return ERR_PTR(err);
4513 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
4514 struct mlxsw_sp_nexthop_group *nh_grp)
4516 if (!nh_grp->can_destroy)
4517 return;
4518 mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
4519 WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
4520 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
4521 rhashtable_destroy(&nh_grp->vr_ht);
4522 kfree(nh_grp);
4525 static struct mlxsw_sp_nexthop_group *
4526 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
4528 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
4530 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4531 cmp_arg.id = id;
4532 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
4533 &cmp_arg,
4534 mlxsw_sp_nexthop_group_ht_params);
4537 static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
4538 struct mlxsw_sp_nexthop_group *nh_grp)
4540 return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4544 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
4545 struct mlxsw_sp_nexthop_group *nh_grp,
4546 struct mlxsw_sp_nexthop_group *old_nh_grp,
4547 struct netlink_ext_ack *extack)
4549 struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
4550 struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
4551 int err;
4553 old_nh_grp->nhgi = new_nhgi;
4554 new_nhgi->nh_grp = old_nh_grp;
4555 nh_grp->nhgi = old_nhgi;
4556 old_nhgi->nh_grp = nh_grp;
4558 if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
4559 /* Both the old adjacency index and the new one are valid.
4560 * Routes are currently using the old one. Tell the device to
4561 * replace the old adjacency index with the new one.
4562 */
4563 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
4564 old_nhgi->adj_index,
4565 old_nhgi->ecmp_size);
4566 if (err) {
4567 NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
4568 goto err_out;
4569 }
4570 } else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
4571 /* The old adjacency index is valid, while the new one is not.
4572 * Iterate over all the routes using the group and change them
4573 * to trap packets to the CPU.
4574 */
4575 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
4577 NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
4580 } else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
4581 /* The old adjacency index is invalid, while the new one is.
4582 * Iterate over all the routes using the group and change them
4583 * to forward packets using the new valid index.
4584 */
4585 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
4587 NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
4592 /* Make sure the flags are set / cleared based on the new nexthop group
4593 * information.
4594 */
4595 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
4597 /* At this point 'nh_grp' is just a shell that is not used by anyone
4598 * and its nexthop group info is the old info that was just replaced
4599 * with the new one. Remove it.
4601 nh_grp->can_destroy = true;
4602 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
4607 old_nhgi->nh_grp = old_nh_grp;
4608 nh_grp->nhgi = new_nhgi;
4609 new_nhgi->nh_grp = nh_grp;
4610 old_nh_grp->nhgi = old_nhgi;
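/* Illustration of the info swap performed above: the newly created shell
 * group donates its nexthop group info to the existing group, so routes
 * keep pointing at 'old_nh_grp' while only the backing state changes:
 *
 *	before:	old_nh_grp->nhgi == old_nhgi, nh_grp->nhgi == new_nhgi
 *	after:	old_nh_grp->nhgi == new_nhgi, nh_grp->nhgi == old_nhgi
 *
 * On failure, err_out undoes the four assignments and restores the
 * original pairing.
 */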
static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
				    struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
	struct netlink_ext_ack *extack = info->extack;
	int err;

	nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
	if (IS_ERR(nh_grp))
		return PTR_ERR(nh_grp);

	old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
	if (!old_nh_grp)
		err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
	else
		err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
							 old_nh_grp, extack);

	if (err) {
		nh_grp->can_destroy = true;
		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
	}

	return err;
}

static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
				     struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
	if (!nh_grp)
		return;

	nh_grp->can_destroy = true;
	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);

	/* If the group still has routes using it, then defer the delete
	 * operation until the last route using it is deleted.
	 */
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
}

static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
				      unsigned long event, void *ptr)
{
	struct nh_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;
	int err = 0;

	router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
	err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
	if (err)
		goto out;

	mutex_lock(&router->lock);

	switch (event) {
	case NEXTHOP_EVENT_REPLACE:
		err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
		break;
	case NEXTHOP_EVENT_DEL:
		mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
		break;
	default:
		break;
	}

	mutex_unlock(&router->lock);

out:
	return notifier_from_errno(err);
}
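/* Sketch of how the notifier above is wired up. The actual registration
 * lives in the router init path, which is not part of this excerpt, so
 * the call site below is an assumption for illustration only:
 *
 *	router->nexthop_nb.notifier_call = mlxsw_sp_nexthop_obj_event;
 *	err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
 *					&router->nexthop_nb, extack);
 *
 * The nexthop code replays existing nexthop objects as REPLACE events at
 * registration time, so mlxsw_sp_nexthop_obj_new() also covers the
 * initial dump.
 */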
static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
				   struct fib_info *fi)
{
	const struct fib_nh *nh = fib_info_nh(fi, 0);

	return nh->fib_nh_scope == RT_SCOPE_LINK ||
	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
}

static int
mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
	struct mlxsw_sp_nexthop_group_info *nhgi;
	struct mlxsw_sp_nexthop *nh;
	int err, i;

	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
	if (!nhgi)
		return -ENOMEM;
	nh_grp->nhgi = nhgi;
	nhgi->nh_grp = nh_grp;
	nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
	nhgi->count = nhs;
	for (i = 0; i < nhgi->count; i++) {
		struct fib_nh *fib_nh;

		nh = &nhgi->nexthops[i];
		fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	if (err)
		goto err_group_refresh;

	return 0;

err_group_refresh:
	i = nhgi->count;
err_nexthop4_init:
	for (i--; i >= 0; i--) {
		nh = &nhgi->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	kfree(nhgi);
	return err;
}

static void
mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
	int i;

	for (i = nhgi->count - 1; i >= 0; i--) {
		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];

		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nhgi->adj_index_valid);
	kfree(nhgi);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	int err;

	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->vr_list);
	err = rhashtable_init(&nh_grp->vr_ht,
			      &mlxsw_sp_nexthop_group_vr_ht_params);
	if (err)
		goto err_nexthop_group_vr_ht_init;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
	nh_grp->ipv4.fi = fi;
	fib_info_hold(fi);

	err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_info_init;

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	nh_grp->can_destroy = true;

	return nh_grp;

err_nexthop_group_insert:
	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
err_nexthop_group_info_init:
	fib_info_put(fi);
	rhashtable_destroy(&nh_grp->vr_ht);
err_nexthop_group_vr_ht_init:
	kfree(nh_grp);
	return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (!nh_grp->can_destroy)
		return;
	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
	fib_info_put(nh_grp->ipv4.fi);
	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
	rhashtable_destroy(&nh_grp->vr_ht);
	kfree(nh_grp);
}

static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	if (fi->nh) {
		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
							   fi->nh->id);
		if (WARN_ON_ONCE(!nh_grp))
			return -EINVAL;
		goto out;
	}

	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
out:
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}

static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;

	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
		return;
	}

	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
}
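/* Lifetime sketch: all FIB entries sharing the same fib_info reference a
 * single group object.
 *
 *	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fi);
 *	...
 *	mlxsw_sp_nexthop4_group_put(mlxsw_sp, fib_entry);
 *
 * The group is destroyed only once fib_list drains; groups backed by a
 * nexthop object (fi->nh) are handed to the obj variant instead, since
 * their lifetime is controlled by nexthop object notifications.
 */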
static bool
mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
				  common);
	return !fib4_entry->tos;
}

static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
			return false;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return !!nh_group->nhgi->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return !!nh_group->nhgi->nh_rif;
	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
		return true;
	default:
		return false;
	}
}

static struct mlxsw_sp_nexthop *
mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	int i;

	for (i = 0; i < nh_grp->nhgi->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
		struct fib6_info *rt = mlxsw_sp_rt6->rt;

		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
				    &rt->fib6_nh->fib_nh_gw6))
			return nh;
	}

	return NULL;
}
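/* Matching rule: a nexthop corresponds to a fib6_info only when both the
 * egress device and the gateway address agree. For example, a route via
 * fe80::1 dev swp1 (device name and address are illustrative) matches
 * only the nexthop whose RIF sits on swp1 and whose gw_addr is fe80::1.
 */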
static void
mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry)
{
	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
	int dst_len = fib_entry->fib_node->key.prefix_len;
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct fib_rt_info fri;
	bool should_offload;

	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
				  common);
	fri.fi = fib4_entry->fi;
	fri.tb_id = fib4_entry->tb_id;
	fri.dst = cpu_to_be32(*p_dst);
	fri.dst_len = dst_len;
	fri.tos = fib4_entry->tos;
	fri.type = fib4_entry->type;
	fri.offload = should_offload;
	fri.trap = !should_offload;
	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}

static void
mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
	int dst_len = fib_entry->fib_node->key.prefix_len;
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct fib_rt_info fri;

	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
				  common);
	fri.fi = fib4_entry->fi;
	fri.tb_id = fib4_entry->tb_id;
	fri.dst = cpu_to_be32(*p_dst);
	fri.dst_len = dst_len;
	fri.tos = fib4_entry->tos;
	fri.type = fib4_entry->type;
	fri.offload = false;
	fri.trap = false;
	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}

static void
mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	bool should_offload;

	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);

	/* In IPv6 a multipath route is represented using multiple routes, so
	 * we need to set the flags on all of them.
	 */
	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
				       !should_offload);
}

static void
mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
}

static void
mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
		break;
	}
}

static void
mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
		break;
	}
}

static void
mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_entry *fib_entry,
				    enum mlxsw_sp_fib_entry_op op)
{
	switch (op) {
	case MLXSW_SP_FIB_ENTRY_OP_WRITE:
	case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
		break;
	case MLXSW_SP_FIB_ENTRY_OP_DELETE:
		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
		break;
	default:
		break;
	}
}
struct mlxsw_sp_fib_entry_op_ctx_basic {
	char ralue_pl[MLXSW_REG_RALUE_LEN];
};

static void
mlxsw_sp_router_ll_basic_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					enum mlxsw_sp_l3proto proto,
					enum mlxsw_sp_fib_entry_op op,
					u16 virtual_router, u8 prefix_len,
					unsigned char *addr,
					struct mlxsw_sp_fib_entry_priv *priv)
{
	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
	enum mlxsw_reg_ralxx_protocol ralxx_proto;
	char *ralue_pl = op_ctx_basic->ralue_pl;
	enum mlxsw_reg_ralue_op ralue_op;

	ralxx_proto = (enum mlxsw_reg_ralxx_protocol) proto;

	switch (op) {
	case MLXSW_SP_FIB_ENTRY_OP_WRITE:
	case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
		ralue_op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
		break;
	case MLXSW_SP_FIB_ENTRY_OP_DELETE:
		ralue_op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_reg_ralue_pack4(ralue_pl, ralxx_proto, ralue_op,
				      virtual_router, prefix_len, (u32 *) addr);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_reg_ralue_pack6(ralue_pl, ralxx_proto, ralue_op,
				      virtual_router, prefix_len, addr);
		break;
	}
}

static void
mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
						   enum mlxsw_reg_ralue_trap_action trap_action,
						   u16 trap_id, u32 adjacency_index, u16 ecmp_size)
{
	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;

	mlxsw_reg_ralue_act_remote_pack(op_ctx_basic->ralue_pl, trap_action,
					trap_id, adjacency_index, ecmp_size);
}

static void
mlxsw_sp_router_ll_basic_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
						  enum mlxsw_reg_ralue_trap_action trap_action,
						  u16 trap_id, u16 local_erif)
{
	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;

	mlxsw_reg_ralue_act_local_pack(op_ctx_basic->ralue_pl, trap_action,
				       trap_id, local_erif);
}

static void
mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
{
	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;

	mlxsw_reg_ralue_act_ip2me_pack(op_ctx_basic->ralue_pl);
}

static void
mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
						      u32 tunnel_ptr)
{
	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;

	mlxsw_reg_ralue_act_ip2me_tun_pack(op_ctx_basic->ralue_pl, tunnel_ptr);
}

static int
mlxsw_sp_router_ll_basic_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					  bool *postponed_for_bulk)
{
	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
			       op_ctx_basic->ralue_pl);
}

static bool
mlxsw_sp_router_ll_basic_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv)
{
	return true;
}
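/* Typical call sequence against these low-level ops, grounded in the
 * mlxsw_sp_fib_entry_op_*() helpers further below:
 *
 *	mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
 *	ll_ops->fib_entry_act_remote_pack(op_ctx, trap_action, trap_id,
 *					  adjacency_index, ecmp_size);
 *	err = mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
 *
 * The basic implementation writes one RALUE register per commit and
 * never postpones for bulking, hence the untouched *postponed_for_bulk.
 */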
static void mlxsw_sp_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				    struct mlxsw_sp_fib_entry *fib_entry,
				    enum mlxsw_sp_fib_entry_op op)
{
	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;

	mlxsw_sp_fib_entry_op_ctx_priv_hold(op_ctx, fib_entry->priv);
	fib->ll_ops->fib_entry_pack(op_ctx, fib->proto, op, fib->vr->id,
				    fib_entry->fib_node->key.prefix_len,
				    fib_entry->fib_node->key.addr,
				    fib_entry->priv);
}

static int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				     const struct mlxsw_sp_router_ll_ops *ll_ops)
{
	bool postponed_for_bulk = false;
	int err;

	err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, &postponed_for_bulk);
	if (!postponed_for_bulk)
		mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
	return err;
}

static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_ratr_trap_action trap_action;
	char ratr_pl[MLXSW_REG_RATR_LEN];
	int err;

	if (mlxsw_sp->router->adj_discard_index_valid)
		return 0;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
				  &mlxsw_sp->router->adj_discard_index);
	if (err)
		return err;

	trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
			    MLXSW_REG_RATR_TYPE_ETHERNET,
			    mlxsw_sp->router->adj_discard_index,
			    mlxsw_sp->router->lb_rif_index);
	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
	mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
	if (err)
		goto err_ratr_write;

	mlxsw_sp->router->adj_discard_index_valid = true;

	return 0;

err_ratr_write:
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
			   mlxsw_sp->router->adj_discard_index);
	return err;
}
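/* The discard adjacency entry written above is consumed by
 * mlxsw_sp_fib_entry_op_remote() below: gateway routes whose nexthops
 * are all unresolved are pointed at this single adjacency, where packets
 * are trapped with MLXSW_TRAP_ID_RTR_EGRESS0 rather than dropped
 * silently, giving the kernel a chance to resolve the neighbours.
 */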
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_sp_fib_entry_op op)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;
	int err;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = nhgi->adj_index;
		ecmp_size = nhgi->ecmp_size;
	} else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) {
		err = mlxsw_sp_adj_discard_write(mlxsw_sp);
		if (err)
			return err;
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = mlxsw_sp->router->adj_discard_index;
		ecmp_size = 1;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
	ll_ops->fib_entry_act_remote_pack(op_ctx, trap_action, trap_id,
					  adjacency_index, ecmp_size);
	return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
}

static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_sp_fib_entry_op op)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u16 rif_index = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif_index = rif->rif_index;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
	ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, rif_index);
	return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
}

static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_sp_fib_entry_op op)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;

	mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
	ll_ops->fib_entry_act_ip2me_pack(op_ctx);
	return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
}

static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					   struct mlxsw_sp_fib_entry *fib_entry,
					   enum mlxsw_sp_fib_entry_op op)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
	enum mlxsw_reg_ralue_trap_action trap_action;

	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
	mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
	ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, 0, 0);
	return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
}

static int
mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_sp_fib_entry_op op)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id;

	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;

	mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
	ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, 0);
	return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
}

static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_sp_fib_entry_op op)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	int err;

	if (WARN_ON(!ipip_entry))
		return -EINVAL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
				     fib_entry->decap.tunnel_index);
	if (err)
		return err;

	mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
	ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx,
					     fib_entry->decap.tunnel_index);
	return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
}

static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					   struct mlxsw_sp_fib_entry *fib_entry,
					   enum mlxsw_sp_fib_entry_op op)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;

	mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
	ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx,
					     fib_entry->decap.tunnel_index);
	return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
}
static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				   struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_sp_fib_entry_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, op_ctx, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, op_ctx, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, op_ctx, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, op_ctx, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, op_ctx, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, op_ctx, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, op_ctx, fib_entry, op);
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_sp_fib_entry_op op)
{
	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, op);

	if (err)
		return err;

	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);

	return err;
}

static int __mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       bool is_new)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry,
				     is_new ? MLXSW_SP_FIB_ENTRY_OP_WRITE :
					      MLXSW_SP_FIB_ENTRY_OP_UPDATE);
}
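/* Note: in the basic low-level implementation both OP_WRITE and
 * OP_UPDATE map to MLXSW_REG_RALUE_OP_WRITE_WRITE (see the pack routine
 * above); the distinction exists for low-level implementations that
 * track whether an entry was already committed to the device.
 */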
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;

	mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
	return __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, false);
}

static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;

	if (!ll_ops->fib_entry_is_committed(fib_entry->priv))
		return 0;
	return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry,
				     MLXSW_SP_FIB_ENTRY_OP_DELETE);
}

static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
	int ifindex = nhgi->nexthops[0].ifindex;
	struct mlxsw_sp_ipip_entry *ipip_entry;

	switch (fen_info->type) {
	case RTN_LOCAL:
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
							       MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
						 MLXSW_SP_L3_PROTO_IPV4,
						 &dip)) {
			u32 tunnel_index;

			tunnel_index = router->nve_decap_config.tunnel_index;
			fib_entry->decap.tunnel_index = tunnel_index;
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
			return 0;
		}
		fallthrough;
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_BLACKHOLE:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
		return 0;
	case RTN_UNREACHABLE:
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
		return 0;
	case RTN_UNICAST:
		if (nhgi->gateway)
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}

static void
mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
		break;
	default:
		break;
	}
}
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops);
	if (IS_ERR(fib_entry->priv)) {
		err = PTR_ERR(fib_entry->priv);
		goto err_fib_entry_priv_create;
	}

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
					     fib_node->fib);
	if (err)
		goto err_nexthop_group_vr_link;

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	fib4_entry->fi = fen_info->fi;
	fib_info_hold(fib4_entry->fi);
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_fib4_entry_type_set:
	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
err_nexthop_group_vr_link:
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
err_nexthop4_group_get:
	mlxsw_sp_fib_entry_priv_put(fib_entry->priv);
err_fib_entry_priv_create:
	kfree(fib4_entry);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;

	fib_info_put(fib4_entry->fi);
	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
					 fib_node->fib);
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib_entry_priv_put(fib4_entry->common.priv);
	kfree(fib4_entry);
}

static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node)
		return NULL;

	fib4_entry = container_of(fib_node->fib_entry,
				  struct mlxsw_sp_fib4_entry, common);
	if (fib4_entry->tb_id == fen_info->tb_id &&
	    fib4_entry->tos == fen_info->tos &&
	    fib4_entry->type == fen_info->type &&
	    fib4_entry->fi == fen_info->fi)
		return fib4_entry;

	return NULL;
}
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};

static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}

static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
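/* The rhashtable hashes the raw { addr, prefix_len } blob (key_len above
 * covers the whole struct), which is why the lookup key is zeroed before
 * being filled: padding and unused address bytes must compare equal
 * between insert and lookup.
 */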
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
	if (!fib_node)
		return NULL;

	list_add(&fib_node->list, &fib->node_list);
	memcpy(fib_node->key.addr, addr, addr_len);
	fib_node->key.prefix_len = prefix_len;

	return fib_node;
}

static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	list_del(&fib_node->list);
	kfree(fib_node);
}
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}

static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
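/* Prefix-usage example: if the tree currently serves prefix lengths
 * {0, 24, 32} and the last /24 node goes away, req_prefix_usage becomes
 * {0, 32} and mlxsw_sp_lpm_tree_get() returns (or creates) a tree for
 * the reduced set; if that fails, the old, larger tree simply stays in
 * use, which is always safe.
 */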
static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node,
				  struct mlxsw_sp_fib *fib)
{
	int err;

	err = mlxsw_sp_fib_node_insert(fib, fib_node);
	if (err)
		return err;
	fib_node->fib = fib;

	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
	if (err)
		goto err_fib_lpm_tree_link;

	return 0;

err_fib_lpm_tree_link:
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
	return err;
}

static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (fib_node->fib_entry)
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	bool is_new = !fib_node->fib_entry;
	int err;

	fib_node->fib_entry = fib_entry;

	err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, is_new);
	if (err)
		goto err_fib_entry_update;

	return 0;

err_fib_entry_update:
	fib_node->fib_entry = NULL;
	return err;
}

static int __mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
					    struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					    struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	int err;

	err = mlxsw_sp_fib_entry_del(mlxsw_sp, op_ctx, fib_entry);
	fib_node->fib_entry = NULL;
	return err;
}

static void mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;

	mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
	__mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, fib_entry);
}
static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_replaced;

	if (!fib_node->fib_entry)
		return true;

	fib4_replaced = container_of(fib_node->fib_entry,
				     struct mlxsw_sp_fib4_entry, common);
	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
		return false;

	return true;
}
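/* Rationale sketch: without this check, a route from the main table
 * could shadow a host or broadcast route from the local table for the
 * same prefix in hardware, even though the local table has higher
 * precedence in the kernel's FIB rules. Such a replace is therefore
 * refused and left to the kernel.
 */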
static int
mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
			     const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
	struct mlxsw_sp_fib_entry *replaced;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		return 0;
	}

	replaced = fib_node->fib_entry;
	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib4_entry->common);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib_node_entry_link;
	}

	/* Nothing to replace */
	if (!replaced)
		return 0;

	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
				     common);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);

	return 0;

err_fib_node_entry_link:
	fib_node->fib_entry = replaced;
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}

static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				    struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (!fib4_entry)
		return 0;
	fib_node = fib4_entry->common.fib_node;

	err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib4_entry->common);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
{
	/* Multicast routes aren't supported, so ignore them. Neighbour
	 * Discovery packets are specifically trapped.
	 */
	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
		return true;

	/* Cloned routes are irrelevant in the forwarding path. */
	if (rt->fib6_flags & RTF_CACHE)
		return true;

	return false;
}

static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
	if (!mlxsw_sp_rt6)
		return ERR_PTR(-ENOMEM);

	/* In case of route replace, replaced route is deleted with
	 * no notification. Take reference to prevent accessing freed
	 * memory.
	 */
	mlxsw_sp_rt6->rt = rt;
	fib6_info_hold(rt);

	return mlxsw_sp_rt6;
}

#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
	fib6_info_release(rt);
}
#else
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
}
#endif

static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;

	if (!mlxsw_sp_rt6->rt->nh)
		fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}

static struct fib6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				list)->rt;
}

static struct mlxsw_sp_rt6 *
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
			    const struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		if (mlxsw_sp_rt6->rt == rt)
			return mlxsw_sp_rt6;
	}

	return NULL;
}
static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
					const struct fib6_info *rt,
					enum mlxsw_sp_ipip_type *ret)
{
	return rt->fib6_nh->fib_nh_dev &&
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
}

static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct fib6_info *rt)
{
	struct net_device *dev = rt->fib6_nh->fib_nh_dev;

	nh->nhgi = nh_grp->nhgi;
	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
#if IS_ENABLED(CONFIG_IPV6)
	nh->neigh_tbl = &nd_tbl;
#endif
	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);

	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
}

static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}

static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
				    const struct fib6_info *rt)
{
	return rt->fib6_nh->fib_nh_gw_family ||
	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}

static int
mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group_info *nhgi;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	int err, i;

	nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
		       GFP_KERNEL);
	if (!nhgi)
		return -ENOMEM;
	nh_grp->nhgi = nhgi;
	nhgi->nh_grp = nh_grp;
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
	nhgi->count = fib6_entry->nrt6;
	for (i = 0; i < nhgi->count; i++) {
		struct fib6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nhgi->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}
	nh_grp->nhgi = nhgi;
	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	if (err)
		goto err_group_refresh;

	return 0;

err_group_refresh:
	i = nhgi->count;
err_nexthop6_init:
	for (i--; i >= 0; i--) {
		nh = &nhgi->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nhgi);
	return err;
}

static void
mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
	int i;

	for (i = nhgi->count - 1; i >= 0; i--) {
		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];

		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nhgi->adj_index_valid);
	kfree(nhgi);
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	int err;

	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->vr_list);
	err = rhashtable_init(&nh_grp->vr_ht,
			      &mlxsw_sp_nexthop_group_vr_ht_params);
	if (err)
		goto err_nexthop_group_vr_ht_init;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;

	err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
	if (err)
		goto err_nexthop_group_info_init;

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	nh_grp->can_destroy = true;

	return nh_grp;

err_nexthop_group_insert:
	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
err_nexthop_group_info_init:
	rhashtable_destroy(&nh_grp->vr_ht);
err_nexthop_group_vr_ht_init:
	kfree(nh_grp);
	return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (!nh_grp->can_destroy)
		return;
	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
	rhashtable_destroy(&nh_grp->vr_ht);
	kfree(nh_grp);
}

static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
	struct mlxsw_sp_nexthop_group *nh_grp;

	if (rt->nh) {
		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
							   rt->nh->id);
		if (WARN_ON_ONCE(!nh_grp))
			return -EINVAL;
		goto out;
	}

	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}

	/* The route and the nexthop are described by the same struct, so we
	 * need to update the nexthop offload indication for the new route.
	 */
	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);

out:
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &nh_grp->fib_list);
	fib6_entry->common.nh_group = nh_grp;

	return 0;
}

static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;

	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
		return;
	}

	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
}
static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					  struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	int err;

	mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
					     fib_node->fib);
	if (err)
		goto err_nexthop_group_vr_link;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx,
					  &fib6_entry->common, false);
	if (err)
		goto err_fib_entry_update;

	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_entry_update:
	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
					 fib_node->fib);
err_nexthop_group_vr_link:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
	return err;
}
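/* The update above is make-before-break: the entry joins the new group
 * and is rewritten in the device before the old group is considered for
 * destruction, so traffic keeps flowing through the old adjacency until
 * the RALUE update takes effect.
 */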
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err, i;

	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
		if (IS_ERR(mlxsw_sp_rt6)) {
			err = PTR_ERR(mlxsw_sp_rt6);
			goto err_rt6_create;
		}

		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
		fib6_entry->nrt6++;
	}

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
	if (err)
		goto err_nexthop6_group_update;

	return 0;

err_nexthop6_group_update:
	i = nrt6;
err_rt6_create:
	for (i--; i >= 0; i--) {
		fib6_entry->nrt6--;
		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
					       struct mlxsw_sp_rt6, list);
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
	return err;
}

static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int i;

	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
							   rt_arr[i]);
		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
			continue;

		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}

	mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
}

static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 const struct fib6_info *rt)
{
	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	else if (rt->fib6_type == RTN_BLACKHOLE)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
	else if (rt->fib6_flags & RTF_REJECT)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
	else if (fib_entry->nh_group->nhgi->gateway)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
}

static void
mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;

	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
				 list) {
		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
}
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err, i;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops);
	if (IS_ERR(fib_entry->priv)) {
		err = PTR_ERR(fib_entry->priv);
		goto err_fib_entry_priv_create;
	}

	INIT_LIST_HEAD(&fib6_entry->rt6_list);

	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
		if (IS_ERR(mlxsw_sp_rt6)) {
			err = PTR_ERR(mlxsw_sp_rt6);
			goto err_rt6_create;
		}
		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
		fib6_entry->nrt6++;
	}

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
					     fib_node->fib);
	if (err)
		goto err_nexthop_group_vr_link;

	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_nexthop_group_vr_link:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
err_nexthop6_group_get:
	i = nrt6;
err_rt6_create:
	for (i--; i >= 0; i--) {
		fib6_entry->nrt6--;
		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
					       struct mlxsw_sp_rt6, list);
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
	mlxsw_sp_fib_entry_priv_put(fib_entry->priv);
err_fib_entry_priv_create:
	kfree(fib6_entry);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
					 fib_node->fib);
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	WARN_ON(fib6_entry->nrt6);
	mlxsw_sp_fib_entry_priv_put(fib6_entry->common.priv);
	kfree(fib6_entry);
}

static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct fib6_info *cmp_rt;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
					    sizeof(rt->fib6_dst.addr),
					    rt->fib6_dst.plen);
	if (!fib_node)
		return NULL;

	fib6_entry = container_of(fib_node->fib_entry,
				  struct mlxsw_sp_fib6_entry, common);
	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
	    rt->fib6_metric == cmp_rt->fib6_metric &&
	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
		return fib6_entry;

	return NULL;
}

static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	struct mlxsw_sp_fib6_entry *fib6_replaced;
	struct fib6_info *rt, *rt_replaced;

	if (!fib_node->fib_entry)
		return true;

	fib6_replaced = container_of(fib_node->fib_entry,
				     struct mlxsw_sp_fib6_entry,
				     common);
	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
		return false;

	return true;
}
static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
	struct mlxsw_sp_fib_entry *replaced;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	if (rt->fib6_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
					 &rt->fib6_dst.addr,
					 sizeof(rt->fib6_dst.addr),
					 rt->fib6_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
						nrt6);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		return 0;
	}

	replaced = fib_node->fib_entry;
	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_link;

	/* Nothing to replace */
	if (!replaced)
		return 0;

	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
				     common);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);

	return 0;

err_fib_node_entry_link:
	fib_node->fib_entry = replaced;
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}

static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				       struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	if (rt->fib6_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
					 &rt->fib6_dst.addr,
					 sizeof(rt->fib6_dst.addr),
					 rt->fib6_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		return -EINVAL;
	}

	fib6_entry = container_of(fib_node->fib_entry,
				  struct mlxsw_sp_fib6_entry, common);
	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6);
	if (err)
		goto err_fib6_entry_nexthop_add;

	return 0;

err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}

static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
				    struct fib6_info **rt_arr, unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	/* Multipath routes are first added to the FIB trie and only then
	 * notified. If we vetoed the addition, we will get a delete
	 * notification for a route we do not have. Therefore, do not warn if
	 * route was not found.
	 */
	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (!fib6_entry)
		return 0;

	/* If not all the nexthops are deleted, then only reduce the nexthop
	 * group.
	 */
	if (nrt6 != fib6_entry->nrt6) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6);
		return 0;
	}

	fib_node = fib6_entry->common.fib_node;

	err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib6_entry->common);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_sp_l3proto proto,
					    u8 tree_id)
{
	const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
	enum mlxsw_reg_ralxx_protocol ralxx_proto =
				(enum mlxsw_reg_ralxx_protocol) proto;
	struct mlxsw_sp_fib_entry_priv *priv;
	char xralta_pl[MLXSW_REG_XRALTA_LEN];
	char xralst_pl[MLXSW_REG_XRALST_LEN];
	int i, err;

	mlxsw_reg_xralta_pack(xralta_pl, true, ralxx_proto, tree_id);
	err = ll_ops->ralta_write(mlxsw_sp, xralta_pl);
	if (err)
		return err;

	mlxsw_reg_xralst_pack(xralst_pl, 0xff, tree_id);
	err = ll_ops->ralst_write(mlxsw_sp, xralst_pl);
	if (err)
		return err;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char xraltb_pl[MLXSW_REG_XRALTB_LEN];

		mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
		mlxsw_reg_xraltb_pack(xraltb_pl, vr->id, ralxx_proto, tree_id);
		err = ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
		if (err)
			return err;

		priv = mlxsw_sp_fib_entry_priv_create(ll_ops);
		if (IS_ERR(priv))
			return PTR_ERR(priv);

		ll_ops->fib_entry_pack(op_ctx, proto, MLXSW_SP_FIB_ENTRY_OP_WRITE,
				       vr->id, 0, NULL, priv);
		ll_ops->fib_entry_act_ip2me_pack(op_ctx);
		err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, NULL);
		mlxsw_sp_fib_entry_priv_put(priv);
		if (err)
			return err;
	}

	return 0;
}
static struct mlxsw_sp_mr_table *
mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
{
	if (family == RTNL_FAMILY_IPMR)
		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
	else
		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
}

static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
				     struct mfc_entry_notifier_info *men_info,
				     bool replace)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
}

static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

static int
mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
				   ven_info->vif_index,
				   ven_info->vif_flags, rif);
}

static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
6778 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
6780 enum mlxsw_sp_l3proto proto = MLXSW_SP_L3_PROTO_IPV4;
6783 err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
6784 MLXSW_SP_LPM_TREE_MIN);
6788 /* The multicast router code does not need an abort trap as by default,
6789 * packets that don't match any routes are trapped to the CPU.
6792 proto = MLXSW_SP_L3_PROTO_IPV6;
6793 return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
6794 MLXSW_SP_LPM_TREE_MIN + 1);
6797 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
6798 struct mlxsw_sp_fib_node *fib_node)
6800 struct mlxsw_sp_fib4_entry *fib4_entry;
6802 fib4_entry = container_of(fib_node->fib_entry,
6803 struct mlxsw_sp_fib4_entry, common);
6804 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
6805 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6806 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6809 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
6810 struct mlxsw_sp_fib_node *fib_node)
6812 struct mlxsw_sp_fib6_entry *fib6_entry;
6814 fib6_entry = container_of(fib_node->fib_entry,
6815 struct mlxsw_sp_fib6_entry, common);
6816 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
6817 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
6818 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6821 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
6822 struct mlxsw_sp_fib_node *fib_node)
6824 switch (fib_node->fib->proto) {
6825 case MLXSW_SP_L3_PROTO_IPV4:
6826 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
6828 case MLXSW_SP_L3_PROTO_IPV6:
6829 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
6834 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
6835 struct mlxsw_sp_vr *vr,
6836 enum mlxsw_sp_l3proto proto)
6838 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
6839 struct mlxsw_sp_fib_node *fib_node, *tmp;
6841 list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
6842 bool do_break = &tmp->list == &fib->node_list;
6844 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
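/* Note on the idiom above: do_break is evaluated before the flush because
 * releasing the last node can drop the reference that keeps the FIB (and
 * with it fib->node_list) alive; the loop therefore checks up front
 * whether tmp already points back at the head and breaks right after the
 * flush instead of touching the list again.
 */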
6850 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
6854 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
6855 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
6857 if (!mlxsw_sp_vr_is_used(vr))
6860 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
6861 mlxsw_sp_mr_table_flush(vr->mr_table[j]);
6862 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
6864 /* If virtual router was only used for IPv4, then it's no longer used. */
6867 if (!mlxsw_sp_vr_is_used(vr))
6869 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
6872 /* After flushing all the routes, it is not possible that anyone is still
6873 * using the adjacency index that is discarding packets, so free it in
6874 * case it was allocated.
6876 if (!mlxsw_sp->router->adj_discard_index_valid)
6878 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
6879 mlxsw_sp->router->adj_discard_index);
6880 mlxsw_sp->router->adj_discard_index_valid = false;
6883 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
6887 if (mlxsw_sp->router->aborted)
6889 dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
6890 mlxsw_sp_router_fib_flush(mlxsw_sp);
6891 mlxsw_sp->router->aborted = true;
6892 err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
6894 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
6897 struct mlxsw_sp_fib6_event {
6898 struct fib6_info **rt_arr;
6902 struct mlxsw_sp_fib_event {
6903 struct list_head list; /* node in fib queue */
6905 struct mlxsw_sp_fib6_event fib6_event;
6906 struct fib_entry_notifier_info fen_info;
6907 struct fib_rule_notifier_info fr_info;
6908 struct fib_nh_notifier_info fnh_info;
6909 struct mfc_entry_notifier_info men_info;
6910 struct vif_entry_notifier_info ven_info;
6912 struct mlxsw_sp *mlxsw_sp;
6913 unsigned long event;
6918 mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event,
6919 struct fib6_entry_notifier_info *fen6_info)
6921 struct fib6_info *rt = fen6_info->rt;
6922 struct fib6_info **rt_arr;
6923 struct fib6_info *iter;
6927 nrt6 = fen6_info->nsiblings + 1;
6929 rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
6933 fib6_event->rt_arr = rt_arr;
6934 fib6_event->nrt6 = nrt6;
6939 if (!fen6_info->nsiblings)
6942 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
6943 if (i == fen6_info->nsiblings)
6946 rt_arr[i + 1] = iter;
6947 fib6_info_hold(iter);
6950 WARN_ON_ONCE(i != fen6_info->nsiblings);
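/* Resulting layout (illustrative): for a notified route rt with N siblings,
 *
 * rt_arr[0] = rt
 * rt_arr[1]..rt_arr[N] = the sibling fib6_infos
 * nrt6 = N + 1
 *
 * and every entry holds a reference that is dropped again by
 * mlxsw_sp_router_fib6_event_fini(). One event thus carries the whole
 * multipath route into the work queue.
 */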
6956 mlxsw_sp_router_fib6_event_fini(struct mlxsw_sp_fib6_event *fib6_event)
6960 for (i = 0; i < fib6_event->nrt6; i++)
6961 mlxsw_sp_rt6_release(fib6_event->rt_arr[i]);
6962 kfree(fib6_event->rt_arr);
6965 static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp,
6966 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6967 struct mlxsw_sp_fib_event *fib_event)
6971 mlxsw_sp_span_respin(mlxsw_sp);
6973 switch (fib_event->event) {
6974 case FIB_EVENT_ENTRY_REPLACE:
6975 err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info);
6977 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
6978 mlxsw_sp_router_fib_abort(mlxsw_sp);
6980 fib_info_put(fib_event->fen_info.fi);
6982 case FIB_EVENT_ENTRY_DEL:
6983 err = mlxsw_sp_router_fib4_del(mlxsw_sp, op_ctx, &fib_event->fen_info);
6985 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
6986 fib_info_put(fib_event->fen_info.fi);
6988 case FIB_EVENT_NH_ADD:
6989 case FIB_EVENT_NH_DEL:
6990 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_event->event, fib_event->fnh_info.fib_nh);
6991 fib_info_put(fib_event->fnh_info.fib_nh->nh_parent);
6996 static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
6997 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6998 struct mlxsw_sp_fib_event *fib_event)
7002 mlxsw_sp_span_respin(mlxsw_sp);
7004 switch (fib_event->event) {
7005 case FIB_EVENT_ENTRY_REPLACE:
7006 err = mlxsw_sp_router_fib6_replace(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7007 fib_event->fib6_event.nrt6);
7009 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7010 mlxsw_sp_router_fib_abort(mlxsw_sp);
7012 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
7014 case FIB_EVENT_ENTRY_APPEND:
7015 err = mlxsw_sp_router_fib6_append(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7016 fib_event->fib6_event.nrt6);
7018 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7019 mlxsw_sp_router_fib_abort(mlxsw_sp);
7021 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
7023 case FIB_EVENT_ENTRY_DEL:
7024 err = mlxsw_sp_router_fib6_del(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7025 fib_event->fib6_event.nrt6);
7027 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7028 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
7033 static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp,
7034 struct mlxsw_sp_fib_event *fib_event)
7040 mutex_lock(&mlxsw_sp->router->lock);
7041 switch (fib_event->event) {
7042 case FIB_EVENT_ENTRY_REPLACE:
7043 case FIB_EVENT_ENTRY_ADD:
7044 replace = fib_event->event == FIB_EVENT_ENTRY_REPLACE;
7046 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace);
7048 mlxsw_sp_router_fib_abort(mlxsw_sp);
7049 mr_cache_put(fib_event->men_info.mfc);
7051 case FIB_EVENT_ENTRY_DEL:
7052 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_event->men_info);
7053 mr_cache_put(fib_event->men_info.mfc);
7055 case FIB_EVENT_VIF_ADD:
7056 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7057 &fib_event->ven_info);
7059 mlxsw_sp_router_fib_abort(mlxsw_sp);
7060 dev_put(fib_event->ven_info.dev);
7062 case FIB_EVENT_VIF_DEL:
7063 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_event->ven_info);
7064 dev_put(fib_event->ven_info.dev);
7067 mutex_unlock(&mlxsw_sp->router->lock);
7071 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
7073 struct mlxsw_sp_router *router = container_of(work, struct mlxsw_sp_router, fib_event_work);
7074 struct mlxsw_sp_fib_entry_op_ctx *op_ctx = router->ll_op_ctx;
7075 struct mlxsw_sp *mlxsw_sp = router->mlxsw_sp;
7076 struct mlxsw_sp_fib_event *next_fib_event;
7077 struct mlxsw_sp_fib_event *fib_event;
7078 int last_family = AF_UNSPEC;
7079 LIST_HEAD(fib_event_queue);
7081 spin_lock_bh(&router->fib_event_queue_lock);
7082 list_splice_init(&router->fib_event_queue, &fib_event_queue);
7083 spin_unlock_bh(&router->fib_event_queue_lock);
7085 /* Router lock is held here to make sure per-instance
7086 * operation context is not used in between FIB4/6 events processing. */
7089 mutex_lock(&router->lock);
7090 mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
7091 list_for_each_entry_safe(fib_event, next_fib_event,
7092 &fib_event_queue, list) {
7093 /* Check if the next entry in the queue exists and it is
7094 * of the same type (family and event) as the current one.
7095 * In that case it is permitted to do the bulking
7096 * of multiple FIB entries to a single register write.
7098 op_ctx->bulk_ok = !list_is_last(&fib_event->list, &fib_event_queue) &&
7099 fib_event->family == next_fib_event->family &&
7100 fib_event->event == next_fib_event->event;
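/* Illustrative trace (assumed queue contents): with the queue
 * [REPLACE/AF_INET, REPLACE/AF_INET, DEL/AF_INET], the first iteration
 * sets bulk_ok = true, allowing the low-level ops to coalesce the two
 * replaces into one register transaction; the second sets bulk_ok = false
 * (the next event differs), forcing a flush before the delete is handled.
 */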
7101 op_ctx->event = fib_event->event;
7103 /* If the family of this entry differs from the previous one, the context
7104 * needs to be reinitialized now; indicate that.
7105 * Note that since last_family is initialized to AF_UNSPEC, this is always
7106 * going to happen for the first entry processed in the work.
7108 if (fib_event->family != last_family)
7109 op_ctx->initialized = false;
7111 switch (fib_event->family) {
7113 mlxsw_sp_router_fib4_event_process(mlxsw_sp, op_ctx,
7117 mlxsw_sp_router_fib6_event_process(mlxsw_sp, op_ctx,
7120 case RTNL_FAMILY_IP6MR:
7121 case RTNL_FAMILY_IPMR:
7122 /* Unlock here as inside FIBMR the lock is taken again
7123 * under RTNL. The per-instance operation context
7124 * is not used by FIBMR.
7126 mutex_unlock(&router->lock);
7127 mlxsw_sp_router_fibmr_event_process(mlxsw_sp,
7129 mutex_lock(&router->lock);
7134 last_family = fib_event->family;
7138 WARN_ON_ONCE(!list_empty(&router->ll_op_ctx->fib_entry_priv_list));
7139 mutex_unlock(&router->lock);
7142 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event *fib_event,
7143 struct fib_notifier_info *info)
7145 struct fib_entry_notifier_info *fen_info;
7146 struct fib_nh_notifier_info *fnh_info;
7148 switch (fib_event->event) {
7149 case FIB_EVENT_ENTRY_REPLACE:
7150 case FIB_EVENT_ENTRY_DEL:
7151 fen_info = container_of(info, struct fib_entry_notifier_info,
7153 fib_event->fen_info = *fen_info;
7154 /* Take reference on fib_info to prevent it from being
7155 * freed while event is queued. Release it afterwards.
7157 fib_info_hold(fib_event->fen_info.fi);
7159 case FIB_EVENT_NH_ADD:
7160 case FIB_EVENT_NH_DEL:
7161 fnh_info = container_of(info, struct fib_nh_notifier_info,
7163 fib_event->fnh_info = *fnh_info;
7164 fib_info_hold(fib_event->fnh_info.fib_nh->nh_parent);
7169 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event,
7170 struct fib_notifier_info *info)
7172 struct fib6_entry_notifier_info *fen6_info;
7175 switch (fib_event->event) {
7176 case FIB_EVENT_ENTRY_REPLACE:
7177 case FIB_EVENT_ENTRY_APPEND:
7178 case FIB_EVENT_ENTRY_DEL:
7179 fen6_info = container_of(info, struct fib6_entry_notifier_info,
7181 err = mlxsw_sp_router_fib6_event_init(&fib_event->fib6_event,
7192 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event *fib_event,
7193 struct fib_notifier_info *info)
7195 switch (fib_event->event) {
7196 case FIB_EVENT_ENTRY_REPLACE:
7197 case FIB_EVENT_ENTRY_ADD:
7198 case FIB_EVENT_ENTRY_DEL:
7199 memcpy(&fib_event->men_info, info, sizeof(fib_event->men_info));
7200 mr_cache_hold(fib_event->men_info.mfc);
7202 case FIB_EVENT_VIF_ADD:
7203 case FIB_EVENT_VIF_DEL:
7204 memcpy(&fib_event->ven_info, info, sizeof(fib_event->ven_info));
7205 dev_hold(fib_event->ven_info.dev);
7210 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7211 struct fib_notifier_info *info,
7212 struct mlxsw_sp *mlxsw_sp)
7214 struct netlink_ext_ack *extack = info->extack;
7215 struct fib_rule_notifier_info *fr_info;
7216 struct fib_rule *rule;
7219 /* nothing to do at the moment */
7220 if (event == FIB_EVENT_RULE_DEL)
7223 if (mlxsw_sp->router->aborted)
7226 fr_info = container_of(info, struct fib_rule_notifier_info, info);
7227 rule = fr_info->rule;
7229 /* Rule only affects locally generated traffic */
7230 if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7233 switch (info->family) {
7235 if (!fib4_rule_default(rule) && !rule->l3mdev)
7239 if (!fib6_rule_default(rule) && !rule->l3mdev)
7242 case RTNL_FAMILY_IPMR:
7243 if (!ipmr_rule_default(rule) && !rule->l3mdev)
7246 case RTNL_FAMILY_IP6MR:
7247 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7253 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
7258 /* Called with rcu_read_lock() */
7259 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
7260 unsigned long event, void *ptr)
7262 struct mlxsw_sp_fib_event *fib_event;
7263 struct fib_notifier_info *info = ptr;
7264 struct mlxsw_sp_router *router;
7267 if ((info->family != AF_INET && info->family != AF_INET6 &&
7268 info->family != RTNL_FAMILY_IPMR &&
7269 info->family != RTNL_FAMILY_IP6MR))
7272 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7275 case FIB_EVENT_RULE_ADD:
7276 case FIB_EVENT_RULE_DEL:
7277 err = mlxsw_sp_router_fib_rule_event(event, info,
7279 return notifier_from_errno(err);
7280 case FIB_EVENT_ENTRY_ADD:
7281 case FIB_EVENT_ENTRY_REPLACE:
7282 case FIB_EVENT_ENTRY_APPEND:
7283 if (router->aborted) {
7284 NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
7285 return notifier_from_errno(-EINVAL);
7287 if (info->family == AF_INET) {
7288 struct fib_entry_notifier_info *fen_info = ptr;
7290 if (fen_info->fi->fib_nh_is_v6) {
7291 NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
7292 return notifier_from_errno(-EINVAL);
7298 fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC);
7302 fib_event->mlxsw_sp = router->mlxsw_sp;
7303 fib_event->event = event;
7304 fib_event->family = info->family;
7306 switch (info->family) {
7308 mlxsw_sp_router_fib4_event(fib_event, info);
7311 err = mlxsw_sp_router_fib6_event(fib_event, info);
7315 case RTNL_FAMILY_IP6MR:
7316 case RTNL_FAMILY_IPMR:
7317 mlxsw_sp_router_fibmr_event(fib_event, info);
7321 /* Enqueue the event and trigger the work */
7322 spin_lock_bh(&router->fib_event_queue_lock);
7323 list_add_tail(&fib_event->list, &router->fib_event_queue);
7324 spin_unlock_bh(&router->fib_event_queue_lock);
7325 mlxsw_core_schedule_work(&router->fib_event_work);
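/* Shape of the event pipeline (as implied above): this notifier runs under
 * rcu_read_lock() and so only validates the event, grabs the references it
 * needs, and queues a GFP_ATOMIC allocation under the fib_event_queue spin
 * lock; all hardware programming is deferred to
 * mlxsw_sp_router_fib_event_work(), where sleeping is allowed.
 */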
7334 static struct mlxsw_sp_rif *
7335 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7336 const struct net_device *dev)
7340 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7341 if (mlxsw_sp->router->rifs[i] &&
7342 mlxsw_sp->router->rifs[i]->dev == dev)
7343 return mlxsw_sp->router->rifs[i];
7348 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7349 const struct net_device *dev)
7351 struct mlxsw_sp_rif *rif;
7353 mutex_lock(&mlxsw_sp->router->lock);
7354 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7355 mutex_unlock(&mlxsw_sp->router->lock);
7360 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7362 struct mlxsw_sp_rif *rif;
7365 mutex_lock(&mlxsw_sp->router->lock);
7366 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7370 /* We only return the VID for VLAN RIFs. Otherwise we return an
7371 * invalid value (0).
7373 if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7376 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7379 mutex_unlock(&mlxsw_sp->router->lock);
7383 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7385 char ritr_pl[MLXSW_REG_RITR_LEN];
7388 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7389 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7393 mlxsw_reg_ritr_enable_set(ritr_pl, false);
7394 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
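/* The query-modify-write above is the driver's standard RITR idiom:
 * fetching the record first preserves every field other than the one being
 * changed when it is written back. mlxsw_sp_rif_vrrp_op() and
 * mlxsw_sp_rif_edit() below follow the same shape.
 */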
7397 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
7398 struct mlxsw_sp_rif *rif)
7400 mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
7401 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
7402 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
7406 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
7407 unsigned long event)
7409 struct inet6_dev *inet6_dev;
7410 bool addr_list_empty = true;
7411 struct in_device *idev;
7418 idev = __in_dev_get_rcu(dev);
7419 if (idev && idev->ifa_list)
7420 addr_list_empty = false;
7422 inet6_dev = __in6_dev_get(dev);
7423 if (addr_list_empty && inet6_dev &&
7424 !list_empty(&inet6_dev->addr_list))
7425 addr_list_empty = false;
7428 /* macvlans do not have a RIF, but rather piggy back on the
7429 * RIF of their lower device.
7431 if (netif_is_macvlan(dev) && addr_list_empty)
7434 if (rif && addr_list_empty &&
7435 !netif_is_l3_slave(rif->dev))
7437 /* It is possible we already removed the RIF ourselves
7438 * if it was assigned to a netdev that is now a bridge or a LAG slave. */
7447 static enum mlxsw_sp_rif_type
7448 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
7449 const struct net_device *dev)
7451 enum mlxsw_sp_fid_type type;
7453 if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
7454 return MLXSW_SP_RIF_TYPE_IPIP_LB;
7456 /* Otherwise RIF type is derived from the type of the underlying FID. */
7457 if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
7458 type = MLXSW_SP_FID_TYPE_8021Q;
7459 else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
7460 type = MLXSW_SP_FID_TYPE_8021Q;
7461 else if (netif_is_bridge_master(dev))
7462 type = MLXSW_SP_FID_TYPE_8021D;
7464 type = MLXSW_SP_FID_TYPE_RFID;
7466 return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
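/* Decision sketch (restating the branches above):
 *
 * IP-in-IP tunnel netdev -> MLXSW_SP_RIF_TYPE_IPIP_LB
 * VLAN upper of a bridge -> 802.1Q FID
 * VLAN-aware bridge -> 802.1Q FID
 * VLAN-unaware bridge -> 802.1D FID
 * anything else (port, LAG, VLAN upper of a port) -> rFID
 *
 * with mlxsw_sp_fid_type_rif_type() translating the FID type into the
 * corresponding RIF type.
 */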
7469 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
7473 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
7474 if (!mlxsw_sp->router->rifs[i]) {
7483 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
7485 struct net_device *l3_dev)
7487 struct mlxsw_sp_rif *rif;
7489 rif = kzalloc(rif_size, GFP_KERNEL);
7493 INIT_LIST_HEAD(&rif->nexthop_list);
7494 INIT_LIST_HEAD(&rif->neigh_list);
7496 ether_addr_copy(rif->addr, l3_dev->dev_addr);
7497 rif->mtu = l3_dev->mtu;
7501 rif->rif_index = rif_index;
7506 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
7509 return mlxsw_sp->router->rifs[rif_index];
7512 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
7514 return rif->rif_index;
7517 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7519 return lb_rif->common.rif_index;
7522 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7524 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
7525 struct mlxsw_sp_vr *ul_vr;
7527 ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
7528 if (WARN_ON(IS_ERR(ul_vr)))
7534 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7536 return lb_rif->ul_rif_id;
7539 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
7541 return rif->dev->ifindex;
7544 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
7549 static struct mlxsw_sp_rif *
7550 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
7551 const struct mlxsw_sp_rif_params *params,
7552 struct netlink_ext_ack *extack)
7554 u32 tb_id = l3mdev_fib_table(params->dev);
7555 const struct mlxsw_sp_rif_ops *ops;
7556 struct mlxsw_sp_fid *fid = NULL;
7557 enum mlxsw_sp_rif_type type;
7558 struct mlxsw_sp_rif *rif;
7559 struct mlxsw_sp_vr *vr;
7563 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
7564 ops = mlxsw_sp->rif_ops_arr[type];
7566 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
7568 return ERR_CAST(vr);
7571 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7573 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7574 goto err_rif_index_alloc;
7577 rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
7583 mlxsw_sp->router->rifs[rif_index] = rif;
7584 rif->mlxsw_sp = mlxsw_sp;
7588 fid = ops->fid_get(rif, extack);
7597 ops->setup(rif, params);
7599 err = ops->configure(rif);
7603 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
7604 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
7606 goto err_mr_rif_add;
7609 mlxsw_sp_rif_counters_alloc(rif);
7614 for (i--; i >= 0; i--)
7615 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
7616 ops->deconfigure(rif);
7619 mlxsw_sp_fid_put(fid);
7621 mlxsw_sp->router->rifs[rif_index] = NULL;
7625 err_rif_index_alloc:
7627 mlxsw_sp_vr_put(mlxsw_sp, vr);
7628 return ERR_PTR(err);
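/* mlxsw_sp_rif_destroy() below tears a RIF down in essentially the reverse
 * order of the creation sequence above: counters, multicast router tables,
 * ops->deconfigure(), FID reference, rifs[] slot and finally the virtual
 * router reference.
 */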
7631 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
7633 const struct mlxsw_sp_rif_ops *ops = rif->ops;
7634 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7635 struct mlxsw_sp_fid *fid = rif->fid;
7636 struct mlxsw_sp_vr *vr;
7639 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
7640 vr = &mlxsw_sp->router->vrs[rif->vr_id];
7642 mlxsw_sp_rif_counters_free(rif);
7643 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7644 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
7645 ops->deconfigure(rif);
7647 /* Loopback RIFs are not associated with a FID. */
7648 mlxsw_sp_fid_put(fid);
7649 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
7653 mlxsw_sp_vr_put(mlxsw_sp, vr);
7656 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
7657 struct net_device *dev)
7659 struct mlxsw_sp_rif *rif;
7661 mutex_lock(&mlxsw_sp->router->lock);
7662 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7665 mlxsw_sp_rif_destroy(rif);
7667 mutex_unlock(&mlxsw_sp->router->lock);
7671 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
7672 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
7674 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
7676 params->vid = mlxsw_sp_port_vlan->vid;
7677 params->lag = mlxsw_sp_port->lagged;
7679 params->lag_id = mlxsw_sp_port->lag_id;
7681 params->system_port = mlxsw_sp_port->local_port;
7684 static struct mlxsw_sp_rif_subport *
7685 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
7687 return container_of(rif, struct mlxsw_sp_rif_subport, common);
7690 static struct mlxsw_sp_rif *
7691 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
7692 const struct mlxsw_sp_rif_params *params,
7693 struct netlink_ext_ack *extack)
7695 struct mlxsw_sp_rif_subport *rif_subport;
7696 struct mlxsw_sp_rif *rif;
7698 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
7700 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
7702 rif_subport = mlxsw_sp_rif_subport_rif(rif);
7703 refcount_inc(&rif_subport->ref_count);
7707 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
7709 struct mlxsw_sp_rif_subport *rif_subport;
7711 rif_subport = mlxsw_sp_rif_subport_rif(rif);
7712 if (!refcount_dec_and_test(&rif_subport->ref_count))
7715 mlxsw_sp_rif_destroy(rif);
7719 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
7720 struct net_device *l3_dev,
7721 struct netlink_ext_ack *extack)
7723 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
7724 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
7725 struct mlxsw_sp_rif_params params = {
7728 u16 vid = mlxsw_sp_port_vlan->vid;
7729 struct mlxsw_sp_rif *rif;
7730 struct mlxsw_sp_fid *fid;
7733 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
7734 rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
7736 return PTR_ERR(rif);
7738 /* FID was already created, just take a reference */
7739 fid = rif->ops->fid_get(rif, extack);
7740 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
7742 goto err_fid_port_vid_map;
7744 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
7746 goto err_port_vid_learning_set;
7748 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
7749 BR_STATE_FORWARDING);
7751 goto err_port_vid_stp_set;
7753 mlxsw_sp_port_vlan->fid = fid;
7757 err_port_vid_stp_set:
7758 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
7759 err_port_vid_learning_set:
7760 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
7761 err_fid_port_vid_map:
7762 mlxsw_sp_fid_put(fid);
7763 mlxsw_sp_rif_subport_put(rif);
7768 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
7770 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
7771 struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
7772 struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
7773 u16 vid = mlxsw_sp_port_vlan->vid;
7775 if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
7778 mlxsw_sp_port_vlan->fid = NULL;
7779 mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
7780 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
7781 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
7782 mlxsw_sp_fid_put(fid);
7783 mlxsw_sp_rif_subport_put(rif);
7787 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
7788 struct net_device *l3_dev,
7789 struct netlink_ext_ack *extack)
7791 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
7792 struct mlxsw_sp_rif *rif;
7795 mutex_lock(&mlxsw_sp->router->lock);
7796 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7800 err = __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
7803 mutex_unlock(&mlxsw_sp->router->lock);
7808 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
7810 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
7812 mutex_lock(&mlxsw_sp->router->lock);
7813 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
7814 mutex_unlock(&mlxsw_sp->router->lock);
7817 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
7818 struct net_device *port_dev,
7819 unsigned long event, u16 vid,
7820 struct netlink_ext_ack *extack)
7822 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
7823 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
7825 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
7826 if (WARN_ON(!mlxsw_sp_port_vlan))
7831 return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
7834 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
7841 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
7842 unsigned long event,
7843 struct netlink_ext_ack *extack)
7845 if (netif_is_bridge_port(port_dev) ||
7846 netif_is_lag_port(port_dev) ||
7847 netif_is_ovs_port(port_dev))
7850 return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
7851 MLXSW_SP_DEFAULT_VID, extack);
7854 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
7855 struct net_device *lag_dev,
7856 unsigned long event, u16 vid,
7857 struct netlink_ext_ack *extack)
7859 struct net_device *port_dev;
7860 struct list_head *iter;
7863 netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
7864 if (mlxsw_sp_port_dev_check(port_dev)) {
7865 err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
7877 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
7878 unsigned long event,
7879 struct netlink_ext_ack *extack)
7881 if (netif_is_bridge_port(lag_dev))
7884 return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
7885 MLXSW_SP_DEFAULT_VID, extack);
7888 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
7889 struct net_device *l3_dev,
7890 unsigned long event,
7891 struct netlink_ext_ack *extack)
7893 struct mlxsw_sp_rif_params params = {
7896 struct mlxsw_sp_rif *rif;
7900 if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
7903 br_vlan_get_proto(l3_dev, &proto);
7904 if (proto == ETH_P_8021AD) {
7905 NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
7909 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
7911 return PTR_ERR(rif);
7914 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7915 mlxsw_sp_rif_destroy(rif);
7922 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
7923 struct net_device *vlan_dev,
7924 unsigned long event,
7925 struct netlink_ext_ack *extack)
7927 struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
7928 u16 vid = vlan_dev_vlan_id(vlan_dev);
7930 if (netif_is_bridge_port(vlan_dev))
7933 if (mlxsw_sp_port_dev_check(real_dev))
7934 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
7935 event, vid, extack);
7936 else if (netif_is_lag_master(real_dev))
7937 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
7939 else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
7940 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
7946 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
7948 u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
7949 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
7951 return ether_addr_equal_masked(mac, vrrp4, mask);
7954 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
7956 u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
7957 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
7959 return ether_addr_equal_masked(mac, vrrp6, mask);
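/* Worked example of the masked match above: a VRRP group with VRID 7 uses
 * 00:00:5e:00:01:07 for IPv4 and 00:00:5e:00:02:07 for IPv6. The last byte
 * is excluded from the comparison and is instead programmed as the RITR
 * VRRP ID by mlxsw_sp_rif_vrrp_op() below (vrrp_id = mac[5]).
 */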
7962 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7963 const u8 *mac, bool adding)
7965 char ritr_pl[MLXSW_REG_RITR_LEN];
7966 u8 vrrp_id = adding ? mac[5] : 0;
7969 if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
7970 !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
7973 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
7974 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7978 if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
7979 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
7981 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
7983 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7986 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
7987 const struct net_device *macvlan_dev,
7988 struct netlink_ext_ack *extack)
7990 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
7991 struct mlxsw_sp_rif *rif;
7994 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
7996 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
8000 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8001 mlxsw_sp_fid_index(rif->fid), true);
8005 err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
8006 macvlan_dev->dev_addr, true);
8008 goto err_rif_vrrp_add;
8010 /* Make sure the bridge driver does not have this MAC pointing at some other port. */
8013 if (rif->ops->fdb_del)
8014 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
8019 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8020 mlxsw_sp_fid_index(rif->fid), false);
8024 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8025 const struct net_device *macvlan_dev)
8027 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8028 struct mlxsw_sp_rif *rif;
8030 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8031 /* If we do not have a RIF, then we already took care of
8032 * removing the macvlan's MAC during RIF deletion.
8036 mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
8038 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8039 mlxsw_sp_fid_index(rif->fid), false);
8042 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8043 const struct net_device *macvlan_dev)
8045 mutex_lock(&mlxsw_sp->router->lock);
8046 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8047 mutex_unlock(&mlxsw_sp->router->lock);
8050 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
8051 struct net_device *macvlan_dev,
8052 unsigned long event,
8053 struct netlink_ext_ack *extack)
8057 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
8059 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8066 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
8067 struct net_device *dev,
8068 const unsigned char *dev_addr,
8069 struct netlink_ext_ack *extack)
8071 struct mlxsw_sp_rif *rif;
8074 /* A RIF is not created for macvlan netdevs. Their MAC is used to populate the FDB. */
8077 if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
8080 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
8081 rif = mlxsw_sp->router->rifs[i];
8082 if (rif && rif->ops &&
8083 rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
8085 if (rif && rif->dev && rif->dev != dev &&
8086 !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
8087 mlxsw_sp->mac_mask)) {
8088 NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
8096 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
8097 struct net_device *dev,
8098 unsigned long event,
8099 struct netlink_ext_ack *extack)
8101 if (mlxsw_sp_port_dev_check(dev))
8102 return mlxsw_sp_inetaddr_port_event(dev, event, extack);
8103 else if (netif_is_lag_master(dev))
8104 return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
8105 else if (netif_is_bridge_master(dev))
8106 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
8108 else if (is_vlan_dev(dev))
8109 return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
8111 else if (netif_is_macvlan(dev))
8112 return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
8118 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
8119 unsigned long event, void *ptr)
8121 struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
8122 struct net_device *dev = ifa->ifa_dev->dev;
8123 struct mlxsw_sp_router *router;
8124 struct mlxsw_sp_rif *rif;
8127 /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
8128 if (event == NETDEV_UP)
8131 router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
8132 mutex_lock(&router->lock);
8133 rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
8134 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8137 err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
8139 mutex_unlock(&router->lock);
8140 return notifier_from_errno(err);
8143 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
8144 unsigned long event, void *ptr)
8146 struct in_validator_info *ivi = (struct in_validator_info *) ptr;
8147 struct net_device *dev = ivi->ivi_dev->dev;
8148 struct mlxsw_sp *mlxsw_sp;
8149 struct mlxsw_sp_rif *rif;
8152 mlxsw_sp = mlxsw_sp_lower_get(dev);
8156 mutex_lock(&mlxsw_sp->router->lock);
8157 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8158 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8161 err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
8166 err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
8168 mutex_unlock(&mlxsw_sp->router->lock);
8169 return notifier_from_errno(err);
8172 struct mlxsw_sp_inet6addr_event_work {
8173 struct work_struct work;
8174 struct mlxsw_sp *mlxsw_sp;
8175 struct net_device *dev;
8176 unsigned long event;
8179 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
8181 struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
8182 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
8183 struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
8184 struct net_device *dev = inet6addr_work->dev;
8185 unsigned long event = inet6addr_work->event;
8186 struct mlxsw_sp_rif *rif;
8189 mutex_lock(&mlxsw_sp->router->lock);
8191 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8192 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8195 __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
8197 mutex_unlock(&mlxsw_sp->router->lock);
8200 kfree(inet6addr_work);
8203 /* Called with rcu_read_lock() */
8204 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
8205 unsigned long event, void *ptr)
8207 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
8208 struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
8209 struct net_device *dev = if6->idev->dev;
8210 struct mlxsw_sp_router *router;
8212 /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
8213 if (event == NETDEV_UP)
8216 inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
8217 if (!inet6addr_work)
8220 router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
8221 INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
8222 inet6addr_work->mlxsw_sp = router->mlxsw_sp;
8223 inet6addr_work->dev = dev;
8224 inet6addr_work->event = event;
8226 mlxsw_core_schedule_work(&inet6addr_work->work);
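/* Unlike the IPv4 handler above, which may sleep and takes the router
 * mutex directly, this handler runs on the atomic inet6addr notifier
 * chain; it therefore only allocates a work item with GFP_ATOMIC and
 * defers the actual RIF update to mlxsw_sp_inet6addr_event_work().
 */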
8231 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
8232 unsigned long event, void *ptr)
8234 struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
8235 struct net_device *dev = i6vi->i6vi_dev->dev;
8236 struct mlxsw_sp *mlxsw_sp;
8237 struct mlxsw_sp_rif *rif;
8240 mlxsw_sp = mlxsw_sp_lower_get(dev);
8244 mutex_lock(&mlxsw_sp->router->lock);
8245 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8246 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8249 err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
8254 err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
8256 mutex_unlock(&mlxsw_sp->router->lock);
8257 return notifier_from_errno(err);
8260 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8261 const char *mac, int mtu)
8263 char ritr_pl[MLXSW_REG_RITR_LEN];
8266 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8267 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8271 mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
8272 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
8273 mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
8274 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8278 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
8279 struct mlxsw_sp_rif *rif)
8281 struct net_device *dev = rif->dev;
8285 fid_index = mlxsw_sp_fid_index(rif->fid);
8287 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
8291 err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
8296 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
8298 goto err_rif_fdb_op;
8300 if (rif->mtu != dev->mtu) {
8301 struct mlxsw_sp_vr *vr;
8304 /* The RIF is relevant only to its mr_table instance, as unlike
8305 * unicast routing, in multicast routing a RIF cannot be shared
8306 * between several multicast routing tables.
8308 vr = &mlxsw_sp->router->vrs[rif->vr_id];
8309 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8310 mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
8314 ether_addr_copy(rif->addr, dev->dev_addr);
8315 rif->mtu = dev->mtu;
8317 netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
8322 mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
8324 mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
8328 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
8329 struct netdev_notifier_pre_changeaddr_info *info)
8331 struct netlink_ext_ack *extack;
8333 extack = netdev_notifier_info_to_extack(&info->info);
8334 return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
8335 info->dev_addr, extack);
8338 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
8339 unsigned long event, void *ptr)
8341 struct mlxsw_sp *mlxsw_sp;
8342 struct mlxsw_sp_rif *rif;
8345 mlxsw_sp = mlxsw_sp_lower_get(dev);
8349 mutex_lock(&mlxsw_sp->router->lock);
8350 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8355 case NETDEV_CHANGEMTU:
8356 case NETDEV_CHANGEADDR:
8357 err = mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
8359 case NETDEV_PRE_CHANGEADDR:
8360 err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
8365 mutex_unlock(&mlxsw_sp->router->lock);
8369 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
8370 struct net_device *l3_dev,
8371 struct netlink_ext_ack *extack)
8373 struct mlxsw_sp_rif *rif;
8375 /* If netdev is already associated with a RIF, then we need to
8376 * destroy it and create a new one with the new virtual router ID.
8378 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8380 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
8383 return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
8386 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
8387 struct net_device *l3_dev)
8389 struct mlxsw_sp_rif *rif;
8391 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8394 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
8397 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
8398 struct netdev_notifier_changeupper_info *info)
8400 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
8403 /* We do not create a RIF for a macvlan, but only use it to
8404 * direct more MAC addresses to the router.
8406 if (!mlxsw_sp || netif_is_macvlan(l3_dev))
8409 mutex_lock(&mlxsw_sp->router->lock);
8411 case NETDEV_PRECHANGEUPPER:
8413 case NETDEV_CHANGEUPPER:
8414 if (info->linking) {
8415 struct netlink_ext_ack *extack;
8417 extack = netdev_notifier_info_to_extack(&info->info);
8418 err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
8420 mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
8424 mutex_unlock(&mlxsw_sp->router->lock);
8429 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
8430 struct netdev_nested_priv *priv)
8432 struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
8434 if (!netif_is_macvlan(dev))
8437 return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
8438 mlxsw_sp_fid_index(rif->fid), false);
8441 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
8443 struct netdev_nested_priv priv = {
8444 .data = (void *)rif,
8447 if (!netif_is_macvlan_port(rif->dev))
8450 netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
8451 return netdev_walk_all_upper_dev_rcu(rif->dev,
8452 __mlxsw_sp_rif_macvlan_flush, &priv);
8455 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
8456 const struct mlxsw_sp_rif_params *params)
8458 struct mlxsw_sp_rif_subport *rif_subport;
8460 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8461 refcount_set(&rif_subport->ref_count, 1);
8462 rif_subport->vid = params->vid;
8463 rif_subport->lag = params->lag;
8465 rif_subport->lag_id = params->lag_id;
8467 rif_subport->system_port = params->system_port;
8470 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
8472 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8473 struct mlxsw_sp_rif_subport *rif_subport;
8474 char ritr_pl[MLXSW_REG_RITR_LEN];
8476 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8477 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
8478 rif->rif_index, rif->vr_id, rif->dev->mtu);
8479 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
8480 mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
8481 rif_subport->lag ? rif_subport->lag_id :
8482 rif_subport->system_port,
8485 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8488 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
8492 err = mlxsw_sp_rif_subport_op(rif, true);
8496 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8497 mlxsw_sp_fid_index(rif->fid), true);
8499 goto err_rif_fdb_op;
8501 mlxsw_sp_fid_rif_set(rif->fid, rif);
8505 mlxsw_sp_rif_subport_op(rif, false);
8509 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
8511 struct mlxsw_sp_fid *fid = rif->fid;
8513 mlxsw_sp_fid_rif_set(fid, NULL);
8514 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8515 mlxsw_sp_fid_index(fid), false);
8516 mlxsw_sp_rif_macvlan_flush(rif);
8517 mlxsw_sp_rif_subport_op(rif, false);
8520 static struct mlxsw_sp_fid *
8521 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
8522 struct netlink_ext_ack *extack)
8524 return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
8527 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
8528 .type = MLXSW_SP_RIF_TYPE_SUBPORT,
8529 .rif_size = sizeof(struct mlxsw_sp_rif_subport),
8530 .setup = mlxsw_sp_rif_subport_setup,
8531 .configure = mlxsw_sp_rif_subport_configure,
8532 .deconfigure = mlxsw_sp_rif_subport_deconfigure,
8533 .fid_get = mlxsw_sp_rif_subport_fid_get,
8536 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
8537 enum mlxsw_reg_ritr_if_type type,
8538 u16 vid_fid, bool enable)
8540 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8541 char ritr_pl[MLXSW_REG_RITR_LEN];
8543 mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
8545 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
8546 mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
8548 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8551 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
8553 return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
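/* The "router port" is a synthetic local port one past the highest
 * possible port number. It is installed in the FID flood tables (see
 * mlxsw_sp_rif_fid_configure() below) so that flooded broadcast and
 * multicast traffic in a routed FID also reaches the router.
 */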
8556 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
8558 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8559 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
8562 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
8567 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
8568 mlxsw_sp_router_port(mlxsw_sp), true);
8570 goto err_fid_mc_flood_set;
8572 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
8573 mlxsw_sp_router_port(mlxsw_sp), true);
8575 goto err_fid_bc_flood_set;
8577 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8578 mlxsw_sp_fid_index(rif->fid), true);
8580 goto err_rif_fdb_op;
8582 mlxsw_sp_fid_rif_set(rif->fid, rif);
8586 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
8587 mlxsw_sp_router_port(mlxsw_sp), false);
8588 err_fid_bc_flood_set:
8589 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
8590 mlxsw_sp_router_port(mlxsw_sp), false);
8591 err_fid_mc_flood_set:
8592 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
8596 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
8598 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
8599 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8600 struct mlxsw_sp_fid *fid = rif->fid;
8602 mlxsw_sp_fid_rif_set(fid, NULL);
8603 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8604 mlxsw_sp_fid_index(fid), false);
8605 mlxsw_sp_rif_macvlan_flush(rif);
8606 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
8607 mlxsw_sp_router_port(mlxsw_sp), false);
8608 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
8609 mlxsw_sp_router_port(mlxsw_sp), false);
8610 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
8613 static struct mlxsw_sp_fid *
8614 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
8615 struct netlink_ext_ack *extack)
8617 return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
8620 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
8622 struct switchdev_notifier_fdb_info info;
8623 struct net_device *dev;
8625 dev = br_fdb_find_port(rif->dev, mac, 0);
8631 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
8635 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
8636 .type = MLXSW_SP_RIF_TYPE_FID,
8637 .rif_size = sizeof(struct mlxsw_sp_rif),
8638 .configure = mlxsw_sp_rif_fid_configure,
8639 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
8640 .fid_get = mlxsw_sp_rif_fid_fid_get,
8641 .fdb_del = mlxsw_sp_rif_fid_fdb_del,
8644 static struct mlxsw_sp_fid *
8645 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
8646 struct netlink_ext_ack *extack)
8648 struct net_device *br_dev;
8652 if (is_vlan_dev(rif->dev)) {
8653 vid = vlan_dev_vlan_id(rif->dev);
8654 br_dev = vlan_dev_real_dev(rif->dev);
8655 if (WARN_ON(!netif_is_bridge_master(br_dev)))
8656 return ERR_PTR(-EINVAL);
8658 err = br_vlan_get_pvid(rif->dev, &vid);
8659 if (err < 0 || !vid) {
8660 NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
8661 return ERR_PTR(-EINVAL);
8665 return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
8668 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
8670 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
8671 struct switchdev_notifier_fdb_info info;
8672 struct net_device *br_dev;
8673 struct net_device *dev;
8675 br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
8676 dev = br_fdb_find_port(br_dev, mac, vid);
8682 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
8686 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
8687 .type = MLXSW_SP_RIF_TYPE_VLAN,
8688 .rif_size = sizeof(struct mlxsw_sp_rif),
8689 .configure = mlxsw_sp_rif_fid_configure,
8690 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
8691 .fid_get = mlxsw_sp_rif_vlan_fid_get,
8692 .fdb_del = mlxsw_sp_rif_vlan_fdb_del,
8695 static struct mlxsw_sp_rif_ipip_lb *
8696 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
8698 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
8702 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
8703 const struct mlxsw_sp_rif_params *params)
8705 struct mlxsw_sp_rif_params_ipip_lb *params_lb;
8706 struct mlxsw_sp_rif_ipip_lb *rif_lb;
8708 params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
8710 rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
8711 rif_lb->lb_config = params_lb->lb_config;
8715 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
8717 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
8718 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
8719 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8720 struct mlxsw_sp_vr *ul_vr;
8723 ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
8725 return PTR_ERR(ul_vr);
8727 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
8729 goto err_loopback_op;
8731 lb_rif->ul_vr_id = ul_vr->id;
8732 lb_rif->ul_rif_id = 0;
8737 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
8741 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
8743 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
8744 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8745 struct mlxsw_sp_vr *ul_vr;
8747 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
8748 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
8751 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
8754 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
8755 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
8756 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
8757 .setup = mlxsw_sp_rif_ipip_lb_setup,
8758 .configure = mlxsw_sp1_rif_ipip_lb_configure,
8759 .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure,
8762 const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
8763 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
8764 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
8765 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
8766 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops,
8770 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
8772 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
8773 char ritr_pl[MLXSW_REG_RITR_LEN];
8775 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
8776 ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
8777 mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
8778 MLXSW_REG_RITR_LOOPBACK_GENERIC);
8780 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8783 static struct mlxsw_sp_rif *
8784 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
8785 struct netlink_ext_ack *extack)
8787 struct mlxsw_sp_rif *ul_rif;
8791 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
8793 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8794 return ERR_PTR(err);
8797 ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
8799 return ERR_PTR(-ENOMEM);
8801 mlxsw_sp->router->rifs[rif_index] = ul_rif;
8802 ul_rif->mlxsw_sp = mlxsw_sp;
8803 err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
8810 mlxsw_sp->router->rifs[rif_index] = NULL;
8812 return ERR_PTR(err);
8815 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
8817 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
8819 mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
8820 mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
8824 static struct mlxsw_sp_rif *
8825 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
8826 struct netlink_ext_ack *extack)
8828 struct mlxsw_sp_vr *vr;
8831 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
8833 return ERR_CAST(vr);
8835 if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
8838 vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
8839 if (IS_ERR(vr->ul_rif)) {
8840 err = PTR_ERR(vr->ul_rif);
8841 goto err_ul_rif_create;
8845 refcount_set(&vr->ul_rif_refcnt, 1);
8850 mlxsw_sp_vr_put(mlxsw_sp, vr);
8851 return ERR_PTR(err);
8854 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
8856 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
8857 struct mlxsw_sp_vr *vr;
8859 vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
8861 if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
8865 mlxsw_sp_ul_rif_destroy(ul_rif);
8866 mlxsw_sp_vr_put(mlxsw_sp, vr);
8869 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
8872 struct mlxsw_sp_rif *ul_rif;
8875 mutex_lock(&mlxsw_sp->router->lock);
8876 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
8877 if (IS_ERR(ul_rif)) {
8878 err = PTR_ERR(ul_rif);
8881 *ul_rif_index = ul_rif->rif_index;
8883 mutex_unlock(&mlxsw_sp->router->lock);
8887 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
8889 struct mlxsw_sp_rif *ul_rif;
8891 mutex_lock(&mlxsw_sp->router->lock);
8892 ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
8893 if (WARN_ON(!ul_rif))
8896 mlxsw_sp_ul_rif_put(ul_rif);
8898 mutex_unlock(&mlxsw_sp->router->lock);
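/* Usage sketch (hypothetical caller needing an underlay RIF; error
 * handling trimmed):
 *
 * u16 ul_rif_index;
 *
 * err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, ul_tb_id, &ul_rif_index);
 * if (err)
 * return err;
 * ... program the tunnel against ul_rif_index ...
 * mlxsw_sp_router_ul_rif_put(mlxsw_sp, ul_rif_index);
 *
 * Both helpers take router->lock internally, so it must not be held.
 */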
8902 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
8904 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
8905 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
8906 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8907 struct mlxsw_sp_rif *ul_rif;
8910 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
8912 return PTR_ERR(ul_rif);
8914 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
8916 goto err_loopback_op;
8918 lb_rif->ul_vr_id = 0;
8919 lb_rif->ul_rif_id = ul_rif->rif_index;
8924 mlxsw_sp_ul_rif_put(ul_rif);
8928 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
8930 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
8931 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8932 struct mlxsw_sp_rif *ul_rif;
8934 ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
8935 mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
8936 mlxsw_sp_ul_rif_put(ul_rif);
8939 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
8940 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
8941 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
8942 .setup = mlxsw_sp_rif_ipip_lb_setup,
8943 .configure = mlxsw_sp2_rif_ipip_lb_configure,
8944 .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure,
8947 const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
8948 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
8949 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
8950 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
8951 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops,
8954 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
8956 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
8958 mlxsw_sp->router->rifs = kcalloc(max_rifs,
8959 sizeof(struct mlxsw_sp_rif *),
8961 if (!mlxsw_sp->router->rifs)
8967 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
8971 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
8972 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
8974 kfree(mlxsw_sp->router->rifs);
static int
mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
{
	char tigcr_pl[MLXSW_REG_TIGCR_LEN];

	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
}

static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);

	err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
	if (err)
		return err;
	err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
	if (err)
		return err;

	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
}

static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
{
	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
}

static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}

static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}

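/* Program the ECMP hash inputs for IPv4. With the default hash policy
 * (net.ipv4.fib_multipath_hash_policy == 0) only the L3 source and
 * destination addresses are hashed; with the L4 policy the IP protocol
 * and TCP/UDP ports are mixed in as well.
 */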
static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
{
	struct net *net = mlxsw_sp_net(mlxsw_sp);
	bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy;

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
	if (only_l3)
		return;
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
}

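/* Program the ECMP hash inputs for IPv6, governed by
 * net.ipv6.fib_multipath_hash_policy. The L3-only policy hashes the
 * addresses, next header and flow label; the L4 policy hashes the
 * TCP/UDP ports instead of the flow label.
 */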
static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
{
	bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp));

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
	if (only_l3) {
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
	} else {
		mlxsw_sp_mp_hash_header_set(recr2_pl,
					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
	}
}

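/* Seed the hash from the switch base MAC so that different devices
 * compute different hashes for the same flow, presumably to avoid hash
 * polarization when several such switches are cascaded.
 */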
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	char recr2_pl[MLXSW_REG_RECR2_LEN];
	u32 seed;

	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
	mlxsw_reg_recr2_pack(recr2_pl, seed);
	mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl);
	mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
}
#else
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
#endif

static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* HW determines switch priority based on DSCP bits, but the kernel
	 * still does so based on the full ToS byte. Since the bit layouts
	 * differ, translate each DSCP value to the priority the
	 * corresponding ToS value would yield, skipping the 2
	 * least-significant ECN bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}

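/* RGCR enables and sizes the router: the pack arguments enable IPv4 and
 * IPv6 routing, the max_router_interfaces set caps the number of RIFs at
 * the device resource limit, and the usp set mirrors the
 * net.ipv4.ip_forward_update_priority sysctl.
 */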
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct net *net = mlxsw_sp_net(mlxsw_sp);
	bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;
	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);

	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}

static const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_basic_ops = {
	.init = mlxsw_sp_router_ll_basic_init,
	.ralta_write = mlxsw_sp_router_ll_basic_ralta_write,
	.ralst_write = mlxsw_sp_router_ll_basic_ralst_write,
	.raltb_write = mlxsw_sp_router_ll_basic_raltb_write,
	.fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_basic),
	.fib_entry_pack = mlxsw_sp_router_ll_basic_fib_entry_pack,
	.fib_entry_act_remote_pack = mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack,
	.fib_entry_act_local_pack = mlxsw_sp_router_ll_basic_fib_entry_act_local_pack,
	.fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack,
	.fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack,
	.fib_entry_commit = mlxsw_sp_router_ll_basic_fib_entry_commit,
	.fib_entry_is_committed = mlxsw_sp_router_ll_basic_fib_entry_is_committed,
};

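/* A single FIB operation context is shared by all protocols, so allocate
 * it large enough for the biggest fib_entry_op_ctx_size among the
 * registered low-level ops.
 */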
static int mlxsw_sp_router_ll_op_ctx_init(struct mlxsw_sp_router *router)
{
	size_t max_size = 0;
	int i;

	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
		size_t size = router->proto_ll_ops[i]->fib_entry_op_ctx_size;

		if (size > max_size)
			max_size = size;
	}
	router->ll_op_ctx = kzalloc(sizeof(*router->ll_op_ctx) + max_size,
				    GFP_KERNEL);
	if (!router->ll_op_ctx)
		return -ENOMEM;
	INIT_LIST_HEAD(&router->ll_op_ctx->fib_entry_priv_list);
	return 0;
}

static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router)
{
	WARN_ON(!list_empty(&router->ll_op_ctx->fib_entry_priv_list));
	kfree(router->ll_op_ctx);
}

static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
{
	u16 lb_rif_index;
	int err;

	/* Create a generic loopback RIF associated with the main table
	 * (default VRF). Any table can be used, but the main table exists
	 * anyway, so we do not waste resources.
	 */
	err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN,
					 &lb_rif_index);
	if (err)
		return err;

	mlxsw_sp->router->lb_rif_index = lb_rif_index;

	return 0;
}

static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
}

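/* Bring-up order below matters: each step may fail, and the error path
 * unwinds in exact reverse order via the goto labels at the bottom.
 * mlxsw_sp_router_fini() mirrors the same order for teardown.
 */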
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
			 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_router *router;
	int err;

	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mutex_init(&router->lock);
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	err = mlxsw_sp_router_xm_init(mlxsw_sp);
	if (err)
		goto err_xm_init;

	router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV4] = mlxsw_sp_router_xm_ipv4_is_supported(mlxsw_sp) ?
						       &mlxsw_sp_router_ll_xm_ops :
						       &mlxsw_sp_router_ll_basic_ops;
	router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_router_ll_basic_ops;

	err = mlxsw_sp_router_ll_op_ctx_init(router);
	if (err)
		goto err_ll_op_ctx_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = mlxsw_sp_ipips_init(mlxsw_sp);
	if (err)
		goto err_ipips_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
	if (err)
		goto err_mr_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_lb_rif_init(mlxsw_sp);
	if (err)
		goto err_lb_rif_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
	if (err)
		goto err_mp_hash_init;

	err = mlxsw_sp_dscp_init(mlxsw_sp);
	if (err)
		goto err_dscp_init;

	INIT_WORK(&router->fib_event_work, mlxsw_sp_router_fib_event_work);
	INIT_LIST_HEAD(&router->fib_event_queue);
	spin_lock_init(&router->fib_event_queue_lock);

	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
	err = register_inetaddr_notifier(&router->inetaddr_nb);
	if (err)
		goto err_register_inetaddr_notifier;

	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
	err = register_inet6addr_notifier(&router->inet6addr_nb);
	if (err)
		goto err_register_inet6addr_notifier;

	mlxsw_sp->router->netevent_nb.notifier_call =
		mlxsw_sp_router_netevent_event;
	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	if (err)
		goto err_register_netevent_notifier;

	mlxsw_sp->router->nexthop_nb.notifier_call =
		mlxsw_sp_nexthop_obj_event;
	err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
					&mlxsw_sp->router->nexthop_nb,
					extack);
	if (err)
		goto err_register_nexthop_notifier;

	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
				    &mlxsw_sp->router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush, extack);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
				    &mlxsw_sp->router->nexthop_nb);
err_register_nexthop_notifier:
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
	unregister_inet6addr_notifier(&router->inet6addr_nb);
err_register_inet6addr_notifier:
	unregister_inetaddr_notifier(&router->inetaddr_nb);
err_register_inetaddr_notifier:
	mlxsw_core_flush_owq();
	WARN_ON(!list_empty(&router->fib_event_queue));
err_dscp_init:
err_mp_hash_init:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_lb_rif_fini(mlxsw_sp);
err_lb_rif_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	mlxsw_sp_router_ll_op_ctx_fini(router);
err_ll_op_ctx_init:
	mlxsw_sp_router_xm_fini(mlxsw_sp);
err_xm_init:
	mutex_destroy(&mlxsw_sp->router->lock);
	kfree(mlxsw_sp->router);
	return err;
}

void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
				&mlxsw_sp->router->fib_nb);
	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
				    &mlxsw_sp->router->nexthop_nb);
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
	unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
	mlxsw_core_flush_owq();
	WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue));
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_lb_rif_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router);
	mlxsw_sp_router_xm_fini(mlxsw_sp);
	mutex_destroy(&mlxsw_sp->router->lock);
	kfree(mlxsw_sp->router);
}