1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <linux/net_namespace.h>
20 #include <linux/mutex.h>
21 #include <net/netevent.h>
22 #include <net/neighbour.h>
24 #include <net/ip_fib.h>
25 #include <net/ip6_fib.h>
26 #include <net/nexthop.h>
27 #include <net/fib_rules.h>
28 #include <net/ip_tunnels.h>
29 #include <net/l3mdev.h>
30 #include <net/addrconf.h>
31 #include <net/ndisc.h>
33 #include <net/fib_notifier.h>
34 #include <net/switchdev.h>
39 #include "spectrum_cnt.h"
40 #include "spectrum_dpipe.h"
41 #include "spectrum_ipip.h"
42 #include "spectrum_mr.h"
43 #include "spectrum_mr_tcam.h"
44 #include "spectrum_router.h"
45 #include "spectrum_span.h"
49 struct mlxsw_sp_lpm_tree;
50 struct mlxsw_sp_rif_ops;
53 struct list_head nexthop_list;
54 struct list_head neigh_list;
55 struct net_device *dev; /* NULL for underlay RIF */
56 struct mlxsw_sp_fid *fid;
57 unsigned char addr[ETH_ALEN];
61 const struct mlxsw_sp_rif_ops *ops;
62 struct mlxsw_sp *mlxsw_sp;
64 unsigned int counter_ingress;
65 bool counter_ingress_valid;
66 unsigned int counter_egress;
67 bool counter_egress_valid;
70 struct mlxsw_sp_rif_params {
71 struct net_device *dev;
80 struct mlxsw_sp_rif_subport {
81 struct mlxsw_sp_rif common;
91 struct mlxsw_sp_rif_ipip_lb {
92 struct mlxsw_sp_rif common;
93 struct mlxsw_sp_rif_ipip_lb_config lb_config;
94 u16 ul_vr_id; /* Reserved for Spectrum-2. */
95 u16 ul_rif_id; /* Reserved for Spectrum. */
98 struct mlxsw_sp_rif_params_ipip_lb {
99 struct mlxsw_sp_rif_params common;
100 struct mlxsw_sp_rif_ipip_lb_config lb_config;
103 struct mlxsw_sp_rif_ops {
104 enum mlxsw_sp_rif_type type;
107 void (*setup)(struct mlxsw_sp_rif *rif,
108 const struct mlxsw_sp_rif_params *params);
109 int (*configure)(struct mlxsw_sp_rif *rif);
110 void (*deconfigure)(struct mlxsw_sp_rif *rif);
111 struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
112 struct netlink_ext_ack *extack);
113 void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
116 struct mlxsw_sp_router_ops {
117 int (*init)(struct mlxsw_sp *mlxsw_sp);
120 static struct mlxsw_sp_rif *
121 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
122 const struct net_device *dev);
123 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
124 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
125 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
126 struct mlxsw_sp_lpm_tree *lpm_tree);
127 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
128 const struct mlxsw_sp_fib *fib,
130 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
131 const struct mlxsw_sp_fib *fib);
133 static unsigned int *
134 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
135 enum mlxsw_sp_rif_counter_dir dir)
138 case MLXSW_SP_RIF_COUNTER_EGRESS:
139 return &rif->counter_egress;
140 case MLXSW_SP_RIF_COUNTER_INGRESS:
141 return &rif->counter_ingress;
147 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
148 enum mlxsw_sp_rif_counter_dir dir)
151 case MLXSW_SP_RIF_COUNTER_EGRESS:
152 return rif->counter_egress_valid;
153 case MLXSW_SP_RIF_COUNTER_INGRESS:
154 return rif->counter_ingress_valid;
160 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
161 enum mlxsw_sp_rif_counter_dir dir,
165 case MLXSW_SP_RIF_COUNTER_EGRESS:
166 rif->counter_egress_valid = valid;
168 case MLXSW_SP_RIF_COUNTER_INGRESS:
169 rif->counter_ingress_valid = valid;
174 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
175 unsigned int counter_index, bool enable,
176 enum mlxsw_sp_rif_counter_dir dir)
178 char ritr_pl[MLXSW_REG_RITR_LEN];
179 bool is_egress = false;
182 if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
184 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
185 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
189 mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
191 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
194 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
195 struct mlxsw_sp_rif *rif,
196 enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
198 char ricnt_pl[MLXSW_REG_RICNT_LEN];
199 unsigned int *p_counter_index;
203 valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
207 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
208 if (!p_counter_index)
210 mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
211 MLXSW_REG_RICNT_OPCODE_NOP);
212 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
215 *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
219 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
220 unsigned int counter_index)
222 char ricnt_pl[MLXSW_REG_RICNT_LEN];
224 mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
225 MLXSW_REG_RICNT_OPCODE_CLEAR);
226 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
229 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
230 struct mlxsw_sp_rif *rif,
231 enum mlxsw_sp_rif_counter_dir dir)
233 unsigned int *p_counter_index;
236 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
237 if (!p_counter_index)
239 err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
244 err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
246 goto err_counter_clear;
248 err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
249 *p_counter_index, true, dir);
251 goto err_counter_edit;
252 mlxsw_sp_rif_counter_valid_set(rif, dir, true);
257 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
262 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
263 struct mlxsw_sp_rif *rif,
264 enum mlxsw_sp_rif_counter_dir dir)
266 unsigned int *p_counter_index;
268 if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
271 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
272 if (WARN_ON(!p_counter_index))
274 mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
275 *p_counter_index, false, dir);
276 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
278 mlxsw_sp_rif_counter_valid_set(rif, dir, false);
281 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
283 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
284 struct devlink *devlink;
286 devlink = priv_to_devlink(mlxsw_sp->core);
287 if (!devlink_dpipe_table_counter_enabled(devlink,
288 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
290 mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
293 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
295 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
297 mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
300 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
302 struct mlxsw_sp_prefix_usage {
303 DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
306 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
307 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
310 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
311 struct mlxsw_sp_prefix_usage *prefix_usage2)
313 return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
317 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
318 struct mlxsw_sp_prefix_usage *prefix_usage2)
320 memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
324 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
325 unsigned char prefix_len)
327 set_bit(prefix_len, prefix_usage->b);
331 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
332 unsigned char prefix_len)
334 clear_bit(prefix_len, prefix_usage->b);
337 struct mlxsw_sp_fib_key {
338 unsigned char addr[sizeof(struct in6_addr)];
339 unsigned char prefix_len;
342 enum mlxsw_sp_fib_entry_type {
343 MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
344 MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
345 MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
346 MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
347 MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,
349 /* This is a special case of local delivery, where a packet should be
350 * decapsulated on reception. Note that there is no corresponding ENCAP,
351 * because that's a type of next hop, not of FIB entry. (There can be
352 * several next hops in a REMOTE entry, and some of them may be
353 * encapsulating entries.)
355 MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
356 MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
359 struct mlxsw_sp_nexthop_group_info;
360 struct mlxsw_sp_nexthop_group;
361 struct mlxsw_sp_fib_entry;
363 struct mlxsw_sp_fib_node {
364 struct mlxsw_sp_fib_entry *fib_entry;
365 struct list_head list;
366 struct rhash_head ht_node;
367 struct mlxsw_sp_fib *fib;
368 struct mlxsw_sp_fib_key key;
371 struct mlxsw_sp_fib_entry_decap {
372 struct mlxsw_sp_ipip_entry *ipip_entry;
376 static struct mlxsw_sp_fib_entry_priv *
377 mlxsw_sp_fib_entry_priv_create(const struct mlxsw_sp_router_ll_ops *ll_ops)
379 struct mlxsw_sp_fib_entry_priv *priv;
381 if (!ll_ops->fib_entry_priv_size)
382 /* No need to have priv */
385 priv = kzalloc(sizeof(*priv) + ll_ops->fib_entry_priv_size, GFP_KERNEL);
387 return ERR_PTR(-ENOMEM);
388 refcount_set(&priv->refcnt, 1);
393 mlxsw_sp_fib_entry_priv_destroy(struct mlxsw_sp_fib_entry_priv *priv)
398 static void mlxsw_sp_fib_entry_priv_hold(struct mlxsw_sp_fib_entry_priv *priv)
400 refcount_inc(&priv->refcnt);
403 static void mlxsw_sp_fib_entry_priv_put(struct mlxsw_sp_fib_entry_priv *priv)
405 if (!priv || !refcount_dec_and_test(&priv->refcnt))
407 mlxsw_sp_fib_entry_priv_destroy(priv);
410 static void mlxsw_sp_fib_entry_op_ctx_priv_hold(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
411 struct mlxsw_sp_fib_entry_priv *priv)
415 mlxsw_sp_fib_entry_priv_hold(priv);
416 list_add(&priv->list, &op_ctx->fib_entry_priv_list);
419 static void mlxsw_sp_fib_entry_op_ctx_priv_put_all(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
421 struct mlxsw_sp_fib_entry_priv *priv, *tmp;
423 list_for_each_entry_safe(priv, tmp, &op_ctx->fib_entry_priv_list, list)
424 mlxsw_sp_fib_entry_priv_put(priv);
425 INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list);
428 struct mlxsw_sp_fib_entry {
429 struct mlxsw_sp_fib_node *fib_node;
430 enum mlxsw_sp_fib_entry_type type;
431 struct list_head nexthop_group_node;
432 struct mlxsw_sp_nexthop_group *nh_group;
433 struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
434 struct mlxsw_sp_fib_entry_priv *priv;
437 struct mlxsw_sp_fib4_entry {
438 struct mlxsw_sp_fib_entry common;
445 struct mlxsw_sp_fib6_entry {
446 struct mlxsw_sp_fib_entry common;
447 struct list_head rt6_list;
451 struct mlxsw_sp_rt6 {
452 struct list_head list;
453 struct fib6_info *rt;
456 struct mlxsw_sp_lpm_tree {
458 unsigned int ref_count;
459 enum mlxsw_sp_l3proto proto;
460 unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
461 struct mlxsw_sp_prefix_usage prefix_usage;
464 struct mlxsw_sp_fib {
465 struct rhashtable ht;
466 struct list_head node_list;
467 struct mlxsw_sp_vr *vr;
468 struct mlxsw_sp_lpm_tree *lpm_tree;
469 enum mlxsw_sp_l3proto proto;
470 const struct mlxsw_sp_router_ll_ops *ll_ops;
474 u16 id; /* virtual router ID */
475 u32 tb_id; /* kernel fib table id */
476 unsigned int rif_count;
477 struct mlxsw_sp_fib *fib4;
478 struct mlxsw_sp_fib *fib6;
479 struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
480 struct mlxsw_sp_rif *ul_rif;
481 refcount_t ul_rif_refcnt;
484 static int mlxsw_sp_router_ll_basic_init(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
485 enum mlxsw_sp_l3proto proto)
490 static int mlxsw_sp_router_ll_basic_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl)
492 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta),
493 xralta_pl + MLXSW_REG_XRALTA_RALTA_OFFSET);
496 static int mlxsw_sp_router_ll_basic_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl)
498 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst),
499 xralst_pl + MLXSW_REG_XRALST_RALST_OFFSET);
502 static int mlxsw_sp_router_ll_basic_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl)
504 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
505 xraltb_pl + MLXSW_REG_XRALTB_RALTB_OFFSET);
508 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
510 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
511 struct mlxsw_sp_vr *vr,
512 enum mlxsw_sp_l3proto proto)
514 const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
515 struct mlxsw_sp_lpm_tree *lpm_tree;
516 struct mlxsw_sp_fib *fib;
519 err = ll_ops->init(mlxsw_sp, vr->id, proto);
523 lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
524 fib = kzalloc(sizeof(*fib), GFP_KERNEL);
526 return ERR_PTR(-ENOMEM);
527 err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
529 goto err_rhashtable_init;
530 INIT_LIST_HEAD(&fib->node_list);
533 fib->lpm_tree = lpm_tree;
534 fib->ll_ops = ll_ops;
535 mlxsw_sp_lpm_tree_hold(lpm_tree);
536 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
538 goto err_lpm_tree_bind;
542 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
548 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
549 struct mlxsw_sp_fib *fib)
551 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
552 mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
553 WARN_ON(!list_empty(&fib->node_list));
554 rhashtable_destroy(&fib->ht);
558 static struct mlxsw_sp_lpm_tree *
559 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
561 static struct mlxsw_sp_lpm_tree *lpm_tree;
564 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
565 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
566 if (lpm_tree->ref_count == 0)
572 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
573 const struct mlxsw_sp_router_ll_ops *ll_ops,
574 struct mlxsw_sp_lpm_tree *lpm_tree)
576 char xralta_pl[MLXSW_REG_XRALTA_LEN];
578 mlxsw_reg_xralta_pack(xralta_pl, true,
579 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
581 return ll_ops->ralta_write(mlxsw_sp, xralta_pl);
584 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
585 const struct mlxsw_sp_router_ll_ops *ll_ops,
586 struct mlxsw_sp_lpm_tree *lpm_tree)
588 char xralta_pl[MLXSW_REG_XRALTA_LEN];
590 mlxsw_reg_xralta_pack(xralta_pl, false,
591 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
593 ll_ops->ralta_write(mlxsw_sp, xralta_pl);
597 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
598 const struct mlxsw_sp_router_ll_ops *ll_ops,
599 struct mlxsw_sp_prefix_usage *prefix_usage,
600 struct mlxsw_sp_lpm_tree *lpm_tree)
602 char xralst_pl[MLXSW_REG_XRALST_LEN];
605 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
607 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
610 mlxsw_reg_xralst_pack(xralst_pl, root_bin, lpm_tree->id);
611 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
614 mlxsw_reg_xralst_bin_pack(xralst_pl, prefix, last_prefix,
615 MLXSW_REG_RALST_BIN_NO_CHILD);
616 last_prefix = prefix;
618 return ll_ops->ralst_write(mlxsw_sp, xralst_pl);
621 static struct mlxsw_sp_lpm_tree *
622 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
623 const struct mlxsw_sp_router_ll_ops *ll_ops,
624 struct mlxsw_sp_prefix_usage *prefix_usage,
625 enum mlxsw_sp_l3proto proto)
627 struct mlxsw_sp_lpm_tree *lpm_tree;
630 lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
632 return ERR_PTR(-EBUSY);
633 lpm_tree->proto = proto;
634 err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, ll_ops, lpm_tree);
638 err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, ll_ops, prefix_usage, lpm_tree);
640 goto err_left_struct_set;
641 memcpy(&lpm_tree->prefix_usage, prefix_usage,
642 sizeof(lpm_tree->prefix_usage));
643 memset(&lpm_tree->prefix_ref_count, 0,
644 sizeof(lpm_tree->prefix_ref_count));
645 lpm_tree->ref_count = 1;
649 mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
653 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
654 const struct mlxsw_sp_router_ll_ops *ll_ops,
655 struct mlxsw_sp_lpm_tree *lpm_tree)
657 mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
660 static struct mlxsw_sp_lpm_tree *
661 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
662 struct mlxsw_sp_prefix_usage *prefix_usage,
663 enum mlxsw_sp_l3proto proto)
665 const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
666 struct mlxsw_sp_lpm_tree *lpm_tree;
669 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
670 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
671 if (lpm_tree->ref_count != 0 &&
672 lpm_tree->proto == proto &&
673 mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
675 mlxsw_sp_lpm_tree_hold(lpm_tree);
679 return mlxsw_sp_lpm_tree_create(mlxsw_sp, ll_ops, prefix_usage, proto);
682 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
684 lpm_tree->ref_count++;
687 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
688 struct mlxsw_sp_lpm_tree *lpm_tree)
690 const struct mlxsw_sp_router_ll_ops *ll_ops =
691 mlxsw_sp->router->proto_ll_ops[lpm_tree->proto];
693 if (--lpm_tree->ref_count == 0)
694 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, ll_ops, lpm_tree);
697 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
699 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
701 struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
702 struct mlxsw_sp_lpm_tree *lpm_tree;
706 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
709 max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
710 mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
711 mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
712 sizeof(struct mlxsw_sp_lpm_tree),
714 if (!mlxsw_sp->router->lpm.trees)
717 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
718 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
719 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
722 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
723 MLXSW_SP_L3_PROTO_IPV4);
724 if (IS_ERR(lpm_tree)) {
725 err = PTR_ERR(lpm_tree);
726 goto err_ipv4_tree_get;
728 mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
730 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
731 MLXSW_SP_L3_PROTO_IPV6);
732 if (IS_ERR(lpm_tree)) {
733 err = PTR_ERR(lpm_tree);
734 goto err_ipv6_tree_get;
736 mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
741 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
742 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
744 kfree(mlxsw_sp->router->lpm.trees);
748 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
750 struct mlxsw_sp_lpm_tree *lpm_tree;
752 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
753 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
755 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
756 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
758 kfree(mlxsw_sp->router->lpm.trees);
761 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
763 return !!vr->fib4 || !!vr->fib6 ||
764 !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
765 !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
768 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
770 struct mlxsw_sp_vr *vr;
773 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
774 vr = &mlxsw_sp->router->vrs[i];
775 if (!mlxsw_sp_vr_is_used(vr))
781 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
782 const struct mlxsw_sp_fib *fib, u8 tree_id)
784 char xraltb_pl[MLXSW_REG_XRALTB_LEN];
786 mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
787 (enum mlxsw_reg_ralxx_protocol) fib->proto,
789 return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
792 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
793 const struct mlxsw_sp_fib *fib)
795 char xraltb_pl[MLXSW_REG_XRALTB_LEN];
797 /* Bind to tree 0 which is default */
798 mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
799 (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
800 return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
803 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
805 /* For our purpose, squash main, default and local tables into one */
806 if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
807 tb_id = RT_TABLE_MAIN;
811 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
814 struct mlxsw_sp_vr *vr;
817 tb_id = mlxsw_sp_fix_tb_id(tb_id);
819 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
820 vr = &mlxsw_sp->router->vrs[i];
821 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
827 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
830 struct mlxsw_sp_vr *vr;
833 mutex_lock(&mlxsw_sp->router->lock);
834 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
841 mutex_unlock(&mlxsw_sp->router->lock);
845 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
846 enum mlxsw_sp_l3proto proto)
849 case MLXSW_SP_L3_PROTO_IPV4:
851 case MLXSW_SP_L3_PROTO_IPV6:
857 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
859 struct netlink_ext_ack *extack)
861 struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
862 struct mlxsw_sp_fib *fib4;
863 struct mlxsw_sp_fib *fib6;
864 struct mlxsw_sp_vr *vr;
867 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
869 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
870 return ERR_PTR(-EBUSY);
872 fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
874 return ERR_CAST(fib4);
875 fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
878 goto err_fib6_create;
880 mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
881 MLXSW_SP_L3_PROTO_IPV4);
882 if (IS_ERR(mr4_table)) {
883 err = PTR_ERR(mr4_table);
884 goto err_mr4_table_create;
886 mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
887 MLXSW_SP_L3_PROTO_IPV6);
888 if (IS_ERR(mr6_table)) {
889 err = PTR_ERR(mr6_table);
890 goto err_mr6_table_create;
895 vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
896 vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
900 err_mr6_table_create:
901 mlxsw_sp_mr_table_destroy(mr4_table);
902 err_mr4_table_create:
903 mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
905 mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
909 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
910 struct mlxsw_sp_vr *vr)
912 mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
913 vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
914 mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
915 vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
916 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
918 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
922 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
923 struct netlink_ext_ack *extack)
925 struct mlxsw_sp_vr *vr;
927 tb_id = mlxsw_sp_fix_tb_id(tb_id);
928 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
930 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
934 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
936 if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
937 list_empty(&vr->fib6->node_list) &&
938 mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
939 mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
940 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
944 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
945 enum mlxsw_sp_l3proto proto, u8 tree_id)
947 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
949 if (!mlxsw_sp_vr_is_used(vr))
951 if (fib->lpm_tree->id == tree_id)
956 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
957 struct mlxsw_sp_fib *fib,
958 struct mlxsw_sp_lpm_tree *new_tree)
960 struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
963 fib->lpm_tree = new_tree;
964 mlxsw_sp_lpm_tree_hold(new_tree);
965 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
968 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
972 mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
973 fib->lpm_tree = old_tree;
977 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
978 struct mlxsw_sp_fib *fib,
979 struct mlxsw_sp_lpm_tree *new_tree)
981 enum mlxsw_sp_l3proto proto = fib->proto;
982 struct mlxsw_sp_lpm_tree *old_tree;
983 u8 old_id, new_id = new_tree->id;
984 struct mlxsw_sp_vr *vr;
987 old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
988 old_id = old_tree->id;
990 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
991 vr = &mlxsw_sp->router->vrs[i];
992 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
994 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
995 mlxsw_sp_vr_fib(vr, proto),
998 goto err_tree_replace;
1001 memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
1002 sizeof(new_tree->prefix_ref_count));
1003 mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
1004 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
1009 for (i--; i >= 0; i--) {
1010 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
1012 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1013 mlxsw_sp_vr_fib(vr, proto),
1019 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
1021 struct mlxsw_sp_vr *vr;
1025 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
1028 max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
1029 mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
1031 if (!mlxsw_sp->router->vrs)
1034 for (i = 0; i < max_vrs; i++) {
1035 vr = &mlxsw_sp->router->vrs[i];
1042 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
1044 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
1046 /* At this stage we're guaranteed not to have new incoming
1047 * FIB notifications and the work queue is free from FIBs
1048 * sitting on top of mlxsw netdevs. However, we can still
1049 * have other FIBs queued. Flush the queue before flushing
1050 * the device's tables. No need for locks, as we're the only
1053 mlxsw_core_flush_owq();
1054 mlxsw_sp_router_fib_flush(mlxsw_sp);
1055 kfree(mlxsw_sp->router->vrs);
1058 static struct net_device *
1059 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
1061 struct ip_tunnel *tun = netdev_priv(ol_dev);
1062 struct net *net = dev_net(ol_dev);
1064 return dev_get_by_index_rcu(net, tun->parms.link);
1067 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1069 struct net_device *d;
1073 d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1075 tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1077 tb_id = RT_TABLE_MAIN;
1083 static struct mlxsw_sp_rif *
1084 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1085 const struct mlxsw_sp_rif_params *params,
1086 struct netlink_ext_ack *extack);
1088 static struct mlxsw_sp_rif_ipip_lb *
1089 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1090 enum mlxsw_sp_ipip_type ipipt,
1091 struct net_device *ol_dev,
1092 struct netlink_ext_ack *extack)
1094 struct mlxsw_sp_rif_params_ipip_lb lb_params;
1095 const struct mlxsw_sp_ipip_ops *ipip_ops;
1096 struct mlxsw_sp_rif *rif;
1098 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1099 lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1100 .common.dev = ol_dev,
1101 .common.lag = false,
1102 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1105 rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1107 return ERR_CAST(rif);
1108 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1111 static struct mlxsw_sp_ipip_entry *
1112 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1113 enum mlxsw_sp_ipip_type ipipt,
1114 struct net_device *ol_dev)
1116 const struct mlxsw_sp_ipip_ops *ipip_ops;
1117 struct mlxsw_sp_ipip_entry *ipip_entry;
1118 struct mlxsw_sp_ipip_entry *ret = NULL;
1120 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1121 ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1123 return ERR_PTR(-ENOMEM);
1125 ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1127 if (IS_ERR(ipip_entry->ol_lb)) {
1128 ret = ERR_CAST(ipip_entry->ol_lb);
1129 goto err_ol_ipip_lb_create;
1132 ipip_entry->ipipt = ipipt;
1133 ipip_entry->ol_dev = ol_dev;
1135 switch (ipip_ops->ul_proto) {
1136 case MLXSW_SP_L3_PROTO_IPV4:
1137 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1139 case MLXSW_SP_L3_PROTO_IPV6:
1146 err_ol_ipip_lb_create:
1152 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1154 mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1159 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1160 const enum mlxsw_sp_l3proto ul_proto,
1161 union mlxsw_sp_l3addr saddr,
1163 struct mlxsw_sp_ipip_entry *ipip_entry)
1165 u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1166 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1167 union mlxsw_sp_l3addr tun_saddr;
1169 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1172 tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1173 return tun_ul_tb_id == ul_tb_id &&
1174 mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1178 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1179 struct mlxsw_sp_fib_entry *fib_entry,
1180 struct mlxsw_sp_ipip_entry *ipip_entry)
1185 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1190 ipip_entry->decap_fib_entry = fib_entry;
1191 fib_entry->decap.ipip_entry = ipip_entry;
1192 fib_entry->decap.tunnel_index = tunnel_index;
1196 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1197 struct mlxsw_sp_fib_entry *fib_entry)
1199 /* Unlink this node from the IPIP entry that it's the decap entry of. */
1200 fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1201 fib_entry->decap.ipip_entry = NULL;
1202 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1203 1, fib_entry->decap.tunnel_index);
1206 static struct mlxsw_sp_fib_node *
1207 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1208 size_t addr_len, unsigned char prefix_len);
1209 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1210 struct mlxsw_sp_fib_entry *fib_entry);
1213 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1214 struct mlxsw_sp_ipip_entry *ipip_entry)
1216 struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1218 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1219 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1221 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1225 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1226 struct mlxsw_sp_ipip_entry *ipip_entry,
1227 struct mlxsw_sp_fib_entry *decap_fib_entry)
1229 if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1232 decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1234 if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1235 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1238 static struct mlxsw_sp_fib_entry *
1239 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1240 enum mlxsw_sp_l3proto proto,
1241 const union mlxsw_sp_l3addr *addr,
1242 enum mlxsw_sp_fib_entry_type type)
1244 struct mlxsw_sp_fib_node *fib_node;
1245 unsigned char addr_prefix_len;
1246 struct mlxsw_sp_fib *fib;
1247 struct mlxsw_sp_vr *vr;
1252 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1255 fib = mlxsw_sp_vr_fib(vr, proto);
1258 case MLXSW_SP_L3_PROTO_IPV4:
1259 addr4 = be32_to_cpu(addr->addr4);
1262 addr_prefix_len = 32;
1264 case MLXSW_SP_L3_PROTO_IPV6:
1270 fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1272 if (!fib_node || fib_node->fib_entry->type != type)
1275 return fib_node->fib_entry;
/* Resolves the underlay table of the tunnel's overlay device
 * (mlxsw_sp_ipip_dev_ul_tb_id()), fetches the underlay FIB for the tunnel
 * type's underlay protocol and looks up the tunnel source address in it.
 * For IPv4 the address is used in host byte order with a /32 prefix.
 * The entry is returned only if it is currently of type
 * MLXSW_SP_FIB_ENTRY_TYPE_TRAP, i.e. a candidate for promotion.
 *
 * NOTE(review): the local 'fib_node' is declared 'static' although it is
 * assigned from mlxsw_sp_fib_node_lookup() before being read; the static
 * storage class looks unnecessary and makes the function non-reentrant.
 */
1278 /* Given an IPIP entry, find the corresponding decap route. */
1279 static struct mlxsw_sp_fib_entry *
1280 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1281 struct mlxsw_sp_ipip_entry *ipip_entry)
1283 static struct mlxsw_sp_fib_node *fib_node;
1284 const struct mlxsw_sp_ipip_ops *ipip_ops;
1285 unsigned char saddr_prefix_len;
1286 union mlxsw_sp_l3addr saddr;
1287 struct mlxsw_sp_fib *ul_fib;
1288 struct mlxsw_sp_vr *ul_vr;
1294 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1296 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1297 ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1301 ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1302 saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1303 ipip_entry->ol_dev);
1305 switch (ipip_ops->ul_proto) {
1306 case MLXSW_SP_L3_PROTO_IPV4:
1307 saddr4 = be32_to_cpu(saddr.addr4);
1310 saddr_prefix_len = 32;
1317 fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1320 fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1323 return fib_node->fib_entry;
/* Allocate an IPIP entry of type @ipipt for overlay device @ol_dev with
 * mlxsw_sp_ipip_entry_alloc() and append it to the router's ipip_list.
 * The allocation result is checked with IS_ERR() before the list insert.
 */
1326 static struct mlxsw_sp_ipip_entry *
1327 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1328 enum mlxsw_sp_ipip_type ipipt,
1329 struct net_device *ol_dev)
1331 struct mlxsw_sp_ipip_entry *ipip_entry;
1333 ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1334 if (IS_ERR(ipip_entry))
1337 list_add_tail(&ipip_entry->ipip_list_node,
1338 &mlxsw_sp->router->ipip_list);
/* Inverse of mlxsw_sp_ipip_entry_create(): unlink @ipip_entry from the
 * router's ipip_list and release it with mlxsw_sp_ipip_entry_dealloc().
 */
1344 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1345 struct mlxsw_sp_ipip_entry *ipip_entry)
1347 list_del(&ipip_entry->ipip_list_node);
1348 mlxsw_sp_ipip_entry_dealloc(ipip_entry);
/* Check whether @ipip_entry matches the given decap parameters.
 *
 * The underlay table is l3mdev_fib_table(@ul_dev), or RT_TABLE_MAIN when
 * that yields 0. The entry's tunnel type must use @ul_proto as its
 * underlay protocol; the final verdict comes from
 * mlxsw_sp_ipip_entry_saddr_matches() on @ul_dip and the underlay table.
 */
1352 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1353 const struct net_device *ul_dev,
1354 enum mlxsw_sp_l3proto ul_proto,
1355 union mlxsw_sp_l3addr ul_dip,
1356 struct mlxsw_sp_ipip_entry *ipip_entry)
1358 u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1359 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1361 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1364 return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1365 ul_tb_id, ipip_entry);
/* Resolves @ul_dev_ifindex to a netdevice in the driver's network
 * namespace and walks the router's ipip_list, testing each entry with
 * mlxsw_sp_ipip_entry_matches_decap().
 * NOTE(review): dev_get_by_index_rcu() requires the RCU read lock; the
 * locking is not visible in these lines - confirm.
 */
1368 /* Given decap parameters, find the corresponding IPIP entry. */
1369 static struct mlxsw_sp_ipip_entry *
1370 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
1371 enum mlxsw_sp_l3proto ul_proto,
1372 union mlxsw_sp_l3addr ul_dip)
1374 struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1375 struct net_device *ul_dev;
1379 ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
1383 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1385 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
/* Determine whether @dev is a tunnel device of a type known to the router.
 *
 * Iterates over all tunnel types below MLXSW_SP_IPIP_TYPE_MAX and compares
 * @dev->type with each type's ipip_ops->dev_type.
 * @p_type: presumably receives the matching type; callers in this file
 *          also pass NULL, so it appears to be optional - confirm.
 */
1399 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1400 const struct net_device *dev,
1401 enum mlxsw_sp_ipip_type *p_type)
1403 struct mlxsw_sp_router *router = mlxsw_sp->router;
1404 const struct mlxsw_sp_ipip_ops *ipip_ops;
1405 enum mlxsw_sp_ipip_type ipipt;
1407 for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1408 ipip_ops = router->ipip_ops_arr[ipipt];
1409 if (dev->type == ipip_ops->dev_type) {
/* Return whether @dev is a recognised IPIP overlay (tunnel) device.
 * Thin wrapper around mlxsw_sp_netdev_ipip_type() that discards the type.
 */
1418 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1419 const struct net_device *dev)
1421 return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
/* Search the router's ipip_list for the entry whose overlay device is
 * @ol_dev (pointer comparison). Callers in this file treat the result as
 * possibly absent, so a miss presumably yields NULL - confirm.
 */
1424 static struct mlxsw_sp_ipip_entry *
1425 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1426 const struct net_device *ol_dev)
1428 struct mlxsw_sp_ipip_entry *ipip_entry;
1430 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1432 if (ipip_entry->ol_dev == ol_dev)
/* Find the next IPIP entry whose underlay device is @ul_dev.
 *
 * @start: entry to continue after; list_prepare_entry() makes a NULL
 *         @start begin the walk at the head of the router's ipip_list.
 *
 * For each candidate the underlay device is obtained with
 * __mlxsw_sp_ipip_netdev_ul_dev_get() on the entry's overlay device and
 * compared against @ul_dev.
 */
1438 static struct mlxsw_sp_ipip_entry *
1439 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1440 const struct net_device *ul_dev,
1441 struct mlxsw_sp_ipip_entry *start)
1443 struct mlxsw_sp_ipip_entry *ipip_entry;
1445 ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1447 list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1449 struct net_device *ol_dev = ipip_entry->ol_dev;
1450 struct net_device *ipip_ul_dev;
1453 ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1456 if (ipip_ul_dev == ul_dev)
/* Return whether @dev is the underlay device of at least one offloaded
 * tunnel. The lookup in the ipip_list runs under the router lock; the
 * returned entry pointer is only used as a truth value.
 */
1463 bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1464 const struct net_device *dev)
1468 mutex_lock(&mlxsw_sp->router->lock);
1469 is_ipip_ul = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1470 mutex_unlock(&mlxsw_sp->router->lock);
/* Ask the per-type ops of tunnel type @ipipt whether @ol_dev can be
 * offloaded. @ipipt indexes ipip_ops_arr directly, so it must be a valid
 * tunnel type.
 */
1475 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1476 const struct net_device *ol_dev,
1477 enum mlxsw_sp_ipip_type ipipt)
1479 const struct mlxsw_sp_ipip_ops *ops
1480 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1482 return ops->can_offload(mlxsw_sp, ol_dev);
/* Handle registration of overlay device @ol_dev.
 *
 * Determines the tunnel type and, if the tunnel can be offloaded, derives
 * its underlay table, underlay protocol and source address. An IPIP entry
 * is created only when mlxsw_sp_ipip_demote_tunnel_by_saddr() reports that
 * no other tunnel with the same local address had to be demoted.
 * Returns PTR_ERR() of the creation result on failure.
 *
 * NOTE(review): ipipt starts as MLXSW_SP_IPIP_TYPE_MAX and the result of
 * mlxsw_sp_netdev_ipip_type() is ignored, so this relies on callers only
 * passing recognised tunnel devices - confirm.
 */
1485 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1486 struct net_device *ol_dev)
1488 enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1489 struct mlxsw_sp_ipip_entry *ipip_entry;
1490 enum mlxsw_sp_l3proto ul_proto;
1491 union mlxsw_sp_l3addr saddr;
1494 mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1495 if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1496 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1497 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1498 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1499 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1502 ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1504 if (IS_ERR(ipip_entry))
1505 return PTR_ERR(ipip_entry);
/* Handle unregistration of overlay device @ol_dev: look up its IPIP entry
 * and destroy it. Presumably a no-op when no entry exists - confirm.
 */
1512 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1513 struct net_device *ol_dev)
1515 struct mlxsw_sp_ipip_entry *ipip_entry;
1517 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1519 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
/* Overlay device of @ipip_entry came up: if a matching decap route exists
 * (mlxsw_sp_ipip_entry_find_decap()), promote it to an IPIP decap entry.
 */
1523 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1524 struct mlxsw_sp_ipip_entry *ipip_entry)
1526 struct mlxsw_sp_fib_entry *decap_fib_entry;
1528 decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1529 if (decap_fib_entry)
1530 mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
/* Program (or disable, per @enable) the loopback RIF @lb_rif in the device.
 *
 * For an IPv4 underlay the RITR payload is packed as a loopback interface
 * using the RIF's index, VR id and the netdevice MTU, followed by the
 * IPIP loopback parameters (@ul_vr_id, @ul_rif_id, source address in host
 * byte order, output key). An IPv6 underlay returns -EAFNOSUPPORT.
 * Otherwise returns the result of writing the RITR register.
 */
1535 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1536 u16 ul_rif_id, bool enable)
1538 struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1539 struct mlxsw_sp_rif *rif = &lb_rif->common;
1540 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1541 char ritr_pl[MLXSW_REG_RITR_LEN];
1544 switch (lb_cf.ul_protocol) {
1545 case MLXSW_SP_L3_PROTO_IPV4:
1546 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1547 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1548 rif->rif_index, rif->vr_id, rif->dev->mtu);
1549 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1550 MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1551 ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1554 case MLXSW_SP_L3_PROTO_IPV6:
1555 return -EAFNOSUPPORT;
1558 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Overlay device MTU changed: re-program the tunnel's loopback RIF (which
 * picks up the new netdevice MTU in mlxsw_sp_rif_ipip_lb_op()) and then
 * cache the new MTU in the RIF's common state.
 */
1561 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1562 struct net_device *ol_dev)
1564 struct mlxsw_sp_ipip_entry *ipip_entry;
1565 struct mlxsw_sp_rif_ipip_lb *lb_rif;
1568 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1570 lb_rif = ipip_entry->ol_lb;
1571 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1572 lb_rif->ul_rif_id, true);
1575 lb_rif->common.mtu = ol_dev->mtu;
/* Netdevice-level wrapper: find the IPIP entry of @ol_dev and forward the
 * "up" event to mlxsw_sp_ipip_entry_ol_up_event().
 */
1582 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1583 struct net_device *ol_dev)
1585 struct mlxsw_sp_ipip_entry *ipip_entry;
1587 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1589 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
/* Overlay device of @ipip_entry went down: if the entry currently owns a
 * decap route, demote that route back to a trap.
 */
1593 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1594 struct mlxsw_sp_ipip_entry *ipip_entry)
1596 if (ipip_entry->decap_fib_entry)
1597 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
/* Netdevice-level wrapper: find the IPIP entry of @ol_dev and forward the
 * "down" event to mlxsw_sp_ipip_entry_ol_down_event().
 */
1600 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1601 struct net_device *ol_dev)
1603 struct mlxsw_sp_ipip_entry *ipip_entry;
1605 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1607 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
/* Forward declaration: used by mlxsw_sp_ipip_entry_ol_lb_update() to move
 * next hops from an old loopback RIF to its replacement.
 */
1610 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1611 struct mlxsw_sp_rif *old_rif,
1612 struct mlxsw_sp_rif *new_rif);
/* Replace the loopback RIF of @ipip_entry with a freshly created one.
 *
 * A new loopback RIF is created first; on failure its PTR_ERR() is
 * returned and the old RIF stays in place. On success the entry is
 * pointed at the new RIF, next hops are migrated from the old RIF to the
 * new one with mlxsw_sp_nexthop_rif_migrate(), and the old RIF is
 * destroyed.
 * NOTE(review): the migration is presumably conditional on the
 * keep_encap argument - confirm.
 */
1614 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1615 struct mlxsw_sp_ipip_entry *ipip_entry,
1617 struct netlink_ext_ack *extack)
1619 struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1620 struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1622 new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1626 if (IS_ERR(new_lb_rif))
1627 return PTR_ERR(new_lb_rif);
1628 ipip_entry->ol_lb = new_lb_rif;
1631 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1632 &new_lb_rif->common);
1634 mlxsw_sp_rif_destroy(&old_lb_rif->common);
/* Forward declaration: used by __mlxsw_sp_ipip_entry_update_tunnel() to
 * refresh next hops that use a given RIF.
 */
1639 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1640 struct mlxsw_sp_rif *rif);
/* Summary of the visible flow of __mlxsw_sp_ipip_entry_update_tunnel():
 *  1. If the entry owns a decap route, demote it first.
 *  2. If @recreate_loopback, replace the loopback RIF through
 *     mlxsw_sp_ipip_entry_ol_lb_update(), passing keep_encap and @extack;
 *     otherwise, if @update_nexthops, refresh next hops on the current
 *     loopback RIF with mlxsw_sp_nexthop_rif_update().
 *  3. If the overlay device has IFF_UP set, run the "up" handling again,
 *     which re-promotes the decap route when one is found.
 */
1643 * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1644 * @mlxsw_sp: mlxsw_sp.
1645 * @ipip_entry: IPIP entry.
1646 * @recreate_loopback: Recreates the associated loopback RIF.
1647 * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1648 * relevant when recreate_loopback is true.
1649 * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1650 * is only relevant when recreate_loopback is false.
1653 * Return: Non-zero value on failure.
1655 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1656 struct mlxsw_sp_ipip_entry *ipip_entry,
1657 bool recreate_loopback,
1659 bool update_nexthops,
1660 struct netlink_ext_ack *extack)
1664 /* RIFs can't be edited, so to update loopback, we need to destroy and
1665 * recreate it. That creates a window of opportunity where RALUE and
1666 * RATR registers end up referencing a RIF that's already gone. RATRs
1667 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1668 * of RALUE, demote the decap route back.
1670 if (ipip_entry->decap_fib_entry)
1671 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1673 if (recreate_loopback) {
1674 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1675 keep_encap, extack);
1678 } else if (update_nexthops) {
1679 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1680 &ipip_entry->ol_lb->common);
1683 if (ipip_entry->ol_dev->flags & IFF_UP)
1684 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
/* Overlay device @ol_dev moved to a different VRF: update the tunnel with
 * recreate_loopback=true, keep_encap=false and update_nexthops=false.
 */
1689 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1690 struct net_device *ol_dev,
1691 struct netlink_ext_ack *extack)
1693 struct mlxsw_sp_ipip_entry *ipip_entry =
1694 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1699 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1700 true, false, false, extack);
/* Underlay device @ul_dev of @ipip_entry moved to a different VRF.
 *
 * The new underlay table is l3mdev_fib_table(@ul_dev), or RT_TABLE_MAIN
 * when that yields 0. If mlxsw_sp_ipip_demote_tunnel_by_saddr() reports a
 * local-address conflict, *demote_this is set so that the caller demotes
 * this tunnel. Otherwise the tunnel is updated with
 * recreate_loopback=true, keep_encap=true and update_nexthops=false.
 */
1704 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1705 struct mlxsw_sp_ipip_entry *ipip_entry,
1706 struct net_device *ul_dev,
1708 struct netlink_ext_ack *extack)
1710 u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1711 enum mlxsw_sp_l3proto ul_proto;
1712 union mlxsw_sp_l3addr saddr;
1714 /* Moving underlay to a different VRF might cause local address
1715 * conflict, and the conflicting tunnels need to be demoted.
1717 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1718 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1719 if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1722 *demote_this = true;
1726 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1727 true, true, false, extack);
/* Underlay device came up: refresh next hops of the tunnel while keeping
 * its loopback RIF (recreate_loopback=false, update_nexthops=true).
 * @ul_dev is not used in the visible body.
 */
1731 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1732 struct mlxsw_sp_ipip_entry *ipip_entry,
1733 struct net_device *ul_dev)
1735 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1736 false, false, true, NULL);
/* Underlay device went down: same update as the "up" case, i.e. refresh
 * next hops only (recreate_loopback=false, update_nexthops=true).
 * @ul_dev is not used in the visible body.
 */
1740 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1741 struct mlxsw_sp_ipip_entry *ipip_entry,
1742 struct net_device *ul_dev)
1744 /* A down underlay device causes encapsulated packets to not be
1745 * forwarded, but decap still works. So refresh next hops without
1746 * touching anything else.
1748 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1749 false, false, true, NULL);
/* Handle a configuration change of overlay device @ol_dev.
 *
 * If the changed tunnel can no longer be offloaded, it is demoted with
 * mlxsw_sp_ipip_entry_demote_tunnel(). Otherwise the per-type
 * ol_netdev_change() callback is invoked to apply the change.
 */
1753 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1754 struct net_device *ol_dev,
1755 struct netlink_ext_ack *extack)
1757 const struct mlxsw_sp_ipip_ops *ipip_ops;
1758 struct mlxsw_sp_ipip_entry *ipip_entry;
1761 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1763 /* A change might make a tunnel eligible for offloading, but
1764 * that is currently not implemented. What falls to slow path
1769 /* A change might make a tunnel not eligible for offloading. */
1770 if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1771 ipip_entry->ipipt)) {
1772 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1776 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1777 err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
/* Stop offloading the tunnel described by @ipip_entry: if its overlay
 * device is up, run the "down" handling first (demoting any decap route),
 * then destroy the entry. @ipip_entry must not be used afterwards.
 */
1781 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1782 struct mlxsw_sp_ipip_entry *ipip_entry)
1784 struct net_device *ol_dev = ipip_entry->ol_dev;
1786 if (ol_dev->flags & IFF_UP)
1787 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1788 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
/* Implementation note for mlxsw_sp_ipip_demote_tunnel_by_saddr(): the list
 * is walked with list_for_each_entry_safe() because the matching entry is
 * destroyed (and unlinked) by mlxsw_sp_ipip_entry_demote_tunnel() during
 * the walk. Callers use the return value as "a conflicting tunnel was
 * found and demoted".
 */
1791 /* The configuration where several tunnels have the same local address in the
1792 * same underlay table needs special treatment in the HW. That is currently not
1793 * implemented in the driver. This function finds and demotes the first tunnel
1794 * with a given source address, except the one passed in in the argument
1798 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1799 enum mlxsw_sp_l3proto ul_proto,
1800 union mlxsw_sp_l3addr saddr,
1802 const struct mlxsw_sp_ipip_entry *except)
1804 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1806 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1808 if (ipip_entry != except &&
1809 mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1810 ul_tb_id, ipip_entry)) {
1811 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
/* Demote every offloaded tunnel whose underlay device is @ul_dev.
 * Uses the _safe list iterator because demotion destroys and unlinks the
 * current entry.
 */
1819 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1820 struct net_device *ul_dev)
1822 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1824 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1826 struct net_device *ol_dev = ipip_entry->ol_dev;
1827 struct net_device *ipip_ul_dev;
1830 ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1832 if (ipip_ul_dev == ul_dev)
1833 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
/* Dispatch a netdevice notifier @event for overlay (tunnel) device @ol_dev.
 *
 * Runs entirely under the router lock. Visible dispatch:
 *  - NETDEV_REGISTER / NETDEV_UNREGISTER: create / destroy the IPIP entry.
 *  - up / down events: promote / demote the decap route.
 *  - NETDEV_CHANGEUPPER: when the new upper is an L3 master (VRF), handle
 *    the VRF move; the extack comes from @info.
 *  - tunnel change: mlxsw_sp_netdevice_ipip_ol_change_event().
 *  - NETDEV_CHANGEMTU: re-program the loopback RIF with the new MTU.
 */
1837 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1838 struct net_device *ol_dev,
1839 unsigned long event,
1840 struct netdev_notifier_info *info)
1842 struct netdev_notifier_changeupper_info *chup;
1843 struct netlink_ext_ack *extack;
1846 mutex_lock(&mlxsw_sp->router->lock);
1848 case NETDEV_REGISTER:
1849 err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1851 case NETDEV_UNREGISTER:
1852 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1855 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1858 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1860 case NETDEV_CHANGEUPPER:
1861 chup = container_of(info, typeof(*chup), info);
1862 extack = info->extack;
1863 if (netif_is_l3_master(chup->upper_dev))
1864 err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1869 extack = info->extack;
1870 err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1873 case NETDEV_CHANGEMTU:
1874 err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1877 mutex_unlock(&mlxsw_sp->router->lock);
/* Dispatch a netdevice notifier @event on underlay device @ul_dev for a
 * single @ipip_entry. No locking is visible here; the caller
 * mlxsw_sp_netdevice_ipip_ul_event() holds the router lock.
 *
 * NETDEV_CHANGEUPPER with an L3 master upper is handled as a VRF move; up
 * and down events refresh the tunnel's next hops.
 */
1882 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1883 struct mlxsw_sp_ipip_entry *ipip_entry,
1884 struct net_device *ul_dev,
1886 unsigned long event,
1887 struct netdev_notifier_info *info)
1889 struct netdev_notifier_changeupper_info *chup;
1890 struct netlink_ext_ack *extack;
1893 case NETDEV_CHANGEUPPER:
1894 chup = container_of(info, typeof(*chup), info);
1895 extack = info->extack;
1896 if (netif_is_l3_master(chup->upper_dev))
1897 return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1905 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1908 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
/* Deliver a netdevice notifier @event on underlay device @ul_dev to every
 * offloaded tunnel that uses it, under the router lock.
 *
 * Entries are visited by repeatedly calling
 * mlxsw_sp_ipip_entry_find_by_ul_dev(). If handling an entry fails, all
 * tunnels on @ul_dev are demoted via
 * mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(). If the handler sets
 * demote_this, the current entry is demoted; the walk then presumably
 * resumes from the previous entry, or from the list head when the
 * demoted entry was first - confirm.
 */
1916 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1917 struct net_device *ul_dev,
1918 unsigned long event,
1919 struct netdev_notifier_info *info)
1921 struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1924 mutex_lock(&mlxsw_sp->router->lock);
1925 while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1928 struct mlxsw_sp_ipip_entry *prev;
1929 bool demote_this = false;
1931 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1932 ul_dev, &demote_this,
1935 mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1941 if (list_is_first(&ipip_entry->ipip_list_node,
1942 &mlxsw_sp->router->ipip_list))
1945 /* This can't be cached from previous iteration,
1946 * because that entry could be gone now.
1948 prev = list_prev_entry(ipip_entry,
1950 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1954 mutex_unlock(&mlxsw_sp->router->lock);
/* Record the NVE decap configuration and, if the local route already
 * exists, turn it into an NVE decap entry. Runs under the router lock.
 *
 * A WARN_ON_ONCE fires if a decap configuration is already marked valid.
 * The table id, tunnel index, protocol and source IP are stored in
 * router->nve_decap_config, which is then marked valid. The matching
 * ip2me route is looked up with type MLXSW_SP_FIB_ENTRY_TYPE_TRAP; when
 * found it gets the tunnel index and type
 * MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP and is updated. If the update fails,
 * the entry is reverted to a trap and updated again.
 */
1959 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1960 enum mlxsw_sp_l3proto ul_proto,
1961 const union mlxsw_sp_l3addr *ul_sip,
1964 enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1965 struct mlxsw_sp_router *router = mlxsw_sp->router;
1966 struct mlxsw_sp_fib_entry *fib_entry;
1969 mutex_lock(&mlxsw_sp->router->lock);
1971 if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
1976 router->nve_decap_config.ul_tb_id = ul_tb_id;
1977 router->nve_decap_config.tunnel_index = tunnel_index;
1978 router->nve_decap_config.ul_proto = ul_proto;
1979 router->nve_decap_config.ul_sip = *ul_sip;
1980 router->nve_decap_config.valid = true;
1982 /* It is valid to create a tunnel with a local IP and only later
1983 * assign this IP address to a local interface
1985 fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1991 fib_entry->decap.tunnel_index = tunnel_index;
1992 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1994 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1996 goto err_fib_entry_update;
2000 err_fib_entry_update:
2001 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2002 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2004 mutex_unlock(&mlxsw_sp->router->lock);
/* Undo mlxsw_sp_router_nve_promote_decap(). Runs under the router lock.
 *
 * A WARN_ON_ONCE fires if no decap configuration is currently valid.
 * The configuration is marked invalid, the ip2me route of type
 * MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP is looked up and, when found,
 * switched back to MLXSW_SP_FIB_ENTRY_TYPE_TRAP and updated. The update
 * result is not checked.
 */
2008 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2009 enum mlxsw_sp_l3proto ul_proto,
2010 const union mlxsw_sp_l3addr *ul_sip)
2012 enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2013 struct mlxsw_sp_router *router = mlxsw_sp->router;
2014 struct mlxsw_sp_fib_entry *fib_entry;
2016 mutex_lock(&mlxsw_sp->router->lock);
2018 if (WARN_ON_ONCE(!router->nve_decap_config.valid))
2021 router->nve_decap_config.valid = false;
2023 fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2029 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2030 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2032 mutex_unlock(&mlxsw_sp->router->lock);
/* Return true when an NVE decap configuration is valid and its underlay
 * table id, protocol and source IP (compared with memcmp()) all match the
 * given parameters.
 */
2035 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2037 enum mlxsw_sp_l3proto ul_proto,
2038 const union mlxsw_sp_l3addr *ul_sip)
2040 struct mlxsw_sp_router *router = mlxsw_sp->router;
2042 return router->nve_decap_config.valid &&
2043 router->nve_decap_config.ul_tb_id == ul_tb_id &&
2044 router->nve_decap_config.ul_proto == ul_proto &&
2045 !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
/* Hash-table key of a neighbour entry: the kernel neighbour pointer
 * itself (see mlxsw_sp_neigh_ht_params, whose key_len is sizeof this
 * struct).
 */
2049 struct mlxsw_sp_neigh_key {
2050 struct neighbour *n;
/* Driver-side state for one kernel neighbour.
 *
 * rif_list_node links the entry into its RIF's neigh_list, ht_node into
 * the router's neigh_ht (keyed by 'key'). 'ha' holds the hardware address
 * programmed to the device. nexthop_neighs_list_node links the entry into
 * the router's nexthop_neighs_list. counter_index is a flow counter,
 * meaningful only while the entry's counter_valid flag is set.
 */
2053 struct mlxsw_sp_neigh_entry {
2054 struct list_head rif_list_node;
2055 struct rhash_head ht_node;
2056 struct mlxsw_sp_neigh_key key;
2059 unsigned char ha[ETH_ALEN];
2060 struct list_head nexthop_list; /* list of nexthops using
2063 struct list_head nexthop_neighs_list_node;
2064 unsigned int counter_index;
/* rhashtable layout for neighbour entries: keyed by the embedded
 * struct mlxsw_sp_neigh_key and chained through ht_node.
 */
2068 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
2069 .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
2070 .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
2071 .key_len = sizeof(struct mlxsw_sp_neigh_key),
/* Iterator over the neighbour entries of @rif.
 *
 * Presumably a NULL @neigh_entry starts the iteration: the first entry of
 * rif->neigh_list is returned unless the list is empty. Otherwise the
 * entry following @neigh_entry is returned, unless @neigh_entry is the
 * last one. The end-of-list result is presumably NULL - confirm.
 */
2074 struct mlxsw_sp_neigh_entry *
2075 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2076 struct mlxsw_sp_neigh_entry *neigh_entry)
2079 if (list_empty(&rif->neigh_list))
2082 return list_first_entry(&rif->neigh_list,
2083 typeof(*neigh_entry),
2086 if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2088 return list_next_entry(neigh_entry, rif_list_node);
/* Return the address family of the neighbour table that @neigh_entry's
 * kernel neighbour belongs to (compared against AF_INET / AF_INET6
 * elsewhere in this file).
 */
2091 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
2093 return neigh_entry->key.n->tbl->family;
/* Return a pointer to the hardware address cached in @neigh_entry. */
2097 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
2099 return neigh_entry->ha;
/* Return the IPv4 address of @neigh_entry in host byte order, read from
 * the neighbour's primary_key. Only meaningful for IPv4 neighbours.
 */
2102 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2104 struct neighbour *n;
2106 n = neigh_entry->key.n;
2107 return ntohl(*((__be32 *) n->primary_key));
/* Return a pointer to the IPv6 address of @neigh_entry, i.e. the
 * neighbour's primary_key viewed as struct in6_addr. The pointer refers
 * to memory owned by the neighbour, not a copy.
 */
2111 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2113 struct neighbour *n;
2115 n = neigh_entry->key.n;
2116 return (struct in6_addr *) &n->primary_key;
/* Read the flow counter bound to @neigh_entry through
 * mlxsw_sp_flow_counter_get(). The counter is queried only when the
 * entry's counter_valid flag is set; presumably an error is returned
 * otherwise - confirm.
 */
2119 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2120 struct mlxsw_sp_neigh_entry *neigh_entry,
2123 if (!neigh_entry->counter_valid)
2126 return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
/* Allocate a zeroed neighbour entry (GFP_KERNEL, may sleep) and initialise
 * its key, RIF index and empty nexthop_list. The entry is not inserted
 * into any table or list here.
 */
2130 static struct mlxsw_sp_neigh_entry *
2131 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2134 struct mlxsw_sp_neigh_entry *neigh_entry;
2136 neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2140 neigh_entry->key.n = n;
2141 neigh_entry->rif = rif;
2142 INIT_LIST_HEAD(&neigh_entry->nexthop_list);
/* Counterpart of mlxsw_sp_neigh_entry_alloc(); presumably releases the
 * entry's memory - confirm.
 */
2147 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
/* Insert @neigh_entry into the router's neighbour hash table. Returns the
 * result of rhashtable_insert_fast().
 */
2153 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2154 struct mlxsw_sp_neigh_entry *neigh_entry)
2156 return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2157 &neigh_entry->ht_node,
2158 mlxsw_sp_neigh_ht_params);
/* Remove @neigh_entry from the router's neighbour hash table. The result
 * of rhashtable_remove_fast() is ignored.
 */
2162 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2163 struct mlxsw_sp_neigh_entry *neigh_entry)
2165 rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2166 &neigh_entry->ht_node,
2167 mlxsw_sp_neigh_ht_params);
/* Decide whether a counter should be allocated for @neigh_entry.
 *
 * Picks the dpipe host table name that matches the entry's address family
 * (HOST4 or HOST6) and returns whether counters are enabled for that
 * table in devlink.
 */
2171 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2172 struct mlxsw_sp_neigh_entry *neigh_entry)
2174 struct devlink *devlink;
2175 const char *table_name;
2177 switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2179 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2182 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2189 devlink = priv_to_devlink(mlxsw_sp->core);
2190 return devlink_dpipe_table_counter_enabled(devlink, table_name);
/* Best-effort counter allocation for @neigh_entry: nothing is reported to
 * the caller. counter_valid is set only when counters are wanted for this
 * entry and mlxsw_sp_flow_counter_alloc() succeeded.
 */
2194 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2195 struct mlxsw_sp_neigh_entry *neigh_entry)
2197 if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2200 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2203 neigh_entry->counter_valid = true;
/* Release the flow counter of @neigh_entry, if it has a valid one, and
 * clear counter_valid.
 */
2207 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2208 struct mlxsw_sp_neigh_entry *neigh_entry)
2210 if (!neigh_entry->counter_valid)
2212 mlxsw_sp_flow_counter_free(mlxsw_sp,
2213 neigh_entry->counter_index);
2214 neigh_entry->counter_valid = false;
/* Create and register a neighbour entry for kernel neighbour @n.
 *
 * The RIF is resolved from the neighbour's netdevice. The entry is
 * allocated with that RIF's index and inserted into the neighbour hash
 * table; then a counter is allocated on a best-effort basis and the entry
 * is linked into the RIF's neigh_list.
 *
 * Returns an ERR_PTR: -EINVAL (presumably when no RIF exists for the
 * device), -ENOMEM (presumably on allocation failure), or the
 * hash-insert error, in which case the entry is freed first.
 */
2217 static struct mlxsw_sp_neigh_entry *
2218 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2220 struct mlxsw_sp_neigh_entry *neigh_entry;
2221 struct mlxsw_sp_rif *rif;
2224 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2226 return ERR_PTR(-EINVAL);
2228 neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2230 return ERR_PTR(-ENOMEM);
2232 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2234 goto err_neigh_entry_insert;
2236 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2237 list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2241 err_neigh_entry_insert:
2242 mlxsw_sp_neigh_entry_free(neigh_entry);
2243 return ERR_PTR(err);
/* Tear down a neighbour entry in the reverse order of
 * mlxsw_sp_neigh_entry_create(): unlink from the RIF list, free the
 * counter, remove from the hash table, then free the entry.
 */
2247 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2248 struct mlxsw_sp_neigh_entry *neigh_entry)
2250 list_del(&neigh_entry->rif_list_node);
2251 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2252 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2253 mlxsw_sp_neigh_entry_free(neigh_entry);
/* Look up the driver entry for kernel neighbour @n in the router's
 * neighbour hash table. rhashtable_lookup_fast() returns NULL when no
 * entry matches.
 * NOTE(review): 'key' is presumably initialised from @n before the
 * lookup - confirm.
 */
2256 static struct mlxsw_sp_neigh_entry *
2257 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2259 struct mlxsw_sp_neigh_key key;
2262 return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2263 &key, mlxsw_sp_neigh_ht_params);
/* Initialise the neighbour-activity polling interval, in milliseconds.
 *
 * With IPv6 enabled the interval is the smaller of the ARP and ND
 * DELAY_PROBE_TIME values; otherwise only the ARP value is used. The
 * value is converted from jiffies before being stored.
 */
2267 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2269 unsigned long interval;
2271 #if IS_ENABLED(CONFIG_IPV6)
2272 interval = min_t(unsigned long,
2273 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2274 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2276 interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2278 mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
/* Process one IPv4 entry of a RAUHTD activity dump.
 *
 * Unpacks the RIF index and destination IP of entry @ent_index. The RIF
 * index is checked against the MAX_RIFS resource (WARN_ON_ONCE) and
 * against the router's rifs[] table; an unknown RIF is logged
 * rate-limited. The neighbour is then looked up in arp_tbl on the RIF's
 * netdevice and poked with neigh_event_send() so the kernel treats it as
 * active.
 * NOTE(review): neigh_lookup() takes a reference that must be dropped
 * with neigh_release(); the release is not visible here - confirm.
 */
2281 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2285 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2286 struct net_device *dev;
2287 struct neighbour *n;
2292 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2294 if (WARN_ON_ONCE(rif >= max_rifs))
2296 if (!mlxsw_sp->router->rifs[rif]) {
2297 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2302 dev = mlxsw_sp->router->rifs[rif]->dev;
2303 n = neigh_lookup(&arp_tbl, &dipn, dev);
2307 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2308 neigh_event_send(n, NULL);
/* Process one IPv6 record of a RAUHTD activity dump (built only with
 * CONFIG_IPV6; a second definition with the same name follows for the
 * other case).
 *
 * Unpacks the RIF index and destination IP of record @rec_index, logs
 * rate-limited if the RIF is unknown, otherwise looks the neighbour up in
 * nd_tbl on the RIF's netdevice and pokes it with neigh_event_send().
 *
 * NOTE(review): unlike the IPv4 variant, no bound check of 'rif' against
 * MAX_RIFS is visible before indexing router->rifs[] - confirm whether
 * one is needed.
 */
2312 #if IS_ENABLED(CONFIG_IPV6)
2313 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2317 struct net_device *dev;
2318 struct neighbour *n;
2319 struct in6_addr dip;
2322 mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2325 if (!mlxsw_sp->router->rifs[rif]) {
2326 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2330 dev = mlxsw_sp->router->rifs[rif]->dev;
2331 n = neigh_lookup(&nd_tbl, &dip, dev);
2335 netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2336 neigh_event_send(n, NULL);
2340 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
/* Process one IPv4 record of a RAUHTD dump. A record holds up to
 * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC entries; the global entry index is
 * rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i.
 */
2347 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2354 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2356 /* Hardware starts counting at 0, so add 1. */
2359 /* Each record consists of several neighbour entries. */
2360 for (i = 0; i < num_entries; i++) {
2363 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2364 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
/* Process one IPv6 record of a RAUHTD dump by handing it to the
 * per-entry IPv6 handler.
 */
2370 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2374 /* One record contains one entry. */
2375 mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
/* Dispatch record @rec_index of the RAUHTD payload to the IPv4 or IPv6
 * record handler according to the record type stored in the payload.
 */
2379 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2380 char *rauhtd_pl, int rec_index)
2382 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2383 case MLXSW_REG_RAUHTD_TYPE_IPV4:
2384 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2387 case MLXSW_REG_RAUHTD_TYPE_IPV6:
2388 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
/* Tell whether a RAUHTD response was completely filled, meaning another
 * query is needed to fetch the remaining records.
 *
 * The response cannot be full if it holds fewer than
 * MLXSW_REG_RAUHTD_REC_MAX_NUM records. With the maximum number of
 * records, the last record decides: its type is tested for IPv6, and for
 * IPv4 its entry count (incremented by one before the comparison) is
 * tested against MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC.
 * NOTE(review): last_rec_index is computed as num_rec - 1 before the
 * num_rec check; it is only used after that check in the visible lines.
 */
2394 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2396 u8 num_rec, last_rec_index, num_entries;
2398 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2399 last_rec_index = num_rec - 1;
2401 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2403 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2404 MLXSW_REG_RAUHTD_TYPE_IPV6)
2407 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2409 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
/* Dump the device's neighbour activity table for one record @type and feed
 * every returned record to mlxsw_sp_router_neigh_rec_process().
 *
 * The query is repeated while the response is reported full by
 * mlxsw_sp_router_rauhtd_is_full(). The whole dump runs under the router
 * lock. A failed query is logged rate-limited.
 */
2415 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2417 enum mlxsw_reg_rauhtd_type type)
2422 /* Ensure the RIF we read from the device does not change mid-dump. */
2423 mutex_lock(&mlxsw_sp->router->lock);
2425 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2426 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2429 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2432 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2433 for (i = 0; i < num_rec; i++)
2434 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2436 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2437 mutex_unlock(&mlxsw_sp->router->lock);
/* Allocate a RAUHTD payload buffer (GFP_KERNEL) and run the activity dump
 * first for IPv4 records, then for IPv6 records.
 * NOTE(review): the buffer is presumably freed before returning, and the
 * IPv6 pass presumably skipped when the IPv4 pass fails - confirm.
 */
2442 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2444 enum mlxsw_reg_rauhtd_type type;
2448 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2452 type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2453 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2457 type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2458 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
/* Under the router lock, send a neighbour event for every entry on the
 * router's nexthop_neighs_list, regardless of whether the device reported
 * activity for it.
 */
2464 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2466 struct mlxsw_sp_neigh_entry *neigh_entry;
2468 mutex_lock(&mlxsw_sp->router->lock);
2469 list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2470 nexthop_neighs_list_node)
2471 /* If this neigh have nexthops, make the kernel think this neigh
2472 * is active regardless of the traffic.
2474 neigh_event_send(neigh_entry->key.n, NULL);
2475 mutex_unlock(&mlxsw_sp->router->lock);
/* (Re)arm the neighbour-activity delayed work. The interval is stored in
 * milliseconds and converted to jiffies here.
 */
2479 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2481 unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2483 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2484 msecs_to_jiffies(interval));
/* Periodic work item: dump neighbour activity from the device, keep
 * next-hop neighbours alive, and re-schedule itself.
 *
 * A failed activity dump is logged with dev_err(); the next-hop refresh
 * and the re-scheduling are still performed.
 * NOTE(review): the dev_err() format string has no trailing newline,
 * unlike the other log messages in this part of the file.
 */
2487 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2489 struct mlxsw_sp_router *router;
2492 router = container_of(work, struct mlxsw_sp_router,
2493 neighs_update.dw.work);
2494 err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2496 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2498 mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2500 mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
/* Periodic work item: under the router lock, send a neighbour event for
 * every next-hop neighbour that is not marked connected, then re-schedule
 * itself after MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL.
 */
2503 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2505 struct mlxsw_sp_neigh_entry *neigh_entry;
2506 struct mlxsw_sp_router *router;
2508 router = container_of(work, struct mlxsw_sp_router,
2509 nexthop_probe_dw.work);
2510 /* Iterate over nexthop neighbours, find those who are unresolved and
2511 * send arp on them. This solves the chicken-egg problem when
2512 * the nexthop wouldn't get offloaded until the neighbor is resolved
2513 * but it wouldn't get resolved ever in case traffic is flowing in HW
2514 * using different nexthop.
2516 mutex_lock(&router->lock);
2517 list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2518 nexthop_neighs_list_node)
2519 if (!neigh_entry->connected)
2520 neigh_event_send(neigh_entry->key.n, NULL);
2521 mutex_unlock(&router->lock);
2523 mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2524 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
/* Forward declaration of the next-hop update hook for a neighbour entry;
 * the definition is not part of this section.
 */
2528 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2529 struct mlxsw_sp_neigh_entry *neigh_entry,
2530 bool removing, bool dead);
/* Map an add/delete flag to the corresponding RAUHT register operation:
 * WRITE_ADD when @adding is true, WRITE_DELETE otherwise.
 */
2532 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2534 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2535 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
/* Apply RAUHT operation @op for an IPv4 neighbour.
 *
 * Packs the entry's RIF, hardware address and destination IP (host byte
 * order, from the neighbour's primary_key) into a RAUHT payload, attaches
 * the flow counter when the entry has a valid one, and writes the
 * register. Returns the result of mlxsw_reg_write().
 */
2539 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2540 struct mlxsw_sp_neigh_entry *neigh_entry,
2541 enum mlxsw_reg_rauht_op op)
2543 struct neighbour *n = neigh_entry->key.n;
2544 u32 dip = ntohl(*((__be32 *) n->primary_key));
2545 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2547 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2549 if (neigh_entry->counter_valid)
2550 mlxsw_reg_rauht_pack_counter(rauht_pl,
2551 neigh_entry->counter_index);
2552 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
/* Apply RAUHT operation @op for an IPv6 neighbour.
 *
 * Same as the IPv4 variant, except that the destination IP is passed as a
 * pointer to the raw bytes of the neighbour's primary_key. Returns the
 * result of mlxsw_reg_write().
 */
2556 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2557 struct mlxsw_sp_neigh_entry *neigh_entry,
2558 enum mlxsw_reg_rauht_op op)
2560 struct neighbour *n = neigh_entry->key.n;
2561 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2562 const char *dip = n->primary_key;
2564 mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2566 if (neigh_entry->counter_valid)
2567 mlxsw_reg_rauht_pack_counter(rauht_pl,
2568 neigh_entry->counter_index);
2569 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
/* Decide whether an IPv6 neighbour should be left out of the device's
 * neighbour table. The test is whether ipv6_addr_type() reports the
 * IPV6_ADDR_LINKLOCAL bit for the neighbour's primary key.
 * NOTE(review): mlxsw_sp_neigh_entry_update() tests this result before
 * writing the IPv6 entry, so true presumably means "do not program";
 * confirm against the return statements.
 */
2572 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2574 struct neighbour *n = neigh_entry->key.n;
2576 /* Packets with a link-local destination address are trapped
2577 * after LPM lookup and never reach the neighbour table, so
2578 * there is no need to program such neighbours to the device.
2580 if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2581 IPV6_ADDR_LINKLOCAL)
/* Add a neighbour to, or remove it from, the device.
 *
 * The RAUHT operation is derived from the add/remove request. A removal
 * of an entry that is not connected is caught by the first guard. The
 * new state is recorded in neigh_entry->connected, then the write is
 * dispatched to the IPv4 or IPv6 helper according to the address family
 * of the kernel neighbour's table; IPv6 neighbours are first checked
 * with mlxsw_sp_neigh_ipv6_ignore(). Finally NTF_OFFLOADED is set or
 * cleared on the kernel neighbour.
 * NOTE(review): the conditions selecting between setting and clearing
 * NTF_OFFLOADED, and the error handling, should be confirmed against the
 * complete function body.
 */
2587 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2588 struct mlxsw_sp_neigh_entry *neigh_entry,
2591 enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2594 if (!adding && !neigh_entry->connected)
2596 neigh_entry->connected = adding;
2597 if (neigh_entry->key.n->tbl->family == AF_INET) {
2598 err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2602 } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2603 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2605 err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2615 neigh_entry->key.n->flags |= NTF_OFFLOADED;
2617 neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
/* Allocate or free the neighbour's counter, then rewrite the neighbour
 * entry (with adding == true) so that the device entry is re-packed with
 * the current counter state.
 */
2621 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2622 struct mlxsw_sp_neigh_entry *neigh_entry,
2626 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2628 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2629 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
/* Deferred-work context for netevent notifications: the work item
 * itself, the driver instance it belongs to, and the neighbour the
 * event refers to (read by the neighbour update handler).
 */
2632 struct mlxsw_sp_netevent_work {
2633 struct work_struct work;
2634 struct mlxsw_sp *mlxsw_sp;
2635 struct neighbour *n;
/* Deferred handler for a neighbour update event.
 *
 * Takes a snapshot of the neighbour's hardware address and NUD state
 * under n->lock, then, holding the router lock:
 *  - respins SPAN,
 *  - looks up the driver's neigh entry for the neighbour, creating one
 *    when needed,
 *  - skips the update when the entry is already connected with the same
 *    hardware address,
 *  - otherwise stores the new address, updates the device entry and the
 *    nexthops using it,
 *  - destroys the entry once it is disconnected and no nexthop refers
 *    to it.
 * An entry counts as connected when the NUD state is valid and the
 * neighbour is not dead.
 */
2638 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2640 struct mlxsw_sp_netevent_work *net_work =
2641 container_of(work, struct mlxsw_sp_netevent_work, work);
2642 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2643 struct mlxsw_sp_neigh_entry *neigh_entry;
2644 struct neighbour *n = net_work->n;
2645 unsigned char ha[ETH_ALEN];
2646 bool entry_connected;
2649 /* If these parameters are changed after we release the lock,
2650 * then we are guaranteed to receive another event letting us
2653 read_lock_bh(&n->lock);
2654 memcpy(ha, n->ha, ETH_ALEN);
2655 nud_state = n->nud_state;
2657 read_unlock_bh(&n->lock);
2659 mutex_lock(&mlxsw_sp->router->lock);
2660 mlxsw_sp_span_respin(mlxsw_sp);
2662 entry_connected = nud_state & NUD_VALID && !dead;
2663 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2664 if (!entry_connected && !neigh_entry)
2667 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2668 if (IS_ERR(neigh_entry))
2672 if (neigh_entry->connected && entry_connected &&
2673 !memcmp(neigh_entry->ha, ha, ETH_ALEN))
2676 memcpy(neigh_entry->ha, ha, ETH_ALEN);
2677 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2678 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2681 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2682 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2685 mutex_unlock(&mlxsw_sp->router->lock);
2690 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
/* Deferred handler for multipath hash policy changes: recovers the
 * driver instance from the work context and re-runs
 * mlxsw_sp_mp_hash_init() on it.
 */
2692 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2694 struct mlxsw_sp_netevent_work *net_work =
2695 container_of(work, struct mlxsw_sp_netevent_work, work);
2696 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2698 mlxsw_sp_mp_hash_init(mlxsw_sp);
2702 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
/* Deferred handler for forwarding priority-update changes: recovers the
 * driver instance from the work context and re-runs
 * __mlxsw_sp_router_init() on it.
 */
2704 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2706 struct mlxsw_sp_netevent_work *net_work =
2707 container_of(work, struct mlxsw_sp_netevent_work, work);
2708 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2710 __mlxsw_sp_router_init(mlxsw_sp);
/* Queue @cb as deferred work on behalf of the router owning @nb.
 *
 * The router is recovered from the notifier block. @net is compared
 * with the network namespace of the router's driver instance before
 * anything is allocated. The work context is allocated with GFP_ATOMIC,
 * bound to @cb and handed to mlxsw_core_schedule_work().
 * NOTE(review): the values returned on the namespace-mismatch and
 * allocation-failure paths should be confirmed against the full body.
 */
2714 static int mlxsw_sp_router_schedule_work(struct net *net,
2715 struct notifier_block *nb,
2716 void (*cb)(struct work_struct *))
2718 struct mlxsw_sp_netevent_work *net_work;
2719 struct mlxsw_sp_router *router;
2721 router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2722 if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2725 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2729 INIT_WORK(&net_work->work, cb);
2730 net_work->mlxsw_sp = router->mlxsw_sp;
2731 mlxsw_core_schedule_work(&net_work->work);
/* Netevent notifier callback.
 *
 * NETEVENT_DELAY_PROBE_TIME_UPDATE: for IPv4/IPv6 neigh parms bound to
 *   a device, resolves the mlxsw port below that device and stores the
 *   DELAY_PROBE_TIME value (converted to milliseconds) as the router's
 *   neighs_update.interval.
 * NETEVENT_NEIGH_UPDATE: for IPv4/IPv6 neighbours on a device backed by
 *   an mlxsw port, allocates a work context with GFP_ATOMIC and queues
 *   mlxsw_sp_router_neigh_event_work().
 * NETEVENT_IPV4/IPV6_MPATH_HASH_UPDATE and
 * NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE: deferred through
 *   mlxsw_sp_router_schedule_work() with the matching handler.
 *
 * The port reference obtained with mlxsw_sp_port_lower_dev_hold() is
 * released with mlxsw_sp_port_dev_put() on each visible path.
 */
2735 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2736 unsigned long event, void *ptr)
2738 struct mlxsw_sp_netevent_work *net_work;
2739 struct mlxsw_sp_port *mlxsw_sp_port;
2740 struct mlxsw_sp *mlxsw_sp;
2741 unsigned long interval;
2742 struct neigh_parms *p;
2743 struct neighbour *n;
2746 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2749 /* We don't care about changes in the default table. */
2750 if (!p->dev || (p->tbl->family != AF_INET &&
2751 p->tbl->family != AF_INET6))
2754 /* We are in atomic context and can't take RTNL mutex,
2755 * so use RCU variant to walk the device chain.
2757 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2761 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2762 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2763 mlxsw_sp->router->neighs_update.interval = interval;
2765 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2767 case NETEVENT_NEIGH_UPDATE:
2770 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2773 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2777 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2779 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2783 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2784 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2787 /* Take a reference to ensure the neighbour won't be
2788 * destructed until we drop the reference in delayed
2792 mlxsw_core_schedule_work(&net_work->work);
2793 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2795 case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2796 case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2797 return mlxsw_sp_router_schedule_work(ptr, nb,
2798 mlxsw_sp_router_mp_hash_event_work);
2800 case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2801 return mlxsw_sp_router_schedule_work(ptr, nb,
2802 mlxsw_sp_router_update_priority_work);
/* Set up neighbour handling for the router: initialise the neighbour
 * hash table, initialise the polling interval, and create the two
 * delayed works (neighbour activity update and unresolved-nexthop
 * probing). Both works are scheduled with a delay of 0.
 */
2808 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2812 err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2813 &mlxsw_sp_neigh_ht_params);
2817 /* Initialize the polling interval according to the default
2820 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2822 /* Create the delayed works for the activity_update */
2823 INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2824 mlxsw_sp_router_neighs_update_work);
2825 INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2826 mlxsw_sp_router_probe_unresolved_nexthops);
2827 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2828 mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
/* Tear down neighbour handling: both delayed works are cancelled
 * synchronously before the neighbour hash table is destroyed, so no
 * work item can touch the table afterwards.
 */
2832 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2834 cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2835 cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2836 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
/* Flush every neighbour entry attached to @rif: each one is removed
 * from the device and then destroyed. The _safe list iterator is used
 * because entries are destroyed while the list is being walked.
 */
2839 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2840 struct mlxsw_sp_rif *rif)
2842 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2844 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2846 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2847 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
/* Kind of a nexthop. ETH nexthops are programmed through the Ethernet
 * adjacency helper, IPIP nexthops through the IP-in-IP ops (see
 * mlxsw_sp_nexthop_update()).
 */
2851 enum mlxsw_sp_nexthop_type {
2852 MLXSW_SP_NEXTHOP_TYPE_ETH,
2853 MLXSW_SP_NEXTHOP_TYPE_IPIP,
/* What the device does with packets that hit a nexthop's adjacency
 * entry. Discard and trap nexthops are always programmed as Ethernet
 * entries (see mlxsw_sp_nexthop_update()).
 */
2856 enum mlxsw_sp_nexthop_action {
2857 /* Nexthop forwards packets to an egress RIF */
2858 MLXSW_SP_NEXTHOP_ACTION_FORWARD,
2859 /* Nexthop discards packets */
2860 MLXSW_SP_NEXTHOP_ACTION_DISCARD,
2861 /* Nexthop traps packets */
2862 MLXSW_SP_NEXTHOP_ACTION_TRAP,
/* Lookup key of a nexthop in the router's nexthop hash table: the
 * kernel fib_nh pointer it was created for.
 */
2865 struct mlxsw_sp_nexthop_key {
2866 struct fib_nh *fib_nh;
/* Driver representation of a single nexthop.
 *
 * A nexthop is a member of several lists (its neighbour entry's list,
 * its RIF's list and the router-wide list), is hashed by @key, and
 * points back to the group info it is embedded in. The should_offload /
 * offloaded / update bits drive adjacency table programming; @action
 * and @type select how the adjacency entry is written. @neigh_entry is
 * read for Ethernet nexthops and @ipip_entry for IP-in-IP nexthops.
 */
2869 struct mlxsw_sp_nexthop {
2870 struct list_head neigh_list_node; /* member of neigh entry list */
2871 struct list_head rif_list_node;
2872 struct list_head router_list_node;
2873 struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
2874 * this nexthop belongs to
2876 struct rhash_head ht_node;
2877 struct neigh_table *neigh_tbl;
2878 struct mlxsw_sp_nexthop_key key;
2879 unsigned char gw_addr[sizeof(struct in6_addr)];
2883 int num_adj_entries;
2884 struct mlxsw_sp_rif *rif;
2885 u8 should_offload:1, /* set indicates this nexthop should be written
2886 * to the adjacency table.
2888 offloaded:1, /* set indicates this nexthop was written to the
2891 update:1; /* set indicates this nexthop should be updated in the
2892 * adjacency table (e.g., its MAC changed).
2894 enum mlxsw_sp_nexthop_action action;
2895 enum mlxsw_sp_nexthop_type type;
2897 struct mlxsw_sp_neigh_entry *neigh_entry;
2898 struct mlxsw_sp_ipip_entry *ipip_entry;
2900 unsigned int counter_index;
/* Origin of a nexthop group. The type selects how the group is hashed
 * and compared: by fib_info pointer (IPV4), by the set of IPv6 nexthops
 * (IPV6) or by nexthop object ID (OBJ).
 */
2904 enum mlxsw_sp_nexthop_group_type {
2905 MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
2906 MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
2907 MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
/* Adjacency-related state of a nexthop group: a back pointer to the
 * owning group, the sum of normalized nexthop weights, validity and
 * gateway flags, and the nexthops themselves as a trailing array.
 * nh_rif is shorthand for the RIF of the first nexthop.
 * NOTE(review): nexthops[0] is a zero-length trailing array; the C99
 * flexible array member form (nexthops[]) is the preferred spelling.
 * Confirm that no sizeof() user depends on the current form before
 * changing it.
 */
2910 struct mlxsw_sp_nexthop_group_info {
2911 struct mlxsw_sp_nexthop_group *nh_grp;
2915 int sum_norm_weight;
2916 u8 adj_index_valid:1,
2917 gateway:1, /* routes using the group use a gateway */
2919 struct list_head list; /* member in nh_res_grp_list */
2920 struct mlxsw_sp_nexthop nexthops[0];
2921 #define nh_rif nexthops[0].rif
/* Key of a per-group virtual router entry. The lookup and create
 * helpers fill it with a virtual router ID and an L3 protocol.
 */
2924 struct mlxsw_sp_nexthop_group_vr_key {
2926 enum mlxsw_sp_l3proto proto;
/* Reference-counted record that a nexthop group is used by a given
 * (virtual router, protocol) pair. Entries live both on the group's
 * vr_list and in its vr_ht hash table.
 */
2929 struct mlxsw_sp_nexthop_group_vr_entry {
2930 struct list_head list; /* member in vr_list */
2931 struct rhash_head ht_node; /* member in vr_ht */
2932 refcount_t ref_count;
2933 struct mlxsw_sp_nexthop_group_vr_key key;
/* A group of nexthops shared by the FIB entries on fib_list.
 *
 * @type selects how the group is keyed in the router's nexthop group
 * hash table, @nhgi holds the nexthops and their adjacency state, and
 * vr_list / vr_ht track the virtual routers the group is linked to.
 */
2936 struct mlxsw_sp_nexthop_group {
2937 struct rhash_head ht_node;
2938 struct list_head fib_list; /* list of fib entries that use this group */
2941 struct fib_info *fi;
2947 struct mlxsw_sp_nexthop_group_info *nhgi;
2948 struct list_head vr_list;
2949 struct rhashtable vr_ht;
2950 enum mlxsw_sp_nexthop_group_type type;
/* Attach a flow counter to @nh.
 *
 * Counters are only considered when they are enabled for the adjacency
 * dpipe table. The flow counter index is stored in nh->counter_index
 * and nh->counter_valid is set once the allocation has succeeded. The
 * function returns void, so an allocation failure is not reported to
 * the caller.
 */
2954 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2955 struct mlxsw_sp_nexthop *nh)
2957 struct devlink *devlink;
2959 devlink = priv_to_devlink(mlxsw_sp->core);
2960 if (!devlink_dpipe_table_counter_enabled(devlink,
2961 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2964 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2967 nh->counter_valid = true;
/* Release @nh's flow counter, if it has one, and clear counter_valid. */
2970 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2971 struct mlxsw_sp_nexthop *nh)
2973 if (!nh->counter_valid)
2975 mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2976 nh->counter_valid = false;
/* Read @nh's flow counter into @p_counter. A nexthop without a valid
 * counter is rejected by the guard; otherwise the result of
 * mlxsw_sp_flow_counter_get() is returned.
 * NOTE(review): confirm the error code used when no counter is attached.
 */
2979 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2980 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2982 if (!nh->counter_valid)
2985 return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
/* Iterator over the router-wide nexthop list.
 *
 * Yields either the first entry of router->nexthop_list or the entry
 * following @nh. An empty list and @nh being the last entry are both
 * guarded explicitly.
 * NOTE(review): presumably a NULL @nh starts the iteration and NULL is
 * returned at the end; confirm against the full body.
 */
2989 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2990 struct mlxsw_sp_nexthop *nh)
2993 if (list_empty(&router->nexthop_list))
2996 return list_first_entry(&router->nexthop_list,
2997 typeof(*nh), router_list_node);
2999 if (list_is_last(&nh->router_list_node, &router->nexthop_list))
3001 return list_next_entry(nh, router_list_node);
/* True when @nh is offloaded and its action is FORWARD. */
3004 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
3006 return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
/* Return the neighbour hardware address used by @nh. Only Ethernet
 * nexthops that are currently forwarding have one; anything else is
 * rejected by the guard.
 * NOTE(review): presumably NULL is returned on the rejected path.
 */
3009 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
3011 if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
3012 !mlxsw_sp_nexthop_is_forward(nh))
3014 return nh->neigh_entry->ha;
/* Report where @nh lives in the adjacency table.
 *
 * @p_adj_index receives the group's base adjacency index, @p_adj_size
 * the group's ECMP size, and @p_adj_hash_index an offset computed by
 * summing num_adj_entries of offloaded nexthops in the group. The
 * request is rejected when @nh is not offloaded or the group has no
 * valid adjacency index.
 * NOTE(review): the summation presumably stops when the walk reaches
 * @nh itself; confirm the loop exit and the returned error code.
 */
3017 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
3018 u32 *p_adj_size, u32 *p_adj_hash_index)
3020 struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3021 u32 adj_hash_index = 0;
3024 if (!nh->offloaded || !nhgi->adj_index_valid)
3027 *p_adj_index = nhgi->adj_index;
3028 *p_adj_size = nhgi->ecmp_size;
3030 for (i = 0; i < nhgi->count; i++) {
3031 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3035 if (nh_iter->offloaded)
3036 adj_hash_index += nh_iter->num_adj_entries;
3039 *p_adj_hash_index = adj_hash_index;
/* Accessor returning a RIF for @nh.
 * NOTE(review): presumably this is nh->rif; confirm against the body.
 */
3043 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
/* Scan every nexthop of the group @nh belongs to, looking for one of
 * type IPIP.
 */
3048 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3050 struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3053 for (i = 0; i < nhgi->count; i++) {
3054 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3056 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
/* rhashtable parameters of a group's VR table: entries are keyed by the
 * whole struct mlxsw_sp_nexthop_group_vr_key and the table shrinks
 * automatically.
 */
3062 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
3063 .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
3064 .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
3065 .key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
3066 .automatic_shrinking = true,
/* Find the VR entry of @nh_grp that matches @fib's virtual router ID
 * and protocol, or NULL if there is none.
 *
 * The key is zeroed with memset() before its fields are filled in: the
 * table hashes sizeof(key) bytes, so any padding must have a
 * deterministic value.
 */
3069 static struct mlxsw_sp_nexthop_group_vr_entry *
3070 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3071 const struct mlxsw_sp_fib *fib)
3073 struct mlxsw_sp_nexthop_group_vr_key key;
3075 memset(&key, 0, sizeof(key));
3076 key.vr_id = fib->vr->id;
3077 key.proto = fib->proto;
3078 return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3079 mlxsw_sp_nexthop_group_vr_ht_params);
/* Create the VR entry of @nh_grp for @fib's virtual router and
 * protocol.
 *
 * The entry is zero-allocated (so the key's padding is zero), starts
 * with a reference count of 1, and is inserted into vr_ht before being
 * added to vr_list. A failed hash table insertion jumps to the
 * err_hashtable_insert unwind label.
 */
3083 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3084 const struct mlxsw_sp_fib *fib)
3086 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3089 vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3093 vr_entry->key.vr_id = fib->vr->id;
3094 vr_entry->key.proto = fib->proto;
3095 refcount_set(&vr_entry->ref_count, 1);
3097 err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3098 mlxsw_sp_nexthop_group_vr_ht_params);
3100 goto err_hashtable_insert;
3102 list_add(&vr_entry->list, &nh_grp->vr_list);
3106 err_hashtable_insert:
/* Undo mlxsw_sp_nexthop_group_vr_entry_create(): unlink the entry from
 * the group's vr_list and remove it from vr_ht.
 */
3112 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3113 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3115 list_del(&vr_entry->list);
3116 rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3117 mlxsw_sp_nexthop_group_vr_ht_params);
/* Record that @nh_grp is used by @fib's (virtual router, protocol)
 * pair: take another reference on the existing VR entry, or create the
 * entry when there is none yet.
 */
3122 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3123 const struct mlxsw_sp_fib *fib)
3125 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3127 vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3129 refcount_inc(&vr_entry->ref_count);
3133 return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
/* Drop one reference on the VR entry of @nh_grp matching @fib and
 * destroy the entry when the last reference goes away. A missing entry
 * indicates unbalanced link/unlink calls and triggers a one-time
 * warning.
 */
3137 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3138 const struct mlxsw_sp_fib *fib)
3140 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3142 vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3143 if (WARN_ON_ONCE(!vr_entry))
3146 if (!refcount_dec_and_test(&vr_entry->ref_count))
3149 mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
/* Lookup argument for the nexthop group hash table. @type selects which
 * of the remaining members is meaningful: the fib_info pointer for IPv4
 * groups, the FIB6 entry for IPv6 groups, and an ID for nexthop object
 * groups.
 */
3152 struct mlxsw_sp_nexthop_group_cmp_arg {
3153 enum mlxsw_sp_nexthop_group_type type;
3155 struct fib_info *fi;
3156 struct mlxsw_sp_fib6_entry *fib6_entry;
/* Search @nh_grp for a nexthop with the given interface index, weight
 * and IPv6 gateway address. All three must match.
 */
3162 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3163 const struct in6_addr *gw, int ifindex,
3168 for (i = 0; i < nh_grp->nhgi->count; i++) {
3169 const struct mlxsw_sp_nexthop *nh;
3171 nh = &nh_grp->nhgi->nexthops[i];
3172 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3173 ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
/* Check whether @nh_grp describes the same set of nexthops as
 * @fib6_entry. The nexthop counts are compared first; then every route
 * of the entry must have a nexthop in the group with the same device
 * ifindex, weight and gateway.
 */
3181 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3182 const struct mlxsw_sp_fib6_entry *fib6_entry)
3184 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3186 if (nh_grp->nhgi->count != fib6_entry->nrt6)
3189 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3190 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3191 struct in6_addr *gw;
3192 int ifindex, weight;
3194 ifindex = fib6_nh->fib_nh_dev->ifindex;
3195 weight = fib6_nh->fib_nh_weight;
3196 gw = &fib6_nh->fib_nh_gw6;
3197 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
/* rhashtable obj_cmpfn for nexthop groups. As the return expressions
 * show, zero means "match" and non-zero means "different".
 *
 * Groups of a different type than the lookup argument are handled by
 * the first guard. Otherwise IPv4 groups are compared by fib_info
 * pointer, IPv6 groups by their nexthop set and object groups by ID.
 */
3206 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3208 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3209 const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3211 if (nh_grp->type != cmp_arg->type)
3214 switch (cmp_arg->type) {
3215 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3216 return cmp_arg->fi != nh_grp->ipv4.fi;
3217 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3218 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3219 cmp_arg->fib6_entry);
3220 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3221 return cmp_arg->id != nh_grp->obj.id;
/* rhashtable obj_hashfn: hash a nexthop group that is stored in the
 * table. Each case must yield the same value as the key hash function
 * does for an equal lookup argument.
 *
 * IPv4: hash of the fib_info pointer.
 * IPv6: starts from the nexthop count and XORs in the hashes of every
 *       nexthop's ifindex and gateway address; XOR makes the result
 *       independent of nexthop order.
 * OBJ:  hash of the nexthop object ID.
 */
3228 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3230 const struct mlxsw_sp_nexthop_group *nh_grp = data;
3231 const struct mlxsw_sp_nexthop *nh;
3232 struct fib_info *fi;
3236 switch (nh_grp->type) {
3237 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3238 fi = nh_grp->ipv4.fi;
3239 return jhash(&fi, sizeof(fi), seed);
3240 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3241 val = nh_grp->nhgi->count;
3242 for (i = 0; i < nh_grp->nhgi->count; i++) {
3243 nh = &nh_grp->nhgi->nexthops[i];
3244 val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3245 val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3247 return jhash(&val, sizeof(val), seed);
3248 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3249 return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
/* Hash the nexthop set of a FIB6 entry for a group lookup. This mirrors
 * the IPv6 case of the object hash function: start from the number of
 * routes, XOR in the hashes of each nexthop device's ifindex and of its
 * gateway, then hash the accumulated value.
 */
3257 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3259 unsigned int val = fib6_entry->nrt6;
3260 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3262 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3263 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3264 struct net_device *dev = fib6_nh->fib_nh_dev;
3265 struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3267 val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3268 val ^= jhash(gw, sizeof(*gw), seed);
3271 return jhash(&val, sizeof(val), seed);
/* rhashtable hashfn: hash a lookup argument (struct
 * mlxsw_sp_nexthop_group_cmp_arg) according to its type: the fib_info
 * pointer for IPv4, the FIB6 entry's nexthop set for IPv6, and the ID
 * for nexthop objects.
 */
3275 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3277 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3279 switch (cmp_arg->type) {
3280 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3281 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3282 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3283 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3284 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3285 return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
/* rhashtable parameters of the nexthop group table. No fixed key is
 * configured; hashing and comparison are done through custom callbacks
 * for the lookup argument, the stored object and the match test.
 */
3292 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3293 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3294 .hashfn = mlxsw_sp_nexthop_group_hash,
3295 .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
3296 .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
/* Insert @nh_grp into the router's nexthop group hash table. IPv6
 * groups whose routes do not use a gateway are special-cased by the
 * guard before the insertion.
 * NOTE(review): presumably such groups are not hashed and 0 is
 * returned; confirm against the full body.
 */
3299 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3300 struct mlxsw_sp_nexthop_group *nh_grp)
3302 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3303 !nh_grp->nhgi->gateway)
3306 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3308 mlxsw_sp_nexthop_group_ht_params);
/* Remove @nh_grp from the router's nexthop group hash table. The same
 * IPv6-without-gateway guard as on insertion is applied first, keeping
 * insert and remove symmetric.
 */
3311 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3312 struct mlxsw_sp_nexthop_group *nh_grp)
3314 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3315 !nh_grp->nhgi->gateway)
3318 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3320 mlxsw_sp_nexthop_group_ht_params);
/* Look up the IPv4 nexthop group for @fi in the router's nexthop group
 * hash table, using a lookup argument of type IPV4. Returns the result
 * of rhashtable_lookup_fast().
 */
3323 static struct mlxsw_sp_nexthop_group *
3324 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3325 struct fib_info *fi)
3327 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3329 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3331 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3333 mlxsw_sp_nexthop_group_ht_params);
/* Look up the IPv6 nexthop group whose nexthop set matches
 * @fib6_entry, using a lookup argument of type IPV6. Returns the result
 * of rhashtable_lookup_fast().
 */
3336 static struct mlxsw_sp_nexthop_group *
3337 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3338 struct mlxsw_sp_fib6_entry *fib6_entry)
3340 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3342 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3343 cmp_arg.fib6_entry = fib6_entry;
3344 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3346 mlxsw_sp_nexthop_group_ht_params);
/* rhashtable parameters of the router's nexthop table: nexthops are
 * keyed by struct mlxsw_sp_nexthop_key.
 */
3349 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3350 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3351 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3352 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
/* Insert @nh into the router's nexthop hash table. Returns the result
 * of rhashtable_insert_fast().
 */
3355 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3356 struct mlxsw_sp_nexthop *nh)
3358 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3359 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
/* Remove @nh from the router's nexthop hash table. */
3362 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3363 struct mlxsw_sp_nexthop *nh)
3365 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3366 mlxsw_sp_nexthop_ht_params);
/* Look up the nexthop matching @key in the router's nexthop hash table.
 * @key is passed by value and its address is handed to the lookup.
 */
3369 static struct mlxsw_sp_nexthop *
3370 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3371 struct mlxsw_sp_nexthop_key key)
3373 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3374 mlxsw_sp_nexthop_ht_params);
/* Issue one RALEU register write for a single virtual router and
 * protocol. The payload carries the current adjacency index and ECMP
 * size together with a new adjacency index.
 * NOTE(review): presumably this re-points the routes of that VR from
 * the old adjacency group to the new one; confirm against the RALEU
 * register description.
 *
 * Returns the result of mlxsw_reg_write().
 */
3377 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3378 enum mlxsw_sp_l3proto proto,
3380 u32 adj_index, u16 ecmp_size,
3384 char raleu_pl[MLXSW_REG_RALEU_LEN];
3386 mlxsw_reg_raleu_pack(raleu_pl,
3387 (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3388 adj_index, ecmp_size, new_adj_index,
3390 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
/* Run the per-VR mass update for every virtual router linked to
 * @nh_grp.
 *
 * On failure the VRs that were already updated are walked in reverse
 * order (list_for_each_entry_continue_reverse) and a compensating mass
 * update is issued for each of them, ending with the old adjacency
 * index and ECMP size as arguments.
 */
3393 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3394 struct mlxsw_sp_nexthop_group *nh_grp,
3395 u32 old_adj_index, u16 old_ecmp_size)
3397 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3398 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3401 list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3402 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3403 vr_entry->key.proto,
3404 vr_entry->key.vr_id,
3410 goto err_mass_update_vr;
3415 list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3416 mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3417 vr_entry->key.vr_id,
3420 old_adj_index, old_ecmp_size);
/* Write one Ethernet adjacency entry through the RATR register.
 *
 * The egress RIF is the nexthop's RIF, or the router's loopback RIF
 * index when the nexthop has none. @force selects an unconditional
 * write; otherwise the write-on-activity opcode is used. The payload is
 * then completed according to the nexthop action: the neighbour MAC for
 * FORWARD, a discard trap action for DISCARD, and a trap action plus
 * trap ID for TRAP. A counter is attached when the nexthop has a valid
 * one and explicitly disabled otherwise.
 *
 * Returns the result of mlxsw_reg_write().
 */
3424 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
3426 struct mlxsw_sp_nexthop *nh,
3427 bool force, char *ratr_pl)
3429 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3430 enum mlxsw_reg_ratr_op op;
3433 rif_index = nh->rif ? nh->rif->rif_index :
3434 mlxsw_sp->router->lb_rif_index;
3435 op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
3436 MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
3437 mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
3438 adj_index, rif_index);
3439 switch (nh->action) {
3440 case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
3441 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3443 case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
3444 mlxsw_reg_ratr_trap_action_set(ratr_pl,
3445 MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3447 case MLXSW_SP_NEXTHOP_ACTION_TRAP:
3448 mlxsw_reg_ratr_trap_action_set(ratr_pl,
3449 MLXSW_REG_RATR_TRAP_ACTION_TRAP);
3450 mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
3456 if (nh->counter_valid)
3457 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3459 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3461 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
/* Write all adjacency entries of an Ethernet nexthop: one entry per
 * num_adj_entries, at consecutive indexes starting from @adj_index.
 */
3464 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3465 struct mlxsw_sp_nexthop *nh, bool force,
3470 for (i = 0; i < nh->num_adj_entries; i++) {
3473 err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
3474 nh, force, ratr_pl);
/* Write one adjacency entry of an IP-in-IP nexthop by delegating to the
 * nexthop_update callback of the ops registered for the tunnel type of
 * the nexthop's IPIP entry.
 */
3482 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3484 struct mlxsw_sp_nexthop *nh,
3485 bool force, char *ratr_pl)
3487 const struct mlxsw_sp_ipip_ops *ipip_ops;
3489 ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3490 return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
/* Write all adjacency entries of an IP-in-IP nexthop: one entry per
 * num_adj_entries, at consecutive indexes starting from the given base.
 */
3494 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3496 struct mlxsw_sp_nexthop *nh, bool force,
3501 for (i = 0; i < nh->num_adj_entries; i++) {
3504 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3505 nh, force, ratr_pl);
/* Program the adjacency entries of @nh, choosing the writer by nexthop
 * type and action: the Ethernet writer for Ethernet nexthops and for
 * any nexthop whose action is DISCARD or TRAP, the IP-in-IP writer
 * otherwise.
 */
3513 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3514 struct mlxsw_sp_nexthop *nh, bool force,
3517 /* When action is discard or trap, the nexthop must be
3518 * programmed as an Ethernet nexthop.
3520 if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
3521 nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
3522 nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3523 return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
3526 return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
/* Walk the nexthops of a group and (re)write their adjacency entries.
 *
 * Indexes are handed out consecutively from the group's base index;
 * each processed nexthop advances the running index by its
 * num_adj_entries. A nexthop is written when it is flagged for update
 * or when the whole group is being reallocated. Nexthops that should
 * not be offloaded take the separate branch at the top of the loop.
 */
3531 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3532 struct mlxsw_sp_nexthop_group_info *nhgi,
3535 char ratr_pl[MLXSW_REG_RATR_LEN];
3536 u32 adj_index = nhgi->adj_index; /* base */
3537 struct mlxsw_sp_nexthop *nh;
3540 for (i = 0; i < nhgi->count; i++) {
3541 nh = &nhgi->nexthops[i];
3543 if (!nh->should_offload) {
3548 if (nh->update || reallocate) {
3551 err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
3558 adj_index += nh->num_adj_entries;
/* Re-program every FIB entry that uses @nh_grp by calling
 * mlxsw_sp_fib_entry_update() on each member of the group's fib_list.
 */
3564 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3565 struct mlxsw_sp_nexthop_group *nh_grp)
3567 struct mlxsw_sp_fib_entry *fib_entry;
3570 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3571 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
/* A contiguous range of adjacency group sizes supported by the device.
 * Both bounds are inclusive; a single supported size is expressed as
 * start == end.
 */
3578 struct mlxsw_sp_adj_grp_size_range {
3579 u16 start; /* Inclusive */
3580 u16 end; /* Inclusive */
/* Adjacency group sizes for the "sp1" device variant: any size from 1
 * to 64, then only the listed discrete sizes up to 4096.
 */
3583 /* Ordered by range start value */
3584 static const struct mlxsw_sp_adj_grp_size_range
3585 mlxsw_sp1_adj_grp_size_ranges[] = {
3586 { .start = 1, .end = 64 },
3587 { .start = 512, .end = 512 },
3588 { .start = 1024, .end = 1024 },
3589 { .start = 2048, .end = 2048 },
3590 { .start = 4096, .end = 4096 },
/* Adjacency group sizes for the "sp2" device variant: any size from 1
 * to 128, then only the listed discrete sizes up to 4096.
 */
3593 /* Ordered by range start value */
3594 static const struct mlxsw_sp_adj_grp_size_range
3595 mlxsw_sp2_adj_grp_size_ranges[] = {
3596 { .start = 1, .end = 128 },
3597 { .start = 256, .end = 256 },
3598 { .start = 512, .end = 512 },
3599 { .start = 1024, .end = 1024 },
3600 { .start = 2048, .end = 2048 },
3601 { .start = 4096, .end = 4096 },
/* Round *p_adj_grp_size up to a size the device supports.
 *
 * The router's size ranges are walked in ascending order. A size that
 * already lies inside a range matches the first test; otherwise the
 * first range whose end is not smaller than the requested size supplies
 * the rounded-up value.
 * NOTE(review): presumably the walk stops at the first match; confirm
 * the exit statements.
 */
3604 static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
3605 u16 *p_adj_grp_size)
3609 for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
3610 const struct mlxsw_sp_adj_grp_size_range *size_range;
3612 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3614 if (*p_adj_grp_size >= size_range->start &&
3615 *p_adj_grp_size <= size_range->end)
3618 if (*p_adj_grp_size <= size_range->end) {
3619 *p_adj_grp_size = size_range->end;
/* Pick a supported group size that fits in @alloc_size entries.
 *
 * The router's size ranges are walked from the largest to the smallest;
 * a range whose end does not exceed @alloc_size supplies the new value
 * of *p_adj_grp_size.
 * NOTE(review): presumably the walk stops at the first such range,
 * i.e. the largest one that fits; confirm the exit statement.
 */
3625 static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
3626 u16 *p_adj_grp_size,
3627 unsigned int alloc_size)
3631 for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
3632 const struct mlxsw_sp_adj_grp_size_range *size_range;
3634 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3636 if (alloc_size >= size_range->end) {
3637 *p_adj_grp_size = size_range->end;
/* Adjust *p_adj_grp_size to a size that is both supported by the device
 * and backed by the KVDL allocator.
 *
 * The requested size is rounded up to a supported size, the allocator
 * is queried for how many entries such an allocation would yield, and
 * the result is rounded down to a supported size again, so that any
 * extra entries the allocator hands out can be used.
 */
3643 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3644 u16 *p_adj_grp_size)
3646 unsigned int alloc_size;
3649 /* Round up the requested group size to the next size supported
3650 * by the device and make sure the request can be satisfied.
3652 mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
3653 err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3654 MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3655 *p_adj_grp_size, &alloc_size);
3658 /* It is possible the allocation results in more allocated
3659 * entries than requested. Try to use as much of them as
3662 mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
/* Normalize the weights of the offloadable nexthops in a group.
 *
 * The first pass computes the greatest common divisor of their weights
 * (starting from g = 0, so the first weight becomes the initial gcd).
 * The second pass stores weight / gcd in norm_nh_weight and accumulates
 * the total, which is saved in nhgi->sum_norm_weight. Nexthops that
 * should not be offloaded are ignored by both passes.
 */
3668 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3670 int i, g = 0, sum_norm_weight = 0;
3671 struct mlxsw_sp_nexthop *nh;
3673 for (i = 0; i < nhgi->count; i++) {
3674 nh = &nhgi->nexthops[i];
3676 if (!nh->should_offload)
3679 g = gcd(nh->nh_weight, g);
3684 for (i = 0; i < nhgi->count; i++) {
3685 nh = &nhgi->nexthops[i];
3687 if (!nh->should_offload)
3689 nh->norm_nh_weight = nh->nh_weight / g;
3690 sum_norm_weight += nh->norm_nh_weight;
3693 nhgi->sum_norm_weight = sum_norm_weight;
/* Distribute the group's ecmp_size adjacency entries among the
 * offloadable nexthops in proportion to their normalized weights.
 *
 * A running cumulative weight is scaled to the ECMP size with
 * round-to-closest division. Each nexthop receives the difference
 * between its cumulative bound and the previous one, so the per-nexthop
 * counts add up to ecmp_size.
 */
3697 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3699 int i, weight = 0, lower_bound = 0;
3700 int total = nhgi->sum_norm_weight;
3701 u16 ecmp_size = nhgi->ecmp_size;
3703 for (i = 0; i < nhgi->count; i++) {
3704 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3707 if (!nh->should_offload)
3709 weight += nh->norm_nh_weight;
3710 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3711 nh->num_adj_entries = upper_bound - lower_bound;
3712 lower_bound = upper_bound;
3716 static struct mlxsw_sp_nexthop *
3717 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3718 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
/* Set or clear RTNH_F_OFFLOAD on the kernel fib_nh of every nexthop in
 * an IPv4 group.
 * NOTE(review): the condition choosing between set and clear is
 * presumably nh->offloaded; confirm against the full body.
 */
3721 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3722 struct mlxsw_sp_nexthop_group *nh_grp)
3726 for (i = 0; i < nh_grp->nhgi->count; i++) {
3727 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3730 nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3732 nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
/* For every route of @fib6_entry, find the matching nexthop in @nh_grp
 * and mirror its offload state into the kernel fib6_nh: RTNH_F_OFFLOAD
 * is set when a nexthop was found and is offloaded, and cleared
 * otherwise.
 */
3737 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3738 struct mlxsw_sp_fib6_entry *fib6_entry)
3740 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3742 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3743 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3744 struct mlxsw_sp_nexthop *nh;
3746 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3747 if (nh && nh->offloaded)
3748 fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3750 fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
/* Refresh the offload indication of an IPv6 nexthop group by refreshing
 * every FIB6 entry on the group's fib_list.
 */
3755 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3756 struct mlxsw_sp_nexthop_group *nh_grp)
3758 struct mlxsw_sp_fib6_entry *fib6_entry;
3760 /* Unfortunately, in IPv6 the route and the nexthop are described by
3761 * the same struct, so we need to iterate over all the routes using the
3762 * nexthop group and set / clear the offload indication for them.
3764 list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3765 common.nexthop_group_node)
3766 __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
/* Report the hardware state of one nexthop bucket to the nexthop core
 * via nexthop_bucket_set_hw_flags().
 *
 * Both flags default to false. They are only derived for an offloaded
 * nexthop, where the TRAP action is distinguished from the other
 * actions.
 * NOTE(review): presumably a trapping nexthop reports trap and any
 * other offloaded nexthop reports offload; confirm the assignments.
 */
3770 mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3771 const struct mlxsw_sp_nexthop *nh,
3774 struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3775 bool offload = false, trap = false;
3777 if (nh->offloaded) {
3778 if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3783 nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3784 bucket_index, offload, trap);
/* Refresh the hardware flags of a nexthop object group.
 *
 * Nothing is reported for a group that is marked can_destroy.
 * Otherwise the group-level flags are set from adj_index_valid (with
 * trap reported as false), and for resilient groups every nexthop
 * bucket is refreshed individually, using its array position as the
 * bucket index.
 */
3788 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3789 struct mlxsw_sp_nexthop_group *nh_grp)
3793 /* Do not update the flags if the nexthop group is being destroyed
3795 * 1. The nexthop object is being deleted, in which case the flags are
3797 * 2. The nexthop group was replaced by a newer group, in which case
3798 * the flags of the nexthop object were already updated based on the
3801 if (nh_grp->can_destroy)
3804 nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3805 nh_grp->nhgi->adj_index_valid, false);
3807 /* Update flags of individual nexthop buckets in case of a resilient
3810 if (!nh_grp->nhgi->is_resilient)
3813 for (i = 0; i < nh_grp->nhgi->count; i++) {
3814 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3816 mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
/* Refresh the offload indication of a nexthop group by dispatching to
 * the IPv4, IPv6 or nexthop object variant according to the group type.
 */
3821 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3822 struct mlxsw_sp_nexthop_group *nh_grp)
3824 switch (nh_grp->type) {
3825 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3826 mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3828 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3829 mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3831 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3832 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
/* Bring the adjacency entries of a nexthop group in line with the
 * should_offload state of its nexthops.
 *
 * Flow, as far as it can be read from the statements below:
 * - An early exit delegates straight to
 *   mlxsw_sp_nexthop_fib_entries_update() (NOTE(review): presumably the
 *   non-gateway case - confirm the guarding condition).
 * - Each nexthop's should_offload is compared with its offloaded state to
 *   detect whether the set of offloaded nexthops changed.
 * - Without a change, the existing adjacency entries are rewritten in place
 *   (mlxsw_sp_nexthop_group_update() with false as last argument) and the
 *   offload flags are refreshed.
 * - With a change, the weights are normalized. A zero sum_norm_weight means
 *   no neighbour of the group is usable and traffic is trapped. Otherwise a
 *   supported group size is chosen, a new KVDL adjacency range is allocated,
 *   recorded in the group info, rebalanced and written to the device. Routes
 *   are then pointed at it: through a FIB entries update when no valid index
 *   existed before, or through a mass update from the old index, after which
 *   the old range is freed.
 * - The trailing section clears adj_index_valid, updates the FIB entries so
 *   they trap, refreshes the offload flags and frees the KVDL range that was
 *   valid before.
 *
 * Failures are reported with dev_warn(). NOTE(review): which failures reach
 * the trailing trap section, and the value finally returned, depend on
 * control flow that should be confirmed.
 */
3838 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3839 struct mlxsw_sp_nexthop_group *nh_grp)
3841 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3842 u16 ecmp_size, old_ecmp_size;
3843 struct mlxsw_sp_nexthop *nh;
3844 bool offload_change = false;
3846 bool old_adj_index_valid;
3851 return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3853 for (i = 0; i < nhgi->count; i++) {
3854 nh = &nhgi->nexthops[i];
3856 if (nh->should_offload != nh->offloaded) {
3857 offload_change = true;
3858 if (nh->should_offload)
3862 if (!offload_change) {
3863 /* Nothing was added or removed, so no need to reallocate. Just
3864 * update MAC on existing adjacency indexes.
3866 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
3868 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3871 /* Flags of individual nexthop buckets might need to be
3874 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3877 mlxsw_sp_nexthop_group_normalize(nhgi);
3878 if (!nhgi->sum_norm_weight) {
3879 /* No neigh of this group is connected so we just set
3880 * the trap and let everthing flow through kernel.
3886 ecmp_size = nhgi->sum_norm_weight;
3887 err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3889 /* No valid allocation size available. */
3892 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3893 ecmp_size, &adj_index);
3895 /* We ran out of KVD linear space, just set the
3896 * trap and let everything flow through kernel.
3898 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3901 old_adj_index_valid = nhgi->adj_index_valid;
3902 old_adj_index = nhgi->adj_index;
3903 old_ecmp_size = nhgi->ecmp_size;
3904 nhgi->adj_index_valid = 1;
3905 nhgi->adj_index = adj_index;
3906 nhgi->ecmp_size = ecmp_size;
3907 mlxsw_sp_nexthop_group_rebalance(nhgi);
3908 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
3910 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3914 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3916 if (!old_adj_index_valid) {
3917 /* The trap was set for fib entries, so we have to call
3918 * fib entry update to unset it and use adjacency index.
3920 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3922 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3928 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3929 old_adj_index, old_ecmp_size);
3930 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3931 old_ecmp_size, old_adj_index);
3933 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3940 old_adj_index_valid = nhgi->adj_index_valid;
3941 nhgi->adj_index_valid = 0;
3942 for (i = 0; i < nhgi->count; i++) {
3943 nh = &nhgi->nexthops[i];
3946 err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3948 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3949 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3950 if (old_adj_index_valid)
3951 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3952 nhgi->ecmp_size, nhgi->adj_index);
/* Decide how a nexthop should be programmed. Three outcomes are set here:
 * forward and offload; trap and offload when the nexthop belongs to a
 * resilient group; or do not offload at all.
 * NOTE(review): the condition of the first branch is presumably the negation
 * of the boolean passed by callers ("removing" / "not connected") - confirm.
 * Only nh->action and nh->should_offload are written; the device is not
 * touched from here.
 */
3956 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3960 nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3961 nh->should_offload = 1;
3962 } else if (nh->nhgi->is_resilient) {
3963 nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
3964 nh->should_offload = 1;
3966 nh->should_offload = 0;
/* Re-key a neighbour entry from its current (dead) kernel neighbour to a
 * live one.
 *
 * The gateway address and RIF device of the first nexthop on the entry are
 * used to look up, or failing that create, a neighbour, and resolution is
 * kicked with neigh_event_send(). The entry is removed from its table,
 * pointed at the new neighbour and re-inserted. The neighbour state is then
 * sampled under n->lock and, while walking the entry's nexthop list, the
 * reference on the old neighbour is dropped, the nexthop state is updated
 * and the owning group is refreshed.
 *
 * If re-insertion fails, the old key is restored and the entry is inserted
 * again under it.
 */
3972 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3973 struct mlxsw_sp_neigh_entry *neigh_entry)
3975 struct neighbour *n, *old_n = neigh_entry->key.n;
3976 struct mlxsw_sp_nexthop *nh;
3977 bool entry_connected;
3981 nh = list_first_entry(&neigh_entry->nexthop_list,
3982 struct mlxsw_sp_nexthop, neigh_list_node);
3984 n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3986 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3989 neigh_event_send(n, NULL);
3992 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3993 neigh_entry->key.n = n;
3994 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3996 goto err_neigh_entry_insert;
3998 read_lock_bh(&n->lock);
3999 nud_state = n->nud_state;
4001 read_unlock_bh(&n->lock);
4002 entry_connected = nud_state & NUD_VALID && !dead;
4004 list_for_each_entry(nh, &neigh_entry->nexthop_list,
4006 neigh_release(old_n);
4008 __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
4009 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4016 err_neigh_entry_insert:
4017 neigh_entry->key.n = old_n;
4018 mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
/* Propagate a neighbour state change to every nexthop using the neighbour
 * entry. An entry with no nexthops is checked for first. A dead neighbour can
 * be replaced through mlxsw_sp_nexthop_dead_neigh_replace(), with a failure
 * logged via dev_err(). Each nexthop on the entry is updated with 'removing'
 * and its group is refreshed.
 * NOTE(review): the replace step is presumably taken only when 'dead' is
 * true - confirm the guarding condition.
 */
4024 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
4025 struct mlxsw_sp_neigh_entry *neigh_entry,
4026 bool removing, bool dead)
4028 struct mlxsw_sp_nexthop *nh;
4030 if (list_empty(&neigh_entry->nexthop_list))
4036 err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
4039 dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
4043 list_for_each_entry(nh, &neigh_entry->nexthop_list,
4045 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4046 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
/* Associate a nexthop with a router interface by adding the nexthop to the
 * head of the RIF's nexthop_list.
 */
4050 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
4051 struct mlxsw_sp_rif *rif)
4057 list_add(&nh->rif_list_node, &rif->nexthop_list);
/* Undo mlxsw_sp_nexthop_rif_init(): unlink the nexthop from the nexthop list
 * of its router interface.
 */
4060 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
4065 list_del(&nh->rif_list_node);
/* Bind an Ethernet nexthop to a neighbour entry.
 *
 * Nexthops of non-gateway groups, and nexthops that already have a neighbour
 * entry, are checked for up front. A kernel neighbour for the gateway
 * address on the RIF's device is looked up or created (either way a
 * reference is held, see the comment below) and resolution is triggered with
 * neigh_event_send(). The matching driver neighbour entry is looked up or
 * created. When this is the first nexthop on the entry, the entry is also
 * added to the router's nexthop_neighs_list. Finally the neighbour state is
 * sampled under n->lock and used to set the initial nexthop state.
 *
 * The err_neigh_entry_create label is the unwind point when creating the
 * driver neighbour entry fails.
 */
4069 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
4070 struct mlxsw_sp_nexthop *nh)
4072 struct mlxsw_sp_neigh_entry *neigh_entry;
4073 struct neighbour *n;
4077 if (!nh->nhgi->gateway || nh->neigh_entry)
4080 /* Take a reference of neigh here ensuring that neigh would
4081 * not be destructed before the nexthop entry is finished.
4082 * The reference is taken either in neigh_lookup() or
4083 * in neigh_create() in case n is not found.
4085 n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
4087 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
4090 neigh_event_send(n, NULL);
4092 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
4094 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
4095 if (IS_ERR(neigh_entry)) {
4097 goto err_neigh_entry_create;
4101 /* If that is the first nexthop connected to that neigh, add to
4102 * nexthop_neighs_list
4104 if (list_empty(&neigh_entry->nexthop_list))
4105 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
4106 &mlxsw_sp->router->nexthop_neighs_list);
4108 nh->neigh_entry = neigh_entry;
4109 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
4110 read_lock_bh(&n->lock);
4111 nud_state = n->nud_state;
4113 read_unlock_bh(&n->lock);
4114 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
4118 err_neigh_entry_create:
/* Detach a nexthop from its neighbour entry. The nexthop is marked as being
 * removed, unlinked from the entry's nexthop list and its neigh_entry
 * pointer is cleared. When it was the last nexthop on the entry, the entry
 * leaves the router's nexthop_neighs_list, and an entry that is also not
 * connected is destroyed.
 */
4123 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
4124 struct mlxsw_sp_nexthop *nh)
4126 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
4127 struct neighbour *n;
4131 n = neigh_entry->key.n;
4133 __mlxsw_sp_nexthop_neigh_update(nh, true);
4134 list_del(&nh->neigh_list_node);
4135 nh->neigh_entry = NULL;
4137 /* If that is the last nexthop connected to that neigh, remove from
4138 * nexthop_neighs_list
4140 if (list_empty(&neigh_entry->nexthop_list))
4141 list_del(&neigh_entry->nexthop_neighs_list_node);
4143 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4144 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
/* Tell whether the underlay device of an IPIP overlay device is up. The
 * answer is taken from the IFF_UP flag of the underlay device; when no
 * underlay device is found, the state is considered up.
 */
4149 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4151 struct net_device *ul_dev;
4155 ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4156 is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
/* Bind a nexthop to an IPIP tunnel entry. Nexthops of non-gateway groups,
 * and nexthops already bound to a tunnel, are checked for first. The nexthop
 * state is derived from whether the tunnel's underlay device is up, and the
 * nexthop is linked to the tunnel's loopback RIF.
 */
4162 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4163 struct mlxsw_sp_nexthop *nh,
4164 struct mlxsw_sp_ipip_entry *ipip_entry)
4168 if (!nh->nhgi->gateway || nh->ipip_entry)
4171 nh->ipip_entry = ipip_entry;
4172 removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4173 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4174 mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
/* Unbind a nexthop from its IPIP tunnel entry: mark the nexthop as being
 * removed and clear its ipip_entry pointer.
 */
4177 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4178 struct mlxsw_sp_nexthop *nh)
4180 struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4185 __mlxsw_sp_nexthop_neigh_update(nh, true);
4186 nh->ipip_entry = NULL;
/* Check whether an IPv4 FIB nexthop egresses through an IPIP device: its
 * parent fib_info must be of type RTN_UNICAST and the nexthop device must be
 * recognized by mlxsw_sp_netdev_ipip_type(), which also receives 'p_ipipt'
 * (callers in this file may pass NULL for it).
 */
4189 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4190 const struct fib_nh *fib_nh,
4191 enum mlxsw_sp_ipip_type *p_ipipt)
4193 struct net_device *dev = fib_nh->fib_nh_dev;
4196 fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4197 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
/* Classify a nexthop by its egress device and set up the matching state.
 *
 * A device that maps to an IPIP entry whose ops report it as offloadable
 * makes the nexthop an IPIP nexthop. Otherwise the nexthop is typed as
 * Ethernet, the RIF of the device is looked up, and the nexthop is linked to
 * that RIF and then to a neighbour entry. If neighbour initialization fails,
 * the RIF linkage is undone.
 */
4200 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4201 struct mlxsw_sp_nexthop *nh,
4202 const struct net_device *dev)
4204 const struct mlxsw_sp_ipip_ops *ipip_ops;
4205 struct mlxsw_sp_ipip_entry *ipip_entry;
4206 struct mlxsw_sp_rif *rif;
4209 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4211 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4212 if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4213 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4214 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4219 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4220 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4224 mlxsw_sp_nexthop_rif_init(nh, rif);
4225 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4227 goto err_neigh_init;
4232 mlxsw_sp_nexthop_rif_fini(nh);
/* Tear down the type-specific state of a nexthop. Ethernet nexthops release
 * their neighbour binding and then their RIF linkage; IPIP nexthops release
 * their RIF linkage and then their tunnel binding.
 */
4236 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
4237 struct mlxsw_sp_nexthop *nh)
4240 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4241 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4242 mlxsw_sp_nexthop_rif_fini(nh);
4244 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4245 mlxsw_sp_nexthop_rif_fini(nh);
4246 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
/* Initialize a driver nexthop from an IPv4 FIB nexthop.
 *
 * The nexthop is tied to the group's info and keyed by the fib_nh pointer.
 * Its weight comes from fib_nh_weight when CONFIG_IP_ROUTE_MULTIPATH is
 * enabled, its gateway address is copied from fib_nh_gw4 and arp_tbl is used
 * as neighbour table. It is then inserted into the nexthop table, gets a
 * counter, is added to the router's nexthop_list and records the device
 * ifindex. A nexthop that is link-down on a device configured to ignore
 * link-down routes gets special treatment (NOTE(review): presumably type
 * initialization is skipped - confirm). Otherwise type-specific state is set
 * up, and on failure the nexthop is removed from the nexthop table again.
 */
4251 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4252 struct mlxsw_sp_nexthop_group *nh_grp,
4253 struct mlxsw_sp_nexthop *nh,
4254 struct fib_nh *fib_nh)
4256 struct net_device *dev = fib_nh->fib_nh_dev;
4257 struct in_device *in_dev;
4260 nh->nhgi = nh_grp->nhgi;
4261 nh->key.fib_nh = fib_nh;
4262 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4263 nh->nh_weight = fib_nh->fib_nh_weight;
4267 memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4268 nh->neigh_tbl = &arp_tbl;
4269 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4273 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4274 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4278 nh->ifindex = dev->ifindex;
4281 in_dev = __in_dev_get_rcu(dev);
4282 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4283 fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4289 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4291 goto err_nexthop_neigh_init;
4295 err_nexthop_neigh_init:
4296 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
/* Release an IPv4 nexthop: tear down its type-specific state, unlink it from
 * the router's nexthop list, free its counter and remove it from the nexthop
 * table.
 */
4300 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
4301 struct mlxsw_sp_nexthop *nh)
4303 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4304 list_del(&nh->router_list_node);
4305 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4306 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
/* Handle a FIB nexthop notification for an IPv4 nexthop. The driver nexthop
 * is looked up by its fib_nh key. FIB_EVENT_NH_ADD sets up its type-specific
 * state on the nexthop device, FIB_EVENT_NH_DEL tears that state down, and
 * the owning group is then refreshed.
 * NOTE(review): the return value of mlxsw_sp_nexthop_type_init() is not
 * consumed here.
 */
4309 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4310 unsigned long event, struct fib_nh *fib_nh)
4312 struct mlxsw_sp_nexthop_key key;
4313 struct mlxsw_sp_nexthop *nh;
4315 key.fib_nh = fib_nh;
4316 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4321 case FIB_EVENT_NH_ADD:
4322 mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4324 case FIB_EVENT_NH_DEL:
4325 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4329 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
/* Re-evaluate every nexthop linked to a RIF. For IPIP nexthops the state
 * follows whether the underlay of the RIF's device is up. Each nexthop is
 * then updated and its group refreshed.
 * NOTE(review): how 'removing' is chosen for Ethernet nexthops should be
 * confirmed.
 */
4332 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4333 struct mlxsw_sp_rif *rif)
4335 struct mlxsw_sp_nexthop *nh;
4338 list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
4340 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4343 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4344 removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
4351 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4352 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
/* Move all nexthops from one RIF to another. The old RIF's nexthop list is
 * spliced onto the new RIF's list (leaving the old list empty), the nexthops
 * now on the new list are walked, and the new RIF's nexthops are
 * re-evaluated through mlxsw_sp_nexthop_rif_update().
 */
4356 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
4357 struct mlxsw_sp_rif *old_rif,
4358 struct mlxsw_sp_rif *new_rif)
4360 struct mlxsw_sp_nexthop *nh;
4362 list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
4363 list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
4365 mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
/* React to a RIF going away: for every nexthop still linked to it, tear down
 * the type-specific state and refresh the owning group. The _safe iterator is
 * used because the teardown unlinks the nexthop from the list being walked.
 */
4368 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4369 struct mlxsw_sp_rif *rif)
4371 struct mlxsw_sp_nexthop *nh, *tmp;
4373 list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
4374 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4375 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
/* Query the device for the activity of the adjacency entries of a group and
 * record it in 'activity'. A RATRAD payload is allocated and packed with the
 * group's adjacency index and nexthop count, the register is queried, and
 * bit i of 'activity' is set for every entry i the returned activity vector
 * reports as active. Bits of inactive entries are not written, so the caller
 * is expected to pass a zeroed bitmap.
 */
4380 mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
4381 const struct mlxsw_sp_nexthop_group *nh_grp,
4382 unsigned long *activity)
4387 ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
4391 mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
4392 nh_grp->nhgi->count);
4393 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
4397 for (i = 0; i < nh_grp->nhgi->count; i++) {
4398 if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
4400 bitmap_set(activity, i, 1);
/* Delay used when scheduling the nexthop group activity work. It is also the
 * lower bound against which a bucket's idle timer is compared before a
 * non-forced bucket replacement is attempted.
 */
4407 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
/* Fetch the activity of a group's adjacency entries from the device and
 * report it to the nexthop core. A zeroed bitmap with one bit per nexthop is
 * allocated, filled by mlxsw_sp_nh_grp_activity_get(), passed to
 * nexthop_res_grp_activity_update() and freed.
 */
4410 mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
4411 const struct mlxsw_sp_nexthop_group *nh_grp)
4413 unsigned long *activity;
4415 activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
4419 mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
4420 nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4421 nh_grp->nhgi->count, activity);
4423 bitmap_free(activity);
/* Schedule the router's nexthop group activity delayed work to run after
 * MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL milliseconds.
 */
4427 mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
4429 unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
4431 mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
4432 msecs_to_jiffies(interval));
/* Delayed-work handler for nexthop group activity. Under the router lock it
 * walks nh_res_grp_list and updates the activity of every group on it; after
 * dropping the lock the work is scheduled again.
 * NOTE(review): 'reschedule' starts out false; presumably it is set when at
 * least one group was processed and gates the re-arm - confirm.
 */
4435 static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
4437 struct mlxsw_sp_nexthop_group_info *nhgi;
4438 struct mlxsw_sp_router *router;
4439 bool reschedule = false;
4441 router = container_of(work, struct mlxsw_sp_router,
4442 nh_grp_activity_dw.work);
4444 mutex_lock(&router->lock);
4446 list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
4447 mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
4451 mutex_unlock(&router->lock);
4455 mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
/* Validate a single nexthop object. Two kinds are rejected with an extack
 * message: FDB nexthops and nexthops carrying an encapsulation (has_encap).
 * NOTE(review): the error code returned alongside the message should be
 * confirmed.
 */
4459 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4460 const struct nh_notifier_single_info *nh,
4461 struct netlink_ext_ack *extack)
4466 NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4467 else if (nh->has_encap)
4468 NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
/* Validate a nexthop that is a member of a group (or a bucket of a resilient
 * group). On top of the single-nexthop checks, a member must have a gateway,
 * be a reject (blackhole) nexthop, or egress through a recognized IPIP
 * device; anything else is refused with an extack message.
 */
4476 mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
4477 const struct nh_notifier_single_info *nh,
4478 struct netlink_ext_ack *extack)
4482 err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
4486 /* Device only nexthops with an IPIP device are programmed as
4487 * encapsulating adjacency entries.
4489 if (!nh->gw_family && !nh->is_reject &&
4490 !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4491 NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
/* Validate a nexthop object group. FDB groups are refused with an extack
 * message; otherwise each of the num_nh entries is run through the group
 * entry validation.
 */
4499 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4500 const struct nh_notifier_grp_info *nh_grp,
4501 struct netlink_ext_ack *extack)
4505 if (nh_grp->is_fdb) {
4506 NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4510 for (i = 0; i < nh_grp->num_nh; i++) {
4511 const struct nh_notifier_single_info *nh;
4514 nh = &nh_grp->nh_entries[i].nh;
4515 err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
/* Validate the number of buckets of a resilient nexthop group. The count
 * must be at least 32, must fall inside one of the router's supported
 * adjacency group size ranges, and must be exactly the size the KVDL
 * allocator would hand out for it (as reported by
 * mlxsw_sp_kvdl_alloc_count_query()). Each violation sets its own extack
 * message.
 */
4525 mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
4526 const struct nh_notifier_res_table_info *nh_res_table,
4527 struct netlink_ext_ack *extack)
4529 unsigned int alloc_size;
4530 bool valid_size = false;
4533 if (nh_res_table->num_nh_buckets < 32) {
4534 NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
4538 for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
4539 const struct mlxsw_sp_adj_grp_size_range *size_range;
4541 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
4543 if (nh_res_table->num_nh_buckets >= size_range->start &&
4544 nh_res_table->num_nh_buckets <= size_range->end) {
4551 NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
4555 err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
4556 MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4557 nh_res_table->num_nh_buckets,
4559 if (err || nh_res_table->num_nh_buckets != alloc_size) {
4560 NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
/* Validate a resilient nexthop group: first its bucket count, then the
 * nexthop of every bucket using the group entry validation.
 */
4568 mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
4569 const struct nh_notifier_res_table_info *nh_res_table,
4570 struct netlink_ext_ack *extack)
4575 err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
4581 for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
4582 const struct nh_notifier_single_info *nh;
4585 nh = &nh_res_table->nhs[i];
4586 err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
/* Entry point for validating a nexthop notification. Only the REPLACE,
 * RES_TABLE_PRE_REPLACE and BUCKET_REPLACE events are subject to validation.
 * For those, the check is selected by the notification type: single nexthop,
 * group, resilient table, or resilient bucket (whose new nexthop is
 * validated as a group entry). Any other type is reported as unsupported via
 * extack.
 */
4595 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4596 unsigned long event,
4597 struct nh_notifier_info *info)
4599 struct nh_notifier_single_info *nh;
4601 if (event != NEXTHOP_EVENT_REPLACE &&
4602 event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
4603 event != NEXTHOP_EVENT_BUCKET_REPLACE)
4606 switch (info->type) {
4607 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4608 return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4610 case NH_NOTIFIER_INFO_TYPE_GRP:
4611 return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
4614 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4615 return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
4618 case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
4619 nh = &info->nh_res_bucket->new_nh;
4620 return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4623 NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
/* Tell whether the nexthop object behind a notification is treated as a
 * gateway. A single nexthop counts as one when it has a gateway family, is a
 * reject nexthop, or egresses through a recognized IPIP device. Groups and
 * resilient tables were already validated to consist of such members.
 */
4628 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4629 const struct nh_notifier_info *info)
4631 const struct net_device *dev;
4633 switch (info->type) {
4634 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4635 dev = info->nh->dev;
4636 return info->nh->gw_family || info->nh->is_reject ||
4637 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4638 case NH_NOTIFIER_INFO_TYPE_GRP:
4639 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4640 /* Already validated earlier. */
/* Turn a nexthop into a blackhole: its action becomes DISCARD, it is marked
 * for offload, and it is pointed at the router's loopback RIF.
 */
4647 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4648 struct mlxsw_sp_nexthop *nh)
4650 u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
4652 nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
4653 nh->should_offload = 1;
4654 /* While nexthops that discard packets do not forward packets
4655 * via an egress RIF, they still need to be programmed using a
4656 * valid RIF, so use the loopback RIF created during init.
4658 nh->rif = mlxsw_sp->router->rifs[lb_rif_index];
/* Undo mlxsw_sp_nexthop_obj_blackhole_init(): the nexthop is no longer
 * marked for offload.
 */
4661 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4662 struct mlxsw_sp_nexthop *nh)
4665 nh->should_offload = 0;
/* Initialize a driver nexthop from a nexthop object.
 *
 * The nexthop is tied to the group's info and given 'weight'. Depending on
 * the gateway family, the IPv4 or IPv6 gateway address is copied and arp_tbl
 * or (with CONFIG_IPV6) nd_tbl is chosen as neighbour table. A counter is
 * allocated, the nexthop joins the router's nexthop_list, the device ifindex
 * is recorded and type-specific state is set up. Reject nexthops are then
 * converted to blackholes, and in resilient groups a nexthop that would not
 * be offloaded is programmed to trap instead, so that every bucket is
 * written to the adjacency table.
 *
 * On failure the list membership and the counter are released.
 */
4669 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
4670 struct mlxsw_sp_nexthop_group *nh_grp,
4671 struct mlxsw_sp_nexthop *nh,
4672 struct nh_notifier_single_info *nh_obj, int weight)
4674 struct net_device *dev = nh_obj->dev;
4677 nh->nhgi = nh_grp->nhgi;
4678 nh->nh_weight = weight;
4680 switch (nh_obj->gw_family) {
4682 memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
4683 nh->neigh_tbl = &arp_tbl;
4686 memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
4687 #if IS_ENABLED(CONFIG_IPV6)
4688 nh->neigh_tbl = &nd_tbl;
4693 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4694 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4695 nh->ifindex = dev->ifindex;
4697 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4701 if (nh_obj->is_reject)
4702 mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
4704 /* In a resilient nexthop group, all the nexthops must be written to
4705 * the adjacency table. Even if they do not have a valid neighbour or
4708 if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
4709 nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4710 nh->should_offload = 1;
4716 list_del(&nh->router_list_node);
4717 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
/* Release a nexthop created by mlxsw_sp_nexthop_obj_init(): undo the
 * blackhole setup for DISCARD nexthops, tear down type-specific state,
 * unlink from the router's nexthop list, free the counter and clear
 * should_offload.
 */
4721 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
4722 struct mlxsw_sp_nexthop *nh)
4724 if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
4725 mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
4726 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4727 list_del(&nh->router_list_node);
4728 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4729 nh->should_offload = 0;
/* Build the group info of a nexthop object group from a notification.
 *
 * The number of nexthops comes from the notification type: num_nh for a
 * group, num_nh_buckets for a resilient table (which also marks the group as
 * resilient). The info is allocated with room for that many nexthops, linked
 * with the group in both directions, and each nexthop is initialized from
 * the matching notifier entry, using the entry's weight for plain groups.
 * The group is then refreshed so its adjacency entries reach the device, and
 * a failure there is reported through extack. Resilient groups are added to
 * the router's nh_res_grp_list, and the activity work is scheduled when the
 * list was empty before.
 *
 * On error, nexthops that were already initialized are finalized in reverse
 * order.
 */
4733 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
4734 struct mlxsw_sp_nexthop_group *nh_grp,
4735 struct nh_notifier_info *info)
4737 struct mlxsw_sp_nexthop_group_info *nhgi;
4738 struct mlxsw_sp_nexthop *nh;
4739 bool is_resilient = false;
4743 switch (info->type) {
4744 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4747 case NH_NOTIFIER_INFO_TYPE_GRP:
4748 nhs = info->nh_grp->num_nh;
4750 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4751 nhs = info->nh_res_table->num_nh_buckets;
4752 is_resilient = true;
4758 nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
4761 nh_grp->nhgi = nhgi;
4762 nhgi->nh_grp = nh_grp;
4763 nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
4764 nhgi->is_resilient = is_resilient;
4766 for (i = 0; i < nhgi->count; i++) {
4767 struct nh_notifier_single_info *nh_obj;
4770 nh = &nhgi->nexthops[i];
4771 switch (info->type) {
4772 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4776 case NH_NOTIFIER_INFO_TYPE_GRP:
4777 nh_obj = &info->nh_grp->nh_entries[i].nh;
4778 weight = info->nh_grp->nh_entries[i].weight;
4780 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4781 nh_obj = &info->nh_res_table->nhs[i];
4786 goto err_nexthop_obj_init;
4788 err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
4791 goto err_nexthop_obj_init;
4793 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4795 NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
4796 goto err_group_refresh;
4799 /* Add resilient nexthop groups to a list so that the activity of their
4800 * nexthop buckets will be periodically queried and cleared.
4802 if (nhgi->is_resilient) {
4803 if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
4804 mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
4805 list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
4812 err_nexthop_obj_init:
4813 for (i--; i >= 0; i--) {
4814 nh = &nhgi->nexthops[i];
4815 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
/* Tear down the group info of a nexthop object group. A resilient group
 * leaves the router's nh_res_grp_list, and the activity work is cancelled
 * once that list is empty. The nexthops are finalized from last to first and
 * the group is refreshed, after which no valid adjacency index is expected
 * to remain (a warning is issued once otherwise).
 */
4822 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
4823 struct mlxsw_sp_nexthop_group *nh_grp)
4825 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
4826 struct mlxsw_sp_router *router = mlxsw_sp->router;
4829 if (nhgi->is_resilient) {
4830 list_del(&nhgi->list);
4831 if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
4832 cancel_delayed_work(&router->nh_grp_activity_dw);
4835 for (i = nhgi->count - 1; i >= 0; i--) {
4836 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
4838 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
4840 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4841 WARN_ON_ONCE(nhgi->adj_index_valid);
/* Allocate and initialize a nexthop group of type OBJ for a notification.
 * The virtual router list and hash table and the FIB list are set up, the
 * object ID is copied from the notification, and the group info is built.
 * The new group starts with can_destroy cleared.
 *
 * Returns the group, or an ERR_PTR() on failure (-ENOMEM when the group
 * itself cannot be allocated). The error labels unwind the hash table.
 */
4845 static struct mlxsw_sp_nexthop_group *
4846 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
4847 struct nh_notifier_info *info)
4849 struct mlxsw_sp_nexthop_group *nh_grp;
4852 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
4854 return ERR_PTR(-ENOMEM);
4855 INIT_LIST_HEAD(&nh_grp->vr_list);
4856 err = rhashtable_init(&nh_grp->vr_ht,
4857 &mlxsw_sp_nexthop_group_vr_ht_params);
4859 goto err_nexthop_group_vr_ht_init;
4860 INIT_LIST_HEAD(&nh_grp->fib_list);
4861 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4862 nh_grp->obj.id = info->id;
4864 err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
4866 goto err_nexthop_group_info_init;
4868 nh_grp->can_destroy = false;
4872 err_nexthop_group_info_init:
4873 rhashtable_destroy(&nh_grp->vr_ht);
4874 err_nexthop_group_vr_ht_init:
4876 return ERR_PTR(err);
/* Destroy a nexthop object group. The can_destroy flag is checked first.
 * The group info is then torn down, the FIB and virtual router lists are
 * expected to be empty (a warning is issued once otherwise), and the virtual
 * router hash table is destroyed.
 */
4880 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
4881 struct mlxsw_sp_nexthop_group *nh_grp)
4883 if (!nh_grp->can_destroy)
4885 mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
4886 WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
4887 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
4888 rhashtable_destroy(&nh_grp->vr_ht);
/* Look up a nexthop object group in the router's nexthop group hash table,
 * using a compare argument of type OBJ.
 * NOTE(review): 'id' is presumably stored in the compare argument as the
 * object ID before the lookup - confirm.
 */
4892 static struct mlxsw_sp_nexthop_group *
4893 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
4895 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
4897 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4899 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
4901 mlxsw_sp_nexthop_group_ht_params);
/* Make a newly created nexthop object group visible by inserting it through
 * mlxsw_sp_nexthop_group_insert(); that function's result is returned as is.
 */
4904 static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
4905 struct mlxsw_sp_nexthop_group *nh_grp)
4907 return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
/* Replace the contents of an existing nexthop object group with those of a
 * freshly created one.
 *
 * The group infos of the two groups are swapped, so the existing group (the
 * one routes reference) now owns the new info and the new group becomes a
 * shell holding the old info. Routes are then moved over according to the
 * validity of the old and new adjacency indexes: a mass update when both are
 * valid, or a FIB entries update when exactly one of them is. The hardware
 * flags are refreshed for the existing group and the shell is destroyed
 * together with the old info.
 *
 * If moving the routes fails, an extack message is set and the swap is
 * reverted so both groups own their original info again.
 */
4911 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
4912 struct mlxsw_sp_nexthop_group *nh_grp,
4913 struct mlxsw_sp_nexthop_group *old_nh_grp,
4914 struct netlink_ext_ack *extack)
4916 struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
4917 struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
4920 old_nh_grp->nhgi = new_nhgi;
4921 new_nhgi->nh_grp = old_nh_grp;
4922 nh_grp->nhgi = old_nhgi;
4923 old_nhgi->nh_grp = nh_grp;
4925 if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
4926 /* Both the old adjacency index and the new one are valid.
4927 * Routes are currently using the old one. Tell the device to
4928 * replace the old adjacency index with the new one.
4930 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
4931 old_nhgi->adj_index,
4932 old_nhgi->ecmp_size);
4934 NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
4937 } else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
4938 /* The old adjacency index is valid, while the new one is not.
4939 * Iterate over all the routes using the group and change them
4940 * to trap packets to the CPU.
4942 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
4944 NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
4947 } else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
4948 /* The old adjacency index is invalid, while the new one is.
4949 * Iterate over all the routes using the group and change them
4950 * to forward packets using the new valid index.
4952 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
4954 NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
4959 /* Make sure the flags are set / cleared based on the new nexthop group
4962 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
4964 /* At this point 'nh_grp' is just a shell that is not used by anyone
4965 * and its nexthop group info is the old info that was just replaced
4966 * with the new one. Remove it.
4968 nh_grp->can_destroy = true;
4969 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
4974 old_nhgi->nh_grp = old_nh_grp;
4975 nh_grp->nhgi = new_nhgi;
4976 new_nhgi->nh_grp = nh_grp;
4977 old_nh_grp->nhgi = old_nhgi;
/* Handle creation or replacement of a nexthop object. A new group is always
 * created from the notification; a creation failure is returned as the
 * PTR_ERR() of the result. An existing group with the same ID is then looked
 * up, and the new group is either added as is or used to replace the
 * existing one. On failure the new group is marked destroyable and
 * destroyed.
 * NOTE(review): presumably "add" is chosen when the lookup finds nothing and
 * "replace" otherwise - confirm the branch condition.
 */
4981 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
4982 struct nh_notifier_info *info)
4984 struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
4985 struct netlink_ext_ack *extack = info->extack;
4988 nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
4990 return PTR_ERR(nh_grp);
4992 old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
4994 err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
4996 err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
4997 old_nh_grp, extack);
5000 nh_grp->can_destroy = true;
5001 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
/* Handle deletion of a nexthop object. The group is looked up by ID, marked
 * destroyable and removed from the group table. It is destroyed right away
 * unless routes still use it, in which case destruction is deferred until
 * the last such route is deleted.
 */
5007 static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
5008 struct nh_notifier_info *info)
5010 struct mlxsw_sp_nexthop_group *nh_grp;
5012 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5016 nh_grp->can_destroy = true;
5017 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5019 /* If the group still has routes using it, then defer the delete
5020 * operation until the last route using it is deleted.
5022 if (!list_empty(&nh_grp->fib_list))
5024 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
/* Read back one adjacency entry from the device. The RATR payload is zeroed,
 * set up for a query-read of 'adj_index' (split into its low part and the
 * bits above bit 15) and sent with mlxsw_reg_query(), whose result is
 * returned. The caller provides the 'ratr_pl' buffer.
 */
5027 static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
5028 u32 adj_index, char *ratr_pl)
5030 MLXSW_REG_ZERO(ratr, ratr_pl);
5031 mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5032 mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
5033 mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
5035 return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
/* Compare the adjacency entry that was requested ('ratr_pl') with the one
 * read back from the device ('ratr_pl_new'). Both payloads are modified in
 * place: the opcode is forced to QUERY_READ and the activity bit is cleared
 * so that only the remaining contents take part in the memcmp(). Equal
 * payloads mean the replacement took effect.
 * NOTE(review): the values returned for match and mismatch should be
 * confirmed.
 */
5038 static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
5040 /* Clear the opcode and activity on both the old and new payload as
5041 * they are irrelevant for the comparison.
5043 mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5044 mlxsw_reg_ratr_a_set(ratr_pl, 0);
5045 mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
5046 mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
5048 /* If the contents of the adjacency entry are consistent with the
5049 * replacement request, then replacement was successful.
5051 if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
/* Write a replaced nexthop bucket to its adjacency entry and verify that the
 * write took effect.
 *
 * A non-forced replacement is not attempted when the bucket's idle timer is
 * shorter than the activity polling interval. Otherwise the entry at
 * (group adjacency index + bucket index) is written with
 * mlxsw_sp_nexthop_update(), passing 'force' and capturing the written
 * payload. The entry is then read back and compared with what was written,
 * and a mismatch is reported as the bucket having been active during the
 * replacement. On success the bucket's hardware flags are refreshed. Every
 * failure sets an extack message.
 * NOTE(review): the read-back verification is presumably skipped for forced
 * replacements - confirm.
 */
5058 mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
5059 struct mlxsw_sp_nexthop *nh,
5060 struct nh_notifier_info *info)
5062 u16 bucket_index = info->nh_res_bucket->bucket_index;
5063 struct netlink_ext_ack *extack = info->extack;
5064 bool force = info->nh_res_bucket->force;
5065 char ratr_pl_new[MLXSW_REG_RATR_LEN];
5066 char ratr_pl[MLXSW_REG_RATR_LEN];
5070 /* No point in trying an atomic replacement if the idle timer interval
5071 * is smaller than the interval in which we query and clear activity.
5073 if (!force && info->nh_res_bucket->idle_timer_ms <
5074 MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
5077 adj_index = nh->nhgi->adj_index + bucket_index;
5078 err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
5080 NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
5085 err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
5088 NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
5092 err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
5094 NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
5101 mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
/* Handle replacement of a single bucket in a resilient nexthop group.
 *
 * The group is looked up by ID and the bucket index is checked against the
 * number of nexthops in the group info; both failures set an extack message.
 * The nexthop at that index is finalized, re-initialized from the new
 * nexthop with weight 1, and written to the device.
 *
 * If the device update fails, the freshly initialized nexthop is finalized
 * again. In both error paths the slot is re-initialized from the old
 * nexthop, since the old adjacency entry was not overwritten.
 * NOTE(review): the result of that restoring mlxsw_sp_nexthop_obj_init()
 * call is not consumed.
 */
5106 static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
5107 struct nh_notifier_info *info)
5109 u16 bucket_index = info->nh_res_bucket->bucket_index;
5110 struct netlink_ext_ack *extack = info->extack;
5111 struct mlxsw_sp_nexthop_group_info *nhgi;
5112 struct nh_notifier_single_info *nh_obj;
5113 struct mlxsw_sp_nexthop_group *nh_grp;
5114 struct mlxsw_sp_nexthop *nh;
5117 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5119 NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
5123 nhgi = nh_grp->nhgi;
5125 if (bucket_index >= nhgi->count) {
5126 NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
5130 nh = &nhgi->nexthops[bucket_index];
5131 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5133 nh_obj = &info->nh_res_bucket->new_nh;
5134 err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5136 NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
5137 goto err_nexthop_obj_init;
5140 err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
5142 goto err_nexthop_obj_bucket_adj_update;
5146 err_nexthop_obj_bucket_adj_update:
5147 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5148 err_nexthop_obj_init:
5149 nh_obj = &info->nh_res_bucket->old_nh;
5150 mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5151 /* The old adjacency entry was not overwritten */
/* Nexthop notifier callback.
 *
 * The router is recovered from the embedded nexthop_nb notifier block and
 * the event is validated first. Replace, delete and bucket-replace events
 * are then handled while holding router->lock. The resulting errno is
 * converted to a notifier return value with notifier_from_errno().
 */
5157 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
5158 unsigned long event, void *ptr)
5160 struct nh_notifier_info *info = ptr;
5161 struct mlxsw_sp_router *router;
5164 router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
5165 err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
5169 mutex_lock(&router->lock);
5172 case NEXTHOP_EVENT_REPLACE:
5173 err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
5175 case NEXTHOP_EVENT_DEL:
5176 mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
5178 case NEXTHOP_EVENT_BUCKET_REPLACE:
5179 err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
5186 mutex_unlock(&router->lock);
5189 return notifier_from_errno(err);
/* Decide whether the route described by @fi is treated as a gateway route.
 * Only the first nexthop of @fi is inspected: the result is true when its
 * scope is RT_SCOPE_LINK or when mlxsw_sp_nexthop4_ipip_type() accepts it
 * (presumably meaning the nexthop device is an offloadable IP-in-IP
 * tunnel - confirm against that helper).
 */
5192 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5193 struct fib_info *fi)
5195 const struct fib_nh *nh = fib_info_nh(fi, 0);
5197 return nh->fib_nh_scope == RT_SCOPE_LINK ||
5198 mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
/* Allocate and populate the nexthop group info of an IPv4 nexthop group.
 *
 * The info structure is sized for one nexthop per path of nh_grp->ipv4.fi
 * and is linked to @nh_grp in both directions. Each nexthop is initialised
 * from the fib_nh with the same index, and the group is then refreshed.
 * The unwind loop at the end finalises the nexthops that were already
 * initialised, from the last one back to the first.
 */
5202 mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
5203 struct mlxsw_sp_nexthop_group *nh_grp)
5205 unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
5206 struct mlxsw_sp_nexthop_group_info *nhgi;
5207 struct mlxsw_sp_nexthop *nh;
/* struct_size() accounts for the trailing nexthops[] array. */
5210 nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5213 nh_grp->nhgi = nhgi;
5214 nhgi->nh_grp = nh_grp;
5215 nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
5217 for (i = 0; i < nhgi->count; i++) {
5218 struct fib_nh *fib_nh;
5220 nh = &nhgi->nexthops[i];
5221 fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
5222 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
5224 goto err_nexthop4_init;
5226 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5228 goto err_group_refresh;
/* Undo only the nexthops that were successfully initialised. */
5235 for (i--; i >= 0; i--) {
5236 nh = &nhgi->nexthops[i];
5237 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
/* Tear down the nexthop group info of an IPv4 nexthop group.
 * Every nexthop is finalised from the last index down to the first, and
 * the group is refreshed afterwards. A one-time warning fires if the
 * group still claims a valid adjacency index after that refresh.
 */
5244 mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5245 struct mlxsw_sp_nexthop_group *nh_grp)
5247 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5250 for (i = nhgi->count - 1; i >= 0; i--) {
5251 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5253 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5255 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5256 WARN_ON_ONCE(nhgi->adj_index_valid);
/* Create an IPv4 nexthop group for the FIB info @fi.
 *
 * The group is allocated zeroed, its virtual-router list and hash table
 * and its FIB-entry list are initialised, and it is typed as an IPv4
 * group referring to @fi. The group info is then initialised and the
 * group is inserted via mlxsw_sp_nexthop_group_insert(); only after that
 * is it marked as destroyable. Errors unwind in reverse order and are
 * returned as ERR_PTR() values.
 */
5260 static struct mlxsw_sp_nexthop_group *
5261 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
5263 struct mlxsw_sp_nexthop_group *nh_grp;
5266 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5268 return ERR_PTR(-ENOMEM);
5269 INIT_LIST_HEAD(&nh_grp->vr_list);
5270 err = rhashtable_init(&nh_grp->vr_ht,
5271 &mlxsw_sp_nexthop_group_vr_ht_params);
5273 goto err_nexthop_group_vr_ht_init;
5274 INIT_LIST_HEAD(&nh_grp->fib_list);
5275 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
5276 nh_grp->ipv4.fi = fi;
5279 err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
5281 goto err_nexthop_group_info_init;
5283 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5285 goto err_nexthop_group_insert;
5287 nh_grp->can_destroy = true;
5291 err_nexthop_group_insert:
5292 mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5293 err_nexthop_group_info_init:
5295 rhashtable_destroy(&nh_grp->vr_ht);
5296 err_nexthop_group_vr_ht_init:
5298 return ERR_PTR(err);
/* Destroy an IPv4 nexthop group.
 * The can_destroy flag is checked first. The group is then removed via
 * mlxsw_sp_nexthop_group_remove(), its group info is torn down and the
 * reference on its FIB info is dropped. The virtual-router list is
 * expected to be empty by now (warned about otherwise) before the
 * virtual-router hash table is destroyed.
 */
5302 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
5303 struct mlxsw_sp_nexthop_group *nh_grp)
5305 if (!nh_grp->can_destroy)
5307 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5308 mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5309 fib_info_put(nh_grp->ipv4.fi);
5310 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5311 rhashtable_destroy(&nh_grp->vr_ht);
/* Attach @fib_entry to the nexthop group that serves @fi.
 *
 * Two sources are consulted: a nexthop-object group lookup (its absence is
 * warned about once) and a lookup of an existing IPv4 group keyed by @fi,
 * with a new IPv4 group created as the fallback. The entry is appended to
 * the group's fib_list and fib_entry->nh_group is pointed at the group.
 * Presumably the object lookup applies only when @fi refers to a nexthop
 * object - confirm against the branch condition.
 */
5315 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
5316 struct mlxsw_sp_fib_entry *fib_entry,
5317 struct fib_info *fi)
5319 struct mlxsw_sp_nexthop_group *nh_grp;
5322 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
5324 if (WARN_ON_ONCE(!nh_grp))
5329 nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
5331 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
5333 return PTR_ERR(nh_grp);
5336 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
5337 fib_entry->nh_group = nh_grp;
/* Detach @fib_entry from its nexthop group.
 * The entry is unlinked from the group's fib_list. The destroy calls that
 * follow the list_empty() check handle the case of the last user going
 * away: nexthop-object groups go through the object destroy path and all
 * other groups through the IPv4 destroy path.
 */
5341 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
5342 struct mlxsw_sp_fib_entry *fib_entry)
5344 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5346 list_del(&fib_entry->nexthop_group_node);
5347 if (!list_empty(&nh_grp->fib_list))
5350 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
5351 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5355 mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
/* IPv4-specific offload predicate: an entry qualifies only when its TOS
 * value is zero.
 */
5359 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5361 struct mlxsw_sp_fib4_entry *fib4_entry;
5363 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5365 return !fib4_entry->tos;
/* Decide whether @fib_entry should be reported as offloaded.
 *
 * A protocol-specific check runs first (IPv4 entries must pass
 * mlxsw_sp_fib4_entry_should_offload()). The entry type then decides:
 * remote entries need a valid adjacency index on their nexthop group,
 * local entries need the group to have a nexthop RIF, and blackhole,
 * IP-in-IP decap and NVE decap entries are handled together.
 */
5369 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5371 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5373 switch (fib_entry->fib_node->fib->proto) {
5374 case MLXSW_SP_L3_PROTO_IPV4:
5375 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
5378 case MLXSW_SP_L3_PROTO_IPV6:
5382 switch (fib_entry->type) {
5383 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5384 return !!nh_group->nhgi->adj_index_valid;
5385 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5386 return !!nh_group->nhgi->nh_rif;
5387 case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5388 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5389 case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
/* Search @nh_grp for the nexthop that corresponds to the IPv6 route held
 * by @mlxsw_sp_rt6. A nexthop matches when it has a RIF whose device is
 * the route's nexthop device and its gateway address equals the route's
 * IPv6 gateway address.
 */
5396 static struct mlxsw_sp_nexthop *
5397 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
5398 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5402 for (i = 0; i < nh_grp->nhgi->count; i++) {
5403 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
5404 struct fib6_info *rt = mlxsw_sp_rt6->rt;
5406 if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
5407 ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
5408 &rt->fib6_nh->fib_nh_gw6))
/* Mark the IPv4 route described by @fen_info as having failed offload.
 * A fib_rt_info is filled in from the notifier info with offload cleared
 * and offload_failed set, and handed to fib_alias_hw_flags_set().
 */
5416 mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5417 struct fib_entry_notifier_info *fen_info)
5419 u32 *p_dst = (u32 *) &fen_info->dst;
5420 struct fib_rt_info fri;
5422 fri.fi = fen_info->fi;
5423 fri.tb_id = fen_info->tb_id;
5424 fri.dst = cpu_to_be32(*p_dst);
5425 fri.dst_len = fen_info->dst_len;
5426 fri.tos = fen_info->tos;
5427 fri.type = fen_info->type;
5428 fri.offload = false;
5430 fri.offload_failed = true;
5431 fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
/* Publish the hardware flags of an IPv4 FIB entry.
 * The route is identified by the destination and prefix length stored in
 * the FIB node key plus the FIB info, table, TOS and type stored in the
 * IPv4 entry. The offload flag follows
 * mlxsw_sp_fib_entry_should_offload(), the trap flag is its inverse, and
 * offload_failed is cleared.
 */
5435 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5436 struct mlxsw_sp_fib_entry *fib_entry)
5438 u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5439 int dst_len = fib_entry->fib_node->key.prefix_len;
5440 struct mlxsw_sp_fib4_entry *fib4_entry;
5441 struct fib_rt_info fri;
5442 bool should_offload;
5444 should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5445 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5447 fri.fi = fib4_entry->fi;
5448 fri.tb_id = fib4_entry->tb_id;
5449 fri.dst = cpu_to_be32(*p_dst);
5450 fri.dst_len = dst_len;
5451 fri.tos = fib4_entry->tos;
5452 fri.type = fib4_entry->type;
5453 fri.offload = should_offload;
5454 fri.trap = !should_offload;
5455 fri.offload_failed = false;
5456 fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
/* Clear the hardware flags of an IPv4 FIB entry.
 * The route is identified the same way as when the flags are set (node
 * key plus the IPv4 entry's FIB info, table, TOS and type); the offload
 * and offload_failed flags are reported as false.
 */
5460 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5461 struct mlxsw_sp_fib_entry *fib_entry)
5463 u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5464 int dst_len = fib_entry->fib_node->key.prefix_len;
5465 struct mlxsw_sp_fib4_entry *fib4_entry;
5466 struct fib_rt_info fri;
5468 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5470 fri.fi = fib4_entry->fi;
5471 fri.tb_id = fib4_entry->tb_id;
5472 fri.dst = cpu_to_be32(*p_dst);
5473 fri.dst_len = dst_len;
5474 fri.tos = fib4_entry->tos;
5475 fri.type = fib4_entry->type;
5476 fri.offload = false;
5478 fri.offload_failed = false;
5479 fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5482 #if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of the offload-failed marking: every route in @rt_arr
 * gets its flags set to offload = false, trap = false,
 * offload_failed = true through fib6_info_hw_flags_set().
 */
5484 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5485 struct fib6_info **rt_arr,
5490 /* In IPv6 a multipath route is represented using multiple routes, so
5491 * we need to set the flags on all of them.
5493 for (i = 0; i < nrt6; i++)
5494 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
5495 false, false, true);
/* Second definition of the offload-failed helper. Presumably the fallback
 * used when CONFIG_IPV6 is disabled - confirm against the surrounding
 * preprocessor conditionals.
 */
5499 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5500 struct fib6_info **rt_arr,
5506 #if IS_ENABLED(CONFIG_IPV6)
/* Publish the hardware flags of an IPv6 FIB entry. Every route on the
 * entry's rt6_list receives offload = should_offload,
 * trap = !should_offload and offload_failed = false, where should_offload
 * comes from mlxsw_sp_fib_entry_should_offload().
 */
5508 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5509 struct mlxsw_sp_fib_entry *fib_entry)
5511 struct mlxsw_sp_fib6_entry *fib6_entry;
5512 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5513 bool should_offload;
5515 should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5517 /* In IPv6 a multipath route is represented using multiple routes, so
5518 * we need to set the flags on all of them.
5520 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5522 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5523 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5524 should_offload, !should_offload, false);
/* Second definition of the IPv6 flag-setting helper. Presumably the
 * fallback used when CONFIG_IPV6 is disabled - confirm against the
 * surrounding preprocessor conditionals.
 */
5528 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5529 struct mlxsw_sp_fib_entry *fib_entry)
5534 #if IS_ENABLED(CONFIG_IPV6)
/* Clear the hardware flags of an IPv6 FIB entry: every route on the
 * entry's rt6_list has offload, trap and offload_failed set to false.
 */
5536 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5537 struct mlxsw_sp_fib_entry *fib_entry)
5539 struct mlxsw_sp_fib6_entry *fib6_entry;
5540 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5542 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5544 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5545 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5546 false, false, false);
/* Second definition of the IPv6 flag-clearing helper. Presumably the
 * fallback used when CONFIG_IPV6 is disabled - confirm against the
 * surrounding preprocessor conditionals.
 */
5550 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5551 struct mlxsw_sp_fib_entry *fib_entry)
/* Protocol dispatcher: set the hardware flags of @fib_entry using the
 * IPv4 or IPv6 helper, selected by the protocol of the entry's FIB.
 */
5557 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5558 struct mlxsw_sp_fib_entry *fib_entry)
5560 switch (fib_entry->fib_node->fib->proto) {
5561 case MLXSW_SP_L3_PROTO_IPV4:
5562 mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
5564 case MLXSW_SP_L3_PROTO_IPV6:
5565 mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
/* Protocol dispatcher: clear the hardware flags of @fib_entry using the
 * IPv4 or IPv6 helper, selected by the protocol of the entry's FIB.
 */
5571 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5572 struct mlxsw_sp_fib_entry *fib_entry)
5574 switch (fib_entry->fib_node->fib->proto) {
5575 case MLXSW_SP_L3_PROTO_IPV4:
5576 mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5578 case MLXSW_SP_L3_PROTO_IPV6:
5579 mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
/* Bring the hardware flags of @fib_entry in line with the operation that
 * was just performed on it: write and update operations set the flags,
 * a delete operation clears them.
 */
5585 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
5586 struct mlxsw_sp_fib_entry *fib_entry,
5587 enum mlxsw_sp_fib_entry_op op)
5590 case MLXSW_SP_FIB_ENTRY_OP_WRITE:
5591 case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
5592 mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
5594 case MLXSW_SP_FIB_ENTRY_OP_DELETE:
5595 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
/* Private operation context of the "basic" low-level FIB ops. It is
 * reached through op_ctx->ll_priv and holds the RALUE register payload
 * that the pack helpers fill in and the commit helper writes.
 */
5602 struct mlxsw_sp_fib_entry_op_ctx_basic {
5603 char ralue_pl[MLXSW_REG_RALUE_LEN];
/* Start a RALUE payload for a FIB entry in the basic op context.
 *
 * The generic operation is translated to a RALUE operation (write and
 * update both become WRITE_WRITE, delete becomes WRITE_DELETE) and the
 * payload is packed with the IPv4 or IPv6 packer according to @proto.
 * @proto is cast directly to the register's protocol enum.
 */
5607 mlxsw_sp_router_ll_basic_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5608 enum mlxsw_sp_l3proto proto,
5609 enum mlxsw_sp_fib_entry_op op,
5610 u16 virtual_router, u8 prefix_len,
5611 unsigned char *addr,
5612 struct mlxsw_sp_fib_entry_priv *priv)
5614 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5615 enum mlxsw_reg_ralxx_protocol ralxx_proto;
5616 char *ralue_pl = op_ctx_basic->ralue_pl;
5617 enum mlxsw_reg_ralue_op ralue_op;
5619 ralxx_proto = (enum mlxsw_reg_ralxx_protocol) proto;
5622 case MLXSW_SP_FIB_ENTRY_OP_WRITE:
5623 case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
5624 ralue_op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
5626 case MLXSW_SP_FIB_ENTRY_OP_DELETE:
5627 ralue_op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
5635 case MLXSW_SP_L3_PROTO_IPV4:
5636 mlxsw_reg_ralue_pack4(ralue_pl, ralxx_proto, ralue_op,
5637 virtual_router, prefix_len, (u32 *) addr);
5639 case MLXSW_SP_L3_PROTO_IPV6:
5640 mlxsw_reg_ralue_pack6(ralue_pl, ralxx_proto, ralue_op,
5641 virtual_router, prefix_len, addr);
/* Add a "remote" action to the RALUE payload held in the basic op
 * context: trap action and trap ID plus the adjacency index and ECMP
 * size.
 */
5647 mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5648 enum mlxsw_reg_ralue_trap_action trap_action,
5649 u16 trap_id, u32 adjacency_index, u16 ecmp_size)
5651 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5653 mlxsw_reg_ralue_act_remote_pack(op_ctx_basic->ralue_pl, trap_action,
5654 trap_id, adjacency_index, ecmp_size);
/* Add a "local" action to the RALUE payload held in the basic op
 * context: trap action and trap ID plus the local egress RIF.
 */
5658 mlxsw_sp_router_ll_basic_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5659 enum mlxsw_reg_ralue_trap_action trap_action,
5660 u16 trap_id, u16 local_erif)
5662 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5664 mlxsw_reg_ralue_act_local_pack(op_ctx_basic->ralue_pl, trap_action,
5665 trap_id, local_erif);
/* Add an "ip2me" action to the RALUE payload held in the basic op
 * context.
 */
5669 mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
5671 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5673 mlxsw_reg_ralue_act_ip2me_pack(op_ctx_basic->ralue_pl);
/* Add an "ip2me with tunnel" action, carrying the tunnel pointer, to the
 * RALUE payload held in the basic op context.
 */
5677 mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5680 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5682 mlxsw_reg_ralue_act_ip2me_tun_pack(op_ctx_basic->ralue_pl, tunnel_ptr);
/* Commit the prepared entry by writing the RALUE payload from the basic
 * op context to the device. Returns the result of the register write.
 * @postponed_for_bulk is not touched by the visible code.
 */
5686 mlxsw_sp_router_ll_basic_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
5687 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5688 bool *postponed_for_bulk)
5690 struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
5692 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5693 op_ctx_basic->ralue_pl);
/* "Is committed" predicate of the basic low-level ops for the private
 * data of a FIB entry. NOTE(review): presumably installed as the
 * fib_entry_is_committed op that mlxsw_sp_fib_entry_del() consults -
 * confirm against the ops table.
 */
5697 mlxsw_sp_router_ll_basic_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv)
/* Begin packing @fib_entry for operation @op.
 * The entry's private data is first held in @op_ctx, then the FIB's
 * low-level fib_entry_pack op is invoked with the FIB protocol, the
 * virtual router ID and the prefix length and address from the node key.
 */
5702 static void mlxsw_sp_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5703 struct mlxsw_sp_fib_entry *fib_entry,
5704 enum mlxsw_sp_fib_entry_op op)
5706 struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
5708 mlxsw_sp_fib_entry_op_ctx_priv_hold(op_ctx, fib_entry->priv);
5709 fib->ll_ops->fib_entry_pack(op_ctx, fib->proto, op, fib->vr->id,
5710 fib_entry->fib_node->key.prefix_len,
5711 fib_entry->fib_node->key.addr,
/* Commit the operation prepared in @op_ctx through the low-level
 * fib_entry_commit op. Unless that op reports the commit as postponed for
 * bulking, all private-data references held in @op_ctx are released.
 */
5715 static int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
5716 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5717 const struct mlxsw_sp_router_ll_ops *ll_ops)
5719 bool postponed_for_bulk = false;
5722 err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, &postponed_for_bulk);
5723 if (!postponed_for_bulk)
5724 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
/* Set up the router-wide "discard" adjacency entry.
 *
 * The adj_discard_index_valid flag is checked first. A single adjacency
 * KVDL entry is then allocated and an Ethernet RATR entry is written at
 * that index, pointing at the router's loopback RIF and configured with
 * the TRAP trap action and the RTR_EGRESS0 trap ID. On success the index
 * is marked valid; the trailing error path frees the KVDL entry again.
 */
5728 static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp)
5730 enum mlxsw_reg_ratr_trap_action trap_action;
5731 char ratr_pl[MLXSW_REG_RATR_LEN];
5734 if (mlxsw_sp->router->adj_discard_index_valid)
5737 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
5738 &mlxsw_sp->router->adj_discard_index);
5742 trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
5743 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
5744 MLXSW_REG_RATR_TYPE_ETHERNET,
5745 mlxsw_sp->router->adj_discard_index,
5746 mlxsw_sp->router->lb_rif_index);
5747 mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
5748 mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
5749 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5751 goto err_ratr_write;
5753 mlxsw_sp->router->adj_discard_index_valid = true;
5758 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
5759 mlxsw_sp->router->adj_discard_index);
/* Perform @op on a remote (adjacency-based) FIB entry.
 *
 * Three cases are distinguished. If the entry should be offloaded, it is
 * programmed with the NOP trap action and the nexthop group's adjacency
 * index and ECMP size. If the group has no valid adjacency index but does
 * have nexthops and a nexthop RIF, the discard adjacency entry is set up
 * and the entry points at it, again with NOP. Otherwise the entry traps
 * packets with the RTR_INGRESS0 trap ID. The result is packed as a remote
 * action and committed.
 */
5763 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
5764 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5765 struct mlxsw_sp_fib_entry *fib_entry,
5766 enum mlxsw_sp_fib_entry_op op)
5768 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5769 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5770 struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
5771 enum mlxsw_reg_ralue_trap_action trap_action;
5773 u32 adjacency_index = 0;
5777 /* In case the nexthop group adjacency index is valid, use it
5778 * with provided ECMP size. Otherwise, setup trap and pass
5779 * traffic to kernel.
5781 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5782 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5783 adjacency_index = nhgi->adj_index;
5784 ecmp_size = nhgi->ecmp_size;
5785 } else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) {
5786 err = mlxsw_sp_adj_discard_write(mlxsw_sp);
5789 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5790 adjacency_index = mlxsw_sp->router->adj_discard_index;
5793 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5794 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5797 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5798 ll_ops->fib_entry_act_remote_pack(op_ctx, trap_action, trap_id,
5799 adjacency_index, ecmp_size);
5800 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
/* Perform @op on a local (directly connected) FIB entry.
 * When the entry should be offloaded it is programmed with the NOP trap
 * action and the index of the nexthop group's RIF; otherwise packets are
 * trapped with the RTR_INGRESS0 trap ID. The result is packed as a local
 * action and committed.
 */
5803 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
5804 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5805 struct mlxsw_sp_fib_entry *fib_entry,
5806 enum mlxsw_sp_fib_entry_op op)
5808 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5809 struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif;
5810 enum mlxsw_reg_ralue_trap_action trap_action;
5814 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5815 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5816 rif_index = rif->rif_index;
5818 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5819 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5822 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5823 ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, rif_index);
5824 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
/* Perform @op on a trap FIB entry: the entry is packed with the ip2me
 * action and committed.
 */
5827 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
5828 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5829 struct mlxsw_sp_fib_entry *fib_entry,
5830 enum mlxsw_sp_fib_entry_op op)
5832 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5834 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5835 ll_ops->fib_entry_act_ip2me_pack(op_ctx);
5836 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
/* Perform @op on a blackhole FIB entry: the entry is packed as a local
 * action with the DISCARD_ERROR trap action, trap ID 0 and RIF 0, and is
 * then committed.
 */
5839 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
5840 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5841 struct mlxsw_sp_fib_entry *fib_entry,
5842 enum mlxsw_sp_fib_entry_op op)
5844 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5845 enum mlxsw_reg_ralue_trap_action trap_action;
5847 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
5848 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5849 ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, 0, 0);
5850 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
/* Perform @op on an unreachable FIB entry: the entry is packed as a local
 * action that traps packets with the RTR_INGRESS1 trap ID (RIF 0), and is
 * then committed.
 */
5854 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
5855 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5856 struct mlxsw_sp_fib_entry *fib_entry,
5857 enum mlxsw_sp_fib_entry_op op)
5859 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5860 enum mlxsw_reg_ralue_trap_action trap_action;
5863 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5864 trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
5866 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5867 ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, 0);
5868 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
/* Perform @op on an IP-in-IP decap FIB entry.
 * A missing IPIP entry is treated as a bug (WARN_ON). The tunnel-type
 * specific decap_config op is run for the entry's tunnel index, after
 * which the FIB entry is packed with the ip2me-tunnel action pointing at
 * that tunnel index and committed.
 */
5872 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
5873 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5874 struct mlxsw_sp_fib_entry *fib_entry,
5875 enum mlxsw_sp_fib_entry_op op)
5877 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5878 struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
5879 const struct mlxsw_sp_ipip_ops *ipip_ops;
5882 if (WARN_ON(!ipip_entry))
5885 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5886 err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
5887 fib_entry->decap.tunnel_index);
5891 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5892 ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx,
5893 fib_entry->decap.tunnel_index);
5894 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
/* Perform @op on an NVE decap FIB entry: the entry is packed with the
 * ip2me-tunnel action pointing at the entry's decap tunnel index and is
 * then committed.
 */
5897 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
5898 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5899 struct mlxsw_sp_fib_entry *fib_entry,
5900 enum mlxsw_sp_fib_entry_op op)
5902 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5904 mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op);
5905 ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx,
5906 fib_entry->decap.tunnel_index);
5907 return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
/* Dispatch @op to the handler that matches the type of @fib_entry
 * (remote, local, trap, blackhole, unreachable, IP-in-IP decap or NVE
 * decap) and return that handler's result.
 */
5910 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5911 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5912 struct mlxsw_sp_fib_entry *fib_entry,
5913 enum mlxsw_sp_fib_entry_op op)
5915 switch (fib_entry->type) {
5916 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5917 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, op_ctx, fib_entry, op);
5918 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5919 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, op_ctx, fib_entry, op);
5920 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
5921 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, op_ctx, fib_entry, op);
5922 case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5923 return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, op_ctx, fib_entry, op);
5924 case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
5925 return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, op_ctx, fib_entry, op);
5926 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5927 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, op_ctx, fib_entry, op);
5928 case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5929 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, op_ctx, fib_entry, op);
/* Perform @op on @fib_entry through the type dispatcher and refresh the
 * entry's hardware flags to match the operation. Presumably the refresh
 * is skipped when the operation fails - confirm against the error check.
 */
5934 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5935 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5936 struct mlxsw_sp_fib_entry *fib_entry,
5937 enum mlxsw_sp_fib_entry_op op)
5939 int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, op);
5944 mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
/* Write @fib_entry to the device using the caller's @op_ctx: a WRITE
 * operation when the entry is new, an UPDATE operation otherwise.
 */
5949 static int __mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
5950 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5951 struct mlxsw_sp_fib_entry *fib_entry,
5954 return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry,
5955 is_new ? MLXSW_SP_FIB_ENTRY_OP_WRITE :
5956 MLXSW_SP_FIB_ENTRY_OP_UPDATE);
/* Update an already-programmed @fib_entry. The router's shared low-level
 * op context is cleared and then used for an UPDATE operation
 * (is_new = false).
 */
5959 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
5960 struct mlxsw_sp_fib_entry *fib_entry)
5962 struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
5964 mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
5965 return __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, false);
/* Delete @fib_entry from the device with a DELETE operation. The
 * low-level fib_entry_is_committed op is consulted first; presumably an
 * entry that was never committed is skipped without touching the device -
 * confirm against the early-exit branch.
 */
5968 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
5969 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
5970 struct mlxsw_sp_fib_entry *fib_entry)
5972 const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops;
5974 if (!ll_ops->fib_entry_is_committed(fib_entry->priv))
5976 return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry,
5977 MLXSW_SP_FIB_ENTRY_OP_DELETE);
/* Derive the driver's FIB entry type from the kernel route type in
 * @fen_info.
 *
 * The route type selects one of: IP-in-IP decap (a decap IPIP entry
 * matches the destination and its overlay device is up, in which case the
 * decap state is initialised), NVE decap (the destination matches the NVE
 * decap configuration, whose tunnel index is recorded), trap, blackhole,
 * unreachable, remote or local. Presumably the remote/local choice
 * follows the nexthop group's gateway flag - confirm against the branch
 * condition.
 */
5981 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5982 const struct fib_entry_notifier_info *fen_info,
5983 struct mlxsw_sp_fib_entry *fib_entry)
5985 struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
5986 union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
5987 struct mlxsw_sp_router *router = mlxsw_sp->router;
5988 u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
5989 int ifindex = nhgi->nexthops[0].ifindex;
5990 struct mlxsw_sp_ipip_entry *ipip_entry;
5992 switch (fen_info->type) {
5994 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
5995 MLXSW_SP_L3_PROTO_IPV4, dip);
5996 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
5997 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
5998 return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
6002 if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
6003 MLXSW_SP_L3_PROTO_IPV4,
6007 tunnel_index = router->nve_decap_config.tunnel_index;
6008 fib_entry->decap.tunnel_index = tunnel_index;
6009 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
6014 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6017 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6019 case RTN_UNREACHABLE:
6021 /* Packets hitting these routes need to be trapped, but
6022 * can do so with a lower priority than packets directed
6023 * at the host, so use action type local instead of trap.
6025 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6029 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6031 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
/* Undo the type-specific state set up by mlxsw_sp_fib4_entry_type_set():
 * IP-in-IP decap entries have their decap state finalised.
 */
6039 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6040 struct mlxsw_sp_fib_entry *fib_entry)
6042 switch (fib_entry->type) {
6043 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6044 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
/* Create the driver representation of an IPv4 route for @fib_node.
 *
 * In order: the entry is allocated zeroed, its low-level private data is
 * created, it is attached to a nexthop group for fen_info->fi, the group
 * is linked to a virtual router, and the entry type is derived from the
 * route. The FIB info (with a reference taken), table ID, route type and
 * TOS are then recorded and the entry is pointed at @fib_node. Errors
 * unwind in reverse order and are returned as ERR_PTR() values.
 */
6051 static struct mlxsw_sp_fib4_entry *
6052 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
6053 struct mlxsw_sp_fib_node *fib_node,
6054 const struct fib_entry_notifier_info *fen_info)
6056 struct mlxsw_sp_fib4_entry *fib4_entry;
6057 struct mlxsw_sp_fib_entry *fib_entry;
6060 fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
6062 return ERR_PTR(-ENOMEM);
6063 fib_entry = &fib4_entry->common;
6065 fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops);
6066 if (IS_ERR(fib_entry->priv)) {
6067 err = PTR_ERR(fib_entry->priv);
6068 goto err_fib_entry_priv_create;
6071 err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
6073 goto err_nexthop4_group_get;
6075 err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6078 goto err_nexthop_group_vr_link;
6080 err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
6082 goto err_fib4_entry_type_set;
/* The entry keeps its own reference on the FIB info. */
6084 fib4_entry->fi = fen_info->fi;
6085 fib_info_hold(fib4_entry->fi);
6086 fib4_entry->tb_id = fen_info->tb_id;
6087 fib4_entry->type = fen_info->type;
6088 fib4_entry->tos = fen_info->tos;
6090 fib_entry->fib_node = fib_node;
6094 err_fib4_entry_type_set:
6095 mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6096 err_nexthop_group_vr_link:
6097 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6098 err_nexthop4_group_get:
6099 mlxsw_sp_fib_entry_priv_put(fib_entry->priv);
6100 err_fib_entry_priv_create:
6102 return ERR_PTR(err);
/* Tear down an IPv4 FIB entry: the FIB info reference is dropped, the
 * type-specific state is undone, the nexthop group is unlinked from the
 * virtual router and released, and the low-level private data is put.
 */
6105 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6106 struct mlxsw_sp_fib4_entry *fib4_entry)
6108 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6110 fib_info_put(fib4_entry->fi);
6111 mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common);
6112 mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
6114 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6115 mlxsw_sp_fib_entry_priv_put(fib4_entry->common.priv);
/* Find the IPv4 FIB entry that corresponds to the route in @fen_info.
 * The virtual router is found by table ID, and the FIB node by the
 * route's destination in the router's IPv4 FIB. The node's entry is
 * accepted only if its table ID, TOS, route type and FIB info all match
 * @fen_info.
 */
6119 static struct mlxsw_sp_fib4_entry *
6120 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6121 const struct fib_entry_notifier_info *fen_info)
6123 struct mlxsw_sp_fib4_entry *fib4_entry;
6124 struct mlxsw_sp_fib_node *fib_node;
6125 struct mlxsw_sp_fib *fib;
6126 struct mlxsw_sp_vr *vr;
6128 vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
6131 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
6133 fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
6134 sizeof(fen_info->dst),
6139 fib4_entry = container_of(fib_node->fib_entry,
6140 struct mlxsw_sp_fib4_entry, common);
6141 if (fib4_entry->tb_id == fen_info->tb_id &&
6142 fib4_entry->tos == fen_info->tos &&
6143 fib4_entry->type == fen_info->type &&
6144 fib4_entry->fi == fen_info->fi)
/* Hash table parameters for the per-FIB node table: nodes are keyed by
 * the whole struct mlxsw_sp_fib_key embedded in the node, and the table
 * shrinks automatically.
 */
6150 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
6151 .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
6152 .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
6153 .key_len = sizeof(struct mlxsw_sp_fib_key),
6154 .automatic_shrinking = true,
/* Insert @fib_node into the node hash table of @fib. Returns the result
 * of rhashtable_insert_fast().
 */
6157 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
6158 struct mlxsw_sp_fib_node *fib_node)
6160 return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
6161 mlxsw_sp_fib_ht_params);
/* Remove @fib_node from the node hash table of @fib. */
6164 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
6165 struct mlxsw_sp_fib_node *fib_node)
6167 rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
6168 mlxsw_sp_fib_ht_params);
/* Look up the FIB node for a prefix (@addr of @addr_len bytes with
 * @prefix_len) in the node hash table of @fib.
 */
6171 static struct mlxsw_sp_fib_node *
6172 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
6173 size_t addr_len, unsigned char prefix_len)
6175 struct mlxsw_sp_fib_key key;
/* The table hashes the whole key structure, so every byte that is not
 * explicitly filled in below has to be zero.
 */
6177 memset(&key, 0, sizeof(key));
6178 memcpy(key.addr, addr, addr_len);
6179 key.prefix_len = prefix_len;
6180 return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
/* Allocate a zeroed FIB node for the given prefix, add it to the node
 * list of @fib and fill in its key (address bytes and prefix length).
 * The node is not inserted into the hash table here.
 */
6183 static struct mlxsw_sp_fib_node *
6184 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
6185 size_t addr_len, unsigned char prefix_len)
6187 struct mlxsw_sp_fib_node *fib_node;
6189 fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
6193 list_add(&fib_node->list, &fib->node_list);
6194 memcpy(fib_node->key.addr, addr, addr_len);
6195 fib_node->key.prefix_len = prefix_len;
/* Counterpart of mlxsw_sp_fib_node_create(): the node is unlinked from
 * its FIB's node list (and presumably freed - confirm).
 */
6200 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
6202 list_del(&fib_node->list);
/* Account for the prefix length of @fib_node in the LPM tree of its
 * protocol.
 *
 * The per-prefix-length reference count of the protocol's current tree is
 * checked first. For a prefix length that needs a new tree, the required
 * prefix usage is the current usage plus this length: a tree for that
 * usage is obtained and the virtual routers are switched over to it. The
 * reference count for the prefix length is then incremented. If switching
 * trees fails, the newly obtained tree is put back.
 */
6206 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
6207 struct mlxsw_sp_fib_node *fib_node)
6209 struct mlxsw_sp_prefix_usage req_prefix_usage;
6210 struct mlxsw_sp_fib *fib = fib_node->fib;
6211 struct mlxsw_sp_lpm_tree *lpm_tree;
6214 lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
6215 if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6218 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6219 mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
6220 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6222 if (IS_ERR(lpm_tree))
6223 return PTR_ERR(lpm_tree);
6225 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6227 goto err_lpm_tree_replace;
6230 lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
6233 err_lpm_tree_replace:
6234 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
/* Drop the reference that @fib_node holds on its prefix length in the
 * FIB's LPM tree.
 *
 * The per-prefix-length reference count is decremented. Once it reaches
 * zero, a tree for the current prefix usage minus this length is
 * requested and the virtual routers are switched over to it; if the
 * switch fails, the newly obtained tree is put back.
 */
6238 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
6239 struct mlxsw_sp_fib_node *fib_node)
6241 struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
6242 struct mlxsw_sp_prefix_usage req_prefix_usage;
6243 struct mlxsw_sp_fib *fib = fib_node->fib;
6246 if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6248 /* Try to construct a new LPM tree from the current prefix usage
6249 * minus the unused one. If we fail, continue using the old one.
6251 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6252 mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
6253 fib_node->key.prefix_len);
6254 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6256 if (IS_ERR(lpm_tree))
6259 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6261 goto err_lpm_tree_replace;
6265 err_lpm_tree_replace:
6266 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
/* Make @fib_node part of @fib: the node is inserted into the FIB's hash
 * table, pointed at the FIB, and its prefix length is linked into the
 * LPM tree. If the LPM link fails, the FIB pointer is reset and the node
 * is removed from the hash table again.
 */
6269 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
6270 struct mlxsw_sp_fib_node *fib_node,
6271 struct mlxsw_sp_fib *fib)
6275 err = mlxsw_sp_fib_node_insert(fib, fib_node);
6278 fib_node->fib = fib;
6280 err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
6282 goto err_fib_lpm_tree_link;
6286 err_fib_lpm_tree_link:
6287 fib_node->fib = NULL;
6288 mlxsw_sp_fib_node_remove(fib, fib_node);
/* Reverse of mlxsw_sp_fib_node_init(): the node's prefix length is
 * unlinked from the LPM tree, its FIB pointer is reset and it is removed
 * from the FIB's hash table.
 */
6292 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
6293 struct mlxsw_sp_fib_node *fib_node)
/* Cache the FIB: fib_node->fib is cleared before the hash removal. */
6295 struct mlxsw_sp_fib *fib = fib_node->fib;
6297 mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
6298 fib_node->fib = NULL;
6299 mlxsw_sp_fib_node_remove(fib, fib_node);
/* Get the FIB node for a prefix in table @tb_id, creating it on demand.
 *
 * A virtual router is obtained for the table and its FIB for @proto is
 * searched for the prefix. A node that does not exist yet is created and
 * initialised. On failure the node is destroyed where applicable, the
 * virtual router is put and an ERR_PTR() value is returned. Presumably an
 * existing node is returned as-is, with the virtual router reference kept
 * - confirm against the lookup-hit branch.
 */
6302 static struct mlxsw_sp_fib_node *
6303 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
6304 size_t addr_len, unsigned char prefix_len,
6305 enum mlxsw_sp_l3proto proto)
6307 struct mlxsw_sp_fib_node *fib_node;
6308 struct mlxsw_sp_fib *fib;
6309 struct mlxsw_sp_vr *vr;
6312 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
6314 return ERR_CAST(vr);
6315 fib = mlxsw_sp_vr_fib(vr, proto);
6317 fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
6321 fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
6324 goto err_fib_node_create;
6327 err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
6329 goto err_fib_node_init;
6334 mlxsw_sp_fib_node_destroy(fib_node);
6335 err_fib_node_create:
6336 mlxsw_sp_vr_put(mlxsw_sp, vr);
6337 return ERR_PTR(err);
6340 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
6341 struct mlxsw_sp_fib_node *fib_node)
6343 struct mlxsw_sp_vr *vr = fib_node->fib->vr;
6345 if (fib_node->fib_entry)
6347 mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
6348 mlxsw_sp_fib_node_destroy(fib_node);
6349 mlxsw_sp_vr_put(mlxsw_sp, vr);
6352 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
6353 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6354 struct mlxsw_sp_fib_entry *fib_entry)
6356 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6357 bool is_new = !fib_node->fib_entry;
6360 fib_node->fib_entry = fib_entry;
6362 err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, is_new);
6364 goto err_fib_entry_update;
6368 err_fib_entry_update:
6369 fib_node->fib_entry = NULL;
6373 static int __mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
6374 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6375 struct mlxsw_sp_fib_entry *fib_entry)
6377 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6380 err = mlxsw_sp_fib_entry_del(mlxsw_sp, op_ctx, fib_entry);
6381 fib_node->fib_entry = NULL;
6385 static void mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
6386 struct mlxsw_sp_fib_entry *fib_entry)
6388 struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
6390 mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
6391 __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, fib_entry);
6394 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
6396 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6397 struct mlxsw_sp_fib4_entry *fib4_replaced;
6399 if (!fib_node->fib_entry)
6402 fib4_replaced = container_of(fib_node->fib_entry,
6403 struct mlxsw_sp_fib4_entry, common);
6404 if (fib4_entry->tb_id == RT_TABLE_MAIN &&
6405 fib4_replaced->tb_id == RT_TABLE_LOCAL)
6412 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
6413 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6414 const struct fib_entry_notifier_info *fen_info)
6416 struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
6417 struct mlxsw_sp_fib_entry *replaced;
6418 struct mlxsw_sp_fib_node *fib_node;
6421 if (fen_info->fi->nh &&
6422 !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
6425 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
6426 &fen_info->dst, sizeof(fen_info->dst),
6428 MLXSW_SP_L3_PROTO_IPV4);
6429 if (IS_ERR(fib_node)) {
6430 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
6431 return PTR_ERR(fib_node);
6434 fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
6435 if (IS_ERR(fib4_entry)) {
6436 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
6437 err = PTR_ERR(fib4_entry);
6438 goto err_fib4_entry_create;
6441 if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
6442 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6443 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6447 replaced = fib_node->fib_entry;
6448 err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib4_entry->common);
6450 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
6451 goto err_fib_node_entry_link;
6454 /* Nothing to replace */
6458 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
6459 fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
6461 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
6465 err_fib_node_entry_link:
6466 fib_node->fib_entry = replaced;
6467 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6468 err_fib4_entry_create:
6469 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6473 static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
6474 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6475 struct fib_entry_notifier_info *fen_info)
6477 struct mlxsw_sp_fib4_entry *fib4_entry;
6478 struct mlxsw_sp_fib_node *fib_node;
6481 fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
6484 fib_node = fib4_entry->common.fib_node;
6486 err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib4_entry->common);
6487 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6488 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6492 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
6494 /* Multicast routes aren't supported, so ignore them. Neighbour
6495 * Discovery packets are specifically trapped.
6497 if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
6500 /* Cloned routes are irrelevant in the forwarding path. */
6501 if (rt->fib6_flags & RTF_CACHE)
6507 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
6509 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6511 mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
6513 return ERR_PTR(-ENOMEM);
6515 /* In case of route replace, replaced route is deleted with
6516 * no notification. Take reference to prevent accessing freed
6519 mlxsw_sp_rt6->rt = rt;
6522 return mlxsw_sp_rt6;
6525 #if IS_ENABLED(CONFIG_IPV6)
6526 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6528 fib6_info_release(rt);
6531 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6536 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
6538 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
6540 if (!mlxsw_sp_rt6->rt->nh)
6541 fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
6542 mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
6543 kfree(mlxsw_sp_rt6);
6546 static struct fib6_info *
6547 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6549 return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6553 static struct mlxsw_sp_rt6 *
6554 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6555 const struct fib6_info *rt)
6557 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6559 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6560 if (mlxsw_sp_rt6->rt == rt)
6561 return mlxsw_sp_rt6;
6567 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6568 const struct fib6_info *rt,
6569 enum mlxsw_sp_ipip_type *ret)
6571 return rt->fib6_nh->fib_nh_dev &&
6572 mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6575 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
6576 struct mlxsw_sp_nexthop_group *nh_grp,
6577 struct mlxsw_sp_nexthop *nh,
6578 const struct fib6_info *rt)
6580 struct net_device *dev = rt->fib6_nh->fib_nh_dev;
6582 nh->nhgi = nh_grp->nhgi;
6583 nh->nh_weight = rt->fib6_nh->fib_nh_weight;
6584 memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
6585 #if IS_ENABLED(CONFIG_IPV6)
6586 nh->neigh_tbl = &nd_tbl;
6588 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
6590 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
6594 nh->ifindex = dev->ifindex;
6596 return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
6599 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
6600 struct mlxsw_sp_nexthop *nh)
6602 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
6603 list_del(&nh->router_list_node);
6604 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6607 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
6608 const struct fib6_info *rt)
6610 return rt->fib6_nh->fib_nh_gw_family ||
6611 mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
6615 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
6616 struct mlxsw_sp_nexthop_group *nh_grp,
6617 struct mlxsw_sp_fib6_entry *fib6_entry)
6619 struct mlxsw_sp_nexthop_group_info *nhgi;
6620 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6621 struct mlxsw_sp_nexthop *nh;
6624 nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
6628 nh_grp->nhgi = nhgi;
6629 nhgi->nh_grp = nh_grp;
6630 mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
6631 struct mlxsw_sp_rt6, list);
6632 nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
6633 nhgi->count = fib6_entry->nrt6;
6634 for (i = 0; i < nhgi->count; i++) {
6635 struct fib6_info *rt = mlxsw_sp_rt6->rt;
6637 nh = &nhgi->nexthops[i];
6638 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
6640 goto err_nexthop6_init;
6641 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
6643 nh_grp->nhgi = nhgi;
6644 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6646 goto err_group_refresh;
6653 for (i--; i >= 0; i--) {
6654 nh = &nhgi->nexthops[i];
6655 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6662 mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
6663 struct mlxsw_sp_nexthop_group *nh_grp)
6665 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
6668 for (i = nhgi->count - 1; i >= 0; i--) {
6669 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
6671 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6673 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6674 WARN_ON_ONCE(nhgi->adj_index_valid);
6678 static struct mlxsw_sp_nexthop_group *
6679 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
6680 struct mlxsw_sp_fib6_entry *fib6_entry)
6682 struct mlxsw_sp_nexthop_group *nh_grp;
6685 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
6687 return ERR_PTR(-ENOMEM);
6688 INIT_LIST_HEAD(&nh_grp->vr_list);
6689 err = rhashtable_init(&nh_grp->vr_ht,
6690 &mlxsw_sp_nexthop_group_vr_ht_params);
6692 goto err_nexthop_group_vr_ht_init;
6693 INIT_LIST_HEAD(&nh_grp->fib_list);
6694 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
6696 err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
6698 goto err_nexthop_group_info_init;
6700 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
6702 goto err_nexthop_group_insert;
6704 nh_grp->can_destroy = true;
6708 err_nexthop_group_insert:
6709 mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6710 err_nexthop_group_info_init:
6711 rhashtable_destroy(&nh_grp->vr_ht);
6712 err_nexthop_group_vr_ht_init:
6714 return ERR_PTR(err);
6718 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
6719 struct mlxsw_sp_nexthop_group *nh_grp)
6721 if (!nh_grp->can_destroy)
6723 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
6724 mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6725 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
6726 rhashtable_destroy(&nh_grp->vr_ht);
6730 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
6731 struct mlxsw_sp_fib6_entry *fib6_entry)
6733 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6734 struct mlxsw_sp_nexthop_group *nh_grp;
6737 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
6739 if (WARN_ON_ONCE(!nh_grp))
6744 nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
6746 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
6748 return PTR_ERR(nh_grp);
6751 /* The route and the nexthop are described by the same struct, so we
6752 * need to the update the nexthop offload indication for the new route.
6754 __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
6757 list_add_tail(&fib6_entry->common.nexthop_group_node,
6759 fib6_entry->common.nh_group = nh_grp;
6764 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
6765 struct mlxsw_sp_fib_entry *fib_entry)
6767 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
6769 list_del(&fib_entry->nexthop_group_node);
6770 if (!list_empty(&nh_grp->fib_list))
6773 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
6774 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
6778 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
6781 static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
6782 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6783 struct mlxsw_sp_fib6_entry *fib6_entry)
6785 struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
6786 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6789 mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
6790 fib6_entry->common.nh_group = NULL;
6791 list_del(&fib6_entry->common.nexthop_group_node);
6793 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6795 goto err_nexthop6_group_get;
6797 err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
6800 goto err_nexthop_group_vr_link;
6802 /* In case this entry is offloaded, then the adjacency index
6803 * currently associated with it in the device's table is that
6804 * of the old group. Start using the new one instead.
6806 err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx,
6807 &fib6_entry->common, false);
6809 goto err_fib_entry_update;
6811 if (list_empty(&old_nh_grp->fib_list))
6812 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
6816 err_fib_entry_update:
6817 mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6819 err_nexthop_group_vr_link:
6820 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6821 err_nexthop6_group_get:
6822 list_add_tail(&fib6_entry->common.nexthop_group_node,
6823 &old_nh_grp->fib_list);
6824 fib6_entry->common.nh_group = old_nh_grp;
6825 mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
6830 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
6831 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6832 struct mlxsw_sp_fib6_entry *fib6_entry,
6833 struct fib6_info **rt_arr, unsigned int nrt6)
6835 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6838 for (i = 0; i < nrt6; i++) {
6839 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6840 if (IS_ERR(mlxsw_sp_rt6)) {
6841 err = PTR_ERR(mlxsw_sp_rt6);
6842 goto err_rt6_create;
6845 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6849 err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
6851 goto err_nexthop6_group_update;
6855 err_nexthop6_group_update:
6858 for (i--; i >= 0; i--) {
6860 mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6861 struct mlxsw_sp_rt6, list);
6862 list_del(&mlxsw_sp_rt6->list);
6863 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6869 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
6870 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
6871 struct mlxsw_sp_fib6_entry *fib6_entry,
6872 struct fib6_info **rt_arr, unsigned int nrt6)
6874 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6877 for (i = 0; i < nrt6; i++) {
6878 mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
6880 if (WARN_ON_ONCE(!mlxsw_sp_rt6))
6884 list_del(&mlxsw_sp_rt6->list);
6885 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6888 mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
6891 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6892 struct mlxsw_sp_fib_entry *fib_entry,
6893 const struct fib6_info *rt)
6895 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
6896 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6897 else if (rt->fib6_type == RTN_BLACKHOLE)
6898 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6899 else if (rt->fib6_flags & RTF_REJECT)
6900 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6901 else if (fib_entry->nh_group->nhgi->gateway)
6902 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6904 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
6908 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
6910 struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
6912 list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
6915 list_del(&mlxsw_sp_rt6->list);
6916 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6920 static struct mlxsw_sp_fib6_entry *
6921 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
6922 struct mlxsw_sp_fib_node *fib_node,
6923 struct fib6_info **rt_arr, unsigned int nrt6)
6925 struct mlxsw_sp_fib6_entry *fib6_entry;
6926 struct mlxsw_sp_fib_entry *fib_entry;
6927 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6930 fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
6932 return ERR_PTR(-ENOMEM);
6933 fib_entry = &fib6_entry->common;
6935 fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops);
6936 if (IS_ERR(fib_entry->priv)) {
6937 err = PTR_ERR(fib_entry->priv);
6938 goto err_fib_entry_priv_create;
6941 INIT_LIST_HEAD(&fib6_entry->rt6_list);
6943 for (i = 0; i < nrt6; i++) {
6944 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6945 if (IS_ERR(mlxsw_sp_rt6)) {
6946 err = PTR_ERR(mlxsw_sp_rt6);
6947 goto err_rt6_create;
6949 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6953 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6955 goto err_nexthop6_group_get;
6957 err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6960 goto err_nexthop_group_vr_link;
6962 mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
6964 fib_entry->fib_node = fib_node;
6968 err_nexthop_group_vr_link:
6969 mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
6970 err_nexthop6_group_get:
6973 for (i--; i >= 0; i--) {
6975 mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6976 struct mlxsw_sp_rt6, list);
6977 list_del(&mlxsw_sp_rt6->list);
6978 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6980 mlxsw_sp_fib_entry_priv_put(fib_entry->priv);
6981 err_fib_entry_priv_create:
6983 return ERR_PTR(err);
6986 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6987 struct mlxsw_sp_fib6_entry *fib6_entry)
6989 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6991 mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6993 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6994 mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
6995 WARN_ON(fib6_entry->nrt6);
6996 mlxsw_sp_fib_entry_priv_put(fib6_entry->common.priv);
7000 static struct mlxsw_sp_fib6_entry *
7001 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
7002 const struct fib6_info *rt)
7004 struct mlxsw_sp_fib6_entry *fib6_entry;
7005 struct mlxsw_sp_fib_node *fib_node;
7006 struct mlxsw_sp_fib *fib;
7007 struct fib6_info *cmp_rt;
7008 struct mlxsw_sp_vr *vr;
7010 vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
7013 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
7015 fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
7016 sizeof(rt->fib6_dst.addr),
7021 fib6_entry = container_of(fib_node->fib_entry,
7022 struct mlxsw_sp_fib6_entry, common);
7023 cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7024 if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
7025 rt->fib6_metric == cmp_rt->fib6_metric &&
7026 mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
7032 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
7034 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7035 struct mlxsw_sp_fib6_entry *fib6_replaced;
7036 struct fib6_info *rt, *rt_replaced;
7038 if (!fib_node->fib_entry)
7041 fib6_replaced = container_of(fib_node->fib_entry,
7042 struct mlxsw_sp_fib6_entry,
7044 rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7045 rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
7046 if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
7047 rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
7053 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
7054 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
7055 struct fib6_info **rt_arr, unsigned int nrt6)
7057 struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
7058 struct mlxsw_sp_fib_entry *replaced;
7059 struct mlxsw_sp_fib_node *fib_node;
7060 struct fib6_info *rt = rt_arr[0];
7063 if (rt->fib6_src.plen)
7066 if (mlxsw_sp_fib6_rt_should_ignore(rt))
7069 if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
7072 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7074 sizeof(rt->fib6_dst.addr),
7076 MLXSW_SP_L3_PROTO_IPV6);
7077 if (IS_ERR(fib_node))
7078 return PTR_ERR(fib_node);
7080 fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
7082 if (IS_ERR(fib6_entry)) {
7083 err = PTR_ERR(fib6_entry);
7084 goto err_fib6_entry_create;
7087 if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
7088 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7089 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7093 replaced = fib_node->fib_entry;
7094 err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib6_entry->common);
7096 goto err_fib_node_entry_link;
7098 /* Nothing to replace */
7102 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
7103 fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
7105 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
7109 err_fib_node_entry_link:
7110 fib_node->fib_entry = replaced;
7111 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7112 err_fib6_entry_create:
7113 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7117 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
7118 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
7119 struct fib6_info **rt_arr, unsigned int nrt6)
7121 struct mlxsw_sp_fib6_entry *fib6_entry;
7122 struct mlxsw_sp_fib_node *fib_node;
7123 struct fib6_info *rt = rt_arr[0];
7126 if (rt->fib6_src.plen)
7129 if (mlxsw_sp_fib6_rt_should_ignore(rt))
7132 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7134 sizeof(rt->fib6_dst.addr),
7136 MLXSW_SP_L3_PROTO_IPV6);
7137 if (IS_ERR(fib_node))
7138 return PTR_ERR(fib_node);
7140 if (WARN_ON_ONCE(!fib_node->fib_entry)) {
7141 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7145 fib6_entry = container_of(fib_node->fib_entry,
7146 struct mlxsw_sp_fib6_entry, common);
7147 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6);
7149 goto err_fib6_entry_nexthop_add;
7153 err_fib6_entry_nexthop_add:
7154 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7158 static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
7159 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
7160 struct fib6_info **rt_arr, unsigned int nrt6)
7162 struct mlxsw_sp_fib6_entry *fib6_entry;
7163 struct mlxsw_sp_fib_node *fib_node;
7164 struct fib6_info *rt = rt_arr[0];
7167 if (mlxsw_sp_fib6_rt_should_ignore(rt))
7170 /* Multipath routes are first added to the FIB trie and only then
7171 * notified. If we vetoed the addition, we will get a delete
7172 * notification for a route we do not have. Therefore, do not warn if
7173 * route was not found.
7175 fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
7179 /* If not all the nexthops are deleted, then only reduce the nexthop
7182 if (nrt6 != fib6_entry->nrt6) {
7183 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6);
7187 fib_node = fib6_entry->common.fib_node;
7189 err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib6_entry->common);
7190 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7191 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7195 static struct mlxsw_sp_mr_table *
7196 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
7198 if (family == RTNL_FAMILY_IPMR)
7199 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
7201 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
7204 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
7205 struct mfc_entry_notifier_info *men_info,
7208 struct mlxsw_sp_mr_table *mrt;
7209 struct mlxsw_sp_vr *vr;
7211 vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
7215 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7216 return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
7219 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
7220 struct mfc_entry_notifier_info *men_info)
7222 struct mlxsw_sp_mr_table *mrt;
7223 struct mlxsw_sp_vr *vr;
7225 vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
7229 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7230 mlxsw_sp_mr_route_del(mrt, men_info->mfc);
7231 mlxsw_sp_vr_put(mlxsw_sp, vr);
7235 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
7236 struct vif_entry_notifier_info *ven_info)
7238 struct mlxsw_sp_mr_table *mrt;
7239 struct mlxsw_sp_rif *rif;
7240 struct mlxsw_sp_vr *vr;
7242 vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
7246 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7247 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
7248 return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
7249 ven_info->vif_index,
7250 ven_info->vif_flags, rif);
7254 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
7255 struct vif_entry_notifier_info *ven_info)
7257 struct mlxsw_sp_mr_table *mrt;
7258 struct mlxsw_sp_vr *vr;
7260 vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
7264 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7265 mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
7266 mlxsw_sp_vr_put(mlxsw_sp, vr);
7269 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
7270 struct mlxsw_sp_fib_node *fib_node)
7272 struct mlxsw_sp_fib4_entry *fib4_entry;
7274 fib4_entry = container_of(fib_node->fib_entry,
7275 struct mlxsw_sp_fib4_entry, common);
7276 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7277 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
7278 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7281 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
7282 struct mlxsw_sp_fib_node *fib_node)
7284 struct mlxsw_sp_fib6_entry *fib6_entry;
7286 fib6_entry = container_of(fib_node->fib_entry,
7287 struct mlxsw_sp_fib6_entry, common);
7288 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7289 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7290 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7293 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
7294 struct mlxsw_sp_fib_node *fib_node)
7296 switch (fib_node->fib->proto) {
7297 case MLXSW_SP_L3_PROTO_IPV4:
7298 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
7300 case MLXSW_SP_L3_PROTO_IPV6:
7301 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
7306 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
7307 struct mlxsw_sp_vr *vr,
7308 enum mlxsw_sp_l3proto proto)
7310 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
7311 struct mlxsw_sp_fib_node *fib_node, *tmp;
7313 list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
7314 bool do_break = &tmp->list == &fib->node_list;
7316 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
7322 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
7326 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
7327 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
7329 if (!mlxsw_sp_vr_is_used(vr))
7332 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
7333 mlxsw_sp_mr_table_flush(vr->mr_table[j]);
7334 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
7336 /* If virtual router was only used for IPv4, then it's no
7339 if (!mlxsw_sp_vr_is_used(vr))
7341 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
7344 /* After flushing all the routes, it is not possible anyone is still
7345 * using the adjacency index that is discarding packets, so free it in
7346 * case it was allocated.
7348 if (!mlxsw_sp->router->adj_discard_index_valid)
7350 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
7351 mlxsw_sp->router->adj_discard_index);
7352 mlxsw_sp->router->adj_discard_index_valid = false;
/* Payload of a queued IPv6 FIB event: the routes the event refers to. */
struct mlxsw_sp_fib6_event {
	struct fib6_info **rt_arr;	/* array of nrt6 route pointers */
	unsigned int nrt6;		/* number of elements in rt_arr */
};
7360 struct mlxsw_sp_fib_event {
7361 struct list_head list; /* node in fib queue */
7363 struct mlxsw_sp_fib6_event fib6_event;
7364 struct fib_entry_notifier_info fen_info;
7365 struct fib_rule_notifier_info fr_info;
7366 struct fib_nh_notifier_info fnh_info;
7367 struct mfc_entry_notifier_info men_info;
7368 struct vif_entry_notifier_info ven_info;
7370 struct mlxsw_sp *mlxsw_sp;
7371 unsigned long event;
7376 mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event,
7377 struct fib6_entry_notifier_info *fen6_info)
7379 struct fib6_info *rt = fen6_info->rt;
7380 struct fib6_info **rt_arr;
7381 struct fib6_info *iter;
7385 nrt6 = fen6_info->nsiblings + 1;
7387 rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
7391 fib6_event->rt_arr = rt_arr;
7392 fib6_event->nrt6 = nrt6;
7397 if (!fen6_info->nsiblings)
7400 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
7401 if (i == fen6_info->nsiblings)
7404 rt_arr[i + 1] = iter;
7405 fib6_info_hold(iter);
7408 WARN_ON_ONCE(i != fen6_info->nsiblings);
7414 mlxsw_sp_router_fib6_event_fini(struct mlxsw_sp_fib6_event *fib6_event)
7418 for (i = 0; i < fib6_event->nrt6; i++)
7419 mlxsw_sp_rt6_release(fib6_event->rt_arr[i]);
7420 kfree(fib6_event->rt_arr);
7423 static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp,
7424 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
7425 struct mlxsw_sp_fib_event *fib_event)
7429 mlxsw_sp_span_respin(mlxsw_sp);
7431 switch (fib_event->event) {
7432 case FIB_EVENT_ENTRY_REPLACE:
7433 err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info);
7435 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7436 dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7437 mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
7438 &fib_event->fen_info);
7440 fib_info_put(fib_event->fen_info.fi);
7442 case FIB_EVENT_ENTRY_DEL:
7443 err = mlxsw_sp_router_fib4_del(mlxsw_sp, op_ctx, &fib_event->fen_info);
7445 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7446 fib_info_put(fib_event->fen_info.fi);
7448 case FIB_EVENT_NH_ADD:
7449 case FIB_EVENT_NH_DEL:
7450 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_event->event, fib_event->fnh_info.fib_nh);
7451 fib_info_put(fib_event->fnh_info.fib_nh->nh_parent);
7456 static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
7457 struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
7458 struct mlxsw_sp_fib_event *fib_event)
7460 struct mlxsw_sp_fib6_event *fib6_event = &fib_event->fib6_event;
7463 mlxsw_sp_span_respin(mlxsw_sp);
7465 switch (fib_event->event) {
7466 case FIB_EVENT_ENTRY_REPLACE:
7467 err = mlxsw_sp_router_fib6_replace(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7468 fib_event->fib6_event.nrt6);
7470 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7471 dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7472 mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7476 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
7478 case FIB_EVENT_ENTRY_APPEND:
7479 err = mlxsw_sp_router_fib6_append(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7480 fib_event->fib6_event.nrt6);
7482 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7483 dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
7484 mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7488 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
7490 case FIB_EVENT_ENTRY_DEL:
7491 err = mlxsw_sp_router_fib6_del(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr,
7492 fib_event->fib6_event.nrt6);
7494 mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
7495 mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
7500 static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp,
7501 struct mlxsw_sp_fib_event *fib_event)
7507 mutex_lock(&mlxsw_sp->router->lock);
7508 switch (fib_event->event) {
7509 case FIB_EVENT_ENTRY_REPLACE:
7510 case FIB_EVENT_ENTRY_ADD:
7511 replace = fib_event->event == FIB_EVENT_ENTRY_REPLACE;
7513 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace);
7515 dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
7516 mr_cache_put(fib_event->men_info.mfc);
7518 case FIB_EVENT_ENTRY_DEL:
7519 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_event->men_info);
7520 mr_cache_put(fib_event->men_info.mfc);
7522 case FIB_EVENT_VIF_ADD:
7523 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7524 &fib_event->ven_info);
7526 dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
7527 dev_put(fib_event->ven_info.dev);
7529 case FIB_EVENT_VIF_DEL:
7530 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_event->ven_info);
7531 dev_put(fib_event->ven_info.dev);
7534 mutex_unlock(&mlxsw_sp->router->lock);
/* Work item that drains the router's FIB event queue.
 *
 * The pending events are spliced onto a local list under
 * fib_event_queue_lock, then handled one by one with the router lock held.
 * For each event, op_ctx->bulk_ok is set when the following queued event has
 * the same family and event type, and op_ctx->initialized is cleared whenever
 * the family differs from the previous event's. IPMR/IP6MR events are handed
 * to mlxsw_sp_router_fibmr_event_process() with the router lock temporarily
 * dropped, since that function takes the lock itself.
 */
7538 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
7540 struct mlxsw_sp_router *router = container_of(work, struct mlxsw_sp_router, fib_event_work);
7541 struct mlxsw_sp_fib_entry_op_ctx *op_ctx = router->ll_op_ctx;
7542 struct mlxsw_sp *mlxsw_sp = router->mlxsw_sp;
7543 struct mlxsw_sp_fib_event *next_fib_event;
7544 struct mlxsw_sp_fib_event *fib_event;
7545 int last_family = AF_UNSPEC;
7546 LIST_HEAD(fib_event_queue);
7548 spin_lock_bh(&router->fib_event_queue_lock);
7549 list_splice_init(&router->fib_event_queue, &fib_event_queue);
7550 spin_unlock_bh(&router->fib_event_queue_lock);
7552 /* Router lock is held here to make sure per-instance
7553 * operation context is not used in between FIB4/6 events
7556 mutex_lock(&router->lock);
7557 mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
7558 list_for_each_entry_safe(fib_event, next_fib_event,
7559 &fib_event_queue, list) {
7560 /* Check if the next entry in the queue exists and it is
7561 * of the same type (family and event) as the current one.
7562 * In that case it is permitted to do the bulking
7563 * of multiple FIB entries to a single register write.
7565 op_ctx->bulk_ok = !list_is_last(&fib_event->list, &fib_event_queue) &&
7566 fib_event->family == next_fib_event->family &&
7567 fib_event->event == next_fib_event->event;
7568 op_ctx->event = fib_event->event;
7570 /* In case family of this and the previous entry are different, context
7571 * reinitialization is going to be needed now, indicate that.
7572 * Note that since last_family is initialized to AF_UNSPEC, this is always
7573 * going to happen for the first entry processed in the work.
7575 if (fib_event->family != last_family)
7576 op_ctx->initialized = false;
7578 switch (fib_event->family) {
7580 mlxsw_sp_router_fib4_event_process(mlxsw_sp, op_ctx,
7584 mlxsw_sp_router_fib6_event_process(mlxsw_sp, op_ctx,
7587 case RTNL_FAMILY_IP6MR:
7588 case RTNL_FAMILY_IPMR:
7589 /* Unlock here as inside FIBMR the lock is taken again
7590 * under RTNL. The per-instance operation context
7591 * is not used by FIBMR.
7593 mutex_unlock(&router->lock);
7594 mlxsw_sp_router_fibmr_event_process(mlxsw_sp,
7596 mutex_lock(&router->lock);
7601 last_family = fib_event->family;
7605 WARN_ON_ONCE(!list_empty(&router->ll_op_ctx->fib_entry_priv_list));
7606 mutex_unlock(&router->lock);
/* Snapshot an IPv4 FIB notification into a queued event.
 *
 * Entry replace/delete events copy the enclosing fib_entry_notifier_info;
 * nexthop add/delete events copy the enclosing fib_nh_notifier_info. In both
 * cases a reference is taken on the associated fib_info so it cannot be freed
 * while the event waits in the queue.
 */
7609 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event *fib_event,
7610 struct fib_notifier_info *info)
7612 struct fib_entry_notifier_info *fen_info;
7613 struct fib_nh_notifier_info *fnh_info;
7615 switch (fib_event->event) {
7616 case FIB_EVENT_ENTRY_REPLACE:
7617 case FIB_EVENT_ENTRY_DEL:
7618 fen_info = container_of(info, struct fib_entry_notifier_info,
7620 fib_event->fen_info = *fen_info;
7621 /* Take reference on fib_info to prevent it from being
7622 * freed while event is queued. Release it afterwards.
7624 fib_info_hold(fib_event->fen_info.fi);
7626 case FIB_EVENT_NH_ADD:
7627 case FIB_EVENT_NH_DEL:
7628 fnh_info = container_of(info, struct fib_nh_notifier_info,
7630 fib_event->fnh_info = *fnh_info;
7631 fib_info_hold(fib_event->fnh_info.fib_nh->nh_parent);
/* Snapshot an IPv6 FIB notification into a queued event.
 *
 * For entry replace/append/delete events, the enclosing
 * fib6_entry_notifier_info is located and fib_event->fib6_event is set up
 * through mlxsw_sp_router_fib6_event_init(), whose result is kept in err.
 */
7636 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event,
7637 struct fib_notifier_info *info)
7639 struct fib6_entry_notifier_info *fen6_info;
7642 switch (fib_event->event) {
7643 case FIB_EVENT_ENTRY_REPLACE:
7644 case FIB_EVENT_ENTRY_APPEND:
7645 case FIB_EVENT_ENTRY_DEL:
7646 fen6_info = container_of(info, struct fib6_entry_notifier_info,
7648 err = mlxsw_sp_router_fib6_event_init(&fib_event->fib6_event,
/* Snapshot a multicast-routing notification into a queued event.
 *
 * Entry events copy the notifier payload into men_info and pin the MFC entry
 * with mr_cache_hold(); VIF events copy it into ven_info and pin the netdev
 * with dev_hold(). The matching put is done when the event is processed.
 */
7659 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event *fib_event,
7660 struct fib_notifier_info *info)
7662 switch (fib_event->event) {
7663 case FIB_EVENT_ENTRY_REPLACE:
7664 case FIB_EVENT_ENTRY_ADD:
7665 case FIB_EVENT_ENTRY_DEL:
7666 memcpy(&fib_event->men_info, info, sizeof(fib_event->men_info));
7667 mr_cache_hold(fib_event->men_info.mfc);
7669 case FIB_EVENT_VIF_ADD:
7670 case FIB_EVENT_VIF_DEL:
7671 memcpy(&fib_event->ven_info, info, sizeof(fib_event->ven_info));
7672 dev_hold(fib_event->ven_info.dev);
/* Vet a FIB rule notification.
 *
 * Rule deletions need no handling, and rules whose input interface is the
 * namespace's loopback device only affect locally generated traffic. For the
 * remaining rules, each family tests whether the rule is neither that
 * family's default rule nor an l3mdev rule; the extack message
 * "FIB rules not supported" is used to report unsupported rules.
 */
7677 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7678 struct fib_notifier_info *info,
7679 struct mlxsw_sp *mlxsw_sp)
7681 struct netlink_ext_ack *extack = info->extack;
7682 struct fib_rule_notifier_info *fr_info;
7683 struct fib_rule *rule;
7686 /* nothing to do at the moment */
7687 if (event == FIB_EVENT_RULE_DEL)
7690 fr_info = container_of(info, struct fib_rule_notifier_info, info);
7691 rule = fr_info->rule;
7693 /* Rule only affects locally generated traffic */
7694 if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7697 switch (info->family) {
7699 if (!fib4_rule_default(rule) && !rule->l3mdev)
7703 if (!fib6_rule_default(rule) && !rule->l3mdev)
7706 case RTNL_FAMILY_IPMR:
7707 if (!ipmr_rule_default(rule) && !rule->l3mdev)
7710 case RTNL_FAMILY_IP6MR:
7711 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7717 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
/* FIB notifier callback.
 *
 * Only AF_INET, AF_INET6, RTNL_FAMILY_IPMR and RTNL_FAMILY_IP6MR
 * notifications are of interest. Rule events are vetted synchronously through
 * mlxsw_sp_router_fib_rule_event(). An IPv4 route whose fib_info uses an IPv6
 * gateway is rejected with -EINVAL and an extack message. Everything else is
 * copied into a mlxsw_sp_fib_event allocated with GFP_ATOMIC, appended to the
 * router's event queue under fib_event_queue_lock, and the work item is
 * scheduled to process it.
 */
7722 /* Called with rcu_read_lock() */
7723 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
7724 unsigned long event, void *ptr)
7726 struct mlxsw_sp_fib_event *fib_event;
7727 struct fib_notifier_info *info = ptr;
7728 struct mlxsw_sp_router *router;
7731 if ((info->family != AF_INET && info->family != AF_INET6 &&
7732 info->family != RTNL_FAMILY_IPMR &&
7733 info->family != RTNL_FAMILY_IP6MR))
7736 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7739 case FIB_EVENT_RULE_ADD:
7740 case FIB_EVENT_RULE_DEL:
7741 err = mlxsw_sp_router_fib_rule_event(event, info,
7743 return notifier_from_errno(err);
7744 case FIB_EVENT_ENTRY_ADD:
7745 case FIB_EVENT_ENTRY_REPLACE:
7746 case FIB_EVENT_ENTRY_APPEND:
7747 if (info->family == AF_INET) {
7748 struct fib_entry_notifier_info *fen_info = ptr;
7750 if (fen_info->fi->fib_nh_is_v6) {
7751 NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
7752 return notifier_from_errno(-EINVAL);
7758 fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC);
7762 fib_event->mlxsw_sp = router->mlxsw_sp;
7763 fib_event->event = event;
7764 fib_event->family = info->family;
7766 switch (info->family) {
7768 mlxsw_sp_router_fib4_event(fib_event, info);
7771 err = mlxsw_sp_router_fib6_event(fib_event, info);
7775 case RTNL_FAMILY_IP6MR:
7776 case RTNL_FAMILY_IPMR:
7777 mlxsw_sp_router_fibmr_event(fib_event, info);
7781 /* Enqueue the event and trigger the work */
7782 spin_lock_bh(&router->fib_event_queue_lock);
7783 list_add_tail(&fib_event->list, &router->fib_event_queue);
7784 spin_unlock_bh(&router->fib_event_queue_lock);
7785 mlxsw_core_schedule_work(&router->fib_event_work);
/* Find the RIF bound to a netdev.
 *
 * Linearly scans the router's RIF table (MAX_RIFS slots) and returns the
 * first populated entry whose dev pointer equals @dev. Callers in this file
 * invoke it with the router lock held.
 */
7794 static struct mlxsw_sp_rif *
7795 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7796 const struct net_device *dev)
7800 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7801 if (mlxsw_sp->router->rifs[i] &&
7802 mlxsw_sp->router->rifs[i]->dev == dev)
7803 return mlxsw_sp->router->rifs[i];
/* Look up the RIF for @dev while holding the router lock.
 * NOTE(review): presumably returns whether a RIF was found - confirm.
 */
7808 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7809 const struct net_device *dev)
7811 struct mlxsw_sp_rif *rif;
7813 mutex_lock(&mlxsw_sp->router->lock);
7814 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7815 mutex_unlock(&mlxsw_sp->router->lock);
/* Return the VLAN ID associated with the RIF of @dev.
 *
 * The lookup runs under the router lock. The VID is read from the RIF's FID
 * with mlxsw_sp_fid_8021q_vid() and is only meaningful for VLAN RIFs; for
 * other RIF types the invalid value 0 is reported.
 */
7820 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7822 struct mlxsw_sp_rif *rif;
7825 mutex_lock(&mlxsw_sp->router->lock);
7826 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7830 /* We only return the VID for VLAN RIFs. Otherwise we return an
7831 * invalid value (0).
7833 if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7836 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7839 mutex_unlock(&mlxsw_sp->router->lock);
/* Disable a RIF in hardware.
 *
 * Reads the current RITR register contents for RIF index @rif, clears the
 * enable field and writes the register back. Returns the result of the
 * register write.
 */
7843 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7845 char ritr_pl[MLXSW_REG_RITR_LEN];
7848 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7849 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7853 mlxsw_reg_ritr_enable_set(ritr_pl, false);
7854 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Prepare for a RIF going away: disable it in hardware first (the result is
 * not checked), then run the nexthop and neighbour "RIF gone" sync helpers.
 */
7857 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
7858 struct mlxsw_sp_rif *rif)
7860 mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
7861 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
7862 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
/* Decide whether an address event on @dev requires RIF (de)configuration.
 *
 * The visible logic determines whether @dev still has any IPv4 address
 * (idev->ifa_list) or IPv6 address (inet6_dev->addr_list). With an empty
 * address list it then considers two cases: @dev is a macvlan, or a RIF
 * exists whose netdev is not an L3 (VRF) slave.
 * NOTE(review): the per-event return values are not visible here - confirm
 * against the full function before relying on this summary.
 */
7866 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
7867 unsigned long event)
7869 struct inet6_dev *inet6_dev;
7870 bool addr_list_empty = true;
7871 struct in_device *idev;
7878 idev = __in_dev_get_rcu(dev);
7879 if (idev && idev->ifa_list)
7880 addr_list_empty = false;
7882 inet6_dev = __in6_dev_get(dev);
7883 if (addr_list_empty && inet6_dev &&
7884 !list_empty(&inet6_dev->addr_list))
7885 addr_list_empty = false;
7888 /* macvlans do not have a RIF, but rather piggy back on the
7889 * RIF of their lower device.
7891 if (netif_is_macvlan(dev) && addr_list_empty)
7894 if (rif && addr_list_empty &&
7895 !netif_is_l3_slave(rif->dev))
7897 /* It is possible we already removed the RIF ourselves
7898 * if it was assigned to a netdev that is now a bridge
/* Map a netdev to the RIF type it requires.
 *
 * IP-in-IP tunnel devices use a loopback RIF. For everything else a FID type
 * is chosen first - 802.1Q for a VLAN device on top of a bridge or for a
 * VLAN-aware bridge, 802.1D for any other bridge, rFID otherwise - and then
 * translated with mlxsw_sp_fid_type_rif_type().
 */
7907 static enum mlxsw_sp_rif_type
7908 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
7909 const struct net_device *dev)
7911 enum mlxsw_sp_fid_type type;
7913 if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
7914 return MLXSW_SP_RIF_TYPE_IPIP_LB;
7916 /* Otherwise RIF type is derived from the type of the underlying FID. */
7917 if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
7918 type = MLXSW_SP_FID_TYPE_8021Q;
7919 else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
7920 type = MLXSW_SP_FID_TYPE_8021Q;
7921 else if (netif_is_bridge_master(dev))
7922 type = MLXSW_SP_FID_TYPE_8021D;
7924 type = MLXSW_SP_FID_TYPE_RFID;
7926 return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
/* Scan the router's RIF table for the first unused slot.
 * NOTE(review): presumably stores the slot index in *p_rif_index and fails
 * when the table is full - confirm.
 */
7929 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
7933 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
7934 if (!mlxsw_sp->router->rifs[i]) {
/* Allocate and pre-initialize a RIF object.
 *
 * Allocates @rif_size zeroed bytes with GFP_KERNEL (the size depends on the
 * RIF type), initializes the nexthop and neighbour list heads, copies the L3
 * device's MAC address and MTU, and records @rif_index.
 */
7943 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
7945 struct net_device *l3_dev)
7947 struct mlxsw_sp_rif *rif;
7949 rif = kzalloc(rif_size, GFP_KERNEL);
7953 INIT_LIST_HEAD(&rif->nexthop_list);
7954 INIT_LIST_HEAD(&rif->neigh_list);
7956 ether_addr_copy(rif->addr, l3_dev->dev_addr);
7957 rif->mtu = l3_dev->mtu;
7961 rif->rif_index = rif_index;
/* Return the RIF table entry at the given index. No bounds check is performed
 * in the visible code.
 */
7966 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
7969 return mlxsw_sp->router->rifs[rif_index];
/* Accessor: the RIF's index in the router's RIF table. */
7972 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
7974 return rif->rif_index;
/* Accessor: the RIF index of a loopback (IP-in-IP) RIF, read from its
 * embedded common RIF.
 */
7977 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7979 return lb_rif->common.rif_index;
/* Resolve the underlay virtual router of a loopback RIF.
 *
 * Takes the underlay table ID of the RIF's tunnel netdev and obtains the
 * corresponding VR with mlxsw_sp_vr_get(); an error pointer from that call
 * triggers a WARN.
 */
7982 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7984 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
7985 struct mlxsw_sp_vr *ul_vr;
7987 ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
7988 if (WARN_ON(IS_ERR(ul_vr)))
/* Accessor: the underlay RIF ID stored in a loopback RIF. */
7994 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7996 return lb_rif->ul_rif_id;
/* Accessor: ifindex of the RIF's netdev. Dereferences rif->dev without a NULL
 * check, so it must not be used on a RIF that has no netdev.
 */
7999 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
8001 return rif->dev->ifindex;
/* Accessor returning a const net_device pointer for a RIF.
 * NOTE(review): presumably returns rif->dev - the body is not visible here.
 */
8004 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
/* Create a RIF for params->dev.
 *
 * Steps, in order: derive the RIF type and its ops from the netdev; get the
 * virtual router for the device's L3 master table (RT_TABLE_MAIN when there
 * is none); allocate a free RIF index and the RIF object; publish the RIF in
 * the router's RIF table; obtain the FID through ops->fid_get(); run
 * ops->setup() and ops->configure(); register the RIF with the multicast
 * routing table of each L3 protocol; allocate counters.
 *
 * On failure the completed steps are unwound in reverse order (including only
 * the multicast tables already joined) and ERR_PTR(err) is returned, with an
 * extack message when the RIF table is exhausted.
 */
8009 static struct mlxsw_sp_rif *
8010 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
8011 const struct mlxsw_sp_rif_params *params,
8012 struct netlink_ext_ack *extack)
8014 u32 tb_id = l3mdev_fib_table(params->dev);
8015 const struct mlxsw_sp_rif_ops *ops;
8016 struct mlxsw_sp_fid *fid = NULL;
8017 enum mlxsw_sp_rif_type type;
8018 struct mlxsw_sp_rif *rif;
8019 struct mlxsw_sp_vr *vr;
8023 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
8024 ops = mlxsw_sp->router->rif_ops_arr[type];
8026 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
8028 return ERR_CAST(vr);
8031 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
8033 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8034 goto err_rif_index_alloc;
8037 rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
8043 mlxsw_sp->router->rifs[rif_index] = rif;
8044 rif->mlxsw_sp = mlxsw_sp;
8048 fid = ops->fid_get(rif, extack);
8057 ops->setup(rif, params);
8059 err = ops->configure(rif);
8063 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
8064 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
8066 goto err_mr_rif_add;
8069 mlxsw_sp_rif_counters_alloc(rif);
8074 for (i--; i >= 0; i--)
8075 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8076 ops->deconfigure(rif);
8079 mlxsw_sp_fid_put(fid);
8081 mlxsw_sp->router->rifs[rif_index] = NULL;
8085 err_rif_index_alloc:
8087 mlxsw_sp_vr_put(mlxsw_sp, vr);
8088 return ERR_PTR(err);
/* Destroy a RIF.
 *
 * Disables the RIF and syncs its nexthop/neighbour users, frees its counters,
 * removes it from every protocol's multicast routing table, calls
 * ops->deconfigure(), drops the FID reference (loopback RIFs have no FID),
 * clears the RIF's slot in the router's table and finally releases the
 * virtual router reference.
 */
8091 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
8093 const struct mlxsw_sp_rif_ops *ops = rif->ops;
8094 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8095 struct mlxsw_sp_fid *fid = rif->fid;
8096 struct mlxsw_sp_vr *vr;
8099 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8100 vr = &mlxsw_sp->router->vrs[rif->vr_id];
8102 mlxsw_sp_rif_counters_free(rif);
8103 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8104 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8105 ops->deconfigure(rif);
8107 /* Loopback RIFs are not associated with a FID. */
8108 mlxsw_sp_fid_put(fid);
8109 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
8113 mlxsw_sp_vr_put(mlxsw_sp, vr);
/* Look up the RIF bound to @dev and destroy it, all under the router lock. */
8116 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
8117 struct net_device *dev)
8119 struct mlxsw_sp_rif *rif;
8121 mutex_lock(&mlxsw_sp->router->lock);
8122 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8125 mlxsw_sp_rif_destroy(rif);
8127 mutex_unlock(&mlxsw_sp->router->lock);
/* Fill sub-port RIF parameters from a port VLAN: the VID, whether the owning
 * port is a LAG member, and the port identity (LAG ID or local port number).
 */
8131 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
8132 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8134 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8136 params->vid = mlxsw_sp_port_vlan->vid;
8137 params->lag = mlxsw_sp_port->lagged;
8139 params->lag_id = mlxsw_sp_port->lag_id;
8141 params->system_port = mlxsw_sp_port->local_port;
/* Convert a generic RIF pointer to the sub-port RIF that embeds it as its
 * 'common' member. Only valid for RIFs that really are sub-port RIFs.
 */
8144 static struct mlxsw_sp_rif_subport *
8145 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
8147 return container_of(rif, struct mlxsw_sp_rif_subport, common);
/* Get a sub-port RIF for params->dev.
 *
 * Creates a new RIF through mlxsw_sp_rif_create() when the device has none;
 * otherwise takes an additional reference on the existing sub-port RIF.
 */
8150 static struct mlxsw_sp_rif *
8151 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
8152 const struct mlxsw_sp_rif_params *params,
8153 struct netlink_ext_ack *extack)
8155 struct mlxsw_sp_rif_subport *rif_subport;
8156 struct mlxsw_sp_rif *rif;
8158 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
8160 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
8162 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8163 refcount_inc(&rif_subport->ref_count);
/* Drop one reference on a sub-port RIF and destroy the RIF once the reference
 * count reaches zero.
 */
8167 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
8169 struct mlxsw_sp_rif_subport *rif_subport;
8171 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8172 if (!refcount_dec_and_test(&rif_subport->ref_count))
8175 mlxsw_sp_rif_destroy(rif);
/* Join a {port, VID} to the router on behalf of @l3_dev.
 *
 * Gets (or creates) the sub-port RIF for the L3 device, takes a reference on
 * the RIF's FID, maps the {port, VID} to that FID, disables learning for the
 * VID and puts it in the STP forwarding state. On success the FID is recorded
 * in the port VLAN. On failure the steps already taken are undone in reverse
 * order and both the FID and the RIF references are dropped.
 *
 * Returns PTR_ERR() of the RIF when it cannot be obtained.
 */
8179 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8180 struct net_device *l3_dev,
8181 struct netlink_ext_ack *extack)
8183 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8184 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
8185 struct mlxsw_sp_rif_params params = {
8188 u16 vid = mlxsw_sp_port_vlan->vid;
8189 struct mlxsw_sp_rif *rif;
8190 struct mlxsw_sp_fid *fid;
8193 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
8194 rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
8196 return PTR_ERR(rif);
8198 /* FID was already created, just take a reference */
8199 fid = rif->ops->fid_get(rif, extack);
8200 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
8202 goto err_fid_port_vid_map;
8204 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
8206 goto err_port_vid_learning_set;
8208 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
8209 BR_STATE_FORWARDING);
8211 goto err_port_vid_stp_set;
8213 mlxsw_sp_port_vlan->fid = fid;
8217 err_port_vid_stp_set:
8218 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8219 err_port_vid_learning_set:
8220 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8221 err_fid_port_vid_map:
8222 mlxsw_sp_fid_put(fid);
8223 mlxsw_sp_rif_subport_put(rif);
/* Undo __mlxsw_sp_port_vlan_router_join() for a {port, VID}.
 *
 * WARNs if the port VLAN's FID is not an rFID. Otherwise it clears the port
 * VLAN's FID pointer, sets the VID's STP state to blocking, re-enables
 * learning, unmaps the {port, VID} from the FID, and drops the FID and
 * sub-port RIF references.
 */
8228 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8230 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8231 struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
8232 struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
8233 u16 vid = mlxsw_sp_port_vlan->vid;
8235 if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
8238 mlxsw_sp_port_vlan->fid = NULL;
8239 mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
8240 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8241 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8242 mlxsw_sp_fid_put(fid);
8243 mlxsw_sp_rif_subport_put(rif);
/* Locked entry point for joining a {port, VID} to the router: under the
 * router lock, looks up the RIF of @l3_dev and calls
 * __mlxsw_sp_port_vlan_router_join().
 */
8247 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8248 struct net_device *l3_dev,
8249 struct netlink_ext_ack *extack)
8251 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8252 struct mlxsw_sp_rif *rif;
8255 mutex_lock(&mlxsw_sp->router->lock);
8256 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8260 err = __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
8263 mutex_unlock(&mlxsw_sp->router->lock);
/* Locked wrapper around __mlxsw_sp_port_vlan_router_leave(): takes the router
 * lock for the duration of the call.
 */
8268 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8270 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8272 mutex_lock(&mlxsw_sp->router->lock);
8273 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8274 mutex_unlock(&mlxsw_sp->router->lock);
/* Handle an address event for a {port, VID} pair.
 *
 * Finds the port VLAN for @vid on @port_dev (WARNs if it does not exist) and,
 * depending on the event, joins it to or removes it from the router using the
 * unlocked helpers.
 */
8277 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
8278 struct net_device *port_dev,
8279 unsigned long event, u16 vid,
8280 struct netlink_ext_ack *extack)
8282 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
8283 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
8285 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
8286 if (WARN_ON(!mlxsw_sp_port_vlan))
8291 return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
8294 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
/* Handle an address event on a physical port netdev.
 *
 * Ports enslaved to a bridge, LAG or OVS are singled out by the first check;
 * otherwise the event is handled for the port's default VID, with the port
 * acting as its own L3 device.
 */
8301 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
8302 unsigned long event,
8303 struct netlink_ext_ack *extack)
8305 if (netif_is_bridge_port(port_dev) ||
8306 netif_is_lag_port(port_dev) ||
8307 netif_is_ovs_port(port_dev))
8310 return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
8311 MLXSW_SP_DEFAULT_VID, extack);
/* Propagate an address event on a LAG to its member ports: walks the LAG's
 * lower devices and invokes the {port, VID} handler for each one that is an
 * mlxsw port.
 */
8314 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
8315 struct net_device *lag_dev,
8316 unsigned long event, u16 vid,
8317 struct netlink_ext_ack *extack)
8319 struct net_device *port_dev;
8320 struct list_head *iter;
8323 netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
8324 if (mlxsw_sp_port_dev_check(port_dev)) {
8325 err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
/* Handle an address event on a LAG netdev. A LAG that is itself a bridge port
 * is singled out by the first check; otherwise the event is applied to the
 * LAG's members for the default VID.
 */
8337 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
8338 unsigned long event,
8339 struct netlink_ext_ack *extack)
8341 if (netif_is_bridge_port(lag_dev))
8344 return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
8345 MLXSW_SP_DEFAULT_VID, extack);
/* Handle an address event on a bridge (or a VLAN upper of a bridge).
 *
 * Adding an address to a VLAN-aware bridge whose VLAN protocol is 802.1ad is
 * refused with an extack message. Otherwise a RIF is created for @l3_dev,
 * with any creation error returned via PTR_ERR(); on address removal the
 * existing RIF is looked up and destroyed.
 */
8348 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
8349 struct net_device *l3_dev,
8350 unsigned long event,
8351 struct netlink_ext_ack *extack)
8353 struct mlxsw_sp_rif_params params = {
8356 struct mlxsw_sp_rif *rif;
8360 if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
8363 br_vlan_get_proto(l3_dev, &proto);
8364 if (proto == ETH_P_8021AD) {
8365 NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
8369 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
8371 return PTR_ERR(rif);
8374 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8375 mlxsw_sp_rif_destroy(rif);
/* Handle an address event on a VLAN netdev.
 *
 * A VLAN device that is a bridge port is singled out first. Otherwise the
 * event is dispatched on the type of the real (lower) device: an mlxsw port,
 * a LAG master, or a VLAN-aware bridge, using the VLAN device's VID where
 * applicable.
 */
8382 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
8383 struct net_device *vlan_dev,
8384 unsigned long event,
8385 struct netlink_ext_ack *extack)
8387 struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
8388 u16 vid = vlan_dev_vlan_id(vlan_dev);
8390 if (netif_is_bridge_port(vlan_dev))
8393 if (mlxsw_sp_port_dev_check(real_dev))
8394 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
8395 event, vid, extack);
8396 else if (netif_is_lag_master(real_dev))
8397 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
8399 else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
8400 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
/* Return true if @mac matches 00:00:5e:00:01:XX, i.e. equals the vrrp4
 * pattern in its first five bytes; the mask ignores the last byte.
 */
8406 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
8408 u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
8409 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8411 return ether_addr_equal_masked(mac, vrrp4, mask);
/* Return true if @mac matches 00:00:5e:00:02:XX, i.e. equals the vrrp6
 * pattern in its first five bytes; the mask ignores the last byte.
 */
8414 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
8416 u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
8417 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8419 return ether_addr_equal_masked(mac, vrrp6, mask);
/* Program or clear a RIF's VRRP ID based on a VRRP virtual MAC.
 *
 * The VRRP ID is the last byte of @mac when @adding, or 0 when removing. MACs
 * matching neither the IPv4 nor the IPv6 VRRP pattern are filtered by the
 * first check. Otherwise the RIF's RITR register is queried, the IPv4 or IPv6
 * VRRP ID field is updated according to the MAC's pattern, and the register
 * is written back.
 */
8422 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8423 const u8 *mac, bool adding)
8425 char ritr_pl[MLXSW_REG_RITR_LEN];
8426 u8 vrrp_id = adding ? mac[5] : 0;
8429 if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
8430 !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
8433 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8434 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8438 if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
8439 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
8441 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
8443 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Direct a macvlan's MAC address to the router.
 *
 * A macvlan is only supported on top of a netdev that already has a RIF.
 * Using the lower device's RIF, an FDB entry for the macvlan's MAC is
 * installed in the RIF's FID, the VRRP ID is programmed if the MAC is a VRRP
 * MAC, and the RIF's optional fdb_del() hook is invoked for the same MAC. If
 * the VRRP step fails, the FDB entry is removed again.
 */
8446 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
8447 const struct net_device *macvlan_dev,
8448 struct netlink_ext_ack *extack)
8450 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8451 struct mlxsw_sp_rif *rif;
8454 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8456 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
8460 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8461 mlxsw_sp_fid_index(rif->fid), true);
8465 err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
8466 macvlan_dev->dev_addr, true);
8468 goto err_rif_vrrp_add;
8470 /* Make sure the bridge driver does not have this MAC pointing at
8473 if (rif->ops->fdb_del)
8474 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
8479 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8480 mlxsw_sp_fid_index(rif->fid), false);
/* Stop directing a macvlan's MAC address to the router (caller holds the
 * router lock).
 *
 * Looks up the RIF of the macvlan's lower device; when there is none, the MAC
 * was already removed during RIF deletion. Otherwise the VRRP ID is updated
 * and the FDB entry for the MAC is removed from the RIF's FID.
 */
8484 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8485 const struct net_device *macvlan_dev)
8487 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8488 struct mlxsw_sp_rif *rif;
8490 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8491 /* If we do not have a RIF, then we already took care of
8492 * removing the macvlan's MAC during RIF deletion.
8496 mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
8498 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8499 mlxsw_sp_fid_index(rif->fid), false);
/* Locked wrapper around __mlxsw_sp_rif_macvlan_del(): takes the router lock
 * for the duration of the call.
 */
8502 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8503 const struct net_device *macvlan_dev)
8505 mutex_lock(&mlxsw_sp->router->lock);
8506 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8507 mutex_unlock(&mlxsw_sp->router->lock);
/* Handle an address event on a macvlan netdev: depending on the event, either
 * add the macvlan's MAC to the router (returning that result) or remove it
 * using the unlocked delete helper.
 */
8510 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
8511 struct net_device *macvlan_dev,
8512 unsigned long event,
8513 struct netlink_ext_ack *extack)
8517 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
8519 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
/* Check that @dev_addr shares the common MAC prefix used by existing RIFs.
 *
 * macvlan and L3 master (VRF) devices are exempt. Every existing RIF other
 * than loopback (IPIP) RIFs and the RIF of @dev itself is examined; if the
 * address of any such RIF's netdev differs from @dev_addr under
 * mlxsw_sp->mac_mask, the request is rejected with an extack message.
 */
8526 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
8527 struct net_device *dev,
8528 const unsigned char *dev_addr,
8529 struct netlink_ext_ack *extack)
8531 struct mlxsw_sp_rif *rif;
8534 /* A RIF is not created for macvlan netdevs. Their MAC is used to
8537 if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
8540 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
8541 rif = mlxsw_sp->router->rifs[i];
8542 if (rif && rif->ops &&
8543 rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
8545 if (rif && rif->dev && rif->dev != dev &&
8546 !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
8547 mlxsw_sp->mac_mask)) {
8548 NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
/* Dispatch an address event to the handler matching the netdev type: mlxsw
 * port, LAG master, bridge master, VLAN device or macvlan, checked in that
 * order. Callers in this file hold the router lock.
 */
8556 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
8557 struct net_device *dev,
8558 unsigned long event,
8559 struct netlink_ext_ack *extack)
8561 if (mlxsw_sp_port_dev_check(dev))
8562 return mlxsw_sp_inetaddr_port_event(dev, event, extack);
8563 else if (netif_is_lag_master(dev))
8564 return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
8565 else if (netif_is_bridge_master(dev))
8566 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
8568 else if (is_vlan_dev(dev))
8569 return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
8571 else if (netif_is_macvlan(dev))
8572 return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
/* IPv4 address notifier.
 *
 * NETDEV_UP is left to mlxsw_sp_inetaddr_valid_event(). For other events the
 * RIF of the address's netdev is looked up under the router lock,
 * mlxsw_sp_rif_should_config() is consulted, and the event is dispatched via
 * __mlxsw_sp_inetaddr_event() without an extack. The result is converted with
 * notifier_from_errno().
 */
8578 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
8579 unsigned long event, void *ptr)
8581 struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
8582 struct net_device *dev = ifa->ifa_dev->dev;
8583 struct mlxsw_sp_router *router;
8584 struct mlxsw_sp_rif *rif;
8587 /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
8588 if (event == NETDEV_UP)
8591 router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
8592 mutex_lock(&router->lock);
8593 rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
8594 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8597 err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
8599 mutex_unlock(&router->lock);
8600 return notifier_from_errno(err);
/* IPv4 address validator notifier.
 *
 * Resolves the mlxsw instance below the netdev, then under the router lock:
 * looks up the RIF, consults mlxsw_sp_rif_should_config(), checks that the
 * netdev's MAC has the common RIF prefix, and dispatches the event with the
 * validator's extack so failures can be reported to the user. The result is
 * converted with notifier_from_errno().
 */
8603 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
8604 unsigned long event, void *ptr)
8606 struct in_validator_info *ivi = (struct in_validator_info *) ptr;
8607 struct net_device *dev = ivi->ivi_dev->dev;
8608 struct mlxsw_sp *mlxsw_sp;
8609 struct mlxsw_sp_rif *rif;
8612 mlxsw_sp = mlxsw_sp_lower_get(dev);
8616 mutex_lock(&mlxsw_sp->router->lock);
8617 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8618 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8621 err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
8626 err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
8628 mutex_unlock(&mlxsw_sp->router->lock);
8629 return notifier_from_errno(err);
/* Deferred IPv6 address event: the work item plus the driver instance, netdev
 * and event code captured by the notifier for later processing.
 */
8632 struct mlxsw_sp_inet6addr_event_work {
8633 struct work_struct work;
8634 struct mlxsw_sp *mlxsw_sp;
8635 struct net_device *dev;
8636 unsigned long event;
/* Work handler for a deferred IPv6 address event.
 *
 * Under the router lock, looks up the RIF for the captured netdev, consults
 * mlxsw_sp_rif_should_config() and dispatches the event without an extack.
 * The work structure is freed at the end.
 */
8639 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
8641 struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
8642 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
8643 struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
8644 struct net_device *dev = inet6addr_work->dev;
8645 unsigned long event = inet6addr_work->event;
8646 struct mlxsw_sp_rif *rif;
8649 mutex_lock(&mlxsw_sp->router->lock);
8651 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8652 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8655 __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
8657 mutex_unlock(&mlxsw_sp->router->lock);
8660 kfree(inet6addr_work);
/* IPv6 address notifier.
 *
 * NETDEV_UP is left to mlxsw_sp_inet6addr_valid_event(). For other events a
 * work item is allocated with GFP_ATOMIC, filled with the driver instance,
 * netdev and event, and scheduled; the actual handling happens in
 * mlxsw_sp_inet6addr_event_work().
 */
8663 /* Called with rcu_read_lock() */
8664 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
8665 unsigned long event, void *ptr)
8667 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
8668 struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
8669 struct net_device *dev = if6->idev->dev;
8670 struct mlxsw_sp_router *router;
8672 /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
8673 if (event == NETDEV_UP)
8676 inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
8677 if (!inet6addr_work)
8680 router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
8681 INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
8682 inet6addr_work->mlxsw_sp = router->mlxsw_sp;
8683 inet6addr_work->dev = dev;
8684 inet6addr_work->event = event;
8686 mlxsw_core_schedule_work(&inet6addr_work->work);
/* IPv6 address validator notifier.
 *
 * Mirrors mlxsw_sp_inetaddr_valid_event(): resolves the mlxsw instance below
 * the netdev, then under the router lock looks up the RIF, consults
 * mlxsw_sp_rif_should_config(), checks the netdev's MAC prefix, and
 * dispatches the event with the validator's extack. The result is converted
 * with notifier_from_errno().
 */
8691 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
8692 unsigned long event, void *ptr)
8694 struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
8695 struct net_device *dev = i6vi->i6vi_dev->dev;
8696 struct mlxsw_sp *mlxsw_sp;
8697 struct mlxsw_sp_rif *rif;
8700 mlxsw_sp = mlxsw_sp_lower_get(dev);
8704 mutex_lock(&mlxsw_sp->router->lock);
8705 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8706 if (!mlxsw_sp_rif_should_config(rif, dev, event))
8709 err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
8714 err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
8716 mutex_unlock(&mlxsw_sp->router->lock);
8717 return notifier_from_errno(err);
/* Update the MAC address and MTU of an existing RIF in hardware.
 *
 * Queries the RIF's current RITR register contents, overrides the MTU and MAC
 * fields, sets the operation to MLXSW_REG_RITR_RIF_CREATE and writes the
 * register back. Returns the result of the register write.
 */
8720 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8721 const char *mac, int mtu)
8723 char ritr_pl[MLXSW_REG_RITR_LEN];
8726 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8727 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8731 mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
8732 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
8733 mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
8734 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Propagate a MAC address or MTU change of a RIF's netdev to hardware.
 *
 * Removes the FDB entry for the RIF's cached (old) MAC, edits the RIF with
 * the netdev's current address, installs an FDB entry for the new MAC,
 * updates the multicast routing tables of the RIF's virtual router when the
 * MTU changed, and finally caches the new MAC and MTU in the RIF. The error
 * path restores the RIF to its cached address/MTU and re-adds the old FDB
 * entry.
 */
8738 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
8739 struct mlxsw_sp_rif *rif)
8741 struct net_device *dev = rif->dev;
8745 fid_index = mlxsw_sp_fid_index(rif->fid);
8747 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
8751 err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
8756 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
8758 goto err_rif_fdb_op;
8760 if (rif->mtu != dev->mtu) {
8761 struct mlxsw_sp_vr *vr;
8764 /* The RIF is relevant only to its mr_table instance, as unlike
8765 * unicast routing, in multicast routing a RIF cannot be shared
8766 * between several multicast routing tables.
8768 vr = &mlxsw_sp->router->vrs[rif->vr_id];
8769 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8770 mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
8774 ether_addr_copy(rif->addr, dev->dev_addr);
8775 rif->mtu = dev->mtu;
8777 netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
8782 mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
8784 mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
/* Validate a pending MAC address change before it takes effect: the proposed
 * address from the notifier info is checked against the common RIF MAC
 * prefix, reporting failures through the notifier's extack.
 */
8788 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
8789 struct netdev_notifier_pre_changeaddr_info *info)
8791 struct netlink_ext_ack *extack;
8793 extack = netdev_notifier_info_to_extack(&info->info);
8794 return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
8795 info->dev_addr, extack);
/* Netdevice notifier hook for router ports.
 *
 * Resolves the mlxsw instance below @dev and, under the router lock, looks up
 * the netdev's RIF. MTU and address changes are applied to the RIF;
 * NETDEV_PRE_CHANGEADDR validates the proposed address, with @ptr passed as
 * the pre-changeaddr notifier info.
 */
8798 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
8799 unsigned long event, void *ptr)
8801 struct mlxsw_sp *mlxsw_sp;
8802 struct mlxsw_sp_rif *rif;
8805 mlxsw_sp = mlxsw_sp_lower_get(dev);
8809 mutex_lock(&mlxsw_sp->router->lock);
8810 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8815 case NETDEV_CHANGEMTU:
8816 case NETDEV_CHANGEADDR:
8817 err = mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
8819 case NETDEV_PRE_CHANGEADDR:
8820 err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
8825 mutex_unlock(&mlxsw_sp->router->lock);
/* Handle a netdev being enslaved to a VRF.
 *
 * A RIF is tied to a virtual router, so an existing RIF is torn down by
 * replaying NETDEV_DOWN, and a new one is then created for the new virtual
 * router by replaying NETDEV_UP. The result of the NETDEV_UP replay is
 * returned.
 */
8829 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
8830 struct net_device *l3_dev,
8831 struct netlink_ext_ack *extack)
8833 struct mlxsw_sp_rif *rif;
8835 /* If netdev is already associated with a RIF, then we need to
8836 * destroy it and create a new one with the new virtual router ID.
8838 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8840 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
8843 return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
/* Counterpart of mlxsw_sp_port_vrf_join(): looks up the RIF bound to
 * @l3_dev and replays NETDEV_DOWN through __mlxsw_sp_inetaddr_event()
 * without an extack.
 */
8846 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
8847 struct net_device *l3_dev)
8849 struct mlxsw_sp_rif *rif;
8851 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8854 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
/* Handle upper-device change events of @l3_dev relative to a VRF.
 *
 * A missing mlxsw_sp instance below @l3_dev or a macvlan @l3_dev is
 * checked for before router->lock is taken. Under the lock, a
 * NETDEV_CHANGEUPPER with info->linking set calls
 * mlxsw_sp_port_vrf_join() with the extack taken from @info; the other
 * NETDEV_CHANGEUPPER branch calls mlxsw_sp_port_vrf_leave().
 */
8857 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
8858 struct netdev_notifier_changeupper_info *info)
8860 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
8863 /* We do not create a RIF for a macvlan, but only use it to
8864 * direct more MAC addresses to the router.
8866 if (!mlxsw_sp || netif_is_macvlan(l3_dev))
8869 mutex_lock(&mlxsw_sp->router->lock);
8871 case NETDEV_PRECHANGEUPPER:
8873 case NETDEV_CHANGEUPPER:
8874 if (info->linking) {
8875 struct netlink_ext_ack *extack;
8877 extack = netdev_notifier_info_to_extack(&info->info);
8878 err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
8880 mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
8884 mutex_unlock(&mlxsw_sp->router->lock);
/* Per-device callback used by mlxsw_sp_rif_macvlan_flush().
 *
 * @priv->data carries the RIF. Devices are tested with
 * netif_is_macvlan(); for a macvlan, mlxsw_sp_rif_fdb_op() is called for
 * its MAC address in the RIF's FID with the last argument false (the
 * same value the deconfigure paths in this file pass), and that call's
 * result is returned.
 */
8889 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
8890 struct netdev_nested_priv *priv)
8892 struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
8894 if (!netif_is_macvlan(dev))
8897 return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
8898 mlxsw_sp_fid_index(rif->fid), false);
/* Flush the FDB entries of macvlan uppers of a RIF's netdevice.
 *
 * Checks netif_is_macvlan_port() on rif->dev first. Otherwise warns that
 * the upper macvlans will stop working and walks all upper devices of
 * rif->dev with __mlxsw_sp_rif_macvlan_flush(), passing the RIF through
 * the nested-priv data pointer. Returns the result of the walk.
 */
8901 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
8903 struct netdev_nested_priv priv = {
8904 .data = (void *)rif,
8907 if (!netif_is_macvlan_port(rif->dev))
8910 netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
8911 return netdev_walk_all_upper_dev_rcu(rif->dev,
8912 __mlxsw_sp_rif_macvlan_flush, &priv);
/* .setup callback for sub-port RIFs.
 *
 * Initialises the sub-port specific part of @rif: the reference count
 * starts at 1, and vid, lag, lag_id and system_port are taken from the
 * corresponding fields of @params.
 */
8915 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
8916 const struct mlxsw_sp_rif_params *params)
8918 struct mlxsw_sp_rif_subport *rif_subport;
8920 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8921 refcount_set(&rif_subport->ref_count, 1);
8922 rif_subport->vid = params->vid;
8923 rif_subport->lag = params->lag;
8925 rif_subport->lag_id = params->lag_id;
8927 rif_subport->system_port = params->system_port;
/* Write the RITR register for a sub-port RIF.
 *
 * Packs a sub-port interface (MLXSW_REG_RITR_SP_IF) entry carrying
 * @enable, the RIF index, the virtual router ID and the netdevice's
 * current MTU and MAC address. The sub-port part identifies the port by
 * lag_id when the RIF is on a LAG and by system_port otherwise.
 *
 * Returns the result of the register write.
 */
8930 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
8932 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8933 struct mlxsw_sp_rif_subport *rif_subport;
8934 char ritr_pl[MLXSW_REG_RITR_LEN];
8936 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8937 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
8938 rif->rif_index, rif->vr_id, rif->dev->mtu);
8939 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
8940 mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
8941 rif_subport->lag ? rif_subport->lag_id :
8942 rif_subport->system_port,
8945 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* .configure callback for sub-port RIFs.
 *
 * Enables the RIF via mlxsw_sp_rif_subport_op(), calls
 * mlxsw_sp_rif_fdb_op() with true for the netdevice's MAC address in the
 * RIF's FID and finally binds the FID to the RIF. The err_rif_fdb_op
 * path disables the RIF again.
 */
8948 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
8952 err = mlxsw_sp_rif_subport_op(rif, true);
8956 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8957 mlxsw_sp_fid_index(rif->fid), true);
8959 goto err_rif_fdb_op;
8961 mlxsw_sp_fid_rif_set(rif->fid, rif);
8965 mlxsw_sp_rif_subport_op(rif, false);
/* .deconfigure callback for sub-port RIFs.
 *
 * Mirrors mlxsw_sp_rif_subport_configure() in reverse: unbinds the FID
 * from the RIF, calls mlxsw_sp_rif_fdb_op() with false for the
 * netdevice's MAC address, flushes macvlan uppers and disables the RIF
 * in the device. Return values of the individual steps are not checked.
 */
8969 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
8971 struct mlxsw_sp_fid *fid = rif->fid;
8973 mlxsw_sp_fid_rif_set(fid, NULL);
8974 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
8975 mlxsw_sp_fid_index(fid), false);
8976 mlxsw_sp_rif_macvlan_flush(rif);
8977 mlxsw_sp_rif_subport_op(rif, false);
/* .fid_get callback for sub-port RIFs: returns the result of
 * mlxsw_sp_fid_rfid_get() for the RIF's index. @extack is not used.
 */
8980 static struct mlxsw_sp_fid *
8981 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
8982 struct netlink_ext_ack *extack)
8984 return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
/* RIF operations for sub-port RIFs (MLXSW_SP_RIF_TYPE_SUBPORT). The RIF
 * object is sized as struct mlxsw_sp_rif_subport.
 */
8987 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
8988 .type = MLXSW_SP_RIF_TYPE_SUBPORT,
8989 .rif_size = sizeof(struct mlxsw_sp_rif_subport),
8990 .setup = mlxsw_sp_rif_subport_setup,
8991 .configure = mlxsw_sp_rif_subport_configure,
8992 .deconfigure = mlxsw_sp_rif_subport_deconfigure,
8993 .fid_get = mlxsw_sp_rif_subport_fid_get,
/* Write the RITR register for a VLAN/FID based RIF.
 *
 * Packs an entry of interface type @type with @enable, the RIF index and
 * the virtual router ID, sets the netdevice's MAC address and stores
 * @vid_fid through mlxsw_reg_ritr_fid_set().
 *
 * Returns the result of the register write.
 */
8996 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
8997 enum mlxsw_reg_ritr_if_type type,
8998 u16 vid_fid, bool enable)
9000 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9001 char ritr_pl[MLXSW_REG_RITR_LEN];
9003 mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
9005 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
9006 mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
9008 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Return the port number used for the router port: one past the value
 * reported by mlxsw_core_max_ports().
 *
 * NOTE(review): the return type is u8, so the result is truncated if
 * mlxsw_core_max_ports() + 1 does not fit in 8 bits - confirm that this
 * cannot happen on supported devices.
 */
9011 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
9013 return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
/* .configure callback shared by the FID and VLAN RIF types.
 *
 * Steps, in order:
 *  1. write the RITR entry as a FID interface for the RIF's FID index;
 *  2. enable multicast flooding from the FID to the router port;
 *  3. enable broadcast flooding from the FID to the router port;
 *  4. call mlxsw_sp_rif_fdb_op() with true for the netdevice's MAC
 *     address in the FID;
 *  5. bind the FID to the RIF.
 *
 * The error labels undo the completed steps in reverse order.
 */
9016 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
9018 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9019 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
9022 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
9027 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9028 mlxsw_sp_router_port(mlxsw_sp), true);
9030 goto err_fid_mc_flood_set;
9032 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9033 mlxsw_sp_router_port(mlxsw_sp), true);
9035 goto err_fid_bc_flood_set;
9037 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9038 mlxsw_sp_fid_index(rif->fid), true);
9040 goto err_rif_fdb_op;
9042 mlxsw_sp_fid_rif_set(rif->fid, rif);
9046 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9047 mlxsw_sp_router_port(mlxsw_sp), false);
9048 err_fid_bc_flood_set:
9049 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9050 mlxsw_sp_router_port(mlxsw_sp), false);
9051 err_fid_mc_flood_set:
9052 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
/* .deconfigure callback shared by the FID and VLAN RIF types.
 *
 * Undoes mlxsw_sp_rif_fid_configure() in reverse order: unbinds the FID
 * from the RIF, calls mlxsw_sp_rif_fdb_op() with false for the
 * netdevice's MAC address, flushes macvlan uppers, disables broadcast
 * and multicast flooding to the router port and disables the RITR entry.
 * Return values of the individual steps are not checked.
 */
9056 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
9058 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
9059 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9060 struct mlxsw_sp_fid *fid = rif->fid;
9062 mlxsw_sp_fid_rif_set(fid, NULL);
9063 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9064 mlxsw_sp_fid_index(fid), false);
9065 mlxsw_sp_rif_macvlan_flush(rif);
9066 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9067 mlxsw_sp_router_port(mlxsw_sp), false);
9068 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9069 mlxsw_sp_router_port(mlxsw_sp), false);
9070 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
/* .fid_get callback for FID RIFs: returns the result of
 * mlxsw_sp_fid_8021d_get() for the ifindex of the RIF's netdevice.
 * @extack is not used.
 */
9073 static struct mlxsw_sp_fid *
9074 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
9075 struct netlink_ext_ack *extack)
9077 return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
/* .fdb_del callback for FID RIFs.
 *
 * Looks up the bridge port on which @mac is known under rif->dev (VLAN 0
 * is passed to br_fdb_find_port()) and raises a
 * SWITCHDEV_FDB_DEL_TO_BRIDGE notification for that port device.
 */
9080 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9082 struct switchdev_notifier_fdb_info info;
9083 struct net_device *dev;
9085 dev = br_fdb_find_port(rif->dev, mac, 0);
9091 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
/* RIF operations for FID RIFs (MLXSW_SP_RIF_TYPE_FID). No .setup
 * callback is set here and the RIF object is the plain struct
 * mlxsw_sp_rif.
 */
9095 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
9096 .type = MLXSW_SP_RIF_TYPE_FID,
9097 .rif_size = sizeof(struct mlxsw_sp_rif),
9098 .configure = mlxsw_sp_rif_fid_configure,
9099 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
9100 .fid_get = mlxsw_sp_rif_fid_fid_get,
9101 .fdb_del = mlxsw_sp_rif_fid_fdb_del,
/* .fid_get callback for VLAN RIFs.
 *
 * Determines the VLAN ID to use and returns the result of
 * mlxsw_sp_fid_8021q_get() for it:
 *  - when rif->dev is a VLAN device, the VID is the device's VLAN ID and
 *    its real device must be a bridge master, otherwise a warning is
 *    triggered and ERR_PTR(-EINVAL) is returned;
 *  - otherwise the VID is queried with br_vlan_get_pvid() on rif->dev;
 *    a failure or a zero PVID is reported through @extack and yields
 *    ERR_PTR(-EINVAL).
 */
9104 static struct mlxsw_sp_fid *
9105 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
9106 struct netlink_ext_ack *extack)
9108 struct net_device *br_dev;
9112 if (is_vlan_dev(rif->dev)) {
9113 vid = vlan_dev_vlan_id(rif->dev);
9114 br_dev = vlan_dev_real_dev(rif->dev);
9115 if (WARN_ON(!netif_is_bridge_master(br_dev)))
9116 return ERR_PTR(-EINVAL);
9118 err = br_vlan_get_pvid(rif->dev, &vid);
9119 if (err < 0 || !vid) {
9120 NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
9121 return ERR_PTR(-EINVAL);
9125 return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
/* .fdb_del callback for VLAN RIFs.
 *
 * The VID comes from the RIF's 802.1Q FID. The bridge device is the real
 * device of rif->dev when that is a VLAN device and rif->dev itself
 * otherwise. The bridge port on which @mac is known in that VID is
 * looked up and a SWITCHDEV_FDB_DEL_TO_BRIDGE notification is raised for
 * it.
 */
9128 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9130 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9131 struct switchdev_notifier_fdb_info info;
9132 struct net_device *br_dev;
9133 struct net_device *dev;
9135 br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
9136 dev = br_fdb_find_port(br_dev, mac, vid);
9142 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
/* RIF operations for VLAN RIFs (MLXSW_SP_RIF_TYPE_VLAN). Configuration
 * and deconfiguration reuse the FID RIF callbacks; only FID lookup and
 * FDB deletion are VLAN specific.
 */
9146 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
9147 .type = MLXSW_SP_RIF_TYPE_VLAN,
9148 .rif_size = sizeof(struct mlxsw_sp_rif),
9149 .configure = mlxsw_sp_rif_fid_configure,
9150 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
9151 .fid_get = mlxsw_sp_rif_vlan_fid_get,
9152 .fdb_del = mlxsw_sp_rif_vlan_fdb_del,
/* Convert a generic RIF pointer to the IPIP loopback RIF that embeds it
 * as its 'common' member. The caller must pass a RIF that really is part
 * of a struct mlxsw_sp_rif_ipip_lb.
 */
9155 static struct mlxsw_sp_rif_ipip_lb *
9156 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
9158 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
/* .setup callback for IPIP loopback RIFs: @params is expected to be
 * embedded in a struct mlxsw_sp_rif_params_ipip_lb, whose lb_config is
 * copied into the loopback RIF.
 */
9162 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
9163 const struct mlxsw_sp_rif_params *params)
9165 struct mlxsw_sp_rif_params_ipip_lb *params_lb;
9166 struct mlxsw_sp_rif_ipip_lb *rif_lb;
9168 params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
9170 rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
9171 rif_lb->lb_config = params_lb->lb_config;
/* Spectrum-1 .configure callback for IPIP loopback RIFs.
 *
 * Takes a reference on the virtual router of the tunnel's underlay table
 * and programs the loopback RIF with that VR's ID (underlay RIF index
 * 0). On success the underlay VR ID is recorded in the RIF and ul_rif_id
 * is set to 0; the err_loopback_op path drops the VR reference.
 */
9175 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
9177 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9178 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
9179 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9180 struct mlxsw_sp_vr *ul_vr;
9183 ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
9185 return PTR_ERR(ul_vr);
9187 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
9189 goto err_loopback_op;
9191 lb_rif->ul_vr_id = ul_vr->id;
9192 lb_rif->ul_rif_id = 0;
9197 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
/* Spectrum-1 .deconfigure callback for IPIP loopback RIFs: disables the
 * loopback RIF for the underlay VR recorded at configure time and drops
 * the reference on that VR.
 */
9201 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
9203 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9204 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9205 struct mlxsw_sp_vr *ul_vr;
9207 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
9208 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
9211 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
/* RIF operations for IPIP loopback RIFs on Spectrum-1. */
9214 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
9215 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
9216 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
9217 .setup = mlxsw_sp_rif_ipip_lb_setup,
9218 .configure = mlxsw_sp1_rif_ipip_lb_configure,
9219 .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure,
/* Spectrum-1 RIF operations, indexed by RIF type. Installed as
 * router->rif_ops_arr by mlxsw_sp1_router_init().
 */
9222 static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
9223 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
9224 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
9225 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
9226 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops,
/* Write the RITR register for an underlay RIF.
 *
 * Packs a loopback interface entry with @enable, the RIF index, the
 * virtual router ID and an MTU of IP_MAX_MTU, and selects the generic
 * loopback protocol. Returns the result of the register write.
 */
9230 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
9232 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9233 char ritr_pl[MLXSW_REG_RITR_LEN];
9235 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
9236 ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
9237 mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
9238 MLXSW_REG_RITR_LOOPBACK_GENERIC);
9240 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
/* Create an underlay RIF in virtual router @vr.
 *
 * Allocates a RIF index (reporting exhaustion through @extack),
 * allocates a plain struct mlxsw_sp_rif without a netdevice, publishes
 * it in router->rifs[] and enables it in the device. On failure the
 * rifs[] slot is cleared again and an ERR_PTR() is returned; a failed
 * RIF allocation yields ERR_PTR(-ENOMEM).
 */
9243 static struct mlxsw_sp_rif *
9244 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
9245 struct netlink_ext_ack *extack)
9247 struct mlxsw_sp_rif *ul_rif;
9251 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
9253 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
9254 return ERR_PTR(err);
9257 ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
9259 return ERR_PTR(-ENOMEM);
9261 mlxsw_sp->router->rifs[rif_index] = ul_rif;
9262 ul_rif->mlxsw_sp = mlxsw_sp;
9263 err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
9270 mlxsw_sp->router->rifs[rif_index] = NULL;
9272 return ERR_PTR(err);
/* Tear down an underlay RIF: disables it in the device and clears its
 * slot in router->rifs[].
 */
9275 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
9277 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9279 mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
9280 mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
/* Get a reference on the underlay RIF of the virtual router bound to
 * table @tb_id.
 *
 * Takes a reference on the VR first. If the VR's underlay RIF reference
 * count is already non-zero it is simply incremented; otherwise the
 * underlay RIF is created and its reference count set to 1. When
 * creation fails, the VR reference is dropped and an ERR_PTR() is
 * returned.
 */
9284 static struct mlxsw_sp_rif *
9285 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
9286 struct netlink_ext_ack *extack)
9288 struct mlxsw_sp_vr *vr;
9291 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
9293 return ERR_CAST(vr);
9295 if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
9298 vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
9299 if (IS_ERR(vr->ul_rif)) {
9300 err = PTR_ERR(vr->ul_rif);
9301 goto err_ul_rif_create;
9305 refcount_set(&vr->ul_rif_refcnt, 1);
9310 mlxsw_sp_vr_put(mlxsw_sp, vr);
9311 return ERR_PTR(err);
/* Drop a reference on an underlay RIF. When the owning VR's underlay RIF
 * reference count reaches zero, the RIF is destroyed and the VR
 * reference is dropped as well.
 */
9314 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
9316 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9317 struct mlxsw_sp_vr *vr;
9319 vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
9321 if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
9325 mlxsw_sp_ul_rif_destroy(ul_rif);
9326 mlxsw_sp_vr_put(mlxsw_sp, vr);
/* Exported, locked wrapper around mlxsw_sp_ul_rif_get().
 *
 * Under router->lock, gets the underlay RIF for table @ul_tb_id (no
 * extack) and stores its index through the ul_rif_index output pointer.
 * A failure of the lookup is converted to an error code with PTR_ERR().
 */
9329 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
9332 struct mlxsw_sp_rif *ul_rif;
9335 mutex_lock(&mlxsw_sp->router->lock);
9336 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
9337 if (IS_ERR(ul_rif)) {
9338 err = PTR_ERR(ul_rif);
9341 *ul_rif_index = ul_rif->rif_index;
9343 mutex_unlock(&mlxsw_sp->router->lock);
/* Exported, locked wrapper around mlxsw_sp_ul_rif_put().
 *
 * Under router->lock, resolves @ul_rif_index to its RIF via
 * router->rifs[] (warning when the slot is empty) and drops a reference
 * on it.
 */
9347 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
9349 struct mlxsw_sp_rif *ul_rif;
9351 mutex_lock(&mlxsw_sp->router->lock);
9352 ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
9353 if (WARN_ON(!ul_rif))
9356 mlxsw_sp_ul_rif_put(ul_rif);
9358 mutex_unlock(&mlxsw_sp->router->lock);
/* Spectrum-2 .configure callback for IPIP loopback RIFs.
 *
 * Takes a reference on the underlay RIF of the tunnel's underlay table
 * and programs the loopback RIF with that underlay RIF's index (underlay
 * VR ID 0). On success ul_vr_id is set to 0 and the underlay RIF index
 * is recorded; the err_loopback_op path drops the underlay RIF
 * reference.
 */
9362 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
9364 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9365 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
9366 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9367 struct mlxsw_sp_rif *ul_rif;
9370 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
9372 return PTR_ERR(ul_rif);
9374 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
9376 goto err_loopback_op;
9378 lb_rif->ul_vr_id = 0;
9379 lb_rif->ul_rif_id = ul_rif->rif_index;
9384 mlxsw_sp_ul_rif_put(ul_rif);
/* Spectrum-2 .deconfigure callback for IPIP loopback RIFs: resolves the
 * underlay RIF recorded at configure time, disables the loopback RIF and
 * drops the underlay RIF reference.
 */
9388 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
9390 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9391 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9392 struct mlxsw_sp_rif *ul_rif;
9394 ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
9395 mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
9396 mlxsw_sp_ul_rif_put(ul_rif);
/* RIF operations for IPIP loopback RIFs on Spectrum-2. */
9399 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
9400 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
9401 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
9402 .setup = mlxsw_sp_rif_ipip_lb_setup,
9403 .configure = mlxsw_sp2_rif_ipip_lb_configure,
9404 .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure,
/* Spectrum-2 RIF operations, indexed by RIF type. Differs from the
 * Spectrum-1 table only in the IPIP loopback entry. Installed as
 * router->rif_ops_arr by mlxsw_sp2_router_init().
 */
9407 static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
9408 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
9409 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
9410 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
9411 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops,
/* Allocate the zero-initialised router->rifs[] pointer array with one
 * slot per RIF supported by the device (MAX_RIFS resource). The
 * allocation result is checked for NULL.
 */
9414 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
9416 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
9418 mlxsw_sp->router->rifs = kcalloc(max_rifs,
9419 sizeof(struct mlxsw_sp_rif *),
9421 if (!mlxsw_sp->router->rifs)
9427 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
9427 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
/* Every RIF slot is expected to be empty by now; warn (once) about any
 * that is still populated before the array is freed.
 */
9431 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
9432 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
9434 kfree(mlxsw_sp->router->rifs);
/* Write the TIGCR register, packed by mlxsw_reg_tigcr_pack() with the
 * arguments (true, 0). Returns the result of the register write.
 */
9438 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
9440 char tigcr_pl[MLXSW_REG_TIGCR_LEN];
9442 mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
9443 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
/* Initialise IP-in-IP tunnel support: installs the IPIP ops array,
 * initialises the (empty) tunnel list, sets up ECN encapsulation and
 * decapsulation and finally returns the result of writing the TIGCR
 * configuration.
 */
9446 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
9450 mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
9451 INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
9453 err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
9456 err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
9460 return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
/* Counterpart of mlxsw_sp_ipips_init(): only checks that no tunnel is
 * left on router->ipip_list and warns otherwise.
 */
9463 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
9465 WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
/* Callback passed to register_fib_notifier() in mlxsw_sp_router_init().
 *
 * Flushes the mlxsw ordered workqueue, recovers the router from the
 * embedded fib_nb notifier block and flushes the device's FIB through
 * mlxsw_sp_router_fib_flush().
 */
9468 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
9470 struct mlxsw_sp_router *router;
9472 /* Flush pending FIB notifications and then flush the device's
9473 * table before requesting another dump. The FIB notification
9474 * block is unregistered, so no need to take RTNL.
9476 mlxsw_core_flush_owq();
9477 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
9478 mlxsw_sp_router_fib_flush(router->mlxsw_sp);
9481 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Scratch description of the multipath hash configuration: one bitmap
 * each for the enabled outer headers, outer fields, inner headers and
 * inner fields. The set bits are later copied into the RECR2 register by
 * mlxsw_sp_mp_hash_init().
 */
9482 struct mlxsw_sp_mp_hash_config {
9483 DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
9484 DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
9485 DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
9486 DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
/* Helpers that set bits in the mlxsw_sp_mp_hash_config bitmaps. The
 * _header / _field argument is the name suffix of a MLXSW_REG_RECR2_*
 * constant; the RANGE variant sets _nr consecutive bits starting there.
 */
9489 #define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
9490 bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)
9492 #define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
9493 bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)
9495 #define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
9496 bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
/* Enable hashing on the inner L3 header: for inner IPv4, the source and
 * destination addresses; for inner IPv6, the source and destination
 * addresses, next header and flow label. Both the TCP/UDP and the
 * non-TCP/UDP header variants are enabled.
 */
9498 static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
9500 unsigned long *inner_headers = config->inner_headers;
9501 unsigned long *inner_fields = config->inner_fields;
9504 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
9505 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
9506 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
9507 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
9509 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
9510 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
9511 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
9512 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
9513 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
9514 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
9515 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
9516 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
/* Enable hashing on the outer IPv4 source and destination addresses,
 * for both the TCP/UDP and the non-TCP/UDP header variants.
 */
9519 static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
9521 unsigned long *headers = config->headers;
9522 unsigned long *fields = config->fields;
9524 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
9525 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
9526 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
9527 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
/* Enable inner-header hashing according to a custom field mask.
 *
 * The inner IPv4, IPv6 and TCP/UDP header enables are set in turn; the
 * individual inner fields (source/destination IP, IP protocol / next
 * header, IPv6 flow label, source/destination port) are enabled only
 * when the matching FIB_MULTIPATH_HASH_FIELD_INNER_* bit is set in
 * hash_fields.
 */
9531 mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
9534 unsigned long *inner_headers = config->inner_headers;
9535 unsigned long *inner_fields = config->inner_fields;
9538 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
9539 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
9540 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
9541 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
9542 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
9543 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
9544 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
9545 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
9547 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
9548 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
9549 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
9550 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
9551 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
9553 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
9554 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
9555 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
9557 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
9558 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
9559 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
9560 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
9562 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
9563 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
9564 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
9565 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
9566 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
9567 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
/* Fill @config with the IPv4 part of the multipath hash configuration.
 *
 * Switches on the namespace's sysctl_fib_multipath_hash_policy. The
 * branches, in source order, configure:
 *  - outer addresses only;
 *  - outer addresses plus IP protocol and TCP/UDP ports;
 *  - outer addresses plus the inner L3 fields;
 *  - a custom selection: the outer header enables are set and each outer
 *    field is enabled only when its FIB_MULTIPATH_HASH_FIELD_* bit is
 *    set in sysctl_fib_multipath_hash_fields, followed by the custom
 *    inner-field selection.
 */
9570 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
9571 struct mlxsw_sp_mp_hash_config *config)
9573 struct net *net = mlxsw_sp_net(mlxsw_sp);
9574 unsigned long *headers = config->headers;
9575 unsigned long *fields = config->fields;
9578 switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
9580 mlxsw_sp_mp4_hash_outer_addr(config);
9583 mlxsw_sp_mp4_hash_outer_addr(config);
9584 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
9585 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
9586 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
9587 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
9591 mlxsw_sp_mp4_hash_outer_addr(config);
9593 mlxsw_sp_mp_hash_inner_l3(config);
9596 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
9598 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
9599 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
9600 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
9601 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
9602 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
9603 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
9604 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
9605 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
9606 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
9607 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
9608 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
9609 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
9610 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
9612 mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
/* Enable hashing on the outer IPv6 source and destination addresses,
 * for both the TCP/UDP and the non-TCP/UDP header variants.
 */
9617 static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
9619 unsigned long *headers = config->headers;
9620 unsigned long *fields = config->fields;
9622 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
9623 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
9624 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
9625 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
9626 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
9627 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
/* Fill @config with the IPv6 part of the multipath hash configuration.
 *
 * Switches on ip6_multipath_hash_policy() of the device's namespace. The
 * branches, in source order, configure:
 *  - outer addresses, next header and flow label;
 *  - outer addresses, next header and TCP/UDP ports;
 *  - outer addresses, next header and flow label plus the inner L3
 *    fields;
 *  - a custom selection: the outer header enables are set and each outer
 *    field is enabled only when its FIB_MULTIPATH_HASH_FIELD_* bit is
 *    set in ip6_multipath_hash_fields(), followed by the custom
 *    inner-field selection.
 */
9630 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
9631 struct mlxsw_sp_mp_hash_config *config)
9633 u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
9634 unsigned long *headers = config->headers;
9635 unsigned long *fields = config->fields;
9637 switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
9639 mlxsw_sp_mp6_hash_outer_addr(config);
9640 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
9641 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
9644 mlxsw_sp_mp6_hash_outer_addr(config);
9645 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
9646 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
9647 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
9648 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
9652 mlxsw_sp_mp6_hash_outer_addr(config);
9653 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
9654 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
9656 mlxsw_sp_mp_hash_inner_l3(config);
9660 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
9661 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
9662 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
9663 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
9664 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
9665 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
9667 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
9668 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
9669 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
9671 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
9672 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
9673 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
9674 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
9675 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
9676 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
9677 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
9678 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
9680 mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
/* Program the multipath hash configuration into the RECR2 register.
 *
 * The hash seed is derived with jhash() from the device's base MAC. The
 * IPv4 and IPv6 helpers accumulate their header and field selections in
 * one zero-initialised config, and every set bit of the four bitmaps is
 * then enabled in the register payload. Returns the result of the
 * register write.
 */
9685 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
9687 struct mlxsw_sp_mp_hash_config config = {};
9688 char recr2_pl[MLXSW_REG_RECR2_LEN];
9692 seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
9693 mlxsw_reg_recr2_pack(recr2_pl, seed);
9694 mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
9695 mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
9697 for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
9698 mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
9699 for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
9700 mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
9701 for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
9702 mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
9703 for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
9704 mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
9706 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
/* NOTE(review): second definition of mlxsw_sp_mp_hash_init(); presumably
 * the variant built when CONFIG_IP_ROUTE_MULTIPATH is not set - confirm
 * against the surrounding preprocessor conditionals.
 */
9709 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
/* Program the RDPM register.
 *
 * The payload is zeroed, then for every DSCP entry index i the value
 * rt_tos2priority(i << 2) is packed. Returns the result of the register
 * write.
 */
9715 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
9717 char rdpm_pl[MLXSW_REG_RDPM_LEN];
9720 MLXSW_REG_ZERO(rdpm, rdpm_pl);
9722 /* HW is determining switch priority based on DSCP-bits, but the
9723 * kernel is still doing that based on the ToS. Since there's a
9724 * mismatch in bits we need to make sure to translate the right
9725 * value ToS would observe, skipping the 2 least-significant ECN bits.
9727 for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
9728 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
9730 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
/* Write the global router configuration (RGCR register).
 *
 * Requires the MAX_RIFS resource to be valid. The register is packed by
 * mlxsw_reg_rgcr_pack() with (true, true), the maximum number of router
 * interfaces is set from MAX_RIFS, and the 'usp' field mirrors the
 * namespace's sysctl_ip_fwd_update_priority. Returns the result of the
 * register write.
 */
9733 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
9735 struct net *net = mlxsw_sp_net(mlxsw_sp);
9736 bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
9737 char rgcr_pl[MLXSW_REG_RGCR_LEN];
9740 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
9742 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
9744 mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
9745 mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
9746 mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
9747 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
/* Counterpart of __mlxsw_sp_router_init(): rewrites RGCR packed with
 * (false, false). The result of the register write is ignored.
 */
9750 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
9752 char rgcr_pl[MLXSW_REG_RGCR_LEN];
9754 mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
9755 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
/* "Basic" low-level router operations. mlxsw_sp_router_init() uses them
 * for IPv6 always and for IPv4 when the XM variant is not supported.
 * fib_entry_op_ctx_size tells mlxsw_sp_router_ll_op_ctx_init() how much
 * private space these ops need in the shared operation context.
 */
9758 static const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_basic_ops = {
9759 .init = mlxsw_sp_router_ll_basic_init,
9760 .ralta_write = mlxsw_sp_router_ll_basic_ralta_write,
9761 .ralst_write = mlxsw_sp_router_ll_basic_ralst_write,
9762 .raltb_write = mlxsw_sp_router_ll_basic_raltb_write,
9763 .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_basic),
9764 .fib_entry_pack = mlxsw_sp_router_ll_basic_fib_entry_pack,
9765 .fib_entry_act_remote_pack = mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack,
9766 .fib_entry_act_local_pack = mlxsw_sp_router_ll_basic_fib_entry_act_local_pack,
9767 .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack,
9768 .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack,
9769 .fib_entry_commit = mlxsw_sp_router_ll_basic_fib_entry_commit,
9770 .fib_entry_is_committed = mlxsw_sp_router_ll_basic_fib_entry_is_committed,
/* Allocate the shared low-level operation context.
 *
 * Iterates over the per-protocol low-level ops, comparing each
 * fib_entry_op_ctx_size against the running maximum, and allocates a
 * zeroed context with max_size extra bytes of trailing private space.
 * The allocation is checked for NULL and the context's
 * fib_entry_priv_list is initialised empty.
 */
9773 static int mlxsw_sp_router_ll_op_ctx_init(struct mlxsw_sp_router *router)
9775 size_t max_size = 0;
9778 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
9779 size_t size = router->proto_ll_ops[i]->fib_entry_op_ctx_size;
9781 if (size > max_size)
9784 router->ll_op_ctx = kzalloc(sizeof(*router->ll_op_ctx) + max_size,
9786 if (!router->ll_op_ctx)
9788 INIT_LIST_HEAD(&router->ll_op_ctx->fib_entry_priv_list);
/* Free the shared low-level operation context, warning if its
 * fib_entry_priv_list is not empty.
 */
9792 static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router)
9794 WARN_ON(!list_empty(&router->ll_op_ctx->fib_entry_priv_list));
9795 kfree(router->ll_op_ctx);
/* Obtain the router's generic loopback RIF by taking a reference on the
 * underlay RIF of RT_TABLE_MAIN, and record its index in
 * router->lb_rif_index.
 */
9798 static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
9803 /* Create a generic loopback RIF associated with the main table
9804 * (default VRF). Any table can be used, but the main table exists
9805 * anyway, so we do not waste resources.
9807 err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN,
9812 mlxsw_sp->router->lb_rif_index = lb_rif_index;
/* Release the reference taken by mlxsw_sp_lb_rif_init() on the RIF
 * stored in router->lb_rif_index.
 */
9817 static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
9819 mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
/* Spectrum-1 .init callback of the router ops: installs the Spectrum-1
 * RIF ops table and the Spectrum-1 adjacency group size ranges (with
 * their count) in the router.
 */
9822 static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
9824 size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);
9826 mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
9827 mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
9828 mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
/* Router operations for Spectrum-1. The .init callback is invoked from
 * mlxsw_sp_router_init() through mlxsw_sp->router_ops.
 */
9833 const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
9834 .init = mlxsw_sp1_router_init,
/* Spectrum-2 .init callback of the router ops: installs the Spectrum-2
 * RIF ops table and the Spectrum-2 adjacency group size ranges (with
 * their count) in the router.
 */
9837 static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
9839 size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);
9841 mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
9842 mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
9843 mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
/* Router operations for Spectrum-2. The .init callback is invoked from
 * mlxsw_sp_router_init() through mlxsw_sp->router_ops.
 */
9848 const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
9849 .init = mlxsw_sp2_router_init,
/* Allocate the router instance for @mlxsw_sp and initialize its parts one
 * after another, ending with the inetaddr, inet6addr, netevent, nexthop and
 * FIB notifier registrations. The labels at the end release what was already
 * set up, in reverse order, and finally free the router.
 *
 * @extack is passed on to register_fib_notifier().
 *
 * NOTE(review): several lines of this function (error checks, some labels
 * and gotos, the return statements) are not visible in this listing; confirm
 * the details against the complete source.
 */
9852 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
9853 struct netlink_ext_ack *extack)
9855 struct mlxsw_sp_router *router;
/* Zeroed allocation, sized from the type of mlxsw_sp->router. */
9858 router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
9861 mutex_init(&router->lock);
/* Link the router and its owning mlxsw_sp to each other. */
9862 mlxsw_sp->router = router;
9863 router->mlxsw_sp = mlxsw_sp;
/* Run the init callback supplied through mlxsw_sp->router_ops. */
9865 err = mlxsw_sp->router_ops->init(mlxsw_sp);
9867 goto err_router_ops_init;
9869 err = mlxsw_sp_router_xm_init(mlxsw_sp);
/* IPv4 uses the XM low-level ops when
 * mlxsw_sp_router_xm_ipv4_is_supported() returns true and the basic ops
 * otherwise; IPv6 always uses the basic ops.
 */
9873 router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV4] = mlxsw_sp_router_xm_ipv4_is_supported(mlxsw_sp) ?
9874 &mlxsw_sp_router_ll_xm_ops :
9875 &mlxsw_sp_router_ll_basic_ops;
9876 router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_router_ll_basic_ops;
9878 err = mlxsw_sp_router_ll_op_ctx_init(router);
9880 goto err_ll_op_ctx_init;
/* Prepare nh_res_grp_list and the delayed work that runs
 * mlxsw_sp_nh_grp_activity_work().
 */
9882 INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
9883 INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
9884 mlxsw_sp_nh_grp_activity_work);
9886 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
9887 err = __mlxsw_sp_router_init(mlxsw_sp);
9889 goto err_router_init;
9891 err = mlxsw_sp_rifs_init(mlxsw_sp);
9895 err = mlxsw_sp_ipips_init(mlxsw_sp);
9897 goto err_ipips_init;
/* Hash tables for nexthops and nexthop groups. */
9899 err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
9900 &mlxsw_sp_nexthop_ht_params);
9902 goto err_nexthop_ht_init;
9904 err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
9905 &mlxsw_sp_nexthop_group_ht_params);
9907 goto err_nexthop_group_ht_init;
9909 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
9910 err = mlxsw_sp_lpm_init(mlxsw_sp);
9914 err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
9918 err = mlxsw_sp_vrs_init(mlxsw_sp);
9922 err = mlxsw_sp_lb_rif_init(mlxsw_sp);
9924 goto err_lb_rif_init;
9926 err = mlxsw_sp_neigh_init(mlxsw_sp);
9928 goto err_neigh_init;
9930 err = mlxsw_sp_mp_hash_init(mlxsw_sp);
9932 goto err_mp_hash_init;
9934 err = mlxsw_sp_dscp_init(mlxsw_sp);
/* Work item, list head and spinlock used for queued FIB events. */
9938 INIT_WORK(&router->fib_event_work, mlxsw_sp_router_fib_event_work);
9939 INIT_LIST_HEAD(&router->fib_event_queue);
9940 spin_lock_init(&router->fib_event_queue_lock);
/* Notifier registrations; each callback is assigned right before its
 * notifier block is registered.
 */
9942 router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
9943 err = register_inetaddr_notifier(&router->inetaddr_nb);
9945 goto err_register_inetaddr_notifier;
9947 router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
9948 err = register_inet6addr_notifier(&router->inet6addr_nb);
9950 goto err_register_inet6addr_notifier;
9952 mlxsw_sp->router->netevent_nb.notifier_call =
9953 mlxsw_sp_router_netevent_event;
9954 err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
9956 goto err_register_netevent_notifier;
9958 mlxsw_sp->router->nexthop_nb.notifier_call =
9959 mlxsw_sp_nexthop_obj_event;
9960 err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
9961 &mlxsw_sp->router->nexthop_nb,
9964 goto err_register_nexthop_notifier;
9966 mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
9967 err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
9968 &mlxsw_sp->router->fib_nb,
9969 mlxsw_sp_router_fib_dump_flush, extack);
9971 goto err_register_fib_notifier;
/* Error unwind: starting at the label that was jumped to, undo the earlier
 * steps in reverse order of their setup.
 */
9975 err_register_fib_notifier:
9976 unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
9977 &mlxsw_sp->router->nexthop_nb);
9978 err_register_nexthop_notifier:
9979 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
9980 err_register_netevent_notifier:
9981 unregister_inet6addr_notifier(&router->inet6addr_nb);
9982 err_register_inet6addr_notifier:
9983 unregister_inetaddr_notifier(&router->inetaddr_nb);
9984 err_register_inetaddr_notifier:
/* After the flush through mlxsw_core_flush_owq(), warn if FIB events are
 * still queued.
 */
9985 mlxsw_core_flush_owq();
9986 WARN_ON(!list_empty(&router->fib_event_queue));
9989 mlxsw_sp_neigh_fini(mlxsw_sp);
9991 mlxsw_sp_lb_rif_fini(mlxsw_sp);
9993 mlxsw_sp_vrs_fini(mlxsw_sp);
9995 mlxsw_sp_mr_fini(mlxsw_sp);
9997 mlxsw_sp_lpm_fini(mlxsw_sp);
9999 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
10000 err_nexthop_group_ht_init:
10001 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
10002 err_nexthop_ht_init:
10003 mlxsw_sp_ipips_fini(mlxsw_sp);
10005 mlxsw_sp_rifs_fini(mlxsw_sp);
10007 __mlxsw_sp_router_fini(mlxsw_sp);
/* Cancel the nexthop group activity work and wait for it to finish. */
10009 cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
10010 mlxsw_sp_router_ll_op_ctx_fini(router);
10011 err_ll_op_ctx_init:
10012 mlxsw_sp_router_xm_fini(mlxsw_sp);
10014 err_router_ops_init:
/* Last step of the unwind: destroy the lock and free the router. */
10015 mutex_destroy(&mlxsw_sp->router->lock);
10016 kfree(mlxsw_sp->router);
/* Tear down the router of @mlxsw_sp: unregister the FIB, nexthop, netevent,
 * inet6addr and inetaddr notifiers, flush through mlxsw_core_flush_owq(),
 * release the router's parts and free the router itself. From the nexthop
 * notifier onward, the calls follow the same sequence as the error unwind in
 * mlxsw_sp_router_init().
 */
10020 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
/* Unregister the notifiers before anything else is torn down. */
10022 unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
10023 &mlxsw_sp->router->fib_nb);
10024 unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
10025 &mlxsw_sp->router->nexthop_nb);
10026 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
10027 unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
10028 unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
/* After the flush, warn if FIB events are still queued. */
10029 mlxsw_core_flush_owq();
10030 WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue));
10031 mlxsw_sp_neigh_fini(mlxsw_sp);
10032 mlxsw_sp_lb_rif_fini(mlxsw_sp);
10033 mlxsw_sp_vrs_fini(mlxsw_sp);
10034 mlxsw_sp_mr_fini(mlxsw_sp);
10035 mlxsw_sp_lpm_fini(mlxsw_sp);
10036 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
10037 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
10038 mlxsw_sp_ipips_fini(mlxsw_sp);
10039 mlxsw_sp_rifs_fini(mlxsw_sp);
10040 __mlxsw_sp_router_fini(mlxsw_sp);
/* Cancel the nexthop group activity work and wait for it to finish. */
10041 cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
10042 mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router);
10043 mlxsw_sp_router_xm_fini(mlxsw_sp);
/* Destroy the lock and free the router last. */
10044 mutex_destroy(&mlxsw_sp->router->lock);
10045 kfree(mlxsw_sp->router);