Merge tag 'nds32-for-linus-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-microblaze.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
53 #include <net/arp.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
61 #include <net/ipv6.h>
62 #include <net/fib_notifier.h>
63
64 #include "spectrum.h"
65 #include "core.h"
66 #include "reg.h"
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
73 #include "spectrum_span.h"
74
/* Forward declarations of types whose full definitions appear further down
 * in this file; they are referenced by pointer before being defined.
 */
75 struct mlxsw_sp_fib;
76 struct mlxsw_sp_vr;
77 struct mlxsw_sp_lpm_tree;
78 struct mlxsw_sp_rif_ops;
79
/* Router state attached to one mlxsw_sp instance: the RIF pointer array,
 * the virtual router array, the neighbour / nexthop hash tables and lists,
 * the LPM tree pool, the delayed work items and the notifier blocks.
 */
80 struct mlxsw_sp_router {
81         struct mlxsw_sp *mlxsw_sp;
82         struct mlxsw_sp_rif **rifs;
83         struct mlxsw_sp_vr *vrs;
84         struct rhashtable neigh_ht;
85         struct rhashtable nexthop_group_ht;
86         struct rhashtable nexthop_ht;
87         struct list_head nexthop_list;
88         struct {
89                 /* One tree for each protocol: IPv4 and IPv6 */
90                 struct mlxsw_sp_lpm_tree *proto_trees[2];
                /* Array of tree_count tree slots, allocated in
                 * mlxsw_sp_lpm_init() and released in mlxsw_sp_lpm_fini().
                 */
91                 struct mlxsw_sp_lpm_tree *trees;
92                 unsigned int tree_count;
93         } lpm;
94         struct {
95                 struct delayed_work dw;
96                 unsigned long interval; /* ms */
97         } neighs_update;
98         struct delayed_work nexthop_probe_dw;
99 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
100         struct list_head nexthop_neighs_list;
101         struct list_head ipip_list;
102         bool aborted;
103         struct notifier_block fib_nb;
104         struct notifier_block netevent_nb;
105         const struct mlxsw_sp_rif_ops **rif_ops_arr;
106         const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
107 };
108
/* Common part of a router interface (RIF). Type-specific RIF structures
 * (struct mlxsw_sp_rif_subport, struct mlxsw_sp_rif_ipip_lb) embed it as
 * their first member, named "common".
 */
109 struct mlxsw_sp_rif {
110         struct list_head nexthop_list;
111         struct list_head neigh_list;
112         struct net_device *dev;
113         struct mlxsw_sp_fid *fid;
114         unsigned char addr[ETH_ALEN];
115         int mtu;
116         u16 rif_index;
117         u16 vr_id;
118         const struct mlxsw_sp_rif_ops *ops;
119         struct mlxsw_sp *mlxsw_sp;
120
        /* Per-direction counter index and a flag telling whether that index
         * is currently valid; accessed through mlxsw_sp_rif_p_counter_get()
         * and mlxsw_sp_rif_counter_valid_get() / _valid_set().
         */
121         unsigned int counter_ingress;
122         bool counter_ingress_valid;
123         unsigned int counter_egress;
124         bool counter_egress_valid;
125 };
126
/* Parameters passed to the ->setup() callback of struct mlxsw_sp_rif_ops. */
127 struct mlxsw_sp_rif_params {
128         struct net_device *dev;
        /* system_port and lag_id share storage.
         * NOTE(review): presumably 'lag' below selects which one is
         * meaningful - confirm against the users of this structure.
         */
129         union {
130                 u16 system_port;
131                 u16 lag_id;
132         };
133         u16 vid;
134         bool lag;
135 };
136
/* RIF variant that extends the common RIF with a system port or LAG ID
 * (sharing storage), a VID and a 'lag' flag - the same set of fields that
 * struct mlxsw_sp_rif_params carries.
 */
137 struct mlxsw_sp_rif_subport {
138         struct mlxsw_sp_rif common;
139         union {
140                 u16 system_port;
141                 u16 lag_id;
142         };
143         u16 vid;
144         bool lag;
145 };
146
/* RIF variant that extends the common RIF with an IP-in-IP loopback
 * configuration.
 */
147 struct mlxsw_sp_rif_ipip_lb {
148         struct mlxsw_sp_rif common;
149         struct mlxsw_sp_rif_ipip_lb_config lb_config;
150         u16 ul_vr_id; /* Reserved for Spectrum-2. */
151 };
152
/* RIF parameters extended with an IP-in-IP loopback configuration; embeds
 * the generic struct mlxsw_sp_rif_params as its first member.
 */
153 struct mlxsw_sp_rif_params_ipip_lb {
154         struct mlxsw_sp_rif_params common;
155         struct mlxsw_sp_rif_ipip_lb_config lb_config;
156 };
157
/* Operations table describing one RIF type. */
158 struct mlxsw_sp_rif_ops {
159         enum mlxsw_sp_rif_type type;
        /* NOTE(review): presumably the size of the type-specific RIF
         * structure to allocate - confirm at the allocation site.
         */
160         size_t rif_size;
161
162         void (*setup)(struct mlxsw_sp_rif *rif,
163                       const struct mlxsw_sp_rif_params *params);
164         int (*configure)(struct mlxsw_sp_rif *rif);
165         void (*deconfigure)(struct mlxsw_sp_rif *rif);
166         struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
167 };
168
/* Prototypes for LPM tree helpers that are defined further down but are
 * already needed by mlxsw_sp_fib_create() and mlxsw_sp_fib_destroy().
 */
169 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
170 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
171                                   struct mlxsw_sp_lpm_tree *lpm_tree);
172 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
173                                      const struct mlxsw_sp_fib *fib,
174                                      u8 tree_id);
175 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
176                                        const struct mlxsw_sp_fib *fib);
177
178 static unsigned int *
179 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
180                            enum mlxsw_sp_rif_counter_dir dir)
181 {
182         switch (dir) {
183         case MLXSW_SP_RIF_COUNTER_EGRESS:
184                 return &rif->counter_egress;
185         case MLXSW_SP_RIF_COUNTER_INGRESS:
186                 return &rif->counter_ingress;
187         }
188         return NULL;
189 }
190
191 static bool
192 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
193                                enum mlxsw_sp_rif_counter_dir dir)
194 {
195         switch (dir) {
196         case MLXSW_SP_RIF_COUNTER_EGRESS:
197                 return rif->counter_egress_valid;
198         case MLXSW_SP_RIF_COUNTER_INGRESS:
199                 return rif->counter_ingress_valid;
200         }
201         return false;
202 }
203
204 static void
205 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
206                                enum mlxsw_sp_rif_counter_dir dir,
207                                bool valid)
208 {
209         switch (dir) {
210         case MLXSW_SP_RIF_COUNTER_EGRESS:
211                 rif->counter_egress_valid = valid;
212                 break;
213         case MLXSW_SP_RIF_COUNTER_INGRESS:
214                 rif->counter_ingress_valid = valid;
215                 break;
216         }
217 }
218
219 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
220                                      unsigned int counter_index, bool enable,
221                                      enum mlxsw_sp_rif_counter_dir dir)
222 {
223         char ritr_pl[MLXSW_REG_RITR_LEN];
224         bool is_egress = false;
225         int err;
226
227         if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
228                 is_egress = true;
229         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
230         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
231         if (err)
232                 return err;
233
234         mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
235                                     is_egress);
236         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
237 }
238
239 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
240                                    struct mlxsw_sp_rif *rif,
241                                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
242 {
243         char ricnt_pl[MLXSW_REG_RICNT_LEN];
244         unsigned int *p_counter_index;
245         bool valid;
246         int err;
247
248         valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
249         if (!valid)
250                 return -EINVAL;
251
252         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
253         if (!p_counter_index)
254                 return -EINVAL;
255         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
256                              MLXSW_REG_RICNT_OPCODE_NOP);
257         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
258         if (err)
259                 return err;
260         *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
261         return 0;
262 }
263
264 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
265                                       unsigned int counter_index)
266 {
267         char ricnt_pl[MLXSW_REG_RICNT_LEN];
268
269         mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
270                              MLXSW_REG_RICNT_OPCODE_CLEAR);
271         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
272 }
273
274 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
275                                struct mlxsw_sp_rif *rif,
276                                enum mlxsw_sp_rif_counter_dir dir)
277 {
278         unsigned int *p_counter_index;
279         int err;
280
281         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
282         if (!p_counter_index)
283                 return -EINVAL;
284         err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
285                                      p_counter_index);
286         if (err)
287                 return err;
288
289         err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
290         if (err)
291                 goto err_counter_clear;
292
293         err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
294                                         *p_counter_index, true, dir);
295         if (err)
296                 goto err_counter_edit;
297         mlxsw_sp_rif_counter_valid_set(rif, dir, true);
298         return 0;
299
300 err_counter_edit:
301 err_counter_clear:
302         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
303                               *p_counter_index);
304         return err;
305 }
306
307 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
308                                struct mlxsw_sp_rif *rif,
309                                enum mlxsw_sp_rif_counter_dir dir)
310 {
311         unsigned int *p_counter_index;
312
313         if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
314                 return;
315
316         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
317         if (WARN_ON(!p_counter_index))
318                 return;
319         mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
320                                   *p_counter_index, false, dir);
321         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
322                               *p_counter_index);
323         mlxsw_sp_rif_counter_valid_set(rif, dir, false);
324 }
325
326 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
327 {
328         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
329         struct devlink *devlink;
330
331         devlink = priv_to_devlink(mlxsw_sp->core);
332         if (!devlink_dpipe_table_counter_enabled(devlink,
333                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
334                 return;
335         mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
336 }
337
338 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
339 {
340         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
341
342         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
343 }
344
/* Forward declaration only; the definition is not part of this section of
 * the file.
 */
345 static struct mlxsw_sp_rif *
346 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
347                          const struct net_device *dev);
348
/* Number of distinct prefix lengths: one per bit of an IPv6 address, plus
 * one for prefix length zero.
 */
349 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
350
/* Bitmap with one bit per prefix length; bit N set means prefix length N
 * is in use.
 */
351 struct mlxsw_sp_prefix_usage {
352         DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
353 };
354
/* Iterate over every prefix length whose bit is set in @prefix_usage. */
355 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
356         for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
357
358 static bool
359 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
360                          struct mlxsw_sp_prefix_usage *prefix_usage2)
361 {
362         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
363 }
364
365 static void
366 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
367                           struct mlxsw_sp_prefix_usage *prefix_usage2)
368 {
369         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
370 }
371
372 static void
373 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
374                           unsigned char prefix_len)
375 {
376         set_bit(prefix_len, prefix_usage->b);
377 }
378
379 static void
380 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
381                             unsigned char prefix_len)
382 {
383         clear_bit(prefix_len, prefix_usage->b);
384 }
385
/* Key of a FIB node: address bytes (the buffer is sized for an IPv6
 * address) together with the prefix length.
 */
386 struct mlxsw_sp_fib_key {
387         unsigned char addr[sizeof(struct in6_addr)];
388         unsigned char prefix_len;
389 };
390
/* Kind of a FIB entry; stored in the 'type' member of
 * struct mlxsw_sp_fib_entry.
 */
391 enum mlxsw_sp_fib_entry_type {
392         MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
393         MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
394         MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
395
396         /* This is a special case of local delivery, where a packet should be
397          * decapsulated on reception. Note that there is no corresponding ENCAP,
398          * because that's a type of next hop, not of FIB entry. (There can be
399          * several next hops in a REMOTE entry, and some of them may be
400          * encapsulating entries.)
401          */
402         MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
403 };
404
/* Forward declaration; struct mlxsw_sp_fib_entry below holds a pointer to a
 * nexthop group.
 */
405 struct mlxsw_sp_nexthop_group;
406
/* One node of a FIB, identified by 'key' and pointing back to its owning
 * FIB. It carries a list head for entries plus list and hash table linkage.
 */
407 struct mlxsw_sp_fib_node {
408         struct list_head entry_list;
409         struct list_head list;
410         struct rhash_head ht_node;
411         struct mlxsw_sp_fib *fib;
412         struct mlxsw_sp_fib_key key;
413 };
414
/* Decapsulation data embedded in struct mlxsw_sp_fib_entry as 'decap'. */
415 struct mlxsw_sp_fib_entry_decap {
416         struct mlxsw_sp_ipip_entry *ipip_entry;
417         u32 tunnel_index;
418 };
419
/* Protocol-independent part of a FIB entry; embedded as 'common' in
 * struct mlxsw_sp_fib4_entry and struct mlxsw_sp_fib6_entry.
 */
420 struct mlxsw_sp_fib_entry {
421         struct list_head list;
422         struct mlxsw_sp_fib_node *fib_node;
423         enum mlxsw_sp_fib_entry_type type;
424         struct list_head nexthop_group_node;
425         struct mlxsw_sp_nexthop_group *nh_group;
426         struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
427 };
428
/* IPv4 FIB entry: the common entry followed by IPv4-specific attributes. */
429 struct mlxsw_sp_fib4_entry {
430         struct mlxsw_sp_fib_entry common;
431         u32 tb_id;
432         u32 prio;
433         u8 tos;
434         u8 type;
435 };
436
/* IPv6 FIB entry: the common entry plus a list head and a counter.
 * NOTE(review): rt6_list presumably links struct mlxsw_sp_rt6 items and
 * nrt6 counts them - confirm against the code that fills the list.
 */
437 struct mlxsw_sp_fib6_entry {
438         struct mlxsw_sp_fib_entry common;
439         struct list_head rt6_list;
440         unsigned int nrt6;
441 };
442
/* List element wrapping a pointer to a kernel struct fib6_info. */
443 struct mlxsw_sp_rt6 {
444         struct list_head list;
445         struct fib6_info *rt;
446 };
447
/* One slot of the LPM tree pool. A slot whose ref_count is zero is treated
 * as unused (see mlxsw_sp_lpm_tree_find_unused()).
 */
448 struct mlxsw_sp_lpm_tree {
449         u8 id; /* tree ID */
450         unsigned int ref_count;
451         enum mlxsw_sp_l3proto proto;
452         unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
453         struct mlxsw_sp_prefix_usage prefix_usage;
454 };
455
/* A FIB for one protocol of one virtual router: a hash table and a list
 * head for nodes, the owning virtual router and the LPM tree the FIB is
 * bound to.
 */
456 struct mlxsw_sp_fib {
457         struct rhashtable ht;
458         struct list_head node_list;
459         struct mlxsw_sp_vr *vr;
460         struct mlxsw_sp_lpm_tree *lpm_tree;
461         enum mlxsw_sp_l3proto proto;
462 };
463
/* A virtual router slot. The slot counts as in use when any of fib4, fib6
 * or the mr_table[] pointers is non-NULL (see mlxsw_sp_vr_is_used()).
 */
464 struct mlxsw_sp_vr {
465         u16 id; /* virtual router ID */
466         u32 tb_id; /* kernel fib table id */
467         unsigned int rif_count;
468         struct mlxsw_sp_fib *fib4;
469         struct mlxsw_sp_fib *fib6;
470         struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
471 };
472
/* Declared here so mlxsw_sp_fib_create() can reference it; the initialized
 * definition is not part of this section of the file.
 */
473 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
474
475 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
476                                                 struct mlxsw_sp_vr *vr,
477                                                 enum mlxsw_sp_l3proto proto)
478 {
479         struct mlxsw_sp_lpm_tree *lpm_tree;
480         struct mlxsw_sp_fib *fib;
481         int err;
482
483         lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
484         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
485         if (!fib)
486                 return ERR_PTR(-ENOMEM);
487         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
488         if (err)
489                 goto err_rhashtable_init;
490         INIT_LIST_HEAD(&fib->node_list);
491         fib->proto = proto;
492         fib->vr = vr;
493         fib->lpm_tree = lpm_tree;
494         mlxsw_sp_lpm_tree_hold(lpm_tree);
495         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
496         if (err)
497                 goto err_lpm_tree_bind;
498         return fib;
499
500 err_lpm_tree_bind:
501         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
502 err_rhashtable_init:
503         kfree(fib);
504         return ERR_PTR(err);
505 }
506
507 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
508                                  struct mlxsw_sp_fib *fib)
509 {
510         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
511         mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
512         WARN_ON(!list_empty(&fib->node_list));
513         rhashtable_destroy(&fib->ht);
514         kfree(fib);
515 }
516
517 static struct mlxsw_sp_lpm_tree *
518 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
519 {
520         static struct mlxsw_sp_lpm_tree *lpm_tree;
521         int i;
522
523         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
524                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
525                 if (lpm_tree->ref_count == 0)
526                         return lpm_tree;
527         }
528         return NULL;
529 }
530
531 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
532                                    struct mlxsw_sp_lpm_tree *lpm_tree)
533 {
534         char ralta_pl[MLXSW_REG_RALTA_LEN];
535
536         mlxsw_reg_ralta_pack(ralta_pl, true,
537                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
538                              lpm_tree->id);
539         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
540 }
541
542 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
543                                    struct mlxsw_sp_lpm_tree *lpm_tree)
544 {
545         char ralta_pl[MLXSW_REG_RALTA_LEN];
546
547         mlxsw_reg_ralta_pack(ralta_pl, false,
548                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
549                              lpm_tree->id);
550         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
551 }
552
553 static int
554 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
555                                   struct mlxsw_sp_prefix_usage *prefix_usage,
556                                   struct mlxsw_sp_lpm_tree *lpm_tree)
557 {
558         char ralst_pl[MLXSW_REG_RALST_LEN];
559         u8 root_bin = 0;
560         u8 prefix;
561         u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
562
563         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
564                 root_bin = prefix;
565
566         mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
567         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
568                 if (prefix == 0)
569                         continue;
570                 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
571                                          MLXSW_REG_RALST_BIN_NO_CHILD);
572                 last_prefix = prefix;
573         }
574         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
575 }
576
577 static struct mlxsw_sp_lpm_tree *
578 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
579                          struct mlxsw_sp_prefix_usage *prefix_usage,
580                          enum mlxsw_sp_l3proto proto)
581 {
582         struct mlxsw_sp_lpm_tree *lpm_tree;
583         int err;
584
585         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
586         if (!lpm_tree)
587                 return ERR_PTR(-EBUSY);
588         lpm_tree->proto = proto;
589         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
590         if (err)
591                 return ERR_PTR(err);
592
593         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
594                                                 lpm_tree);
595         if (err)
596                 goto err_left_struct_set;
597         memcpy(&lpm_tree->prefix_usage, prefix_usage,
598                sizeof(lpm_tree->prefix_usage));
599         memset(&lpm_tree->prefix_ref_count, 0,
600                sizeof(lpm_tree->prefix_ref_count));
601         lpm_tree->ref_count = 1;
602         return lpm_tree;
603
604 err_left_struct_set:
605         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
606         return ERR_PTR(err);
607 }
608
/* Release @lpm_tree in the device. Called by mlxsw_sp_lpm_tree_put() once
 * the last reference is dropped.
 */
609 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
610                                       struct mlxsw_sp_lpm_tree *lpm_tree)
611 {
612         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
613 }
614
615 static struct mlxsw_sp_lpm_tree *
616 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
617                       struct mlxsw_sp_prefix_usage *prefix_usage,
618                       enum mlxsw_sp_l3proto proto)
619 {
620         struct mlxsw_sp_lpm_tree *lpm_tree;
621         int i;
622
623         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
624                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
625                 if (lpm_tree->ref_count != 0 &&
626                     lpm_tree->proto == proto &&
627                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
628                                              prefix_usage)) {
629                         mlxsw_sp_lpm_tree_hold(lpm_tree);
630                         return lpm_tree;
631                 }
632         }
633         return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
634 }
635
636 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
637 {
638         lpm_tree->ref_count++;
639 }
640
641 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
642                                   struct mlxsw_sp_lpm_tree *lpm_tree)
643 {
644         if (--lpm_tree->ref_count == 0)
645                 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
646 }
647
/* Lowest tree ID used by the driver: mlxsw_sp_lpm_init() gives pool slot i
 * the ID i + MLXSW_SP_LPM_TREE_MIN and sizes the pool accordingly.
 */
648 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
649
650 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
651 {
652         struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
653         struct mlxsw_sp_lpm_tree *lpm_tree;
654         u64 max_trees;
655         int err, i;
656
657         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
658                 return -EIO;
659
660         max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
661         mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
662         mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
663                                              sizeof(struct mlxsw_sp_lpm_tree),
664                                              GFP_KERNEL);
665         if (!mlxsw_sp->router->lpm.trees)
666                 return -ENOMEM;
667
668         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
669                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
670                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
671         }
672
673         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
674                                          MLXSW_SP_L3_PROTO_IPV4);
675         if (IS_ERR(lpm_tree)) {
676                 err = PTR_ERR(lpm_tree);
677                 goto err_ipv4_tree_get;
678         }
679         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
680
681         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
682                                          MLXSW_SP_L3_PROTO_IPV6);
683         if (IS_ERR(lpm_tree)) {
684                 err = PTR_ERR(lpm_tree);
685                 goto err_ipv6_tree_get;
686         }
687         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
688
689         return 0;
690
691 err_ipv6_tree_get:
692         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
693         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
694 err_ipv4_tree_get:
695         kfree(mlxsw_sp->router->lpm.trees);
696         return err;
697 }
698
699 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
700 {
701         struct mlxsw_sp_lpm_tree *lpm_tree;
702
703         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
704         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
705
706         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
707         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
708
709         kfree(mlxsw_sp->router->lpm.trees);
710 }
711
712 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
713 {
714         return !!vr->fib4 || !!vr->fib6 ||
715                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
716                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
717 }
718
719 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
720 {
721         struct mlxsw_sp_vr *vr;
722         int i;
723
724         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
725                 vr = &mlxsw_sp->router->vrs[i];
726                 if (!mlxsw_sp_vr_is_used(vr))
727                         return vr;
728         }
729         return NULL;
730 }
731
732 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
733                                      const struct mlxsw_sp_fib *fib, u8 tree_id)
734 {
735         char raltb_pl[MLXSW_REG_RALTB_LEN];
736
737         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
738                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
739                              tree_id);
740         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
741 }
742
743 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
744                                        const struct mlxsw_sp_fib *fib)
745 {
746         char raltb_pl[MLXSW_REG_RALTB_LEN];
747
748         /* Bind to tree 0 which is default */
749         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
750                              (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
751         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
752 }
753
754 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
755 {
756         /* For our purpose, squash main, default and local tables into one */
757         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
758                 tb_id = RT_TABLE_MAIN;
759         return tb_id;
760 }
761
762 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
763                                             u32 tb_id)
764 {
765         struct mlxsw_sp_vr *vr;
766         int i;
767
768         tb_id = mlxsw_sp_fix_tb_id(tb_id);
769
770         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
771                 vr = &mlxsw_sp->router->vrs[i];
772                 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
773                         return vr;
774         }
775         return NULL;
776 }
777
778 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
779                                             enum mlxsw_sp_l3proto proto)
780 {
781         switch (proto) {
782         case MLXSW_SP_L3_PROTO_IPV4:
783                 return vr->fib4;
784         case MLXSW_SP_L3_PROTO_IPV6:
785                 return vr->fib6;
786         }
787         return NULL;
788 }
789
790 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
791                                               u32 tb_id,
792                                               struct netlink_ext_ack *extack)
793 {
794         struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
795         struct mlxsw_sp_fib *fib4;
796         struct mlxsw_sp_fib *fib6;
797         struct mlxsw_sp_vr *vr;
798         int err;
799
800         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
801         if (!vr) {
802                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
803                 return ERR_PTR(-EBUSY);
804         }
805         fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
806         if (IS_ERR(fib4))
807                 return ERR_CAST(fib4);
808         fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
809         if (IS_ERR(fib6)) {
810                 err = PTR_ERR(fib6);
811                 goto err_fib6_create;
812         }
813         mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
814                                              MLXSW_SP_L3_PROTO_IPV4);
815         if (IS_ERR(mr4_table)) {
816                 err = PTR_ERR(mr4_table);
817                 goto err_mr4_table_create;
818         }
819         mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
820                                              MLXSW_SP_L3_PROTO_IPV6);
821         if (IS_ERR(mr6_table)) {
822                 err = PTR_ERR(mr6_table);
823                 goto err_mr6_table_create;
824         }
825
826         vr->fib4 = fib4;
827         vr->fib6 = fib6;
828         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
829         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
830         vr->tb_id = tb_id;
831         return vr;
832
833 err_mr6_table_create:
834         mlxsw_sp_mr_table_destroy(mr4_table);
835 err_mr4_table_create:
836         mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
837 err_fib6_create:
838         mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
839         return ERR_PTR(err);
840 }
841
842 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
843                                 struct mlxsw_sp_vr *vr)
844 {
845         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
846         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
847         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
848         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
849         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
850         vr->fib6 = NULL;
851         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
852         vr->fib4 = NULL;
853 }
854
855 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
856                                            struct netlink_ext_ack *extack)
857 {
858         struct mlxsw_sp_vr *vr;
859
860         tb_id = mlxsw_sp_fix_tb_id(tb_id);
861         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
862         if (!vr)
863                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
864         return vr;
865 }
866
867 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
868 {
869         if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
870             list_empty(&vr->fib6->node_list) &&
871             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
872             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
873                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
874 }
875
876 static bool
877 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
878                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
879 {
880         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
881
882         if (!mlxsw_sp_vr_is_used(vr))
883                 return false;
884         if (fib->lpm_tree->id == tree_id)
885                 return true;
886         return false;
887 }
888
889 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
890                                         struct mlxsw_sp_fib *fib,
891                                         struct mlxsw_sp_lpm_tree *new_tree)
892 {
893         struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
894         int err;
895
896         fib->lpm_tree = new_tree;
897         mlxsw_sp_lpm_tree_hold(new_tree);
898         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
899         if (err)
900                 goto err_tree_bind;
901         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
902         return 0;
903
904 err_tree_bind:
905         mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
906         fib->lpm_tree = old_tree;
907         return err;
908 }
909
910 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
911                                          struct mlxsw_sp_fib *fib,
912                                          struct mlxsw_sp_lpm_tree *new_tree)
913 {
914         enum mlxsw_sp_l3proto proto = fib->proto;
915         struct mlxsw_sp_lpm_tree *old_tree;
916         u8 old_id, new_id = new_tree->id;
917         struct mlxsw_sp_vr *vr;
918         int i, err;
919
920         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
921         old_id = old_tree->id;
922
923         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
924                 vr = &mlxsw_sp->router->vrs[i];
925                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
926                         continue;
927                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
928                                                    mlxsw_sp_vr_fib(vr, proto),
929                                                    new_tree);
930                 if (err)
931                         goto err_tree_replace;
932         }
933
934         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
935                sizeof(new_tree->prefix_ref_count));
936         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
937         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
938
939         return 0;
940
941 err_tree_replace:
942         for (i--; i >= 0; i--) {
943                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
944                         continue;
945                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
946                                              mlxsw_sp_vr_fib(vr, proto),
947                                              old_tree);
948         }
949         return err;
950 }
951
952 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
953 {
954         struct mlxsw_sp_vr *vr;
955         u64 max_vrs;
956         int i;
957
958         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
959                 return -EIO;
960
961         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
962         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
963                                         GFP_KERNEL);
964         if (!mlxsw_sp->router->vrs)
965                 return -ENOMEM;
966
967         for (i = 0; i < max_vrs; i++) {
968                 vr = &mlxsw_sp->router->vrs[i];
969                 vr->id = i;
970         }
971
972         return 0;
973 }
974
975 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
976
977 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
978 {
979         /* At this stage we're guaranteed not to have new incoming
980          * FIB notifications and the work queue is free from FIBs
981          * sitting on top of mlxsw netdevs. However, we can still
982          * have other FIBs queued. Flush the queue before flushing
983          * the device's tables. No need for locks, as we're the only
984          * writer.
985          */
986         mlxsw_core_flush_owq();
987         mlxsw_sp_router_fib_flush(mlxsw_sp);
988         kfree(mlxsw_sp->router->vrs);
989 }
990
991 static struct net_device *
992 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
993 {
994         struct ip_tunnel *tun = netdev_priv(ol_dev);
995         struct net *net = dev_net(ol_dev);
996
997         return __dev_get_by_index(net, tun->parms.link);
998 }
999
1000 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1001 {
1002         struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1003
1004         if (d)
1005                 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1006         else
1007                 return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
1008 }
1009
1010 static struct mlxsw_sp_rif *
1011 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1012                     const struct mlxsw_sp_rif_params *params,
1013                     struct netlink_ext_ack *extack);
1014
1015 static struct mlxsw_sp_rif_ipip_lb *
1016 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1017                                 enum mlxsw_sp_ipip_type ipipt,
1018                                 struct net_device *ol_dev,
1019                                 struct netlink_ext_ack *extack)
1020 {
1021         struct mlxsw_sp_rif_params_ipip_lb lb_params;
1022         const struct mlxsw_sp_ipip_ops *ipip_ops;
1023         struct mlxsw_sp_rif *rif;
1024
1025         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1026         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1027                 .common.dev = ol_dev,
1028                 .common.lag = false,
1029                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1030         };
1031
1032         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1033         if (IS_ERR(rif))
1034                 return ERR_CAST(rif);
1035         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1036 }
1037
1038 static struct mlxsw_sp_ipip_entry *
1039 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1040                           enum mlxsw_sp_ipip_type ipipt,
1041                           struct net_device *ol_dev)
1042 {
1043         const struct mlxsw_sp_ipip_ops *ipip_ops;
1044         struct mlxsw_sp_ipip_entry *ipip_entry;
1045         struct mlxsw_sp_ipip_entry *ret = NULL;
1046
1047         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1048         ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1049         if (!ipip_entry)
1050                 return ERR_PTR(-ENOMEM);
1051
1052         ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1053                                                             ol_dev, NULL);
1054         if (IS_ERR(ipip_entry->ol_lb)) {
1055                 ret = ERR_CAST(ipip_entry->ol_lb);
1056                 goto err_ol_ipip_lb_create;
1057         }
1058
1059         ipip_entry->ipipt = ipipt;
1060         ipip_entry->ol_dev = ol_dev;
1061
1062         switch (ipip_ops->ul_proto) {
1063         case MLXSW_SP_L3_PROTO_IPV4:
1064                 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1065                 break;
1066         case MLXSW_SP_L3_PROTO_IPV6:
1067                 WARN_ON(1);
1068                 break;
1069         }
1070
1071         return ipip_entry;
1072
1073 err_ol_ipip_lb_create:
1074         kfree(ipip_entry);
1075         return ret;
1076 }
1077
1078 static void
1079 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1080 {
1081         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1082         kfree(ipip_entry);
1083 }
1084
1085 static bool
1086 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1087                                   const enum mlxsw_sp_l3proto ul_proto,
1088                                   union mlxsw_sp_l3addr saddr,
1089                                   u32 ul_tb_id,
1090                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1091 {
1092         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1093         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1094         union mlxsw_sp_l3addr tun_saddr;
1095
1096         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1097                 return false;
1098
1099         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1100         return tun_ul_tb_id == ul_tb_id &&
1101                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1102 }
1103
1104 static int
1105 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1106                               struct mlxsw_sp_fib_entry *fib_entry,
1107                               struct mlxsw_sp_ipip_entry *ipip_entry)
1108 {
1109         u32 tunnel_index;
1110         int err;
1111
1112         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
1113         if (err)
1114                 return err;
1115
1116         ipip_entry->decap_fib_entry = fib_entry;
1117         fib_entry->decap.ipip_entry = ipip_entry;
1118         fib_entry->decap.tunnel_index = tunnel_index;
1119         return 0;
1120 }
1121
1122 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1123                                           struct mlxsw_sp_fib_entry *fib_entry)
1124 {
1125         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1126         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1127         fib_entry->decap.ipip_entry = NULL;
1128         mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
1129 }
1130
1131 static struct mlxsw_sp_fib_node *
1132 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1133                          size_t addr_len, unsigned char prefix_len);
1134 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1135                                      struct mlxsw_sp_fib_entry *fib_entry);
1136
1137 static void
1138 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1139                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1140 {
1141         struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1142
1143         mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1144         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1145
1146         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1147 }
1148
1149 static void
1150 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1151                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1152                                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1153 {
1154         if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1155                                           ipip_entry))
1156                 return;
1157         decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1158
1159         if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1160                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1161 }
1162
1163 /* Given an IPIP entry, find the corresponding decap route. */
1164 static struct mlxsw_sp_fib_entry *
1165 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1166                                struct mlxsw_sp_ipip_entry *ipip_entry)
1167 {
1168         static struct mlxsw_sp_fib_node *fib_node;
1169         const struct mlxsw_sp_ipip_ops *ipip_ops;
1170         struct mlxsw_sp_fib_entry *fib_entry;
1171         unsigned char saddr_prefix_len;
1172         union mlxsw_sp_l3addr saddr;
1173         struct mlxsw_sp_fib *ul_fib;
1174         struct mlxsw_sp_vr *ul_vr;
1175         const void *saddrp;
1176         size_t saddr_len;
1177         u32 ul_tb_id;
1178         u32 saddr4;
1179
1180         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1181
1182         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1183         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1184         if (!ul_vr)
1185                 return NULL;
1186
1187         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1188         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1189                                            ipip_entry->ol_dev);
1190
1191         switch (ipip_ops->ul_proto) {
1192         case MLXSW_SP_L3_PROTO_IPV4:
1193                 saddr4 = be32_to_cpu(saddr.addr4);
1194                 saddrp = &saddr4;
1195                 saddr_len = 4;
1196                 saddr_prefix_len = 32;
1197                 break;
1198         case MLXSW_SP_L3_PROTO_IPV6:
1199                 WARN_ON(1);
1200                 return NULL;
1201         }
1202
1203         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1204                                             saddr_prefix_len);
1205         if (!fib_node || list_empty(&fib_node->entry_list))
1206                 return NULL;
1207
1208         fib_entry = list_first_entry(&fib_node->entry_list,
1209                                      struct mlxsw_sp_fib_entry, list);
1210         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1211                 return NULL;
1212
1213         return fib_entry;
1214 }
1215
1216 static struct mlxsw_sp_ipip_entry *
1217 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1218                            enum mlxsw_sp_ipip_type ipipt,
1219                            struct net_device *ol_dev)
1220 {
1221         struct mlxsw_sp_ipip_entry *ipip_entry;
1222
1223         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1224         if (IS_ERR(ipip_entry))
1225                 return ipip_entry;
1226
1227         list_add_tail(&ipip_entry->ipip_list_node,
1228                       &mlxsw_sp->router->ipip_list);
1229
1230         return ipip_entry;
1231 }
1232
1233 static void
1234 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1235                             struct mlxsw_sp_ipip_entry *ipip_entry)
1236 {
1237         list_del(&ipip_entry->ipip_list_node);
1238         mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1239 }
1240
1241 static bool
1242 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1243                                   const struct net_device *ul_dev,
1244                                   enum mlxsw_sp_l3proto ul_proto,
1245                                   union mlxsw_sp_l3addr ul_dip,
1246                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1247 {
1248         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1249         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1250         struct net_device *ipip_ul_dev;
1251
1252         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1253                 return false;
1254
1255         ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1256         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1257                                                  ul_tb_id, ipip_entry) &&
1258                (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1259 }
1260
1261 /* Given decap parameters, find the corresponding IPIP entry. */
1262 static struct mlxsw_sp_ipip_entry *
1263 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1264                                   const struct net_device *ul_dev,
1265                                   enum mlxsw_sp_l3proto ul_proto,
1266                                   union mlxsw_sp_l3addr ul_dip)
1267 {
1268         struct mlxsw_sp_ipip_entry *ipip_entry;
1269
1270         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1271                             ipip_list_node)
1272                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1273                                                       ul_proto, ul_dip,
1274                                                       ipip_entry))
1275                         return ipip_entry;
1276
1277         return NULL;
1278 }
1279
1280 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1281                                       const struct net_device *dev,
1282                                       enum mlxsw_sp_ipip_type *p_type)
1283 {
1284         struct mlxsw_sp_router *router = mlxsw_sp->router;
1285         const struct mlxsw_sp_ipip_ops *ipip_ops;
1286         enum mlxsw_sp_ipip_type ipipt;
1287
1288         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1289                 ipip_ops = router->ipip_ops_arr[ipipt];
1290                 if (dev->type == ipip_ops->dev_type) {
1291                         if (p_type)
1292                                 *p_type = ipipt;
1293                         return true;
1294                 }
1295         }
1296         return false;
1297 }
1298
1299 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1300                                 const struct net_device *dev)
1301 {
1302         return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1303 }
1304
1305 static struct mlxsw_sp_ipip_entry *
1306 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1307                                    const struct net_device *ol_dev)
1308 {
1309         struct mlxsw_sp_ipip_entry *ipip_entry;
1310
1311         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1312                             ipip_list_node)
1313                 if (ipip_entry->ol_dev == ol_dev)
1314                         return ipip_entry;
1315
1316         return NULL;
1317 }
1318
1319 static struct mlxsw_sp_ipip_entry *
1320 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1321                                    const struct net_device *ul_dev,
1322                                    struct mlxsw_sp_ipip_entry *start)
1323 {
1324         struct mlxsw_sp_ipip_entry *ipip_entry;
1325
1326         ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1327                                         ipip_list_node);
1328         list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1329                                      ipip_list_node) {
1330                 struct net_device *ipip_ul_dev =
1331                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1332
1333                 if (ipip_ul_dev == ul_dev)
1334                         return ipip_entry;
1335         }
1336
1337         return NULL;
1338 }
1339
1340 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1341                                 const struct net_device *dev)
1342 {
1343         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1344 }
1345
1346 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1347                                                 const struct net_device *ol_dev,
1348                                                 enum mlxsw_sp_ipip_type ipipt)
1349 {
1350         const struct mlxsw_sp_ipip_ops *ops
1351                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1352
1353         /* For deciding whether decap should be offloaded, we don't care about
1354          * overlay protocol, so ask whether either one is supported.
1355          */
1356         return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1357                ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1358 }
1359
1360 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1361                                                 struct net_device *ol_dev)
1362 {
1363         struct mlxsw_sp_ipip_entry *ipip_entry;
1364         enum mlxsw_sp_l3proto ul_proto;
1365         enum mlxsw_sp_ipip_type ipipt;
1366         union mlxsw_sp_l3addr saddr;
1367         u32 ul_tb_id;
1368
1369         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1370         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1371                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1372                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1373                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1374                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1375                                                           saddr, ul_tb_id,
1376                                                           NULL)) {
1377                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1378                                                                 ol_dev);
1379                         if (IS_ERR(ipip_entry))
1380                                 return PTR_ERR(ipip_entry);
1381                 }
1382         }
1383
1384         return 0;
1385 }
1386
/* Handle unregistration of overlay device @ol_dev: destroy its IPIP entry,
 * if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
                                                   struct net_device *ol_dev)
{
        struct mlxsw_sp_ipip_entry *ipip_entry;

        ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
        if (!ipip_entry)
                return;

        mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1396
/* The overlay device of @ipip_entry came up: if a decap route candidate
 * exists for the tunnel, promote it to a decap route.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_ipip_entry *ipip_entry)
{
        struct mlxsw_sp_fib_entry *decap_fib_entry;

        decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
        if (!decap_fib_entry)
                return;

        mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
                                          decap_fib_entry);
}
1408
1409 static int
1410 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
1411                         struct mlxsw_sp_vr *ul_vr, bool enable)
1412 {
1413         struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1414         struct mlxsw_sp_rif *rif = &lb_rif->common;
1415         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1416         char ritr_pl[MLXSW_REG_RITR_LEN];
1417         u32 saddr4;
1418
1419         switch (lb_cf.ul_protocol) {
1420         case MLXSW_SP_L3_PROTO_IPV4:
1421                 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1422                 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1423                                     rif->rif_index, rif->vr_id, rif->dev->mtu);
1424                 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1425                             MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1426                             ul_vr->id, saddr4, lb_cf.okey);
1427                 break;
1428
1429         case MLXSW_SP_L3_PROTO_IPV6:
1430                 return -EAFNOSUPPORT;
1431         }
1432
1433         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1434 }
1435
1436 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1437                                                  struct net_device *ol_dev)
1438 {
1439         struct mlxsw_sp_ipip_entry *ipip_entry;
1440         struct mlxsw_sp_rif_ipip_lb *lb_rif;
1441         struct mlxsw_sp_vr *ul_vr;
1442         int err = 0;
1443
1444         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1445         if (ipip_entry) {
1446                 lb_rif = ipip_entry->ol_lb;
1447                 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1448                 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1449                 if (err)
1450                         goto out;
1451                 lb_rif->common.mtu = ol_dev->mtu;
1452         }
1453
1454 out:
1455         return err;
1456 }
1457
/* Overlay device @ol_dev came up: forward the event to its IPIP entry, if it
 * has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
                                                struct net_device *ol_dev)
{
        struct mlxsw_sp_ipip_entry *ipip_entry;

        ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
        if (!ipip_entry)
                return;

        mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1467
1468 static void
1469 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1470                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1471 {
1472         if (ipip_entry->decap_fib_entry)
1473                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1474 }
1475
/* Overlay device @ol_dev went down: forward the event to its IPIP entry, if
 * it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
                                                  struct net_device *ol_dev)
{
        struct mlxsw_sp_ipip_entry *ipip_entry;

        ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
        if (!ipip_entry)
                return;

        mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1485
1486 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1487                                          struct mlxsw_sp_rif *old_rif,
1488                                          struct mlxsw_sp_rif *new_rif);
1489 static int
1490 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1491                                  struct mlxsw_sp_ipip_entry *ipip_entry,
1492                                  bool keep_encap,
1493                                  struct netlink_ext_ack *extack)
1494 {
1495         struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1496         struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1497
1498         new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1499                                                      ipip_entry->ipipt,
1500                                                      ipip_entry->ol_dev,
1501                                                      extack);
1502         if (IS_ERR(new_lb_rif))
1503                 return PTR_ERR(new_lb_rif);
1504         ipip_entry->ol_lb = new_lb_rif;
1505
1506         if (keep_encap)
1507                 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1508                                              &new_lb_rif->common);
1509
1510         mlxsw_sp_rif_destroy(&old_lb_rif->common);
1511
1512         return 0;
1513 }
1514
1515 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1516                                         struct mlxsw_sp_rif *rif);
1517
1518 /**
1519  * Update the offload related to an IPIP entry. This always updates decap, and
1520  * in addition to that it also:
1521  * @recreate_loopback: recreates the associated loopback RIF
1522  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1523  *              relevant when recreate_loopback is true.
1524  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1525  *                   is only relevant when recreate_loopback is false.
1526  */
1527 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1528                                         struct mlxsw_sp_ipip_entry *ipip_entry,
1529                                         bool recreate_loopback,
1530                                         bool keep_encap,
1531                                         bool update_nexthops,
1532                                         struct netlink_ext_ack *extack)
1533 {
1534         int err;
1535
1536         /* RIFs can't be edited, so to update loopback, we need to destroy and
1537          * recreate it. That creates a window of opportunity where RALUE and
1538          * RATR registers end up referencing a RIF that's already gone. RATRs
1539          * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1540          * of RALUE, demote the decap route back.
1541          */
1542         if (ipip_entry->decap_fib_entry)
1543                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1544
1545         if (recreate_loopback) {
1546                 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1547                                                        keep_encap, extack);
1548                 if (err)
1549                         return err;
1550         } else if (update_nexthops) {
1551                 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1552                                             &ipip_entry->ol_lb->common);
1553         }
1554
1555         if (ipip_entry->ol_dev->flags & IFF_UP)
1556                 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1557
1558         return 0;
1559 }
1560
1561 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1562                                                 struct net_device *ol_dev,
1563                                                 struct netlink_ext_ack *extack)
1564 {
1565         struct mlxsw_sp_ipip_entry *ipip_entry =
1566                 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1567         enum mlxsw_sp_l3proto ul_proto;
1568         union mlxsw_sp_l3addr saddr;
1569         u32 ul_tb_id;
1570
1571         if (!ipip_entry)
1572                 return 0;
1573
1574         /* For flat configuration cases, moving overlay to a different VRF might
1575          * cause local address conflict, and the conflicting tunnels need to be
1576          * demoted.
1577          */
1578         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1579         ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1580         saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1581         if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1582                                                  saddr, ul_tb_id,
1583                                                  ipip_entry)) {
1584                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1585                 return 0;
1586         }
1587
1588         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1589                                                    true, false, false, extack);
1590 }
1591
1592 static int
1593 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1594                                      struct mlxsw_sp_ipip_entry *ipip_entry,
1595                                      struct net_device *ul_dev,
1596                                      struct netlink_ext_ack *extack)
1597 {
1598         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1599                                                    true, true, false, extack);
1600 }
1601
1602 static int
1603 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1604                                     struct mlxsw_sp_ipip_entry *ipip_entry,
1605                                     struct net_device *ul_dev)
1606 {
1607         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1608                                                    false, false, true, NULL);
1609 }
1610
1611 static int
1612 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1613                                       struct mlxsw_sp_ipip_entry *ipip_entry,
1614                                       struct net_device *ul_dev)
1615 {
1616         /* A down underlay device causes encapsulated packets to not be
1617          * forwarded, but decap still works. So refresh next hops without
1618          * touching anything else.
1619          */
1620         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1621                                                    false, false, true, NULL);
1622 }
1623
1624 static int
1625 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1626                                         struct net_device *ol_dev,
1627                                         struct netlink_ext_ack *extack)
1628 {
1629         const struct mlxsw_sp_ipip_ops *ipip_ops;
1630         struct mlxsw_sp_ipip_entry *ipip_entry;
1631         int err;
1632
1633         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1634         if (!ipip_entry)
1635                 /* A change might make a tunnel eligible for offloading, but
1636                  * that is currently not implemented. What falls to slow path
1637                  * stays there.
1638                  */
1639                 return 0;
1640
1641         /* A change might make a tunnel not eligible for offloading. */
1642         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1643                                                  ipip_entry->ipipt)) {
1644                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1645                 return 0;
1646         }
1647
1648         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1649         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1650         return err;
1651 }
1652
1653 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1654                                        struct mlxsw_sp_ipip_entry *ipip_entry)
1655 {
1656         struct net_device *ol_dev = ipip_entry->ol_dev;
1657
1658         if (ol_dev->flags & IFF_UP)
1659                 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1660         mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1661 }
1662
1663 /* The configuration where several tunnels have the same local address in the
1664  * same underlay table needs special treatment in the HW. That is currently not
1665  * implemented in the driver. This function finds and demotes the first tunnel
1666  * with a given source address, except the one passed in in the argument
1667  * `except'.
1668  */
1669 bool
1670 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1671                                      enum mlxsw_sp_l3proto ul_proto,
1672                                      union mlxsw_sp_l3addr saddr,
1673                                      u32 ul_tb_id,
1674                                      const struct mlxsw_sp_ipip_entry *except)
1675 {
1676         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1677
1678         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1679                                  ipip_list_node) {
1680                 if (ipip_entry != except &&
1681                     mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1682                                                       ul_tb_id, ipip_entry)) {
1683                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1684                         return true;
1685                 }
1686         }
1687
1688         return false;
1689 }
1690
1691 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1692                                                      struct net_device *ul_dev)
1693 {
1694         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1695
1696         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1697                                  ipip_list_node) {
1698                 struct net_device *ipip_ul_dev =
1699                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1700
1701                 if (ipip_ul_dev == ul_dev)
1702                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1703         }
1704 }
1705
1706 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1707                                      struct net_device *ol_dev,
1708                                      unsigned long event,
1709                                      struct netdev_notifier_info *info)
1710 {
1711         struct netdev_notifier_changeupper_info *chup;
1712         struct netlink_ext_ack *extack;
1713
1714         switch (event) {
1715         case NETDEV_REGISTER:
1716                 return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1717         case NETDEV_UNREGISTER:
1718                 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1719                 return 0;
1720         case NETDEV_UP:
1721                 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1722                 return 0;
1723         case NETDEV_DOWN:
1724                 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1725                 return 0;
1726         case NETDEV_CHANGEUPPER:
1727                 chup = container_of(info, typeof(*chup), info);
1728                 extack = info->extack;
1729                 if (netif_is_l3_master(chup->upper_dev))
1730                         return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1731                                                                     ol_dev,
1732                                                                     extack);
1733                 return 0;
1734         case NETDEV_CHANGE:
1735                 extack = info->extack;
1736                 return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1737                                                                ol_dev, extack);
1738         case NETDEV_CHANGEMTU:
1739                 return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1740         }
1741         return 0;
1742 }
1743
1744 static int
1745 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1746                                    struct mlxsw_sp_ipip_entry *ipip_entry,
1747                                    struct net_device *ul_dev,
1748                                    unsigned long event,
1749                                    struct netdev_notifier_info *info)
1750 {
1751         struct netdev_notifier_changeupper_info *chup;
1752         struct netlink_ext_ack *extack;
1753
1754         switch (event) {
1755         case NETDEV_CHANGEUPPER:
1756                 chup = container_of(info, typeof(*chup), info);
1757                 extack = info->extack;
1758                 if (netif_is_l3_master(chup->upper_dev))
1759                         return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1760                                                                     ipip_entry,
1761                                                                     ul_dev,
1762                                                                     extack);
1763                 break;
1764
1765         case NETDEV_UP:
1766                 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1767                                                            ul_dev);
1768         case NETDEV_DOWN:
1769                 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1770                                                              ipip_entry,
1771                                                              ul_dev);
1772         }
1773         return 0;
1774 }
1775
1776 int
1777 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1778                                  struct net_device *ul_dev,
1779                                  unsigned long event,
1780                                  struct netdev_notifier_info *info)
1781 {
1782         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1783         int err;
1784
1785         while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1786                                                                 ul_dev,
1787                                                                 ipip_entry))) {
1788                 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1789                                                          ul_dev, event, info);
1790                 if (err) {
1791                         mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1792                                                                  ul_dev);
1793                         return err;
1794                 }
1795         }
1796
1797         return 0;
1798 }
1799
/* Key under which neighbour entries are kept in the router's neigh_ht hash
 * table: the neighbour pointer itself.
 */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1803
1804 struct mlxsw_sp_neigh_entry {
1805         struct list_head rif_list_node;
1806         struct rhash_head ht_node;
1807         struct mlxsw_sp_neigh_key key;
1808         u16 rif;
1809         bool connected;
1810         unsigned char ha[ETH_ALEN];
1811         struct list_head nexthop_list; /* list of nexthops using
1812                                         * this neigh entry
1813                                         */
1814         struct list_head nexthop_neighs_list_node;
1815         unsigned int counter_index;
1816         bool counter_valid;
1817 };
1818
1819 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1820         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1821         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1822         .key_len = sizeof(struct mlxsw_sp_neigh_key),
1823 };
1824
1825 struct mlxsw_sp_neigh_entry *
1826 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1827                         struct mlxsw_sp_neigh_entry *neigh_entry)
1828 {
1829         if (!neigh_entry) {
1830                 if (list_empty(&rif->neigh_list))
1831                         return NULL;
1832                 else
1833                         return list_first_entry(&rif->neigh_list,
1834                                                 typeof(*neigh_entry),
1835                                                 rif_list_node);
1836         }
1837         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1838                 return NULL;
1839         return list_next_entry(neigh_entry, rif_list_node);
1840 }
1841
1842 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1843 {
1844         return neigh_entry->key.n->tbl->family;
1845 }
1846
1847 unsigned char *
1848 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1849 {
1850         return neigh_entry->ha;
1851 }
1852
1853 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1854 {
1855         struct neighbour *n;
1856
1857         n = neigh_entry->key.n;
1858         return ntohl(*((__be32 *) n->primary_key));
1859 }
1860
1861 struct in6_addr *
1862 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1863 {
1864         struct neighbour *n;
1865
1866         n = neigh_entry->key.n;
1867         return (struct in6_addr *) &n->primary_key;
1868 }
1869
1870 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1871                                struct mlxsw_sp_neigh_entry *neigh_entry,
1872                                u64 *p_counter)
1873 {
1874         if (!neigh_entry->counter_valid)
1875                 return -EINVAL;
1876
1877         return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1878                                          p_counter, NULL);
1879 }
1880
1881 static struct mlxsw_sp_neigh_entry *
1882 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1883                            u16 rif)
1884 {
1885         struct mlxsw_sp_neigh_entry *neigh_entry;
1886
1887         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1888         if (!neigh_entry)
1889                 return NULL;
1890
1891         neigh_entry->key.n = n;
1892         neigh_entry->rif = rif;
1893         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1894
1895         return neigh_entry;
1896 }
1897
/* Release the memory of a neighbour entry; counterpart of
 * mlxsw_sp_neigh_entry_alloc().
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1902
1903 static int
1904 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1905                             struct mlxsw_sp_neigh_entry *neigh_entry)
1906 {
1907         return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1908                                       &neigh_entry->ht_node,
1909                                       mlxsw_sp_neigh_ht_params);
1910 }
1911
1912 static void
1913 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1914                             struct mlxsw_sp_neigh_entry *neigh_entry)
1915 {
1916         rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1917                                &neigh_entry->ht_node,
1918                                mlxsw_sp_neigh_ht_params);
1919 }
1920
1921 static bool
1922 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1923                                     struct mlxsw_sp_neigh_entry *neigh_entry)
1924 {
1925         struct devlink *devlink;
1926         const char *table_name;
1927
1928         switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1929         case AF_INET:
1930                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1931                 break;
1932         case AF_INET6:
1933                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1934                 break;
1935         default:
1936                 WARN_ON(1);
1937                 return false;
1938         }
1939
1940         devlink = priv_to_devlink(mlxsw_sp->core);
1941         return devlink_dpipe_table_counter_enabled(devlink, table_name);
1942 }
1943
1944 static void
1945 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1946                              struct mlxsw_sp_neigh_entry *neigh_entry)
1947 {
1948         if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1949                 return;
1950
1951         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1952                 return;
1953
1954         neigh_entry->counter_valid = true;
1955 }
1956
1957 static void
1958 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1959                             struct mlxsw_sp_neigh_entry *neigh_entry)
1960 {
1961         if (!neigh_entry->counter_valid)
1962                 return;
1963         mlxsw_sp_flow_counter_free(mlxsw_sp,
1964                                    neigh_entry->counter_index);
1965         neigh_entry->counter_valid = false;
1966 }
1967
1968 static struct mlxsw_sp_neigh_entry *
1969 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1970 {
1971         struct mlxsw_sp_neigh_entry *neigh_entry;
1972         struct mlxsw_sp_rif *rif;
1973         int err;
1974
1975         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1976         if (!rif)
1977                 return ERR_PTR(-EINVAL);
1978
1979         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1980         if (!neigh_entry)
1981                 return ERR_PTR(-ENOMEM);
1982
1983         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1984         if (err)
1985                 goto err_neigh_entry_insert;
1986
1987         mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1988         list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1989
1990         return neigh_entry;
1991
1992 err_neigh_entry_insert:
1993         mlxsw_sp_neigh_entry_free(neigh_entry);
1994         return ERR_PTR(err);
1995 }
1996
1997 static void
1998 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1999                              struct mlxsw_sp_neigh_entry *neigh_entry)
2000 {
2001         list_del(&neigh_entry->rif_list_node);
2002         mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2003         mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2004         mlxsw_sp_neigh_entry_free(neigh_entry);
2005 }
2006
2007 static struct mlxsw_sp_neigh_entry *
2008 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2009 {
2010         struct mlxsw_sp_neigh_key key;
2011
2012         key.n = n;
2013         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2014                                       &key, mlxsw_sp_neigh_ht_params);
2015 }
2016
2017 static void
2018 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2019 {
2020         unsigned long interval;
2021
2022 #if IS_ENABLED(CONFIG_IPV6)
2023         interval = min_t(unsigned long,
2024                          NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2025                          NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2026 #else
2027         interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2028 #endif
2029         mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2030 }
2031
2032 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2033                                                    char *rauhtd_pl,
2034                                                    int ent_index)
2035 {
2036         struct net_device *dev;
2037         struct neighbour *n;
2038         __be32 dipn;
2039         u32 dip;
2040         u16 rif;
2041
2042         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2043
2044         if (!mlxsw_sp->router->rifs[rif]) {
2045                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2046                 return;
2047         }
2048
2049         dipn = htonl(dip);
2050         dev = mlxsw_sp->router->rifs[rif]->dev;
2051         n = neigh_lookup(&arp_tbl, &dipn, dev);
2052         if (!n)
2053                 return;
2054
2055         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2056         neigh_event_send(n, NULL);
2057         neigh_release(n);
2058 }
2059
2060 #if IS_ENABLED(CONFIG_IPV6)
2061 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2062                                                    char *rauhtd_pl,
2063                                                    int rec_index)
2064 {
2065         struct net_device *dev;
2066         struct neighbour *n;
2067         struct in6_addr dip;
2068         u16 rif;
2069
2070         mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2071                                          (char *) &dip);
2072
2073         if (!mlxsw_sp->router->rifs[rif]) {
2074                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2075                 return;
2076         }
2077
2078         dev = mlxsw_sp->router->rifs[rif]->dev;
2079         n = neigh_lookup(&nd_tbl, &dip, dev);
2080         if (!n)
2081                 return;
2082
2083         netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2084         neigh_event_send(n, NULL);
2085         neigh_release(n);
2086 }
2087 #else
2088 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2089                                                    char *rauhtd_pl,
2090                                                    int rec_index)
2091 {
2092 }
2093 #endif
2094
2095 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2096                                                    char *rauhtd_pl,
2097                                                    int rec_index)
2098 {
2099         u8 num_entries;
2100         int i;
2101
2102         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2103                                                                 rec_index);
2104         /* Hardware starts counting at 0, so add 1. */
2105         num_entries++;
2106
2107         /* Each record consists of several neighbour entries. */
2108         for (i = 0; i < num_entries; i++) {
2109                 int ent_index;
2110
2111                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2112                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2113                                                        ent_index);
2114         }
2115
2116 }
2117
/* Process IPv6 record @rec_index of a RAUHTD dump. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* An IPv6 record carries exactly one entry, so the record index
	 * doubles as the entry index.
	 */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2126
2127 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2128                                               char *rauhtd_pl, int rec_index)
2129 {
2130         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2131         case MLXSW_REG_RAUHTD_TYPE_IPV4:
2132                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2133                                                        rec_index);
2134                 break;
2135         case MLXSW_REG_RAUHTD_TYPE_IPV6:
2136                 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2137                                                        rec_index);
2138                 break;
2139         }
2140 }
2141
2142 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2143 {
2144         u8 num_rec, last_rec_index, num_entries;
2145
2146         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2147         last_rec_index = num_rec - 1;
2148
2149         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2150                 return false;
2151         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2152             MLXSW_REG_RAUHTD_TYPE_IPV6)
2153                 return true;
2154
2155         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2156                                                                 last_rec_index);
2157         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2158                 return true;
2159         return false;
2160 }
2161
2162 static int
2163 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2164                                        char *rauhtd_pl,
2165                                        enum mlxsw_reg_rauhtd_type type)
2166 {
2167         int i, num_rec;
2168         int err;
2169
2170         /* Make sure the neighbour's netdev isn't removed in the
2171          * process.
2172          */
2173         rtnl_lock();
2174         do {
2175                 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2176                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2177                                       rauhtd_pl);
2178                 if (err) {
2179                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2180                         break;
2181                 }
2182                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2183                 for (i = 0; i < num_rec; i++)
2184                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2185                                                           i);
2186         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2187         rtnl_unlock();
2188
2189         return err;
2190 }
2191
2192 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2193 {
2194         enum mlxsw_reg_rauhtd_type type;
2195         char *rauhtd_pl;
2196         int err;
2197
2198         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2199         if (!rauhtd_pl)
2200                 return -ENOMEM;
2201
2202         type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2203         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2204         if (err)
2205                 goto out;
2206
2207         type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2208         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2209 out:
2210         kfree(rauhtd_pl);
2211         return err;
2212 }
2213
2214 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2215 {
2216         struct mlxsw_sp_neigh_entry *neigh_entry;
2217
2218         /* Take RTNL mutex here to prevent lists from changes */
2219         rtnl_lock();
2220         list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2221                             nexthop_neighs_list_node)
2222                 /* If this neigh have nexthops, make the kernel think this neigh
2223                  * is active regardless of the traffic.
2224                  */
2225                 neigh_event_send(neigh_entry->key.n, NULL);
2226         rtnl_unlock();
2227 }
2228
2229 static void
2230 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2231 {
2232         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2233
2234         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2235                                msecs_to_jiffies(interval));
2236 }
2237
2238 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2239 {
2240         struct mlxsw_sp_router *router;
2241         int err;
2242
2243         router = container_of(work, struct mlxsw_sp_router,
2244                               neighs_update.dw.work);
2245         err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2246         if (err)
2247                 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2248
2249         mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2250
2251         mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2252 }
2253
2254 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2255 {
2256         struct mlxsw_sp_neigh_entry *neigh_entry;
2257         struct mlxsw_sp_router *router;
2258
2259         router = container_of(work, struct mlxsw_sp_router,
2260                               nexthop_probe_dw.work);
2261         /* Iterate over nexthop neighbours, find those who are unresolved and
2262          * send arp on them. This solves the chicken-egg problem when
2263          * the nexthop wouldn't get offloaded until the neighbor is resolved
2264          * but it wouldn't get resolved ever in case traffic is flowing in HW
2265          * using different nexthop.
2266          *
2267          * Take RTNL mutex here to prevent lists from changes.
2268          */
2269         rtnl_lock();
2270         list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2271                             nexthop_neighs_list_node)
2272                 if (!neigh_entry->connected)
2273                         neigh_event_send(neigh_entry->key.n, NULL);
2274         rtnl_unlock();
2275
2276         mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2277                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2278 }
2279
2280 static void
2281 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2282                               struct mlxsw_sp_neigh_entry *neigh_entry,
2283                               bool removing);
2284
2285 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2286 {
2287         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2288                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2289 }
2290
2291 static void
2292 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2293                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2294                                 enum mlxsw_reg_rauht_op op)
2295 {
2296         struct neighbour *n = neigh_entry->key.n;
2297         u32 dip = ntohl(*((__be32 *) n->primary_key));
2298         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2299
2300         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2301                               dip);
2302         if (neigh_entry->counter_valid)
2303                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2304                                              neigh_entry->counter_index);
2305         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2306 }
2307
2308 static void
2309 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2310                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2311                                 enum mlxsw_reg_rauht_op op)
2312 {
2313         struct neighbour *n = neigh_entry->key.n;
2314         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2315         const char *dip = n->primary_key;
2316
2317         mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2318                               dip);
2319         if (neigh_entry->counter_valid)
2320                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2321                                              neigh_entry->counter_index);
2322         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2323 }
2324
2325 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2326 {
2327         struct neighbour *n = neigh_entry->key.n;
2328
2329         /* Packets with a link-local destination address are trapped
2330          * after LPM lookup and never reach the neighbour table, so
2331          * there is no need to program such neighbours to the device.
2332          */
2333         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2334             IPV6_ADDR_LINKLOCAL)
2335                 return true;
2336         return false;
2337 }
2338
2339 static void
2340 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2341                             struct mlxsw_sp_neigh_entry *neigh_entry,
2342                             bool adding)
2343 {
2344         if (!adding && !neigh_entry->connected)
2345                 return;
2346         neigh_entry->connected = adding;
2347         if (neigh_entry->key.n->tbl->family == AF_INET) {
2348                 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2349                                                 mlxsw_sp_rauht_op(adding));
2350         } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2351                 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2352                         return;
2353                 mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2354                                                 mlxsw_sp_rauht_op(adding));
2355         } else {
2356                 WARN_ON_ONCE(1);
2357         }
2358 }
2359
2360 void
2361 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2362                                     struct mlxsw_sp_neigh_entry *neigh_entry,
2363                                     bool adding)
2364 {
2365         if (adding)
2366                 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2367         else
2368                 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2369         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2370 }
2371
2372 struct mlxsw_sp_netevent_work {
2373         struct work_struct work;
2374         struct mlxsw_sp *mlxsw_sp;
2375         struct neighbour *n;
2376 };
2377
2378 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2379 {
2380         struct mlxsw_sp_netevent_work *net_work =
2381                 container_of(work, struct mlxsw_sp_netevent_work, work);
2382         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2383         struct mlxsw_sp_neigh_entry *neigh_entry;
2384         struct neighbour *n = net_work->n;
2385         unsigned char ha[ETH_ALEN];
2386         bool entry_connected;
2387         u8 nud_state, dead;
2388
2389         /* If these parameters are changed after we release the lock,
2390          * then we are guaranteed to receive another event letting us
2391          * know about it.
2392          */
2393         read_lock_bh(&n->lock);
2394         memcpy(ha, n->ha, ETH_ALEN);
2395         nud_state = n->nud_state;
2396         dead = n->dead;
2397         read_unlock_bh(&n->lock);
2398
2399         rtnl_lock();
2400         mlxsw_sp_span_respin(mlxsw_sp);
2401
2402         entry_connected = nud_state & NUD_VALID && !dead;
2403         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2404         if (!entry_connected && !neigh_entry)
2405                 goto out;
2406         if (!neigh_entry) {
2407                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2408                 if (IS_ERR(neigh_entry))
2409                         goto out;
2410         }
2411
2412         memcpy(neigh_entry->ha, ha, ETH_ALEN);
2413         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2414         mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2415
2416         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2417                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2418
2419 out:
2420         rtnl_unlock();
2421         neigh_release(n);
2422         kfree(net_work);
2423 }
2424
2425 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2426
2427 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2428 {
2429         struct mlxsw_sp_netevent_work *net_work =
2430                 container_of(work, struct mlxsw_sp_netevent_work, work);
2431         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2432
2433         mlxsw_sp_mp_hash_init(mlxsw_sp);
2434         kfree(net_work);
2435 }
2436
2437 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2438                                           unsigned long event, void *ptr)
2439 {
2440         struct mlxsw_sp_netevent_work *net_work;
2441         struct mlxsw_sp_port *mlxsw_sp_port;
2442         struct mlxsw_sp_router *router;
2443         struct mlxsw_sp *mlxsw_sp;
2444         unsigned long interval;
2445         struct neigh_parms *p;
2446         struct neighbour *n;
2447         struct net *net;
2448
2449         switch (event) {
2450         case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2451                 p = ptr;
2452
2453                 /* We don't care about changes in the default table. */
2454                 if (!p->dev || (p->tbl->family != AF_INET &&
2455                                 p->tbl->family != AF_INET6))
2456                         return NOTIFY_DONE;
2457
2458                 /* We are in atomic context and can't take RTNL mutex,
2459                  * so use RCU variant to walk the device chain.
2460                  */
2461                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2462                 if (!mlxsw_sp_port)
2463                         return NOTIFY_DONE;
2464
2465                 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2466                 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2467                 mlxsw_sp->router->neighs_update.interval = interval;
2468
2469                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2470                 break;
2471         case NETEVENT_NEIGH_UPDATE:
2472                 n = ptr;
2473
2474                 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2475                         return NOTIFY_DONE;
2476
2477                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2478                 if (!mlxsw_sp_port)
2479                         return NOTIFY_DONE;
2480
2481                 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2482                 if (!net_work) {
2483                         mlxsw_sp_port_dev_put(mlxsw_sp_port);
2484                         return NOTIFY_BAD;
2485                 }
2486
2487                 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2488                 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2489                 net_work->n = n;
2490
2491                 /* Take a reference to ensure the neighbour won't be
2492                  * destructed until we drop the reference in delayed
2493                  * work.
2494                  */
2495                 neigh_clone(n);
2496                 mlxsw_core_schedule_work(&net_work->work);
2497                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2498                 break;
2499         case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2500         case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2501                 net = ptr;
2502
2503                 if (!net_eq(net, &init_net))
2504                         return NOTIFY_DONE;
2505
2506                 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2507                 if (!net_work)
2508                         return NOTIFY_BAD;
2509
2510                 router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2511                 INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2512                 net_work->mlxsw_sp = router->mlxsw_sp;
2513                 mlxsw_core_schedule_work(&net_work->work);
2514                 break;
2515         }
2516
2517         return NOTIFY_DONE;
2518 }
2519
2520 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2521 {
2522         int err;
2523
2524         err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2525                               &mlxsw_sp_neigh_ht_params);
2526         if (err)
2527                 return err;
2528
2529         /* Initialize the polling interval according to the default
2530          * table.
2531          */
2532         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2533
2534         /* Create the delayed works for the activity_update */
2535         INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2536                           mlxsw_sp_router_neighs_update_work);
2537         INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2538                           mlxsw_sp_router_probe_unresolved_nexthops);
2539         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2540         mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2541         return 0;
2542 }
2543
2544 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2545 {
2546         cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2547         cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2548         rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2549 }
2550
2551 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2552                                          struct mlxsw_sp_rif *rif)
2553 {
2554         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2555
2556         list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2557                                  rif_list_node) {
2558                 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2559                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2560         }
2561 }
2562
/* Kind of a nexthop; selects which member of the nexthop's anonymous
 * union (neigh_entry or ipip_entry) is in use.
 */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,	/* Ethernet nexthop (neigh entry) */
	MLXSW_SP_NEXTHOP_TYPE_IPIP,	/* IP-in-IP nexthop (ipip entry) */
};
2567
/* Lookup key of a nexthop in the nexthop hash table. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;	/* the pointer value itself is the key */
};
2571
2572 struct mlxsw_sp_nexthop {
2573         struct list_head neigh_list_node; /* member of neigh entry list */
2574         struct list_head rif_list_node;
2575         struct list_head router_list_node;
2576         struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2577                                                 * this belongs to
2578                                                 */
2579         struct rhash_head ht_node;
2580         struct mlxsw_sp_nexthop_key key;
2581         unsigned char gw_addr[sizeof(struct in6_addr)];
2582         int ifindex;
2583         int nh_weight;
2584         int norm_nh_weight;
2585         int num_adj_entries;
2586         struct mlxsw_sp_rif *rif;
2587         u8 should_offload:1, /* set indicates this neigh is connected and
2588                               * should be put to KVD linear area of this group.
2589                               */
2590            offloaded:1, /* set in case the neigh is actually put into
2591                          * KVD linear area of this group.
2592                          */
2593            update:1; /* set indicates that MAC of this neigh should be
2594                       * updated in HW
2595                       */
2596         enum mlxsw_sp_nexthop_type type;
2597         union {
2598                 struct mlxsw_sp_neigh_entry *neigh_entry;
2599                 struct mlxsw_sp_ipip_entry *ipip_entry;
2600         };
2601         unsigned int counter_index;
2602         bool counter_valid;
2603 };
2604
2605 struct mlxsw_sp_nexthop_group {
2606         void *priv;
2607         struct rhash_head ht_node;
2608         struct list_head fib_list; /* list of fib entries that use this group */
2609         struct neigh_table *neigh_tbl;
2610         u8 adj_index_valid:1,
2611            gateway:1; /* routes using the group use a gateway */
2612         u32 adj_index;
2613         u16 ecmp_size;
2614         u16 count;
2615         int sum_norm_weight;
2616         struct mlxsw_sp_nexthop nexthops[0];
2617 #define nh_rif  nexthops[0].rif
2618 };
2619
2620 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2621                                     struct mlxsw_sp_nexthop *nh)
2622 {
2623         struct devlink *devlink;
2624
2625         devlink = priv_to_devlink(mlxsw_sp->core);
2626         if (!devlink_dpipe_table_counter_enabled(devlink,
2627                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2628                 return;
2629
2630         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2631                 return;
2632
2633         nh->counter_valid = true;
2634 }
2635
2636 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2637                                    struct mlxsw_sp_nexthop *nh)
2638 {
2639         if (!nh->counter_valid)
2640                 return;
2641         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2642         nh->counter_valid = false;
2643 }
2644
2645 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2646                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2647 {
2648         if (!nh->counter_valid)
2649                 return -EINVAL;
2650
2651         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2652                                          p_counter, NULL);
2653 }
2654
2655 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2656                                                struct mlxsw_sp_nexthop *nh)
2657 {
2658         if (!nh) {
2659                 if (list_empty(&router->nexthop_list))
2660                         return NULL;
2661                 else
2662                         return list_first_entry(&router->nexthop_list,
2663                                                 typeof(*nh), router_list_node);
2664         }
2665         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2666                 return NULL;
2667         return list_next_entry(nh, router_list_node);
2668 }
2669
2670 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2671 {
2672         return nh->offloaded;
2673 }
2674
2675 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2676 {
2677         if (!nh->offloaded)
2678                 return NULL;
2679         return nh->neigh_entry->ha;
2680 }
2681
2682 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2683                              u32 *p_adj_size, u32 *p_adj_hash_index)
2684 {
2685         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2686         u32 adj_hash_index = 0;
2687         int i;
2688
2689         if (!nh->offloaded || !nh_grp->adj_index_valid)
2690                 return -EINVAL;
2691
2692         *p_adj_index = nh_grp->adj_index;
2693         *p_adj_size = nh_grp->ecmp_size;
2694
2695         for (i = 0; i < nh_grp->count; i++) {
2696                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2697
2698                 if (nh_iter == nh)
2699                         break;
2700                 if (nh_iter->offloaded)
2701                         adj_hash_index += nh_iter->num_adj_entries;
2702         }
2703
2704         *p_adj_hash_index = adj_hash_index;
2705         return 0;
2706 }
2707
2708 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2709 {
2710         return nh->rif;
2711 }
2712
2713 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2714 {
2715         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2716         int i;
2717
2718         for (i = 0; i < nh_grp->count; i++) {
2719                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2720
2721                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2722                         return true;
2723         }
2724         return false;
2725 }
2726
2727 static struct fib_info *
2728 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2729 {
2730         return nh_grp->priv;
2731 }
2732
2733 struct mlxsw_sp_nexthop_group_cmp_arg {
2734         enum mlxsw_sp_l3proto proto;
2735         union {
2736                 struct fib_info *fi;
2737                 struct mlxsw_sp_fib6_entry *fib6_entry;
2738         };
2739 };
2740
2741 static bool
2742 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2743                                     const struct in6_addr *gw, int ifindex,
2744                                     int weight)
2745 {
2746         int i;
2747
2748         for (i = 0; i < nh_grp->count; i++) {
2749                 const struct mlxsw_sp_nexthop *nh;
2750
2751                 nh = &nh_grp->nexthops[i];
2752                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2753                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2754                         return true;
2755         }
2756
2757         return false;
2758 }
2759
2760 static bool
2761 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2762                             const struct mlxsw_sp_fib6_entry *fib6_entry)
2763 {
2764         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2765
2766         if (nh_grp->count != fib6_entry->nrt6)
2767                 return false;
2768
2769         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2770                 struct in6_addr *gw;
2771                 int ifindex, weight;
2772
2773                 ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2774                 weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2775                 gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2776                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2777                                                          weight))
2778                         return false;
2779         }
2780
2781         return true;
2782 }
2783
2784 static int
2785 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2786 {
2787         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2788         const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2789
2790         switch (cmp_arg->proto) {
2791         case MLXSW_SP_L3_PROTO_IPV4:
2792                 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2793         case MLXSW_SP_L3_PROTO_IPV6:
2794                 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2795                                                     cmp_arg->fib6_entry);
2796         default:
2797                 WARN_ON(1);
2798                 return 1;
2799         }
2800 }
2801
2802 static int
2803 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2804 {
2805         return nh_grp->neigh_tbl->family;
2806 }
2807
2808 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2809 {
2810         const struct mlxsw_sp_nexthop_group *nh_grp = data;
2811         const struct mlxsw_sp_nexthop *nh;
2812         struct fib_info *fi;
2813         unsigned int val;
2814         int i;
2815
2816         switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2817         case AF_INET:
2818                 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2819                 return jhash(&fi, sizeof(fi), seed);
2820         case AF_INET6:
2821                 val = nh_grp->count;
2822                 for (i = 0; i < nh_grp->count; i++) {
2823                         nh = &nh_grp->nexthops[i];
2824                         val ^= nh->ifindex;
2825                 }
2826                 return jhash(&val, sizeof(val), seed);
2827         default:
2828                 WARN_ON(1);
2829                 return 0;
2830         }
2831 }
2832
2833 static u32
2834 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2835 {
2836         unsigned int val = fib6_entry->nrt6;
2837         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2838         struct net_device *dev;
2839
2840         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2841                 dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2842                 val ^= dev->ifindex;
2843         }
2844
2845         return jhash(&val, sizeof(val), seed);
2846 }
2847
2848 static u32
2849 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2850 {
2851         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2852
2853         switch (cmp_arg->proto) {
2854         case MLXSW_SP_L3_PROTO_IPV4:
2855                 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2856         case MLXSW_SP_L3_PROTO_IPV6:
2857                 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2858         default:
2859                 WARN_ON(1);
2860                 return 0;
2861         }
2862 }
2863
2864 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2865         .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2866         .hashfn      = mlxsw_sp_nexthop_group_hash,
2867         .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2868         .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2869 };
2870
2871 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2872                                          struct mlxsw_sp_nexthop_group *nh_grp)
2873 {
2874         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2875             !nh_grp->gateway)
2876                 return 0;
2877
2878         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2879                                       &nh_grp->ht_node,
2880                                       mlxsw_sp_nexthop_group_ht_params);
2881 }
2882
2883 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2884                                           struct mlxsw_sp_nexthop_group *nh_grp)
2885 {
2886         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2887             !nh_grp->gateway)
2888                 return;
2889
2890         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2891                                &nh_grp->ht_node,
2892                                mlxsw_sp_nexthop_group_ht_params);
2893 }
2894
2895 static struct mlxsw_sp_nexthop_group *
2896 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2897                                struct fib_info *fi)
2898 {
2899         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2900
2901         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2902         cmp_arg.fi = fi;
2903         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2904                                       &cmp_arg,
2905                                       mlxsw_sp_nexthop_group_ht_params);
2906 }
2907
2908 static struct mlxsw_sp_nexthop_group *
2909 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2910                                struct mlxsw_sp_fib6_entry *fib6_entry)
2911 {
2912         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2913
2914         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2915         cmp_arg.fib6_entry = fib6_entry;
2916         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2917                                       &cmp_arg,
2918                                       mlxsw_sp_nexthop_group_ht_params);
2919 }
2920
2921 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2922         .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2923         .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2924         .key_len = sizeof(struct mlxsw_sp_nexthop_key),
2925 };
2926
2927 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2928                                    struct mlxsw_sp_nexthop *nh)
2929 {
2930         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2931                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2932 }
2933
2934 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2935                                     struct mlxsw_sp_nexthop *nh)
2936 {
2937         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2938                                mlxsw_sp_nexthop_ht_params);
2939 }
2940
2941 static struct mlxsw_sp_nexthop *
2942 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2943                         struct mlxsw_sp_nexthop_key key)
2944 {
2945         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2946                                       mlxsw_sp_nexthop_ht_params);
2947 }
2948
2949 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2950                                              const struct mlxsw_sp_fib *fib,
2951                                              u32 adj_index, u16 ecmp_size,
2952                                              u32 new_adj_index,
2953                                              u16 new_ecmp_size)
2954 {
2955         char raleu_pl[MLXSW_REG_RALEU_LEN];
2956
2957         mlxsw_reg_raleu_pack(raleu_pl,
2958                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
2959                              fib->vr->id, adj_index, ecmp_size, new_adj_index,
2960                              new_ecmp_size);
2961         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2962 }
2963
2964 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2965                                           struct mlxsw_sp_nexthop_group *nh_grp,
2966                                           u32 old_adj_index, u16 old_ecmp_size)
2967 {
2968         struct mlxsw_sp_fib_entry *fib_entry;
2969         struct mlxsw_sp_fib *fib = NULL;
2970         int err;
2971
2972         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2973                 if (fib == fib_entry->fib_node->fib)
2974                         continue;
2975                 fib = fib_entry->fib_node->fib;
2976                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2977                                                         old_adj_index,
2978                                                         old_ecmp_size,
2979                                                         nh_grp->adj_index,
2980                                                         nh_grp->ecmp_size);
2981                 if (err)
2982                         return err;
2983         }
2984         return 0;
2985 }
2986
2987 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2988                                      struct mlxsw_sp_nexthop *nh)
2989 {
2990         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2991         char ratr_pl[MLXSW_REG_RATR_LEN];
2992
2993         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2994                             true, MLXSW_REG_RATR_TYPE_ETHERNET,
2995                             adj_index, neigh_entry->rif);
2996         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2997         if (nh->counter_valid)
2998                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2999         else
3000                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3001
3002         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3003 }
3004
3005 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3006                             struct mlxsw_sp_nexthop *nh)
3007 {
3008         int i;
3009
3010         for (i = 0; i < nh->num_adj_entries; i++) {
3011                 int err;
3012
3013                 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3014                 if (err)
3015                         return err;
3016         }
3017
3018         return 0;
3019 }
3020
3021 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3022                                           u32 adj_index,
3023                                           struct mlxsw_sp_nexthop *nh)
3024 {
3025         const struct mlxsw_sp_ipip_ops *ipip_ops;
3026
3027         ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3028         return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3029 }
3030
3031 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3032                                         u32 adj_index,
3033                                         struct mlxsw_sp_nexthop *nh)
3034 {
3035         int i;
3036
3037         for (i = 0; i < nh->num_adj_entries; i++) {
3038                 int err;
3039
3040                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3041                                                      nh);
3042                 if (err)
3043                         return err;
3044         }
3045
3046         return 0;
3047 }
3048
3049 static int
3050 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3051                               struct mlxsw_sp_nexthop_group *nh_grp,
3052                               bool reallocate)
3053 {
3054         u32 adj_index = nh_grp->adj_index; /* base */
3055         struct mlxsw_sp_nexthop *nh;
3056         int i;
3057         int err;
3058
3059         for (i = 0; i < nh_grp->count; i++) {
3060                 nh = &nh_grp->nexthops[i];
3061
3062                 if (!nh->should_offload) {
3063                         nh->offloaded = 0;
3064                         continue;
3065                 }
3066
3067                 if (nh->update || reallocate) {
3068                         switch (nh->type) {
3069                         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3070                                 err = mlxsw_sp_nexthop_update
3071                                             (mlxsw_sp, adj_index, nh);
3072                                 break;
3073                         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3074                                 err = mlxsw_sp_nexthop_ipip_update
3075                                             (mlxsw_sp, adj_index, nh);
3076                                 break;
3077                         }
3078                         if (err)
3079                                 return err;
3080                         nh->update = 0;
3081                         nh->offloaded = 1;
3082                 }
3083                 adj_index += nh->num_adj_entries;
3084         }
3085         return 0;
3086 }
3087
3088 static bool
3089 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3090                                  const struct mlxsw_sp_fib_entry *fib_entry);
3091
3092 static int
3093 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3094                                     struct mlxsw_sp_nexthop_group *nh_grp)
3095 {
3096         struct mlxsw_sp_fib_entry *fib_entry;
3097         int err;
3098
3099         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3100                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3101                                                       fib_entry))
3102                         continue;
3103                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3104                 if (err)
3105                         return err;
3106         }
3107         return 0;
3108 }
3109
3110 static void
3111 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3112                                    enum mlxsw_reg_ralue_op op, int err);
3113
3114 static void
3115 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3116 {
3117         enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3118         struct mlxsw_sp_fib_entry *fib_entry;
3119
3120         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3121                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3122                                                       fib_entry))
3123                         continue;
3124                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3125         }
3126 }
3127
/* Round *@p_adj_grp_size up to the nearest valid adjacency group size.
 *
 * Valid sizes are 1-64, 512, 1024, 2048 and 4096. Values up to 64 are
 * left untouched; anything above 2048 becomes 4096.
 */
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	u16 size = *p_adj_grp_size;

	if (size <= 64)
		return;

	if (size <= 512)
		size = 512;
	else if (size <= 1024)
		size = 1024;
	else if (size <= 2048)
		size = 2048;
	else
		size = 4096;

	*p_adj_grp_size = size;
}
3144
/* Set *@p_adj_grp_size to the largest of 512, 1024, 2048 and 4096 that
 * does not exceed @alloc_size. When @alloc_size is below 512 the value
 * is left unchanged.
 */
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	if (alloc_size < 512)
		return;

	if (alloc_size >= 4096)
		*p_adj_grp_size = 4096;
	else if (alloc_size >= 2048)
		*p_adj_grp_size = 2048;
	else if (alloc_size >= 1024)
		*p_adj_grp_size = 1024;
	else
		*p_adj_grp_size = 512;
}
3157
3158 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3159                                      u16 *p_adj_grp_size)
3160 {
3161         unsigned int alloc_size;
3162         int err;
3163
3164         /* Round up the requested group size to the next size supported
3165          * by the device and make sure the request can be satisfied.
3166          */
3167         mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3168         err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
3169                                              &alloc_size);
3170         if (err)
3171                 return err;
3172         /* It is possible the allocation results in more allocated
3173          * entries than requested. Try to use as much of them as
3174          * possible.
3175          */
3176         mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3177
3178         return 0;
3179 }
3180
3181 static void
3182 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3183 {
3184         int i, g = 0, sum_norm_weight = 0;
3185         struct mlxsw_sp_nexthop *nh;
3186
3187         for (i = 0; i < nh_grp->count; i++) {
3188                 nh = &nh_grp->nexthops[i];
3189
3190                 if (!nh->should_offload)
3191                         continue;
3192                 if (g > 0)
3193                         g = gcd(nh->nh_weight, g);
3194                 else
3195                         g = nh->nh_weight;
3196         }
3197
3198         for (i = 0; i < nh_grp->count; i++) {
3199                 nh = &nh_grp->nexthops[i];
3200
3201                 if (!nh->should_offload)
3202                         continue;
3203                 nh->norm_nh_weight = nh->nh_weight / g;
3204                 sum_norm_weight += nh->norm_nh_weight;
3205         }
3206
3207         nh_grp->sum_norm_weight = sum_norm_weight;
3208 }
3209
3210 static void
3211 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3212 {
3213         int total = nh_grp->sum_norm_weight;
3214         u16 ecmp_size = nh_grp->ecmp_size;
3215         int i, weight = 0, lower_bound = 0;
3216
3217         for (i = 0; i < nh_grp->count; i++) {
3218                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3219                 int upper_bound;
3220
3221                 if (!nh->should_offload)
3222                         continue;
3223                 weight += nh->norm_nh_weight;
3224                 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3225                 nh->num_adj_entries = upper_bound - lower_bound;
3226                 lower_bound = upper_bound;
3227         }
3228 }
3229
3230 static void
3231 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3232                                struct mlxsw_sp_nexthop_group *nh_grp)
3233 {
3234         u16 ecmp_size, old_ecmp_size;
3235         struct mlxsw_sp_nexthop *nh;
3236         bool offload_change = false;
3237         u32 adj_index;
3238         bool old_adj_index_valid;
3239         u32 old_adj_index;
3240         int i;
3241         int err;
3242
3243         if (!nh_grp->gateway) {
3244                 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3245                 return;
3246         }
3247
3248         for (i = 0; i < nh_grp->count; i++) {
3249                 nh = &nh_grp->nexthops[i];
3250
3251                 if (nh->should_offload != nh->offloaded) {
3252                         offload_change = true;
3253                         if (nh->should_offload)
3254                                 nh->update = 1;
3255                 }
3256         }
3257         if (!offload_change) {
3258                 /* Nothing was added or removed, so no need to reallocate. Just
3259                  * update MAC on existing adjacency indexes.
3260                  */
3261                 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3262                 if (err) {
3263                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3264                         goto set_trap;
3265                 }
3266                 return;
3267         }
3268         mlxsw_sp_nexthop_group_normalize(nh_grp);
3269         if (!nh_grp->sum_norm_weight)
3270                 /* No neigh of this group is connected so we just set
3271                  * the trap and let everthing flow through kernel.
3272                  */
3273                 goto set_trap;
3274
3275         ecmp_size = nh_grp->sum_norm_weight;
3276         err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3277         if (err)
3278                 /* No valid allocation size available. */
3279                 goto set_trap;
3280
3281         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
3282         if (err) {
3283                 /* We ran out of KVD linear space, just set the
3284                  * trap and let everything flow through kernel.
3285                  */
3286                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3287                 goto set_trap;
3288         }
3289         old_adj_index_valid = nh_grp->adj_index_valid;
3290         old_adj_index = nh_grp->adj_index;
3291         old_ecmp_size = nh_grp->ecmp_size;
3292         nh_grp->adj_index_valid = 1;
3293         nh_grp->adj_index = adj_index;
3294         nh_grp->ecmp_size = ecmp_size;
3295         mlxsw_sp_nexthop_group_rebalance(nh_grp);
3296         err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3297         if (err) {
3298                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3299                 goto set_trap;
3300         }
3301
3302         if (!old_adj_index_valid) {
3303                 /* The trap was set for fib entries, so we have to call
3304                  * fib entry update to unset it and use adjacency index.
3305                  */
3306                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3307                 if (err) {
3308                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3309                         goto set_trap;
3310                 }
3311                 return;
3312         }
3313
3314         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3315                                              old_adj_index, old_ecmp_size);
3316         mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
3317         if (err) {
3318                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3319                 goto set_trap;
3320         }
3321
3322         /* Offload state within the group changed, so update the flags. */
3323         mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3324
3325         return;
3326
3327 set_trap:
3328         old_adj_index_valid = nh_grp->adj_index_valid;
3329         nh_grp->adj_index_valid = 0;
3330         for (i = 0; i < nh_grp->count; i++) {
3331                 nh = &nh_grp->nexthops[i];
3332                 nh->offloaded = 0;
3333         }
3334         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3335         if (err)
3336                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3337         if (old_adj_index_valid)
3338                 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
3339 }
3340
3341 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3342                                             bool removing)
3343 {
3344         if (!removing)
3345                 nh->should_offload = 1;
3346         else
3347                 nh->should_offload = 0;
3348         nh->update = 1;
3349 }
3350
3351 static void
3352 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3353                               struct mlxsw_sp_neigh_entry *neigh_entry,
3354                               bool removing)
3355 {
3356         struct mlxsw_sp_nexthop *nh;
3357
3358         list_for_each_entry(nh, &neigh_entry->nexthop_list,
3359                             neigh_list_node) {
3360                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3361                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3362         }
3363 }
3364
3365 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3366                                       struct mlxsw_sp_rif *rif)
3367 {
3368         if (nh->rif)
3369                 return;
3370
3371         nh->rif = rif;
3372         list_add(&nh->rif_list_node, &rif->nexthop_list);
3373 }
3374
3375 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3376 {
3377         if (!nh->rif)
3378                 return;
3379
3380         list_del(&nh->rif_list_node);
3381         nh->rif = NULL;
3382 }
3383
3384 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3385                                        struct mlxsw_sp_nexthop *nh)
3386 {
3387         struct mlxsw_sp_neigh_entry *neigh_entry;
3388         struct neighbour *n;
3389         u8 nud_state, dead;
3390         int err;
3391
3392         if (!nh->nh_grp->gateway || nh->neigh_entry)
3393                 return 0;
3394
3395         /* Take a reference of neigh here ensuring that neigh would
3396          * not be destructed before the nexthop entry is finished.
3397          * The reference is taken either in neigh_lookup() or
3398          * in neigh_create() in case n is not found.
3399          */
3400         n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3401         if (!n) {
3402                 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3403                                  nh->rif->dev);
3404                 if (IS_ERR(n))
3405                         return PTR_ERR(n);
3406                 neigh_event_send(n, NULL);
3407         }
3408         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3409         if (!neigh_entry) {
3410                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3411                 if (IS_ERR(neigh_entry)) {
3412                         err = -EINVAL;
3413                         goto err_neigh_entry_create;
3414                 }
3415         }
3416
3417         /* If that is the first nexthop connected to that neigh, add to
3418          * nexthop_neighs_list
3419          */
3420         if (list_empty(&neigh_entry->nexthop_list))
3421                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3422                               &mlxsw_sp->router->nexthop_neighs_list);
3423
3424         nh->neigh_entry = neigh_entry;
3425         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3426         read_lock_bh(&n->lock);
3427         nud_state = n->nud_state;
3428         dead = n->dead;
3429         read_unlock_bh(&n->lock);
3430         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3431
3432         return 0;
3433
3434 err_neigh_entry_create:
3435         neigh_release(n);
3436         return err;
3437 }
3438
3439 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3440                                         struct mlxsw_sp_nexthop *nh)
3441 {
3442         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3443         struct neighbour *n;
3444
3445         if (!neigh_entry)
3446                 return;
3447         n = neigh_entry->key.n;
3448
3449         __mlxsw_sp_nexthop_neigh_update(nh, true);
3450         list_del(&nh->neigh_list_node);
3451         nh->neigh_entry = NULL;
3452
3453         /* If that is the last nexthop connected to that neigh, remove from
3454          * nexthop_neighs_list
3455          */
3456         if (list_empty(&neigh_entry->nexthop_list))
3457                 list_del(&neigh_entry->nexthop_neighs_list_node);
3458
3459         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3460                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3461
3462         neigh_release(n);
3463 }
3464
3465 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3466 {
3467         struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3468
3469         return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3470 }
3471
3472 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3473                                        struct mlxsw_sp_nexthop *nh,
3474                                        struct mlxsw_sp_ipip_entry *ipip_entry)
3475 {
3476         bool removing;
3477
3478         if (!nh->nh_grp->gateway || nh->ipip_entry)
3479                 return;
3480
3481         nh->ipip_entry = ipip_entry;
3482         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3483         __mlxsw_sp_nexthop_neigh_update(nh, removing);
3484         mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3485 }
3486
3487 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3488                                        struct mlxsw_sp_nexthop *nh)
3489 {
3490         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3491
3492         if (!ipip_entry)
3493                 return;
3494
3495         __mlxsw_sp_nexthop_neigh_update(nh, true);
3496         nh->ipip_entry = NULL;
3497 }
3498
3499 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3500                                         const struct fib_nh *fib_nh,
3501                                         enum mlxsw_sp_ipip_type *p_ipipt)
3502 {
3503         struct net_device *dev = fib_nh->nh_dev;
3504
3505         return dev &&
3506                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3507                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3508 }
3509
3510 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3511                                        struct mlxsw_sp_nexthop *nh)
3512 {
3513         switch (nh->type) {
3514         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3515                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3516                 mlxsw_sp_nexthop_rif_fini(nh);
3517                 break;
3518         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3519                 mlxsw_sp_nexthop_rif_fini(nh);
3520                 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3521                 break;
3522         }
3523 }
3524
3525 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3526                                        struct mlxsw_sp_nexthop *nh,
3527                                        struct fib_nh *fib_nh)
3528 {
3529         const struct mlxsw_sp_ipip_ops *ipip_ops;
3530         struct net_device *dev = fib_nh->nh_dev;
3531         struct mlxsw_sp_ipip_entry *ipip_entry;
3532         struct mlxsw_sp_rif *rif;
3533         int err;
3534
3535         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3536         if (ipip_entry) {
3537                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3538                 if (ipip_ops->can_offload(mlxsw_sp, dev,
3539                                           MLXSW_SP_L3_PROTO_IPV4)) {
3540                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3541                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3542                         return 0;
3543                 }
3544         }
3545
3546         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3547         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3548         if (!rif)
3549                 return 0;
3550
3551         mlxsw_sp_nexthop_rif_init(nh, rif);
3552         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3553         if (err)
3554                 goto err_neigh_init;
3555
3556         return 0;
3557
3558 err_neigh_init:
3559         mlxsw_sp_nexthop_rif_fini(nh);
3560         return err;
3561 }
3562
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(); the teardown is done
 * entirely by the protocol-independent mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3568
3569 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3570                                   struct mlxsw_sp_nexthop_group *nh_grp,
3571                                   struct mlxsw_sp_nexthop *nh,
3572                                   struct fib_nh *fib_nh)
3573 {
3574         struct net_device *dev = fib_nh->nh_dev;
3575         struct in_device *in_dev;
3576         int err;
3577
3578         nh->nh_grp = nh_grp;
3579         nh->key.fib_nh = fib_nh;
3580 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3581         nh->nh_weight = fib_nh->nh_weight;
3582 #else
3583         nh->nh_weight = 1;
3584 #endif
3585         memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3586         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3587         if (err)
3588                 return err;
3589
3590         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3591         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3592
3593         if (!dev)
3594                 return 0;
3595
3596         in_dev = __in_dev_get_rtnl(dev);
3597         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3598             fib_nh->nh_flags & RTNH_F_LINKDOWN)
3599                 return 0;
3600
3601         err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3602         if (err)
3603                 goto err_nexthop_neigh_init;
3604
3605         return 0;
3606
3607 err_nexthop_neigh_init:
3608         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3609         return err;
3610 }
3611
3612 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3613                                    struct mlxsw_sp_nexthop *nh)
3614 {
3615         mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3616         list_del(&nh->router_list_node);
3617         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3618         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3619 }
3620
3621 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3622                                     unsigned long event, struct fib_nh *fib_nh)
3623 {
3624         struct mlxsw_sp_nexthop_key key;
3625         struct mlxsw_sp_nexthop *nh;
3626
3627         if (mlxsw_sp->router->aborted)
3628                 return;
3629
3630         key.fib_nh = fib_nh;
3631         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3632         if (WARN_ON_ONCE(!nh))
3633                 return;
3634
3635         switch (event) {
3636         case FIB_EVENT_NH_ADD:
3637                 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3638                 break;
3639         case FIB_EVENT_NH_DEL:
3640                 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3641                 break;
3642         }
3643
3644         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3645 }
3646
3647 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3648                                         struct mlxsw_sp_rif *rif)
3649 {
3650         struct mlxsw_sp_nexthop *nh;
3651         bool removing;
3652
3653         list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3654                 switch (nh->type) {
3655                 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3656                         removing = false;
3657                         break;
3658                 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3659                         removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3660                         break;
3661                 default:
3662                         WARN_ON(1);
3663                         continue;
3664                 }
3665
3666                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3667                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3668         }
3669 }
3670
3671 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3672                                          struct mlxsw_sp_rif *old_rif,
3673                                          struct mlxsw_sp_rif *new_rif)
3674 {
3675         struct mlxsw_sp_nexthop *nh;
3676
3677         list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3678         list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3679                 nh->rif = new_rif;
3680         mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3681 }
3682
3683 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3684                                            struct mlxsw_sp_rif *rif)
3685 {
3686         struct mlxsw_sp_nexthop *nh, *tmp;
3687
3688         list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3689                 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3690                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3691         }
3692 }
3693
3694 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3695                                    const struct fib_info *fi)
3696 {
3697         return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3698                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3699 }
3700
3701 static struct mlxsw_sp_nexthop_group *
3702 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3703 {
3704         struct mlxsw_sp_nexthop_group *nh_grp;
3705         struct mlxsw_sp_nexthop *nh;
3706         struct fib_nh *fib_nh;
3707         size_t alloc_size;
3708         int i;
3709         int err;
3710
3711         alloc_size = sizeof(*nh_grp) +
3712                      fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3713         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3714         if (!nh_grp)
3715                 return ERR_PTR(-ENOMEM);
3716         nh_grp->priv = fi;
3717         INIT_LIST_HEAD(&nh_grp->fib_list);
3718         nh_grp->neigh_tbl = &arp_tbl;
3719
3720         nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3721         nh_grp->count = fi->fib_nhs;
3722         fib_info_hold(fi);
3723         for (i = 0; i < nh_grp->count; i++) {
3724                 nh = &nh_grp->nexthops[i];
3725                 fib_nh = &fi->fib_nh[i];
3726                 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3727                 if (err)
3728                         goto err_nexthop4_init;
3729         }
3730         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3731         if (err)
3732                 goto err_nexthop_group_insert;
3733         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3734         return nh_grp;
3735
3736 err_nexthop_group_insert:
3737 err_nexthop4_init:
3738         for (i--; i >= 0; i--) {
3739                 nh = &nh_grp->nexthops[i];
3740                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3741         }
3742         fib_info_put(fi);
3743         kfree(nh_grp);
3744         return ERR_PTR(err);
3745 }
3746
3747 static void
3748 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3749                                 struct mlxsw_sp_nexthop_group *nh_grp)
3750 {
3751         struct mlxsw_sp_nexthop *nh;
3752         int i;
3753
3754         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3755         for (i = 0; i < nh_grp->count; i++) {
3756                 nh = &nh_grp->nexthops[i];
3757                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3758         }
3759         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3760         WARN_ON_ONCE(nh_grp->adj_index_valid);
3761         fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3762         kfree(nh_grp);
3763 }
3764
3765 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3766                                        struct mlxsw_sp_fib_entry *fib_entry,
3767                                        struct fib_info *fi)
3768 {
3769         struct mlxsw_sp_nexthop_group *nh_grp;
3770
3771         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3772         if (!nh_grp) {
3773                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3774                 if (IS_ERR(nh_grp))
3775                         return PTR_ERR(nh_grp);
3776         }
3777         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3778         fib_entry->nh_group = nh_grp;
3779         return 0;
3780 }
3781
3782 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3783                                         struct mlxsw_sp_fib_entry *fib_entry)
3784 {
3785         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3786
3787         list_del(&fib_entry->nexthop_group_node);
3788         if (!list_empty(&nh_grp->fib_list))
3789                 return;
3790         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3791 }
3792
3793 static bool
3794 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3795 {
3796         struct mlxsw_sp_fib4_entry *fib4_entry;
3797
3798         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3799                                   common);
3800         return !fib4_entry->tos;
3801 }
3802
3803 static bool
3804 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3805 {
3806         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3807
3808         switch (fib_entry->fib_node->fib->proto) {
3809         case MLXSW_SP_L3_PROTO_IPV4:
3810                 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3811                         return false;
3812                 break;
3813         case MLXSW_SP_L3_PROTO_IPV6:
3814                 break;
3815         }
3816
3817         switch (fib_entry->type) {
3818         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3819                 return !!nh_group->adj_index_valid;
3820         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3821                 return !!nh_group->nh_rif;
3822         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3823                 return true;
3824         default:
3825                 return false;
3826         }
3827 }
3828
3829 static struct mlxsw_sp_nexthop *
3830 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3831                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3832 {
3833         int i;
3834
3835         for (i = 0; i < nh_grp->count; i++) {
3836                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3837                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3838
3839                 if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3840                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3841                                     &rt->fib6_nh.nh_gw))
3842                         return nh;
3843                 continue;
3844         }
3845
3846         return NULL;
3847 }
3848
3849 static void
3850 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3851 {
3852         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3853         int i;
3854
3855         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3856             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3857                 nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3858                 return;
3859         }
3860
3861         for (i = 0; i < nh_grp->count; i++) {
3862                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3863
3864                 if (nh->offloaded)
3865                         nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3866                 else
3867                         nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3868         }
3869 }
3870
3871 static void
3872 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3873 {
3874         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3875         int i;
3876
3877         if (!list_is_singular(&nh_grp->fib_list))
3878                 return;
3879
3880         for (i = 0; i < nh_grp->count; i++) {
3881                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3882
3883                 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3884         }
3885 }
3886
3887 static void
3888 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3889 {
3890         struct mlxsw_sp_fib6_entry *fib6_entry;
3891         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3892
3893         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3894                                   common);
3895
3896         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3897                 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3898                                  list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3899                 return;
3900         }
3901
3902         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3903                 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3904                 struct mlxsw_sp_nexthop *nh;
3905
3906                 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3907                 if (nh && nh->offloaded)
3908                         mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3909                 else
3910                         mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3911         }
3912 }
3913
3914 static void
3915 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3916 {
3917         struct mlxsw_sp_fib6_entry *fib6_entry;
3918         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3919
3920         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3921                                   common);
3922         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3923                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3924
3925                 rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3926         }
3927 }
3928
3929 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3930 {
3931         switch (fib_entry->fib_node->fib->proto) {
3932         case MLXSW_SP_L3_PROTO_IPV4:
3933                 mlxsw_sp_fib4_entry_offload_set(fib_entry);
3934                 break;
3935         case MLXSW_SP_L3_PROTO_IPV6:
3936                 mlxsw_sp_fib6_entry_offload_set(fib_entry);
3937                 break;
3938         }
3939 }
3940
3941 static void
3942 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3943 {
3944         switch (fib_entry->fib_node->fib->proto) {
3945         case MLXSW_SP_L3_PROTO_IPV4:
3946                 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3947                 break;
3948         case MLXSW_SP_L3_PROTO_IPV6:
3949                 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3950                 break;
3951         }
3952 }
3953
3954 static void
3955 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3956                                    enum mlxsw_reg_ralue_op op, int err)
3957 {
3958         switch (op) {
3959         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3960                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3961         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3962                 if (err)
3963                         return;
3964                 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3965                         mlxsw_sp_fib_entry_offload_set(fib_entry);
3966                 else
3967                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
3968                 return;
3969         default:
3970                 return;
3971         }
3972 }
3973
3974 static void
3975 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3976                               const struct mlxsw_sp_fib_entry *fib_entry,
3977                               enum mlxsw_reg_ralue_op op)
3978 {
3979         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3980         enum mlxsw_reg_ralxx_protocol proto;
3981         u32 *p_dip;
3982
3983         proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3984
3985         switch (fib->proto) {
3986         case MLXSW_SP_L3_PROTO_IPV4:
3987                 p_dip = (u32 *) fib_entry->fib_node->key.addr;
3988                 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3989                                       fib_entry->fib_node->key.prefix_len,
3990                                       *p_dip);
3991                 break;
3992         case MLXSW_SP_L3_PROTO_IPV6:
3993                 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3994                                       fib_entry->fib_node->key.prefix_len,
3995                                       fib_entry->fib_node->key.addr);
3996                 break;
3997         }
3998 }
3999
4000 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4001                                         struct mlxsw_sp_fib_entry *fib_entry,
4002                                         enum mlxsw_reg_ralue_op op)
4003 {
4004         char ralue_pl[MLXSW_REG_RALUE_LEN];
4005         enum mlxsw_reg_ralue_trap_action trap_action;
4006         u16 trap_id = 0;
4007         u32 adjacency_index = 0;
4008         u16 ecmp_size = 0;
4009
4010         /* In case the nexthop group adjacency index is valid, use it
4011          * with provided ECMP size. Otherwise, setup trap and pass
4012          * traffic to kernel.
4013          */
4014         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4015                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4016                 adjacency_index = fib_entry->nh_group->adj_index;
4017                 ecmp_size = fib_entry->nh_group->ecmp_size;
4018         } else {
4019                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4020                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4021         }
4022
4023         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4024         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4025                                         adjacency_index, ecmp_size);
4026         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4027 }
4028
4029 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4030                                        struct mlxsw_sp_fib_entry *fib_entry,
4031                                        enum mlxsw_reg_ralue_op op)
4032 {
4033         struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4034         enum mlxsw_reg_ralue_trap_action trap_action;
4035         char ralue_pl[MLXSW_REG_RALUE_LEN];
4036         u16 trap_id = 0;
4037         u16 rif_index = 0;
4038
4039         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4040                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4041                 rif_index = rif->rif_index;
4042         } else {
4043                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4044                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4045         }
4046
4047         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4048         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4049                                        rif_index);
4050         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4051 }
4052
4053 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4054                                       struct mlxsw_sp_fib_entry *fib_entry,
4055                                       enum mlxsw_reg_ralue_op op)
4056 {
4057         char ralue_pl[MLXSW_REG_RALUE_LEN];
4058
4059         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4060         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4061         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4062 }
4063
4064 static int
4065 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4066                                  struct mlxsw_sp_fib_entry *fib_entry,
4067                                  enum mlxsw_reg_ralue_op op)
4068 {
4069         struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4070         const struct mlxsw_sp_ipip_ops *ipip_ops;
4071
4072         if (WARN_ON(!ipip_entry))
4073                 return -EINVAL;
4074
4075         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4076         return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4077                                       fib_entry->decap.tunnel_index);
4078 }
4079
4080 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4081                                    struct mlxsw_sp_fib_entry *fib_entry,
4082                                    enum mlxsw_reg_ralue_op op)
4083 {
4084         switch (fib_entry->type) {
4085         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4086                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4087         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4088                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4089         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4090                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4091         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4092                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4093                                                         fib_entry, op);
4094         }
4095         return -EINVAL;
4096 }
4097
4098 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4099                                  struct mlxsw_sp_fib_entry *fib_entry,
4100                                  enum mlxsw_reg_ralue_op op)
4101 {
4102         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4103
4104         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4105
4106         return err;
4107 }
4108
4109 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4110                                      struct mlxsw_sp_fib_entry *fib_entry)
4111 {
4112         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4113                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
4114 }
4115
4116 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4117                                   struct mlxsw_sp_fib_entry *fib_entry)
4118 {
4119         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4120                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
4121 }
4122
4123 static int
4124 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4125                              const struct fib_entry_notifier_info *fen_info,
4126                              struct mlxsw_sp_fib_entry *fib_entry)
4127 {
4128         union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4129         struct net_device *dev = fen_info->fi->fib_dev;
4130         struct mlxsw_sp_ipip_entry *ipip_entry;
4131         struct fib_info *fi = fen_info->fi;
4132
4133         switch (fen_info->type) {
4134         case RTN_LOCAL:
4135                 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4136                                                  MLXSW_SP_L3_PROTO_IPV4, dip);
4137                 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4138                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4139                         return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4140                                                              fib_entry,
4141                                                              ipip_entry);
4142                 }
4143                 /* fall through */
4144         case RTN_BROADCAST:
4145                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4146                 return 0;
4147         case RTN_UNREACHABLE: /* fall through */
4148         case RTN_BLACKHOLE: /* fall through */
4149         case RTN_PROHIBIT:
4150                 /* Packets hitting these routes need to be trapped, but
4151                  * can do so with a lower priority than packets directed
4152                  * at the host, so use action type local instead of trap.
4153                  */
4154                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4155                 return 0;
4156         case RTN_UNICAST:
4157                 if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4158                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4159                 else
4160                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4161                 return 0;
4162         default:
4163                 return -EINVAL;
4164         }
4165 }
4166
4167 static struct mlxsw_sp_fib4_entry *
4168 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4169                            struct mlxsw_sp_fib_node *fib_node,
4170                            const struct fib_entry_notifier_info *fen_info)
4171 {
4172         struct mlxsw_sp_fib4_entry *fib4_entry;
4173         struct mlxsw_sp_fib_entry *fib_entry;
4174         int err;
4175
4176         fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4177         if (!fib4_entry)
4178                 return ERR_PTR(-ENOMEM);
4179         fib_entry = &fib4_entry->common;
4180
4181         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4182         if (err)
4183                 goto err_fib4_entry_type_set;
4184
4185         err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4186         if (err)
4187                 goto err_nexthop4_group_get;
4188
4189         fib4_entry->prio = fen_info->fi->fib_priority;
4190         fib4_entry->tb_id = fen_info->tb_id;
4191         fib4_entry->type = fen_info->type;
4192         fib4_entry->tos = fen_info->tos;
4193
4194         fib_entry->fib_node = fib_node;
4195
4196         return fib4_entry;
4197
4198 err_nexthop4_group_get:
4199 err_fib4_entry_type_set:
4200         kfree(fib4_entry);
4201         return ERR_PTR(err);
4202 }
4203
4204 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4205                                         struct mlxsw_sp_fib4_entry *fib4_entry)
4206 {
4207         mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4208         kfree(fib4_entry);
4209 }
4210
4211 static struct mlxsw_sp_fib4_entry *
4212 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4213                            const struct fib_entry_notifier_info *fen_info)
4214 {
4215         struct mlxsw_sp_fib4_entry *fib4_entry;
4216         struct mlxsw_sp_fib_node *fib_node;
4217         struct mlxsw_sp_fib *fib;
4218         struct mlxsw_sp_vr *vr;
4219
4220         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4221         if (!vr)
4222                 return NULL;
4223         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4224
4225         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4226                                             sizeof(fen_info->dst),
4227                                             fen_info->dst_len);
4228         if (!fib_node)
4229                 return NULL;
4230
4231         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4232                 if (fib4_entry->tb_id == fen_info->tb_id &&
4233                     fib4_entry->tos == fen_info->tos &&
4234                     fib4_entry->type == fen_info->type &&
4235                     mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4236                     fen_info->fi) {
4237                         return fib4_entry;
4238                 }
4239         }
4240
4241         return NULL;
4242 }
4243
4244 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4245         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4246         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4247         .key_len = sizeof(struct mlxsw_sp_fib_key),
4248         .automatic_shrinking = true,
4249 };
4250
4251 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4252                                     struct mlxsw_sp_fib_node *fib_node)
4253 {
4254         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4255                                       mlxsw_sp_fib_ht_params);
4256 }
4257
4258 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4259                                      struct mlxsw_sp_fib_node *fib_node)
4260 {
4261         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4262                                mlxsw_sp_fib_ht_params);
4263 }
4264
4265 static struct mlxsw_sp_fib_node *
4266 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4267                          size_t addr_len, unsigned char prefix_len)
4268 {
4269         struct mlxsw_sp_fib_key key;
4270
4271         memset(&key, 0, sizeof(key));
4272         memcpy(key.addr, addr, addr_len);
4273         key.prefix_len = prefix_len;
4274         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4275 }
4276
4277 static struct mlxsw_sp_fib_node *
4278 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4279                          size_t addr_len, unsigned char prefix_len)
4280 {
4281         struct mlxsw_sp_fib_node *fib_node;
4282
4283         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4284         if (!fib_node)
4285                 return NULL;
4286
4287         INIT_LIST_HEAD(&fib_node->entry_list);
4288         list_add(&fib_node->list, &fib->node_list);
4289         memcpy(fib_node->key.addr, addr, addr_len);
4290         fib_node->key.prefix_len = prefix_len;
4291
4292         return fib_node;
4293 }
4294
4295 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4296 {
4297         list_del(&fib_node->list);
4298         WARN_ON(!list_empty(&fib_node->entry_list));
4299         kfree(fib_node);
4300 }
4301
4302 static bool
4303 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4304                                  const struct mlxsw_sp_fib_entry *fib_entry)
4305 {
4306         return list_first_entry(&fib_node->entry_list,
4307                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
4308 }
4309
4310 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4311                                       struct mlxsw_sp_fib_node *fib_node)
4312 {
4313         struct mlxsw_sp_prefix_usage req_prefix_usage;
4314         struct mlxsw_sp_fib *fib = fib_node->fib;
4315         struct mlxsw_sp_lpm_tree *lpm_tree;
4316         int err;
4317
4318         lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4319         if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4320                 goto out;
4321
4322         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4323         mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4324         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4325                                          fib->proto);
4326         if (IS_ERR(lpm_tree))
4327                 return PTR_ERR(lpm_tree);
4328
4329         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4330         if (err)
4331                 goto err_lpm_tree_replace;
4332
4333 out:
4334         lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4335         return 0;
4336
4337 err_lpm_tree_replace:
4338         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4339         return err;
4340 }
4341
4342 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4343                                          struct mlxsw_sp_fib_node *fib_node)
4344 {
4345         struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4346         struct mlxsw_sp_prefix_usage req_prefix_usage;
4347         struct mlxsw_sp_fib *fib = fib_node->fib;
4348         int err;
4349
4350         if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4351                 return;
4352         /* Try to construct a new LPM tree from the current prefix usage
4353          * minus the unused one. If we fail, continue using the old one.
4354          */
4355         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4356         mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4357                                     fib_node->key.prefix_len);
4358         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4359                                          fib->proto);
4360         if (IS_ERR(lpm_tree))
4361                 return;
4362
4363         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4364         if (err)
4365                 goto err_lpm_tree_replace;
4366
4367         return;
4368
4369 err_lpm_tree_replace:
4370         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4371 }
4372
4373 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4374                                   struct mlxsw_sp_fib_node *fib_node,
4375                                   struct mlxsw_sp_fib *fib)
4376 {
4377         int err;
4378
4379         err = mlxsw_sp_fib_node_insert(fib, fib_node);
4380         if (err)
4381                 return err;
4382         fib_node->fib = fib;
4383
4384         err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4385         if (err)
4386                 goto err_fib_lpm_tree_link;
4387
4388         return 0;
4389
4390 err_fib_lpm_tree_link:
4391         fib_node->fib = NULL;
4392         mlxsw_sp_fib_node_remove(fib, fib_node);
4393         return err;
4394 }
4395
4396 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4397                                    struct mlxsw_sp_fib_node *fib_node)
4398 {
4399         struct mlxsw_sp_fib *fib = fib_node->fib;
4400
4401         mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4402         fib_node->fib = NULL;
4403         mlxsw_sp_fib_node_remove(fib, fib_node);
4404 }
4405
4406 static struct mlxsw_sp_fib_node *
4407 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4408                       size_t addr_len, unsigned char prefix_len,
4409                       enum mlxsw_sp_l3proto proto)
4410 {
4411         struct mlxsw_sp_fib_node *fib_node;
4412         struct mlxsw_sp_fib *fib;
4413         struct mlxsw_sp_vr *vr;
4414         int err;
4415
4416         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4417         if (IS_ERR(vr))
4418                 return ERR_CAST(vr);
4419         fib = mlxsw_sp_vr_fib(vr, proto);
4420
4421         fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4422         if (fib_node)
4423                 return fib_node;
4424
4425         fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4426         if (!fib_node) {
4427                 err = -ENOMEM;
4428                 goto err_fib_node_create;
4429         }
4430
4431         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4432         if (err)
4433                 goto err_fib_node_init;
4434
4435         return fib_node;
4436
4437 err_fib_node_init:
4438         mlxsw_sp_fib_node_destroy(fib_node);
4439 err_fib_node_create:
4440         mlxsw_sp_vr_put(mlxsw_sp, vr);
4441         return ERR_PTR(err);
4442 }
4443
4444 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4445                                   struct mlxsw_sp_fib_node *fib_node)
4446 {
4447         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4448
4449         if (!list_empty(&fib_node->entry_list))
4450                 return;
4451         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4452         mlxsw_sp_fib_node_destroy(fib_node);
4453         mlxsw_sp_vr_put(mlxsw_sp, vr);
4454 }
4455
4456 static struct mlxsw_sp_fib4_entry *
4457 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4458                               const struct mlxsw_sp_fib4_entry *new4_entry)
4459 {
4460         struct mlxsw_sp_fib4_entry *fib4_entry;
4461
4462         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4463                 if (fib4_entry->tb_id > new4_entry->tb_id)
4464                         continue;
4465                 if (fib4_entry->tb_id != new4_entry->tb_id)
4466                         break;
4467                 if (fib4_entry->tos > new4_entry->tos)
4468                         continue;
4469                 if (fib4_entry->prio >= new4_entry->prio ||
4470                     fib4_entry->tos < new4_entry->tos)
4471                         return fib4_entry;
4472         }
4473
4474         return NULL;
4475 }
4476
4477 static int
4478 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4479                                struct mlxsw_sp_fib4_entry *new4_entry)
4480 {
4481         struct mlxsw_sp_fib_node *fib_node;
4482
4483         if (WARN_ON(!fib4_entry))
4484                 return -EINVAL;
4485
4486         fib_node = fib4_entry->common.fib_node;
4487         list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4488                                  common.list) {
4489                 if (fib4_entry->tb_id != new4_entry->tb_id ||
4490                     fib4_entry->tos != new4_entry->tos ||
4491                     fib4_entry->prio != new4_entry->prio)
4492                         break;
4493         }
4494
4495         list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4496         return 0;
4497 }
4498
4499 static int
4500 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4501                                bool replace, bool append)
4502 {
4503         struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4504         struct mlxsw_sp_fib4_entry *fib4_entry;
4505
4506         fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4507
4508         if (append)
4509                 return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4510         if (replace && WARN_ON(!fib4_entry))
4511                 return -EINVAL;
4512
4513         /* Insert new entry before replaced one, so that we can later
4514          * remove the second.
4515          */
4516         if (fib4_entry) {
4517                 list_add_tail(&new4_entry->common.list,
4518                               &fib4_entry->common.list);
4519         } else {
4520                 struct mlxsw_sp_fib4_entry *last;
4521
4522                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
4523                         if (new4_entry->tb_id > last->tb_id)
4524                                 break;
4525                         fib4_entry = last;
4526                 }
4527
4528                 if (fib4_entry)
4529                         list_add(&new4_entry->common.list,
4530                                  &fib4_entry->common.list);
4531                 else
4532                         list_add(&new4_entry->common.list,
4533                                  &fib_node->entry_list);
4534         }
4535
4536         return 0;
4537 }
4538
4539 static void
4540 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4541 {
4542         list_del(&fib4_entry->common.list);
4543 }
4544
4545 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4546                                        struct mlxsw_sp_fib_entry *fib_entry)
4547 {
4548         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4549
4550         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4551                 return 0;
4552
4553         /* To prevent packet loss, overwrite the previously offloaded
4554          * entry.
4555          */
4556         if (!list_is_singular(&fib_node->entry_list)) {
4557                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4558                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4559
4560                 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4561         }
4562
4563         return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4564 }
4565
4566 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4567                                         struct mlxsw_sp_fib_entry *fib_entry)
4568 {
4569         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4570
4571         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4572                 return;
4573
4574         /* Promote the next entry by overwriting the deleted entry */
4575         if (!list_is_singular(&fib_node->entry_list)) {
4576                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4577                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4578
4579                 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4580                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4581                 return;
4582         }
4583
4584         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4585 }
4586
4587 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4588                                          struct mlxsw_sp_fib4_entry *fib4_entry,
4589                                          bool replace, bool append)
4590 {
4591         int err;
4592
4593         err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4594         if (err)
4595                 return err;
4596
4597         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4598         if (err)
4599                 goto err_fib_node_entry_add;
4600
4601         return 0;
4602
4603 err_fib_node_entry_add:
4604         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4605         return err;
4606 }
4607
4608 static void
4609 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4610                                 struct mlxsw_sp_fib4_entry *fib4_entry)
4611 {
4612         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4613         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4614
4615         if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4616                 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4617 }
4618
4619 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4620                                         struct mlxsw_sp_fib4_entry *fib4_entry,
4621                                         bool replace)
4622 {
4623         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4624         struct mlxsw_sp_fib4_entry *replaced;
4625
4626         if (!replace)
4627                 return;
4628
4629         /* We inserted the new entry before replaced one */
4630         replaced = list_next_entry(fib4_entry, common.list);
4631
4632         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4633         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4634         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4635 }
4636
4637 static int
4638 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4639                          const struct fib_entry_notifier_info *fen_info,
4640                          bool replace, bool append)
4641 {
4642         struct mlxsw_sp_fib4_entry *fib4_entry;
4643         struct mlxsw_sp_fib_node *fib_node;
4644         int err;
4645
4646         if (mlxsw_sp->router->aborted)
4647                 return 0;
4648
4649         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4650                                          &fen_info->dst, sizeof(fen_info->dst),
4651                                          fen_info->dst_len,
4652                                          MLXSW_SP_L3_PROTO_IPV4);
4653         if (IS_ERR(fib_node)) {
4654                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4655                 return PTR_ERR(fib_node);
4656         }
4657
4658         fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4659         if (IS_ERR(fib4_entry)) {
4660                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4661                 err = PTR_ERR(fib4_entry);
4662                 goto err_fib4_entry_create;
4663         }
4664
4665         err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4666                                             append);
4667         if (err) {
4668                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4669                 goto err_fib4_node_entry_link;
4670         }
4671
4672         mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4673
4674         return 0;
4675
4676 err_fib4_node_entry_link:
4677         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4678 err_fib4_entry_create:
4679         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4680         return err;
4681 }
4682
4683 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4684                                      struct fib_entry_notifier_info *fen_info)
4685 {
4686         struct mlxsw_sp_fib4_entry *fib4_entry;
4687         struct mlxsw_sp_fib_node *fib_node;
4688
4689         if (mlxsw_sp->router->aborted)
4690                 return;
4691
4692         fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4693         if (WARN_ON(!fib4_entry))
4694                 return;
4695         fib_node = fib4_entry->common.fib_node;
4696
4697         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4698         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4699         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4700 }
4701
4702 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4703 {
4704         /* Packets with link-local destination IP arriving to the router
4705          * are trapped to the CPU, so no need to program specific routes
4706          * for them.
4707          */
4708         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4709                 return true;
4710
4711         /* Multicast routes aren't supported, so ignore them. Neighbour
4712          * Discovery packets are specifically trapped.
4713          */
4714         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4715                 return true;
4716
4717         /* Cloned routes are irrelevant in the forwarding path. */
4718         if (rt->fib6_flags & RTF_CACHE)
4719                 return true;
4720
4721         return false;
4722 }
4723
4724 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4725 {
4726         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4727
4728         mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4729         if (!mlxsw_sp_rt6)
4730                 return ERR_PTR(-ENOMEM);
4731
4732         /* In case of route replace, replaced route is deleted with
4733          * no notification. Take reference to prevent accessing freed
4734          * memory.
4735          */
4736         mlxsw_sp_rt6->rt = rt;
4737         fib6_info_hold(rt);
4738
4739         return mlxsw_sp_rt6;
4740 }
4741
4742 #if IS_ENABLED(CONFIG_IPV6)
4743 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4744 {
4745         fib6_info_release(rt);
4746 }
4747 #else
4748 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4749 {
4750 }
4751 #endif
4752
4753 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4754 {
4755         mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4756         kfree(mlxsw_sp_rt6);
4757 }
4758
4759 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4760 {
4761         /* RTF_CACHE routes are ignored */
4762         return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4763 }
4764
4765 static struct fib6_info *
4766 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4767 {
4768         return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4769                                 list)->rt;
4770 }
4771
4772 static struct mlxsw_sp_fib6_entry *
4773 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4774                                  const struct fib6_info *nrt, bool replace)
4775 {
4776         struct mlxsw_sp_fib6_entry *fib6_entry;
4777
4778         if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4779                 return NULL;
4780
4781         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4782                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4783
4784                 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4785                  * virtual router.
4786                  */
4787                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4788                         continue;
4789                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4790                         break;
4791                 if (rt->fib6_metric < nrt->fib6_metric)
4792                         continue;
4793                 if (rt->fib6_metric == nrt->fib6_metric &&
4794                     mlxsw_sp_fib6_rt_can_mp(rt))
4795                         return fib6_entry;
4796                 if (rt->fib6_metric > nrt->fib6_metric)
4797                         break;
4798         }
4799
4800         return NULL;
4801 }
4802
4803 static struct mlxsw_sp_rt6 *
4804 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4805                             const struct fib6_info *rt)
4806 {
4807         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4808
4809         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4810                 if (mlxsw_sp_rt6->rt == rt)
4811                         return mlxsw_sp_rt6;
4812         }
4813
4814         return NULL;
4815 }
4816
4817 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4818                                         const struct fib6_info *rt,
4819                                         enum mlxsw_sp_ipip_type *ret)
4820 {
4821         return rt->fib6_nh.nh_dev &&
4822                mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4823 }
4824
4825 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4826                                        struct mlxsw_sp_nexthop_group *nh_grp,
4827                                        struct mlxsw_sp_nexthop *nh,
4828                                        const struct fib6_info *rt)
4829 {
4830         const struct mlxsw_sp_ipip_ops *ipip_ops;
4831         struct mlxsw_sp_ipip_entry *ipip_entry;
4832         struct net_device *dev = rt->fib6_nh.nh_dev;
4833         struct mlxsw_sp_rif *rif;
4834         int err;
4835
4836         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4837         if (ipip_entry) {
4838                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4839                 if (ipip_ops->can_offload(mlxsw_sp, dev,
4840                                           MLXSW_SP_L3_PROTO_IPV6)) {
4841                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4842                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4843                         return 0;
4844                 }
4845         }
4846
4847         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4848         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4849         if (!rif)
4850                 return 0;
4851         mlxsw_sp_nexthop_rif_init(nh, rif);
4852
4853         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4854         if (err)
4855                 goto err_nexthop_neigh_init;
4856
4857         return 0;
4858
4859 err_nexthop_neigh_init:
4860         mlxsw_sp_nexthop_rif_fini(nh);
4861         return err;
4862 }
4863
/* IPv6 counterpart of the type-specific teardown; simply forwards to the
 * common mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4869
4870 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4871                                   struct mlxsw_sp_nexthop_group *nh_grp,
4872                                   struct mlxsw_sp_nexthop *nh,
4873                                   const struct fib6_info *rt)
4874 {
4875         struct net_device *dev = rt->fib6_nh.nh_dev;
4876
4877         nh->nh_grp = nh_grp;
4878         nh->nh_weight = rt->fib6_nh.nh_weight;
4879         memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
4880         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4881
4882         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4883
4884         if (!dev)
4885                 return 0;
4886         nh->ifindex = dev->ifindex;
4887
4888         return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4889 }
4890
4891 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4892                                    struct mlxsw_sp_nexthop *nh)
4893 {
4894         mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4895         list_del(&nh->router_list_node);
4896         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4897 }
4898
4899 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4900                                     const struct fib6_info *rt)
4901 {
4902         return rt->fib6_flags & RTF_GATEWAY ||
4903                mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4904 }
4905
4906 static struct mlxsw_sp_nexthop_group *
4907 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4908                                struct mlxsw_sp_fib6_entry *fib6_entry)
4909 {
4910         struct mlxsw_sp_nexthop_group *nh_grp;
4911         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4912         struct mlxsw_sp_nexthop *nh;
4913         size_t alloc_size;
4914         int i = 0;
4915         int err;
4916
4917         alloc_size = sizeof(*nh_grp) +
4918                      fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4919         nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4920         if (!nh_grp)
4921                 return ERR_PTR(-ENOMEM);
4922         INIT_LIST_HEAD(&nh_grp->fib_list);
4923 #if IS_ENABLED(CONFIG_IPV6)
4924         nh_grp->neigh_tbl = &nd_tbl;
4925 #endif
4926         mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4927                                         struct mlxsw_sp_rt6, list);
4928         nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4929         nh_grp->count = fib6_entry->nrt6;
4930         for (i = 0; i < nh_grp->count; i++) {
4931                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
4932
4933                 nh = &nh_grp->nexthops[i];
4934                 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4935                 if (err)
4936                         goto err_nexthop6_init;
4937                 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4938         }
4939
4940         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4941         if (err)
4942                 goto err_nexthop_group_insert;
4943
4944         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4945         return nh_grp;
4946
4947 err_nexthop_group_insert:
4948 err_nexthop6_init:
4949         for (i--; i >= 0; i--) {
4950                 nh = &nh_grp->nexthops[i];
4951                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4952         }
4953         kfree(nh_grp);
4954         return ERR_PTR(err);
4955 }
4956
4957 static void
4958 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4959                                 struct mlxsw_sp_nexthop_group *nh_grp)
4960 {
4961         struct mlxsw_sp_nexthop *nh;
4962         int i = nh_grp->count;
4963
4964         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4965         for (i--; i >= 0; i--) {
4966                 nh = &nh_grp->nexthops[i];
4967                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4968         }
4969         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4970         WARN_ON(nh_grp->adj_index_valid);
4971         kfree(nh_grp);
4972 }
4973
4974 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4975                                        struct mlxsw_sp_fib6_entry *fib6_entry)
4976 {
4977         struct mlxsw_sp_nexthop_group *nh_grp;
4978
4979         nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4980         if (!nh_grp) {
4981                 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4982                 if (IS_ERR(nh_grp))
4983                         return PTR_ERR(nh_grp);
4984         }
4985
4986         list_add_tail(&fib6_entry->common.nexthop_group_node,
4987                       &nh_grp->fib_list);
4988         fib6_entry->common.nh_group = nh_grp;
4989
4990         return 0;
4991 }
4992
4993 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4994                                         struct mlxsw_sp_fib_entry *fib_entry)
4995 {
4996         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4997
4998         list_del(&fib_entry->nexthop_group_node);
4999         if (!list_empty(&nh_grp->fib_list))
5000                 return;
5001         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5002 }
5003
5004 static int
5005 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5006                                struct mlxsw_sp_fib6_entry *fib6_entry)
5007 {
5008         struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5009         int err;
5010
5011         fib6_entry->common.nh_group = NULL;
5012         list_del(&fib6_entry->common.nexthop_group_node);
5013
5014         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5015         if (err)
5016                 goto err_nexthop6_group_get;
5017
5018         /* In case this entry is offloaded, then the adjacency index
5019          * currently associated with it in the device's table is that
5020          * of the old group. Start using the new one instead.
5021          */
5022         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5023         if (err)
5024                 goto err_fib_node_entry_add;
5025
5026         if (list_empty(&old_nh_grp->fib_list))
5027                 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5028
5029         return 0;
5030
5031 err_fib_node_entry_add:
5032         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5033 err_nexthop6_group_get:
5034         list_add_tail(&fib6_entry->common.nexthop_group_node,
5035                       &old_nh_grp->fib_list);
5036         fib6_entry->common.nh_group = old_nh_grp;
5037         return err;
5038 }
5039
5040 static int
5041 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5042                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5043                                 struct fib6_info *rt)
5044 {
5045         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5046         int err;
5047
5048         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5049         if (IS_ERR(mlxsw_sp_rt6))
5050                 return PTR_ERR(mlxsw_sp_rt6);
5051
5052         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5053         fib6_entry->nrt6++;
5054
5055         err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5056         if (err)
5057                 goto err_nexthop6_group_update;
5058
5059         return 0;
5060
5061 err_nexthop6_group_update:
5062         fib6_entry->nrt6--;
5063         list_del(&mlxsw_sp_rt6->list);
5064         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5065         return err;
5066 }
5067
5068 static void
5069 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5070                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5071                                 struct fib6_info *rt)
5072 {
5073         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5074
5075         mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5076         if (WARN_ON(!mlxsw_sp_rt6))
5077                 return;
5078
5079         fib6_entry->nrt6--;
5080         list_del(&mlxsw_sp_rt6->list);
5081         mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5082         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5083 }
5084
5085 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5086                                          struct mlxsw_sp_fib_entry *fib_entry,
5087                                          const struct fib6_info *rt)
5088 {
5089         /* Packets hitting RTF_REJECT routes need to be discarded by the
5090          * stack. We can rely on their destination device not having a
5091          * RIF (it's the loopback device) and can thus use action type
5092          * local, which will cause them to be trapped with a lower
5093          * priority than packets that need to be locally received.
5094          */
5095         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5096                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5097         else if (rt->fib6_flags & RTF_REJECT)
5098                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5099         else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5100                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5101         else
5102                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5103 }
5104
5105 static void
5106 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5107 {
5108         struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5109
5110         list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5111                                  list) {
5112                 fib6_entry->nrt6--;
5113                 list_del(&mlxsw_sp_rt6->list);
5114                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5115         }
5116 }
5117
5118 static struct mlxsw_sp_fib6_entry *
5119 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5120                            struct mlxsw_sp_fib_node *fib_node,
5121                            struct fib6_info *rt)
5122 {
5123         struct mlxsw_sp_fib6_entry *fib6_entry;
5124         struct mlxsw_sp_fib_entry *fib_entry;
5125         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5126         int err;
5127
5128         fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5129         if (!fib6_entry)
5130                 return ERR_PTR(-ENOMEM);
5131         fib_entry = &fib6_entry->common;
5132
5133         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5134         if (IS_ERR(mlxsw_sp_rt6)) {
5135                 err = PTR_ERR(mlxsw_sp_rt6);
5136                 goto err_rt6_create;
5137         }
5138
5139         mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5140
5141         INIT_LIST_HEAD(&fib6_entry->rt6_list);
5142         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5143         fib6_entry->nrt6 = 1;
5144         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5145         if (err)
5146                 goto err_nexthop6_group_get;
5147
5148         fib_entry->fib_node = fib_node;
5149
5150         return fib6_entry;
5151
5152 err_nexthop6_group_get:
5153         list_del(&mlxsw_sp_rt6->list);
5154         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5155 err_rt6_create:
5156         kfree(fib6_entry);
5157         return ERR_PTR(err);
5158 }
5159
5160 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5161                                         struct mlxsw_sp_fib6_entry *fib6_entry)
5162 {
5163         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5164         mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5165         WARN_ON(fib6_entry->nrt6);
5166         kfree(fib6_entry);
5167 }
5168
5169 static struct mlxsw_sp_fib6_entry *
5170 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5171                               const struct fib6_info *nrt, bool replace)
5172 {
5173         struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5174
5175         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5176                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5177
5178                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5179                         continue;
5180                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5181                         break;
5182                 if (replace && rt->fib6_metric == nrt->fib6_metric) {
5183                         if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5184                             mlxsw_sp_fib6_rt_can_mp(nrt))
5185                                 return fib6_entry;
5186                         if (mlxsw_sp_fib6_rt_can_mp(nrt))
5187                                 fallback = fallback ?: fib6_entry;
5188                 }
5189                 if (rt->fib6_metric > nrt->fib6_metric)
5190                         return fallback ?: fib6_entry;
5191         }
5192
5193         return fallback;
5194 }
5195
5196 static int
5197 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5198                                bool replace)
5199 {
5200         struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5201         struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5202         struct mlxsw_sp_fib6_entry *fib6_entry;
5203
5204         fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5205
5206         if (replace && WARN_ON(!fib6_entry))
5207                 return -EINVAL;
5208
5209         if (fib6_entry) {
5210                 list_add_tail(&new6_entry->common.list,
5211                               &fib6_entry->common.list);
5212         } else {
5213                 struct mlxsw_sp_fib6_entry *last;
5214
5215                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
5216                         struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5217
5218                         if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5219                                 break;
5220                         fib6_entry = last;
5221                 }
5222
5223                 if (fib6_entry)
5224                         list_add(&new6_entry->common.list,
5225                                  &fib6_entry->common.list);
5226                 else
5227                         list_add(&new6_entry->common.list,
5228                                  &fib_node->entry_list);
5229         }
5230
5231         return 0;
5232 }
5233
5234 static void
5235 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5236 {
5237         list_del(&fib6_entry->common.list);
5238 }
5239
5240 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5241                                          struct mlxsw_sp_fib6_entry *fib6_entry,
5242                                          bool replace)
5243 {
5244         int err;
5245
5246         err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5247         if (err)
5248                 return err;
5249
5250         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5251         if (err)
5252                 goto err_fib_node_entry_add;
5253
5254         return 0;
5255
5256 err_fib_node_entry_add:
5257         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5258         return err;
5259 }
5260
5261 static void
5262 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5263                                 struct mlxsw_sp_fib6_entry *fib6_entry)
5264 {
5265         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5266         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5267 }
5268
5269 static struct mlxsw_sp_fib6_entry *
5270 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5271                            const struct fib6_info *rt)
5272 {
5273         struct mlxsw_sp_fib6_entry *fib6_entry;
5274         struct mlxsw_sp_fib_node *fib_node;
5275         struct mlxsw_sp_fib *fib;
5276         struct mlxsw_sp_vr *vr;
5277
5278         vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5279         if (!vr)
5280                 return NULL;
5281         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5282
5283         fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5284                                             sizeof(rt->fib6_dst.addr),
5285                                             rt->fib6_dst.plen);
5286         if (!fib_node)
5287                 return NULL;
5288
5289         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5290                 struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5291
5292                 if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5293                     rt->fib6_metric == iter_rt->fib6_metric &&
5294                     mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5295                         return fib6_entry;
5296         }
5297
5298         return NULL;
5299 }
5300
5301 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5302                                         struct mlxsw_sp_fib6_entry *fib6_entry,
5303                                         bool replace)
5304 {
5305         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5306         struct mlxsw_sp_fib6_entry *replaced;
5307
5308         if (!replace)
5309                 return;
5310
5311         replaced = list_next_entry(fib6_entry, common.list);
5312
5313         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5314         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5315         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5316 }
5317
5318 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5319                                     struct fib6_info *rt, bool replace)
5320 {
5321         struct mlxsw_sp_fib6_entry *fib6_entry;
5322         struct mlxsw_sp_fib_node *fib_node;
5323         int err;
5324
5325         if (mlxsw_sp->router->aborted)
5326                 return 0;
5327
5328         if (rt->fib6_src.plen)
5329                 return -EINVAL;
5330
5331         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5332                 return 0;
5333
5334         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5335                                          &rt->fib6_dst.addr,
5336                                          sizeof(rt->fib6_dst.addr),
5337                                          rt->fib6_dst.plen,
5338                                          MLXSW_SP_L3_PROTO_IPV6);
5339         if (IS_ERR(fib_node))
5340                 return PTR_ERR(fib_node);
5341
5342         /* Before creating a new entry, try to append route to an existing
5343          * multipath entry.
5344          */
5345         fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5346         if (fib6_entry) {
5347                 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5348                 if (err)
5349                         goto err_fib6_entry_nexthop_add;
5350                 return 0;
5351         }
5352
5353         fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5354         if (IS_ERR(fib6_entry)) {
5355                 err = PTR_ERR(fib6_entry);
5356                 goto err_fib6_entry_create;
5357         }
5358
5359         err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5360         if (err)
5361                 goto err_fib6_node_entry_link;
5362
5363         mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5364
5365         return 0;
5366
5367 err_fib6_node_entry_link:
5368         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5369 err_fib6_entry_create:
5370 err_fib6_entry_nexthop_add:
5371         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5372         return err;
5373 }
5374
5375 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5376                                      struct fib6_info *rt)
5377 {
5378         struct mlxsw_sp_fib6_entry *fib6_entry;
5379         struct mlxsw_sp_fib_node *fib_node;
5380
5381         if (mlxsw_sp->router->aborted)
5382                 return;
5383
5384         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5385                 return;
5386
5387         fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5388         if (WARN_ON(!fib6_entry))
5389                 return;
5390
5391         /* If route is part of a multipath entry, but not the last one
5392          * removed, then only reduce its nexthop group.
5393          */
5394         if (!list_is_singular(&fib6_entry->rt6_list)) {
5395                 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5396                 return;
5397         }
5398
5399         fib_node = fib6_entry->common.fib_node;
5400
5401         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5402         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5403         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5404 }
5405
5406 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5407                                             enum mlxsw_reg_ralxx_protocol proto,
5408                                             u8 tree_id)
5409 {
5410         char ralta_pl[MLXSW_REG_RALTA_LEN];
5411         char ralst_pl[MLXSW_REG_RALST_LEN];
5412         int i, err;
5413
5414         mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5415         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5416         if (err)
5417                 return err;
5418
5419         mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5420         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5421         if (err)
5422                 return err;
5423
5424         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5425                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5426                 char raltb_pl[MLXSW_REG_RALTB_LEN];
5427                 char ralue_pl[MLXSW_REG_RALUE_LEN];
5428
5429                 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5430                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5431                                       raltb_pl);
5432                 if (err)
5433                         return err;
5434
5435                 mlxsw_reg_ralue_pack(ralue_pl, proto,
5436                                      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5437                 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5438                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5439                                       ralue_pl);
5440                 if (err)
5441                         return err;
5442         }
5443
5444         return 0;
5445 }
5446
5447 static struct mlxsw_sp_mr_table *
5448 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5449 {
5450         if (family == RTNL_FAMILY_IPMR)
5451                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5452         else
5453                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5454 }
5455
5456 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5457                                      struct mfc_entry_notifier_info *men_info,
5458                                      bool replace)
5459 {
5460         struct mlxsw_sp_mr_table *mrt;
5461         struct mlxsw_sp_vr *vr;
5462
5463         if (mlxsw_sp->router->aborted)
5464                 return 0;
5465
5466         vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5467         if (IS_ERR(vr))
5468                 return PTR_ERR(vr);
5469
5470         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5471         return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5472 }
5473
5474 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5475                                       struct mfc_entry_notifier_info *men_info)
5476 {
5477         struct mlxsw_sp_mr_table *mrt;
5478         struct mlxsw_sp_vr *vr;
5479
5480         if (mlxsw_sp->router->aborted)
5481                 return;
5482
5483         vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5484         if (WARN_ON(!vr))
5485                 return;
5486
5487         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5488         mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5489         mlxsw_sp_vr_put(mlxsw_sp, vr);
5490 }
5491
5492 static int
5493 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5494                               struct vif_entry_notifier_info *ven_info)
5495 {
5496         struct mlxsw_sp_mr_table *mrt;
5497         struct mlxsw_sp_rif *rif;
5498         struct mlxsw_sp_vr *vr;
5499
5500         if (mlxsw_sp->router->aborted)
5501                 return 0;
5502
5503         vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5504         if (IS_ERR(vr))
5505                 return PTR_ERR(vr);
5506
5507         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5508         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5509         return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5510                                    ven_info->vif_index,
5511                                    ven_info->vif_flags, rif);
5512 }
5513
5514 static void
5515 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5516                               struct vif_entry_notifier_info *ven_info)
5517 {
5518         struct mlxsw_sp_mr_table *mrt;
5519         struct mlxsw_sp_vr *vr;
5520
5521         if (mlxsw_sp->router->aborted)
5522                 return;
5523
5524         vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5525         if (WARN_ON(!vr))
5526                 return;
5527
5528         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5529         mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5530         mlxsw_sp_vr_put(mlxsw_sp, vr);
5531 }
5532
5533 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5534 {
5535         enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5536         int err;
5537
5538         err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5539                                                MLXSW_SP_LPM_TREE_MIN);
5540         if (err)
5541                 return err;
5542
5543         /* The multicast router code does not need an abort trap as by default,
5544          * packets that don't match any routes are trapped to the CPU.
5545          */
5546
5547         proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5548         return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5549                                                 MLXSW_SP_LPM_TREE_MIN + 1);
5550 }
5551
5552 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5553                                      struct mlxsw_sp_fib_node *fib_node)
5554 {
5555         struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5556
5557         list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5558                                  common.list) {
5559                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5560
5561                 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5562                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5563                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5564                 /* Break when entry list is empty and node was freed.
5565                  * Otherwise, we'll access freed memory in the next
5566                  * iteration.
5567                  */
5568                 if (do_break)
5569                         break;
5570         }
5571 }
5572
5573 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5574                                      struct mlxsw_sp_fib_node *fib_node)
5575 {
5576         struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5577
5578         list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5579                                  common.list) {
5580                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5581
5582                 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5583                 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5584                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5585                 if (do_break)
5586                         break;
5587         }
5588 }
5589
5590 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5591                                     struct mlxsw_sp_fib_node *fib_node)
5592 {
5593         switch (fib_node->fib->proto) {
5594         case MLXSW_SP_L3_PROTO_IPV4:
5595                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5596                 break;
5597         case MLXSW_SP_L3_PROTO_IPV6:
5598                 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5599                 break;
5600         }
5601 }
5602
5603 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5604                                   struct mlxsw_sp_vr *vr,
5605                                   enum mlxsw_sp_l3proto proto)
5606 {
5607         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5608         struct mlxsw_sp_fib_node *fib_node, *tmp;
5609
5610         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5611                 bool do_break = &tmp->list == &fib->node_list;
5612
5613                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5614                 if (do_break)
5615                         break;
5616         }
5617 }
5618
5619 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5620 {
5621         int i, j;
5622
5623         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5624                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5625
5626                 if (!mlxsw_sp_vr_is_used(vr))
5627                         continue;
5628
5629                 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5630                         mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5631                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5632
5633                 /* If virtual router was only used for IPv4, then it's no
5634                  * longer used.
5635                  */
5636                 if (!mlxsw_sp_vr_is_used(vr))
5637                         continue;
5638                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5639         }
5640 }
5641
5642 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5643 {
5644         int err;
5645
5646         if (mlxsw_sp->router->aborted)
5647                 return;
5648         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5649         mlxsw_sp_router_fib_flush(mlxsw_sp);
5650         mlxsw_sp->router->aborted = true;
5651         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5652         if (err)
5653                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5654 }
5655
5656 struct mlxsw_sp_fib_event_work {
5657         struct work_struct work;
5658         union {
5659                 struct fib6_entry_notifier_info fen6_info;
5660                 struct fib_entry_notifier_info fen_info;
5661                 struct fib_rule_notifier_info fr_info;
5662                 struct fib_nh_notifier_info fnh_info;
5663                 struct mfc_entry_notifier_info men_info;
5664                 struct vif_entry_notifier_info ven_info;
5665         };
5666         struct mlxsw_sp *mlxsw_sp;
5667         unsigned long event;
5668 };
5669
5670 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5671 {
5672         struct mlxsw_sp_fib_event_work *fib_work =
5673                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5674         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5675         bool replace, append;
5676         int err;
5677
5678         /* Protect internal structures from changes */
5679         rtnl_lock();
5680         mlxsw_sp_span_respin(mlxsw_sp);
5681
5682         switch (fib_work->event) {
5683         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5684         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5685         case FIB_EVENT_ENTRY_ADD:
5686                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5687                 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5688                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5689                                                replace, append);
5690                 if (err)
5691                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5692                 fib_info_put(fib_work->fen_info.fi);
5693                 break;
5694         case FIB_EVENT_ENTRY_DEL:
5695                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5696                 fib_info_put(fib_work->fen_info.fi);
5697                 break;
5698         case FIB_EVENT_RULE_ADD:
5699                 /* if we get here, a rule was added that we do not support.
5700                  * just do the fib_abort
5701                  */
5702                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5703                 break;
5704         case FIB_EVENT_NH_ADD: /* fall through */
5705         case FIB_EVENT_NH_DEL:
5706                 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5707                                         fib_work->fnh_info.fib_nh);
5708                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5709                 break;
5710         }
5711         rtnl_unlock();
5712         kfree(fib_work);
5713 }
5714
5715 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5716 {
5717         struct mlxsw_sp_fib_event_work *fib_work =
5718                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5719         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5720         bool replace;
5721         int err;
5722
5723         rtnl_lock();
5724         mlxsw_sp_span_respin(mlxsw_sp);
5725
5726         switch (fib_work->event) {
5727         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5728         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5729         case FIB_EVENT_ENTRY_ADD:
5730                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5731                 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5732                                                fib_work->fen6_info.rt, replace);
5733                 if (err)
5734                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5735                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5736                 break;
5737         case FIB_EVENT_ENTRY_DEL:
5738                 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5739                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5740                 break;
5741         case FIB_EVENT_RULE_ADD:
5742                 /* if we get here, a rule was added that we do not support.
5743                  * just do the fib_abort
5744                  */
5745                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5746                 break;
5747         }
5748         rtnl_unlock();
5749         kfree(fib_work);
5750 }
5751
5752 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5753 {
5754         struct mlxsw_sp_fib_event_work *fib_work =
5755                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5756         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5757         bool replace;
5758         int err;
5759
5760         rtnl_lock();
5761         switch (fib_work->event) {
5762         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5763         case FIB_EVENT_ENTRY_ADD:
5764                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5765
5766                 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5767                                                 replace);
5768                 if (err)
5769                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5770                 mr_cache_put(fib_work->men_info.mfc);
5771                 break;
5772         case FIB_EVENT_ENTRY_DEL:
5773                 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5774                 mr_cache_put(fib_work->men_info.mfc);
5775                 break;
5776         case FIB_EVENT_VIF_ADD:
5777                 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5778                                                     &fib_work->ven_info);
5779                 if (err)
5780                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5781                 dev_put(fib_work->ven_info.dev);
5782                 break;
5783         case FIB_EVENT_VIF_DEL:
5784                 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5785                                               &fib_work->ven_info);
5786                 dev_put(fib_work->ven_info.dev);
5787                 break;
5788         case FIB_EVENT_RULE_ADD:
5789                 /* if we get here, a rule was added that we do not support.
5790                  * just do the fib_abort
5791                  */
5792                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5793                 break;
5794         }
5795         rtnl_unlock();
5796         kfree(fib_work);
5797 }
5798
5799 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5800                                        struct fib_notifier_info *info)
5801 {
5802         struct fib_entry_notifier_info *fen_info;
5803         struct fib_nh_notifier_info *fnh_info;
5804
5805         switch (fib_work->event) {
5806         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5807         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5808         case FIB_EVENT_ENTRY_ADD: /* fall through */
5809         case FIB_EVENT_ENTRY_DEL:
5810                 fen_info = container_of(info, struct fib_entry_notifier_info,
5811                                         info);
5812                 fib_work->fen_info = *fen_info;
5813                 /* Take reference on fib_info to prevent it from being
5814                  * freed while work is queued. Release it afterwards.
5815                  */
5816                 fib_info_hold(fib_work->fen_info.fi);
5817                 break;
5818         case FIB_EVENT_NH_ADD: /* fall through */
5819         case FIB_EVENT_NH_DEL:
5820                 fnh_info = container_of(info, struct fib_nh_notifier_info,
5821                                         info);
5822                 fib_work->fnh_info = *fnh_info;
5823                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5824                 break;
5825         }
5826 }
5827
5828 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5829                                        struct fib_notifier_info *info)
5830 {
5831         struct fib6_entry_notifier_info *fen6_info;
5832
5833         switch (fib_work->event) {
5834         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5835         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5836         case FIB_EVENT_ENTRY_ADD: /* fall through */
5837         case FIB_EVENT_ENTRY_DEL:
5838                 fen6_info = container_of(info, struct fib6_entry_notifier_info,
5839                                          info);
5840                 fib_work->fen6_info = *fen6_info;
5841                 fib6_info_hold(fib_work->fen6_info.rt);
5842                 break;
5843         }
5844 }
5845
5846 static void
5847 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5848                             struct fib_notifier_info *info)
5849 {
5850         switch (fib_work->event) {
5851         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5852         case FIB_EVENT_ENTRY_ADD: /* fall through */
5853         case FIB_EVENT_ENTRY_DEL:
5854                 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5855                 mr_cache_hold(fib_work->men_info.mfc);
5856                 break;
5857         case FIB_EVENT_VIF_ADD: /* fall through */
5858         case FIB_EVENT_VIF_DEL:
5859                 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5860                 dev_hold(fib_work->ven_info.dev);
5861                 break;
5862         }
5863 }
5864
5865 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5866                                           struct fib_notifier_info *info,
5867                                           struct mlxsw_sp *mlxsw_sp)
5868 {
5869         struct netlink_ext_ack *extack = info->extack;
5870         struct fib_rule_notifier_info *fr_info;
5871         struct fib_rule *rule;
5872         int err = 0;
5873
5874         /* nothing to do at the moment */
5875         if (event == FIB_EVENT_RULE_DEL)
5876                 return 0;
5877
5878         if (mlxsw_sp->router->aborted)
5879                 return 0;
5880
5881         fr_info = container_of(info, struct fib_rule_notifier_info, info);
5882         rule = fr_info->rule;
5883
5884         switch (info->family) {
5885         case AF_INET:
5886                 if (!fib4_rule_default(rule) && !rule->l3mdev)
5887                         err = -EOPNOTSUPP;
5888                 break;
5889         case AF_INET6:
5890                 if (!fib6_rule_default(rule) && !rule->l3mdev)
5891                         err = -EOPNOTSUPP;
5892                 break;
5893         case RTNL_FAMILY_IPMR:
5894                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
5895                         err = -EOPNOTSUPP;
5896                 break;
5897         case RTNL_FAMILY_IP6MR:
5898                 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
5899                         err = -EOPNOTSUPP;
5900                 break;
5901         }
5902
5903         if (err < 0)
5904                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
5905
5906         return err;
5907 }
5908
5909 /* Called with rcu_read_lock() */
5910 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5911                                      unsigned long event, void *ptr)
5912 {
5913         struct mlxsw_sp_fib_event_work *fib_work;
5914         struct fib_notifier_info *info = ptr;
5915         struct mlxsw_sp_router *router;
5916         int err;
5917
5918         if (!net_eq(info->net, &init_net) ||
5919             (info->family != AF_INET && info->family != AF_INET6 &&
5920              info->family != RTNL_FAMILY_IPMR &&
5921              info->family != RTNL_FAMILY_IP6MR))
5922                 return NOTIFY_DONE;
5923
5924         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5925
5926         switch (event) {
5927         case FIB_EVENT_RULE_ADD: /* fall through */
5928         case FIB_EVENT_RULE_DEL:
5929                 err = mlxsw_sp_router_fib_rule_event(event, info,
5930                                                      router->mlxsw_sp);
5931                 if (!err || info->extack)
5932                         return notifier_from_errno(err);
5933                 break;
5934         case FIB_EVENT_ENTRY_ADD:
5935                 if (router->aborted) {
5936                         NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
5937                         return notifier_from_errno(-EINVAL);
5938                 }
5939                 break;
5940         }
5941
5942         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5943         if (WARN_ON(!fib_work))
5944                 return NOTIFY_BAD;
5945
5946         fib_work->mlxsw_sp = router->mlxsw_sp;
5947         fib_work->event = event;
5948
5949         switch (info->family) {
5950         case AF_INET:
5951                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5952                 mlxsw_sp_router_fib4_event(fib_work, info);
5953                 break;
5954         case AF_INET6:
5955                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5956                 mlxsw_sp_router_fib6_event(fib_work, info);
5957                 break;
5958         case RTNL_FAMILY_IP6MR:
5959         case RTNL_FAMILY_IPMR:
5960                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5961                 mlxsw_sp_router_fibmr_event(fib_work, info);
5962                 break;
5963         }
5964
5965         mlxsw_core_schedule_work(&fib_work->work);
5966
5967         return NOTIFY_DONE;
5968 }
5969
5970 static struct mlxsw_sp_rif *
5971 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5972                          const struct net_device *dev)
5973 {
5974         int i;
5975
5976         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5977                 if (mlxsw_sp->router->rifs[i] &&
5978                     mlxsw_sp->router->rifs[i]->dev == dev)
5979                         return mlxsw_sp->router->rifs[i];
5980
5981         return NULL;
5982 }
5983
5984 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5985 {
5986         char ritr_pl[MLXSW_REG_RITR_LEN];
5987         int err;
5988
5989         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5990         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5991         if (WARN_ON_ONCE(err))
5992                 return err;
5993
5994         mlxsw_reg_ritr_enable_set(ritr_pl, false);
5995         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5996 }
5997
5998 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5999                                           struct mlxsw_sp_rif *rif)
6000 {
6001         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6002         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6003         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6004 }
6005
6006 static bool
6007 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6008                            unsigned long event)
6009 {
6010         struct inet6_dev *inet6_dev;
6011         bool addr_list_empty = true;
6012         struct in_device *idev;
6013
6014         switch (event) {
6015         case NETDEV_UP:
6016                 return rif == NULL;
6017         case NETDEV_DOWN:
6018                 idev = __in_dev_get_rtnl(dev);
6019                 if (idev && idev->ifa_list)
6020                         addr_list_empty = false;
6021
6022                 inet6_dev = __in6_dev_get(dev);
6023                 if (addr_list_empty && inet6_dev &&
6024                     !list_empty(&inet6_dev->addr_list))
6025                         addr_list_empty = false;
6026
6027                 if (rif && addr_list_empty &&
6028                     !netif_is_l3_slave(rif->dev))
6029                         return true;
6030                 /* It is possible we already removed the RIF ourselves
6031                  * if it was assigned to a netdev that is now a bridge
6032                  * or LAG slave.
6033                  */
6034                 return false;
6035         }
6036
6037         return false;
6038 }
6039
6040 static enum mlxsw_sp_rif_type
6041 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6042                       const struct net_device *dev)
6043 {
6044         enum mlxsw_sp_fid_type type;
6045
6046         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6047                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
6048
6049         /* Otherwise RIF type is derived from the type of the underlying FID. */
6050         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6051                 type = MLXSW_SP_FID_TYPE_8021Q;
6052         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6053                 type = MLXSW_SP_FID_TYPE_8021Q;
6054         else if (netif_is_bridge_master(dev))
6055                 type = MLXSW_SP_FID_TYPE_8021D;
6056         else
6057                 type = MLXSW_SP_FID_TYPE_RFID;
6058
6059         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6060 }
6061
6062 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6063 {
6064         int i;
6065
6066         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6067                 if (!mlxsw_sp->router->rifs[i]) {
6068                         *p_rif_index = i;
6069                         return 0;
6070                 }
6071         }
6072
6073         return -ENOBUFS;
6074 }
6075
6076 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6077                                                u16 vr_id,
6078                                                struct net_device *l3_dev)
6079 {
6080         struct mlxsw_sp_rif *rif;
6081
6082         rif = kzalloc(rif_size, GFP_KERNEL);
6083         if (!rif)
6084                 return NULL;
6085
6086         INIT_LIST_HEAD(&rif->nexthop_list);
6087         INIT_LIST_HEAD(&rif->neigh_list);
6088         ether_addr_copy(rif->addr, l3_dev->dev_addr);
6089         rif->mtu = l3_dev->mtu;
6090         rif->vr_id = vr_id;
6091         rif->dev = l3_dev;
6092         rif->rif_index = rif_index;
6093
6094         return rif;
6095 }
6096
6097 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6098                                            u16 rif_index)
6099 {
6100         return mlxsw_sp->router->rifs[rif_index];
6101 }
6102
6103 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6104 {
6105         return rif->rif_index;
6106 }
6107
6108 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6109 {
6110         return lb_rif->common.rif_index;
6111 }
6112
6113 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6114 {
6115         return lb_rif->ul_vr_id;
6116 }
6117
6118 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6119 {
6120         return rif->dev->ifindex;
6121 }
6122
6123 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6124 {
6125         return rif->dev;
6126 }
6127
6128 static struct mlxsw_sp_rif *
6129 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6130                     const struct mlxsw_sp_rif_params *params,
6131                     struct netlink_ext_ack *extack)
6132 {
6133         u32 tb_id = l3mdev_fib_table(params->dev);
6134         const struct mlxsw_sp_rif_ops *ops;
6135         struct mlxsw_sp_fid *fid = NULL;
6136         enum mlxsw_sp_rif_type type;
6137         struct mlxsw_sp_rif *rif;
6138         struct mlxsw_sp_vr *vr;
6139         u16 rif_index;
6140         int i, err;
6141
6142         type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6143         ops = mlxsw_sp->router->rif_ops_arr[type];
6144
6145         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6146         if (IS_ERR(vr))
6147                 return ERR_CAST(vr);
6148         vr->rif_count++;
6149
6150         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6151         if (err) {
6152                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6153                 goto err_rif_index_alloc;
6154         }
6155
6156         rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6157         if (!rif) {
6158                 err = -ENOMEM;
6159                 goto err_rif_alloc;
6160         }
6161         rif->mlxsw_sp = mlxsw_sp;
6162         rif->ops = ops;
6163
6164         if (ops->fid_get) {
6165                 fid = ops->fid_get(rif);
6166                 if (IS_ERR(fid)) {
6167                         err = PTR_ERR(fid);
6168                         goto err_fid_get;
6169                 }
6170                 rif->fid = fid;
6171         }
6172
6173         if (ops->setup)
6174                 ops->setup(rif, params);
6175
6176         err = ops->configure(rif);
6177         if (err)
6178                 goto err_configure;
6179
6180         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6181                 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6182                 if (err)
6183                         goto err_mr_rif_add;
6184         }
6185
6186         mlxsw_sp_rif_counters_alloc(rif);
6187         mlxsw_sp->router->rifs[rif_index] = rif;
6188
6189         return rif;
6190
6191 err_mr_rif_add:
6192         for (i--; i >= 0; i--)
6193                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6194         ops->deconfigure(rif);
6195 err_configure:
6196         if (fid)
6197                 mlxsw_sp_fid_put(fid);
6198 err_fid_get:
6199         kfree(rif);
6200 err_rif_alloc:
6201 err_rif_index_alloc:
6202         vr->rif_count--;
6203         mlxsw_sp_vr_put(mlxsw_sp, vr);
6204         return ERR_PTR(err);
6205 }
6206
6207 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6208 {
6209         const struct mlxsw_sp_rif_ops *ops = rif->ops;
6210         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6211         struct mlxsw_sp_fid *fid = rif->fid;
6212         struct mlxsw_sp_vr *vr;
6213         int i;
6214
6215         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6216         vr = &mlxsw_sp->router->vrs[rif->vr_id];
6217
6218         mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6219         mlxsw_sp_rif_counters_free(rif);
6220         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6221                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6222         ops->deconfigure(rif);
6223         if (fid)
6224                 /* Loopback RIFs are not associated with a FID. */
6225                 mlxsw_sp_fid_put(fid);
6226         kfree(rif);
6227         vr->rif_count--;
6228         mlxsw_sp_vr_put(mlxsw_sp, vr);
6229 }
6230
6231 static void
6232 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6233                                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6234 {
6235         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6236
6237         params->vid = mlxsw_sp_port_vlan->vid;
6238         params->lag = mlxsw_sp_port->lagged;
6239         if (params->lag)
6240                 params->lag_id = mlxsw_sp_port->lag_id;
6241         else
6242                 params->system_port = mlxsw_sp_port->local_port;
6243 }
6244
6245 static int
6246 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6247                                struct net_device *l3_dev,
6248                                struct netlink_ext_ack *extack)
6249 {
6250         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6251         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6252         u16 vid = mlxsw_sp_port_vlan->vid;
6253         struct mlxsw_sp_rif *rif;
6254         struct mlxsw_sp_fid *fid;
6255         int err;
6256
6257         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6258         if (!rif) {
6259                 struct mlxsw_sp_rif_params params = {
6260                         .dev = l3_dev,
6261                 };
6262
6263                 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6264                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6265                 if (IS_ERR(rif))
6266                         return PTR_ERR(rif);
6267         }
6268
6269         /* FID was already created, just take a reference */
6270         fid = rif->ops->fid_get(rif);
6271         err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6272         if (err)
6273                 goto err_fid_port_vid_map;
6274
6275         err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6276         if (err)
6277                 goto err_port_vid_learning_set;
6278
6279         err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6280                                         BR_STATE_FORWARDING);
6281         if (err)
6282                 goto err_port_vid_stp_set;
6283
6284         mlxsw_sp_port_vlan->fid = fid;
6285
6286         return 0;
6287
6288 err_port_vid_stp_set:
6289         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6290 err_port_vid_learning_set:
6291         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6292 err_fid_port_vid_map:
6293         mlxsw_sp_fid_put(fid);
6294         return err;
6295 }
6296
6297 void
6298 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6299 {
6300         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6301         struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6302         u16 vid = mlxsw_sp_port_vlan->vid;
6303
6304         if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6305                 return;
6306
6307         mlxsw_sp_port_vlan->fid = NULL;
6308         mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6309         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6310         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6311         /* If router port holds the last reference on the rFID, then the
6312          * associated Sub-port RIF will be destroyed.
6313          */
6314         mlxsw_sp_fid_put(fid);
6315 }
6316
6317 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6318                                              struct net_device *port_dev,
6319                                              unsigned long event, u16 vid,
6320                                              struct netlink_ext_ack *extack)
6321 {
6322         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6323         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6324
6325         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6326         if (WARN_ON(!mlxsw_sp_port_vlan))
6327                 return -EINVAL;
6328
6329         switch (event) {
6330         case NETDEV_UP:
6331                 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6332                                                       l3_dev, extack);
6333         case NETDEV_DOWN:
6334                 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6335                 break;
6336         }
6337
6338         return 0;
6339 }
6340
/* Handle an address event on a port netdev. Ports enslaved to a bridge,
 * a LAG or OVS are ignored; otherwise the event is handled for VID 1 with
 * the port itself as the L3 device.
 */
static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
                                        unsigned long event,
                                        struct netlink_ext_ack *extack)
{
        if (netif_is_bridge_port(port_dev))
                return 0;
        if (netif_is_lag_port(port_dev))
                return 0;
        if (netif_is_ovs_port(port_dev))
                return 0;

        return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
                                                 extack);
}
6353
6354 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6355                                          struct net_device *lag_dev,
6356                                          unsigned long event, u16 vid,
6357                                          struct netlink_ext_ack *extack)
6358 {
6359         struct net_device *port_dev;
6360         struct list_head *iter;
6361         int err;
6362
6363         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6364                 if (mlxsw_sp_port_dev_check(port_dev)) {
6365                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6366                                                                 port_dev,
6367                                                                 event, vid,
6368                                                                 extack);
6369                         if (err)
6370                                 return err;
6371                 }
6372         }
6373
6374         return 0;
6375 }
6376
/* Handle an address event on a LAG netdev. A LAG enslaved to a bridge is
 * ignored; otherwise the event is handled for VID 1 with the LAG itself as
 * the L3 device.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
                                       unsigned long event,
                                       struct netlink_ext_ack *extack)
{
        if (!netif_is_bridge_port(lag_dev))
                return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
                                                     1, extack);

        return 0;
}
6387
6388 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6389                                           unsigned long event,
6390                                           struct netlink_ext_ack *extack)
6391 {
6392         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6393         struct mlxsw_sp_rif_params params = {
6394                 .dev = l3_dev,
6395         };
6396         struct mlxsw_sp_rif *rif;
6397
6398         switch (event) {
6399         case NETDEV_UP:
6400                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6401                 if (IS_ERR(rif))
6402                         return PTR_ERR(rif);
6403                 break;
6404         case NETDEV_DOWN:
6405                 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6406                 mlxsw_sp_rif_destroy(rif);
6407                 break;
6408         }
6409
6410         return 0;
6411 }
6412
6413 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6414                                         unsigned long event,
6415                                         struct netlink_ext_ack *extack)
6416 {
6417         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6418         u16 vid = vlan_dev_vlan_id(vlan_dev);
6419
6420         if (netif_is_bridge_port(vlan_dev))
6421                 return 0;
6422
6423         if (mlxsw_sp_port_dev_check(real_dev))
6424                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6425                                                          event, vid, extack);
6426         else if (netif_is_lag_master(real_dev))
6427                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6428                                                      vid, extack);
6429         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6430                 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6431
6432         return 0;
6433 }
6434
/* Dispatch an inetaddr event to the handler matching the netdev type.
 * The checks run in a fixed order: mlxsw port, LAG master, bridge master,
 * VLAN device. Any other netdev is ignored and 0 is returned.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);

	return 0;
}
6450
6451 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6452                             unsigned long event, void *ptr)
6453 {
6454         struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6455         struct net_device *dev = ifa->ifa_dev->dev;
6456         struct mlxsw_sp *mlxsw_sp;
6457         struct mlxsw_sp_rif *rif;
6458         int err = 0;
6459
6460         /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6461         if (event == NETDEV_UP)
6462                 goto out;
6463
6464         mlxsw_sp = mlxsw_sp_lower_get(dev);
6465         if (!mlxsw_sp)
6466                 goto out;
6467
6468         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6469         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6470                 goto out;
6471
6472         err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6473 out:
6474         return notifier_from_errno(err);
6475 }
6476
6477 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6478                                   unsigned long event, void *ptr)
6479 {
6480         struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6481         struct net_device *dev = ivi->ivi_dev->dev;
6482         struct mlxsw_sp *mlxsw_sp;
6483         struct mlxsw_sp_rif *rif;
6484         int err = 0;
6485
6486         mlxsw_sp = mlxsw_sp_lower_get(dev);
6487         if (!mlxsw_sp)
6488                 goto out;
6489
6490         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6491         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6492                 goto out;
6493
6494         err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6495 out:
6496         return notifier_from_errno(err);
6497 }
6498
/* Deferred inet6addr notification. An instance is allocated by
 * mlxsw_sp_inet6addr_event() and freed by the work handler
 * mlxsw_sp_inet6addr_event_work().
 */
struct mlxsw_sp_inet6addr_event_work {
	/* Work item passed to mlxsw_core_schedule_work(). */
	struct work_struct work;
	/* Netdev the event refers to; held with dev_hold() while queued. */
	struct net_device *dev;
	/* Notifier event code to process. */
	unsigned long event;
};
6504
6505 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6506 {
6507         struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6508                 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6509         struct net_device *dev = inet6addr_work->dev;
6510         unsigned long event = inet6addr_work->event;
6511         struct mlxsw_sp *mlxsw_sp;
6512         struct mlxsw_sp_rif *rif;
6513
6514         rtnl_lock();
6515         mlxsw_sp = mlxsw_sp_lower_get(dev);
6516         if (!mlxsw_sp)
6517                 goto out;
6518
6519         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6520         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6521                 goto out;
6522
6523         __mlxsw_sp_inetaddr_event(dev, event, NULL);
6524 out:
6525         rtnl_unlock();
6526         dev_put(dev);
6527         kfree(inet6addr_work);
6528 }
6529
6530 /* Called with rcu_read_lock() */
6531 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6532                              unsigned long event, void *ptr)
6533 {
6534         struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6535         struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6536         struct net_device *dev = if6->idev->dev;
6537
6538         /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6539         if (event == NETDEV_UP)
6540                 return NOTIFY_DONE;
6541
6542         if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6543                 return NOTIFY_DONE;
6544
6545         inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6546         if (!inet6addr_work)
6547                 return NOTIFY_BAD;
6548
6549         INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6550         inet6addr_work->dev = dev;
6551         inet6addr_work->event = event;
6552         dev_hold(dev);
6553         mlxsw_core_schedule_work(&inet6addr_work->work);
6554
6555         return NOTIFY_DONE;
6556 }
6557
6558 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6559                                    unsigned long event, void *ptr)
6560 {
6561         struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6562         struct net_device *dev = i6vi->i6vi_dev->dev;
6563         struct mlxsw_sp *mlxsw_sp;
6564         struct mlxsw_sp_rif *rif;
6565         int err = 0;
6566
6567         mlxsw_sp = mlxsw_sp_lower_get(dev);
6568         if (!mlxsw_sp)
6569                 goto out;
6570
6571         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6572         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6573                 goto out;
6574
6575         err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6576 out:
6577         return notifier_from_errno(err);
6578 }
6579
6580 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6581                              const char *mac, int mtu)
6582 {
6583         char ritr_pl[MLXSW_REG_RITR_LEN];
6584         int err;
6585
6586         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6587         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6588         if (err)
6589                 return err;
6590
6591         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6592         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6593         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6594         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6595 }
6596
/* Re-synchronize the RIF bound to @dev with the netdev's current MAC
 * address and MTU.
 *
 * Returns 0 when no mlxsw_sp instance or no RIF is associated with @dev,
 * or when the update succeeds. On failure the error of the failing step is
 * returned after the earlier steps have been rolled back; the return
 * values of the rollback calls themselves are not checked.
 */
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	u16 fid_index;
	int err;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	fid_index = mlxsw_sp_fid_index(rif->fid);

	/* FDB op with 'false' for the address cached in the RIF; presumably
	 * this removes the entry for the old MAC - confirm against
	 * mlxsw_sp_rif_fdb_op().
	 */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
	if (err)
		return err;

	/* Program the netdev's current MAC and MTU into the RIF. */
	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	/* FDB op with 'true' for the netdev's current MAC. */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
	if (err)
		goto err_rif_fdb_op;

	if (rif->mtu != dev->mtu) {
		struct mlxsw_sp_vr *vr;
		int i;

		/* The RIF is relevant only to its mr_table instance, as unlike
		 * unicast routing, in multicast routing a RIF cannot be shared
		 * between several multicast routing tables.
		 */
		vr = &mlxsw_sp->router->vrs[rif->vr_id];
		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
						   rif, dev->mtu);
	}

	/* Hardware is updated; refresh the values cached in the RIF. */
	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

	/* Rollback: rif->addr and rif->mtu still hold the previous values. */
err_rif_fdb_op:
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
	return err;
}
6653
6654 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6655                                   struct net_device *l3_dev,
6656                                   struct netlink_ext_ack *extack)
6657 {
6658         struct mlxsw_sp_rif *rif;
6659
6660         /* If netdev is already associated with a RIF, then we need to
6661          * destroy it and create a new one with the new virtual router ID.
6662          */
6663         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6664         if (rif)
6665                 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6666
6667         return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6668 }
6669
6670 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6671                                     struct net_device *l3_dev)
6672 {
6673         struct mlxsw_sp_rif *rif;
6674
6675         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6676         if (!rif)
6677                 return;
6678         __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6679 }
6680
6681 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6682                                  struct netdev_notifier_changeupper_info *info)
6683 {
6684         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6685         int err = 0;
6686
6687         if (!mlxsw_sp)
6688                 return 0;
6689
6690         switch (event) {
6691         case NETDEV_PRECHANGEUPPER:
6692                 return 0;
6693         case NETDEV_CHANGEUPPER:
6694                 if (info->linking) {
6695                         struct netlink_ext_ack *extack;
6696
6697                         extack = netdev_notifier_info_to_extack(&info->info);
6698                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6699                 } else {
6700                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6701                 }
6702                 break;
6703         }
6704
6705         return err;
6706 }
6707
6708 static struct mlxsw_sp_rif_subport *
6709 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6710 {
6711         return container_of(rif, struct mlxsw_sp_rif_subport, common);
6712 }
6713
6714 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6715                                        const struct mlxsw_sp_rif_params *params)
6716 {
6717         struct mlxsw_sp_rif_subport *rif_subport;
6718
6719         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6720         rif_subport->vid = params->vid;
6721         rif_subport->lag = params->lag;
6722         if (params->lag)
6723                 rif_subport->lag_id = params->lag_id;
6724         else
6725                 rif_subport->system_port = params->system_port;
6726 }
6727
6728 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6729 {
6730         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6731         struct mlxsw_sp_rif_subport *rif_subport;
6732         char ritr_pl[MLXSW_REG_RITR_LEN];
6733
6734         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6735         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6736                             rif->rif_index, rif->vr_id, rif->dev->mtu);
6737         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6738         mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6739                                   rif_subport->lag ? rif_subport->lag_id :
6740                                                      rif_subport->system_port,
6741                                   rif_subport->vid);
6742
6743         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6744 }
6745
6746 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6747 {
6748         int err;
6749
6750         err = mlxsw_sp_rif_subport_op(rif, true);
6751         if (err)
6752                 return err;
6753
6754         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6755                                   mlxsw_sp_fid_index(rif->fid), true);
6756         if (err)
6757                 goto err_rif_fdb_op;
6758
6759         mlxsw_sp_fid_rif_set(rif->fid, rif);
6760         return 0;
6761
6762 err_rif_fdb_op:
6763         mlxsw_sp_rif_subport_op(rif, false);
6764         return err;
6765 }
6766
6767 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6768 {
6769         struct mlxsw_sp_fid *fid = rif->fid;
6770
6771         mlxsw_sp_fid_rif_set(fid, NULL);
6772         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6773                             mlxsw_sp_fid_index(fid), false);
6774         mlxsw_sp_rif_subport_op(rif, false);
6775 }
6776
6777 static struct mlxsw_sp_fid *
6778 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6779 {
6780         return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6781 }
6782
/* Operations for RIFs of type MLXSW_SP_RIF_TYPE_SUBPORT. The RIF is
 * allocated as a struct mlxsw_sp_rif_subport.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};
6791
6792 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6793                                     enum mlxsw_reg_ritr_if_type type,
6794                                     u16 vid_fid, bool enable)
6795 {
6796         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6797         char ritr_pl[MLXSW_REG_RITR_LEN];
6798
6799         mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6800                             rif->dev->mtu);
6801         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6802         mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6803
6804         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6805 }
6806
6807 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6808 {
6809         return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6810 }
6811
6812 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6813 {
6814         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6815         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6816         int err;
6817
6818         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6819         if (err)
6820                 return err;
6821
6822         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6823                                      mlxsw_sp_router_port(mlxsw_sp), true);
6824         if (err)
6825                 goto err_fid_mc_flood_set;
6826
6827         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6828                                      mlxsw_sp_router_port(mlxsw_sp), true);
6829         if (err)
6830                 goto err_fid_bc_flood_set;
6831
6832         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6833                                   mlxsw_sp_fid_index(rif->fid), true);
6834         if (err)
6835                 goto err_rif_fdb_op;
6836
6837         mlxsw_sp_fid_rif_set(rif->fid, rif);
6838         return 0;
6839
6840 err_rif_fdb_op:
6841         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6842                                mlxsw_sp_router_port(mlxsw_sp), false);
6843 err_fid_bc_flood_set:
6844         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6845                                mlxsw_sp_router_port(mlxsw_sp), false);
6846 err_fid_mc_flood_set:
6847         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6848         return err;
6849 }
6850
6851 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6852 {
6853         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6854         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6855         struct mlxsw_sp_fid *fid = rif->fid;
6856
6857         mlxsw_sp_fid_rif_set(fid, NULL);
6858         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6859                             mlxsw_sp_fid_index(fid), false);
6860         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6861                                mlxsw_sp_router_port(mlxsw_sp), false);
6862         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6863                                mlxsw_sp_router_port(mlxsw_sp), false);
6864         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6865 }
6866
6867 static struct mlxsw_sp_fid *
6868 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6869 {
6870         u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6871
6872         return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6873 }
6874
/* Operations for RIFs of type MLXSW_SP_RIF_TYPE_VLAN. No .setup callback
 * is provided and the RIF is a plain struct mlxsw_sp_rif.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
};
6882
6883 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6884 {
6885         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6886         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6887         int err;
6888
6889         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6890                                        true);
6891         if (err)
6892                 return err;
6893
6894         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6895                                      mlxsw_sp_router_port(mlxsw_sp), true);
6896         if (err)
6897                 goto err_fid_mc_flood_set;
6898
6899         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6900                                      mlxsw_sp_router_port(mlxsw_sp), true);
6901         if (err)
6902                 goto err_fid_bc_flood_set;
6903
6904         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6905                                   mlxsw_sp_fid_index(rif->fid), true);
6906         if (err)
6907                 goto err_rif_fdb_op;
6908
6909         mlxsw_sp_fid_rif_set(rif->fid, rif);
6910         return 0;
6911
6912 err_rif_fdb_op:
6913         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6914                                mlxsw_sp_router_port(mlxsw_sp), false);
6915 err_fid_bc_flood_set:
6916         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6917                                mlxsw_sp_router_port(mlxsw_sp), false);
6918 err_fid_mc_flood_set:
6919         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6920         return err;
6921 }
6922
6923 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6924 {
6925         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6926         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6927         struct mlxsw_sp_fid *fid = rif->fid;
6928
6929         mlxsw_sp_fid_rif_set(fid, NULL);
6930         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6931                             mlxsw_sp_fid_index(fid), false);
6932         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6933                                mlxsw_sp_router_port(mlxsw_sp), false);
6934         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6935                                mlxsw_sp_router_port(mlxsw_sp), false);
6936         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6937 }
6938
6939 static struct mlxsw_sp_fid *
6940 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6941 {
6942         return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
6943 }
6944
/* Operations for RIFs of type MLXSW_SP_RIF_TYPE_FID. No .setup callback is
 * provided and the RIF is a plain struct mlxsw_sp_rif.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
};
6952
6953 static struct mlxsw_sp_rif_ipip_lb *
6954 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6955 {
6956         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6957 }
6958
6959 static void
6960 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6961                            const struct mlxsw_sp_rif_params *params)
6962 {
6963         struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6964         struct mlxsw_sp_rif_ipip_lb *rif_lb;
6965
6966         params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6967                                  common);
6968         rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6969         rif_lb->lb_config = params_lb->lb_config;
6970 }
6971
6972 static int
6973 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6974 {
6975         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6976         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6977         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6978         struct mlxsw_sp_vr *ul_vr;
6979         int err;
6980
6981         ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
6982         if (IS_ERR(ul_vr))
6983                 return PTR_ERR(ul_vr);
6984
6985         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6986         if (err)
6987                 goto err_loopback_op;
6988
6989         lb_rif->ul_vr_id = ul_vr->id;
6990         ++ul_vr->rif_count;
6991         return 0;
6992
6993 err_loopback_op:
6994         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
6995         return err;
6996 }
6997
6998 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6999 {
7000         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7001         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7002         struct mlxsw_sp_vr *ul_vr;
7003
7004         ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7005         mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
7006
7007         --ul_vr->rif_count;
7008         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7009 }
7010
/* Operations for RIFs of type MLXSW_SP_RIF_TYPE_IPIP_LB. The RIF is
 * allocated as a struct mlxsw_sp_rif_ipip_lb; no .fid_get callback is
 * provided.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup			= mlxsw_sp_rif_ipip_lb_setup,
	.configure		= mlxsw_sp_rif_ipip_lb_configure,
	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
};
7018
/* RIF operations indexed by RIF type; installed as router->rif_ops_arr by
 * mlxsw_sp_rifs_init().
 */
static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
};
7025
7026 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7027 {
7028         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7029
7030         mlxsw_sp->router->rifs = kcalloc(max_rifs,
7031                                          sizeof(struct mlxsw_sp_rif *),
7032                                          GFP_KERNEL);
7033         if (!mlxsw_sp->router->rifs)
7034                 return -ENOMEM;
7035
7036         mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7037
7038         return 0;
7039 }
7040
7041 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7042 {
7043         int i;
7044
7045         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7046                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7047
7048         kfree(mlxsw_sp->router->rifs);
7049 }
7050
7051 static int
7052 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7053 {
7054         char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7055
7056         mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7057         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7058 }
7059
7060 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7061 {
7062         mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7063         INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7064         return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7065 }
7066
7067 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7068 {
7069         WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7070 }
7071
7072 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7073 {
7074         struct mlxsw_sp_router *router;
7075
7076         /* Flush pending FIB notifications and then flush the device's
7077          * table before requesting another dump. The FIB notification
7078          * block is unregistered, so no need to take RTNL.
7079          */
7080         mlxsw_core_flush_owq();
7081         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7082         mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7083 }
7084
7085 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7086 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7087 {
7088         mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7089 }
7090
7091 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7092 {
7093         mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7094 }
7095
7096 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7097 {
7098         bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7099
7100         mlxsw_sp_mp_hash_header_set(recr2_pl,
7101                                     MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7102         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7103         mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7104         mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7105         if (only_l3)
7106                 return;
7107         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7108         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7109         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7110         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7111 }
7112
7113 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7114 {
7115         bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7116
7117         mlxsw_sp_mp_hash_header_set(recr2_pl,
7118                                     MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7119         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7120         mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7121         mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7122         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7123         if (only_l3) {
7124                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7125                                            MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7126         } else {
7127                 mlxsw_sp_mp_hash_header_set(recr2_pl,
7128                                             MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7129                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7130                                            MLXSW_REG_RECR2_TCP_UDP_SPORT);
7131                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7132                                            MLXSW_REG_RECR2_TCP_UDP_DPORT);
7133         }
7134 }
7135
7136 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7137 {
7138         char recr2_pl[MLXSW_REG_RECR2_LEN];
7139         u32 seed;
7140
7141         get_random_bytes(&seed, sizeof(seed));
7142         mlxsw_reg_recr2_pack(recr2_pl, seed);
7143         mlxsw_sp_mp4_hash_init(recr2_pl);
7144         mlxsw_sp_mp6_hash_init(recr2_pl);
7145
7146         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7147 }
7148 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to
 * configure, always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7153 #endif
7154
7155 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7156 {
7157         char rdpm_pl[MLXSW_REG_RDPM_LEN];
7158         unsigned int i;
7159
7160         MLXSW_REG_ZERO(rdpm, rdpm_pl);
7161
7162         /* HW is determining switch priority based on DSCP-bits, but the
7163          * kernel is still doing that based on the ToS. Since there's a
7164          * mismatch in bits we need to make sure to translate the right
7165          * value ToS would observe, skipping the 2 least-significant ECN bits.
7166          */
7167         for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7168                 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7169
7170         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7171 }
7172
7173 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7174 {
7175         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7176         u64 max_rifs;
7177         int err;
7178
7179         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7180                 return -EIO;
7181         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7182
7183         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7184         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7185         mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
7186         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7187         if (err)
7188                 return err;
7189         return 0;
7190 }
7191
7192 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7193 {
7194         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7195
7196         mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7197         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7198 }
7199
7200 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7201 {
7202         struct mlxsw_sp_router *router;
7203         int err;
7204
7205         router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7206         if (!router)
7207                 return -ENOMEM;
7208         mlxsw_sp->router = router;
7209         router->mlxsw_sp = mlxsw_sp;
7210
7211         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7212         err = __mlxsw_sp_router_init(mlxsw_sp);
7213         if (err)
7214                 goto err_router_init;
7215
7216         err = mlxsw_sp_rifs_init(mlxsw_sp);
7217         if (err)
7218                 goto err_rifs_init;
7219
7220         err = mlxsw_sp_ipips_init(mlxsw_sp);
7221         if (err)
7222                 goto err_ipips_init;
7223
7224         err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7225                               &mlxsw_sp_nexthop_ht_params);
7226         if (err)
7227                 goto err_nexthop_ht_init;
7228
7229         err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7230                               &mlxsw_sp_nexthop_group_ht_params);
7231         if (err)
7232                 goto err_nexthop_group_ht_init;
7233
7234         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7235         err = mlxsw_sp_lpm_init(mlxsw_sp);
7236         if (err)
7237                 goto err_lpm_init;
7238
7239         err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7240         if (err)
7241                 goto err_mr_init;
7242
7243         err = mlxsw_sp_vrs_init(mlxsw_sp);
7244         if (err)
7245                 goto err_vrs_init;
7246
7247         err = mlxsw_sp_neigh_init(mlxsw_sp);
7248         if (err)
7249                 goto err_neigh_init;
7250
7251         mlxsw_sp->router->netevent_nb.notifier_call =
7252                 mlxsw_sp_router_netevent_event;
7253         err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7254         if (err)
7255                 goto err_register_netevent_notifier;
7256
7257         err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7258         if (err)
7259                 goto err_mp_hash_init;
7260
7261         err = mlxsw_sp_dscp_init(mlxsw_sp);
7262         if (err)
7263                 goto err_dscp_init;
7264
7265         mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7266         err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7267                                     mlxsw_sp_router_fib_dump_flush);
7268         if (err)
7269                 goto err_register_fib_notifier;
7270
7271         return 0;
7272
7273 err_register_fib_notifier:
7274 err_dscp_init:
7275 err_mp_hash_init:
7276         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7277 err_register_netevent_notifier:
7278         mlxsw_sp_neigh_fini(mlxsw_sp);
7279 err_neigh_init:
7280         mlxsw_sp_vrs_fini(mlxsw_sp);
7281 err_vrs_init:
7282         mlxsw_sp_mr_fini(mlxsw_sp);
7283 err_mr_init:
7284         mlxsw_sp_lpm_fini(mlxsw_sp);
7285 err_lpm_init:
7286         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7287 err_nexthop_group_ht_init:
7288         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7289 err_nexthop_ht_init:
7290         mlxsw_sp_ipips_fini(mlxsw_sp);
7291 err_ipips_init:
7292         mlxsw_sp_rifs_fini(mlxsw_sp);
7293 err_rifs_init:
7294         __mlxsw_sp_router_fini(mlxsw_sp);
7295 err_router_init:
7296         kfree(mlxsw_sp->router);
7297         return err;
7298 }
7299
7300 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7301 {
7302         unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7303         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7304         mlxsw_sp_neigh_fini(mlxsw_sp);
7305         mlxsw_sp_vrs_fini(mlxsw_sp);
7306         mlxsw_sp_mr_fini(mlxsw_sp);
7307         mlxsw_sp_lpm_fini(mlxsw_sp);
7308         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7309         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7310         mlxsw_sp_ipips_fini(mlxsw_sp);
7311         mlxsw_sp_rifs_fini(mlxsw_sp);
7312         __mlxsw_sp_router_fini(mlxsw_sp);
7313         kfree(mlxsw_sp->router);
7314 }