/* (web-scrape residue, not part of this source file)
 * tipc: don't call sock_release() in atomic context
 * [linux-2.6-microblaze.git] / drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 */
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
53 #include <net/arp.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
61 #include <net/ipv6.h>
62 #include <net/fib_notifier.h>
63
64 #include "spectrum.h"
65 #include "core.h"
66 #include "reg.h"
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
73
74 struct mlxsw_sp_fib;
75 struct mlxsw_sp_vr;
76 struct mlxsw_sp_lpm_tree;
77 struct mlxsw_sp_rif_ops;
78
/* Per-ASIC router state shared by all routing objects: router interfaces
 * (RIFs), virtual routers (VRs), LPM trees, neighbours and nexthops.
 */
struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;	/* Array of router interfaces */
	struct mlxsw_sp_vr *vrs;	/* Array of virtual routers */
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		/* All LPM trees usable by this instance; tree 0 is
		 * reserved (see MLXSW_SP_LPM_TREE_MIN below).
		 */
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval; /* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;	/* NOTE(review): presumably set when FIB offload is
			 * aborted - confirm against the rest of the file.
			 */
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
107
/* Router interface (RIF). Base type embedded by the specialized RIF kinds
 * (e.g. mlxsw_sp_rif_subport, mlxsw_sp_rif_ipip_lb below).
 */
struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;		/* Backing kernel netdevice */
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;			/* Owning virtual router */
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	/* Flow counter indices per direction; only meaningful while the
	 * corresponding *_valid flag is set (see the counter helpers below).
	 */
	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};
125
/* Parameters for creating a RIF. The union holds either a system port or a
 * LAG ID, discriminated by 'lag'.
 */
struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;	/* True when the union holds lag_id */
};
135
/* Sub-port RIF: a RIF bound to a {port or LAG, VID} pair. Mirrors the
 * fields of mlxsw_sp_rif_params.
 */
struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;	/* Must be first (container_of use) */
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;	/* True when the union holds lag_id */
};
145
/* IP-in-IP loopback RIF used for tunnel decapsulation/encapsulation. */
struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;	/* Must be first (container_of use) */
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};
151
/* Creation parameters for an IP-in-IP loopback RIF. */
struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;	/* Must be first */
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};
156
/* Per-RIF-type operations. rif_size is the full size of the specialized
 * RIF structure to allocate (e.g. sizeof(struct mlxsw_sp_rif_subport)).
 */
struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	/* Initialize type-specific fields from the creation parameters. */
	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	/* Program / unprogram the RIF in hardware. */
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	/* Resolve the FID this RIF should be associated with. */
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
167
168 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
169 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
170                                   struct mlxsw_sp_lpm_tree *lpm_tree);
171 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
172                                      const struct mlxsw_sp_fib *fib,
173                                      u8 tree_id);
174 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
175                                        const struct mlxsw_sp_fib *fib);
176
177 static unsigned int *
178 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
179                            enum mlxsw_sp_rif_counter_dir dir)
180 {
181         switch (dir) {
182         case MLXSW_SP_RIF_COUNTER_EGRESS:
183                 return &rif->counter_egress;
184         case MLXSW_SP_RIF_COUNTER_INGRESS:
185                 return &rif->counter_ingress;
186         }
187         return NULL;
188 }
189
190 static bool
191 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
192                                enum mlxsw_sp_rif_counter_dir dir)
193 {
194         switch (dir) {
195         case MLXSW_SP_RIF_COUNTER_EGRESS:
196                 return rif->counter_egress_valid;
197         case MLXSW_SP_RIF_COUNTER_INGRESS:
198                 return rif->counter_ingress_valid;
199         }
200         return false;
201 }
202
203 static void
204 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
205                                enum mlxsw_sp_rif_counter_dir dir,
206                                bool valid)
207 {
208         switch (dir) {
209         case MLXSW_SP_RIF_COUNTER_EGRESS:
210                 rif->counter_egress_valid = valid;
211                 break;
212         case MLXSW_SP_RIF_COUNTER_INGRESS:
213                 rif->counter_ingress_valid = valid;
214                 break;
215         }
216 }
217
218 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
219                                      unsigned int counter_index, bool enable,
220                                      enum mlxsw_sp_rif_counter_dir dir)
221 {
222         char ritr_pl[MLXSW_REG_RITR_LEN];
223         bool is_egress = false;
224         int err;
225
226         if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
227                 is_egress = true;
228         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
229         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
230         if (err)
231                 return err;
232
233         mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
234                                     is_egress);
235         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
236 }
237
238 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
239                                    struct mlxsw_sp_rif *rif,
240                                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
241 {
242         char ricnt_pl[MLXSW_REG_RICNT_LEN];
243         unsigned int *p_counter_index;
244         bool valid;
245         int err;
246
247         valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
248         if (!valid)
249                 return -EINVAL;
250
251         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
252         if (!p_counter_index)
253                 return -EINVAL;
254         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
255                              MLXSW_REG_RICNT_OPCODE_NOP);
256         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
257         if (err)
258                 return err;
259         *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
260         return 0;
261 }
262
/* Zero the flow counter at @counter_index via the RICNT clear opcode.
 * Returns 0 or a negative errno from the register write.
 */
static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}
272
/* Allocate a flow counter for @rif in direction @dir, zero it, and bind it
 * to the RIF in hardware. On success the counter is marked valid on the
 * RIF. Returns 0 or a negative errno; on any failure after allocation the
 * counter is returned to the pool.
 */
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	/* Clear before binding so the first read starts from zero. */
	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}
305
/* Unbind and release the RIF counter for @dir, if one is valid. The
 * counter is first detached from the RIF in hardware, then returned to the
 * pool, and finally marked invalid on the RIF.
 */
void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	/* A valid direction must have a counter slot; anything else is a
	 * programming error.
	 */
	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}
324
325 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
326 {
327         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
328         struct devlink *devlink;
329
330         devlink = priv_to_devlink(mlxsw_sp->core);
331         if (!devlink_dpipe_table_counter_enabled(devlink,
332                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
333                 return;
334         mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
335 }
336
337 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
338 {
339         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
340
341         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
342 }
343
344 static struct mlxsw_sp_rif *
345 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
346                          const struct net_device *dev);
347
/* Number of distinct prefix lengths: 0 through 128 (IPv6 address width),
 * inclusive.
 */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Bitmap of prefix lengths in use; bit N set means prefix length N. */
struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate @prefix over every prefix length set in @prefix_usage. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
356
357 static bool
358 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
359                          struct mlxsw_sp_prefix_usage *prefix_usage2)
360 {
361         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
362 }
363
364 static void
365 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
366                           struct mlxsw_sp_prefix_usage *prefix_usage2)
367 {
368         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
369 }
370
/* Mark prefix length @prefix_len as used in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}
377
/* Mark prefix length @prefix_len as unused in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
384
/* Key of a FIB node: address (sized for IPv6, also used for IPv4) plus
 * prefix length.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};
389
/* How a FIB entry is resolved in hardware. */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,	/* Forward via nexthop group */
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,	/* Deliver locally */
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,	/* Punt to CPU */

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};
403
struct mlxsw_sp_nexthop_group;

/* A prefix in a FIB table. Holds the list of entries sharing the prefix. */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;		/* Membership in fib->node_list */
	struct rhash_head ht_node;	/* Membership in fib->ht */
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

/* Decapsulation state of an IPIP_DECAP FIB entry. */
struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

/* Protocol-agnostic part of a FIB entry. */
struct mlxsw_sp_fib_entry {
	struct list_head list;		/* Membership in node's entry_list */
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

/* IPv4 FIB entry: common part plus kernel route attributes. */
struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

/* IPv6 FIB entry: common part plus the kernel rt6_info list it mirrors. */
struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;	/* Number of entries on rt6_list */
};

/* One kernel IPv6 route referenced by a fib6 entry. */
struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};
446
/* A hardware LPM tree, shared by virtual routers with the same protocol
 * and prefix usage. ref_count tracks the sharers.
 */
struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	/* Per-prefix-length usage counts within this tree. */
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};
454
/* A FIB table: one per protocol per virtual router. */
struct mlxsw_sp_fib {
	struct rhashtable ht;		/* Nodes keyed by mlxsw_sp_fib_key */
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;		/* Owning virtual router */
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

/* A virtual router, mapped from a kernel FIB table. Considered in use as
 * long as any of its tables exists (see mlxsw_sp_vr_is_used()).
 */
struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr4_table;	/* IPv4 multicast routes */
};
471
472 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
473
/* Create a FIB table for @vr and @proto. The table starts out on the
 * default per-protocol LPM tree (a reference is taken) and the VR is bound
 * to that tree in hardware. Returns the table or an ERR_PTR; on failure
 * everything acquired so far is released in reverse order.
 */
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}
505
/* Destroy a FIB table: unbind the VR from its LPM tree, drop the tree
 * reference and free the table. The caller must have emptied the table
 * first (node_list is expected to be empty).
 */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
515
516 static struct mlxsw_sp_lpm_tree *
517 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
518 {
519         static struct mlxsw_sp_lpm_tree *lpm_tree;
520         int i;
521
522         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
523                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
524                 if (lpm_tree->ref_count == 0)
525                         return lpm_tree;
526         }
527         return NULL;
528 }
529
530 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
531                                    struct mlxsw_sp_lpm_tree *lpm_tree)
532 {
533         char ralta_pl[MLXSW_REG_RALTA_LEN];
534
535         mlxsw_reg_ralta_pack(ralta_pl, true,
536                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
537                              lpm_tree->id);
538         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
539 }
540
541 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
542                                    struct mlxsw_sp_lpm_tree *lpm_tree)
543 {
544         char ralta_pl[MLXSW_REG_RALTA_LEN];
545
546         mlxsw_reg_ralta_pack(ralta_pl, false,
547                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
548                              lpm_tree->id);
549         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
550 }
551
/* Program the left structure of @lpm_tree from @prefix_usage via the RALST
 * register. The root bin is the highest used prefix length (the loop below
 * leaves root_bin at the last, i.e. largest, set bit), and each used
 * prefix length except 0 is chained to the previously programmed one.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
575
/* Create a new LPM tree for @proto with the given prefix usage: take a
 * free slot, allocate the tree in hardware, and program its structure.
 * Returns the tree (ref_count initialized to 1) or an ERR_PTR; -EBUSY
 * means all tree slots are taken.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	/* Record the usage the tree was built for and reset per-prefix
	 * reference counts for the new user.
	 */
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}
607
/* Destroy an LPM tree; currently this only releases the hardware tree. */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
613
614 static struct mlxsw_sp_lpm_tree *
615 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
616                       struct mlxsw_sp_prefix_usage *prefix_usage,
617                       enum mlxsw_sp_l3proto proto)
618 {
619         struct mlxsw_sp_lpm_tree *lpm_tree;
620         int i;
621
622         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
623                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
624                 if (lpm_tree->ref_count != 0 &&
625                     lpm_tree->proto == proto &&
626                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
627                                              prefix_usage)) {
628                         mlxsw_sp_lpm_tree_hold(lpm_tree);
629                         return lpm_tree;
630                 }
631         }
632         return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
633 }
634
/* Take a reference on @lpm_tree. */
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}
639
640 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
641                                   struct mlxsw_sp_lpm_tree *lpm_tree)
642 {
643         if (--lpm_tree->ref_count == 0)
644                 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
645 }
646
647 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
648
/* Initialize LPM tree management: size the tree array from the device's
 * MAX_LPM_TREES resource (minus the reserved tree 0), assign tree IDs, and
 * create one default (empty prefix usage) tree per protocol. Returns 0 or
 * a negative errno; on failure everything acquired is released.
 */
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	/* IDs start past the reserved tree 0. */
	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}
697
698 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
699 {
700         struct mlxsw_sp_lpm_tree *lpm_tree;
701
702         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
703         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
704
705         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
706         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
707
708         kfree(mlxsw_sp->router->lpm.trees);
709 }
710
711 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
712 {
713         return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
714 }
715
716 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
717 {
718         struct mlxsw_sp_vr *vr;
719         int i;
720
721         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
722                 vr = &mlxsw_sp->router->vrs[i];
723                 if (!mlxsw_sp_vr_is_used(vr))
724                         return vr;
725         }
726         return NULL;
727 }
728
729 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
730                                      const struct mlxsw_sp_fib *fib, u8 tree_id)
731 {
732         char raltb_pl[MLXSW_REG_RALTB_LEN];
733
734         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
735                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
736                              tree_id);
737         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
738 }
739
740 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
741                                        const struct mlxsw_sp_fib *fib)
742 {
743         char raltb_pl[MLXSW_REG_RALTB_LEN];
744
745         /* Bind to tree 0 which is default */
746         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
747                              (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
748         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
749 }
750
751 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
752 {
753         /* For our purpose, squash main, default and local tables into one */
754         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
755                 tb_id = RT_TABLE_MAIN;
756         return tb_id;
757 }
758
759 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
760                                             u32 tb_id)
761 {
762         struct mlxsw_sp_vr *vr;
763         int i;
764
765         tb_id = mlxsw_sp_fix_tb_id(tb_id);
766
767         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
768                 vr = &mlxsw_sp->router->vrs[i];
769                 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
770                         return vr;
771         }
772         return NULL;
773 }
774
775 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
776                                             enum mlxsw_sp_l3proto proto)
777 {
778         switch (proto) {
779         case MLXSW_SP_L3_PROTO_IPV4:
780                 return vr->fib4;
781         case MLXSW_SP_L3_PROTO_IPV6:
782                 return vr->fib6;
783         }
784         return NULL;
785 }
786
/* Create a virtual router for kernel table @tb_id: claim a free VR slot
 * and build its IPv4 FIB, IPv6 FIB and IPv4 multicast table. Returns the
 * VR or an ERR_PTR; on failure the tables created so far are destroyed in
 * reverse order and their pointers reset so the slot reads as unused.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr->fib4))
		return ERR_CAST(vr->fib4);
	vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(vr->fib6)) {
		err = PTR_ERR(vr->fib6);
		goto err_fib6_create;
	}
	vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
						 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr->mr4_table)) {
		err = PTR_ERR(vr->mr4_table);
		goto err_mr_table_create;
	}
	vr->tb_id = tb_id;
	return vr;

err_mr_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
	return ERR_PTR(err);
}
824
/* Destroy a virtual router's tables in reverse order of their creation in
 * mlxsw_sp_vr_create(). Pointers are reset so the slot reads as unused.
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr4_table);
	vr->mr4_table = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}
835
836 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
837                                            struct netlink_ext_ack *extack)
838 {
839         struct mlxsw_sp_vr *vr;
840
841         tb_id = mlxsw_sp_fix_tb_id(tb_id);
842         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
843         if (!vr)
844                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
845         return vr;
846 }
847
/* Release a virtual router: it is destroyed only once nothing references
 * it - no RIFs, and all three of its tables are empty.
 */
static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr4_table))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}
855
856 static bool
857 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
858                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
859 {
860         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
861
862         if (!mlxsw_sp_vr_is_used(vr))
863                 return false;
864         if (fib->lpm_tree->id == tree_id)
865                 return true;
866         return false;
867 }
868
/* Re-bind @fib from its current LPM tree to @new_tree. On success the
 * reference on the old tree is dropped; on failure the old binding and
 * reference are left intact and a negative errno is returned.
 */
static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	/* Take a reference and switch the pointer before binding; both are
	 * rolled back if the bind fails.
	 */
	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}
889
890 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
891                                          struct mlxsw_sp_fib *fib,
892                                          struct mlxsw_sp_lpm_tree *new_tree)
893 {
894         enum mlxsw_sp_l3proto proto = fib->proto;
895         struct mlxsw_sp_lpm_tree *old_tree;
896         u8 old_id, new_id = new_tree->id;
897         struct mlxsw_sp_vr *vr;
898         int i, err;
899
900         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
901         old_id = old_tree->id;
902
903         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
904                 vr = &mlxsw_sp->router->vrs[i];
905                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
906                         continue;
907                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
908                                                    mlxsw_sp_vr_fib(vr, proto),
909                                                    new_tree);
910                 if (err)
911                         goto err_tree_replace;
912         }
913
914         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
915                sizeof(new_tree->prefix_ref_count));
916         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
917         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
918
919         return 0;
920
921 err_tree_replace:
922         for (i--; i >= 0; i--) {
923                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
924                         continue;
925                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
926                                              mlxsw_sp_vr_fib(vr, proto),
927                                              old_tree);
928         }
929         return err;
930 }
931
932 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
933 {
934         struct mlxsw_sp_vr *vr;
935         u64 max_vrs;
936         int i;
937
938         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
939                 return -EIO;
940
941         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
942         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
943                                         GFP_KERNEL);
944         if (!mlxsw_sp->router->vrs)
945                 return -ENOMEM;
946
947         for (i = 0; i < max_vrs; i++) {
948                 vr = &mlxsw_sp->router->vrs[i];
949                 vr->id = i;
950         }
951
952         return 0;
953 }
954
955 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
956
/* Counterpart of mlxsw_sp_vrs_init(): drain outstanding FIB work, flush
 * the device's tables and free the VR array.
 */
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}
970
971 static struct net_device *
972 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
973 {
974         struct ip_tunnel *tun = netdev_priv(ol_dev);
975         struct net *net = dev_net(ol_dev);
976
977         return __dev_get_by_index(net, tun->parms.link);
978 }
979
980 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
981 {
982         struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
983
984         if (d)
985                 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
986         else
987                 return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
988 }
989
990 static struct mlxsw_sp_rif *
991 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
992                     const struct mlxsw_sp_rif_params *params,
993                     struct netlink_ext_ack *extack);
994
995 static struct mlxsw_sp_rif_ipip_lb *
996 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
997                                 enum mlxsw_sp_ipip_type ipipt,
998                                 struct net_device *ol_dev,
999                                 struct netlink_ext_ack *extack)
1000 {
1001         struct mlxsw_sp_rif_params_ipip_lb lb_params;
1002         const struct mlxsw_sp_ipip_ops *ipip_ops;
1003         struct mlxsw_sp_rif *rif;
1004
1005         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1006         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1007                 .common.dev = ol_dev,
1008                 .common.lag = false,
1009                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1010         };
1011
1012         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1013         if (IS_ERR(rif))
1014                 return ERR_CAST(rif);
1015         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1016 }
1017
/* Allocate and initialize an IPIP entry for overlay device @ol_dev of
 * type @ipipt, including its backing loopback RIF. Returns the entry or
 * an ERR_PTR; freed again with mlxsw_sp_ipip_entry_dealloc().
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	/* No extack here: registration-time creation has no netlink
	 * request to report back to.
	 */
	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;

	/* Snapshot the tunnel parameters; only IPv4 underlay is
	 * implemented at this point.
	 */
	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		break;
	}

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}
1057
1058 static void
1059 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1060 {
1061         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1062         kfree(ipip_entry);
1063 }
1064
1065 static bool
1066 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1067                                   const enum mlxsw_sp_l3proto ul_proto,
1068                                   union mlxsw_sp_l3addr saddr,
1069                                   u32 ul_tb_id,
1070                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1071 {
1072         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1073         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1074         union mlxsw_sp_l3addr tun_saddr;
1075
1076         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1077                 return false;
1078
1079         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1080         return tun_ul_tb_id == ul_tb_id &&
1081                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1082 }
1083
/* Set up @fib_entry as the decap route of @ipip_entry: allocate a
 * tunnel index from the KVD linear allocator and cross-link the two
 * objects. Returns 0 or a negative errno from the allocation.
 */
static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}
1101
1102 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1103                                           struct mlxsw_sp_fib_entry *fib_entry)
1104 {
1105         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1106         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1107         fib_entry->decap.ipip_entry = NULL;
1108         mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
1109 }
1110
1111 static struct mlxsw_sp_fib_node *
1112 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1113                          size_t addr_len, unsigned char prefix_len);
1114 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1115                                      struct mlxsw_sp_fib_entry *fib_entry);
1116
/* Stop offloading decap through @ipip_entry: detach its decap route,
 * downgrade the route to MLXSW_SP_FIB_ENTRY_TYPE_TRAP and push the
 * change to the device.
 */
static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
1128
/* Turn @decap_fib_entry into the offloaded decap route of @ipip_entry.
 * If the device update fails, the route is demoted back to a trap
 * entry; init failures are silently ignored and the route is left as-is.
 */
static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
1142
1143 /* Given an IPIP entry, find the corresponding decap route. */
1144 static struct mlxsw_sp_fib_entry *
1145 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1146                                struct mlxsw_sp_ipip_entry *ipip_entry)
1147 {
1148         static struct mlxsw_sp_fib_node *fib_node;
1149         const struct mlxsw_sp_ipip_ops *ipip_ops;
1150         struct mlxsw_sp_fib_entry *fib_entry;
1151         unsigned char saddr_prefix_len;
1152         union mlxsw_sp_l3addr saddr;
1153         struct mlxsw_sp_fib *ul_fib;
1154         struct mlxsw_sp_vr *ul_vr;
1155         const void *saddrp;
1156         size_t saddr_len;
1157         u32 ul_tb_id;
1158         u32 saddr4;
1159
1160         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1161
1162         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1163         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1164         if (!ul_vr)
1165                 return NULL;
1166
1167         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1168         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1169                                            ipip_entry->ol_dev);
1170
1171         switch (ipip_ops->ul_proto) {
1172         case MLXSW_SP_L3_PROTO_IPV4:
1173                 saddr4 = be32_to_cpu(saddr.addr4);
1174                 saddrp = &saddr4;
1175                 saddr_len = 4;
1176                 saddr_prefix_len = 32;
1177                 break;
1178         case MLXSW_SP_L3_PROTO_IPV6:
1179                 WARN_ON(1);
1180                 return NULL;
1181         }
1182
1183         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1184                                             saddr_prefix_len);
1185         if (!fib_node || list_empty(&fib_node->entry_list))
1186                 return NULL;
1187
1188         fib_entry = list_first_entry(&fib_node->entry_list,
1189                                      struct mlxsw_sp_fib_entry, list);
1190         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1191                 return NULL;
1192
1193         return fib_entry;
1194 }
1195
1196 static struct mlxsw_sp_ipip_entry *
1197 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1198                            enum mlxsw_sp_ipip_type ipipt,
1199                            struct net_device *ol_dev)
1200 {
1201         struct mlxsw_sp_ipip_entry *ipip_entry;
1202
1203         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1204         if (IS_ERR(ipip_entry))
1205                 return ipip_entry;
1206
1207         list_add_tail(&ipip_entry->ipip_list_node,
1208                       &mlxsw_sp->router->ipip_list);
1209
1210         return ipip_entry;
1211 }
1212
/* Unlink @ipip_entry from the router's IPIP list and free it, along
 * with its loopback RIF.
 */
static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}
1220
1221 static bool
1222 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1223                                   const struct net_device *ul_dev,
1224                                   enum mlxsw_sp_l3proto ul_proto,
1225                                   union mlxsw_sp_l3addr ul_dip,
1226                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1227 {
1228         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1229         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1230         struct net_device *ipip_ul_dev;
1231
1232         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1233                 return false;
1234
1235         ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1236         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1237                                                  ul_tb_id, ipip_entry) &&
1238                (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1239 }
1240
1241 /* Given decap parameters, find the corresponding IPIP entry. */
1242 static struct mlxsw_sp_ipip_entry *
1243 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1244                                   const struct net_device *ul_dev,
1245                                   enum mlxsw_sp_l3proto ul_proto,
1246                                   union mlxsw_sp_l3addr ul_dip)
1247 {
1248         struct mlxsw_sp_ipip_entry *ipip_entry;
1249
1250         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1251                             ipip_list_node)
1252                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1253                                                       ul_proto, ul_dip,
1254                                                       ipip_entry))
1255                         return ipip_entry;
1256
1257         return NULL;
1258 }
1259
1260 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1261                                       const struct net_device *dev,
1262                                       enum mlxsw_sp_ipip_type *p_type)
1263 {
1264         struct mlxsw_sp_router *router = mlxsw_sp->router;
1265         const struct mlxsw_sp_ipip_ops *ipip_ops;
1266         enum mlxsw_sp_ipip_type ipipt;
1267
1268         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1269                 ipip_ops = router->ipip_ops_arr[ipipt];
1270                 if (dev->type == ipip_ops->dev_type) {
1271                         if (p_type)
1272                                 *p_type = ipipt;
1273                         return true;
1274                 }
1275         }
1276         return false;
1277 }
1278
/* Return true if @dev is an overlay device of a supported IPIP type. */
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
1284
1285 static struct mlxsw_sp_ipip_entry *
1286 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1287                                    const struct net_device *ol_dev)
1288 {
1289         struct mlxsw_sp_ipip_entry *ipip_entry;
1290
1291         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1292                             ipip_list_node)
1293                 if (ipip_entry->ol_dev == ol_dev)
1294                         return ipip_entry;
1295
1296         return NULL;
1297 }
1298
/* Find the next IPIP entry whose underlay device is @ul_dev, scanning
 * the router's IPIP list after @start, or from the beginning when
 * @start is NULL. Returns NULL when no (further) match exists.
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	/* list_prepare_entry() maps a NULL @start to the list head, so
	 * the _continue iteration below begins at the first entry.
	 */
	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}
1319
1320 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1321                                 const struct net_device *dev)
1322 {
1323         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1324 }
1325
1326 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1327                                                 const struct net_device *ol_dev,
1328                                                 enum mlxsw_sp_ipip_type ipipt)
1329 {
1330         const struct mlxsw_sp_ipip_ops *ops
1331                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1332
1333         /* For deciding whether decap should be offloaded, we don't care about
1334          * overlay protocol, so ask whether either one is supported.
1335          */
1336         return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1337                ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1338 }
1339
1340 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1341                                                 struct net_device *ol_dev)
1342 {
1343         struct mlxsw_sp_ipip_entry *ipip_entry;
1344         enum mlxsw_sp_l3proto ul_proto;
1345         enum mlxsw_sp_ipip_type ipipt;
1346         union mlxsw_sp_l3addr saddr;
1347         u32 ul_tb_id;
1348
1349         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1350         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1351                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1352                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1353                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1354                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1355                                                           saddr, ul_tb_id,
1356                                                           NULL)) {
1357                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1358                                                                 ol_dev);
1359                         if (IS_ERR(ipip_entry))
1360                                 return PTR_ERR(ipip_entry);
1361                 }
1362         }
1363
1364         return 0;
1365 }
1366
/* Handle unregistration of a tunnel overlay netdevice: drop its IPIP
 * entry if one exists.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	if (!ipip_entry)
		return;
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1376
/* The overlay device came up: if a route now covers the tunnel's local
 * address, promote it to a decap entry.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_fib_entry)
		return;
	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
					  decap_fib_entry);
}
1388
/* NETDEV_UP on an overlay device: forward to the IPIP entry, if any. */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	if (!ipip_entry)
		return;
	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1398
1399 static void
1400 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1401                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1402 {
1403         if (ipip_entry->decap_fib_entry)
1404                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1405 }
1406
/* NETDEV_DOWN on an overlay device: forward to the IPIP entry, if any. */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	if (!ipip_entry)
		return;
	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1416
1417 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1418                                          struct mlxsw_sp_rif *old_rif,
1419                                          struct mlxsw_sp_rif *new_rif);
/* Replace the IPIP entry's loopback RIF with a freshly created one.
 * When @keep_encap is set, next hops that referenced the old RIF are
 * migrated to the new one before the old RIF is destroyed. Returns 0
 * or a negative errno, in which case the old RIF is kept.
 */
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	/* Migrate next hops off the old RIF before it disappears. */
	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}
1445
1446 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1447                                         struct mlxsw_sp_rif *rif);
1448
1449 /**
1450  * Update the offload related to an IPIP entry. This always updates decap, and
1451  * in addition to that it also:
1452  * @recreate_loopback: recreates the associated loopback RIF
1453  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1454  *              relevant when recreate_loopback is true.
1455  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1456  *                   is only relevant when recreate_loopback is false.
1457  */
1458 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1459                                         struct mlxsw_sp_ipip_entry *ipip_entry,
1460                                         bool recreate_loopback,
1461                                         bool keep_encap,
1462                                         bool update_nexthops,
1463                                         struct netlink_ext_ack *extack)
1464 {
1465         int err;
1466
1467         /* RIFs can't be edited, so to update loopback, we need to destroy and
1468          * recreate it. That creates a window of opportunity where RALUE and
1469          * RATR registers end up referencing a RIF that's already gone. RATRs
1470          * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1471          * of RALUE, demote the decap route back.
1472          */
1473         if (ipip_entry->decap_fib_entry)
1474                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1475
1476         if (recreate_loopback) {
1477                 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1478                                                        keep_encap, extack);
1479                 if (err)
1480                         return err;
1481         } else if (update_nexthops) {
1482                 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1483                                             &ipip_entry->ol_lb->common);
1484         }
1485
1486         if (ipip_entry->ol_dev->flags & IFF_UP)
1487                 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1488
1489         return 0;
1490 }
1491
/* Handle the overlay device moving to a different VRF: either demote
 * the tunnel on a local-address conflict, or recreate its loopback RIF
 * for the new underlay table.
 */
static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	if (!ipip_entry)
		return 0;

	/* For flat configuration cases, moving overlay to a different VRF might
	 * cause local address conflict, and the conflicting tunnels need to be
	 * demoted.
	 */
	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}
1522
/* The underlay device changed VRF: recreate the loopback RIF and keep
 * the encap next hops that use the tunnel netdevice.
 */
static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}
1532
/* The underlay device came up: refresh next hops without recreating
 * the loopback RIF.
 */
static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1541
/* Handle NETDEV_DOWN on the underlay device of @ipip_entry. */
static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1554
1555 static int
1556 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1557                                         struct net_device *ol_dev,
1558                                         struct netlink_ext_ack *extack)
1559 {
1560         const struct mlxsw_sp_ipip_ops *ipip_ops;
1561         struct mlxsw_sp_ipip_entry *ipip_entry;
1562         int err;
1563
1564         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1565         if (!ipip_entry)
1566                 /* A change might make a tunnel eligible for offloading, but
1567                  * that is currently not implemented. What falls to slow path
1568                  * stays there.
1569                  */
1570                 return 0;
1571
1572         /* A change might make a tunnel not eligible for offloading. */
1573         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1574                                                  ipip_entry->ipipt)) {
1575                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1576                 return 0;
1577         }
1578
1579         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1580         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1581         return err;
1582 }
1583
1584 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1585                                        struct mlxsw_sp_ipip_entry *ipip_entry)
1586 {
1587         struct net_device *ol_dev = ipip_entry->ol_dev;
1588
1589         if (ol_dev->flags & IFF_UP)
1590                 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1591         mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1592 }
1593
/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in via the argument
 * `except'.
 */
bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
				     enum mlxsw_sp_l3proto ul_proto,
				     union mlxsw_sp_l3addr saddr,
				     u32 ul_tb_id,
				     const struct mlxsw_sp_ipip_entry *except)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	/* The _safe variant is required: demoting destroys the entry and
	 * unlinks it from ipip_list while we iterate.
	 */
	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		if (ipip_entry != except &&
		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
						      ul_tb_id, ipip_entry)) {
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
			/* Only the first match is demoted. */
			return true;
		}
	}

	return false;
}
1621
/* Demote every offloaded tunnel whose underlay device is ul_dev. Used when
 * an event on the underlay device makes the tunnels unsuitable for offload.
 */
static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
						     struct net_device *ul_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	/* _safe iteration: demoting removes the entry from ipip_list. */
	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
	}
}
1636
/* Dispatch a netdev notifier event that targets a tunnel's overlay device to
 * the matching handler. Events not listed here are ignored (return 0).
 */
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		/* Only enslavement to an L3 master (VRF) is of interest. */
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								    ol_dev,
								    extack);
		return 0;
	case NETDEV_CHANGE:
		extack = info->extack;
		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							       ol_dev, extack);
	}
	return 0;
}
1672
/* Handle a netdev notifier event on the underlay device of one specific
 * offloaded tunnel (ipip_entry). Unhandled events return 0.
 */
static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		/* React only to (dis)enslavement to an L3 master (VRF). */
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    extack);
		break;

	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}
1704
/* Fan an underlay-device event out to every offloaded tunnel that uses
 * ul_dev as its underlay. On the first handler failure, all tunnels on this
 * underlay are demoted to the slow path and the error is returned.
 */
int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *ul_dev,
				 unsigned long event,
				 struct netdev_notifier_info *info)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	int err;

	/* Passing the previous entry back in resumes the search, so this
	 * walks all matching entries one by one.
	 */
	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
								ul_dev,
								ipip_entry))) {
		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
							 ul_dev, event, info);
		if (err) {
			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
								 ul_dev);
			return err;
		}
	}

	return 0;
}
1728
/* Hash table key for neighbour entries: the kernel neighbour pointer. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1732
/* Driver-side representation of a kernel neighbour programmed (or to be
 * programmed) into the device's RAUHT table.
 */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;	/* membership in rif->neigh_list */
	struct rhash_head ht_node;	/* membership in router->neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif;			/* RIF index the neighbour lives on */
	bool connected;			/* currently programmed to HW */
	unsigned char ha[ETH_ALEN];	/* cached hardware (MAC) address */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;	/* flow counter, if allocated */
	bool counter_valid;
};
1747
/* rhashtable layout for neigh_ht: keyed by the mlxsw_sp_neigh_key embedded
 * in each entry.
 */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
1753
1754 struct mlxsw_sp_neigh_entry *
1755 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1756                         struct mlxsw_sp_neigh_entry *neigh_entry)
1757 {
1758         if (!neigh_entry) {
1759                 if (list_empty(&rif->neigh_list))
1760                         return NULL;
1761                 else
1762                         return list_first_entry(&rif->neigh_list,
1763                                                 typeof(*neigh_entry),
1764                                                 rif_list_node);
1765         }
1766         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1767                 return NULL;
1768         return list_next_entry(neigh_entry, rif_list_node);
1769 }
1770
1771 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1772 {
1773         return neigh_entry->key.n->tbl->family;
1774 }
1775
1776 unsigned char *
1777 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1778 {
1779         return neigh_entry->ha;
1780 }
1781
1782 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1783 {
1784         struct neighbour *n;
1785
1786         n = neigh_entry->key.n;
1787         return ntohl(*((__be32 *) n->primary_key));
1788 }
1789
1790 struct in6_addr *
1791 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1792 {
1793         struct neighbour *n;
1794
1795         n = neigh_entry->key.n;
1796         return (struct in6_addr *) &n->primary_key;
1797 }
1798
/* Read the neighbour's flow counter into *p_counter.
 * Returns -EINVAL when no counter is allocated for this entry, otherwise
 * the result of the counter query.
 */
int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}
1809
1810 static struct mlxsw_sp_neigh_entry *
1811 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1812                            u16 rif)
1813 {
1814         struct mlxsw_sp_neigh_entry *neigh_entry;
1815
1816         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1817         if (!neigh_entry)
1818                 return NULL;
1819
1820         neigh_entry->key.n = n;
1821         neigh_entry->rif = rif;
1822         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1823
1824         return neigh_entry;
1825 }
1826
/* Release a neighbour entry allocated by mlxsw_sp_neigh_entry_alloc(). */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1831
/* Insert the entry into the router's neighbour hash table, keyed by the
 * kernel neighbour pointer. Returns 0 or an rhashtable error.
 */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}
1840
/* Remove the entry from the router's neighbour hash table. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
1849
/* Decide whether a flow counter should be attached to this neighbour:
 * true only when counters are enabled via devlink dpipe on the host table
 * matching the neighbour's address family.
 */
static bool
mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct devlink *devlink;
	const char *table_name;

	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
	case AF_INET:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
		break;
	case AF_INET6:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
		break;
	default:
		/* Only IPv4/IPv6 neighbours are expected here. */
		WARN_ON(1);
		return false;
	}

	devlink = priv_to_devlink(mlxsw_sp->core);
	return devlink_dpipe_table_counter_enabled(devlink, table_name);
}
1872
/* Best-effort allocation of a flow counter for the neighbour. Failures are
 * silently ignored: counter_valid stays false and the entry works without
 * statistics.
 */
static void
mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
		return;

	neigh_entry->counter_valid = true;
}
1885
/* Release the neighbour's flow counter, if one was allocated. Safe to call
 * when no counter is present (no-op).
 */
static void
mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry->counter_valid)
		return;
	mlxsw_sp_flow_counter_free(mlxsw_sp,
				   neigh_entry->counter_index);
	neigh_entry->counter_valid = false;
}
1896
/* Create a driver neighbour entry for kernel neighbour n: resolve the RIF
 * from n's netdev, allocate, insert into the hash table, optionally attach
 * a flow counter and link the entry onto the RIF's neighbour list.
 * Returns the entry or ERR_PTR (-EINVAL if no RIF, -ENOMEM, or the
 * insertion error).
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	/* Counter allocation is best-effort; entry is valid without it. */
	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}
1925
/* Tear down a neighbour entry in reverse order of creation: unlink from the
 * RIF list, free the counter, remove from the hash table, then free.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
1935
1936 static struct mlxsw_sp_neigh_entry *
1937 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1938 {
1939         struct mlxsw_sp_neigh_key key;
1940
1941         key.n = n;
1942         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1943                                       &key, mlxsw_sp_neigh_ht_params);
1944 }
1945
/* Initialize the neighbour activity-dump interval (in milliseconds) from
 * the default DELAY_PROBE_TIME of the ARP table — and, with IPv6 enabled,
 * the ND table, taking the smaller of the two.
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
1960
/* Process one IPv4 entry from a RAUHTD activity dump: map the RIF to its
 * netdev, look up the corresponding kernel neighbour and poke it so the
 * kernel considers it active.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	/* neigh_lookup() expects the key in network byte order. */
	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	/* Drop the reference taken by neigh_lookup(). */
	neigh_release(n);
}
1988
#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of the IPv4 entry processing: unpack the entry, find the
 * ND-table neighbour on the RIF's netdev and mark it active.
 */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	/* Drop the reference taken by neigh_lookup(). */
	neigh_release(n);
}
#else
/* Stub when IPv6 is compiled out: IPv6 records are silently ignored. */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif
2023
/* Process all IPv4 entries packed into one RAUHTD record. */
static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		/* Entry indices are global across records. */
		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}

}
2046
/* Process one IPv6 RAUHTD record. An IPv6 record holds exactly one entry,
 * so the record index doubles as the entry index.
 */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2055
/* Dispatch one RAUHTD record to the IPv4 or IPv6 handler based on its type.
 * Unknown record types are ignored.
 */
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	}
}
2070
2071 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2072 {
2073         u8 num_rec, last_rec_index, num_entries;
2074
2075         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2076         last_rec_index = num_rec - 1;
2077
2078         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2079                 return false;
2080         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2081             MLXSW_REG_RAUHTD_TYPE_IPV6)
2082                 return true;
2083
2084         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2085                                                                 last_rec_index);
2086         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2087                 return true;
2088         return false;
2089 }
2090
/* Repeatedly query the RAUHTD register for activity records of the given
 * type, processing every record in each response, until a response is not
 * full (i.e. the dump is complete) or a query fails.
 * Returns 0 on success or the last query error.
 */
static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
				       char *rauhtd_pl,
				       enum mlxsw_reg_rauhtd_type type)
{
	int i, num_rec;
	int err;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	return err;
}
2120
/* Dump hardware neighbour activity for both IPv4 and IPv6 and feed it back
 * to the kernel neighbour tables. Returns 0 or the first error encountered.
 */
static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_rauhtd_type type;
	char *rauhtd_pl;
	int err;

	/* The RAUHTD payload is too large for the stack. */
	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
	if (!rauhtd_pl)
		return -ENOMEM;

	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
	if (err)
		goto out;

	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
out:
	kfree(rauhtd_pl);
	return err;
}
2142
/* Keep neighbours that back nexthops alive in the kernel, independent of
 * actual traffic, so their routes stay resolvable.
 */
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		/* If this neigh have nexthops, make the kernel think this neigh
		 * is active regardless of the traffic.
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}
2157
2158 static void
2159 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2160 {
2161         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2162
2163         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2164                                msecs_to_jiffies(interval));
2165 }
2166
2167 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2168 {
2169         struct mlxsw_sp_router *router;
2170         int err;
2171
2172         router = container_of(work, struct mlxsw_sp_router,
2173                               neighs_update.dw.work);
2174         err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2175         if (err)
2176                 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2177
2178         mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2179
2180         mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2181 }
2182
/* Delayed-work handler that probes unresolved nexthop neighbours and then
 * reschedules itself at a fixed interval.
 */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_router *router;

	router = container_of(work, struct mlxsw_sp_router,
			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
2208
2209 static void
2210 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2211                               struct mlxsw_sp_neigh_entry *neigh_entry,
2212                               bool removing);
2213
2214 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2215 {
2216         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2217                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2218 }
2219
/* Write (add or delete) an IPv4 neighbour entry to the device's RAUHT
 * table, binding its flow counter when one is allocated.
 */
static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	/* RAUHT expects the destination IP in host byte order. */
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2236
/* Write (add or delete) an IPv6 neighbour entry to the device's RAUHT
 * table, binding its flow counter when one is allocated.
 */
static void
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	const char *dip = n->primary_key;

	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2253
2254 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2255 {
2256         struct neighbour *n = neigh_entry->key.n;
2257
2258         /* Packets with a link-local destination address are trapped
2259          * after LPM lookup and never reach the neighbour table, so
2260          * there is no need to program such neighbours to the device.
2261          */
2262         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2263             IPV6_ADDR_LINKLOCAL)
2264                 return true;
2265         return false;
2266 }
2267
/* Program (adding=true) or remove (adding=false) the neighbour in the
 * device, per its address family, and track the resulting state in
 * neigh_entry->connected. Removing an entry that was never programmed is a
 * no-op.
 */
static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry,
			    bool adding)
{
	if (!adding && !neigh_entry->connected)
		return;
	neigh_entry->connected = adding;
	if (neigh_entry->key.n->tbl->family == AF_INET) {
		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
		/* Link-local IPv6 neighbours are never programmed. */
		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
			return;
		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else {
		WARN_ON_ONCE(1);
	}
}
2288
/* Attach or detach a flow counter on an existing neighbour entry, then
 * re-write the entry to the device (adding=true) so the new counter
 * binding takes effect.
 */
void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry,
				    bool adding)
{
	if (adding)
		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	else
		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
2300
/* Context handed from the atomic netevent notifier to process context via
 * the workqueue; freed by the work handler.
 */
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n;	/* reference held until the work runs */
};
2306
/* Process-context handler for a NETEVENT_NEIGH_UPDATE: snapshot the kernel
 * neighbour state under its lock, then under RTNL create/update/remove the
 * corresponding driver entry and propagate the change to nexthops. Drops
 * the neighbour reference taken by the notifier and frees the work item.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *net_work =
		container_of(work, struct mlxsw_sp_netevent_work, work);
	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n = net_work->n;
	unsigned char ha[ETH_ALEN];
	bool entry_connected;
	u8 nud_state, dead;

	/* If these parameters are changed after we release the lock,
	 * then we are guaranteed to receive another event letting us
	 * know about it.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	rtnl_lock();
	entry_connected = nud_state & NUD_VALID && !dead;
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	/* Nothing to tear down and nothing worth creating. */
	if (!entry_connected && !neigh_entry)
		goto out;
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry))
			goto out;
	}

	memcpy(neigh_entry->ha, ha, ETH_ALEN);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

	/* Disconnected entries with no nexthop users are not kept around. */
	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
	rtnl_unlock();
	neigh_release(n);
	kfree(net_work);
}
2351
2352 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2353
/* Process-context handler for a multipath-hash policy change: re-apply the
 * multipath hash configuration and free the work item.
 */
static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *net_work =
		container_of(work, struct mlxsw_sp_netevent_work, work);
	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;

	mlxsw_sp_mp_hash_init(mlxsw_sp);
	kfree(net_work);
}
2363
/* Notifier callback for netevents. Runs in atomic context (see the
 * comment at NETEVENT_DELAY_PROBE_TIME_UPDATE below), so any handling
 * that may sleep is deferred to process context through
 * mlxsw_core_schedule_work().
 */
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
                                          unsigned long event, void *ptr)
{
        struct mlxsw_sp_netevent_work *net_work;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp_router *router;
        struct mlxsw_sp *mlxsw_sp;
        unsigned long interval;
        struct neigh_parms *p;
        struct neighbour *n;
        struct net *net;

        switch (event) {
        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
                p = ptr;

                /* We don't care about changes in the default table. */
                if (!p->dev || (p->tbl->family != AF_INET &&
                                p->tbl->family != AF_INET6))
                        return NOTIFY_DONE;

                /* We are in atomic context and can't take RTNL mutex,
                 * so use RCU variant to walk the device chain.
                 */
                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                /* Mirror the kernel's DELAY_PROBE_TIME into the interval
                 * used by the periodic neighbour activity update work.
                 */
                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
                mlxsw_sp->router->neighs_update.interval = interval;

                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_NEIGH_UPDATE:
                n = ptr;

                /* Only IPv4/IPv6 neighbour tables are offloaded. */
                if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
                        return NOTIFY_DONE;

                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                /* GFP_ATOMIC: still in atomic notifier context. */
                net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
                if (!net_work) {
                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
                        return NOTIFY_BAD;
                }

                INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
                net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                net_work->n = n;

                /* Take a reference to ensure the neighbour won't be
                 * destructed until we drop the reference in delayed
                 * work.
                 */
                neigh_clone(n);
                mlxsw_core_schedule_work(&net_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_MULTIPATH_HASH_UPDATE:
                net = ptr;

                /* Only handle the init namespace. */
                if (!net_eq(net, &init_net))
                        return NOTIFY_DONE;

                net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
                if (!net_work)
                        return NOTIFY_BAD;

                router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
                INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
                net_work->mlxsw_sp = router->mlxsw_sp;
                mlxsw_core_schedule_work(&net_work->work);
                break;
        }

        return NOTIFY_DONE;
}
2445
2446 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2447 {
2448         int err;
2449
2450         err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2451                               &mlxsw_sp_neigh_ht_params);
2452         if (err)
2453                 return err;
2454
2455         /* Initialize the polling interval according to the default
2456          * table.
2457          */
2458         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2459
2460         /* Create the delayed works for the activity_update */
2461         INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2462                           mlxsw_sp_router_neighs_update_work);
2463         INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2464                           mlxsw_sp_router_probe_unresolved_nexthops);
2465         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2466         mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2467         return 0;
2468 }
2469
2470 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2471 {
2472         cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2473         cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2474         rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2475 }
2476
2477 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2478                                          struct mlxsw_sp_rif *rif)
2479 {
2480         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2481
2482         list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2483                                  rif_list_node) {
2484                 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2485                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2486         }
2487 }
2488
/* Kind of underlying device a nexthop egresses through. */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,	/* regular Ethernet neighbour */
	MLXSW_SP_NEXTHOP_TYPE_IPIP,	/* IP-in-IP tunnel device */
};
2493
/* rhashtable key: a nexthop is identified by its kernel fib_nh. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
2497
/* An offloaded nexthop, member of exactly one nexthop group. */
struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct list_head router_list_node;
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	unsigned char gw_addr[sizeof(struct in6_addr)]; /* sized for IPv6;
							 * also holds IPv4
							 */
	int ifindex;
	int nh_weight;		/* weight as configured for the route */
	int norm_nh_weight;	/* weight divided by the group's common divisor */
	int num_adj_entries;	/* adjacency entries assigned to this nexthop */
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	enum mlxsw_sp_nexthop_type type;
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry; /* TYPE_ETH */
		struct mlxsw_sp_ipip_entry *ipip_entry;   /* TYPE_IPIP */
	};
	unsigned int counter_index; /* valid only when counter_valid is set */
	bool counter_valid;
};
2530
/* A group of nexthops sharing one block of adjacency table entries. */
struct mlxsw_sp_nexthop_group {
	void *priv; /* protocol-specific; holds the fib_info for IPv4 groups */
	struct rhash_head ht_node;
	struct list_head fib_list; /* list of fib entries that use this group */
	struct neigh_table *neigh_tbl;
	u8 adj_index_valid:1,
	   gateway:1; /* routes using the group use a gateway */
	u32 adj_index; /* base index of the group's adjacency block */
	u16 ecmp_size; /* number of adjacency entries in the block */
	u16 count; /* number of nexthops in the group */
	int sum_norm_weight; /* sum of members' normalized weights */
	struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif  nexthops[0].rif
};
2545
2546 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2547                                     struct mlxsw_sp_nexthop *nh)
2548 {
2549         struct devlink *devlink;
2550
2551         devlink = priv_to_devlink(mlxsw_sp->core);
2552         if (!devlink_dpipe_table_counter_enabled(devlink,
2553                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2554                 return;
2555
2556         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2557                 return;
2558
2559         nh->counter_valid = true;
2560 }
2561
2562 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2563                                    struct mlxsw_sp_nexthop *nh)
2564 {
2565         if (!nh->counter_valid)
2566                 return;
2567         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2568         nh->counter_valid = false;
2569 }
2570
2571 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2572                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2573 {
2574         if (!nh->counter_valid)
2575                 return -EINVAL;
2576
2577         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2578                                          p_counter, NULL);
2579 }
2580
2581 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2582                                                struct mlxsw_sp_nexthop *nh)
2583 {
2584         if (!nh) {
2585                 if (list_empty(&router->nexthop_list))
2586                         return NULL;
2587                 else
2588                         return list_first_entry(&router->nexthop_list,
2589                                                 typeof(*nh), router_list_node);
2590         }
2591         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2592                 return NULL;
2593         return list_next_entry(nh, router_list_node);
2594 }
2595
/* Is this nexthop currently programmed in the adjacency table? */
bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
{
	return nh->offloaded;
}
2600
2601 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2602 {
2603         if (!nh->offloaded)
2604                 return NULL;
2605         return nh->neigh_entry->ha;
2606 }
2607
2608 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2609                              u32 *p_adj_size, u32 *p_adj_hash_index)
2610 {
2611         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2612         u32 adj_hash_index = 0;
2613         int i;
2614
2615         if (!nh->offloaded || !nh_grp->adj_index_valid)
2616                 return -EINVAL;
2617
2618         *p_adj_index = nh_grp->adj_index;
2619         *p_adj_size = nh_grp->ecmp_size;
2620
2621         for (i = 0; i < nh_grp->count; i++) {
2622                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2623
2624                 if (nh_iter == nh)
2625                         break;
2626                 if (nh_iter->offloaded)
2627                         adj_hash_index += nh_iter->num_adj_entries;
2628         }
2629
2630         *p_adj_hash_index = adj_hash_index;
2631         return 0;
2632 }
2633
/* Router interface the nexthop egresses through (may be NULL). */
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
{
	return nh->rif;
}
2638
2639 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2640 {
2641         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2642         int i;
2643
2644         for (i = 0; i < nh_grp->count; i++) {
2645                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2646
2647                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2648                         return true;
2649         }
2650         return false;
2651 }
2652
/* For an IPv4 group, priv holds the kernel fib_info the group mirrors. */
static struct fib_info *
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->priv;
}
2658
/* Lookup key for the nexthop group hash table; the valid union member
 * is selected by @proto.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi;			/* PROTO_IPV4 */
		struct mlxsw_sp_fib6_entry *fib6_entry;	/* PROTO_IPV6 */
	};
};
2666
2667 static bool
2668 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2669                                     const struct in6_addr *gw, int ifindex,
2670                                     int weight)
2671 {
2672         int i;
2673
2674         for (i = 0; i < nh_grp->count; i++) {
2675                 const struct mlxsw_sp_nexthop *nh;
2676
2677                 nh = &nh_grp->nexthops[i];
2678                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2679                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2680                         return true;
2681         }
2682
2683         return false;
2684 }
2685
2686 static bool
2687 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2688                             const struct mlxsw_sp_fib6_entry *fib6_entry)
2689 {
2690         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2691
2692         if (nh_grp->count != fib6_entry->nrt6)
2693                 return false;
2694
2695         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2696                 struct in6_addr *gw;
2697                 int ifindex, weight;
2698
2699                 ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2700                 weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
2701                 gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2702                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2703                                                          weight))
2704                         return false;
2705         }
2706
2707         return true;
2708 }
2709
2710 static int
2711 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2712 {
2713         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2714         const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2715
2716         switch (cmp_arg->proto) {
2717         case MLXSW_SP_L3_PROTO_IPV4:
2718                 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2719         case MLXSW_SP_L3_PROTO_IPV6:
2720                 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2721                                                     cmp_arg->fib6_entry);
2722         default:
2723                 WARN_ON(1);
2724                 return 1;
2725         }
2726 }
2727
/* Address family (AF_INET / AF_INET6) the group was created for. */
static int
mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->neigh_tbl->family;
}
2733
2734 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2735 {
2736         const struct mlxsw_sp_nexthop_group *nh_grp = data;
2737         const struct mlxsw_sp_nexthop *nh;
2738         struct fib_info *fi;
2739         unsigned int val;
2740         int i;
2741
2742         switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2743         case AF_INET:
2744                 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2745                 return jhash(&fi, sizeof(fi), seed);
2746         case AF_INET6:
2747                 val = nh_grp->count;
2748                 for (i = 0; i < nh_grp->count; i++) {
2749                         nh = &nh_grp->nexthops[i];
2750                         val ^= nh->ifindex;
2751                 }
2752                 return jhash(&val, sizeof(val), seed);
2753         default:
2754                 WARN_ON(1);
2755                 return 0;
2756         }
2757 }
2758
2759 static u32
2760 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2761 {
2762         unsigned int val = fib6_entry->nrt6;
2763         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2764         struct net_device *dev;
2765
2766         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2767                 dev = mlxsw_sp_rt6->rt->dst.dev;
2768                 val ^= dev->ifindex;
2769         }
2770
2771         return jhash(&val, sizeof(val), seed);
2772 }
2773
2774 static u32
2775 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2776 {
2777         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2778
2779         switch (cmp_arg->proto) {
2780         case MLXSW_SP_L3_PROTO_IPV4:
2781                 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2782         case MLXSW_SP_L3_PROTO_IPV6:
2783                 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2784         default:
2785                 WARN_ON(1);
2786                 return 0;
2787         }
2788 }
2789
/* Groups are keyed by mlxsw_sp_nexthop_group_cmp_arg rather than by a
 * fixed-size struct member, hence the custom hash/compare callbacks.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn      = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2796
2797 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2798                                          struct mlxsw_sp_nexthop_group *nh_grp)
2799 {
2800         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2801             !nh_grp->gateway)
2802                 return 0;
2803
2804         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2805                                       &nh_grp->ht_node,
2806                                       mlxsw_sp_nexthop_group_ht_params);
2807 }
2808
2809 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2810                                           struct mlxsw_sp_nexthop_group *nh_grp)
2811 {
2812         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2813             !nh_grp->gateway)
2814                 return;
2815
2816         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2817                                &nh_grp->ht_node,
2818                                mlxsw_sp_nexthop_group_ht_params);
2819 }
2820
2821 static struct mlxsw_sp_nexthop_group *
2822 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2823                                struct fib_info *fi)
2824 {
2825         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2826
2827         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2828         cmp_arg.fi = fi;
2829         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2830                                       &cmp_arg,
2831                                       mlxsw_sp_nexthop_group_ht_params);
2832 }
2833
2834 static struct mlxsw_sp_nexthop_group *
2835 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2836                                struct mlxsw_sp_fib6_entry *fib6_entry)
2837 {
2838         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2839
2840         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2841         cmp_arg.fib6_entry = fib6_entry;
2842         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2843                                       &cmp_arg,
2844                                       mlxsw_sp_nexthop_group_ht_params);
2845 }
2846
/* Nexthops are keyed by their kernel fib_nh pointer. */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
2852
/* Add @nh to the nexthop hash table, keyed by its fib_nh. */
static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}
2859
/* Remove @nh from the nexthop hash table. */
static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop *nh)
{
	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
			       mlxsw_sp_nexthop_ht_params);
}
2866
/* Find the nexthop created for @key's fib_nh, or NULL. */
static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
			struct mlxsw_sp_nexthop_key key)
{
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
				      mlxsw_sp_nexthop_ht_params);
}
2874
/* Issue a RALEU register write telling the device to re-point all
 * routes of one virtual router from the old adjacency block to the new
 * one.
 */
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     const struct mlxsw_sp_fib *fib,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}
2889
2890 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2891                                           struct mlxsw_sp_nexthop_group *nh_grp,
2892                                           u32 old_adj_index, u16 old_ecmp_size)
2893 {
2894         struct mlxsw_sp_fib_entry *fib_entry;
2895         struct mlxsw_sp_fib *fib = NULL;
2896         int err;
2897
2898         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2899                 if (fib == fib_entry->fib_node->fib)
2900                         continue;
2901                 fib = fib_entry->fib_node->fib;
2902                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2903                                                         old_adj_index,
2904                                                         old_ecmp_size,
2905                                                         nh_grp->adj_index,
2906                                                         nh_grp->ecmp_size);
2907                 if (err)
2908                         return err;
2909         }
2910         return 0;
2911 }
2912
2913 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2914                                      struct mlxsw_sp_nexthop *nh)
2915 {
2916         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2917         char ratr_pl[MLXSW_REG_RATR_LEN];
2918
2919         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2920                             true, MLXSW_REG_RATR_TYPE_ETHERNET,
2921                             adj_index, neigh_entry->rif);
2922         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2923         if (nh->counter_valid)
2924                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2925         else
2926                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2927
2928         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2929 }
2930
2931 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2932                             struct mlxsw_sp_nexthop *nh)
2933 {
2934         int i;
2935
2936         for (i = 0; i < nh->num_adj_entries; i++) {
2937                 int err;
2938
2939                 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2940                 if (err)
2941                         return err;
2942         }
2943
2944         return 0;
2945 }
2946
/* Program one adjacency entry for an IP-in-IP nexthop by dispatching
 * to the ops of the entry's tunnel type.
 */
static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
					  u32 adj_index,
					  struct mlxsw_sp_nexthop *nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
}
2956
2957 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2958                                         u32 adj_index,
2959                                         struct mlxsw_sp_nexthop *nh)
2960 {
2961         int i;
2962
2963         for (i = 0; i < nh->num_adj_entries; i++) {
2964                 int err;
2965
2966                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2967                                                      nh);
2968                 if (err)
2969                         return err;
2970         }
2971
2972         return 0;
2973 }
2974
2975 static int
2976 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2977                               struct mlxsw_sp_nexthop_group *nh_grp,
2978                               bool reallocate)
2979 {
2980         u32 adj_index = nh_grp->adj_index; /* base */
2981         struct mlxsw_sp_nexthop *nh;
2982         int i;
2983         int err;
2984
2985         for (i = 0; i < nh_grp->count; i++) {
2986                 nh = &nh_grp->nexthops[i];
2987
2988                 if (!nh->should_offload) {
2989                         nh->offloaded = 0;
2990                         continue;
2991                 }
2992
2993                 if (nh->update || reallocate) {
2994                         switch (nh->type) {
2995                         case MLXSW_SP_NEXTHOP_TYPE_ETH:
2996                                 err = mlxsw_sp_nexthop_update
2997                                             (mlxsw_sp, adj_index, nh);
2998                                 break;
2999                         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3000                                 err = mlxsw_sp_nexthop_ipip_update
3001                                             (mlxsw_sp, adj_index, nh);
3002                                 break;
3003                         }
3004                         if (err)
3005                                 return err;
3006                         nh->update = 0;
3007                         nh->offloaded = 1;
3008                 }
3009                 adj_index += nh->num_adj_entries;
3010         }
3011         return 0;
3012 }
3013
3014 static bool
3015 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3016                                  const struct mlxsw_sp_fib_entry *fib_entry);
3017
3018 static int
3019 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3020                                     struct mlxsw_sp_nexthop_group *nh_grp)
3021 {
3022         struct mlxsw_sp_fib_entry *fib_entry;
3023         int err;
3024
3025         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3026                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3027                                                       fib_entry))
3028                         continue;
3029                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3030                 if (err)
3031                         return err;
3032         }
3033         return 0;
3034 }
3035
3036 static void
3037 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3038                                    enum mlxsw_reg_ralue_op op, int err);
3039
3040 static void
3041 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3042 {
3043         enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3044         struct mlxsw_sp_fib_entry *fib_entry;
3045
3046         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3047                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3048                                                       fib_entry))
3049                         continue;
3050                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3051         }
3052 }
3053
3054 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3055 {
3056         /* Valid sizes for an adjacency group are:
3057          * 1-64, 512, 1024, 2048 and 4096.
3058          */
3059         if (*p_adj_grp_size <= 64)
3060                 return;
3061         else if (*p_adj_grp_size <= 512)
3062                 *p_adj_grp_size = 512;
3063         else if (*p_adj_grp_size <= 1024)
3064                 *p_adj_grp_size = 1024;
3065         else if (*p_adj_grp_size <= 2048)
3066                 *p_adj_grp_size = 2048;
3067         else
3068                 *p_adj_grp_size = 4096;
3069 }
3070
3071 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3072                                              unsigned int alloc_size)
3073 {
3074         if (alloc_size >= 4096)
3075                 *p_adj_grp_size = 4096;
3076         else if (alloc_size >= 2048)
3077                 *p_adj_grp_size = 2048;
3078         else if (alloc_size >= 1024)
3079                 *p_adj_grp_size = 1024;
3080         else if (alloc_size >= 512)
3081                 *p_adj_grp_size = 512;
3082 }
3083
/* Adjust the requested adjacency group size to a size the device
 * supports and that the KVD linear allocator can actually satisfy.
 * On success *p_adj_grp_size holds the size to use.
 */
static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
				     u16 *p_adj_grp_size)
{
	unsigned int alloc_size;
	int err;

	/* Round up the requested group size to the next size supported
	 * by the device and make sure the request can be satisfied.
	 */
	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
					     &alloc_size);
	if (err)
		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as much of them as
	 * possible.
	 */
	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);

	return 0;
}
3106
3107 static void
3108 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3109 {
3110         int i, g = 0, sum_norm_weight = 0;
3111         struct mlxsw_sp_nexthop *nh;
3112
3113         for (i = 0; i < nh_grp->count; i++) {
3114                 nh = &nh_grp->nexthops[i];
3115
3116                 if (!nh->should_offload)
3117                         continue;
3118                 if (g > 0)
3119                         g = gcd(nh->nh_weight, g);
3120                 else
3121                         g = nh->nh_weight;
3122         }
3123
3124         for (i = 0; i < nh_grp->count; i++) {
3125                 nh = &nh_grp->nexthops[i];
3126
3127                 if (!nh->should_offload)
3128                         continue;
3129                 nh->norm_nh_weight = nh->nh_weight / g;
3130                 sum_norm_weight += nh->norm_nh_weight;
3131         }
3132
3133         nh_grp->sum_norm_weight = sum_norm_weight;
3134 }
3135
3136 static void
3137 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3138 {
3139         int total = nh_grp->sum_norm_weight;
3140         u16 ecmp_size = nh_grp->ecmp_size;
3141         int i, weight = 0, lower_bound = 0;
3142
3143         for (i = 0; i < nh_grp->count; i++) {
3144                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3145                 int upper_bound;
3146
3147                 if (!nh->should_offload)
3148                         continue;
3149                 weight += nh->norm_nh_weight;
3150                 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3151                 nh->num_adj_entries = upper_bound - lower_bound;
3152                 lower_bound = upper_bound;
3153         }
3154 }
3155
/* Re-program a nexthop group to the device. For gateway groups this may
 * involve allocating a new adjacency (KVD linear) range, writing the
 * nexthops into it and migrating routes from the old range. On any
 * failure the group falls back to trapping traffic to the CPU.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i;
	int err;

	/* Non-gateway groups do not use the adjacency table; just push
	 * the current state to the FIB entries using this group.
	 */
	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Detect whether the set of offloaded nexthops changed; nexthops
	 * becoming offloadable must also be (re)written to hardware.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everthing flow through kernel.
		 */
		goto set_trap;

	ecmp_size = nh_grp->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Switch the group to the newly allocated adjacency range before
	 * writing the nexthops, remembering the old range for migration.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	mlxsw_sp_nexthop_group_rebalance(nh_grp);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* Re-point existing routes from the old adjacency range to the
	 * new one, then release the old range.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	/* Fall back to trapping to the CPU: invalidate the adjacency
	 * index, clear per-nexthop offload state and free the range.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
3266
3267 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3268                                             bool removing)
3269 {
3270         if (!removing)
3271                 nh->should_offload = 1;
3272         else
3273                 nh->should_offload = 0;
3274         nh->update = 1;
3275 }
3276
3277 static void
3278 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3279                               struct mlxsw_sp_neigh_entry *neigh_entry,
3280                               bool removing)
3281 {
3282         struct mlxsw_sp_nexthop *nh;
3283
3284         list_for_each_entry(nh, &neigh_entry->nexthop_list,
3285                             neigh_list_node) {
3286                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3287                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3288         }
3289 }
3290
3291 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3292                                       struct mlxsw_sp_rif *rif)
3293 {
3294         if (nh->rif)
3295                 return;
3296
3297         nh->rif = rif;
3298         list_add(&nh->rif_list_node, &rif->nexthop_list);
3299 }
3300
3301 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3302 {
3303         if (!nh->rif)
3304                 return;
3305
3306         list_del(&nh->rif_list_node);
3307         nh->rif = NULL;
3308 }
3309
/* Resolve and bind the neighbour entry used by a gateway nexthop. A
 * no-op (returning 0) for non-gateway groups or already-bound nexthops.
 * On success a reference on the neighbour is held for the lifetime of
 * the binding; mlxsw_sp_nexthop_neigh_fini() releases it.
 */
static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n;
	u8 nud_state, dead;
	int err;

	if (!nh->nh_grp->gateway || nh->neigh_entry)
		return 0;

	/* Take a reference of neigh here ensuring that neigh would
	 * not be destructed before the nexthop entry is finished.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
	if (!n) {
		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
				 nh->rif->dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		/* Kick off resolution of the freshly created neighbour. */
		neigh_event_send(n, NULL);
	}
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry)) {
			err = -EINVAL;
			goto err_neigh_entry_create;
		}
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router->nexthop_neighs_list);

	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	/* Sample the neighbour state under its lock; only valid, live
	 * neighbours make the nexthop eligible for offload.
	 */
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

	return 0;

err_neigh_entry_create:
	neigh_release(n);
	return err;
}
3364
/* Undo mlxsw_sp_nexthop_neigh_init(): unbind the nexthop from its
 * neighbour entry, destroying the entry and dropping the neighbour
 * reference when no users remain.
 */
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	/* Drop the reference taken in mlxsw_sp_nexthop_neigh_init(). */
	neigh_release(n);
}
3390
3391 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3392 {
3393         struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3394
3395         return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3396 }
3397
3398 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3399                                        struct mlxsw_sp_nexthop *nh,
3400                                        struct mlxsw_sp_ipip_entry *ipip_entry)
3401 {
3402         bool removing;
3403
3404         if (!nh->nh_grp->gateway || nh->ipip_entry)
3405                 return;
3406
3407         nh->ipip_entry = ipip_entry;
3408         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3409         __mlxsw_sp_nexthop_neigh_update(nh, removing);
3410         mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3411 }
3412
3413 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3414                                        struct mlxsw_sp_nexthop *nh)
3415 {
3416         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3417
3418         if (!ipip_entry)
3419                 return;
3420
3421         __mlxsw_sp_nexthop_neigh_update(nh, true);
3422         nh->ipip_entry = NULL;
3423 }
3424
3425 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3426                                         const struct fib_nh *fib_nh,
3427                                         enum mlxsw_sp_ipip_type *p_ipipt)
3428 {
3429         struct net_device *dev = fib_nh->nh_dev;
3430
3431         return dev &&
3432                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3433                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3434 }
3435
3436 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3437                                        struct mlxsw_sp_nexthop *nh)
3438 {
3439         switch (nh->type) {
3440         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3441                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3442                 mlxsw_sp_nexthop_rif_fini(nh);
3443                 break;
3444         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3445                 mlxsw_sp_nexthop_rif_fini(nh);
3446                 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3447                 break;
3448         }
3449 }
3450
/* Initialize the type-specific part of an IPv4 nexthop: bind it either
 * to an offloadable IP-in-IP tunnel or to a router interface (RIF) and
 * its neighbour. A missing RIF is not an error - the nexthop simply
 * stays un-offloaded.
 */
static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct fib_nh *fib_nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct net_device *dev = fib_nh->nh_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	/* Prefer the IP-in-IP binding when the egress device is a tunnel
	 * of a type the device can offload.
	 */
	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV4)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;

	mlxsw_sp_nexthop_rif_init(nh, rif);
	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_neigh_init;

	return 0;

err_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
3488
/* IPv4 wrapper around the common type-specific nexthop teardown. */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3494
3495 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3496                                   struct mlxsw_sp_nexthop_group *nh_grp,
3497                                   struct mlxsw_sp_nexthop *nh,
3498                                   struct fib_nh *fib_nh)
3499 {
3500         struct net_device *dev = fib_nh->nh_dev;
3501         struct in_device *in_dev;
3502         int err;
3503
3504         nh->nh_grp = nh_grp;
3505         nh->key.fib_nh = fib_nh;
3506 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3507         nh->nh_weight = fib_nh->nh_weight;
3508 #else
3509         nh->nh_weight = 1;
3510 #endif
3511         memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3512         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3513         if (err)
3514                 return err;
3515
3516         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3517         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3518
3519         if (!dev)
3520                 return 0;
3521
3522         in_dev = __in_dev_get_rtnl(dev);
3523         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3524             fib_nh->nh_flags & RTNH_F_LINKDOWN)
3525                 return 0;
3526
3527         err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3528         if (err)
3529                 goto err_nexthop_neigh_init;
3530
3531         return 0;
3532
3533 err_nexthop_neigh_init:
3534         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3535         return err;
3536 }
3537
/* Tear down an IPv4 nexthop: reverse of mlxsw_sp_nexthop4_init(). */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
3546
3547 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3548                                     unsigned long event, struct fib_nh *fib_nh)
3549 {
3550         struct mlxsw_sp_nexthop_key key;
3551         struct mlxsw_sp_nexthop *nh;
3552
3553         if (mlxsw_sp->router->aborted)
3554                 return;
3555
3556         key.fib_nh = fib_nh;
3557         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3558         if (WARN_ON_ONCE(!nh))
3559                 return;
3560
3561         switch (event) {
3562         case FIB_EVENT_NH_ADD:
3563                 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3564                 break;
3565         case FIB_EVENT_NH_DEL:
3566                 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3567                 break;
3568         }
3569
3570         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3571 }
3572
3573 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3574                                         struct mlxsw_sp_rif *rif)
3575 {
3576         struct mlxsw_sp_nexthop *nh;
3577         bool removing;
3578
3579         list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3580                 switch (nh->type) {
3581                 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3582                         removing = false;
3583                         break;
3584                 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3585                         removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3586                         break;
3587                 default:
3588                         WARN_ON(1);
3589                         continue;
3590                 }
3591
3592                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3593                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3594         }
3595 }
3596
3597 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3598                                          struct mlxsw_sp_rif *old_rif,
3599                                          struct mlxsw_sp_rif *new_rif)
3600 {
3601         struct mlxsw_sp_nexthop *nh;
3602
3603         list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3604         list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3605                 nh->rif = new_rif;
3606         mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3607 }
3608
/* The RIF is going away: tear down each nexthop that used it and
 * re-program its group. The _safe iterator is required because
 * mlxsw_sp_nexthop_type_fini() unlinks the nexthop from rif_list.
 */
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3619
3620 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3621                                    const struct fib_info *fi)
3622 {
3623         return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3624                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3625 }
3626
/* Create a nexthop group mirroring the kernel's fib_info @fi. The group
 * holds a reference on @fi for its lifetime. On success the group is
 * inserted into the group hashtable and programmed to the device.
 * Returns the group or an ERR_PTR().
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	/* The trailing nexthops[] array holds one entry per fib_nh. */
	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	nh_grp->priv = fi;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->neigh_tbl = &arp_tbl;

	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
	nh_grp->count = fi->fib_nhs;
	fib_info_hold(fi);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop4_init:
	/* Unwind only the nexthops that were successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	fib_info_put(fi);
	kfree(nh_grp);
	return ERR_PTR(err);
}
3672
/* Destroy a nexthop group: remove it from the hashtable, tear down all
 * nexthops and drop the fib_info reference taken at creation time.
 */
static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	/* With all nexthops gone, the refresh is expected to release the
	 * group's adjacency index - hence the WARN below.
	 */
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
	kfree(nh_grp);
}
3690
3691 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3692                                        struct mlxsw_sp_fib_entry *fib_entry,
3693                                        struct fib_info *fi)
3694 {
3695         struct mlxsw_sp_nexthop_group *nh_grp;
3696
3697         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3698         if (!nh_grp) {
3699                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3700                 if (IS_ERR(nh_grp))
3701                         return PTR_ERR(nh_grp);
3702         }
3703         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3704         fib_entry->nh_group = nh_grp;
3705         return 0;
3706 }
3707
3708 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3709                                         struct mlxsw_sp_fib_entry *fib_entry)
3710 {
3711         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3712
3713         list_del(&fib_entry->nexthop_group_node);
3714         if (!list_empty(&nh_grp->fib_list))
3715                 return;
3716         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3717 }
3718
3719 static bool
3720 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3721 {
3722         struct mlxsw_sp_fib4_entry *fib4_entry;
3723
3724         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3725                                   common);
3726         return !fib4_entry->tos;
3727 }
3728
/* Decide whether a FIB entry can be programmed for forwarding in
 * hardware rather than trapped to the CPU.
 */
static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
			return false;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		/* No extra per-protocol restriction for IPv6. */
		break;
	}

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		/* Remote routes need a valid adjacency group. */
		return !!nh_group->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		/* Local routes need a RIF to forward through. */
		return !!nh_group->nh_rif;
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return true;
	default:
		return false;
	}
}
3754
3755 static struct mlxsw_sp_nexthop *
3756 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3757                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3758 {
3759         int i;
3760
3761         for (i = 0; i < nh_grp->count; i++) {
3762                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3763                 struct rt6_info *rt = mlxsw_sp_rt6->rt;
3764
3765                 if (nh->rif && nh->rif->dev == rt->dst.dev &&
3766                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3767                                     &rt->rt6i_gateway))
3768                         return nh;
3769                 continue;
3770         }
3771
3772         return NULL;
3773 }
3774
/* Reflect the hardware offload state of an IPv4 entry in the kernel's
 * RTNH_F_OFFLOAD nexthop flags.
 */
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
		/* Local / decap entries carry no per-nexthop offload
		 * state; mark only the first nexthop of the group.
		 */
		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (nh->offloaded)
			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		else
			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3796
3797 static void
3798 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3799 {
3800         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3801         int i;
3802
3803         for (i = 0; i < nh_grp->count; i++) {
3804                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3805
3806                 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3807         }
3808 }
3809
/* Reflect the hardware offload state of an IPv6 entry in the kernel's
 * per-route RTNH_F_OFFLOAD flags.
 */
static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
		/* Local entries: mark only the first route of the list. */
		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
		struct mlxsw_sp_nexthop *nh;

		/* Mark each route according to its matching nexthop. */
		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
		if (nh && nh->offloaded)
			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		else
			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3836
3837 static void
3838 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3839 {
3840         struct mlxsw_sp_fib6_entry *fib6_entry;
3841         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3842
3843         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3844                                   common);
3845         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3846                 struct rt6_info *rt = mlxsw_sp_rt6->rt;
3847
3848                 rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3849         }
3850 }
3851
3852 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3853 {
3854         switch (fib_entry->fib_node->fib->proto) {
3855         case MLXSW_SP_L3_PROTO_IPV4:
3856                 mlxsw_sp_fib4_entry_offload_set(fib_entry);
3857                 break;
3858         case MLXSW_SP_L3_PROTO_IPV6:
3859                 mlxsw_sp_fib6_entry_offload_set(fib_entry);
3860                 break;
3861         }
3862 }
3863
3864 static void
3865 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3866 {
3867         switch (fib_entry->fib_node->fib->proto) {
3868         case MLXSW_SP_L3_PROTO_IPV4:
3869                 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3870                 break;
3871         case MLXSW_SP_L3_PROTO_IPV6:
3872                 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3873                 break;
3874         }
3875 }
3876
3877 static void
3878 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3879                                    enum mlxsw_reg_ralue_op op, int err)
3880 {
3881         switch (op) {
3882         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3883                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3884         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3885                 if (err)
3886                         return;
3887                 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3888                         mlxsw_sp_fib_entry_offload_set(fib_entry);
3889                 else
3890                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
3891                 return;
3892         default:
3893                 return;
3894         }
3895 }
3896
/* Fill the common part of a RALUE (router LPM unicast entry) register
 * payload from a FIB entry: protocol, operation, virtual router and
 * destination prefix.
 */
static void
mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
			      const struct mlxsw_sp_fib_entry *fib_entry,
			      enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
	enum mlxsw_reg_ralxx_protocol proto;
	u32 *p_dip;

	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;

	switch (fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		p_dip = (u32 *) fib_entry->fib_node->key.addr;
		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      *p_dip);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      fib_entry->fib_node->key.addr);
		break;
	}
}
3922
/* Program a remote (gateway) route: forward via the nexthop group's
 * adjacency range when offloadable, otherwise trap matching packets to
 * the CPU.
 */
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3951
/* Program a local (directly connected) route: forward through the
 * group's RIF when offloadable, otherwise trap matching packets to the
 * CPU.
 */
static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u16 trap_id = 0;
	u16 rif_index = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif_index = rif->rif_index;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
				       rif_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3975
/* Program the route with the IP2ME action: matching packets are
 * delivered to the local CPU.
 */
static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3986
/* Program a tunnel decap entry by delegating to the tunnel-type
 * specific fib_entry_op() implementation.
 */
static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	/* A decap entry must be bound to an IP-in-IP tunnel; a missing
	 * binding indicates a driver bug.
	 */
	if (WARN_ON(!ipip_entry))
		return -EINVAL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
				      fib_entry->decap.tunnel_index);
}
4002
4003 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4004                                    struct mlxsw_sp_fib_entry *fib_entry,
4005                                    enum mlxsw_reg_ralue_op op)
4006 {
4007         switch (fib_entry->type) {
4008         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4009                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4010         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4011                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4012         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4013                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4014         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4015                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4016                                                         fib_entry, op);
4017         }
4018         return -EINVAL;
4019 }
4020
4021 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4022                                  struct mlxsw_sp_fib_entry *fib_entry,
4023                                  enum mlxsw_reg_ralue_op op)
4024 {
4025         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4026
4027         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4028
4029         return err;
4030 }
4031
/* Create or overwrite the entry in the device's routing table. */
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}
4038
/* Delete the entry from the device's routing table. */
static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}
4045
/* Derive the device entry type from the kernel route type reported in
 * the FIB notification.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct net_device *dev = fen_info->fi->fib_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_LOCAL:
		/* A local route whose destination matches an IP-in-IP
		 * tunnel underlay address becomes a decap entry, provided
		 * the overlay device is up; otherwise it is trapped like
		 * any other local route.
		 */
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
						 MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		/* fall through */
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		/* Gateway routes are forwarded via a nexthop; directly
		 * connected routes use the lower-priority local action.
		 */
		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}
4089
4090 static struct mlxsw_sp_fib4_entry *
4091 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4092                            struct mlxsw_sp_fib_node *fib_node,
4093                            const struct fib_entry_notifier_info *fen_info)
4094 {
4095         struct mlxsw_sp_fib4_entry *fib4_entry;
4096         struct mlxsw_sp_fib_entry *fib_entry;
4097         int err;
4098
4099         fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4100         if (!fib4_entry)
4101                 return ERR_PTR(-ENOMEM);
4102         fib_entry = &fib4_entry->common;
4103
4104         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4105         if (err)
4106                 goto err_fib4_entry_type_set;
4107
4108         err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4109         if (err)
4110                 goto err_nexthop4_group_get;
4111
4112         fib4_entry->prio = fen_info->fi->fib_priority;
4113         fib4_entry->tb_id = fen_info->tb_id;
4114         fib4_entry->type = fen_info->type;
4115         fib4_entry->tos = fen_info->tos;
4116
4117         fib_entry->fib_node = fib_node;
4118
4119         return fib4_entry;
4120
4121 err_nexthop4_group_get:
4122 err_fib4_entry_type_set:
4123         kfree(fib4_entry);
4124         return ERR_PTR(err);
4125 }
4126
/* Release the entry's nexthop group reference and free it. */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}
4133
4134 static struct mlxsw_sp_fib4_entry *
4135 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4136                            const struct fib_entry_notifier_info *fen_info)
4137 {
4138         struct mlxsw_sp_fib4_entry *fib4_entry;
4139         struct mlxsw_sp_fib_node *fib_node;
4140         struct mlxsw_sp_fib *fib;
4141         struct mlxsw_sp_vr *vr;
4142
4143         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4144         if (!vr)
4145                 return NULL;
4146         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4147
4148         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4149                                             sizeof(fen_info->dst),
4150                                             fen_info->dst_len);
4151         if (!fib_node)
4152                 return NULL;
4153
4154         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4155                 if (fib4_entry->tb_id == fen_info->tb_id &&
4156                     fib4_entry->tos == fen_info->tos &&
4157                     fib4_entry->type == fen_info->type &&
4158                     mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4159                     fen_info->fi) {
4160                         return fib4_entry;
4161                 }
4162         }
4163
4164         return NULL;
4165 }
4166
/* FIB nodes are hashed by their key (destination address and prefix
 * length) for O(1) lookup on route notifications.
 */
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
4173
/* Make the node discoverable via mlxsw_sp_fib_node_lookup(). */
static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}
4180
/* Remove the node from the FIB's lookup hash table. */
static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}
4187
4188 static struct mlxsw_sp_fib_node *
4189 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4190                          size_t addr_len, unsigned char prefix_len)
4191 {
4192         struct mlxsw_sp_fib_key key;
4193
4194         memset(&key, 0, sizeof(key));
4195         memcpy(key.addr, addr, addr_len);
4196         key.prefix_len = prefix_len;
4197         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4198 }
4199
4200 static struct mlxsw_sp_fib_node *
4201 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4202                          size_t addr_len, unsigned char prefix_len)
4203 {
4204         struct mlxsw_sp_fib_node *fib_node;
4205
4206         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4207         if (!fib_node)
4208                 return NULL;
4209
4210         INIT_LIST_HEAD(&fib_node->entry_list);
4211         list_add(&fib_node->list, &fib->node_list);
4212         memcpy(fib_node->key.addr, addr, addr_len);
4213         fib_node->key.prefix_len = prefix_len;
4214
4215         return fib_node;
4216 }
4217
4218 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4219 {
4220         list_del(&fib_node->list);
4221         WARN_ON(!list_empty(&fib_node->entry_list));
4222         kfree(fib_node);
4223 }
4224
4225 static bool
4226 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4227                                  const struct mlxsw_sp_fib_entry *fib_entry)
4228 {
4229         return list_first_entry(&fib_node->entry_list,
4230                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
4231 }
4232
/* Account the node's prefix length in the LPM tree used by its FIB's
 * protocol, extending the tree's prefix usage if needed.
 */
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	/* If the prefix length is already part of the bound tree, only
	 * bump its reference count.
	 */
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	/* Otherwise get a tree covering the current usage plus the new
	 * prefix length and rebind the virtual routers to it.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}
4264
/* Drop the node's reference on its prefix length and, if it was the
 * last user, try to shrink the LPM tree accordingly.
 */
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	/* Other nodes still use this prefix length; nothing more to do. */
	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	/* Undo the reference taken by mlxsw_sp_lpm_tree_get() above. */
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
4295
4296 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4297                                   struct mlxsw_sp_fib_node *fib_node,
4298                                   struct mlxsw_sp_fib *fib)
4299 {
4300         int err;
4301
4302         err = mlxsw_sp_fib_node_insert(fib, fib_node);
4303         if (err)
4304                 return err;
4305         fib_node->fib = fib;
4306
4307         err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4308         if (err)
4309                 goto err_fib_lpm_tree_link;
4310
4311         return 0;
4312
4313 err_fib_lpm_tree_link:
4314         fib_node->fib = NULL;
4315         mlxsw_sp_fib_node_remove(fib, fib_node);
4316         return err;
4317 }
4318
/* Reverse of mlxsw_sp_fib_node_init(): release the LPM tree prefix
 * reference, then remove the node from the lookup table.
 */
static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}
4328
4329 static struct mlxsw_sp_fib_node *
4330 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4331                       size_t addr_len, unsigned char prefix_len,
4332                       enum mlxsw_sp_l3proto proto)
4333 {
4334         struct mlxsw_sp_fib_node *fib_node;
4335         struct mlxsw_sp_fib *fib;
4336         struct mlxsw_sp_vr *vr;
4337         int err;
4338
4339         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4340         if (IS_ERR(vr))
4341                 return ERR_CAST(vr);
4342         fib = mlxsw_sp_vr_fib(vr, proto);
4343
4344         fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4345         if (fib_node)
4346                 return fib_node;
4347
4348         fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4349         if (!fib_node) {
4350                 err = -ENOMEM;
4351                 goto err_fib_node_create;
4352         }
4353
4354         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4355         if (err)
4356                 goto err_fib_node_init;
4357
4358         return fib_node;
4359
4360 err_fib_node_init:
4361         mlxsw_sp_fib_node_destroy(fib_node);
4362 err_fib_node_create:
4363         mlxsw_sp_vr_put(mlxsw_sp, vr);
4364         return ERR_PTR(err);
4365 }
4366
/* Release a node obtained with mlxsw_sp_fib_node_get(). The node is
 * only torn down once its entry list is empty.
 */
static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	/* Saved up-front because fib_node is freed before the vr put. */
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
4378
/* Find the list position the new entry belongs at: the first existing
 * entry that the new one should precede. Returns NULL if the new entry
 * sorts after everything on the list.
 * NOTE(review): the sort order (table ID, then TOS descending, then
 * priority ascending) is inferred from the comparisons below; confirm
 * against mlxsw_sp_fib4_node_list_insert().
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}
4399
/* Append @new4_entry after the last existing entry that shares its
 * table ID, TOS and priority. @fib4_entry is the first such entry.
 */
static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	/* An append without a matching existing entry is a driver bug. */
	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Advance past all entries with the same table/TOS/priority. */
	fib_node = fib4_entry->common.fib_node;
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	/* If the loop ran off the end, fib4_entry aliases the list head
	 * and the new entry lands at the tail.
	 */
	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}
4421
/* Place @new4_entry on its node's entry list, keeping the list's sort
 * order (see mlxsw_sp_fib4_node_entry_find()).
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	/* A replace without an entry to replace is a driver bug. */
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		/* No insertion point found: place the entry after the
		 * last entry of a higher table ID, or at the list head.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
4461
/* Detach the entry from its node's sorted entry list. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	list_del(&fib4_entry->common.list);
}
4467
/* Program @fib_entry to the device if it became the node's first (and
 * therefore active) entry.
 */
static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	/* Only the first entry of a node is offloaded. */
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		/* Only clear the demoted entry's offload indication; the
		 * write below replaces it in hardware.
		 */
		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
4488
/* Undo the device programming for @fib_entry if it was the node's
 * active (first) entry, promoting its successor when one exists.
 */
static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	/* Non-first entries were never offloaded; nothing to undo. */
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		/* Mark the removed entry as no longer offloaded. */
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	/* Last entry on the node; remove it from the device. */
	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
4509
4510 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4511                                          struct mlxsw_sp_fib4_entry *fib4_entry,
4512                                          bool replace, bool append)
4513 {
4514         int err;
4515
4516         err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4517         if (err)
4518                 return err;
4519
4520         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4521         if (err)
4522                 goto err_fib_node_entry_add;
4523
4524         return 0;
4525
4526 err_fib_node_entry_add:
4527         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4528         return err;
4529 }
4530
/* Remove the entry from the device (if active) and from its node's
 * list; decap entries also release their tunnel decap resources.
 */
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib4_node_list_remove(fib4_entry);

	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
}
4541
4542 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4543                                         struct mlxsw_sp_fib4_entry *fib4_entry,
4544                                         bool replace)
4545 {
4546         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4547         struct mlxsw_sp_fib4_entry *replaced;
4548
4549         if (!replace)
4550                 return;
4551
4552         /* We inserted the new entry before replaced one */
4553         replaced = list_next_entry(fib4_entry, common.list);
4554
4555         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4556         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4557         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4558 }
4559
/* Offload an IPv4 route reported by a FIB notification: resolve its
 * prefix node, create the entry and link it into place.
 */
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	/* Once the router is in aborted state, routes are no longer
	 * offloaded.
	 */
	if (mlxsw_sp->router->aborted)
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	/* Sort into the node's entry list and program the device. */
	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	/* On replace, dispose of the entry that was just superseded. */
	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
4605
/* Remove an offloaded IPv4 route in response to a FIB notification. */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;

	/* In aborted state routes are not offloaded, so there is nothing
	 * to remove.
	 */
	if (mlxsw_sp->router->aborted)
		return;

	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib4_entry))
		return;
	/* Saved first; the entry is freed before the node is put. */
	fib_node = fib4_entry->common.fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4624
4625 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4626 {
4627         /* Packets with link-local destination IP arriving to the router
4628          * are trapped to the CPU, so no need to program specific routes
4629          * for them.
4630          */
4631         if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4632                 return true;
4633
4634         /* Multicast routes aren't supported, so ignore them. Neighbour
4635          * Discovery packets are specifically trapped.
4636          */
4637         if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4638                 return true;
4639
4640         /* Cloned routes are irrelevant in the forwarding path. */
4641         if (rt->rt6i_flags & RTF_CACHE)
4642                 return true;
4643
4644         return false;
4645 }
4646
4647 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4648 {
4649         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4650
4651         mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4652         if (!mlxsw_sp_rt6)
4653                 return ERR_PTR(-ENOMEM);
4654
4655         /* In case of route replace, replaced route is deleted with
4656          * no notification. Take reference to prevent accessing freed
4657          * memory.
4658          */
4659         mlxsw_sp_rt6->rt = rt;
4660         rt6_hold(rt);
4661
4662         return mlxsw_sp_rt6;
4663 }
4664
/* Drop the route reference taken in mlxsw_sp_rt6_create().
 * rt6_release() is presumably unavailable without CONFIG_IPV6, hence
 * the empty stub — TODO confirm.
 */
#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
{
	rt6_release(rt);
}
#else
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
{
}
#endif
4675
/* Drop the route reference, then free the wrapper. */
static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}
4681
4682 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4683 {
4684         /* RTF_CACHE routes are ignored */
4685         return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4686 }
4687
4688 static struct rt6_info *
4689 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4690 {
4691         return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4692                                 list)->rt;
4693 }
4694
/* Find an existing multipath-capable entry that the new route @nrt can
 * join: same table, same metric, both multipath-capable. Replace
 * operations never join an existing entry.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
				 const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
		 * virtual router.
		 */
		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (rt->rt6i_metric < nrt->rt6i_metric)
			continue;
		if (rt->rt6i_metric == nrt->rt6i_metric &&
		    mlxsw_sp_fib6_rt_can_mp(rt))
			return fib6_entry;
		/* The list is metric-sorted; no match past this point. */
		if (rt->rt6i_metric > nrt->rt6i_metric)
			break;
	}

	return NULL;
}
4725
4726 static struct mlxsw_sp_rt6 *
4727 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4728                             const struct rt6_info *rt)
4729 {
4730         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4731
4732         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4733                 if (mlxsw_sp_rt6->rt == rt)
4734                         return mlxsw_sp_rt6;
4735         }
4736
4737         return NULL;
4738 }
4739
/* Tell whether the route's nexthop device is of a recognized IP-in-IP
 * type; the type is reported through @ret (semantics follow
 * mlxsw_sp_netdev_ipip_type()).
 */
static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
					const struct rt6_info *rt,
					enum mlxsw_sp_ipip_type *ret)
{
	return rt->dst.dev &&
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
}
4747
/* Resolve the nexthop's type from its egress device: an offloadable
 * IP-in-IP tunnel nexthop, or an Ethernet nexthop bound to a router
 * interface.
 */
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop_group *nh_grp,
				       struct mlxsw_sp_nexthop *nh,
				       const struct rt6_info *rt)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct net_device *dev = rt->dst.dev;
	struct mlxsw_sp_rif *rif;
	int err;

	/* Prefer a tunnel nexthop when the device is an offloadable
	 * IP-in-IP tunnel.
	 */
	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV6)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	/* Otherwise this is an Ethernet nexthop. Without a router
	 * interface on the device there is nothing to resolve yet.
	 */
	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, rif);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
4786
/* IPv6 nexthops have no extra teardown beyond the common type fini. */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4792
/* Initialize a nexthop from an IPv6 route: record its weight and
 * gateway, register it on the router's nexthop list and resolve its
 * type.
 */
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	nh->nh_grp = nh_grp;
	nh->nh_weight = rt->rt6i_nh_weight;
	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
	/* Return value intentionally ignored; a missing counter does not
	 * fail nexthop setup.
	 */
	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);

	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	/* A device-less route has no type to resolve (presumably a
	 * reject/blackhole route — confirm with callers).
	 */
	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
}
4813
/* Tear down an IPv6 nexthop, undoing mlxsw_sp_nexthop6_init() in reverse
 * order: type-specific state, global list linkage, flow counter.
 */
static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
4821
4822 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4823                                     const struct rt6_info *rt)
4824 {
4825         return rt->rt6i_flags & RTF_GATEWAY ||
4826                mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4827 }
4828
/* Allocate and initialize a nexthop group covering all kernel routes
 * currently linked on @fib6_entry, insert it into the router's nexthop
 * group table and program its members into the device via
 * mlxsw_sp_nexthop_group_refresh(). Returns the new group or ERR_PTR().
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	size_t alloc_size;
	int i = 0;
	int err;

	/* The nexthop array is allocated together with the group itself. */
	alloc_size = sizeof(*nh_grp) +
		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
#if IS_ENABLED(CONFIG_IPV6)
	nh_grp->neigh_tbl = &nd_tbl;
#endif
	/* The gateway property is derived from the first linked route. */
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
	nh_grp->count = fib6_entry->nrt6;
	for (i = 0; i < nh_grp->count; i++) {
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nh_grp->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop6_init:
	/* Unwind only the nexthops that were successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}
4879
/* Undo mlxsw_sp_nexthop6_group_create(): unhash the group, finalize its
 * nexthops and refresh the now-empty group so the device state is
 * released. The WARN_ON checks that the adjacency index was indeed
 * relinquished by the refresh.
 */
static void
mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i = nh_grp->count;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	/* Finalize nexthops in reverse order of their initialization. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON(nh_grp->adj_index_valid);
	kfree(nh_grp);
}
4896
/* Attach @fib6_entry to a nexthop group: reuse an existing group with an
 * identical nexthop set when one is found, otherwise create a new one.
 * Returns 0 on success or a negative errno.
 */
static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}

	/* The group is reference-counted via its fib_list membership; see
	 * mlxsw_sp_nexthop6_group_put().
	 */
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &nh_grp->fib_list);
	fib6_entry->common.nh_group = nh_grp;

	return 0;
}
4915
4916 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4917                                         struct mlxsw_sp_fib_entry *fib_entry)
4918 {
4919         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4920
4921         list_del(&fib_entry->nexthop_group_node);
4922         if (!list_empty(&nh_grp->fib_list))
4923                 return;
4924         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4925 }
4926
/* Move @fib6_entry from its current nexthop group to one matching its
 * present set of routes, updating the device's entry in the process. On
 * failure the entry is re-attached to the old group and an errno is
 * returned; on success the old group is destroyed if it became unused.
 */
static int
mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	int err;

	/* Detach from the old group without destroying it yet, so it can
	 * be restored on error.
	 */
	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	/* Re-attach the entry to the old group. */
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	return err;
}
4962
/* Append kernel route @rt to multipath entry @fib6_entry and switch the
 * entry to a nexthop group that includes the new route. On failure the
 * route is unlinked and destroyed again and an errno is returned.
 */
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6))
		return PTR_ERR(mlxsw_sp_rt6);

	/* Link first, so that the group update below sees the new route. */
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6++;

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_update;

	return 0;

err_nexthop6_group_update:
	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	return err;
}
4990
/* Remove kernel route @rt from multipath entry @fib6_entry and move the
 * entry to a correspondingly reduced nexthop group. A route that is not
 * linked on the entry triggers a WARN and is otherwise ignored. The
 * group update's return value is deliberately not propagated - this is
 * a void removal path.
 */
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
	if (WARN_ON(!mlxsw_sp_rt6))
		return;

	/* Unlink first, so that the group update sees the reduced set. */
	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
5007
5008 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5009                                          struct mlxsw_sp_fib_entry *fib_entry,
5010                                          const struct rt6_info *rt)
5011 {
5012         /* Packets hitting RTF_REJECT routes need to be discarded by the
5013          * stack. We can rely on their destination device not having a
5014          * RIF (it's the loopback device) and can thus use action type
5015          * local, which will cause them to be trapped with a lower
5016          * priority than packets that need to be locally received.
5017          */
5018         if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
5019                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5020         else if (rt->rt6i_flags & RTF_REJECT)
5021                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5022         else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5023                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5024         else
5025                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5026 }
5027
/* Unlink and destroy every route associated with @fib6_entry, leaving
 * its route count at zero.
 */
static void
mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;

	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
				 list) {
		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
}
5040
/* Create a FIB entry under @fib_node for kernel route @rt: wrap the
 * route, classify the entry's action type and bind it to a nexthop
 * group. Returns the new entry or an ERR_PTR(). The entry is not yet
 * linked onto the node's entry list - see
 * mlxsw_sp_fib6_node_entry_link().
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6)) {
		err = PTR_ERR(mlxsw_sp_rt6);
		goto err_rt6_create;
	}

	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);

	/* The entry starts with the single route it was created for. */
	INIT_LIST_HEAD(&fib6_entry->rt6_list);
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6 = 1;
	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_nexthop6_group_get:
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create:
	kfree(fib6_entry);
	return ERR_PTR(err);
}
5082
/* Destroy a FIB entry created by mlxsw_sp_fib6_entry_create(): release
 * its nexthop group reference and all wrapped routes. The WARN_ON
 * asserts that the route count dropped back to zero.
 */
static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	WARN_ON(fib6_entry->nrt6);
	kfree(fib6_entry);
}
5091
/* Find the entry before which a new entry for route @nrt should be
 * inserted in @fib_node's entry list, which is kept ordered by
 * descending table ID and then ascending metric. When @replace is set
 * and an entry with the same table ID and metric exists, that entry is
 * the one being replaced: it is returned directly if its multipath
 * capability matches @nrt's, otherwise it is remembered as a fallback
 * insertion point. Returns NULL when the new entry belongs at a
 * position with no suitable successor.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* Skip entries from higher-numbered tables; stop once a
		 * lower-numbered table begins.
		 */
		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		if (rt->rt6i_metric > nrt->rt6i_metric)
			return fallback ?: fib6_entry;
	}

	return fallback;
}
5118
/* Insert @new6_entry into its FIB node's entry list at the position
 * dictated by table ID and metric ordering. In replace mode a matching
 * entry must already exist (the one being replaced); its absence is a
 * bug and yields -EINVAL. Returns 0 on success.
 */
static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
			       bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
	struct mlxsw_sp_fib6_entry *fib6_entry;

	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);

	if (replace && WARN_ON(!fib6_entry))
		return -EINVAL;

	if (fib6_entry) {
		/* Insert right before the found successor entry. */
		list_add_tail(&new6_entry->common.list,
			      &fib6_entry->common.list);
	} else {
		struct mlxsw_sp_fib6_entry *last;

		/* No successor: append after the last entry whose table ID
		 * is not smaller, preserving descending table-ID order.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);

			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
				break;
			fib6_entry = last;
		}

		if (fib6_entry)
			list_add(&new6_entry->common.list,
				 &fib6_entry->common.list);
		else
			list_add(&new6_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
5156
/* Unlink @fib6_entry from its FIB node's entry list. */
static void
mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	list_del(&fib6_entry->common.list);
}
5162
/* Link @fib6_entry into its FIB node's entry list and program it into
 * the device. On device-programming failure the list insertion is
 * rolled back. Returns 0 or a negative errno.
 */
static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib6_entry *fib6_entry,
					 bool replace)
{
	int err;

	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
	if (err)
		return err;

	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
	return err;
}
5183
/* Reverse of mlxsw_sp_fib6_node_entry_link(): remove the entry from the
 * device and then from the FIB node's entry list.
 */
static void
mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
}
5191
/* Find the offloaded FIB entry corresponding to kernel route @rt by
 * resolving its virtual router and FIB node, then matching on table ID,
 * metric and route membership. Returns NULL when the route is not
 * offloaded.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
					    sizeof(rt->rt6i_dst.addr),
					    rt->rt6i_dst.plen);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* The route may be one member of a multipath entry, so
		 * match on membership, not just on the first route.
		 */
		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
		    rt->rt6i_metric == iter_rt->rt6i_metric &&
		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
			return fib6_entry;
	}

	return NULL;
}
5223
/* Complete a route replace: after the new entry was linked, the entry
 * it replaces sits immediately after it in the node's entry list (see
 * mlxsw_sp_fib6_node_entry_find()); unlink and destroy it and drop its
 * node reference. No-op when @replace is false.
 */
static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	struct mlxsw_sp_fib6_entry *replaced;

	if (!replace)
		return;

	replaced = list_next_entry(fib6_entry, common.list);

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5240
/* Offload kernel IPv6 route @rt. The route is either appended to an
 * existing multipath entry or wrapped in a new FIB entry that is linked
 * into its FIB node, optionally replacing an older entry. Source-
 * specific routes are rejected with -EINVAL; nothing is done after a
 * FIB abort or for routes that should be ignored. Returns 0 or a
 * negative errno.
 */
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct rt6_info *rt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	/* Source-specific routing is not supported by the device. */
	if (rt->rt6i_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
					 &rt->rt6i_dst.addr,
					 sizeof(rt->rt6i_dst.addr),
					 rt->rt6i_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	/* Before creating a new entry, try to append route to an existing
	 * multipath entry.
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		if (err)
			goto err_fib6_entry_nexthop_add;
		return 0;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
	if (err)
		goto err_fib6_node_entry_link;

	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);

	return 0;

err_fib6_node_entry_link:
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
5297
/* Stop offloading kernel IPv6 route @rt. If the route is part of a
 * multipath entry with other routes remaining, only its nexthop is
 * dropped; otherwise the whole FIB entry is unlinked and destroyed and
 * the node reference taken on addition is released. No-op after a FIB
 * abort or for ignored routes; a missing entry triggers a WARN.
 */
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return;

	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (WARN_ON(!fib6_entry))
		return;

	/* If route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	 */
	if (!list_is_singular(&fib6_entry->rt6_list)) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
		return;
	}

	fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5328
/* Program the "abort" LPM configuration for one protocol: allocate LPM
 * tree @tree_id (RALTA), set its structure (RALST), bind it to every
 * virtual router (RALTB) and install a default (/0) route per VR whose
 * action traps packets to the CPU (RALUE ip2me), so the kernel handles
 * all forwarding. Returns 0 or a negative errno from a register write.
 */
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_reg_ralxx_protocol proto,
					    u8 tree_id)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	int i, err;

	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char raltb_pl[MLXSW_REG_RALTB_LEN];
		char ralue_pl[MLXSW_REG_RALUE_LEN];

		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
				      raltb_pl);
		if (err)
			return err;

		/* Default route (prefix length 0) trapping to the CPU. */
		mlxsw_reg_ralue_pack(ralue_pl, proto,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
				      ralue_pl);
		if (err)
			return err;
	}

	return 0;
}
5369
/* Offload an IPv4 multicast route: resolve (or create) the virtual
 * router for the route's table and add the MFC entry to its multicast
 * routing table. No-op after a FIB abort. Returns 0 or a negative
 * errno.
 */
static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
				     struct mfc_entry_notifier_info *men_info,
				     bool replace)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
}
5385
/* Stop offloading an IPv4 multicast route and drop the virtual-router
 * reference taken when it was added. No-op after a FIB abort; a missing
 * VR triggers a WARN.
 */
static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5401
/* Register a multicast VIF with the virtual router's multicast table.
 * The RIF lookup result is passed through as-is (it may be NULL when
 * the device currently has no RIF). No-op after a FIB abort. Returns 0
 * or a negative errno.
 */
static int
mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
				   ven_info->vif_index,
				   ven_info->vif_flags, rif);
}
5421
/* Unregister a multicast VIF and drop the virtual-router reference
 * taken by mlxsw_sp_router_fibmr_vif_add(). No-op after a FIB abort; a
 * missing VR triggers a WARN.
 */
static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5438
/* Install the abort trap configuration for unicast IPv4 and IPv6,
 * using two distinct minimal LPM trees. Returns 0 or a negative errno.
 */
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
	int err;

	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
					       MLXSW_SP_LPM_TREE_MIN);
	if (err)
		return err;

	/* The multicast router code does not need an abort trap as by default,
	 * packets that don't match any routes are trapped to the CPU.
	 */

	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
						MLXSW_SP_LPM_TREE_MIN + 1);
}
5457
/* Unlink and destroy every IPv4 entry under @fib_node, dropping one
 * node reference per entry.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;

	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}
5478
/* IPv6 counterpart of mlxsw_sp_fib4_node_flush(): unlink and destroy
 * every IPv6 entry under @fib_node, dropping one node reference per
 * entry.
 */
static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;

	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
				 common.list) {
		/* The last put may free the node itself, so stop iterating
		 * once the list head was reached before the free.
		 */
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}
5495
/* Flush all entries under @fib_node, dispatching on the node's L3
 * protocol.
 */
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
		break;
	}
}
5508
/* Flush every FIB node of @proto within virtual router @vr. Flushing a
 * node may free it, hence the same do_break guard as in the per-node
 * flush helpers.
 */
static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_vr *vr,
				  enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
	struct mlxsw_sp_fib_node *fib_node, *tmp;

	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
		bool do_break = &tmp->list == &fib->node_list;

		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}
5524
/* Flush all offloaded routing state: for every in-use virtual router,
 * drop its multicast table, its IPv4 FIB and - if the VR is still in
 * use afterwards - its IPv6 FIB.
 */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];

		if (!mlxsw_sp_vr_is_used(vr))
			continue;

		mlxsw_sp_mr_table_flush(vr->mr4_table);
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);

		/* If virtual router was only used for IPv4, then it's no
		 * longer used.
		 */
		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	}
}
5546
/* Give up FIB offloading after an unrecoverable error: flush all
 * offloaded routes, mark the router as aborted (making subsequent
 * add/del notifications no-ops) and install default routes that trap
 * traffic to the CPU. Idempotent - a second call returns immediately.
 */
static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router->aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router->aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}
5560
/* Deferred FIB event, processed in process context (under RTNL) by the
 * *_event_work() handlers below.
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	/* Event payload; which member is valid depends on @event. */
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;	/* FIB_EVENT_* code */
};
5574
/* Work handler for deferred IPv4 FIB events. Runs under RTNL; any
 * offload failure triggers a FIB abort. References taken when the event
 * was queued (fib_info, nh_parent) are released here, and the work item
 * is freed at the end.
 */
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
					fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5617
/* Process a deferred IPv6 FIB event under RTNL. Counterpart of
 * mlxsw_sp_router_fib4_event_work(); the rt6 reference taken in
 * mlxsw_sp_router_fib6_event() is released here.
 */
static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace);
		/* On any programming failure, fall back to abort mode. */
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		/* Drop the rt6 reference taken when the event was queued. */
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5651
/* Process a deferred multicast routing (IPMR) FIB event under RTNL.
 * References taken in mlxsw_sp_router_fibmr_event() (MFC cache entry
 * or VIF netdev) are released here.
 */
static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;

		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
						replace);
		/* On any programming failure, fall back to abort mode. */
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		/* Drop the MFC cache reference taken at queue time. */
		ipmr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
		ipmr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD:
		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
						    &fib_work->ven_info);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		/* Drop the netdev reference taken at queue time. */
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_VIF_DEL:
		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
					      &fib_work->ven_info);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5698
/* Copy the IPv4 notifier payload into the work item and take references
 * on the objects it points at, so they remain valid until the work item
 * runs. Called in atomic context from the FIB notifier chain.
 */
static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work->fnh_info = *fnh_info;
		/* Keep the nexthop's parent fib_info alive until the work runs. */
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
}
5727
/* Copy the IPv6 notifier payload into the work item and take a hold on
 * the route so it survives until the work item runs. Called in atomic
 * context from the FIB notifier chain.
 */
static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen6_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen6_info = container_of(info, struct fib6_entry_notifier_info,
					 info);
		fib_work->fen6_info = *fen6_info;
		/* Released by mlxsw_sp_rt6_release() in the work handler. */
		rt6_hold(fib_work->fen6_info.rt);
		break;
	}
}
5744
/* Copy the IPMR notifier payload into the work item and take references
 * (MFC cache entry or VIF netdev) so they remain valid until the work
 * item runs. Called in atomic context from the FIB notifier chain.
 */
static void
mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
			    struct fib_notifier_info *info)
{
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
		/* Released by ipmr_cache_put() in the work handler. */
		ipmr_cache_hold(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD: /* fall through */
	case FIB_EVENT_VIF_DEL:
		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
		/* Released by dev_put() in the work handler. */
		dev_hold(fib_work->ven_info.dev);
		break;
	}
}
5763
5764 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5765                                           struct fib_notifier_info *info,
5766                                           struct mlxsw_sp *mlxsw_sp)
5767 {
5768         struct netlink_ext_ack *extack = info->extack;
5769         struct fib_rule_notifier_info *fr_info;
5770         struct fib_rule *rule;
5771         int err = 0;
5772
5773         /* nothing to do at the moment */
5774         if (event == FIB_EVENT_RULE_DEL)
5775                 return 0;
5776
5777         if (mlxsw_sp->router->aborted)
5778                 return 0;
5779
5780         fr_info = container_of(info, struct fib_rule_notifier_info, info);
5781         rule = fr_info->rule;
5782
5783         switch (info->family) {
5784         case AF_INET:
5785                 if (!fib4_rule_default(rule) && !rule->l3mdev)
5786                         err = -1;
5787                 break;
5788         case AF_INET6:
5789                 if (!fib6_rule_default(rule) && !rule->l3mdev)
5790                         err = -1;
5791                 break;
5792         case RTNL_FAMILY_IPMR:
5793                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
5794                         err = -1;
5795                 break;
5796         }
5797
5798         if (err < 0)
5799                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
5800
5801         return err;
5802 }
5803
/* Called with rcu_read_lock() */
/* Top-level FIB notifier callback. Runs in atomic context, so the real
 * processing is deferred to a work item on the driver's ordered
 * workqueue; the per-family helpers below take references on the
 * notifier payload, which the work handlers later release.
 */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;
	int err;

	/* Only the init namespace and the three offloaded families are
	 * of interest.
	 */
	if (!net_eq(info->net, &init_net) ||
	    (info->family != AF_INET && info->family != AF_INET6 &&
	     info->family != RTNL_FAMILY_IPMR))
		return NOTIFY_DONE;

	router = container_of(nb, struct mlxsw_sp_router, fib_nb);

	switch (event) {
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		err = mlxsw_sp_router_fib_rule_event(event, info,
						     router->mlxsw_sp);
		/* Supported rule - done. Otherwise fall out of the switch
		 * and queue work so the handler aborts the offload under
		 * RTNL.
		 */
		if (!err)
			return NOTIFY_DONE;
	}

	/* GFP_ATOMIC: we may be called from softirq/atomic context. */
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	fib_work->mlxsw_sp = router->mlxsw_sp;
	fib_work->event = event;

	switch (info->family) {
	case AF_INET:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
		mlxsw_sp_router_fib4_event(fib_work, info);
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		mlxsw_sp_router_fib6_event(fib_work, info);
		break;
	case RTNL_FAMILY_IPMR:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
		mlxsw_sp_router_fibmr_event(fib_work, info);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}
5855
5856 static struct mlxsw_sp_rif *
5857 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5858                          const struct net_device *dev)
5859 {
5860         int i;
5861
5862         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5863                 if (mlxsw_sp->router->rifs[i] &&
5864                     mlxsw_sp->router->rifs[i]->dev == dev)
5865                         return mlxsw_sp->router->rifs[i];
5866
5867         return NULL;
5868 }
5869
/* Disable a RIF in hardware via a read-modify-write of the RITR
 * register: query the current configuration, clear the enable bit and
 * write it back. Returns a negative errno on register access failure.
 */
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	/* A query failure here is unexpected; warn once and bail out. */
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
5883
/* Synchronize driver state with the disappearance of a RIF: disable it
 * in hardware first, then flush the nexthops and neighbours that were
 * using it. The order matters - traffic must stop before state is torn
 * down.
 */
static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
5891
5892 static bool
5893 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5894                            unsigned long event)
5895 {
5896         struct inet6_dev *inet6_dev;
5897         bool addr_list_empty = true;
5898         struct in_device *idev;
5899
5900         switch (event) {
5901         case NETDEV_UP:
5902                 return rif == NULL;
5903         case NETDEV_DOWN:
5904                 idev = __in_dev_get_rtnl(dev);
5905                 if (idev && idev->ifa_list)
5906                         addr_list_empty = false;
5907
5908                 inet6_dev = __in6_dev_get(dev);
5909                 if (addr_list_empty && inet6_dev &&
5910                     !list_empty(&inet6_dev->addr_list))
5911                         addr_list_empty = false;
5912
5913                 if (rif && addr_list_empty &&
5914                     !netif_is_l3_slave(rif->dev))
5915                         return true;
5916                 /* It is possible we already removed the RIF ourselves
5917                  * if it was assigned to a netdev that is now a bridge
5918                  * or LAG slave.
5919                  */
5920                 return false;
5921         }
5922
5923         return false;
5924 }
5925
5926 static enum mlxsw_sp_rif_type
5927 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5928                       const struct net_device *dev)
5929 {
5930         enum mlxsw_sp_fid_type type;
5931
5932         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5933                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
5934
5935         /* Otherwise RIF type is derived from the type of the underlying FID. */
5936         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5937                 type = MLXSW_SP_FID_TYPE_8021Q;
5938         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5939                 type = MLXSW_SP_FID_TYPE_8021Q;
5940         else if (netif_is_bridge_master(dev))
5941                 type = MLXSW_SP_FID_TYPE_8021D;
5942         else
5943                 type = MLXSW_SP_FID_TYPE_RFID;
5944
5945         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5946 }
5947
5948 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5949 {
5950         int i;
5951
5952         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5953                 if (!mlxsw_sp->router->rifs[i]) {
5954                         *p_rif_index = i;
5955                         return 0;
5956                 }
5957         }
5958
5959         return -ENOBUFS;
5960 }
5961
5962 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5963                                                u16 vr_id,
5964                                                struct net_device *l3_dev)
5965 {
5966         struct mlxsw_sp_rif *rif;
5967
5968         rif = kzalloc(rif_size, GFP_KERNEL);
5969         if (!rif)
5970                 return NULL;
5971
5972         INIT_LIST_HEAD(&rif->nexthop_list);
5973         INIT_LIST_HEAD(&rif->neigh_list);
5974         ether_addr_copy(rif->addr, l3_dev->dev_addr);
5975         rif->mtu = l3_dev->mtu;
5976         rif->vr_id = vr_id;
5977         rif->dev = l3_dev;
5978         rif->rif_index = rif_index;
5979
5980         return rif;
5981 }
5982
/* Return the RIF at @rif_index, or NULL if the slot is empty. */
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
					   u16 rif_index)
{
	return mlxsw_sp->router->rifs[rif_index];
}
5988
/* Return the hardware index of @rif. */
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}
5993
/* Return the RIF index of an IP-in-IP loopback RIF. */
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->common.rif_index;
}
5998
/* Return the underlay virtual router ID of an IP-in-IP loopback RIF. */
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->ul_vr_id;
}
6003
/* Return the ifindex of the netdev associated with @rif. */
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
	return rif->dev->ifindex;
}
6008
/* Return the netdev associated with @rif. */
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
{
	return rif->dev;
}
6013
/* Create a RIF for the netdev described by @params: bind a virtual
 * router, allocate a RIF index and object, obtain a FID when the RIF
 * type requires one, configure the hardware and register the RIF with
 * multicast routing. On failure, everything acquired so far is undone
 * in reverse order via the goto unwind chain, and an ERR_PTR is
 * returned.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack)
{
	u32 tb_id = l3mdev_fib_table(params->dev);
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp_fid *fid = NULL;
	enum mlxsw_sp_rif_type type;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int err;

	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
	ops = mlxsw_sp->router->rif_ops_arr[type];

	/* Devices without an l3mdev table fall back to the main table. */
	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	vr->rif_count++;

	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
		goto err_rif_index_alloc;
	}

	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}
	rif->mlxsw_sp = mlxsw_sp;
	rif->ops = ops;

	/* Some RIF types (e.g. loopback) have no FID; fid_get is optional. */
	if (ops->fid_get) {
		fid = ops->fid_get(rif);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			goto err_fid_get;
		}
		rif->fid = fid;
	}

	if (ops->setup)
		ops->setup(rif, params);

	err = ops->configure(rif);
	if (err)
		goto err_configure;

	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
	if (err)
		goto err_mr_rif_add;

	mlxsw_sp_rif_counters_alloc(rif);
	mlxsw_sp->router->rifs[rif_index] = rif;

	return rif;

err_mr_rif_add:
	ops->deconfigure(rif);
err_configure:
	if (fid)
		mlxsw_sp_fid_put(fid);
err_fid_get:
	kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
6088
/* Destroy a RIF, releasing everything mlxsw_sp_rif_create() acquired,
 * in reverse order of creation. The RIF must be flushed from nexthops
 * and neighbours (rif_gone_sync) before hardware deconfiguration.
 */
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
{
	const struct mlxsw_sp_rif_ops *ops = rif->ops;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;
	struct mlxsw_sp_vr *vr;

	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
	vr = &mlxsw_sp->router->vrs[rif->vr_id];

	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
	mlxsw_sp_rif_counters_free(rif);
	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
	ops->deconfigure(rif);
	if (fid)
		/* Loopback RIFs are not associated with a FID. */
		mlxsw_sp_fid_put(fid);
	kfree(rif);
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
6110
6111 static void
6112 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6113                                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6114 {
6115         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6116
6117         params->vid = mlxsw_sp_port_vlan->vid;
6118         params->lag = mlxsw_sp_port->lagged;
6119         if (params->lag)
6120                 params->lag_id = mlxsw_sp_port->lag_id;
6121         else
6122                 params->system_port = mlxsw_sp_port->local_port;
6123 }
6124
/* Join a port-VLAN to the router against the L3 device @l3_dev: reuse
 * or create the sub-port RIF, map the {port, VID} to its rFID and
 * disable learning / force forwarding on the VID, since router traffic
 * is not bridged. Errors unwind via the goto chain in reverse order.
 */
static int
mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
			       struct net_device *l3_dev,
			       struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	u16 vid = mlxsw_sp_port_vlan->vid;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_fid *fid;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (!rif) {
		struct mlxsw_sp_rif_params params = {
			.dev = l3_dev,
		};

		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
		if (IS_ERR(rif))
			return PTR_ERR(rif);
	}

	/* FID was already created, just take a reference */
	fid = rif->ops->fid_get(rif);
	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
	if (err)
		goto err_fid_port_vid_map;

	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
	if (err)
		goto err_port_vid_learning_set;

	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
					BR_STATE_FORWARDING);
	if (err)
		goto err_port_vid_stp_set;

	mlxsw_sp_port_vlan->fid = fid;

	return 0;

err_port_vid_stp_set:
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
err_port_vid_learning_set:
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
err_fid_port_vid_map:
	mlxsw_sp_fid_put(fid);
	return err;
}
6176
/* Undo mlxsw_sp_port_vlan_router_join(): restore STP/learning defaults
 * on the VID, unmap it from the rFID and drop the FID reference. Only
 * valid for port-VLANs whose FID is an rFID.
 */
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
	u16 vid = mlxsw_sp_port_vlan->vid;

	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
		return;

	mlxsw_sp_port_vlan->fid = NULL;
	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	/* If router port holds the last reference on the rFID, then the
	 * associated Sub-port RIF will be destroyed.
	 */
	mlxsw_sp_fid_put(fid);
}
6196
6197 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6198                                              struct net_device *port_dev,
6199                                              unsigned long event, u16 vid,
6200                                              struct netlink_ext_ack *extack)
6201 {
6202         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6203         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6204
6205         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6206         if (WARN_ON(!mlxsw_sp_port_vlan))
6207                 return -EINVAL;
6208
6209         switch (event) {
6210         case NETDEV_UP:
6211                 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6212                                                       l3_dev, extack);
6213         case NETDEV_DOWN:
6214                 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6215                 break;
6216         }
6217
6218         return 0;
6219 }
6220
6221 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6222                                         unsigned long event,
6223                                         struct netlink_ext_ack *extack)
6224 {
6225         if (netif_is_bridge_port(port_dev) ||
6226             netif_is_lag_port(port_dev) ||
6227             netif_is_ovs_port(port_dev))
6228                 return 0;
6229
6230         return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6231                                                  extack);
6232 }
6233
6234 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6235                                          struct net_device *lag_dev,
6236                                          unsigned long event, u16 vid,
6237                                          struct netlink_ext_ack *extack)
6238 {
6239         struct net_device *port_dev;
6240         struct list_head *iter;
6241         int err;
6242
6243         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6244                 if (mlxsw_sp_port_dev_check(port_dev)) {
6245                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6246                                                                 port_dev,
6247                                                                 event, vid,
6248                                                                 extack);
6249                         if (err)
6250                                 return err;
6251                 }
6252         }
6253
6254         return 0;
6255 }
6256
/* Handle an address event on a LAG device. A LAG enslaved to a bridge
 * is handled via the bridge instead; otherwise the event applies to the
 * LAG's default VLAN (PVID 1).
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event,
				       struct netlink_ext_ack *extack)
{
	if (netif_is_bridge_port(lag_dev))
		return 0;

	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
					     extack);
}
6267
6268 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6269                                           unsigned long event,
6270                                           struct netlink_ext_ack *extack)
6271 {
6272         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6273         struct mlxsw_sp_rif_params params = {
6274                 .dev = l3_dev,
6275         };
6276         struct mlxsw_sp_rif *rif;
6277
6278         switch (event) {
6279         case NETDEV_UP:
6280                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6281                 if (IS_ERR(rif))
6282                         return PTR_ERR(rif);
6283                 break;
6284         case NETDEV_DOWN:
6285                 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6286                 mlxsw_sp_rif_destroy(rif);
6287                 break;
6288         }
6289
6290         return 0;
6291 }
6292
6293 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6294                                         unsigned long event,
6295                                         struct netlink_ext_ack *extack)
6296 {
6297         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6298         u16 vid = vlan_dev_vlan_id(vlan_dev);
6299
6300         if (netif_is_bridge_port(vlan_dev))
6301                 return 0;
6302
6303         if (mlxsw_sp_port_dev_check(real_dev))
6304                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6305                                                          event, vid, extack);
6306         else if (netif_is_lag_master(real_dev))
6307                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6308                                                      vid, extack);
6309         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6310                 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6311
6312         return 0;
6313 }
6314
/* Dispatch an address event to the handler matching @dev's type:
 * physical port, LAG, bridge or VLAN device. Unknown device types are
 * ignored.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);

	return 0;
}
6330
/* IPv4 address notifier callback. Only handles NETDEV_DOWN (and other
 * non-UP events); NETDEV_UP is handled by the validator notifier so
 * that errors can veto the address addition.
 */
int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
			    unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
	if (event == NETDEV_UP)
		goto out;

	/* Ignore devices that are not (under) an mlxsw port. */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
out:
	return notifier_from_errno(err);
}
6356
/* IPv4 address validator callback. Handles NETDEV_UP so that a RIF
 * creation failure (reported via extack) can veto the address addition
 * before it takes effect.
 */
int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
				  unsigned long event, void *ptr)
{
	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
	struct net_device *dev = ivi->ivi_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* Ignore devices that are not (under) an mlxsw port. */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
out:
	return notifier_from_errno(err);
}
6378
/* Deferred IPv6 address event, queued from atomic notifier context. */
struct mlxsw_sp_inet6addr_event_work {
	struct work_struct work;	/* queued on the driver workqueue */
	struct net_device *dev;		/* held via dev_hold() until the work runs */
	unsigned long event;		/* NETDEV_UP / NETDEV_DOWN */
};
6384
/* Process a deferred IPv6 address event under RTNL. Releases the
 * netdev reference taken when the event was queued and frees the work
 * item.
 */
static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
	struct net_device *dev = inet6addr_work->dev;
	unsigned long event = inet6addr_work->event;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;

	rtnl_lock();
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	__mlxsw_sp_inetaddr_event(dev, event, NULL);
out:
	rtnl_unlock();
	/* Drop the hold taken in mlxsw_sp_inet6addr_event(). */
	dev_put(dev);
	kfree(inet6addr_work);
}
6409
/* Called with rcu_read_lock() */
/* IPv6 address notifier callback. Runs in atomic context, so the event
 * is deferred to a work item (GFP_ATOMIC allocation, dev_hold() to keep
 * the netdev alive until the work runs). NETDEV_UP is handled by the
 * validator notifier instead.
 */
int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
			     unsigned long event, void *ptr)
{
	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
	struct net_device *dev = if6->idev->dev;

	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
	if (event == NETDEV_UP)
		return NOTIFY_DONE;

	/* Only devices above an mlxsw port are of interest. */
	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
		return NOTIFY_DONE;

	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
	if (!inet6addr_work)
		return NOTIFY_BAD;

	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
	inet6addr_work->dev = dev;
	inet6addr_work->event = event;
	/* Released by dev_put() in the work handler. */
	dev_hold(dev);
	mlxsw_core_schedule_work(&inet6addr_work->work);

	return NOTIFY_DONE;
}
6437
/* Validator notifier for IPv6 addresses: may veto NETDEV_UP by returning
 * an error (wrapped by notifier_from_errno()) with an extack message.
 * Runs in a context where RIF creation can be attempted directly.
 */
int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
	struct net_device *dev = i6vi->i6vi_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
out:
	return notifier_from_errno(err);
}
6459
6460 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6461                              const char *mac, int mtu)
6462 {
6463         char ritr_pl[MLXSW_REG_RITR_LEN];
6464         int err;
6465
6466         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6467         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6468         if (err)
6469                 return err;
6470
6471         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6472         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6473         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6474         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6475 }
6476
/* React to a MAC or MTU change on a netdev that backs a RIF: move the
 * router FDB entry to the new MAC, update the RITR entry, and propagate
 * the new MTU to the multicast router. On failure, roll back in reverse
 * order so hardware state matches the cached rif->addr/rif->mtu.
 */
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	u16 fid_index;
	int err;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	fid_index = mlxsw_sp_fid_index(rif->fid);

	/* Remove the FDB entry for the old MAC before editing the RIF. */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
	if (err)
		return err;

	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	/* Install the FDB entry for the new MAC. */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
	if (err)
		goto err_rif_fdb_op;

	if (rif->mtu != dev->mtu) {
		struct mlxsw_sp_vr *vr;

		/* The RIF is relevant only to its mr_table instance, as unlike
		 * unicast routing, in multicast routing a RIF cannot be shared
		 * between several multicast routing tables.
		 */
		vr = &mlxsw_sp->router->vrs[rif->vr_id];
		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
	}

	/* Only cache the new values once hardware is fully updated. */
	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
	return err;
}
6530
6531 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6532                                   struct net_device *l3_dev,
6533                                   struct netlink_ext_ack *extack)
6534 {
6535         struct mlxsw_sp_rif *rif;
6536
6537         /* If netdev is already associated with a RIF, then we need to
6538          * destroy it and create a new one with the new virtual router ID.
6539          */
6540         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6541         if (rif)
6542                 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6543
6544         return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6545 }
6546
6547 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6548                                     struct net_device *l3_dev)
6549 {
6550         struct mlxsw_sp_rif *rif;
6551
6552         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6553         if (!rif)
6554                 return;
6555         __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6556 }
6557
6558 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6559                                  struct netdev_notifier_changeupper_info *info)
6560 {
6561         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6562         int err = 0;
6563
6564         if (!mlxsw_sp)
6565                 return 0;
6566
6567         switch (event) {
6568         case NETDEV_PRECHANGEUPPER:
6569                 return 0;
6570         case NETDEV_CHANGEUPPER:
6571                 if (info->linking) {
6572                         struct netlink_ext_ack *extack;
6573
6574                         extack = netdev_notifier_info_to_extack(&info->info);
6575                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6576                 } else {
6577                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6578                 }
6579                 break;
6580         }
6581
6582         return err;
6583 }
6584
/* Downcast a generic RIF to its sub-port container. */
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_subport, common);
}
6590
6591 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6592                                        const struct mlxsw_sp_rif_params *params)
6593 {
6594         struct mlxsw_sp_rif_subport *rif_subport;
6595
6596         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6597         rif_subport->vid = params->vid;
6598         rif_subport->lag = params->lag;
6599         if (params->lag)
6600                 rif_subport->lag_id = params->lag_id;
6601         else
6602                 rif_subport->system_port = params->system_port;
6603 }
6604
/* Write (enable) or invalidate (disable) the RITR entry for a sub-port
 * RIF, keyed by either a LAG ID or a system port, plus the VLAN.
 */
static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_rif_subport *rif_subport;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	rif_subport = mlxsw_sp_rif_subport_rif(rif);
	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
			    rif->rif_index, rif->vr_id, rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
				  rif_subport->lag ? rif_subport->lag_id :
						     rif_subport->system_port,
				  rif_subport->vid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6622
/* .configure callback for sub-port RIFs: enable the RITR entry, install
 * the router FDB entry and link the FID back to the RIF. Unwinds the
 * RITR entry on FDB failure.
 */
static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
{
	int err;

	err = mlxsw_sp_rif_subport_op(rif, true);
	if (err)
		return err;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_subport_op(rif, false);
	return err;
}
6643
/* .deconfigure callback for sub-port RIFs: exact reverse of
 * mlxsw_sp_rif_subport_configure().
 */
static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_subport_op(rif, false);
}
6653
/* .fid_get callback: sub-port RIFs use a per-RIF (rFID) FID keyed by the
 * RIF index.
 */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}
6659
/* Operations for RIFs backed by a single {port,VLAN} or {LAG,VLAN}. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};
6668
/* Write (enable) or invalidate (disable) the RITR entry for a VLAN- or
 * FID-based RIF; @vid_fid carries the VID or FID depending on @type.
 */
static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
				    enum mlxsw_reg_ritr_if_type type,
				    u16 vid_fid, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
			    rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6683
/* Local port number used to represent the router: one past the highest
 * physical port of the device.
 */
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
6688
/* .configure callback for VLAN RIFs: enable the RITR entry, add the
 * router port to the FID's MC and BC flood tables, install the router
 * FDB entry and link the FID to the RIF. Errors unwind in reverse order.
 */
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
	return err;
}
6727
/* .deconfigure callback for VLAN RIFs: exact reverse of
 * mlxsw_sp_rif_vlan_configure().
 */
static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}
6743
6744 static struct mlxsw_sp_fid *
6745 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6746 {
6747         u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6748
6749         return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6750 }
6751
/* Operations for RIFs backed by an 802.1Q VLAN. No .setup: the base RIF
 * carries all required state.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
};
6759
/* .configure callback for FID (802.1D bridge) RIFs. Mirrors
 * mlxsw_sp_rif_vlan_configure(), but keys the RITR entry by FID index
 * instead of VID. Errors unwind in reverse order.
 */
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
				       true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
	return err;
}
6799
/* .deconfigure callback for FID RIFs: exact reverse of
 * mlxsw_sp_rif_fid_configure().
 */
static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}
6815
/* .fid_get callback for FID RIFs: look up the 802.1D FID keyed by the
 * bridge netdev's ifindex.
 */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
}
6821
/* Operations for RIFs backed by an 802.1D (VLAN-unaware) bridge FID. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
};
6829
/* Downcast a generic RIF to its IP-in-IP loopback container. */
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}
6835
6836 static void
6837 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6838                            const struct mlxsw_sp_rif_params *params)
6839 {
6840         struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6841         struct mlxsw_sp_rif_ipip_lb *rif_lb;
6842
6843         params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6844                                  common);
6845         rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6846         rif_lb->lb_config = params_lb->lb_config;
6847 }
6848
/* Write (enable) or invalidate (disable) the RITR loopback entry for an
 * IP-in-IP tunnel RIF. Only IPv4 underlays are supported; IPv6 returns
 * -EAFNOSUPPORT.
 */
static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
			struct mlxsw_sp_vr *ul_vr, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr->id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6875
/* .configure callback for IP-in-IP loopback RIFs: take a reference on the
 * underlay VR, enable the loopback RITR entry and record the VR binding.
 * The VR reference is dropped on failure.
 */
static int
mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;
	int err;

	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
	if (IS_ERR(ul_vr))
		return PTR_ERR(ul_vr);

	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
	if (err)
		goto err_loopback_op;

	lb_rif->ul_vr_id = ul_vr->id;
	++ul_vr->rif_count;	/* keeps the underlay VR alive for this RIF */
	return 0;

err_loopback_op:
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
	return err;
}
6901
/* .deconfigure callback for IP-in-IP loopback RIFs: reverse of
 * mlxsw_sp_rif_ipip_lb_configure().
 */
static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;

	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);

	--ul_vr->rif_count;
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}
6914
/* Operations for IP-in-IP loopback RIFs. No .fid_get: loopback RIFs are
 * not associated with a FID.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup			= mlxsw_sp_rif_ipip_lb_setup,
	.configure		= mlxsw_sp_rif_ipip_lb_configure,
	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
};
6922
/* Dispatch table from RIF type to its operations. */
static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
};
6929
6930 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6931 {
6932         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6933
6934         mlxsw_sp->router->rifs = kcalloc(max_rifs,
6935                                          sizeof(struct mlxsw_sp_rif *),
6936                                          GFP_KERNEL);
6937         if (!mlxsw_sp->router->rifs)
6938                 return -ENOMEM;
6939
6940         mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6941
6942         return 0;
6943 }
6944
6945 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6946 {
6947         int i;
6948
6949         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6950                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6951
6952         kfree(mlxsw_sp->router->rifs);
6953 }
6954
/* Configure the global tunneling parameters via the TIGCR register. */
static int
mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
{
	char tigcr_pl[MLXSW_REG_TIGCR_LEN];

	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
}
6963
/* Initialize IP-in-IP support: hook up the per-type tunnel operations,
 * start with an empty tunnel list and program the global tunnel config.
 */
static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
}
6970
/* IP-in-IP teardown: nothing to free, but warn if tunnels were leaked. */
static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
{
	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
}
6975
/* Callback invoked by the FIB notifier infrastructure before it replays
 * the kernel's FIB to us, so that we start from a clean slate.
 */
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}
6988
6989 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Enable a header type in the RECR2 ECMP hash configuration. */
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}
6994
/* Enable a header field in the RECR2 ECMP hash configuration. */
static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}
6999
/* Configure IPv4 multipath hashing: always hash on SIP/DIP; add L4 ports
 * and protocol only when the kernel's multipath hash policy is not L3-only.
 * NOTE(review): the sysctl is read from init_net only — confirm this
 * matches how per-netns policy should apply here.
 */
static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
{
	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
	if (only_l3)
		return;
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
}
7016
/* Configure IPv6 multipath hashing on SIP/DIP, flow label and next
 * header.
 */
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
}
7027
/* Program the ECMP hash (RECR2) with a random seed and the IPv4/IPv6
 * field selection.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	char recr2_pl[MLXSW_REG_RECR2_LEN];
	u32 seed;

	get_random_bytes(&seed, sizeof(seed));
	mlxsw_reg_recr2_pack(recr2_pl, seed);
	mlxsw_sp_mp4_hash_init(recr2_pl);
	mlxsw_sp_mp6_hash_init(recr2_pl);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
}
7040 #else
/* Multipath routing is compiled out; nothing to configure. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7045 #endif
7046
/* Program the DSCP-to-priority mapping (RDPM) so that hardware switch
 * priorities agree with the kernel's ToS-based classification.
 */
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* HW is determining switch priority based on DSCP-bits, but the
	 * kernel is still doing that based on the ToS. Since there's a
	 * mismatch in bits we need to make sure to translate the right
	 * value ToS would observe, skipping the 2 least-significant ECN bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}
7064
7065 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7066 {
7067         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7068         u64 max_rifs;
7069         int err;
7070
7071         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7072                 return -EIO;
7073         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7074
7075         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7076         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7077         mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
7078         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7079         if (err)
7080                 return err;
7081         return 0;
7082 }
7083
/* Disable the router in hardware. The write's return value is
 * intentionally ignored: this runs on teardown paths where no recovery
 * is possible.
 */
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
7091
7092 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7093 {
7094         struct mlxsw_sp_router *router;
7095         int err;
7096
7097         router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7098         if (!router)
7099                 return -ENOMEM;
7100         mlxsw_sp->router = router;
7101         router->mlxsw_sp = mlxsw_sp;
7102
7103         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7104         err = __mlxsw_sp_router_init(mlxsw_sp);
7105         if (err)
7106                 goto err_router_init;
7107
7108         err = mlxsw_sp_rifs_init(mlxsw_sp);
7109         if (err)
7110                 goto err_rifs_init;
7111
7112         err = mlxsw_sp_ipips_init(mlxsw_sp);
7113         if (err)
7114                 goto err_ipips_init;
7115
7116         err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7117                               &mlxsw_sp_nexthop_ht_params);
7118         if (err)
7119                 goto err_nexthop_ht_init;
7120
7121         err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7122                               &mlxsw_sp_nexthop_group_ht_params);
7123         if (err)
7124                 goto err_nexthop_group_ht_init;
7125
7126         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7127         err = mlxsw_sp_lpm_init(mlxsw_sp);
7128         if (err)
7129                 goto err_lpm_init;
7130
7131         err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7132         if (err)
7133                 goto err_mr_init;
7134
7135         err = mlxsw_sp_vrs_init(mlxsw_sp);
7136         if (err)
7137                 goto err_vrs_init;
7138
7139         err = mlxsw_sp_neigh_init(mlxsw_sp);
7140         if (err)
7141                 goto err_neigh_init;
7142
7143         mlxsw_sp->router->netevent_nb.notifier_call =
7144                 mlxsw_sp_router_netevent_event;
7145         err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7146         if (err)
7147                 goto err_register_netevent_notifier;
7148
7149         err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7150         if (err)
7151                 goto err_mp_hash_init;
7152
7153         err = mlxsw_sp_dscp_init(mlxsw_sp);
7154         if (err)
7155                 goto err_dscp_init;
7156
7157         mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7158         err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7159                                     mlxsw_sp_router_fib_dump_flush);
7160         if (err)
7161                 goto err_register_fib_notifier;
7162
7163         return 0;
7164
7165 err_register_fib_notifier:
7166 err_dscp_init:
7167 err_mp_hash_init:
7168         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7169 err_register_netevent_notifier:
7170         mlxsw_sp_neigh_fini(mlxsw_sp);
7171 err_neigh_init:
7172         mlxsw_sp_vrs_fini(mlxsw_sp);
7173 err_vrs_init:
7174         mlxsw_sp_mr_fini(mlxsw_sp);
7175 err_mr_init:
7176         mlxsw_sp_lpm_fini(mlxsw_sp);
7177 err_lpm_init:
7178         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7179 err_nexthop_group_ht_init:
7180         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7181 err_nexthop_ht_init:
7182         mlxsw_sp_ipips_fini(mlxsw_sp);
7183 err_ipips_init:
7184         mlxsw_sp_rifs_fini(mlxsw_sp);
7185 err_rifs_init:
7186         __mlxsw_sp_router_fini(mlxsw_sp);
7187 err_router_init:
7188         kfree(mlxsw_sp->router);
7189         return err;
7190 }
7191
/* Tear down the router subsystem in the exact reverse order of
 * mlxsw_sp_router_init(). Notifiers are unregistered first so no new
 * events arrive while state is being destroyed.
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	kfree(mlxsw_sp->router);
}