1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
4 #include <linux/netdevice.h>
5 #include <net/nexthop.h>
12 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
/* Multipath offload is only possible once the LAG is "ready" (both PF
 * slots populated) and the e-switches of the two ports pass the
 * multipath prerequisite check.
 * NOTE(review): chunk is truncated here -- the braces and the body of the
 * early-return branch (presumably "return false;") are not visible.
 */
14 if (!mlx5_lag_is_ready(ldev))
17 return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
18 ldev->pf[MLX5_LAG_P2].dev);
21 static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
/* True when the MULTIPATH mode flag is set on this LAG.  The leading
 * double underscore conventionally marks this as the unlocked variant --
 * presumably the caller is expected to hold the relevant lock (TODO
 * confirm; locking lines are not visible in this chunk). */
23 return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
26 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
/* Public query: is the LAG this core device belongs to in multipath mode?
 * Looks up the lag object for @dev and, if one exists, checks its flag.
 * NOTE(review): truncated -- the declaration of "res", any surrounding
 * locking, and the "return res;" line are not visible in this chunk. */
28 struct mlx5_lag *ldev;
31 ldev = mlx5_lag_dev_get(dev);
/* ldev may be NULL when the device is not part of a LAG; short-circuit
 * keeps the flag check safe in that case. */
32 res = ldev && __mlx5_lag_is_multipath(ldev);
38 * mlx5_lag_set_port_affinity
42 * 0 - set normal affinity.
43 * 1 - set affinity to port 1.
44 * 2 - set affinity to port 2.
47 static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
48 enum mlx5_lag_port_affinity port)
/* Build a synthetic lag_tracker describing the desired tx-enable/link-up
 * state of the two ports for the requested affinity, notify the ports that
 * remain able to transmit, then program the mapping via mlx5_modify_lag().
 * Only meaningful while the LAG is in multipath mode. */
50 struct lag_tracker tracker;
/* No-op unless multipath LAG is active. */
52 if (!__mlx5_lag_is_multipath(ldev))
/* NOTE(review): the "switch (port)" line and the per-case "break;"
 * statements are not visible in this truncated chunk. */
56 case MLX5_LAG_NORMAL_AFFINITY:
/* Normal affinity: both ports carry traffic and report link up. */
57 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
58 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
59 tracker.netdev_state[MLX5_LAG_P1].link_up = true;
60 tracker.netdev_state[MLX5_LAG_P2].link_up = true;
62 case MLX5_LAG_P1_AFFINITY:
/* Steer everything to port 1; port 2 is marked down/disabled. */
63 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
64 tracker.netdev_state[MLX5_LAG_P1].link_up = true;
65 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
66 tracker.netdev_state[MLX5_LAG_P2].link_up = false;
68 case MLX5_LAG_P2_AFFINITY:
/* Steer everything to port 2; port 1 is marked down/disabled. */
69 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
70 tracker.netdev_state[MLX5_LAG_P1].link_up = false;
71 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
72 tracker.netdev_state[MLX5_LAG_P2].link_up = true;
/* Presumably the default case of the switch -- unknown affinity value. */
75 mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
76 "Invalid affinity port %d", port);
/* Notify each port that can still transmit about the affinity change so
 * interested listeners on its event chain can react. */
80 if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
81 mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
82 MLX5_DEV_EVENT_PORT_AFFINITY,
85 if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
86 mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
87 MLX5_DEV_EVENT_PORT_AFFINITY,
/* Finally push the new port mapping to the device. */
90 mlx5_modify_lag(ldev, &tracker);
93 static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
/* FIB-notifier flush callback (passed to register_fib_notifier() below):
 * drain all queued FIB work so no handler runs after unregistration. */
95 struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
97 flush_workqueue(mp->wq);
100 struct mlx5_fib_event_work {
101 struct work_struct work;	/* deferred handler: mlx5_lag_fib_update() */
102 struct mlx5_lag *ldev;	/* LAG the event applies to */
/* Per-event payload; fen_info for ENTRY_* events, fnh_info for NH_*
 * events.  NOTE(review): the "event" field and any union wrapper are not
 * visible in this truncated chunk, though fib_work->event is used later. */
105 struct fib_entry_notifier_info fen_info;
106 struct fib_nh_notifier_info fnh_info;
110 static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
/* Workqueue-context handler for FIB entry add/replace/delete events.
 * Tracks the single multipath route being offloaded (mp->mfi), activates
 * multipath LAG on the first suitable route, and updates port affinity
 * to follow the route's nexthops.
 * NOTE(review): truncated -- remaining parameters, the delete-event body,
 * several guard branches and returns are not visible in this chunk. */
114 struct lag_mp *mp = &ldev->lag_mp;
115 struct fib_nh *fib_nh0, *fib_nh1;
118 /* Handle delete event */
119 if (event == FIB_EVENT_ENTRY_DEL) {
126 /* Handle add/replace event */
127 nhs = fib_info_num_path(fi);
/* Single-nexthop route while LAG is already active: pin affinity to the
 * port backing that nexthop (or back to normal if it is not ours). */
129 if (__mlx5_lag_is_active(ldev)) {
130 struct fib_nh *nh = fib_info_nh(fi, 0);
131 struct net_device *nh_dev = nh->fib_nh_dev;
132 int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
135 i = MLX5_LAG_NORMAL_AFFINITY;
139 mlx5_lag_set_port_affinity(ldev, i);
147 /* Verify next hops are ports of the same hca */
148 fib_nh0 = fib_info_nh(fi, 0);
149 fib_nh1 = fib_info_nh(fi, 1);
/* The two nexthops must map onto this LAG's two netdevs, in either
 * order; otherwise multipath offload is refused. */
150 if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
151 fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
152 !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
153 fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
154 mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
155 "Multipath offload require two ports of the same HCA\n");
159 /* First time we see multipath route */
160 if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
161 struct lag_tracker tracker;
/* Activate LAG in multipath mode using a snapshot of current state. */
163 tracker = ldev->tracker;
164 mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
167 mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
171 static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
/* Workqueue-context handler for per-nexthop add/del events on the tracked
 * multipath route: when one nexthop dies, fail traffic over to the peer
 * port; when the second nexthop comes back, restore normal affinity.
 * NOTE(review): truncated -- the "event"/"fi" parameters and some braces
 * are not visible in this chunk. */
173 struct fib_nh *fib_nh,
176 struct lag_mp *mp = &ldev->lag_mp;
178 /* Check the nh event is related to the route */
179 if (!mp->mfi || mp->mfi != fi)
182 /* nh added/removed */
183 if (event == FIB_EVENT_NH_DEL) {
184 int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);
/* Map the removed nexthop's port index (presumably 0/1) to the OTHER
 * port's affinity value (1/2): 0 -> 2, 1 -> 1. */
187 i = (i + 1) % 2 + 1; /* peer port */
188 mlx5_lag_set_port_affinity(ldev, i);
/* Both paths available again -> spread traffic across both ports. */
190 } else if (event == FIB_EVENT_NH_ADD &&
191 fib_info_num_path(fi) == 2) {
192 mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
196 static void mlx5_lag_fib_update(struct work_struct *work)
/* Workqueue function: dispatch a queued FIB event to the route or nexthop
 * handler, then drop the fib_info reference taken when the work item was
 * created in mlx5_lag_fib_event().
 * NOTE(review): truncated -- the lock taken under "Protect internal
 * structures", the "break;" statements and closing braces are not visible
 * in this chunk. */
198 struct mlx5_fib_event_work *fib_work =
199 container_of(work, struct mlx5_fib_event_work, work);
200 struct mlx5_lag *ldev = fib_work->ldev;
201 struct fib_nh *fib_nh;
203 /* Protect internal structures from changes */
205 switch (fib_work->event) {
206 case FIB_EVENT_ENTRY_REPLACE:
207 case FIB_EVENT_ENTRY_DEL:
208 mlx5_lag_fib_route_event(ldev, fib_work->event,
209 fib_work->fen_info.fi);
/* Paired with fib_info_hold() done at enqueue time. */
210 fib_info_put(fib_work->fen_info.fi);
212 case FIB_EVENT_NH_ADD:
213 case FIB_EVENT_NH_DEL:
214 fib_nh = fib_work->fnh_info.fib_nh;
215 mlx5_lag_fib_nexthop_event(ldev,
217 fib_work->fnh_info.fib_nh,
/* Release the reference on the nexthop's parent fib_info. */
219 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
227 static struct mlx5_fib_event_work *
228 mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
/* Allocate and initialize a work item carrying a FIB event for deferred
 * handling.  GFP_ATOMIC because this runs from notifier (atomic) context.
 * NOTE(review): truncated -- the "return NULL;" on allocation failure and
 * the final "return fib_work;" are not visible in this chunk. */
230 struct mlx5_fib_event_work *fib_work;
232 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
/* Allocation failure here means a lost event; WARN to make it visible. */
233 if (WARN_ON(!fib_work))
236 INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
237 fib_work->ldev = ldev;
238 fib_work->event = event;
243 static int mlx5_lag_fib_event(struct notifier_block *nb,
/* FIB notifier callback (atomic context).  Filters events down to IPv4
 * routes/nexthops relevant to this LAG's netdevs, takes a fib_info
 * reference, and queues a work item for mlx5_lag_fib_update().
 * NOTE(review): truncated -- remaining parameters, the "switch (event)"
 * line, NOTIFY_DONE returns, allocation-failure checks, "break;"s and
 * closing braces are not visible in this chunk. */
247 struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
248 struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
249 struct fib_notifier_info *info = ptr;
250 struct mlx5_fib_event_work *fib_work;
251 struct fib_entry_notifier_info *fen_info;
252 struct fib_nh_notifier_info *fnh_info;
253 struct net_device *fib_dev;
/* Only IPv4 routes are handled. */
256 if (info->family != AF_INET)
/* Ignore everything while multipath prerequisites are not met. */
259 if (!mlx5_lag_multipath_check_prereq(ldev))
263 case FIB_EVENT_ENTRY_REPLACE:
264 case FIB_EVENT_ENTRY_DEL:
265 fen_info = container_of(info, struct fib_entry_notifier_info,
/* Routes built on nexthop objects are explicitly rejected. */
269 NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
270 return notifier_from_errno(-EINVAL);
/* Skip routes whose first nexthop device is not one of our two ports. */
272 fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
273 if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
274 fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
277 fib_work = mlx5_lag_init_fib_work(ldev, event);
280 fib_work->fen_info = *fen_info;
281 /* Take reference on fib_info to prevent it from being
282 * freed while work is queued. Release it afterwards.
284 fib_info_hold(fib_work->fen_info.fi);
286 case FIB_EVENT_NH_ADD:
287 case FIB_EVENT_NH_DEL:
288 fnh_info = container_of(info, struct fib_nh_notifier_info,
290 fib_work = mlx5_lag_init_fib_work(ldev, event);
293 fib_work->fnh_info = *fnh_info;
/* Pin the nexthop's parent fib_info until the work runs. */
294 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
/* Defer the real handling to process context on the lag workqueue. */
300 queue_work(mp->wq, &fib_work->work);
305 int mlx5_lag_mp_init(struct mlx5_lag *ldev)
/* Set up multipath support for a LAG: create the ordered workqueue that
 * serializes FIB event handling and register the FIB notifier.  Returns 0
 * on success or a negative errno.
 * NOTE(review): truncated -- the "err" declaration, the early "return 0"
 * for the already-initialized case, the workqueue-creation failure check,
 * the "if (err)" guard and final return are not visible in this chunk. */
307 struct lag_mp *mp = &ldev->lag_mp;
/* A non-NULL notifier_call doubles as the "already initialized" flag
 * (see the matching check in mlx5_lag_mp_cleanup()). */
310 if (mp->fib_nb.notifier_call)
313 mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
317 mp->fib_nb.notifier_call = mlx5_lag_fib_event;
318 err = register_fib_notifier(&init_net, &mp->fib_nb,
319 mlx5_lag_fib_event_flush, NULL);
/* Registration failed: undo the workqueue and clear the init flag. */
321 destroy_workqueue(mp->wq);
322 mp->fib_nb.notifier_call = NULL;
328 void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
/* Tear down multipath support: unregister the FIB notifier (which flushes
 * pending work via mlx5_lag_fib_event_flush), destroy the workqueue, and
 * clear notifier_call so a later mlx5_lag_mp_init() can run again.
 * Safe to call when init never ran or already cleaned up. */
330 struct lag_mp *mp = &ldev->lag_mp;
/* Nothing to do if init never completed. */
332 if (!mp->fib_nb.notifier_call)
335 unregister_fib_notifier(&init_net, &mp->fib_nb);
336 destroy_workqueue(mp->wq);
337 mp->fib_nb.notifier_call = NULL;