/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);
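
/* Firmware command wrappers: CREATE_LAG programs the initial queue-to-physical
 * port TX affinity (and, for shared FDB mode, native FDB selection), while
 * MODIFY_LAG updates only the port affinity of an existing LAG object.
 */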
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2, bool shared_fdb)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}
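
/* Allocate the LAG object shared by both PFs: create its workqueue, register
 * the netdev notifier and initialize multipath (lag_mp) support. The object
 * is freed through mlx5_ldev_put() -> mlx5_ldev_free() when the last PF
 * drops its reference.
 */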
static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}
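
/* Infer the virtual-to-physical port TX affinity from the bond state: when
 * both ports carry traffic (or neither does), keep the identity mapping;
 * when only one port is link-up and TX-enabled, steer both virtual ports
 * to it.
 */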
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	bool p1en;
	bool p2en;

	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P1].link_up;

	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P2].link_up;

	*port1 = 1;
	*port2 = 2;
	if ((!p1en && !p2en) || (p1en && p2en))
		return;

	if (p1en)
		*port2 = 1;
	else
		*port1 = 2;
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   bool shared_fdb)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
		       shared_fdb);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags,
		      bool shared_fdb)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker, shared_fdb);
	if (err) {
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	ldev->flags |= flags;
	ldev->shared_fdb = shared_fdb;
	return 0;
}

static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;

	if (ldev->shared_fdb) {
		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
		ldev->shared_fdb = false;
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		else
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
	}

	return err;
}

static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		mlx5_nic_vport_disable_roce(dev1);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}
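
/* Shared FDB (single FDB) mode requires both devices in switchdev mode with
 * vport match metadata enabled, a paired eswitch-offloads devcom, and the
 * relevant firmware capabilities: native FDB selection, root flow table on
 * the other eswitch, and a shared ingress ACL.
 */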
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}
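
/* Decide and apply the LAG state: activate RoCE LAG when SR-IOV is disabled
 * and both eswitches are in NONE mode, otherwise VF LAG (optionally with a
 * shared FDB); modify the port affinity if LAG is already active; tear the
 * LAG down when the bond no longer qualifies.
 */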
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag = roce_lag &&
			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
						   MLX5_LAG_FLAG_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
				    struct mlx5_core_dev *dev1)
{
	if (dev0)
		mlx5_esw_lock(dev0->priv.eswitch);
	if (dev1)
		mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
				      struct mlx5_core_dev *dev1)
{
	if (dev1)
		mlx5_esw_unlock(dev1->priv.eswitch);
	if (dev0)
		mlx5_esw_unlock(dev0->priv.eswitch);
}
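
/* Bond work handler: retry later (after HZ) if the device list lock cannot
 * be taken or a mode change is in progress, otherwise run mlx5_do_bond()
 * with both eswitches locked.
 */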
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
	mlx5_lag_unlock_eswitches(dev0, dev1);
	mlx5_dev_list_unlock();
}
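
/* Handle a CHANGEUPPER event: track which of our netdevs are enslaved to the
 * bond (bond_status bitmask) and whether the bond TX type is supported, and
 * report back whether the tracker state changed.
 */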
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
		return 0;
	}

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	if (is_in_lag && !mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
		return NOTIFY_DONE;

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	spin_lock(&lag_lock);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	int i;

	spin_lock(&lag_lock);
	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}
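
/* Bind a core device to the LAG object of its physical device pair: allocate
 * the object for the first PF, take a reference for the second. Returns
 * -EAGAIN while a LAG mode change is in progress so the caller can retry.
 */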
/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return 0;

	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
	} else {
		if (ldev->mode_changes_in_progress)
			return -EAGAIN;
		mlx5_ldev_get(ldev);
	}

	mlx5_ldev_add_mdev(ldev, dev);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

recheck:
	mlx5_dev_list_lock();
	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mlx5_dev_list_unlock();
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	if (err) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	if (__mlx5_lag_is_active(ldev))
		mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
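
/* Temporarily prevent bond state changes: tear down an active LAG and bump
 * mode_changes_in_progress so that queued bond work backs off until
 * mlx5_lag_enable_change() is called.
 */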
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *dev0;
	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	mlx5_dev_list_lock();

	ldev = mlx5_lag_dev(dev);
	dev0 = ldev->pf[MLX5_LAG_P1].dev;
	dev1 = ldev->pf[MLX5_LAG_P2].dev;

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		mlx5_lag_lock_eswitches(dev0, dev1);
		mlx5_disable_lag(ldev);
		mlx5_lag_unlock_eswitches(dev0, dev1);
	}
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	mlx5_dev_list_lock();
	ldev = mlx5_lag_dev(dev);
	ldev->mode_changes_in_progress--;
	mlx5_dev_list_unlock();
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
		   ldev->pf[MLX5_LAG_P2].dev :
		   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock(&lag_lock);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
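
/* Query congestion counters, summing the values from both physical ports
 * when LAG is active so the caller sees aggregate statistics for the bond.
 */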
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);