// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2018 Netronome Systems, Inc. */

#include "main.h"

/* LAG group config flags. */
#define NFP_FL_LAG_LAST			BIT(1)
#define NFP_FL_LAG_FIRST		BIT(2)
#define NFP_FL_LAG_DATA			BIT(3)
#define NFP_FL_LAG_XON			BIT(4)
#define NFP_FL_LAG_SYNC			BIT(5)
#define NFP_FL_LAG_SWITCH		BIT(6)
#define NFP_FL_LAG_RESET		BIT(7)

/* LAG port state flags. */
#define NFP_PORT_LAG_LINK_UP		BIT(0)
#define NFP_PORT_LAG_TX_ENABLED		BIT(1)
#define NFP_PORT_LAG_CHANGED		BIT(2)

enum nfp_fl_lag_batch {
	NFP_FL_LAG_BATCH_FIRST,
	NFP_FL_LAG_BATCH_MEMBER,
	NFP_FL_LAG_BATCH_FINISHED
};

/**
 * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
 * @ctrl_flags:	Configuration flags
 * @reserved:	Reserved for future use
 * @ttl:	Time to live of packet - host always sets to 0xff
 * @pkt_number:	Config message packet number - increment for each message
 * @batch_ver:	Batch version of messages - increment for each batch of messages
 * @group_id:	Group ID applicable
 * @group_inst:	Group instance number - increment when group is reused
 * @members:	Array of 32-bit words listing all active group members
 */
struct nfp_flower_cmsg_lag_config {
	u8 ctrl_flags;
	u8 reserved[2];
	u8 ttl;
	__be32 pkt_number;
	__be32 batch_ver;
	__be32 group_id;
	__be32 group_inst;
	__be32 members[];
};

/**
 * struct nfp_fl_lag_group - list entry for each LAG group
 * @group_id:		Assigned group ID for host/kernel sync
 * @group_inst:		Group instance in case of ID reuse
 * @list:		List entry
 * @master_ndev:	Group master Netdev
 * @dirty:		Marked if the group needs to be synced to HW
 * @offloaded:		Marked if the group is currently offloaded to NIC
 * @to_remove:		Marked if the group should be removed from NIC
 * @to_destroy:		Marked if the group should be removed from driver
 * @slave_cnt:		Number of slaves in group
 */
struct nfp_fl_lag_group {
	unsigned int group_id;
	u8 group_inst;
	struct list_head list;
	struct net_device *master_ndev;
	bool dirty;
	bool offloaded;
	bool to_remove;
	bool to_destroy;
	unsigned int slave_cnt;
};

#define NFP_FL_LAG_PKT_NUMBER_MASK	GENMASK(30, 0)
#define NFP_FL_LAG_VERSION_MASK		GENMASK(22, 0)
#define NFP_FL_LAG_HOST_TTL		0xff

/* Use this ID with zero members to ack a batch config */
#define NFP_FL_LAG_SYNC_ID		0
#define NFP_FL_LAG_GROUP_MIN		1 /* ID 0 reserved */
#define NFP_FL_LAG_GROUP_MAX		32 /* IDs 1 to 31 are valid */

/* wait for more config */
#define NFP_FL_LAG_DELAY		(msecs_to_jiffies(2))

#define NFP_FL_LAG_RETRANS_LIMIT	100 /* max retrans cmsgs to store */

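/* Each config cmsg carries a strictly increasing packet number, wrapping
 * within the 31-bit mask, so the firmware can detect lost or reordered
 * config messages and request a resync if needed.
 */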
static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
{
	lag->pkt_num++;
	lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK;

	return lag->pkt_num;
}

static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
{
	/* LSB is not considered by firmware so add 2 for each increment. */
	lag->batch_ver += 2;
	lag->batch_ver &= NFP_FL_LAG_VERSION_MASK;

	/* Zero is reserved by firmware. */
	if (!lag->batch_ver)
		lag->batch_ver += 2;
}

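/* Reserve a free group ID from the IDA and add the new group to the list,
 * marked dirty so that the next worker pass pushes it to the firmware.
 */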
static struct nfp_fl_lag_group *
nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
{
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;
	int id;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
			    NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
	if (id < 0) {
		nfp_flower_cmsg_warn(priv->app,
				     "No more bonding groups available\n");
		return ERR_PTR(id);
	}

	group = kmalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		ida_simple_remove(&lag->ida_handle, id);
		return ERR_PTR(-ENOMEM);
	}

	group->group_id = id;
	group->master_ndev = master;
	group->dirty = true;
	group->offloaded = false;
	group->to_remove = false;
	group->to_destroy = false;
	group->slave_cnt = 0;
	group->group_inst = ++lag->global_inst;
	list_add_tail(&group->list, &lag->group_list);

	return group;
}

static struct nfp_fl_lag_group *
nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
					  struct net_device *master)
{
	struct nfp_fl_lag_group *entry;

	if (!master)
		return NULL;

	list_for_each_entry(entry, &lag->group_list, list)
		if (entry->master_ndev == master)
			return entry;

	return NULL;
}

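/* Fill a pre-LAG action with the group ID, current batch version and group
 * instance; the firmware uses these fields to tie the flow to the correct
 * generation of LAG group state.
 */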
int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
				       struct net_device *master,
				       struct nfp_fl_pre_lag *pre_act)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group = NULL;
	__be32 temp_vers;

	mutex_lock(&priv->nfp_lag.lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
							  master);
	if (!group) {
		mutex_unlock(&priv->nfp_lag.lock);
		return -ENOENT;
	}

	pre_act->group_id = cpu_to_be16(group->group_id);
	temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver <<
				NFP_FL_PRE_LAG_VER_OFF);
	memcpy(pre_act->lag_version, &temp_vers, 3);
	pre_act->instance = group->group_inst;
	mutex_unlock(&priv->nfp_lag.lock);

	return 0;
}

int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group = NULL;
	int group_id = -ENOENT;

	mutex_lock(&priv->nfp_lag.lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
							  master);
	if (group)
		group_id = group->group_id;
	mutex_unlock(&priv->nfp_lag.lock);

	return group_id;
}

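/* Build and send one LAG config cmsg. Messages are grouped into batches:
 * the first message of a batch bumps the version and sets the FIRST flag,
 * and the batch is closed by a message carrying the reserved SYNC group ID
 * with the SWITCH and LAST flags set. A group configured with zero members
 * is deleted on the hardware.
 */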
static int
nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
			struct net_device **active_members,
			unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;
	struct nfp_flower_priv *priv;
	unsigned long int flags;
	unsigned int size, i;
	struct sk_buff *skb;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
	size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
	skb = nfp_flower_cmsg_alloc(priv->app, size,
				    NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
				    GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	flags = 0;

	/* Increment batch version for each new batch of config messages. */
	if (*batch == NFP_FL_LAG_BATCH_FIRST) {
		flags |= NFP_FL_LAG_FIRST;
		nfp_fl_increment_version(lag);
		*batch = NFP_FL_LAG_BATCH_MEMBER;
	}

	/* If it is a reset msg then it is also the end of the batch. */
	if (lag->rst_cfg) {
		flags |= NFP_FL_LAG_RESET;
		*batch = NFP_FL_LAG_BATCH_FINISHED;
	}

	/* To signal the end of a batch, both the switch and last flags are
	 * set and the reserved SYNC group ID is used.
	 */
	if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
		flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
		lag->rst_cfg = false;
		cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID);
		cmsg_payload->group_inst = 0;
	} else {
		cmsg_payload->group_id = cpu_to_be32(group->group_id);
		cmsg_payload->group_inst = cpu_to_be32(group->group_inst);
	}

	cmsg_payload->reserved[0] = 0;
	cmsg_payload->reserved[1] = 0;
	cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL;
	cmsg_payload->ctrl_flags = flags;
	cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver);
	cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag));

	for (i = 0; i < member_cnt; i++)
		cmsg_payload->members[i] =
			cpu_to_be32(nfp_repr_get_port_id(active_members[i]));

	nfp_ctrl_tx(priv->app->ctrl, skb);

	return 0;
}

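/* Deferred worker that syncs the driver's group list to the firmware: it
 * first handles pending removals, then re-sends any dirty group whose
 * member set passes validation, and finally closes the config batch.
 */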
static void nfp_fl_lag_do_work(struct work_struct *work)
{
	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
	struct nfp_fl_lag_group *entry, *storage;
	struct delayed_work *delayed_work;
	struct nfp_flower_priv *priv;
	struct nfp_fl_lag *lag;
	int err;

	delayed_work = to_delayed_work(work);
	lag = container_of(delayed_work, struct nfp_fl_lag, work);
	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	mutex_lock(&lag->lock);
	list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
		struct net_device *iter_netdev, **acti_netdevs;
		struct nfp_flower_repr_priv *repr_priv;
		int active_count = 0, slaves = 0;
		struct nfp_repr *repr;
		unsigned long *flags;

		if (entry->to_remove) {
			/* Active count of 0 deletes group on hw. */
			err = nfp_fl_lag_config_group(lag, entry, NULL, 0,
						      &batch);
			if (!err) {
				entry->to_remove = false;
				entry->offloaded = false;
			} else {
				nfp_flower_cmsg_warn(priv->app,
						     "group delete failed\n");
				schedule_delayed_work(&lag->work,
						      NFP_FL_LAG_DELAY);
				continue;
			}

			if (entry->to_destroy) {
				ida_simple_remove(&lag->ida_handle,
						  entry->group_id);
				list_del(&entry->list);
				kfree(entry);
			}
			continue;
		}

		acti_netdevs = kmalloc_array(entry->slave_cnt,
					     sizeof(*acti_netdevs), GFP_KERNEL);
		if (!acti_netdevs) {
			schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
			continue;
		}

		/* Include sanity check in the loop. It may be that a bond has
		 * changed between processing the last notification and the
		 * work queue triggering. If the number of slaves has changed
		 * or it now contains netdevs that cannot be offloaded, ignore
		 * the group until pending notifications are processed.
		 */
		rcu_read_lock();
		for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) {
			if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
				slaves = 0;
				break;
			}

			repr = netdev_priv(iter_netdev);

			if (repr->app != priv->app) {
				slaves = 0;
				break;
			}

			slaves++;
			if (slaves > entry->slave_cnt)
				break;

			/* Check the ports for state changes. */
			repr_priv = repr->app_priv;
			flags = &repr_priv->lag_port_flags;

			if (*flags & NFP_PORT_LAG_CHANGED) {
				*flags &= ~NFP_PORT_LAG_CHANGED;
				entry->dirty = true;
			}

			if ((*flags & NFP_PORT_LAG_TX_ENABLED) &&
			    (*flags & NFP_PORT_LAG_LINK_UP))
				acti_netdevs[active_count++] = iter_netdev;
		}
		rcu_read_unlock();

		if (slaves != entry->slave_cnt || !entry->dirty) {
			kfree(acti_netdevs);
			continue;
		}

		err = nfp_fl_lag_config_group(lag, entry, acti_netdevs,
					      active_count, &batch);
		if (!err) {
			entry->offloaded = true;
			entry->dirty = false;
		} else {
			nfp_flower_cmsg_warn(priv->app,
					     "group offload failed\n");
			schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
		}

		kfree(acti_netdevs);
	}

	/* End the config batch if at least one packet has been batched. */
	if (batch == NFP_FL_LAG_BATCH_MEMBER) {
		batch = NFP_FL_LAG_BATCH_FINISHED;
		err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
		if (err)
			nfp_flower_cmsg_warn(priv->app,
					     "group batch end cmsg failed\n");
	}

	mutex_unlock(&lag->lock);
}

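/* Firmware flow control: DATA-flagged cmsgs handed back by the firmware
 * are queued here and retransmitted once an XON is received.
 */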
static int
nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	if (be32_to_cpu(cmsg_payload->group_id) >= NFP_FL_LAG_GROUP_MAX)
		return -EINVAL;

	/* Drop cmsg retrans if storage limit is exceeded to prevent
	 * overloading. If the fw notices that expected messages have not been
	 * received in a given time block, it will request a full resync.
	 */
	if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT)
		return -ENOSPC;

	__skb_queue_tail(&lag->retrans_skbs, skb);

	return 0;
}

static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag)
{
	struct nfp_flower_priv *priv;
	struct sk_buff *skb;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	while ((skb = __skb_dequeue(&lag->retrans_skbs)))
		nfp_ctrl_tx(priv->app->ctrl, skb);
}

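/* Handle a LAG cmsg returned by the firmware. Returns true if the skb was
 * queued for retransmission, in which case the caller must not free it.
 */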
bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group_entry;
	unsigned long int flags;
	bool store_skb = false;
	int err;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	flags = cmsg_payload->ctrl_flags;

	/* Note the intentional fall through below. If DATA and XON are both
	 * set, the message will be stored and sent again with the rest of the
	 * unprocessed messages list.
	 */

	/* Store */
	if (flags & NFP_FL_LAG_DATA)
		if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb))
			store_skb = true;

	/* Send stored */
	if (flags & NFP_FL_LAG_XON)
		nfp_fl_send_unprocessed(&priv->nfp_lag);

	/* Resend all */
	if (flags & NFP_FL_LAG_SYNC) {
		/* To resend all config:
		 * 1) Clear all unprocessed messages
		 * 2) Mark all groups dirty
		 * 3) Reset NFP group config
		 * 4) Schedule a LAG config update
		 */
		__skb_queue_purge(&priv->nfp_lag.retrans_skbs);

		mutex_lock(&priv->nfp_lag.lock);
		list_for_each_entry(group_entry, &priv->nfp_lag.group_list,
				    list)
			group_entry->dirty = true;

		err = nfp_flower_lag_reset(&priv->nfp_lag);
		if (err)
			nfp_flower_cmsg_warn(priv->app,
					     "mem err in group reset msg\n");
		mutex_unlock(&priv->nfp_lag.lock);

		schedule_delayed_work(&priv->nfp_lag.work, 0);
	}

	return store_skb;
}

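/* Group teardown is deferred to the worker: to_remove clears the group on
 * the NIC, and to_destroy additionally releases the driver-side state.
 */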
static void
nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag,
				 struct nfp_fl_lag_group *group)
{
	group->to_remove = true;

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}

static void
nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
				 struct net_device *master)
{
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	if (!netif_is_bond_master(master))
		return;

	mutex_lock(&lag->lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
	if (!group) {
		mutex_unlock(&lag->lock);
		nfp_warn(priv->app->cpp, "untracked bond got unregistered %s\n",
			 netdev_name(master));
		return;
	}

	group->to_remove = true;
	group->to_destroy = true;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}

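/* A bond can only be offloaded if every slave is a repr belonging to this
 * app and the bond mode maps to a firmware mode (active-backup, or hash on
 * L3/L4 fields); anything else stays in software processing.
 */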
static int
nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
			     struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *iter_netdev;
	struct netdev_lag_upper_info *lag_upper_info;
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;
	unsigned int slave_count = 0;
	bool can_offload = true;
	struct nfp_repr *repr;

	if (!netif_is_lag_master(upper))
		return 0;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, iter_netdev) {
		if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
			can_offload = false;
			break;
		}
		repr = netdev_priv(iter_netdev);

		/* Ensure all ports are created by the same app/on same card. */
		if (repr->app != priv->app) {
			can_offload = false;
			break;
		}

		slave_count++;
	}
	rcu_read_unlock();

	lag_upper_info = info->upper_info;

	/* Firmware supports active/backup and L3/L4 hash bonds. */
	if (lag_upper_info &&
	    lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
	     (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
	      lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 &&
	      lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) {
		can_offload = false;
		nfp_flower_cmsg_warn(priv->app,
				     "Unable to offload tx_type %u hash %u\n",
				     lag_upper_info->tx_type,
				     lag_upper_info->hash_type);
	}

	mutex_lock(&lag->lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper);

	if (slave_count == 0 || !can_offload) {
		/* Cannot offload the group - remove if previously offloaded. */
		if (group && group->offloaded)
			nfp_fl_lag_schedule_group_remove(lag, group);

		mutex_unlock(&lag->lock);
		return 0;
	}

	if (!group) {
		group = nfp_fl_lag_group_create(lag, upper);
		if (IS_ERR(group)) {
			mutex_unlock(&lag->lock);
			return PTR_ERR(group);
		}
	}

	group->dirty = true;
	group->slave_cnt = slave_count;

	/* Group may have been on queue for removal but is now offloadable. */
	group->to_remove = false;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);

	return 0;
}

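/* Record per-port link and TX state in the repr's flag word; the CHANGED
 * bit tells the worker to mark the owning group dirty on its next pass.
 */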
static void
nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
			  struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	struct nfp_flower_repr_priv *repr_priv;
	struct nfp_flower_priv *priv;
	struct nfp_repr *repr;
	unsigned long *flags;

	if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev))
		return;

	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
	repr = netdev_priv(netdev);

	/* Verify that the repr is associated with this app. */
	if (repr->app != priv->app)
		return;

	repr_priv = repr->app_priv;
	flags = &repr_priv->lag_port_flags;

	mutex_lock(&lag->lock);
	if (lag_lower_info->link_up)
		*flags |= NFP_PORT_LAG_LINK_UP;
	else
		*flags &= ~NFP_PORT_LAG_LINK_UP;

	if (lag_lower_info->tx_enabled)
		*flags |= NFP_PORT_LAG_TX_ENABLED;
	else
		*flags &= ~NFP_PORT_LAG_TX_ENABLED;

	*flags |= NFP_PORT_LAG_CHANGED;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}

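/* Notifier entry point: dispatch bond-related netdev events to the
 * handlers above and translate the result into notifier return codes.
 */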
int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv,
				struct net_device *netdev,
				unsigned long event, void *ptr)
{
	struct nfp_fl_lag *lag = &priv->nfp_lag;
	int err;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		err = nfp_fl_lag_changeupper_event(lag, ptr);
		if (err)
			return NOTIFY_BAD;
		return NOTIFY_OK;
	case NETDEV_CHANGELOWERSTATE:
		nfp_fl_lag_changels_event(lag, netdev, ptr);
		return NOTIFY_OK;
	case NETDEV_UNREGISTER:
		nfp_fl_lag_schedule_group_delete(lag, netdev);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}

int nfp_flower_lag_reset(struct nfp_fl_lag *lag)
{
	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;

	lag->rst_cfg = true;
	return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
}

void nfp_flower_lag_init(struct nfp_fl_lag *lag)
{
	INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work);
	INIT_LIST_HEAD(&lag->group_list);
	mutex_init(&lag->lock);
	ida_init(&lag->ida_handle);

	__skb_queue_head_init(&lag->retrans_skbs);

	/* 0 is a reserved batch version so increment to first valid value. */
	nfp_fl_increment_version(lag);
}

void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
{
	struct nfp_fl_lag_group *entry, *storage;

	cancel_delayed_work_sync(&lag->work);

	__skb_queue_purge(&lag->retrans_skbs);

	/* Remove all groups. */
	mutex_lock(&lag->lock);
	list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
		list_del(&entry->list);
		kfree(entry);
	}
	mutex_unlock(&lag->lock);
	mutex_destroy(&lag->lock);
	ida_destroy(&lag->ida_handle);
}