nfp: flower: monitor and offload LAG groups
author John Hurley <john.hurley@netronome.com>
Thu, 24 May 2018 02:22:53 +0000 (19:22 -0700)
committer David S. Miller <davem@davemloft.net>
Fri, 25 May 2018 03:10:57 +0000 (23:10 -0400)
Monitor LAG events via the NETDEV_CHANGEUPPER/NETDEV_CHANGELOWERSTATE
notifiers to maintain a list of offloadable groups. Sync these groups with
HW via a delayed workqueue to prevent excessive re-configuration. When the
workqueue is triggered it may generate multiple control messages for
different groups. These messages are linked via a batch ID and flags to
indicate a new batch and the end of a batch.

Update private data in each repr to track their LAG lower state flags. The
state of a repr is used to determine the active netdevs that can be
offloaded. For example, in active-backup mode, we only offload the netdev
currently active.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/netronome/nfp/Makefile
drivers/net/ethernet/netronome/nfp/flower/lag_conf.c [new file with mode: 0644]
drivers/net/ethernet/netronome/nfp/flower/main.c
drivers/net/ethernet/netronome/nfp/flower/main.h

index 6373f56..4afb103 100644 (file)
@@ -37,6 +37,7 @@ ifeq ($(CONFIG_NFP_APP_FLOWER),y)
 nfp-objs += \
            flower/action.o \
            flower/cmsg.o \
+           flower/lag_conf.o \
            flower/main.o \
            flower/match.o \
            flower/metadata.o \
diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
new file mode 100644 (file)
index 0000000..35a700b
--- /dev/null
@@ -0,0 +1,589 @@
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "main.h"
+
+/* LAG group config flags.
+ * Bits OR'ed together into the ctrl_flags byte of a LAG config control
+ * message (struct nfp_flower_cmsg_lag_config).
+ */
+#define NFP_FL_LAG_LAST                        BIT(1)
+#define NFP_FL_LAG_FIRST               BIT(2)
+#define NFP_FL_LAG_SWITCH              BIT(6)
+#define NFP_FL_LAG_RESET               BIT(7)
+
+/* LAG port state flags.
+ * Kept in the per-repr lag_port_flags word. CHANGED is set whenever a lower
+ * state notification updates the port and is cleared again by the work
+ * handler once it has marked the owning group dirty.
+ */
+#define NFP_PORT_LAG_LINK_UP           BIT(0)
+#define NFP_PORT_LAG_TX_ENABLED                BIT(1)
+#define NFP_PORT_LAG_CHANGED           BIT(2)
+
+/**
+ * enum nfp_fl_lag_batch - position of a config message within a batch
+ * @NFP_FL_LAG_BATCH_FIRST:    No message sent yet - the next one opens a batch
+ * @NFP_FL_LAG_BATCH_MEMBER:   A batch is open - at least one message was sent
+ * @NFP_FL_LAG_BATCH_FINISHED: The message being built closes the batch
+ */
+enum nfp_fl_lag_batch {
+       NFP_FL_LAG_BATCH_FIRST,
+       NFP_FL_LAG_BATCH_MEMBER,
+       NFP_FL_LAG_BATCH_FINISHED
+};
+
+/**
+ * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
+ * @ctrl_flags:        Configuration flags (NFP_FL_LAG_* bits)
+ * @reserved:  Reserved for future use - written as zero
+ * @ttl:       Time to live of packet - host always sets to 0xff
+ * @pkt_number:        Config message packet number - increment for each message
+ * @batch_ver: Batch version of messages - increment for each batch of messages
+ * @group_id:  Group ID applicable
+ * @group_inst:        Group instance number - increment when group is reused
+ * @members:   Array of 32-bit words listing the port IDs of all active
+ *             group members
+ *
+ * All multi-byte fields are big endian.
+ */
+struct nfp_flower_cmsg_lag_config {
+       u8 ctrl_flags;
+       u8 reserved[2];
+       u8 ttl;
+       __be32 pkt_number;
+       __be32 batch_ver;
+       __be32 group_id;
+       __be32 group_inst;
+       __be32 members[];
+};
+
+/**
+ * struct nfp_fl_lag_group - list entry for each LAG group
+ * @group_id:          Assigned group ID for host/kernel sync
+ * @group_inst:                Group instance in case of ID reuse
+ * @list:              List entry
+ * @master_ndev:       Group master Netdev
+ * @dirty:             Marked if the group needs to be synced to HW
+ * @offloaded:         Marked if the group is currently offloaded to NIC
+ * @to_remove:         Marked if the group should be removed from NIC
+ * @to_destroy:                Marked if the group should be removed from driver
+ * @slave_cnt:         Number of slaves in group
+ */
+struct nfp_fl_lag_group {
+       unsigned int group_id;
+       u8 group_inst;
+       struct list_head list;
+       struct net_device *master_ndev;
+       bool dirty;
+       bool offloaded;
+       bool to_remove;
+       bool to_destroy;
+       unsigned int slave_cnt;
+};
+
+/* The packet number and batch version counters wrap within these masks. */
+#define NFP_FL_LAG_PKT_NUMBER_MASK     GENMASK(30, 0)
+#define NFP_FL_LAG_VERSION_MASK                GENMASK(22, 0)
+/* TTL value placed in every config message sent by the host. */
+#define NFP_FL_LAG_HOST_TTL            0xff
+
+/* Use this ID with zero members to ack a batch config */
+#define NFP_FL_LAG_SYNC_ID             0
+#define NFP_FL_LAG_GROUP_MIN           1 /* ID 0 reserved */
+#define NFP_FL_LAG_GROUP_MAX           32 /* IDs 1 to 31 are valid */
+
+/* wait for more config */
+#define NFP_FL_LAG_DELAY               (msecs_to_jiffies(2))
+
+/* Advance the config packet counter, wrapping it within the packet number
+ * mask, and return the new value.
+ */
+static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
+{
+       lag->pkt_num = (lag->pkt_num + 1) & NFP_FL_LAG_PKT_NUMBER_MASK;
+
+       return lag->pkt_num;
+}
+
+/* Move the batch version on to its next valid value. */
+static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
+{
+       unsigned int next_ver;
+
+       /* Firmware ignores the LSB, so versions advance in steps of 2. */
+       next_ver = (lag->batch_ver + 2) & NFP_FL_LAG_VERSION_MASK;
+
+       /* Version zero is reserved by firmware - skip over it on wrap. */
+       if (next_ver == 0)
+               next_ver = 2;
+
+       lag->batch_ver = next_ver;
+}
+
+/* Reserve a group ID and allocate a tracking entry for @master, then append
+ * the entry to the LAG group list. A new group starts out dirty and not
+ * offloaded. Returns the new group, or an ERR_PTR() if no ID is free or the
+ * allocation fails.
+ */
+static struct nfp_fl_lag_group *
+nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
+{
+       struct nfp_flower_priv *priv =
+               container_of(lag, struct nfp_flower_priv, nfp_lag);
+       struct nfp_fl_lag_group *new_group;
+       int new_id;
+
+       new_id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
+                               NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
+       if (new_id < 0) {
+               nfp_flower_cmsg_warn(priv->app,
+                                    "No more bonding groups available\n");
+               return ERR_PTR(new_id);
+       }
+
+       new_group = kmalloc(sizeof(*new_group), GFP_KERNEL);
+       if (!new_group) {
+               /* Hand the ID back so it can be used by a later group. */
+               ida_simple_remove(&lag->ida_handle, new_id);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       new_group->master_ndev = master;
+       new_group->group_id = new_id;
+       new_group->group_inst = ++lag->global_inst;
+       new_group->slave_cnt = 0;
+       new_group->dirty = true;
+       new_group->offloaded = false;
+       new_group->to_remove = false;
+       new_group->to_destroy = false;
+       list_add_tail(&new_group->list, &lag->group_list);
+
+       return new_group;
+}
+
+/* Look up the tracked group whose master netdev is @master.
+ * Returns NULL when @master is NULL or no such group is on the list.
+ */
+static struct nfp_fl_lag_group *
+nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
+                                         struct net_device *master)
+{
+       struct nfp_fl_lag_group *grp;
+
+       if (master) {
+               list_for_each_entry(grp, &lag->group_list, list) {
+                       if (grp->master_ndev == master)
+                               return grp;
+               }
+       }
+
+       return NULL;
+}
+
+/* Build and transmit one LAG config control message.
+ *
+ * @lag:               LAG private data (packet number, batch version, reset)
+ * @group:             Group being configured. Only dereferenced for a normal
+ *                     batch member message; callers pass NULL for the batch
+ *                     end and reset messages.
+ * @active_members:    Netdevs whose port IDs are written to the member list
+ * @member_cnt:                Number of entries in @active_members
+ * @batch:             Batch state, updated in place: FIRST becomes MEMBER
+ *                     once a batch has been opened, and a pending reset
+ *                     forces it to FINISHED.
+ *
+ * Returns 0 on success or -ENOMEM if the control message cannot be allocated.
+ */
+static int
+nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
+                       struct net_device **active_members,
+                       unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
+{
+       struct nfp_flower_cmsg_lag_config *cmsg_payload;
+       struct nfp_flower_priv *priv;
+       unsigned long int flags;
+       unsigned int size, i;
+       struct sk_buff *skb;
+
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+       /* Payload is the fixed header plus one 32-bit word per member. */
+       size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
+       skb = nfp_flower_cmsg_alloc(priv->app, size,
+                                   NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
+                                   GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+
+       cmsg_payload = nfp_flower_cmsg_get_data(skb);
+       flags = 0;
+
+       /* Increment batch version for each new batch of config messages. */
+       if (*batch == NFP_FL_LAG_BATCH_FIRST) {
+               flags |= NFP_FL_LAG_FIRST;
+               nfp_fl_increment_version(lag);
+               *batch = NFP_FL_LAG_BATCH_MEMBER;
+       }
+
+       /* If it is a reset msg then it is also the end of the batch. */
+       if (lag->rst_cfg) {
+               flags |= NFP_FL_LAG_RESET;
+               *batch = NFP_FL_LAG_BATCH_FINISHED;
+       }
+
+       /* To signal the end of a batch, both the switch and last flags are set
+        * and the reserved SYNC group ID is used.
+        */
+       if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
+               flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
+               lag->rst_cfg = false;
+               cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID);
+               cmsg_payload->group_inst = 0;
+       } else {
+               cmsg_payload->group_id = cpu_to_be32(group->group_id);
+               cmsg_payload->group_inst = cpu_to_be32(group->group_inst);
+       }
+
+       cmsg_payload->reserved[0] = 0;
+       cmsg_payload->reserved[1] = 0;
+       cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL;
+       cmsg_payload->ctrl_flags = flags;
+       cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver);
+       cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag));
+
+       /* List the port ID of every active member of the group. */
+       for (i = 0; i < member_cnt; i++)
+               cmsg_payload->members[i] =
+                       cpu_to_be32(nfp_repr_get_port_id(active_members[i]));
+
+       nfp_ctrl_tx(priv->app->ctrl, skb);
+       return 0;
+}
+
+/* Delayed work handler that syncs the tracked LAG groups with the HW.
+ *
+ * Walks the group list under the LAG lock. Groups marked for removal are
+ * deleted on the HW (and freed if also marked for destruction). For every
+ * other group the current bond members are re-validated and, if the group is
+ * dirty, its active members are sent to the HW. All messages generated by one
+ * run form a single batch, closed by a final batch end message. Any failure
+ * reschedules the work so the group is retried later.
+ */
+static void nfp_fl_lag_do_work(struct work_struct *work)
+{
+       enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
+       struct nfp_fl_lag_group *entry, *storage;
+       struct delayed_work *delayed_work;
+       struct nfp_flower_priv *priv;
+       struct nfp_fl_lag *lag;
+       int err;
+
+       delayed_work = to_delayed_work(work);
+       lag = container_of(delayed_work, struct nfp_fl_lag, work);
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+       mutex_lock(&lag->lock);
+       list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
+               struct net_device *iter_netdev, **acti_netdevs;
+               struct nfp_flower_repr_priv *repr_priv;
+               int active_count = 0, slaves = 0;
+               struct nfp_repr *repr;
+               unsigned long *flags;
+
+               if (entry->to_remove) {
+                       /* Active count of 0 deletes group on hw. */
+                       err = nfp_fl_lag_config_group(lag, entry, NULL, 0,
+                                                     &batch);
+                       if (!err) {
+                               entry->to_remove = false;
+                               entry->offloaded = false;
+                       } else {
+                               nfp_flower_cmsg_warn(priv->app,
+                                                    "group delete failed\n");
+                               schedule_delayed_work(&lag->work,
+                                                     NFP_FL_LAG_DELAY);
+                               continue;
+                       }
+
+                       if (entry->to_destroy) {
+                               ida_simple_remove(&lag->ida_handle,
+                                                 entry->group_id);
+                               list_del(&entry->list);
+                               kfree(entry);
+                       }
+                       continue;
+               }
+
+               acti_netdevs = kmalloc_array(entry->slave_cnt,
+                                            sizeof(*acti_netdevs), GFP_KERNEL);
+               /* Without the array the members cannot be collected; leave
+                * the group untouched and retry on a later run.
+                */
+               if (!acti_netdevs) {
+                       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+                       continue;
+               }
+
+               /* Include sanity check in the loop. It may be that a bond has
+                * changed between processing the last notification and the
+                * work queue triggering. If the number of slaves has changed
+                * or it now contains netdevs that cannot be offloaded, ignore
+                * the group until pending notifications are processed.
+                */
+               rcu_read_lock();
+               for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) {
+                       if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
+                               slaves = 0;
+                               break;
+                       }
+
+                       repr = netdev_priv(iter_netdev);
+
+                       if (repr->app != priv->app) {
+                               slaves = 0;
+                               break;
+                       }
+
+                       /* Stop before writing past the end of acti_netdevs,
+                        * which only has room for slave_cnt entries.
+                        */
+                       slaves++;
+                       if (slaves > entry->slave_cnt)
+                               break;
+
+                       /* Check the ports for state changes. */
+                       repr_priv = repr->app_priv;
+                       flags = &repr_priv->lag_port_flags;
+
+                       if (*flags & NFP_PORT_LAG_CHANGED) {
+                               *flags &= ~NFP_PORT_LAG_CHANGED;
+                               entry->dirty = true;
+                       }
+
+                       if ((*flags & NFP_PORT_LAG_TX_ENABLED) &&
+                           (*flags & NFP_PORT_LAG_LINK_UP))
+                               acti_netdevs[active_count++] = iter_netdev;
+               }
+               rcu_read_unlock();
+
+               if (slaves != entry->slave_cnt || !entry->dirty) {
+                       kfree(acti_netdevs);
+                       continue;
+               }
+
+               err = nfp_fl_lag_config_group(lag, entry, acti_netdevs,
+                                             active_count, &batch);
+               if (!err) {
+                       entry->offloaded = true;
+                       entry->dirty = false;
+               } else {
+                       nfp_flower_cmsg_warn(priv->app,
+                                            "group offload failed\n");
+                       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+               }
+
+               kfree(acti_netdevs);
+       }
+
+       /* End the config batch if at least one packet has been batched. */
+       if (batch == NFP_FL_LAG_BATCH_MEMBER) {
+               batch = NFP_FL_LAG_BATCH_FINISHED;
+               err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
+               if (err)
+                       nfp_flower_cmsg_warn(priv->app,
+                                            "group batch end cmsg failed\n");
+       }
+
+       mutex_unlock(&lag->lock);
+}
+
+/* Mark @group for removal from the NIC and schedule the work that performs
+ * it. The group entry itself stays on the driver's list. The caller in this
+ * file holds lag->lock.
+ */
+static void
+nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag,
+                                struct nfp_fl_lag_group *group)
+{
+       group->to_remove = true;
+
+       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+}
+
+/* Mark the group owned by @master for removal from the NIC and destruction in
+ * the driver, then schedule the work that carries it out.
+ * Returns 0 on success or -ENOENT if no group is tracked for @master.
+ */
+static int
+nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
+                                struct net_device *master)
+{
+       struct nfp_fl_lag_group *grp;
+       int ret = -ENOENT;
+
+       mutex_lock(&lag->lock);
+       grp = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
+       if (grp) {
+               grp->to_remove = true;
+               grp->to_destroy = true;
+               ret = 0;
+       }
+       mutex_unlock(&lag->lock);
+
+       if (ret)
+               return ret;
+
+       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+       return 0;
+}
+
+/* Handle a NETDEV_CHANGEUPPER notification.
+ *
+ * Ignored unless the upper device is a LAG master. The bond's current members
+ * are counted and the bond is considered offloadable only if every member is
+ * a repr of this app and the tx/hash type is supported. An offloadable bond
+ * gets a group (created on first use), is marked dirty and a sync to HW is
+ * scheduled; a bond that can no longer be offloaded is scheduled for removal
+ * if it was previously offloaded.
+ *
+ * Returns 0, or a negative error code if a new group cannot be created.
+ */
+static int
+nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
+                            struct netdev_notifier_changeupper_info *info)
+{
+       struct net_device *upper = info->upper_dev, *iter_netdev;
+       struct netdev_lag_upper_info *lag_upper_info;
+       struct nfp_fl_lag_group *group;
+       struct nfp_flower_priv *priv;
+       unsigned int slave_count = 0;
+       bool can_offload = true;
+       struct nfp_repr *repr;
+
+       if (!netif_is_lag_master(upper))
+               return 0;
+
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+       rcu_read_lock();
+       for_each_netdev_in_bond_rcu(upper, iter_netdev) {
+               if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
+                       can_offload = false;
+                       break;
+               }
+               repr = netdev_priv(iter_netdev);
+
+               /* Ensure all ports are created by the same app/on same card. */
+               if (repr->app != priv->app) {
+                       can_offload = false;
+                       break;
+               }
+
+               slave_count++;
+       }
+       rcu_read_unlock();
+
+       lag_upper_info = info->upper_info;
+
+       /* Firmware supports active/backup and L3/L4 hash bonds. */
+       if (lag_upper_info &&
+           lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+           (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
+           (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
+           lag_upper_info->hash_type != NETDEV_LAG_HASH_E34))) {
+               can_offload = false;
+               nfp_flower_cmsg_warn(priv->app,
+                                    "Unable to offload tx_type %u hash %u\n",
+                                    lag_upper_info->tx_type,
+                                    lag_upper_info->hash_type);
+       }
+
+       mutex_lock(&lag->lock);
+       group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper);
+
+       if (slave_count == 0 || !can_offload) {
+               /* Cannot offload the group - remove if previously offloaded. */
+               if (group && group->offloaded)
+                       nfp_fl_lag_schedule_group_remove(lag, group);
+
+               mutex_unlock(&lag->lock);
+               return 0;
+       }
+
+       if (!group) {
+               group = nfp_fl_lag_group_create(lag, upper);
+               if (IS_ERR(group)) {
+                       mutex_unlock(&lag->lock);
+                       return PTR_ERR(group);
+               }
+       }
+
+       group->dirty = true;
+       group->slave_cnt = slave_count;
+
+       /* Group may have been on queue for removal but is now offloadable. */
+       group->to_remove = false;
+       mutex_unlock(&lag->lock);
+
+       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+       return 0;
+}
+
+/* Handle a NETDEV_CHANGELOWERSTATE notification for a repr that is a LAG
+ * port: record its link and tx state in the repr's LAG flags, mark the port
+ * as changed and schedule a sync to HW. Always returns 0.
+ */
+static int
+nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
+                         struct netdev_notifier_changelowerstate_info *info)
+{
+       struct netdev_lag_lower_state_info *lower_state;
+       struct nfp_flower_repr_priv *repr_priv;
+       struct nfp_flower_priv *priv;
+       unsigned long *port_flags;
+       struct nfp_repr *repr;
+
+       if (!netif_is_lag_port(netdev))
+               return 0;
+       if (!nfp_netdev_is_nfp_repr(netdev))
+               return 0;
+
+       lower_state = info->lower_state_info;
+       if (!lower_state)
+               return 0;
+
+       /* Only track reprs that belong to this app. */
+       repr = netdev_priv(netdev);
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+       if (repr->app != priv->app)
+               return 0;
+
+       repr_priv = repr->app_priv;
+       port_flags = &repr_priv->lag_port_flags;
+
+       mutex_lock(&lag->lock);
+       if (!lower_state->link_up)
+               *port_flags &= ~NFP_PORT_LAG_LINK_UP;
+       else
+               *port_flags |= NFP_PORT_LAG_LINK_UP;
+
+       if (!lower_state->tx_enabled)
+               *port_flags &= ~NFP_PORT_LAG_TX_ENABLED;
+       else
+               *port_flags |= NFP_PORT_LAG_TX_ENABLED;
+
+       /* Let the work handler know this port needs to be re-evaluated. */
+       *port_flags |= NFP_PORT_LAG_CHANGED;
+       mutex_unlock(&lag->lock);
+
+       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+       return 0;
+}
+
+/* Netdevice notifier callback dispatching the events relevant to LAG offload.
+ *
+ * Returns NOTIFY_OK when an event was handled, NOTIFY_BAD when handling it
+ * failed and NOTIFY_DONE for events this driver has no interest in.
+ */
+static int
+nfp_fl_lag_netdev_event(struct notifier_block *nb, unsigned long event,
+                       void *ptr)
+{
+       struct net_device *netdev;
+       struct nfp_fl_lag *lag;
+       int err;
+
+       netdev = netdev_notifier_info_to_dev(ptr);
+       lag = container_of(nb, struct nfp_fl_lag, lag_nb);
+
+       switch (event) {
+       case NETDEV_CHANGEUPPER:
+               err = nfp_fl_lag_changeupper_event(lag, ptr);
+               if (err)
+                       return NOTIFY_BAD;
+               return NOTIFY_OK;
+       case NETDEV_CHANGELOWERSTATE:
+               err = nfp_fl_lag_changels_event(lag, netdev, ptr);
+               if (err)
+                       return NOTIFY_BAD;
+               return NOTIFY_OK;
+       case NETDEV_UNREGISTER:
+               if (!netif_is_bond_master(netdev))
+                       break;
+
+               err = nfp_fl_lag_schedule_group_delete(lag, netdev);
+               /* This notifier is called for bonds that were never tracked
+                * by this driver as well. Such a bond is not an error and
+                * must not stop the notifier chain for other listeners.
+                */
+               if (err == -ENOENT)
+                       break;
+               if (err)
+                       return NOTIFY_BAD;
+               return NOTIFY_OK;
+       default:
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+/* Send a LAG reset message to the HW. Setting rst_cfg makes the config
+ * message carry the reset flag and close its own single-message batch, so no
+ * group is needed. Returns 0 or a negative error code from the config call.
+ */
+int nfp_flower_lag_reset(struct nfp_fl_lag *lag)
+{
+       enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
+
+       lag->rst_cfg = true;
+       return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
+}
+
+/* Set up the LAG private data: lock, group list, ID allocator, delayed work
+ * and notifier callback. The notifier itself is not registered here.
+ */
+void nfp_flower_lag_init(struct nfp_fl_lag *lag)
+{
+       mutex_init(&lag->lock);
+       INIT_LIST_HEAD(&lag->group_list);
+       ida_init(&lag->ida_handle);
+       INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work);
+
+       lag->lag_nb.notifier_call = nfp_fl_lag_netdev_event;
+
+       /* 0 is a reserved batch version so increment to first valid value. */
+       nfp_fl_increment_version(lag);
+}
+
+/* Tear down the LAG private data: stop the delayed work, free every tracked
+ * group and release the lock and ID allocator.
+ */
+void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
+{
+       struct nfp_fl_lag_group *grp, *next;
+
+       /* Stop the work before freeing the groups it walks. */
+       cancel_delayed_work_sync(&lag->work);
+
+       mutex_lock(&lag->lock);
+       list_for_each_entry_safe(grp, next, &lag->group_list, list) {
+               list_del(&grp->list);
+               kfree(grp);
+       }
+       mutex_unlock(&lag->lock);
+
+       ida_destroy(&lag->ida_handle);
+       mutex_destroy(&lag->lock);
+}
index 202284b..19cfa16 100644 (file)
@@ -575,12 +575,14 @@ static int nfp_flower_init(struct nfp_app *app)
        /* Tell the firmware that the driver supports lag. */
        err = nfp_rtsym_write_le(app->pf->rtbl,
                                 "_abi_flower_balance_sync_enable", 1);
-       if (!err)
+       if (!err) {
                app_priv->flower_ext_feats |= NFP_FL_FEATS_LAG;
-       else if (err == -ENOENT)
+               nfp_flower_lag_init(&app_priv->nfp_lag);
+       } else if (err == -ENOENT) {
                nfp_warn(app->cpp, "LAG not supported by FW.\n");
-       else
+       } else {
                goto err_cleanup_metadata;
+       }
 
        return 0;
 
@@ -599,6 +601,9 @@ static void nfp_flower_clean(struct nfp_app *app)
        skb_queue_purge(&app_priv->cmsg_skbs_low);
        flush_work(&app_priv->cmsg_work);
 
+       if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
+               nfp_flower_lag_cleanup(&app_priv->nfp_lag);
+
        nfp_flower_metadata_cleanup(app);
        vfree(app->priv);
        app->priv = NULL;
@@ -665,11 +670,29 @@ nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev,
 
 static int nfp_flower_start(struct nfp_app *app)
 {
+       struct nfp_flower_priv *app_priv = app->priv;
+       int err;
+
+       if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+               err = nfp_flower_lag_reset(&app_priv->nfp_lag);
+               if (err)
+                       return err;
+
+               err = register_netdevice_notifier(&app_priv->nfp_lag.lag_nb);
+               if (err)
+                       return err;
+       }
+
        return nfp_tunnel_config_start(app);
 }
 
 static void nfp_flower_stop(struct nfp_app *app)
 {
+       struct nfp_flower_priv *app_priv = app->priv;
+
+       /* The LAG notifier is only registered when the FW supports LAG. */
+       if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
+               unregister_netdevice_notifier(&app_priv->nfp_lag.lag_nb);
+
         nfp_tunnel_config_stop(app);
 }
 
index 7ce2557..e03efb0 100644 (file)
@@ -43,6 +43,7 @@
 #include <net/pkt_cls.h>
 #include <net/tcp.h>
 #include <linux/workqueue.h>
+#include <linux/idr.h>
 
 struct net_device;
 struct nfp_app;
@@ -97,6 +98,30 @@ struct nfp_mtu_conf {
        spinlock_t lock;
 };
 
+/**
+ * struct nfp_fl_lag - Flower APP priv data for link aggregation
+ * @lag_nb:            Notifier to track master/slave events
+ * @work:              Delayed work for writing configs to the HW
+ * @lock:              Lock to protect group_list and the per-port LAG
+ *                     state flags
+ * @group_list:                List of all master/slave groups offloaded
+ * @ida_handle:                IDA to handle group ids
+ * @pkt_num:           Incremented for each config packet sent
+ * @batch_ver:         Incremented for each batch of config packets
+ * @global_inst:       Instance allocator for groups
+ * @rst_cfg:           Marker to reset HW LAG config
+ */
+struct nfp_fl_lag {
+       struct notifier_block lag_nb;
+       struct delayed_work work;
+       struct mutex lock;
+       struct list_head group_list;
+       struct ida ida_handle;
+       unsigned int pkt_num;
+       unsigned int batch_ver;
+       u8 global_inst;
+       bool rst_cfg;
+};
+
 /**
  * struct nfp_flower_priv - Flower APP per-vNIC priv data
  * @app:               Back pointer to app
@@ -129,6 +154,7 @@ struct nfp_mtu_conf {
  *                     from firmware for repr reify
  * @reify_wait_queue:  wait queue for repr reify response counting
  * @mtu_conf:          Configuration of repr MTU value
+ * @nfp_lag:           Link aggregation data block
  */
 struct nfp_flower_priv {
        struct nfp_app *app;
@@ -158,6 +184,7 @@ struct nfp_flower_priv {
        atomic_t reify_replies;
        wait_queue_head_t reify_wait_queue;
        struct nfp_mtu_conf mtu_conf;
+       struct nfp_fl_lag nfp_lag;
 };
 
 /**
@@ -250,5 +277,8 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
 void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb);
 int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data,
                                  void *cb_priv);
+void nfp_flower_lag_init(struct nfp_fl_lag *lag);
+void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag);
+int nfp_flower_lag_reset(struct nfp_fl_lag *lag);
 
 #endif