Merge tag 'locking-urgent-2021-05-09' of git://git.kernel.org/pub/scm/linux/kernel...

[linux-2.6-microblaze.git] / net / ipv4 / nexthop.c
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c

index 743777b..4075230 100644 (file)
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -16,6 +16,9 @@
  #include <net/route.h>
  #include <net/sock.h>
  
  #include <net/route.h>
  #include <net/sock.h>
  
+#define NH_RES_DEFAULT_IDLE_TIMER      (120 * HZ)      /* 120 seconds, counted in HZ ticks. */
+#define NH_RES_DEFAULT_UNBALANCED_TIMER        0       /* No forced rebalancing. */
+
  static void remove_nexthop(struct net *net, struct nexthop *nh,
                            struct nl_info *nlinfo);
  
  static void remove_nexthop(struct net *net, struct nexthop *nh,
                            struct nl_info *nlinfo);
  
@@ -32,6 +35,7 @@ static const struct nla_policy rtm_nh_policy_new[] = {
         [NHA_ENCAP_TYPE]        = { .type = NLA_U16 },
         [NHA_ENCAP]             = { .type = NLA_NESTED },
         [NHA_FDB]               = { .type = NLA_FLAG },
         [NHA_ENCAP_TYPE]        = { .type = NLA_U16 },
         [NHA_ENCAP]             = { .type = NLA_NESTED },
         [NHA_FDB]               = { .type = NLA_FLAG },
+       [NHA_RES_GROUP]         = { .type = NLA_NESTED },
  };
  
  static const struct nla_policy rtm_nh_policy_get[] = {
  };
  
  static const struct nla_policy rtm_nh_policy_get[] = {
@@ -45,6 +49,32 @@ static const struct nla_policy rtm_nh_policy_dump[] = {
         [NHA_FDB]               = { .type = NLA_FLAG },
  };
  
         [NHA_FDB]               = { .type = NLA_FLAG },
  };
  
+/* Policy for the NHA_RES_GROUP_* attributes: the bucket count is a u16 and
+ * both timers are u32. NHA_RES_GROUP_UNBALANCED_TIME has no entry here.
+ * NOTE(review): presumably applied to the contents of the NHA_RES_GROUP nest
+ * of a "new nexthop" request -- the parse site is outside this hunk.
+ */
+static const struct nla_policy rtm_nh_res_policy_new[] = {
+       [NHA_RES_GROUP_BUCKETS]                 = { .type = NLA_U16 },
+       [NHA_RES_GROUP_IDLE_TIMER]              = { .type = NLA_U32 },
+       [NHA_RES_GROUP_UNBALANCED_TIMER]        = { .type = NLA_U32 },
+};
+
+/* Top-level policy listing NHA_ID, NHA_OIF and NHA_MASTER as u32 plus a
+ * nested NHA_RES_BUCKET.
+ * NOTE(review): the name suggests bucket dump requests -- confirm at the user.
+ */
+static const struct nla_policy rtm_nh_policy_dump_bucket[] = {
+       [NHA_ID]                = { .type = NLA_U32 },
+       [NHA_OIF]               = { .type = NLA_U32 },
+       [NHA_MASTER]            = { .type = NLA_U32 },
+       [NHA_RES_BUCKET]        = { .type = NLA_NESTED },
+};
+
+/* Policy with a single entry: NHA_RES_BUCKET_NH_ID as u32. */
+static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = {
+       [NHA_RES_BUCKET_NH_ID]  = { .type = NLA_U32 },
+};
+
+/* Top-level policy listing NHA_ID (u32) and a nested NHA_RES_BUCKET.
+ * NOTE(review): the name suggests bucket get requests -- confirm at the user.
+ */
+static const struct nla_policy rtm_nh_policy_get_bucket[] = {
+       [NHA_ID]                = { .type = NLA_U32 },
+       [NHA_RES_BUCKET]        = { .type = NLA_NESTED },
+};
+
+/* Policy with a single entry: NHA_RES_BUCKET_INDEX as u16. */
+static const struct nla_policy rtm_nh_res_bucket_policy_get[] = {
+       [NHA_RES_BUCKET_INDEX]  = { .type = NLA_U16 },
+};
+
  static bool nexthop_notifiers_is_empty(struct net *net)
  {
         return !net->nexthop.notifier_chain.head;
  static bool nexthop_notifiers_is_empty(struct net *net)
  {
         return !net->nexthop.notifier_chain.head;
@@ -52,10 +82,8 @@ static bool nexthop_notifiers_is_empty(struct net *net)
  
  static void
  __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
  
  static void
  __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
-                              const struct nexthop *nh)
+                              const struct nh_info *nhi)
  {
  {
-       struct nh_info *nhi = rtnl_dereference(nh->nh_info);
-
         nh_info->dev = nhi->fib_nhc.nhc_dev;
         nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
         if (nh_info->gw_family == AF_INET)
         nh_info->dev = nhi->fib_nhc.nhc_dev;
         nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
         if (nh_info->gw_family == AF_INET)
@@ -71,12 +99,14 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
  static int nh_notifier_single_info_init(struct nh_notifier_info *info,
                                         const struct nexthop *nh)
  {
  static int nh_notifier_single_info_init(struct nh_notifier_info *info,
                                         const struct nexthop *nh)
  {
+       struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+
         info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
         info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
         if (!info->nh)
                 return -ENOMEM;
  
         info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
         info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
         if (!info->nh)
                 return -ENOMEM;
  
-       __nh_notifier_single_info_init(info->nh, nh);
+       __nh_notifier_single_info_init(info->nh, nhi);
  
         return 0;
  }
  
         return 0;
  }
@@ -86,8 +116,8 @@ static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
         kfree(info->nh);
  }
  
         kfree(info->nh);
  }
  
-static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
-                                   struct nh_group *nhg)
+static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
+                                      struct nh_group *nhg)
  {
         u16 num_nh = nhg->num_nh;
         int i;
  {
         u16 num_nh = nhg->num_nh;
         int i;
@@ -103,11 +133,44 @@ static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
  
         for (i = 0; i < num_nh; i++) {
                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  
         for (i = 0; i < num_nh; i++) {
                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+               struct nh_info *nhi;
  
  
+               nhi = rtnl_dereference(nhge->nh->nh_info);
                 info->nh_grp->nh_entries[i].id = nhge->nh->id;
                 info->nh_grp->nh_entries[i].weight = nhge->weight;
                 __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
                 info->nh_grp->nh_entries[i].id = nhge->nh->id;
                 info->nh_grp->nh_entries[i].weight = nhge->weight;
                 __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
-                                              nhge->nh);
+                                              nhi);
+       }
+
+       return 0;
+}
+
+static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
+                                          struct nh_group *nhg)
+{
+       struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
+       u16 num_nh_buckets = res_table->num_nh_buckets;
+       unsigned long size;
+       u16 i;
+
+       info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE;
+       size = struct_size(info->nh_res_table, nhs, num_nh_buckets);
+       info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO |
+                                      __GFP_NOWARN);
+       if (!info->nh_res_table)
+               return -ENOMEM;
+
+       info->nh_res_table->num_nh_buckets = num_nh_buckets;
+
+       for (i = 0; i < num_nh_buckets; i++) {
+               struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+               struct nh_grp_entry *nhge;
+               struct nh_info *nhi;
+
+               nhge = rtnl_dereference(bucket->nh_entry);
+               nhi = rtnl_dereference(nhge->nh->nh_info);
+               __nh_notifier_single_info_init(&info->nh_res_table->nhs[i],
+                                              nhi);
         }
  
         return 0;
         }
  
         return 0;
@@ -118,8 +181,10 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
  {
         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  
  {
         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  
-       if (nhg->mpath)
-               return nh_notifier_mp_info_init(info, nhg);
+       if (nhg->hash_threshold)
+               return nh_notifier_mpath_info_init(info, nhg);
+       else if (nhg->resilient)
+               return nh_notifier_res_table_info_init(info, nhg);
         return -EINVAL;
  }
  
         return -EINVAL;
  }
  
@@ -128,8 +193,10 @@ static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
  {
         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  
  {
         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  
-       if (nhg->mpath)
+       if (nhg->hash_threshold)
                 kfree(info->nh_grp);
                 kfree(info->nh_grp);
+       else if (nhg->resilient)
+               vfree(info->nh_res_table);
  }
  
  static int nh_notifier_info_init(struct nh_notifier_info *info,
  }
  
  static int nh_notifier_info_init(struct nh_notifier_info *info,
@@ -181,6 +248,178 @@ static int call_nexthop_notifiers(struct net *net,
         return notifier_to_errno(err);
  }
  
         return notifier_to_errno(err);
  }
  
+/* Work out the idle timer value, in milliseconds, that accompanies a bucket
+ * replacement notification.
+ *
+ * @info: notifier info; only info->net and info->id are read here.
+ * @force: when true, *p_idle_timer_ms is set to 0 and no lookup is done.
+ * @p_idle_timer_ms: output; written only on the paths that return 0.
+ *
+ * Returns 0 on success, or -EINVAL when no nexthop with ID info->id exists.
+ */
+static int
+nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info,
+                                     bool force, unsigned int *p_idle_timer_ms)
+{
+       struct nh_res_table *res_table;
+       struct nh_group *nhg;
+       struct nexthop *nh;
+       int err = 0;
+
+       /* When 'force' is false, nexthop bucket replacement is performed
+        * because the bucket was deemed to be idle. In this case, capable
+        * listeners can choose to perform an atomic replacement: The bucket is
+        * only replaced if it is inactive. However, if the idle timer interval
+        * is smaller than the interval in which a listener is querying
+        * buckets' activity from the device, then atomic replacement should
+        * not be tried. Pass the idle timer value to listeners, so that they
+        * could determine which type of replacement to perform.
+        */
+       if (force) {
+               *p_idle_timer_ms = 0;
+               return 0;
+       }
+
+       /* The lookup and both dereferences below run inside an RCU read-side
+        * critical section.
+        */
+       rcu_read_lock();
+
+       nh = nexthop_find_by_id(info->net, info->id);
+       if (!nh) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       /* NOTE(review): nh_grp and res_table are dereferenced without
+        * checking that the nexthop is a resilient group; presumably callers
+        * only pass such IDs -- confirm.
+        */
+       nhg = rcu_dereference(nh->nh_grp);
+       res_table = rcu_dereference(nhg->res_table);
+       *p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer);
+
+out:
+       rcu_read_unlock();
+
+       return err;
+}
+
+/* Fill @info for a single-bucket replacement notification.
+ *
+ * Sets info->type to NH_NOTIFIER_INFO_TYPE_RES_BUCKET and allocates
+ * info->nh_res_bucket with kzalloc(GFP_KERNEL). The allocation is released by
+ * nh_notifier_res_bucket_info_fini().
+ *
+ * @bucket_index, @force: copied into the payload as-is.
+ * @oldi, @newi: nexthop infos copied into the old_nh and new_nh members.
+ *
+ * Returns 0 on success, the error from the idle timer lookup, or -ENOMEM.
+ */
+static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info,
+                                           u16 bucket_index, bool force,
+                                           struct nh_info *oldi,
+                                           struct nh_info *newi)
+{
+       unsigned int idle_timer_ms;
+       int err;
+
+       /* Resolve the idle timer first, so that a failed lookup returns
+        * before anything is allocated.
+        */
+       err = nh_notifier_res_bucket_idle_timer_get(info, force,
+                                                   &idle_timer_ms);
+       if (err)
+               return err;
+
+       info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET;
+       info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket),
+                                     GFP_KERNEL);
+       if (!info->nh_res_bucket)
+               return -ENOMEM;
+
+       info->nh_res_bucket->bucket_index = bucket_index;
+       info->nh_res_bucket->idle_timer_ms = idle_timer_ms;
+       info->nh_res_bucket->force = force;
+       __nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi);
+       __nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi);
+       return 0;
+}
+
+/* Free the bucket payload that nh_notifier_res_bucket_info_init() allocated
+ * in info->nh_res_bucket.
+ */
+static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info)
+{
+       kfree(info->nh_res_bucket);
+}
+
+/* Run the per-netns nexthop notifier chain with NEXTHOP_EVENT_BUCKET_REPLACE
+ * for bucket @bucket_index of the group whose ID is @nhg_id.
+ *
+ * @oldi, @newi: nexthop infos of the entry being replaced and its replacement.
+ * @force: forwarded to listeners through the notification payload.
+ *
+ * Returns 0 without building a payload when the chain has no listeners.
+ * Otherwise returns the payload initialization error, or the chain result
+ * converted with notifier_to_errno().
+ */
+static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
+                                              u16 bucket_index, bool force,
+                                              struct nh_info *oldi,
+                                              struct nh_info *newi,
+                                              struct netlink_ext_ack *extack)
+{
+       struct nh_notifier_info info = {
+               .net = net,
+               .extack = extack,
+               .id = nhg_id,
+       };
+       int err;
+
+       if (nexthop_notifiers_is_empty(net))
+               return 0;
+
+       err = nh_notifier_res_bucket_info_init(&info, bucket_index, force,
+                                              oldi, newi);
+       if (err)
+               return err;
+
+       err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+                                          NEXTHOP_EVENT_BUCKET_REPLACE, &info);
+       /* The payload is only needed for the duration of the chain call. */
+       nh_notifier_res_bucket_info_fini(&info);
+
+       return notifier_to_errno(err);
+}
+
+/* There are three users of RES_TABLE, and NHs etc. referenced from there:
+ *
+ * 1) a collection of callbacks for NH maintenance. This operates under
+ *    RTNL,
+ * 2) the delayed work that gradually balances the resilient table,
+ * 3) and nexthop_select_path(), operating under RCU.
+ *
+ * Both the delayed work and the RTNL block are writers, and need to
+ * maintain mutual exclusion. Since there are only two and well-known
+ * writers for each table, the RTNL code can make sure it has exclusive
+ * access thus:
+ *
+ * - Have the DW operate without locking;
+ * - synchronously cancel the DW;
+ * - do the writing;
+ * - if the write was not actually a delete, call upkeep, which schedules
+ *   DW again if necessary.
+ *
+ * The functions that are always called from the RTNL context use
+ * rtnl_dereference(). The functions that can also be called from the DW do
+ * a raw dereference and rely on the above mutual exclusion scheme.
+ */
+#define nh_res_dereference(p) (rcu_dereference_raw(p))
+
+/* Convenience wrapper around __call_nexthop_res_bucket_notifiers() that takes
+ * nexthops instead of nexthop infos. The nh_info of @old_nh and @new_nh is
+ * read with nh_res_dereference(); all other arguments are forwarded unchanged
+ * and the callee's return value is passed back.
+ */
+static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
+                                            u16 bucket_index, bool force,
+                                            struct nexthop *old_nh,
+                                            struct nexthop *new_nh,
+                                            struct netlink_ext_ack *extack)
+{
+       struct nh_info *oldi = nh_res_dereference(old_nh->nh_info);
+       struct nh_info *newi = nh_res_dereference(new_nh->nh_info);
+
+       return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index,
+                                                  force, oldi, newi, extack);
+}
+
+/* Run the per-netns nexthop notifier chain with
+ * NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE for group nexthop @nh.
+ *
+ * Asserts that RTNL is held. The payload is built with
+ * nh_notifier_mpath_info_init() and freed with kfree() once the chain has run.
+ *
+ * Returns 0 when the chain has no listeners. Otherwise returns the payload
+ * initialization error (also reported through @extack), or the chain result
+ * converted with notifier_to_errno().
+ *
+ * NOTE(review): info.id is left zero-initialized here, unlike in
+ * __call_nexthop_res_bucket_notifiers() -- confirm that listeners of this
+ * event do not need the group ID.
+ */
+static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
+                                           struct netlink_ext_ack *extack)
+{
+       struct nh_notifier_info info = {
+               .net = net,
+               .extack = extack,
+       };
+       struct nh_group *nhg;
+       int err;
+
+       ASSERT_RTNL();
+
+       if (nexthop_notifiers_is_empty(net))
+               return 0;
+
+       /* At this point, the nexthop buckets are still not populated. Only
+        * emit a notification with the logical nexthops, so that a listener
+        * could potentially veto it in case of unsupported configuration.
+        */
+       nhg = rtnl_dereference(nh->nh_grp);
+       err = nh_notifier_mpath_info_init(&info, nhg);
+       if (err) {
+               NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
+               return err;
+       }
+
+       err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+                                          NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
+                                          &info);
+       kfree(info.nh_grp);
+
+       return notifier_to_errno(err);
+}
+
  static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
                                  enum nexthop_event_type event_type,
                                  struct nexthop *nh,
  static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
                                  enum nexthop_event_type event_type,
                                  struct nexthop *nh,
@@ -239,6 +478,9 @@ static void nexthop_free_group(struct nexthop *nh)
  
         WARN_ON(nhg->spare == nhg);
  
  
         WARN_ON(nhg->spare == nhg);
  
+       if (nhg->resilient)
+               vfree(rcu_dereference_raw(nhg->res_table));
+
         kfree(nhg->spare);
         kfree(nhg);
  }
         kfree(nhg->spare);
         kfree(nhg);
  }
@@ -297,6 +539,30 @@ static struct nh_group *nexthop_grp_alloc(u16 num_nh)
         return nhg;
  }
  
         return nhg;
  }
  
+static void nh_res_table_upkeep_dw(struct work_struct *work);
+
+/* Allocate and initialize a resilient table for group @nhg_id in @net.
+ *
+ * The table is allocated zeroed with __vmalloc() and sized for
+ * cfg->nh_grp_res_num_buckets buckets. The bucket count and both timers are
+ * copied from @cfg. The upkeep delayed work and the uw_nh_entries list head
+ * are initialized, but nothing is scheduled here.
+ *
+ * Returns the new table, or NULL if the allocation fails.
+ */
+static struct nh_res_table *
+nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg)
+{
+       const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets;
+       struct nh_res_table *res_table;
+       unsigned long size;
+
+       /* Header plus the trailing array of num_nh_buckets buckets. */
+       size = struct_size(res_table, nh_buckets, num_nh_buckets);
+       res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
+       if (!res_table)
+               return NULL;
+
+       res_table->net = net;
+       res_table->nhg_id = nhg_id;
+       INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw);
+       INIT_LIST_HEAD(&res_table->uw_nh_entries);
+       res_table->idle_timer = cfg->nh_grp_res_idle_timer;
+       res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer;
+       res_table->num_nh_buckets = num_nh_buckets;
+       return res_table;
+}
+
  static void nh_base_seq_inc(struct net *net)
  {
         while (++net->nexthop.seq == 0)
  static void nh_base_seq_inc(struct net *net)
  {
         while (++net->nexthop.seq == 0)
@@ -345,6 +611,48 @@ static u32 nh_find_unused_id(struct net *net)
         return 0;
  }
  
         return 0;
  }
  
+/* Move *deadline earlier to @next_time when @next_time comes before it
+ * according to time_before(); otherwise leave *deadline unchanged.
+ */
+static void nh_res_time_set_deadline(unsigned long next_time,
+                                    unsigned long *deadline)
+{
+       if (time_before(next_time, *deadline))
+               *deadline = next_time;
+}
+
+/* Return the time elapsed since res_table->unbalanced_since, converted from
+ * a jiffies delta to clock_t. Returns 0 when the uw_nh_entries list is empty.
+ */
+static clock_t nh_res_table_unbalanced_time(struct nh_res_table *res_table)
+{
+       if (list_empty(&res_table->uw_nh_entries))
+               return 0;
+       return jiffies_delta_to_clock_t(jiffies - res_table->unbalanced_since);
+}
+
+/* Append a nested NHA_RES_GROUP attribute describing @nhg's resilient table
+ * to @skb: bucket count, idle timer, unbalanced timer and the current
+ * unbalanced time. Both timers are converted with jiffies_to_clock_t().
+ *
+ * The table is read with rtnl_dereference().
+ *
+ * Returns 0 on success. Returns -EMSGSIZE when the nest cannot be started or
+ * an attribute does not fit; in the latter case the nest is cancelled.
+ */
+static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg)
+{
+       struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
+       struct nlattr *nest;
+
+       nest = nla_nest_start(skb, NHA_RES_GROUP);
+       if (!nest)
+               return -EMSGSIZE;
+
+       if (nla_put_u16(skb, NHA_RES_GROUP_BUCKETS,
+                       res_table->num_nh_buckets) ||
+           nla_put_u32(skb, NHA_RES_GROUP_IDLE_TIMER,
+                       jiffies_to_clock_t(res_table->idle_timer)) ||
+           nla_put_u32(skb, NHA_RES_GROUP_UNBALANCED_TIMER,
+                       jiffies_to_clock_t(res_table->unbalanced_timer)) ||
+           nla_put_u64_64bit(skb, NHA_RES_GROUP_UNBALANCED_TIME,
+                             nh_res_table_unbalanced_time(res_table),
+                             NHA_RES_GROUP_PAD))
+               goto nla_put_failure;
+
+       nla_nest_end(skb, nest);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, nest);
+       return -EMSGSIZE;
+}
+
  static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
  {
         struct nexthop_grp *p;
  static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
  {
         struct nexthop_grp *p;
@@ -353,8 +661,10 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
         u16 group_type = 0;
         int i;
  
         u16 group_type = 0;
         int i;
  
-       if (nhg->mpath)
+       if (nhg->hash_threshold)
                 group_type = NEXTHOP_GRP_TYPE_MPATH;
                 group_type = NEXTHOP_GRP_TYPE_MPATH;
+       else if (nhg->resilient)
+               group_type = NEXTHOP_GRP_TYPE_RES;
  
         if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
                 goto nla_put_failure;
  
         if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
                 goto nla_put_failure;
@@ -370,6 +680,9 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
                 p += 1;
         }
  
                 p += 1;
         }
  
+       if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
+               goto nla_put_failure;
+
         return 0;
  
  nla_put_failure:
         return 0;
  
  nla_put_failure:
@@ -457,13 +770,26 @@ nla_put_failure:
         return -EMSGSIZE;
  }
  
         return -EMSGSIZE;
  }
  
+/* Return the message space needed for the NHA_RES_GROUP nest and the four
+ * attributes listed below. @nhg is not read.
+ */
+static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg)
+{
+       return nla_total_size(0) +      /* NHA_RES_GROUP */
+               nla_total_size(2) +     /* NHA_RES_GROUP_BUCKETS */
+               nla_total_size(4) +     /* NHA_RES_GROUP_IDLE_TIMER */
+               nla_total_size(4) +     /* NHA_RES_GROUP_UNBALANCED_TIMER */
+               nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */
+}
+
  static size_t nh_nlmsg_size_grp(struct nexthop *nh)
  {
         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
         size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
  static size_t nh_nlmsg_size_grp(struct nexthop *nh)
  {
         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
         size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
+       size_t tot = nla_total_size(sz) +
+               nla_total_size(2); /* NHA_GROUP_TYPE */
+
+       if (nhg->resilient)
+               tot += nh_nlmsg_size_grp_res(nhg);
  
  
-       return nla_total_size(sz) +
-              nla_total_size(2);  /* NHA_GROUP_TYPE */
+       return tot;
  }
  
  static size_t nh_nlmsg_size_single(struct nexthop *nh)
  }
  
  static size_t nh_nlmsg_size_single(struct nexthop *nh)
@@ -538,18 +864,142 @@ errout:
                 rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
  }
  
                 rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
  }
  
+/* Atomically read the bucket's used_time and return it as unsigned long. */
+static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
+{
+       return (unsigned long)atomic_long_read(&bucket->used_time);
+}
+
+/* Return the point in time at which @bucket counts as idle.
+ *
+ * This is @now when used_time still equals migrated_time. Otherwise it is
+ * used_time plus the table's idle_timer.
+ */
+static unsigned long
+nh_res_bucket_idle_point(const struct nh_res_table *res_table,
+                        const struct nh_res_bucket *bucket,
+                        unsigned long now)
+{
+       unsigned long time = nh_res_bucket_used_time(bucket);
+
+       /* Bucket was not used since it was migrated. The idle time is now. */
+       if (time == bucket->migrated_time)
+               return now;
+
+       return time + res_table->idle_timer;
+}
+
+/* Return unbalanced_since plus unbalanced_timer for @res_table. */
+static unsigned long
+nh_res_table_unb_point(const struct nh_res_table *res_table)
+{
+       return res_table->unbalanced_since + res_table->unbalanced_timer;
+}
+
+/* Stamp @bucket's used_time and migrated_time with the same jiffies sample.
+ * Equal stamps are what nh_res_bucket_idle_point() treats as "not used since
+ * migration". @res_table is not read.
+ */
+static void nh_res_bucket_set_idle(const struct nh_res_table *res_table,
+                                  struct nh_res_bucket *bucket)
+{
+       unsigned long now = jiffies;
+
+       atomic_long_set(&bucket->used_time, (long)now);
+       bucket->migrated_time = now;
+}
+
+/* Atomically set @bucket's used_time to the current jiffies value. */
+static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket)
+{
+       atomic_long_set(&bucket->used_time, (long)jiffies);
+}
+
+/* Return the time elapsed since @bucket's used_time, converted from a jiffies
+ * delta to clock_t.
+ */
+static clock_t nh_res_bucket_idle_time(const struct nh_res_bucket *bucket)
+{
+       unsigned long used_time = nh_res_bucket_used_time(bucket);
+
+       return jiffies_delta_to_clock_t(jiffies - used_time);
+}
+
+/* Build one netlink message in @skb describing @bucket of group nexthop @nh.
+ *
+ * The nhmsg header carries AF_UNSPEC, the bucket's nh_flags and the group's
+ * protocol. The attributes are NHA_ID (the group's ID) and a nested
+ * NHA_RES_BUCKET holding the bucket index, the ID of the nexthop currently
+ * assigned to the bucket, and the bucket's idle time.
+ *
+ * @event, @portid, @seq, @nlflags: passed to nlmsg_put() for the header.
+ * @extack: not used in this function.
+ *
+ * Returns 0 on success. Returns -EMSGSIZE when the header or an attribute
+ * does not fit; a message that was already started is cancelled.
+ */
+static int nh_fill_res_bucket(struct sk_buff *skb, struct nexthop *nh,
+                             struct nh_res_bucket *bucket, u16 bucket_index,
+                             int event, u32 portid, u32 seq,
+                             unsigned int nlflags,
+                             struct netlink_ext_ack *extack)
+{
+       struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
+       struct nlmsghdr *nlh;
+       struct nlattr *nest;
+       struct nhmsg *nhm;
+
+       nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       nhm = nlmsg_data(nlh);
+       nhm->nh_family = AF_UNSPEC;
+       nhm->nh_flags = bucket->nh_flags;
+       nhm->nh_protocol = nh->protocol;
+       nhm->nh_scope = 0;
+       nhm->resvd = 0;
+
+       if (nla_put_u32(skb, NHA_ID, nh->id))
+               goto nla_put_failure;
+
+       nest = nla_nest_start(skb, NHA_RES_BUCKET);
+       if (!nest)
+               goto nla_put_failure;
+
+       if (nla_put_u16(skb, NHA_RES_BUCKET_INDEX, bucket_index) ||
+           nla_put_u32(skb, NHA_RES_BUCKET_NH_ID, nhge->nh->id) ||
+           nla_put_u64_64bit(skb, NHA_RES_BUCKET_IDLE_TIME,
+                             nh_res_bucket_idle_time(bucket),
+                             NHA_RES_BUCKET_PAD))
+               goto nla_put_failure_nest;
+
+       nla_nest_end(skb, nest);
+       nlmsg_end(skb, nlh);
+       return 0;
+
+nla_put_failure_nest:
+       nla_nest_cancel(skb, nest);
+nla_put_failure:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+/* Send an RTM_NEWNEXTHOPBUCKET message (with NLM_F_REPLACE) for bucket
+ * @bucket_index of @res_table to the RTNLGRP_NEXTHOP group.
+ *
+ * The group nexthop is taken from the nh_parent of the bucket's current
+ * entry. If the skb cannot be allocated (-ENOBUFS) or the message cannot be
+ * filled, the error is reported through rtnl_set_sk_err() instead.
+ */
+static void nexthop_bucket_notify(struct nh_res_table *res_table,
+                                 u16 bucket_index)
+{
+       struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
+       struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
+       struct nexthop *nh = nhge->nh_parent;
+       struct sk_buff *skb;
+       int err = -ENOBUFS;
+
+       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!skb)
+               goto errout;
+
+       err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+                                RTM_NEWNEXTHOPBUCKET, 0, 0, NLM_F_REPLACE,
+                                NULL);
+       if (err < 0) {
+               kfree_skb(skb);
+               goto errout;
+       }
+
+       rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL);
+       return;
+errout:
+       if (err < 0)
+               rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
+}
+
  static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
                            bool *is_fdb, struct netlink_ext_ack *extack)
  {
         if (nh->is_group) {
                 struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  
  static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
                            bool *is_fdb, struct netlink_ext_ack *extack)
  {
         if (nh->is_group) {
                 struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  
-               /* nested multipath (group within a group) is not
-                * supported
-                */
-               if (nhg->mpath) {
+               /* Nesting groups within groups is not supported. */
+               if (nhg->hash_threshold) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Hash-threshold group can not be a nexthop within a group");
+                       return false;
+               }
+               if (nhg->resilient) {
                         NL_SET_ERR_MSG(extack,
                         NL_SET_ERR_MSG(extack,
-                                      "Multipath group can not be a nexthop within a group");
+                                      "Resilient group can not be a nexthop within a group");
                         return false;
                 }
                 *is_fdb = nhg->fdb_nh;
                         return false;
                 }
                 *is_fdb = nhg->fdb_nh;
@@ -591,7 +1041,7 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
  
  static int nh_check_attr_group(struct net *net,
                                struct nlattr *tb[], size_t tb_size,
  
  static int nh_check_attr_group(struct net *net,
                                struct nlattr *tb[], size_t tb_size,
-                              struct netlink_ext_ack *extack)
+                              u16 nh_grp_type, struct netlink_ext_ack *extack)
  {
         unsigned int len = nla_len(tb[NHA_GROUP]);
         u8 nh_family = AF_UNSPEC;
  {
         unsigned int len = nla_len(tb[NHA_GROUP]);
         u8 nh_family = AF_UNSPEC;
@@ -652,8 +1102,14 @@ static int nh_check_attr_group(struct net *net,
         for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
                 if (!tb[i])
                         continue;
         for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
                 if (!tb[i])
                         continue;
-               if (i == NHA_FDB)
+               switch (i) {
+               case NHA_FDB:
                         continue;
                         continue;
+               case NHA_RES_GROUP:
+                       if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
+                               continue;
+                       break;
+               }
                 NL_SET_ERR_MSG(extack,
                                "No other attributes can be set in nexthop groups");
                 return -EINVAL;
                 NL_SET_ERR_MSG(extack,
                                "No other attributes can be set in nexthop groups");
                 return -EINVAL;
@@ -695,7 +1151,7 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
         return !!(state & NUD_VALID);
  }
  
         return !!(state & NUD_VALID);
  }
  
-static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
+static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
  {
         struct nexthop *rc = NULL;
         int i;
  {
         struct nexthop *rc = NULL;
         int i;
@@ -704,7 +1160,7 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
                 struct nh_info *nhi;
  
                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
                 struct nh_info *nhi;
  
-               if (hash > atomic_read(&nhge->mpath.upper_bound))
+               if (hash > atomic_read(&nhge->hthr.upper_bound))
                         continue;
  
                 nhi = rcu_dereference(nhge->nh->nh_info);
                         continue;
  
                 nhi = rcu_dereference(nhge->nh->nh_info);
@@ -732,6 +1188,22 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
         return rc;
  }
  
         return rc;
  }
  
+/* Select the nexthop for @hash from @nhg's resilient table.
+ *
+ * The bucket index is @hash modulo the table's bucket count. The chosen
+ * bucket is marked busy (its used_time is set to jiffies) and the nexthop of
+ * the bucket's current entry is returned. The table and the bucket entry are
+ * read with rcu_dereference().
+ *
+ * NOTE(review): @hash is a signed int and the index is a u16, so this assumes
+ * @hash is non-negative and num_nh_buckets is non-zero -- confirm at callers
+ * and at table creation.
+ */
+static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
+{
+       struct nh_res_table *res_table = rcu_dereference(nhg->res_table);
+       u16 bucket_index = hash % res_table->num_nh_buckets;
+       struct nh_res_bucket *bucket;
+       struct nh_grp_entry *nhge;
+
+       /* nexthop_select_path() is expected to return a non-NULL value, so
+        * skip protocol validation and just hand out whatever there is.
+        */
+       bucket = &res_table->nh_buckets[bucket_index];
+       nh_res_bucket_set_busy(bucket);
+       nhge = rcu_dereference(bucket->nh_entry);
+       return nhge->nh;
+}
+
  struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
  {
         struct nh_group *nhg;
  struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
  {
         struct nh_group *nhg;
@@ -740,8 +1212,10 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
                 return nh;
  
         nhg = rcu_dereference(nh->nh_grp);
                 return nh;
  
         nhg = rcu_dereference(nh->nh_grp);
-       if (nhg->mpath)
-               return nexthop_select_path_mp(nhg, hash);
+       if (nhg->hash_threshold)
+               return nexthop_select_path_hthr(nhg, hash);
+       else if (nhg->resilient)
+               return nexthop_select_path_res(nhg, hash);
  
         /* Unreachable. */
         return NULL;
  
         /* Unreachable. */
         return NULL;
@@ -924,31 +1398,343 @@ static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
         return 0;
  }
  
         return 0;
  }
  
-static void nh_group_rebalance(struct nh_group *nhg)
+static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge)
  {
  {
-       int total = 0;
-       int w = 0;
-       int i;
+       return nhge->res.count_buckets == nhge->res.wants_buckets;
+}
  
  
-       for (i = 0; i < nhg->num_nh; ++i)
-               total += nhg->nh_entries[i].weight;
+static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge)
+{
+       return nhge->res.count_buckets > nhge->res.wants_buckets;
+}
  
  
-       for (i = 0; i < nhg->num_nh; ++i) {
-               struct nh_grp_entry *nhge = &nhg->nh_entries[i];
-               int upper_bound;
+static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge)
+{
+       return nhge->res.count_buckets < nhge->res.wants_buckets;
+}
  
  
-               w += nhge->weight;
-               upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
-               atomic_set(&nhge->mpath.upper_bound, upper_bound);
-       }
+static bool nh_res_table_is_balanced(const struct nh_res_table *res_table)
+{
+       return list_empty(&res_table->uw_nh_entries);
  }
  
  }
  
-static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
-                               struct nl_info *nlinfo)
+static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket)
  {
  {
-       struct nh_grp_entry *nhges, *new_nhges;
-       struct nexthop *nhp = nhge->nh_parent;
-       struct netlink_ext_ack extack;
+       struct nh_grp_entry *nhge;
+
+       if (bucket->occupied) {
+               nhge = nh_res_dereference(bucket->nh_entry);
+               nhge->res.count_buckets--;
+               bucket->occupied = false;
+       }
+}
+
+/* Make @bucket reference @nhge.
+ *
+ * nh_res_bucket_unset_nh() is called first on the bucket; then the bucket is
+ * marked occupied, the new entry pointer is stored with rcu_assign_pointer()
+ * and the entry's count_buckets is incremented.
+ */
+static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket,
+                                struct nh_grp_entry *nhge)
+{
+       nh_res_bucket_unset_nh(bucket);
+
+       bucket->occupied = true;
+       rcu_assign_pointer(bucket->nh_entry, nhge);
+       nhge->res.count_buckets++;
+}
+
+/* Decide whether @bucket should be moved to another nexthop right now.
+ *
+ * Returns true when the bucket should be migrated; *@force then says whether
+ * the migration is a forced one. Returns false otherwise, leaving *@force
+ * untouched. When a bucket held by an overweight nexthop is not yet due, its
+ * idle point (and, if an unbalanced timer is configured, the table's
+ * unbalanced point) is handed to nh_res_time_set_deadline() together with
+ * @deadline.
+ */
+static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table,
+                                        struct nh_res_bucket *bucket,
+                                        unsigned long *deadline, bool *force)
+{
+       unsigned long now = jiffies;
+       struct nh_grp_entry *nhge;
+       unsigned long idle_point;
+
+       if (!bucket->occupied) {
+               /* The bucket is not occupied, its NHGE pointer is either
+                * NULL or obsolete. We _have to_ migrate: set force.
+                */
+               *force = true;
+               return true;
+       }
+
+       nhge = nh_res_dereference(bucket->nh_entry);
+
+       /* If the bucket is populated by an underweight or balanced
+        * nexthop, do not migrate.
+        */
+       if (!nh_res_nhge_is_ow(nhge))
+               return false;
+
+       /* At this point we know that the bucket is populated with an
+        * overweight nexthop. It needs to be migrated to a new nexthop if
+        * the idle timer or the unbalanced timer expired.
+        */
+
+       idle_point = nh_res_bucket_idle_point(res_table, bucket, now);
+       if (time_after_eq(now, idle_point)) {
+               /* The bucket is idle. We _can_ migrate: unset force. */
+               *force = false;
+               return true;
+       }
+
+       /* Unbalanced timer of 0 means "never force". */
+       if (res_table->unbalanced_timer) {
+               unsigned long unb_point;
+
+               unb_point = nh_res_table_unb_point(res_table);
+               if (time_after(now, unb_point)) {
+                       /* The bucket is not idle, but the unbalanced timer
+                        * expired. We _can_ migrate, but set force anyway,
+                        * so that drivers know to ignore activity reports
+                        * from the HW.
+                        */
+                       *force = true;
+                       return true;
+               }
+
+               nh_res_time_set_deadline(unb_point, deadline);
+       }
+
+       nh_res_time_set_deadline(idle_point, deadline);
+       return false;
+}
+
+/* Move the bucket at @bucket_index to the first underweight group entry.
+ *
+ * @notify:    emit a bucket-replacement notification to nexthop notifiers
+ *             before the bucket is changed.
+ * @notify_nl: call nexthop_bucket_notify() for the bucket after the change.
+ * @force:     passed on to the notifiers; a forced replacement proceeds even
+ *             if a notifier returns an error.
+ *
+ * Returns true if the bucket was migrated. Returns false if there is no
+ * underweight entry to migrate to, or if a notifier rejected a non-forced
+ * migration.
+ */
+static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
+                                 u16 bucket_index, bool notify,
+                                 bool notify_nl, bool force)
+{
+       struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
+       /* Zero-initialized so that extack._msg is NULL, rather than stack
+        * garbage, if a notifier fails without setting an error message.
+        */
+       struct netlink_ext_ack extack = {};
+       struct nh_grp_entry *new_nhge;
+       int err;
+
+       new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries,
+                                           struct nh_grp_entry,
+                                           res.uw_nh_entry);
+       if (WARN_ON_ONCE(!new_nhge))
+               /* If this function is called, "bucket" is either not
+                * occupied, or it belongs to a next hop that is
+                * overweight. In either case, there ought to be a
+                * corresponding underweight next hop.
+                */
+               return false;
+
+       if (notify) {
+               struct nh_grp_entry *old_nhge;
+
+               old_nhge = nh_res_dereference(bucket->nh_entry);
+               err = call_nexthop_res_bucket_notifiers(res_table->net,
+                                                       res_table->nhg_id,
+                                                       bucket_index, force,
+                                                       old_nhge->nh,
+                                                       new_nhge->nh, &extack);
+               if (err) {
+                       pr_err_ratelimited("%s\n", extack._msg);
+                       if (!force)
+                               return false;
+                       /* It is not possible to veto a forced replacement, so
+                        * just clear the hardware flags from the nexthop
+                        * bucket to indicate to user space that this bucket is
+                        * not correctly populated in hardware.
+                        */
+                       bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
+               }
+       }
+
+       nh_res_bucket_set_nh(bucket, new_nhge);
+       nh_res_bucket_set_idle(res_table, bucket);
+
+       if (notify_nl)
+               nexthop_bucket_notify(res_table, bucket_index);
+
+       /* Once the entry has all the buckets it wants, it is no longer
+        * underweight and leaves the underweight list.
+        */
+       if (nh_res_nhge_is_balanced(new_nhge))
+               list_del(&new_nhge->res.uw_nh_entry);
+       return true;
+}
+
+#define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2)
+
+/* Run one upkeep pass over all buckets of @res_table.
+ *
+ * Every bucket for which nh_res_bucket_should_migrate() returns true is
+ * handed to nh_res_bucket_migrate(); @notify and @notify_nl are passed
+ * through to it. If the table is still unbalanced afterwards, the upkeep
+ * delayed work is queued to run again.
+ */
+static void nh_res_table_upkeep(struct nh_res_table *res_table,
+                               bool notify, bool notify_nl)
+{
+       unsigned long now = jiffies;
+       unsigned long deadline;
+       u16 i;
+
+       /* Deadline is the next time that upkeep should be run. It is the
+        * earliest time at which one of the buckets might be migrated.
+        * Start at the most pessimistic estimate: either unbalanced_timer
+        * from now, or if there is none, idle_timer from now. For each
+        * encountered time point, call nh_res_time_set_deadline() to
+        * refine the estimate.
+        */
+       if (res_table->unbalanced_timer)
+               deadline = now + res_table->unbalanced_timer;
+       else
+               deadline = now + res_table->idle_timer;
+
+       for (i = 0; i < res_table->num_nh_buckets; i++) {
+               struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+               bool force;
+
+               if (nh_res_bucket_should_migrate(res_table, bucket,
+                                                &deadline, &force)) {
+                       if (!nh_res_bucket_migrate(res_table, i, notify,
+                                                  notify_nl, force)) {
+                               unsigned long idle_point;
+
+                               /* A driver can override the migration
+                                * decision if the HW reports that the
+                                * bucket is actually not idle. Therefore
+                                * remark the bucket as busy again and
+                                * update the deadline.
+                                */
+                               nh_res_bucket_set_busy(bucket);
+                               idle_point = nh_res_bucket_idle_point(res_table,
+                                                                     bucket,
+                                                                     now);
+                               nh_res_time_set_deadline(idle_point, &deadline);
+                       }
+               }
+       }
+
+       /* If the group is still unbalanced, schedule the next upkeep to
+        * either the deadline computed above, or the minimum deadline,
+        * whichever comes later.
+        */
+       if (!nh_res_table_is_balanced(res_table)) {
+               /* jiffies is sampled again here; this "now" shadows the one
+                * taken at function entry and is the base for the delay
+                * passed to queue_delayed_work().
+                */
+               unsigned long now = jiffies;
+               unsigned long min_deadline;
+
+               min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL;
+               if (time_before(deadline, min_deadline))
+                       deadline = min_deadline;
+
+               queue_delayed_work(system_power_efficient_wq,
+                                  &res_table->upkeep_dw, deadline - now);
+       }
+}
+
+/* Delayed-work handler for res_table->upkeep_dw: recovers the owning
+ * resilient table from @work and runs an upkeep pass with both the notifier
+ * and the netlink notification flags set.
+ */
+static void nh_res_table_upkeep_dw(struct work_struct *work)
+{
+       struct delayed_work *dw = to_delayed_work(work);
+       struct nh_res_table *res_table;
+
+       res_table = container_of(dw, struct nh_res_table, upkeep_dw);
+       nh_res_table_upkeep(res_table, true, true);
+}
+
+/* Cancel the upkeep delayed work of @res_table using
+ * cancel_delayed_work_sync().
+ */
+static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
+{
+       cancel_delayed_work_sync(&res_table->upkeep_dw);
+}
+
+/* Recompute how many buckets of @res_table each entry of @nhg wants.
+ *
+ * Bucket shares are proportional to entry weights: the cumulative weight up
+ * to and including an entry is scaled to the number of buckets, and the
+ * entry's wants_buckets is the difference to the previous entry's bound.
+ * The table's list of underweight entries is rebuilt from scratch, and
+ * unbalanced_since is set to jiffies when the first underweight entry is
+ * found.
+ */
+static void nh_res_group_rebalance(struct nh_group *nhg,
+                                  struct nh_res_table *res_table)
+{
+       int prev_upper_bound = 0;
+       int total = 0;
+       int w = 0;
+       int i;
+
+       INIT_LIST_HEAD(&res_table->uw_nh_entries);
+
+       for (i = 0; i < nhg->num_nh; ++i)
+               total += nhg->nh_entries[i].weight;
+
+       for (i = 0; i < nhg->num_nh; ++i) {
+               struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+               int upper_bound;
+               u64 btw;
+
+               w += nhge->weight;
+               /* Buckets times cumulative weight can exceed INT_MAX for
+                * large tables with many heavy entries, so compute the
+                * product in 64 bits before dividing.
+                */
+               btw = (u64)res_table->num_nh_buckets * w;
+               upper_bound = DIV_ROUND_CLOSEST_ULL(btw, total);
+               nhge->res.wants_buckets = upper_bound - prev_upper_bound;
+               prev_upper_bound = upper_bound;
+
+               if (nh_res_nhge_is_uw(nhge)) {
+                       if (list_empty(&res_table->uw_nh_entries))
+                               res_table->unbalanced_since = jiffies;
+                       list_add(&nhge->res.uw_nh_entry,
+                                &res_table->uw_nh_entries);
+               }
+       }
+}
+
+/* Migrate buckets in res_table so that they reference NHGE's from NHG with
+ * the right NH ID. Set those buckets that do not have a corresponding NHGE
+ * entry in NHG as not occupied.
+ *
+ * Each bucket is matched against the entries of @nhg by the ID of the
+ * nexthop its current entry points to; the first entry with the same ID
+ * takes over the bucket.
+ */
+static void nh_res_table_migrate_buckets(struct nh_res_table *res_table,
+                                        struct nh_group *nhg)
+{
+       u16 i;
+
+       for (i = 0; i < res_table->num_nh_buckets; i++) {
+               struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+               u32 id = rtnl_dereference(bucket->nh_entry)->nh->id;
+               bool found = false;
+               int j;
+
+               for (j = 0; j < nhg->num_nh; j++) {
+                       struct nh_grp_entry *nhge = &nhg->nh_entries[j];
+
+                       if (nhge->nh->id == id) {
+                               nh_res_bucket_set_nh(bucket, nhge);
+                               found = true;
+                               break;
+                       }
+               }
+
+               /* No entry in @nhg carries this nexthop ID any more. */
+               if (!found)
+                       nh_res_bucket_unset_nh(bucket);
+       }
+}
+
+/* Re-target the resilient table of @oldg at the entries of @newg.
+ *
+ * Pending upkeep work is cancelled, buckets are migrated to the matching
+ * entries of @newg, the wanted bucket counts are recomputed, and an upkeep
+ * pass is run with notifier notifications enabled and netlink bucket
+ * notifications disabled.
+ */
+static void replace_nexthop_grp_res(struct nh_group *oldg,
+                                   struct nh_group *newg)
+{
+       /* For NH group replacement, the new NHG might only have a stub
+        * hash table with 0 buckets, because the number of buckets was not
+        * specified. For NH removal, oldg and newg both reference the same
+        * res_table. So in any case, in the following, we want to work
+        * with oldg->res_table.
+        */
+       struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table);
+       unsigned long prev_unbalanced_since = old_res_table->unbalanced_since;
+       bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries);
+
+       nh_res_table_cancel_upkeep(old_res_table);
+       nh_res_table_migrate_buckets(old_res_table, newg);
+       nh_res_group_rebalance(newg, old_res_table);
+       /* Rebalancing stamps unbalanced_since with the current jiffies when
+        * it finds an underweight entry. If the table was already unbalanced
+        * before and still is, keep the earlier timestamp.
+        */
+       if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries))
+               old_res_table->unbalanced_since = prev_unbalanced_since;
+       nh_res_table_upkeep(old_res_table, true, false);
+}
+
+/* Recompute the hash-threshold upper bound of every entry in @nhg.
+ *
+ * The weights of all entries are summed; each entry's upper bound is then
+ * its cumulative weight (up to and including itself) shifted left by 31,
+ * divided by the total with rounding to closest, minus one.
+ */
+static void nh_hthr_group_rebalance(struct nh_group *nhg)
+{
+       int total = 0;
+       int w = 0;
+       int i;
+
+       for (i = 0; i < nhg->num_nh; ++i)
+               total += nhg->nh_entries[i].weight;
+
+       for (i = 0; i < nhg->num_nh; ++i) {
+               struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+               int upper_bound;
+
+               w += nhge->weight;
+               upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
+               atomic_set(&nhge->hthr.upper_bound, upper_bound);
+       }
+}
+
+static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
+                               struct nl_info *nlinfo)
+{
+       struct nh_grp_entry *nhges, *new_nhges;
+       struct nexthop *nhp = nhge->nh_parent;
+       struct netlink_ext_ack extack;
         struct nexthop *nh = nhge->nh;
         struct nh_group *nhg, *newg;
         int i, j, err;
         struct nexthop *nh = nhge->nh;
         struct nh_group *nhg, *newg;
         int i, j, err;
@@ -965,7 +1751,9 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
         }
  
         newg->has_v4 = false;
         }
  
         newg->has_v4 = false;
-       newg->mpath = nhg->mpath;
+       newg->is_multipath = nhg->is_multipath;
+       newg->hash_threshold = nhg->hash_threshold;
+       newg->resilient = nhg->resilient;
         newg->fdb_nh = nhg->fdb_nh;
         newg->num_nh = nhg->num_nh;
  
         newg->fdb_nh = nhg->fdb_nh;
         newg->num_nh = nhg->num_nh;
  
@@ -993,15 +1781,25 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
                 j++;
         }
  
                 j++;
         }
  
-       nh_group_rebalance(newg);
+       if (newg->hash_threshold)
+               nh_hthr_group_rebalance(newg);
+       else if (newg->resilient)
+               replace_nexthop_grp_res(nhg, newg);
+
         rcu_assign_pointer(nhp->nh_grp, newg);
  
         list_del(&nhge->nh_list);
         nexthop_put(nhge->nh);
  
         rcu_assign_pointer(nhp->nh_grp, newg);
  
         list_del(&nhge->nh_list);
         nexthop_put(nhge->nh);
  
-       err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack);
-       if (err)
-               pr_err("%s\n", extack._msg);
+       /* Removal of a NH from a resilient group is notified through
+        * bucket notifications.
+        */
+       if (newg->hash_threshold) {
+               err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
+                                            &extack);
+               if (err)
+                       pr_err("%s\n", extack._msg);
+       }
  
         if (nlinfo)
                 nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
  
         if (nlinfo)
                 nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
@@ -1022,6 +1820,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
  static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
  {
         struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
  static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
  {
         struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
+       struct nh_res_table *res_table;
         int i, num_nh = nhg->num_nh;
  
         for (i = 0; i < num_nh; ++i) {
         int i, num_nh = nhg->num_nh;
  
         for (i = 0; i < num_nh; ++i) {
@@ -1032,6 +1831,11 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
  
                 list_del_init(&nhge->nh_list);
         }
  
                 list_del_init(&nhge->nh_list);
         }
+
+       if (nhg->resilient) {
+               res_table = rtnl_dereference(nhg->res_table);
+               nh_res_table_cancel_upkeep(res_table);
+       }
  }
  
  /* not called for nexthop replace */
  }
  
  /* not called for nexthop replace */
@@ -1107,9 +1911,12 @@ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
  }
  
  static int replace_nexthop_grp(struct net *net, struct nexthop *old,
  }
  
  static int replace_nexthop_grp(struct net *net, struct nexthop *old,
-                              struct nexthop *new,
+                              struct nexthop *new, const struct nh_config *cfg,
                                struct netlink_ext_ack *extack)
  {
                                struct netlink_ext_ack *extack)
  {
+       struct nh_res_table *tmp_table = NULL;
+       struct nh_res_table *new_res_table;
+       struct nh_res_table *old_res_table;
         struct nh_group *oldg, *newg;
         int i, err;
  
         struct nh_group *oldg, *newg;
         int i, err;
  
@@ -1118,19 +1925,67 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
                 return -EINVAL;
         }
  
                 return -EINVAL;
         }
  
-       err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
-       if (err)
-               return err;
-
         oldg = rtnl_dereference(old->nh_grp);
         newg = rtnl_dereference(new->nh_grp);
  
         oldg = rtnl_dereference(old->nh_grp);
         newg = rtnl_dereference(new->nh_grp);
  
+       if (newg->hash_threshold != oldg->hash_threshold) {
+               NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type.");
+               return -EINVAL;
+       }
+
+       if (newg->hash_threshold) {
+               err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new,
+                                            extack);
+               if (err)
+                       return err;
+       } else if (newg->resilient) {
+               new_res_table = rtnl_dereference(newg->res_table);
+               old_res_table = rtnl_dereference(oldg->res_table);
+
+               /* Accept if num_nh_buckets was not given, but if it was
+                * given, demand that the value be correct.
+                */
+               if (cfg->nh_grp_res_has_num_buckets &&
+                   cfg->nh_grp_res_num_buckets !=
+                   old_res_table->num_nh_buckets) {
+                       NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group.");
+                       return -EINVAL;
+               }
+
+               /* Emit a pre-replace notification so that listeners could veto
+                * a potentially unsupported configuration. Otherwise,
+                * individual bucket replacement notifications would need to be
+                * vetoed, which is something that should only happen if the
+                * bucket is currently active.
+                */
+               err = call_nexthop_res_table_notifiers(net, new, extack);
+               if (err)
+                       return err;
+
+               if (cfg->nh_grp_res_has_idle_timer)
+                       old_res_table->idle_timer = cfg->nh_grp_res_idle_timer;
+               if (cfg->nh_grp_res_has_unbalanced_timer)
+                       old_res_table->unbalanced_timer =
+                               cfg->nh_grp_res_unbalanced_timer;
+
+               replace_nexthop_grp_res(oldg, newg);
+
+               tmp_table = new_res_table;
+               rcu_assign_pointer(newg->res_table, old_res_table);
+               rcu_assign_pointer(newg->spare->res_table, old_res_table);
+       }
+
         /* update parents - used by nexthop code for cleanup */
         for (i = 0; i < newg->num_nh; i++)
                 newg->nh_entries[i].nh_parent = old;
  
         rcu_assign_pointer(old->nh_grp, newg);
  
         /* update parents - used by nexthop code for cleanup */
         for (i = 0; i < newg->num_nh; i++)
                 newg->nh_entries[i].nh_parent = old;
  
         rcu_assign_pointer(old->nh_grp, newg);
  
+       if (newg->resilient) {
+               rcu_assign_pointer(oldg->res_table, tmp_table);
+               rcu_assign_pointer(oldg->spare->res_table, tmp_table);
+       }
+
         for (i = 0; i < oldg->num_nh; i++)
                 oldg->nh_entries[i].nh_parent = new;
  
         for (i = 0; i < oldg->num_nh; i++)
                 oldg->nh_entries[i].nh_parent = new;
  
@@ -1156,6 +2011,71 @@ static void nh_group_v4_update(struct nh_group *nhg)
         nhg->has_v4 = has_v4;
  }
  
         nhg->has_v4 = has_v4;
  }
  
+/* Notify listeners, bucket by bucket, that nexthop @old inside a resilient
+ * group changes from @oldi to @newi.
+ *
+ * For every bucket of @res_table whose entry references @old, a bucket
+ * notification is emitted (the "true" argument is presumably the force
+ * flag - confirm against __call_nexthop_res_bucket_notifiers()).
+ *
+ * Returns 0 on success. If a notification fails, the buckets preceding the
+ * failing one are notified again with @newi and @oldi swapped, and the
+ * error is returned.
+ */
+static int replace_nexthop_single_notify_res(struct net *net,
+                                            struct nh_res_table *res_table,
+                                            struct nexthop *old,
+                                            struct nh_info *oldi,
+                                            struct nh_info *newi,
+                                            struct netlink_ext_ack *extack)
+{
+       u32 nhg_id = res_table->nhg_id;
+       int err;
+       u16 i;
+
+       for (i = 0; i < res_table->num_nh_buckets; i++) {
+               struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+               struct nh_grp_entry *nhge;
+
+               nhge = rtnl_dereference(bucket->nh_entry);
+               if (nhge->nh == old) {
+                       err = __call_nexthop_res_bucket_notifiers(net, nhg_id,
+                                                                 i, true,
+                                                                 oldi, newi,
+                                                                 extack);
+                       if (err)
+                               goto err_notify;
+               }
+       }
+
+       return 0;
+
+err_notify:
+       /* Roll back: walk the already-notified buckets in reverse order. */
+       while (i-- > 0) {
+               struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+               struct nh_grp_entry *nhge;
+
+               nhge = rtnl_dereference(bucket->nh_entry);
+               if (nhge->nh == old)
+                       __call_nexthop_res_bucket_notifiers(net, nhg_id, i,
+                                                           true, newi, oldi,
+                                                           extack);
+       }
+       return err;
+}
+
+/* Notify listeners that nexthop @old, a member of group @group_nh, changes
+ * from @oldi to @newi.
+ *
+ * Hash-threshold groups get a NEXTHOP_EVENT_REPLACE notification for the
+ * whole group; resilient groups get per-bucket notifications through
+ * replace_nexthop_single_notify_res().
+ *
+ * Returns the notifier result, or -EINVAL if the group is of neither type.
+ */
+static int replace_nexthop_single_notify(struct net *net,
+                                        struct nexthop *group_nh,
+                                        struct nexthop *old,
+                                        struct nh_info *oldi,
+                                        struct nh_info *newi,
+                                        struct netlink_ext_ack *extack)
+{
+       struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp);
+       struct nh_res_table *res_table;
+
+       if (nhg->hash_threshold) {
+               return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE,
+                                             group_nh, extack);
+       } else if (nhg->resilient) {
+               res_table = rtnl_dereference(nhg->res_table);
+               return replace_nexthop_single_notify_res(net, res_table,
+                                                        old, oldi, newi,
+                                                        extack);
+       }
+
+       return -EINVAL;
+}
+
  static int replace_nexthop_single(struct net *net, struct nexthop *old,
                                   struct nexthop *new,
                                   struct netlink_ext_ack *extack)
  static int replace_nexthop_single(struct net *net, struct nexthop *old,
                                   struct nexthop *new,
                                   struct netlink_ext_ack *extack)
@@ -1198,8 +2118,8 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
         list_for_each_entry(nhge, &old->grp_list, nh_list) {
                 struct nexthop *nhp = nhge->nh_parent;
  
         list_for_each_entry(nhge, &old->grp_list, nh_list) {
                 struct nexthop *nhp = nhge->nh_parent;
  
-               err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
-                                            extack);
+               err = replace_nexthop_single_notify(net, nhp, old, oldi, newi,
+                                                   extack);
                 if (err)
                         goto err_notify;
         }
                 if (err)
                         goto err_notify;
         }
@@ -1229,7 +2149,7 @@ err_notify:
         list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
                 struct nexthop *nhp = nhge->nh_parent;
  
         list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
                 struct nexthop *nhp = nhge->nh_parent;
  
-               call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack);
+               replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL);
         }
         call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
         return err;
         }
         call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
         return err;
@@ -1276,7 +2196,8 @@ static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
  }
  
  static int replace_nexthop(struct net *net, struct nexthop *old,
  }
  
  static int replace_nexthop(struct net *net, struct nexthop *old,
-                          struct nexthop *new, struct netlink_ext_ack *extack)
+                          struct nexthop *new, const struct nh_config *cfg,
+                          struct netlink_ext_ack *extack)
  {
         bool new_is_reject = false;
         struct nh_grp_entry *nhge;
  {
         bool new_is_reject = false;
         struct nh_grp_entry *nhge;
@@ -1319,7 +2240,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
         }
  
         if (old->is_group)
         }
  
         if (old->is_group)
-               err = replace_nexthop_grp(net, old, new, extack);
+               err = replace_nexthop_grp(net, old, new, cfg, extack);
         else
                 err = replace_nexthop_single(net, old, new, extack);
  
         else
                 err = replace_nexthop_single(net, old, new, extack);
  
@@ -1361,7 +2282,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
                 } else if (new_id > nh->id) {
                         pp = &next->rb_right;
                 } else if (replace) {
                 } else if (new_id > nh->id) {
                         pp = &next->rb_right;
                 } else if (replace) {
-                       rc = replace_nexthop(net, nh, new_nh, extack);
+                       rc = replace_nexthop(net, nh, new_nh, cfg, extack);
                         if (!rc) {
                                 new_nh = nh; /* send notification with old nh */
                                 replace_notify = 1;
                         if (!rc) {
                                 new_nh = nh; /* send notification with old nh */
                                 replace_notify = 1;
@@ -1379,9 +2300,37 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
                 goto out;
         }
  
                 goto out;
         }
  
+       if (new_nh->is_group) {
+               struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp);
+               struct nh_res_table *res_table;
+
+               if (nhg->resilient) {
+                       res_table = rtnl_dereference(nhg->res_table);
+
+                       /* Not passing the number of buckets is OK when
+                        * replacing, but not when creating a new group.
+                        */
+                       if (!cfg->nh_grp_res_has_num_buckets) {
+                               NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion");
+                               rc = -EINVAL;
+                               goto out;
+                       }
+
+                       nh_res_group_rebalance(nhg, res_table);
+
+                       /* Do not send bucket notifications, we do full
+                        * notification below.
+                        */
+                       nh_res_table_upkeep(res_table, false, false);
+               }
+       }
+
         rb_link_node_rcu(&new_nh->rb_node, parent, pp);
         rb_insert_color(&new_nh->rb_node, root);
  
         rb_link_node_rcu(&new_nh->rb_node, parent, pp);
         rb_insert_color(&new_nh->rb_node, root);
  
+       /* The initial insertion is a full notification for hash-threshold as
+        * well as resilient groups.
+        */
         rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
         if (rc)
                 rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
         rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
         if (rc)
                 rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
@@ -1441,6 +2390,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
         u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
         struct nh_group *nhg;
         struct nexthop *nh;
         u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
         struct nh_group *nhg;
         struct nexthop *nh;
+       int err;
         int i;
  
         if (WARN_ON(!num_nh))
         int i;
  
         if (WARN_ON(!num_nh))
@@ -1472,8 +2422,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
                 struct nh_info *nhi;
  
                 nhe = nexthop_find_by_id(net, entry[i].id);
                 struct nh_info *nhi;
  
                 nhe = nexthop_find_by_id(net, entry[i].id);
-               if (!nexthop_get(nhe))
+               if (!nexthop_get(nhe)) {
+                       err = -ENOENT;
                         goto out_no_nh;
                         goto out_no_nh;
+               }
  
                 nhi = rtnl_dereference(nhe->nh_info);
                 if (nhi->family == AF_INET)
  
                 nhi = rtnl_dereference(nhe->nh_info);
                 if (nhi->family == AF_INET)
@@ -1485,13 +2437,28 @@ static struct nexthop *nexthop_create_group(struct net *net,
                 nhg->nh_entries[i].nh_parent = nh;
         }
  
                 nhg->nh_entries[i].nh_parent = nh;
         }
  
-       if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH)
-               nhg->mpath = 1;
+       if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
+               nhg->hash_threshold = 1;
+               nhg->is_multipath = true;
+       } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
+               struct nh_res_table *res_table;
  
  
-       WARN_ON_ONCE(nhg->mpath != 1);
+               res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg);
+               if (!res_table) {
+                       err = -ENOMEM;
+                       goto out_no_nh;
+               }
  
  
-       if (nhg->mpath)
-               nh_group_rebalance(nhg);
+               rcu_assign_pointer(nhg->spare->res_table, res_table);
+               rcu_assign_pointer(nhg->res_table, res_table);
+               nhg->resilient = true;
+               nhg->is_multipath = true;
+       }
+
+       WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1);
+
+       if (nhg->hash_threshold)
+               nh_hthr_group_rebalance(nhg);
  
         if (cfg->nh_fdb)
                 nhg->fdb_nh = 1;
  
         if (cfg->nh_fdb)
                 nhg->fdb_nh = 1;
@@ -1510,7 +2477,7 @@ out_no_nh:
         kfree(nhg);
         kfree(nh);
  
         kfree(nhg);
         kfree(nh);
  
-       return ERR_PTR(-ENOENT);
+       return ERR_PTR(err);
  }
  
  static int nh_create_ipv4(struct net *net, struct nexthop *nh,
  }
  
  static int nh_create_ipv4(struct net *net, struct nexthop *nh,
@@ -1680,6 +2647,70 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
         return nh;
  }
  
         return nh;
  }
  
+/* Read an optional timer attribute and convert it to jiffies.
+ *
+ * @attr is a u32 attribute carrying a clock_t value, or NULL. When it is
+ * NULL, *@timer_p receives @fallback and *@has_p is set to false. Otherwise
+ * the value goes through clock_t_to_jiffies(); a result of ~0UL is rejected
+ * with -EINVAL (message in @extack) and neither output is written.
+ *
+ * Returns 0 on success.
+ */
+static int rtm_nh_get_timer(struct nlattr *attr, unsigned long fallback,
+                           unsigned long *timer_p, bool *has_p,
+                           struct netlink_ext_ack *extack)
+{
+       unsigned long timeout = fallback;
+       bool present = !!attr;
+
+       if (present) {
+               timeout = clock_t_to_jiffies(nla_get_u32(attr));
+               if (timeout == ~0UL) {
+                       NL_SET_ERR_MSG(extack, "Timer value too large");
+                       return -EINVAL;
+               }
+       }
+
+       *timer_p = timeout;
+       *has_p = present;
+       return 0;
+}
+
+/* Parse the nested NHA_RES_GROUP attribute into @cfg.
+ *
+ * @res may be NULL, in which case every nested attribute counts as absent.
+ * A bucket count, when supplied, is recorded in @cfg and must be non-zero.
+ * The idle and unbalanced timers are recorded as well, falling back to
+ * NH_RES_DEFAULT_IDLE_TIMER and NH_RES_DEFAULT_UNBALANCED_TIMER when the
+ * corresponding attribute is missing.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg,
+                                   struct netlink_ext_ack *extack)
+{
+       struct nlattr *attrs[ARRAY_SIZE(rtm_nh_res_policy_new)] = {};
+       struct nlattr *buckets_attr;
+       int rc;
+
+       if (res) {
+               rc = nla_parse_nested(attrs,
+                                     ARRAY_SIZE(rtm_nh_res_policy_new) - 1,
+                                     res, rtm_nh_res_policy_new, extack);
+               if (rc < 0)
+                       return rc;
+       }
+
+       buckets_attr = attrs[NHA_RES_GROUP_BUCKETS];
+       if (buckets_attr) {
+               cfg->nh_grp_res_has_num_buckets = true;
+               cfg->nh_grp_res_num_buckets = nla_get_u16(buckets_attr);
+               if (cfg->nh_grp_res_num_buckets == 0) {
+                       NL_SET_ERR_MSG(extack, "Number of buckets needs to be non-0");
+                       return -EINVAL;
+               }
+       }
+
+       rc = rtm_nh_get_timer(attrs[NHA_RES_GROUP_IDLE_TIMER],
+                             NH_RES_DEFAULT_IDLE_TIMER,
+                             &cfg->nh_grp_res_idle_timer,
+                             &cfg->nh_grp_res_has_idle_timer,
+                             extack);
+       if (!rc)
+               rc = rtm_nh_get_timer(attrs[NHA_RES_GROUP_UNBALANCED_TIMER],
+                                     NH_RES_DEFAULT_UNBALANCED_TIMER,
+                                     &cfg->nh_grp_res_unbalanced_timer,
+                                     &cfg->nh_grp_res_has_unbalanced_timer,
+                                     extack);
+       return rc;
+}
+
  static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
                             struct nlmsghdr *nlh, struct nh_config *cfg,
                             struct netlink_ext_ack *extack)
  static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
                             struct nlmsghdr *nlh, struct nh_config *cfg,
                             struct netlink_ext_ack *extack)
@@ -1758,7 +2789,14 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
                         NL_SET_ERR_MSG(extack, "Invalid group type");
                         goto out;
                 }
                         NL_SET_ERR_MSG(extack, "Invalid group type");
                         goto out;
                 }
-               err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);
+               err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb),
+                                         cfg->nh_grp_type, extack);
+               if (err)
+                       goto out;
+
+               if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES)
+                       err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP],
+                                                      cfg, extack);
  
                 /* no other attributes should be set */
                 goto out;
  
                 /* no other attributes should be set */
                 goto out;
@@ -1983,10 +3021,12 @@ errout_free:
  }
  
  struct nh_dump_filter {
  }
  
  struct nh_dump_filter {
+       u32 nh_id;
         int dev_idx;
         int master_idx;
         bool group_filter;
         bool fdb_filter;
         int dev_idx;
         int master_idx;
         bool group_filter;
         bool fdb_filter;
+       u32 res_bucket_nh_id;
  };
  
  static bool nh_dump_filtered(struct nexthop *nh,
  };
  
  static bool nh_dump_filtered(struct nexthop *nh,
@@ -2100,26 +3140,24 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb,
                                   void *data)
  {
         struct rb_node *node;
                                   void *data)
  {
         struct rb_node *node;
-       int idx = 0, s_idx;
+       int s_idx;
         int err;
  
         s_idx = ctx->idx;
         for (node = rb_first(root); node; node = rb_next(node)) {
                 struct nexthop *nh;
  
         int err;
  
         s_idx = ctx->idx;
         for (node = rb_first(root); node; node = rb_next(node)) {
                 struct nexthop *nh;
  
-               if (idx < s_idx)
-                       goto cont;
-
                 nh = rb_entry(node, struct nexthop, rb_node);
                 nh = rb_entry(node, struct nexthop, rb_node);
-               ctx->idx = idx;
+               if (nh->id < s_idx)
+                       continue;
+
+               ctx->idx = nh->id;
                 err = nh_cb(skb, cb, nh, data);
                 if (err)
                         return err;
                 err = nh_cb(skb, cb, nh, data);
                 if (err)
                         return err;
-cont:
-               idx++;
         }
  
         }
  
-       ctx->idx = idx;
+       ctx->idx++;
         return 0;
  }
  
         return 0;
  }
  
@@ -2166,6 +3204,318 @@ out_err:
         return err;
  }
  
         return err;
  }
  
+/* Look up nexthop @id and make sure it is a resilient nexthop group.
+ *
+ * Returns the nexthop on success. Returns ERR_PTR(-ENOENT) when no nexthop
+ * has that id, and ERR_PTR(-EINVAL) with a message in @extack when the
+ * nexthop is not a group or the group is not resilient.
+ */
+static struct nexthop *
+nexthop_find_group_resilient(struct net *net, u32 id,
+                            struct netlink_ext_ack *extack)
+{
+       struct nexthop *found = nexthop_find_by_id(net, id);
+       struct nh_group *grp;
+
+       if (!found)
+               return ERR_PTR(-ENOENT);
+
+       if (found->is_group) {
+               grp = rtnl_dereference(found->nh_grp);
+               if (grp->resilient)
+                       return found;
+
+               NL_SET_ERR_MSG(extack, "Nexthop group not of type resilient");
+       } else {
+               NL_SET_ERR_MSG(extack, "Not a nexthop group");
+       }
+
+       return ERR_PTR(-EINVAL);
+}
+
+/* Extract an optional nexthop id filter from a dump request.
+ *
+ * *@nh_id_p is set to 0 when @attr is NULL, otherwise to the u32 carried by
+ * @attr. An explicit id of 0 is rejected with -EINVAL (message in @extack)
+ * and *@nh_id_p is left untouched.
+ */
+static int nh_valid_dump_nhid(struct nlattr *attr, u32 *nh_id_p,
+                             struct netlink_ext_ack *extack)
+{
+       u32 id = 0;
+
+       if (attr) {
+               id = nla_get_u32(attr);
+               if (id == 0) {
+                       NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+                       return -EINVAL;
+               }
+       }
+
+       *nh_id_p = id;
+       return 0;
+}
+
+/* Validate a nexthop bucket dump request and fill in @filter.
+ *
+ * The top-level attributes are parsed against rtm_nh_policy_dump_bucket.
+ * NHA_ID, when present, becomes @filter->nh_id. A nested NHA_RES_BUCKET,
+ * when present, is parsed against rtm_nh_res_bucket_policy_dump and its
+ * NHA_RES_BUCKET_NH_ID becomes @filter->res_bucket_nh_id; without
+ * NHA_RES_BUCKET that field is not written here. The remaining checks are
+ * delegated to __nh_valid_dump_req().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
+                                   struct nh_dump_filter *filter,
+                                   struct netlink_callback *cb)
+{
+       struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)];
+       struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)];
+       int err;
+
+       /* Hand extack to the top-level parse too, so that a malformed
+        * request is diagnosed the same way as the nested parse below.
+        */
+       err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+                         ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1,
+                         rtm_nh_policy_dump_bucket, cb->extack);
+       if (err < 0)
+               return err;
+
+       err = nh_valid_dump_nhid(tb[NHA_ID], &filter->nh_id, cb->extack);
+       if (err)
+               return err;
+
+       if (tb[NHA_RES_BUCKET]) {
+               size_t max = ARRAY_SIZE(rtm_nh_res_bucket_policy_dump) - 1;
+
+               err = nla_parse_nested(res_tb, max,
+                                      tb[NHA_RES_BUCKET],
+                                      rtm_nh_res_bucket_policy_dump,
+                                      cb->extack);
+               if (err < 0)
+                       return err;
+
+               err = nh_valid_dump_nhid(res_tb[NHA_RES_BUCKET_NH_ID],
+                                        &filter->res_bucket_nh_id,
+                                        cb->extack);
+               if (err)
+                       return err;
+       }
+
+       return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
+}
+
+struct rtm_dump_res_bucket_ctx { /* Resume state of a nexthop bucket dump. */
+       struct rtm_dump_nh_ctx nh; /* Cursor handed to rtm_dump_walk_nexthops(). */
+       u16 bucket_index; /* Bucket at which the per-NH bucket loop (re)starts. */
+       u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */
+};
+
+/* Return the bucket dump context stored in @cb->ctx.
+ *
+ * The build fails if the context structure is larger than @cb->ctx.
+ */
+static struct rtm_dump_res_bucket_ctx *
+rtm_dump_res_bucket_ctx(struct netlink_callback *cb)
+{
+       BUILD_BUG_ON(sizeof(struct rtm_dump_res_bucket_ctx) > sizeof(cb->ctx));
+
+       return (void *)cb->ctx;
+}
+
+struct rtm_dump_nexthop_bucket_data { /* Per-call state of a bucket dump. */
+       struct rtm_dump_res_bucket_ctx *ctx; /* Resume state kept in cb->ctx. */
+       struct nh_dump_filter filter; /* Filled by nh_valid_dump_bucket_req(). */
+};
+
+/* Dump the buckets of one resilient nexthop group into @skb.
+ *
+ * Nothing is done (and 0 is returned) when the context says this nexthop
+ * was already fully processed. Otherwise the walk starts at the bucket
+ * index saved in the context. Buckets rejected by nh_dump_filtered(), or
+ * whose nexthop id differs from a non-zero res_bucket_nh_id filter, are
+ * skipped.
+ *
+ * When nh_fill_res_bucket() fails, the failing bucket index is saved so the
+ * walk can resume there; skb->len is returned if @skb already holds data,
+ * the error code otherwise. When all buckets were visited, the nexthop is
+ * marked done, the saved bucket index is reset to 0 and skb->len is
+ * returned.
+ */
+static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
+                                     struct netlink_callback *cb,
+                                     struct nexthop *nh,
+                                     struct rtm_dump_nexthop_bucket_data *dd)
+{
+       struct rtm_dump_res_bucket_ctx *ctx = dd->ctx;
+       u32 portid = NETLINK_CB(cb->skb).portid;
+       struct nhmsg *nhm = nlmsg_data(cb->nlh);
+       struct nh_res_table *table;
+       struct nh_group *grp;
+       u16 idx;
+       int rc;
+
+       if (ctx->nh.idx < ctx->done_nh_idx)
+               return 0;
+
+       grp = rtnl_dereference(nh->nh_grp);
+       table = rtnl_dereference(grp->res_table);
+
+       idx = ctx->bucket_index;
+       while (idx < table->num_nh_buckets) {
+               struct nh_res_bucket *bucket = &table->nh_buckets[idx];
+               struct nh_grp_entry *entry;
+               bool wanted;
+
+               entry = rtnl_dereference(bucket->nh_entry);
+               wanted = !nh_dump_filtered(entry->nh, &dd->filter,
+                                          nhm->nh_family) &&
+                        (!dd->filter.res_bucket_nh_id ||
+                         dd->filter.res_bucket_nh_id == entry->nh->id);
+               if (wanted) {
+                       rc = nh_fill_res_bucket(skb, nh, bucket, idx,
+                                               RTM_NEWNEXTHOPBUCKET, portid,
+                                               cb->nlh->nlmsg_seq,
+                                               NLM_F_MULTI, cb->extack);
+                       if (rc < 0) {
+                               /* Resume from this bucket next time. */
+                               ctx->bucket_index = idx;
+                               if (likely(skb->len))
+                                       return skb->len;
+                               return rc;
+                       }
+               }
+
+               idx++;
+       }
+
+       /* Every bucket of this nexthop was visited. */
+       ctx->done_nh_idx = ctx->nh.idx + 1;
+       ctx->bucket_index = 0;
+       return skb->len;
+}
+
+/* Per-nexthop callback for rtm_dump_walk_nexthops().
+ *
+ * Nexthops that are not resilient groups are skipped with a return value
+ * of 0; resilient groups are handed to rtm_dump_nexthop_bucket_nh() with
+ * @data as the dump state.
+ */
+static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
+                                     struct netlink_callback *cb,
+                                     struct nexthop *nh, void *data)
+{
+       struct rtm_dump_nexthop_bucket_data *dd = data;
+
+       if (nh->is_group) {
+               struct nh_group *grp = rtnl_dereference(nh->nh_grp);
+
+               if (grp->resilient)
+                       return rtm_dump_nexthop_bucket_nh(skb, cb, nh, dd);
+       }
+
+       return 0;
+}
+
+/* Dump handler for nexthop buckets; runs under rtnl.
+ *
+ * After validating the request, either the buckets of the single resilient
+ * group named by the nh_id filter are dumped, or all nexthops are walked
+ * and every resilient group found is dumped. A request validation or group
+ * lookup failure is returned directly. Otherwise skb->len is returned,
+ * except when dumping failed before anything was put into @skb, in which
+ * case the error is returned. The dump sequence is updated in both cases.
+ */
+static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
+                                  struct netlink_callback *cb)
+{
+       struct rtm_dump_nexthop_bucket_data dd = {
+               .ctx = rtm_dump_res_bucket_ctx(cb),
+       };
+       struct net *net = sock_net(skb->sk);
+       int rc;
+
+       rc = nh_valid_dump_bucket_req(cb->nlh, &dd.filter, cb);
+       if (rc)
+               return rc;
+
+       if (dd.filter.nh_id) {
+               struct nexthop *nh;
+
+               nh = nexthop_find_group_resilient(net, dd.filter.nh_id,
+                                                 cb->extack);
+               if (IS_ERR(nh))
+                       return PTR_ERR(nh);
+               rc = rtm_dump_nexthop_bucket_nh(skb, cb, nh, &dd);
+       } else {
+               rc = rtm_dump_walk_nexthops(skb, cb, &net->nexthop.rb_root,
+                                           &dd.ctx->nh,
+                                           &rtm_dump_nexthop_bucket_cb, &dd);
+       }
+
+       /* Only report the error when nothing at all was dumped. */
+       if (rc >= 0 || likely(skb->len))
+               rc = skb->len;
+
+       cb->seq = net->nexthop.seq;
+       nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+       return rc;
+}
+
+/* Parse the nested NHA_RES_BUCKET attribute of a bucket get request.
+ *
+ * On success the mandatory NHA_RES_BUCKET_INDEX value is stored in
+ * *@bucket_index and 0 is returned. A parse failure is returned as is; a
+ * missing index yields -EINVAL with a message in @extack.
+ */
+static int nh_valid_get_bucket_req_res_bucket(struct nlattr *res,
+                                             u16 *bucket_index,
+                                             struct netlink_ext_ack *extack)
+{
+       struct nlattr *attrs[ARRAY_SIZE(rtm_nh_res_bucket_policy_get)];
+       struct nlattr *index_attr;
+       int rc;
+
+       rc = nla_parse_nested(attrs,
+                             ARRAY_SIZE(rtm_nh_res_bucket_policy_get) - 1,
+                             res, rtm_nh_res_bucket_policy_get, extack);
+       if (rc < 0)
+               return rc;
+
+       index_attr = attrs[NHA_RES_BUCKET_INDEX];
+       if (!index_attr) {
+               NL_SET_ERR_MSG(extack, "Bucket index is missing");
+               return -EINVAL;
+       }
+
+       *bucket_index = nla_get_u16(index_attr);
+       return 0;
+}
+
+/* Validate a nexthop bucket get request.
+ *
+ * The message is parsed against rtm_nh_policy_get_bucket, the nexthop id is
+ * obtained through __nh_valid_get_del_req() and the bucket index through
+ * the mandatory nested NHA_RES_BUCKET attribute.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh,
+                                  u32 *id, u16 *bucket_index,
+                                  struct netlink_ext_ack *extack)
+{
+       struct nlattr *attrs[ARRAY_SIZE(rtm_nh_policy_get_bucket)];
+       int rc;
+
+       rc = nlmsg_parse(nlh, sizeof(struct nhmsg), attrs,
+                        ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1,
+                        rtm_nh_policy_get_bucket, extack);
+       if (rc < 0)
+               return rc;
+
+       rc = __nh_valid_get_del_req(nlh, attrs, id, extack);
+       if (rc)
+               return rc;
+
+       if (!attrs[NHA_RES_BUCKET]) {
+               NL_SET_ERR_MSG(extack, "Bucket information is missing");
+               return -EINVAL;
+       }
+
+       return nh_valid_get_bucket_req_res_bucket(attrs[NHA_RES_BUCKET],
+                                                 bucket_index, extack);
+}
+
+/* Get handler for a single nexthop bucket; runs under rtnl.
+ *
+ * Validates the request, looks up the resilient group and checks the
+ * bucket index against the size of its table (-ENOENT when out of bounds).
+ * The bucket is then described in a freshly allocated skb (-ENOBUFS when
+ * allocation fails) that is unicast back to the requester. If filling the
+ * message fails, the skb is freed and the error returned.
+ */
+static int rtm_get_nexthop_bucket(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+                                 struct netlink_ext_ack *extack)
+{
+       u32 portid = NETLINK_CB(in_skb).portid;
+       struct net *net = sock_net(in_skb->sk);
+       struct nh_res_table *table;
+       struct sk_buff *reply;
+       struct nh_group *grp;
+       struct nexthop *nh;
+       u16 bucket_index;
+       u32 nh_id;
+       int rc;
+
+       rc = nh_valid_get_bucket_req(nlh, &nh_id, &bucket_index, extack);
+       if (rc)
+               return rc;
+
+       nh = nexthop_find_group_resilient(net, nh_id, extack);
+       if (IS_ERR(nh))
+               return PTR_ERR(nh);
+
+       grp = rtnl_dereference(nh->nh_grp);
+       table = rtnl_dereference(grp->res_table);
+       if (bucket_index >= table->num_nh_buckets) {
+               NL_SET_ERR_MSG(extack, "Bucket index out of bounds");
+               return -ENOENT;
+       }
+
+       reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!reply)
+               return -ENOBUFS;
+
+       rc = nh_fill_res_bucket(reply, nh, &table->nh_buckets[bucket_index],
+                               bucket_index, RTM_NEWNEXTHOPBUCKET,
+                               portid, nlh->nlmsg_seq, 0, extack);
+       if (rc < 0) {
+               WARN_ON(rc == -EMSGSIZE);
+               kfree_skb(reply);
+               return rc;
+       }
+
+       return rtnl_unicast(reply, net, portid);
+}
+
  static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
  {
         unsigned int hash = nh_dev_hashfn(dev->ifindex);
  static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
  {
         unsigned int hash = nh_dev_hashfn(dev->ifindex);
@@ -2277,6 +3627,75 @@ out:
  }
  EXPORT_SYMBOL(nexthop_set_hw_flags);
  
  }
  EXPORT_SYMBOL(nexthop_set_hw_flags);
  
+/* Update the RTNH_F_OFFLOAD and RTNH_F_TRAP flags of one bucket of a
+ * resilient nexthop group.
+ *
+ * Both flags are cleared first and then set according to @offload and
+ * @trap. The call is silently ignored when nexthop @id does not exist, is
+ * not a resilient group, or @bucket_index is outside its bucket table.
+ * The lookup is done under rcu_read_lock().
+ */
+void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
+                                bool offload, bool trap)
+{
+       struct nh_res_table *res_table;
+       struct nh_res_bucket *bucket;
+       struct nexthop *nexthop;
+       struct nh_group *nhg;
+
+       rcu_read_lock();
+
+       nexthop = nexthop_find_by_id(net, id);
+       if (!nexthop || !nexthop->is_group)
+               goto out;
+
+       nhg = rcu_dereference(nexthop->nh_grp);
+       if (!nhg->resilient)
+               goto out;
+
+       /* Fetch the table pointer once, through rcu_dereference(), so that
+        * the bounds check and the bucket access below use the same table.
+        */
+       res_table = rcu_dereference(nhg->res_table);
+       if (bucket_index >= res_table->num_nh_buckets)
+               goto out;
+
+       bucket = &res_table->nh_buckets[bucket_index];
+       bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
+       if (offload)
+               bucket->nh_flags |= RTNH_F_OFFLOAD;
+       if (trap)
+               bucket->nh_flags |= RTNH_F_TRAP;
+
+out:
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL(nexthop_bucket_set_hw_flags);
+
+/* Mark buckets of a resilient nexthop group as busy.
+ *
+ * @activity is a bitmap of @num_buckets bits; each set bit marks the
+ * bucket with the same index busy via nh_res_bucket_set_busy(). The call
+ * is silently ignored when nexthop @id does not exist, is not a resilient
+ * group, or @num_buckets differs from the size of its bucket table. The
+ * lookup is done under rcu_read_lock().
+ */
+void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
+                                    unsigned long *activity)
+{
+       struct nh_res_table *table;
+       struct nh_group *grp;
+       struct nexthop *nh;
+       u16 idx;
+
+       rcu_read_lock();
+
+       nh = nexthop_find_by_id(net, id);
+       if (!nh || !nh->is_group)
+               goto unlock;
+
+       grp = rcu_dereference(nh->nh_grp);
+       if (!grp->resilient)
+               goto unlock;
+
+       /* A size mismatch is refused outright rather than updating only
+        * part of the buckets.
+        */
+       table = rcu_dereference(grp->res_table);
+       if (table->num_nh_buckets != num_buckets)
+               goto unlock;
+
+       for (idx = 0; idx < num_buckets; idx++) {
+               if (!test_bit(idx, activity))
+                       continue;
+
+               nh_res_bucket_set_busy(&table->nh_buckets[idx]);
+       }
+
+unlock:
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL(nexthop_res_grp_activity_update);
+
  static void __net_exit nexthop_net_exit(struct net *net)
  {
         rtnl_lock();
  static void __net_exit nexthop_net_exit(struct net *net)
  {
         rtnl_lock();
@@ -2320,6 +3739,9 @@ static int __init nexthop_init(void)
         rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
         rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
  
         rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
         rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
  
+       rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket,
+                     rtm_dump_nexthop_bucket, 0);
+
         return 0;
  }
  subsys_initcall(nexthop_init);
         return 0;
  }
  subsys_initcall(nexthop_init);