net/netfilter/ipvs/ip_vs_nq.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * IPVS:        Never Queue scheduling module
   4  *
   5  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   6  *
   7  * Changes:
   8  */
   9
  10 /*
  11  * The NQ algorithm adopts a two-speed model. When there is an idle server
  12  * available, the job will be sent to the idle server, instead of waiting
  13  * for a fast one. When there is no idle server available, the job will be
  14  * sent to the server that minimizes its expected delay (The Shortest
  15  * Expected Delay scheduling algorithm).
  16  *
  17  * See the following paper for more information:
  18  * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
  19  * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
  20  * pages 986-994, 1988.
  21  *
  22  * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me.
  23  *
  24  * The difference between NQ and SED is that NQ can improve overall
  25  * system utilization.
  26  *
  27  */
  28
  29 #define KMSG_COMPONENT "IPVS"
  30 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  31
  32 #include <linux/module.h>
  33 #include <linux/kernel.h>
  34
  35 #include <net/ip_vs.h>
  36
  37
  38 static inline int
  39 ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
  40 {
  41         /*
  42          * We only use the active connection number in the cost
  43          * calculation here.
  44          */
  45         return atomic_read(&dest->activeconns) + 1;
  46 }
  47
  48
  49 /*
  50  *      Weighted Least Connection scheduling
  51  */
  52 static struct ip_vs_dest *
  53 ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  54                   struct ip_vs_iphdr *iph)
  55 {
  56         struct ip_vs_dest *dest, *least = NULL;
  57         int loh = 0, doh;
  58
  59         IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
  60
  61         /*
  62          * We calculate the load of each dest server as follows:
  63          *      (server expected overhead) / dest->weight
  64          *
  65          * Remember -- no floats in kernel mode!!!
  66          * The comparison of h1*w2 > h2*w1 is equivalent to that of
  67          *                h1/w1 > h2/w2
  68          * if every weight is larger than zero.
  69          *
  70          * The server with weight=0 is quiesced and will not receive any
  71          * new connections.
  72          */
  73
  74         list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
  75
  76                 if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
  77                     !atomic_read(&dest->weight))
  78                         continue;
  79
  80                 doh = ip_vs_nq_dest_overhead(dest);
  81
  82                 /* return the server directly if it is idle */
  83                 if (atomic_read(&dest->activeconns) == 0) {
  84                         least = dest;
  85                         loh = doh;
  86                         goto out;
  87                 }
  88
  89                 if (!least ||
  90                     ((__s64)loh * atomic_read(&dest->weight) >
  91                      (__s64)doh * atomic_read(&least->weight))) {
  92                         least = dest;
  93                         loh = doh;
  94                 }
  95         }
  96
  97         if (!least) {
  98                 ip_vs_scheduler_err(svc, "no destination available");
  99                 return NULL;
 100         }
 101
 102   out:
 103         IP_VS_DBG_BUF(6, "NQ: server %s:%u "
 104                       "activeconns %d refcnt %d weight %d overhead %d\n",
 105                       IP_VS_DBG_ADDR(least->af, &least->addr),
 106                       ntohs(least->port),
 107                       atomic_read(&least->activeconns),
 108                       refcount_read(&least->refcnt),
 109                       atomic_read(&least->weight), loh);
 110
 111         return least;
 112 }
 113
 114
 115 static struct ip_vs_scheduler ip_vs_nq_scheduler =
 116 {
 117         .name =                 "nq",
 118         .refcnt =               ATOMIC_INIT(0),
 119         .module =               THIS_MODULE,
 120         .n_list =               LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
 121         .schedule =             ip_vs_nq_schedule,
 122 };
 123
 124
 125 static int __init ip_vs_nq_init(void)
 126 {
 127         return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
 128 }
 129
 130 static void __exit ip_vs_nq_cleanup(void)
 131 {
 132         unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
 133         synchronize_rcu();
 134 }
 135
 136 module_init(ip_vs_nq_init);
 137 module_exit(ip_vs_nq_cleanup);
 138 MODULE_LICENSE("GPL");