// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 Thomas Gleixner.
 * Copyright (C) 2016-2017 Christoph Hellwig.
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
				int cpus_per_vec)
{
	const struct cpumask *siblmsk;
	int cpu, sibl;

	for ( ; cpus_per_vec > 0; ) {
		cpu = cpumask_first(nmsk);

		/* Should not happen, but I'm too lazy to think about it */
		if (cpu >= nr_cpu_ids)
			return;

		cpumask_clear_cpu(cpu, nmsk);
		cpumask_set_cpu(cpu, irqmsk);
		cpus_per_vec--;

		/* If the cpu has siblings, use them first */
		siblmsk = topology_sibling_cpumask(cpu);
		for (sibl = -1; cpus_per_vec > 0; ) {
			sibl = cpumask_next(sibl, siblmsk);
			if (sibl >= nr_cpu_ids)
				break;
			if (!cpumask_test_and_clear_cpu(sibl, nmsk))
				continue;
			cpumask_set_cpu(sibl, irqmsk);
			cpus_per_vec--;
		}
	}
}
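
/*
 * Illustrative example (added, not from the original source): with
 * nmsk = {0,1,2,3}, where CPUs 0 and 1 are hyperthread siblings, a call
 * with cpus_per_vec = 2 assigns CPU 0 and then its sibling CPU 1 to the
 * vector before touching other cores, keeping a vector's CPUs
 * topologically close.
 */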

static cpumask_var_t *alloc_node_to_cpumask(void)
{
	cpumask_var_t *masks;
	int node;

	masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
	if (!masks)
		return NULL;

	for (node = 0; node < nr_node_ids; node++) {
		if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
			goto out_unwind;
	}

	return masks;

out_unwind:
	/* Free the per-node masks allocated so far before bailing out */
	while (--node >= 0)
		free_cpumask_var(masks[node]);
	kfree(masks);
	return NULL;
}

static void free_node_to_cpumask(cpumask_var_t *masks)
{
	int node;

	for (node = 0; node < nr_node_ids; node++)
		free_cpumask_var(masks[node]);
	kfree(masks);
}

static void build_node_to_cpumask(cpumask_var_t *masks)
{
	int cpu;

	for_each_possible_cpu(cpu)
		cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
}

static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
				const struct cpumask *mask, nodemask_t *nodemsk)
{
	int n, nodes = 0;

	/* Calculate the number of nodes in the supplied affinity mask */
	for_each_node(n) {
		if (cpumask_intersects(mask, node_to_cpumask[n])) {
			node_set(n, *nodemsk);
			nodes++;
		}
	}
	return nodes;
}

static int __irq_build_affinity_masks(const struct irq_affinity *affd,
				      int startvec, int numvecs, int firstvec,
				      cpumask_var_t *node_to_cpumask,
				      const struct cpumask *cpu_mask,
				      struct cpumask *nmsk,
				      struct irq_affinity_desc *masks)
{
	int n, nodes, cpus_per_vec, extra_vecs, done = 0;
	int last_affv = firstvec + numvecs;
	int curvec = startvec;
	nodemask_t nodemsk = NODE_MASK_NONE;

	if (!cpumask_weight(cpu_mask))
		return 0;

	nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);

	/*
	 * If the number of nodes in the mask is greater than or equal to the
	 * number of vectors we just spread the vectors across the nodes.
	 */
	if (numvecs <= nodes) {
		for_each_node_mask(n, nodemsk) {
			cpumask_or(&masks[curvec].mask,
				   &masks[curvec].mask,
				   node_to_cpumask[n]);
			if (++curvec == last_affv)
				curvec = firstvec;
		}
		done = numvecs;
		goto out;
	}
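
	/*
	 * Illustrative example for the branch above (added, not from the
	 * original source): with 2 vectors and 4 nodes in nodemsk, curvec
	 * wraps at last_affv, so vector 0 is assigned nodes 0 and 2 while
	 * vector 1 is assigned nodes 1 and 3.
	 */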

	for_each_node_mask(n, nodemsk) {
		int ncpus, v, vecs_to_assign, vecs_per_node;

		/* Spread the vectors per node */
		vecs_per_node = (numvecs - (curvec - firstvec)) / nodes;

		/* Get the cpus on this node which are in the mask */
		cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);

		/* Calculate the number of cpus per vector */
		ncpus = cpumask_weight(nmsk);
		vecs_to_assign = min(vecs_per_node, ncpus);

		/* Account for rounding errors */
		extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
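
		/*
		 * Worked example (added, not from the original source):
		 * ncpus = 5 and vecs_to_assign = 2 gives
		 * extra_vecs = 5 - 2 * (5 / 2) = 1, so the loop below hands
		 * 3 CPUs to the first vector and 2 to the second.
		 */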
		for (v = 0; curvec < last_affv && v < vecs_to_assign;
				curvec++, v++) {
			cpus_per_vec = ncpus / vecs_to_assign;

			/* Account for extra vectors to compensate rounding errors */
			if (extra_vecs) {
				cpus_per_vec++;
				--extra_vecs;
			}
			irq_spread_init_one(&masks[curvec].mask, nmsk,
						cpus_per_vec);
		}

		done += v;
		if (done >= numvecs)
			break;
		if (curvec >= last_affv)
			curvec = firstvec;
		--nodes;
	}

out:
	return done;
}

/*
 * build affinity in two stages:
 *	1) spread present CPUs on these vectors
 *	2) spread other possible CPUs on these vectors
 */
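/*
 * Added note (an assumption based on the two-stage scheme above, not text
 * from the original source): spreading over the remaining possible CPUs in
 * stage 2 means a vector already has a valid mask if one of its currently
 * non-present CPUs is hotplugged in later.
 */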
static int irq_build_affinity_masks(const struct irq_affinity *affd,
				    int startvec, int numvecs, int firstvec,
				    cpumask_var_t *node_to_cpumask,
				    struct irq_affinity_desc *masks)
{
	int curvec = startvec, nr_present, nr_others;
	int ret = -ENOMEM;
	cpumask_var_t nmsk, npresmsk;

	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
		return ret;

	if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
		goto fail;

	ret = 0;
	/* Stabilize the cpumasks */
	get_online_cpus();
	build_node_to_cpumask(node_to_cpumask);

	/* Spread on present CPUs starting from affd->pre_vectors */
	nr_present = __irq_build_affinity_masks(affd, curvec, numvecs,
						firstvec, node_to_cpumask,
						cpu_present_mask, nmsk, masks);

	/*
	 * Spread on non present CPUs starting from the next vector to be
	 * handled. If the spreading of present CPUs already exhausted the
	 * vector space, assign the non present CPUs to the already spread
	 * out vectors.
	 */
	if (nr_present >= numvecs)
		curvec = firstvec;
	else
		curvec = firstvec + nr_present;
	cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
	nr_others = __irq_build_affinity_masks(affd, curvec, numvecs,
					       firstvec, node_to_cpumask,
					       npresmsk, nmsk, masks);
	put_online_cpus();

	if (nr_present < numvecs)
		WARN_ON(nr_present + nr_others < numvecs);

	free_cpumask_var(npresmsk);

 fail:
	free_cpumask_var(nmsk);
	return ret;
}

/**
 * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
 * @nvecs:	The total number of vectors
 * @affd:	Description of the affinity requirements
 *
 * Returns the irq_affinity_desc pointer or NULL if allocation failed.
 */
struct irq_affinity_desc *
irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
{
	int affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
	int curvec, usedvecs;
	cpumask_var_t *node_to_cpumask;
	struct irq_affinity_desc *masks = NULL;
	int i, nr_sets;

	/*
	 * If there aren't any vectors left after applying the pre/post
	 * vectors don't bother with assigning affinity.
	 */
	if (nvecs == affd->pre_vectors + affd->post_vectors)
		return NULL;

	node_to_cpumask = alloc_node_to_cpumask();
	if (!node_to_cpumask)
		return NULL;

	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		goto outnodemsk;

	/* Fill out vectors at the beginning that don't need affinity */
	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
		cpumask_copy(&masks[curvec].mask, irq_default_affinity);
	/*
	 * Spread on present CPUs starting from affd->pre_vectors. If we
	 * have multiple sets, build each set's affinity mask separately.
	 */
	nr_sets = affd->nr_sets;
	if (!nr_sets)
		nr_sets = 1;

	for (i = 0, usedvecs = 0; i < nr_sets; i++) {
		int this_vecs = affd->sets ? affd->sets[i] : affvecs;
		int ret;

		ret = irq_build_affinity_masks(affd, curvec, this_vecs,
					       curvec, node_to_cpumask, masks);
		if (ret) {
			kfree(masks);
			masks = NULL;
			goto outnodemsk;
		}
		curvec += this_vecs;
		usedvecs += this_vecs;
	}

	/* Fill out vectors at the end that don't need affinity */
	if (usedvecs >= affvecs)
		curvec = affd->pre_vectors + affvecs;
	else
		curvec = affd->pre_vectors + usedvecs;
	for (; curvec < nvecs; curvec++)
		cpumask_copy(&masks[curvec].mask, irq_default_affinity);

	/* Mark the managed interrupts */
	for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
		masks[i].is_managed = 1;

outnodemsk:
	free_node_to_cpumask(node_to_cpumask);
	return masks;
}
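
/*
 * Illustrative usage sketch (added, not part of this file; the variable
 * names are hypothetical): drivers normally reach this code through the
 * PCI/MSI core rather than calling it directly, e.g.:
 *
 *	struct irq_affinity affd = {
 *		.pre_vectors = 1,	// one non-spread config interrupt
 *	};
 *
 *	nvecs = pci_alloc_irq_vectors_affinity(pdev, minvec, maxvec,
 *				PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &affd);
 *
 * The MSI core then calls irq_create_affinity_masks() with the allocated
 * vector count to build one affinity mask per vector.
 */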

/**
 * irq_calc_affinity_vectors - Calculate the optimal number of vectors
 * @minvec:	The minimum number of vectors available
 * @maxvec:	The maximum number of vectors available
 * @affd:	Description of the affinity requirements
 */
int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd)
{
	int resv = affd->pre_vectors + affd->post_vectors;
	int vecs = maxvec - resv;
	int set_vecs;

	if (resv > minvec)
		return 0;

	if (affd->nr_sets) {
		int i;

		for (i = 0, set_vecs = 0; i < affd->nr_sets; i++)
			set_vecs += affd->sets[i];
	} else {
		get_online_cpus();
		set_vecs = cpumask_weight(cpu_possible_mask);
		put_online_cpus();
	}

	return resv + min(set_vecs, vecs);
}
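
/*
 * Worked example (added, not from the original source): minvec = 2,
 * maxvec = 32, pre_vectors = 1, post_vectors = 0, no interrupt sets and
 * 16 possible CPUs. Then resv = 1, vecs = 31 and set_vecs = 16, so the
 * function returns 1 + min(16, 31) = 17 vectors.
 */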