kernel/irq/affinity.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 2016 Thomas Gleixner.
   4  * Copyright (C) 2016-2017 Christoph Hellwig.
   5  */
   6 #include <linux/interrupt.h>
   7 #include <linux/kernel.h>
   8 #include <linux/slab.h>
   9 #include <linux/cpu.h>
  10
  11 static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
  12                                 unsigned int cpus_per_vec)
  13 {
  14         const struct cpumask *siblmsk;
  15         int cpu, sibl;
  16
  17         for ( ; cpus_per_vec > 0; ) {
  18                 cpu = cpumask_first(nmsk);
  19
  20                 /* Should not happen, but I'm too lazy to think about it */
  21                 if (cpu >= nr_cpu_ids)
  22                         return;
  23
  24                 cpumask_clear_cpu(cpu, nmsk);
  25                 cpumask_set_cpu(cpu, irqmsk);
  26                 cpus_per_vec--;
  27
  28                 /* If the cpu has siblings, use them first */
  29                 siblmsk = topology_sibling_cpumask(cpu);
  30                 for (sibl = -1; cpus_per_vec > 0; ) {
  31                         sibl = cpumask_next(sibl, siblmsk);
  32                         if (sibl >= nr_cpu_ids)
  33                                 break;
  34                         if (!cpumask_test_and_clear_cpu(sibl, nmsk))
  35                                 continue;
  36                         cpumask_set_cpu(sibl, irqmsk);
  37                         cpus_per_vec--;
  38                 }
  39         }
  40 }
  41
  42 static cpumask_var_t *alloc_node_to_cpumask(void)
  43 {
  44         cpumask_var_t *masks;
  45         int node;
  46
  47         masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
  48         if (!masks)
  49                 return NULL;
  50
  51         for (node = 0; node < nr_node_ids; node++) {
  52                 if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
  53                         goto out_unwind;
  54         }
  55
  56         return masks;
  57
  58 out_unwind:
  59         while (--node >= 0)
  60                 free_cpumask_var(masks[node]);
  61         kfree(masks);
  62         return NULL;
  63 }
  64
  65 static void free_node_to_cpumask(cpumask_var_t *masks)
  66 {
  67         int node;
  68
  69         for (node = 0; node < nr_node_ids; node++)
  70                 free_cpumask_var(masks[node]);
  71         kfree(masks);
  72 }
  73
  74 static void build_node_to_cpumask(cpumask_var_t *masks)
  75 {
  76         int cpu;
  77
  78         for_each_possible_cpu(cpu)
  79                 cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
  80 }
  81
  82 static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
  83                                 const struct cpumask *mask, nodemask_t *nodemsk)
  84 {
  85         int n, nodes = 0;
  86
  87         /* Calculate the number of nodes in the supplied affinity mask */
  88         for_each_node(n) {
  89                 if (cpumask_intersects(mask, node_to_cpumask[n])) {
  90                         node_set(n, *nodemsk);
  91                         nodes++;
  92                 }
  93         }
  94         return nodes;
  95 }
  96
  97 static int __irq_build_affinity_masks(unsigned int startvec,
  98                                       unsigned int numvecs,
  99                                       unsigned int firstvec,
 100                                       cpumask_var_t *node_to_cpumask,
 101                                       const struct cpumask *cpu_mask,
 102                                       struct cpumask *nmsk,
 103                                       struct irq_affinity_desc *masks)
 104 {
 105         unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0;
 106         unsigned int last_affv = firstvec + numvecs;
 107         unsigned int curvec = startvec;
 108         nodemask_t nodemsk = NODE_MASK_NONE;
 109
 110         if (!cpumask_weight(cpu_mask))
 111                 return 0;
 112
 113         nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
 114
 115         /*
 116          * If the number of nodes in the mask is greater than or equal the
 117          * number of vectors we just spread the vectors across the nodes.
 118          */
 119         if (numvecs <= nodes) {
 120                 for_each_node_mask(n, nodemsk) {
 121                         cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
 122                                    node_to_cpumask[n]);
 123                         if (++curvec == last_affv)
 124                                 curvec = firstvec;
 125                 }
 126                 return numvecs;
 127         }
 128
 129         for_each_node_mask(n, nodemsk) {
 130                 unsigned int ncpus, v, vecs_to_assign, vecs_per_node;
 131
 132                 /* Get the cpus on this node which are in the mask */
 133                 cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
 134                 ncpus = cpumask_weight(nmsk);
 135                 if (!ncpus)
 136                         continue;
 137
 138                 /*
 139                  * Calculate the number of cpus per vector
 140                  *
 141                  * Spread the vectors evenly per node. If the requested
 142                  * vector number has been reached, simply allocate one
 143                  * vector for each remaining node so that all nodes can
 144                  * be covered
 145                  */
 146                 if (numvecs > done)
 147                         vecs_per_node = max_t(unsigned,
 148                                         (numvecs - done) / nodes, 1);
 149                 else
 150                         vecs_per_node = 1;
 151
 152                 vecs_to_assign = min(vecs_per_node, ncpus);
 153
 154                 /* Account for rounding errors */
 155                 extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
 156
 157                 for (v = 0; curvec < last_affv && v < vecs_to_assign;
 158                      curvec++, v++) {
 159                         cpus_per_vec = ncpus / vecs_to_assign;
 160
 161                         /* Account for extra vectors to compensate rounding errors */
 162                         if (extra_vecs) {
 163                                 cpus_per_vec++;
 164                                 --extra_vecs;
 165                         }
 166                         irq_spread_init_one(&masks[curvec].mask, nmsk,
 167                                                 cpus_per_vec);
 168                 }
 169
 170                 done += v;
 171                 if (curvec >= last_affv)
 172                         curvec = firstvec;
 173                 --nodes;
 174         }
 175         return done < numvecs ? done : numvecs;
 176 }
 177
 178 /*
 179  * build affinity in two stages:
 180  *      1) spread present CPU on these vectors
 181  *      2) spread other possible CPUs on these vectors
 182  */
 183 static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
 184                                     unsigned int firstvec,
 185                                     struct irq_affinity_desc *masks)
 186 {
 187         unsigned int curvec = startvec, nr_present, nr_others;
 188         cpumask_var_t *node_to_cpumask;
 189         cpumask_var_t nmsk, npresmsk;
 190         int ret = -ENOMEM;
 191
 192         if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
 193                 return ret;
 194
 195         if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
 196                 goto fail_nmsk;
 197
 198         node_to_cpumask = alloc_node_to_cpumask();
 199         if (!node_to_cpumask)
 200                 goto fail_npresmsk;
 201
 202         ret = 0;
 203         /* Stabilize the cpumasks */
 204         get_online_cpus();
 205         build_node_to_cpumask(node_to_cpumask);
 206
 207         /* Spread on present CPUs starting from affd->pre_vectors */
 208         nr_present = __irq_build_affinity_masks(curvec, numvecs,
 209                                                 firstvec, node_to_cpumask,
 210                                                 cpu_present_mask, nmsk, masks);
 211
 212         /*
 213          * Spread on non present CPUs starting from the next vector to be
 214          * handled. If the spreading of present CPUs already exhausted the
 215          * vector space, assign the non present CPUs to the already spread
 216          * out vectors.
 217          */
 218         if (nr_present >= numvecs)
 219                 curvec = firstvec;
 220         else
 221                 curvec = firstvec + nr_present;
 222         cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
 223         nr_others = __irq_build_affinity_masks(curvec, numvecs,
 224                                                firstvec, node_to_cpumask,
 225                                                npresmsk, nmsk, masks);
 226         put_online_cpus();
 227
 228         if (nr_present < numvecs)
 229                 WARN_ON(nr_present + nr_others < numvecs);
 230
 231         free_node_to_cpumask(node_to_cpumask);
 232
 233  fail_npresmsk:
 234         free_cpumask_var(npresmsk);
 235
 236  fail_nmsk:
 237         free_cpumask_var(nmsk);
 238         return ret;
 239 }
 240
 241 static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
 242 {
 243         affd->nr_sets = 1;
 244         affd->set_size[0] = affvecs;
 245 }
 246
 247 /**
 248  * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
 249  * @nvecs:      The total number of vectors
 250  * @affd:       Description of the affinity requirements
 251  *
 252  * Returns the irq_affinity_desc pointer or NULL if allocation failed.
 253  */
 254 struct irq_affinity_desc *
 255 irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
 256 {
 257         unsigned int affvecs, curvec, usedvecs, i;
 258         struct irq_affinity_desc *masks = NULL;
 259
 260         /*
 261          * Determine the number of vectors which need interrupt affinities
 262          * assigned. If the pre/post request exhausts the available vectors
 263          * then nothing to do here except for invoking the calc_sets()
 264          * callback so the device driver can adjust to the situation.
 265          */
 266         if (nvecs > affd->pre_vectors + affd->post_vectors)
 267                 affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
 268         else
 269                 affvecs = 0;
 270
 271         /*
 272          * Simple invocations do not provide a calc_sets() callback. Install
 273          * the generic one.
 274          */
 275         if (!affd->calc_sets)
 276                 affd->calc_sets = default_calc_sets;
 277
 278         /* Recalculate the sets */
 279         affd->calc_sets(affd, affvecs);
 280
 281         if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
 282                 return NULL;
 283
 284         /* Nothing to assign? */
 285         if (!affvecs)
 286                 return NULL;
 287
 288         masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
 289         if (!masks)
 290                 return NULL;
 291
 292         /* Fill out vectors at the beginning that don't need affinity */
 293         for (curvec = 0; curvec < affd->pre_vectors; curvec++)
 294                 cpumask_copy(&masks[curvec].mask, irq_default_affinity);
 295
 296         /*
 297          * Spread on present CPUs starting from affd->pre_vectors. If we
 298          * have multiple sets, build each sets affinity mask separately.
 299          */
 300         for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
 301                 unsigned int this_vecs = affd->set_size[i];
 302                 int ret;
 303
 304                 ret = irq_build_affinity_masks(curvec, this_vecs,
 305                                                curvec, masks);
 306                 if (ret) {
 307                         kfree(masks);
 308                         return NULL;
 309                 }
 310                 curvec += this_vecs;
 311                 usedvecs += this_vecs;
 312         }
 313
 314         /* Fill out vectors at the end that don't need affinity */
 315         if (usedvecs >= affvecs)
 316                 curvec = affd->pre_vectors + affvecs;
 317         else
 318                 curvec = affd->pre_vectors + usedvecs;
 319         for (; curvec < nvecs; curvec++)
 320                 cpumask_copy(&masks[curvec].mask, irq_default_affinity);
 321
 322         /* Mark the managed interrupts */
 323         for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
 324                 masks[i].is_managed = 1;
 325
 326         return masks;
 327 }
 328
 329 /**
 330  * irq_calc_affinity_vectors - Calculate the optimal number of vectors
 331  * @minvec:     The minimum number of vectors available
 332  * @maxvec:     The maximum number of vectors available
 333  * @affd:       Description of the affinity requirements
 334  */
 335 unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
 336                                        const struct irq_affinity *affd)
 337 {
 338         unsigned int resv = affd->pre_vectors + affd->post_vectors;
 339         unsigned int set_vecs;
 340
 341         if (resv > minvec)
 342                 return 0;
 343
 344         if (affd->calc_sets) {
 345                 set_vecs = maxvec - resv;
 346         } else {
 347                 get_online_cpus();
 348                 set_vecs = cpumask_weight(cpu_possible_mask);
 349                 put_online_cpus();
 350         }
 351
 352         return resv + min(set_vecs, maxvec - resv);
 353 }