arch/x86/kernel/cpu/intel_rdt_rdtgroup.c

   1 /*
   2  * User interface for Resource Allocation in Resource Director Technology (RDT)
   3  *
   4  * Copyright (C) 2016 Intel Corporation
   5  *
   6  * Author: Fenghua Yu <fenghua.yu@intel.com>
   7  *
   8  * This program is free software; you can redistribute it and/or modify it
   9  * under the terms and conditions of the GNU General Public License,
  10  * version 2, as published by the Free Software Foundation.
  11  *
  12  * This program is distributed in the hope it will be useful, but WITHOUT
  13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  15  * more details.
  16  *
  17  * More information about RDT can be found in the Intel (R) x86 Architecture
  18  * Software Developer Manual.
  19  */
  20
  21 #define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
  22
  23 #include <linux/cpu.h>
  24 #include <linux/fs.h>
  25 #include <linux/sysfs.h>
  26 #include <linux/kernfs.h>
  27 #include <linux/seq_file.h>
  28 #include <linux/sched.h>
  29 #include <linux/slab.h>
  30 #include <linux/cpu.h>
  31 #include <linux/task_work.h>
  32
  33 #include <uapi/linux/magic.h>
  34
  35 #include <asm/intel_rdt.h>
  36 #include <asm/intel_rdt_common.h>
  37
/* Static key, initially false; enabled by rdt_mount(), disabled by rdt_kill_sb() */
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
/* kernfs root of the resctrl hierarchy, created in rdtgroup_setup_root() */
struct kernfs_root *rdt_root;
/* The default (root) resource group; it is assigned CLOSID 0 */
struct rdtgroup rdtgroup_default;
/* List of all resource groups, including rdtgroup_default */
LIST_HEAD(rdt_all_groups);

/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;
  45
/*
 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
 * we can keep a bitmap of free CLOSIDs in a single integer.
 *
 * Bit N set means CLOSID N is free: closid_alloc() clears the lowest set
 * bit and closid_free() sets the bit again.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set "current->closid" to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 */
static int closid_free_map;
  62
  63 static void closid_init(void)
  64 {
  65         struct rdt_resource *r;
  66         int rdt_min_closid = 32;
  67
  68         /* Compute rdt_min_closid across all resources */
  69         for_each_enabled_rdt_resource(r)
  70                 rdt_min_closid = min(rdt_min_closid, r->num_closid);
  71
  72         closid_free_map = BIT_MASK(rdt_min_closid) - 1;
  73
  74         /* CLOSID 0 is always reserved for the default group */
  75         closid_free_map &= ~1;
  76 }
  77
  78 int closid_alloc(void)
  79 {
  80         int closid = ffs(closid_free_map);
  81
  82         if (closid == 0)
  83                 return -ENOSPC;
  84         closid--;
  85         closid_free_map &= ~(1 << closid);
  86
  87         return closid;
  88 }
  89
  90 static void closid_free(int closid)
  91 {
  92         closid_free_map |= 1 << closid;
  93 }
  94
  95 /* set uid and gid of rdtgroup dirs and files to that of the creator */
  96 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
  97 {
  98         struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
  99                                 .ia_uid = current_fsuid(),
 100                                 .ia_gid = current_fsgid(), };
 101
 102         if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
 103             gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
 104                 return 0;
 105
 106         return kernfs_setattr(kn, &iattr);
 107 }
 108
 109 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 110 {
 111         struct kernfs_node *kn;
 112         int ret;
 113
 114         kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
 115                                   0, rft->kf_ops, rft, NULL, NULL);
 116         if (IS_ERR(kn))
 117                 return PTR_ERR(kn);
 118
 119         ret = rdtgroup_kn_set_ugid(kn);
 120         if (ret) {
 121                 kernfs_remove(kn);
 122                 return ret;
 123         }
 124
 125         return 0;
 126 }
 127
 128 static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts,
 129                               int len)
 130 {
 131         struct rftype *rft;
 132         int ret;
 133
 134         lockdep_assert_held(&rdtgroup_mutex);
 135
 136         for (rft = rfts; rft < rfts + len; rft++) {
 137                 ret = rdtgroup_add_file(kn, rft);
 138                 if (ret)
 139                         goto error;
 140         }
 141
 142         return 0;
 143 error:
 144         pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
 145         while (--rft >= rfts)
 146                 kernfs_remove_by_name(kn, rft->name);
 147         return ret;
 148 }
 149
 150 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
 151 {
 152         struct kernfs_open_file *of = m->private;
 153         struct rftype *rft = of->kn->priv;
 154
 155         if (rft->seq_show)
 156                 return rft->seq_show(of, m, arg);
 157         return 0;
 158 }
 159
 160 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
 161                                    size_t nbytes, loff_t off)
 162 {
 163         struct rftype *rft = of->kn->priv;
 164
 165         if (rft->write)
 166                 return rft->write(of, buf, nbytes, off);
 167
 168         return -EINVAL;
 169 }
 170
/*
 * kernfs operations shared by every resctrl file in this file's rftype
 * tables. Reads and writes are forwarded to the per-file rftype handlers
 * by rdtgroup_seqfile_show() and rdtgroup_file_write().
 */
static struct kernfs_ops rdtgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= rdtgroup_file_write,
	.seq_show		= rdtgroup_seqfile_show,
};
 176
 177 static int rdtgroup_cpus_show(struct kernfs_open_file *of,
 178                               struct seq_file *s, void *v)
 179 {
 180         struct rdtgroup *rdtgrp;
 181         int ret = 0;
 182
 183         rdtgrp = rdtgroup_kn_lock_live(of->kn);
 184
 185         if (rdtgrp)
 186                 seq_printf(s, "%*pb\n", cpumask_pr_args(&rdtgrp->cpu_mask));
 187         else
 188                 ret = -ENOENT;
 189         rdtgroup_kn_unlock(of->kn);
 190
 191         return ret;
 192 }
 193
/*
 * Write handler for the "cpus" file: replace the group's CPU mask with the
 * mask parsed from @buf.
 *
 * CPUs dropped from the group are handed to rdtgroup_default, CPUs added
 * to it are removed from every other group, and the per-cpu cpu_closid
 * value is updated for each CPU that changed owner.
 *
 * Return: @nbytes on success; -EINVAL for a NULL buffer, a mask containing
 * offline CPUs or an attempt to drop CPUs from the default group; -ENOMEM
 * if a temporary mask cannot be allocated; -ENOENT if the group is gone;
 * or the error returned by cpumask_parse().
 */
static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	cpumask_var_t tmpmask, newmask;
	struct rdtgroup *rdtgrp, *r;
	int ret, cpu;

	if (!buf)
		return -EINVAL;

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;
	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
		free_cpumask_var(tmpmask);
		return -ENOMEM;
	}
	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = cpumask_parse(buf, newmask);
	if (ret)
		goto unlock;

	get_online_cpus();
	/* check that user didn't specify any offline cpus */
	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
	if (cpumask_weight(tmpmask)) {
		ret = -EINVAL;
		goto end;
	}

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (cpumask_weight(tmpmask)) {
		/* Can't drop from default group */
		if (rdtgrp == &rdtgroup_default) {
			ret = -EINVAL;
			goto end;
		}
		/* Give any dropped cpus to rdtgroup_default */
		cpumask_or(&rdtgroup_default.cpu_mask,
			   &rdtgroup_default.cpu_mask, tmpmask);
		for_each_cpu(cpu, tmpmask)
			per_cpu(cpu_closid, cpu) = 0;
	}

	/*
	 * If we added cpus, remove them from previous group that owned them
	 * and update per-cpu closid
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (cpumask_weight(tmpmask)) {
		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
			if (r == rdtgrp)
				continue;
			cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask);
		}
		for_each_cpu(cpu, tmpmask)
			per_cpu(cpu_closid, cpu) = rdtgrp->closid;
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	/* "end" additionally releases the CPU hotplug reference taken above */
end:
	put_online_cpus();
	/* "unlock" is reached on every path after rdtgroup_kn_lock_live() */
unlock:
	rdtgroup_kn_unlock(of->kn);
	free_cpumask_var(tmpmask);
	free_cpumask_var(newmask);

	/* ret is 0 on success, in which case the whole write is consumed */
	return ret ?: nbytes;
}
 270
/*
 * Task-work item queued by __rdtgroup_move_task() and consumed by
 * move_myself().
 */
struct task_move_callback {
	struct callback_head	work;		/* task_work hook; func is move_myself() */
	struct rdtgroup		*rdtgrp;	/* group the task is moved to */
};
 275
 276 static void move_myself(struct callback_head *head)
 277 {
 278         struct task_move_callback *callback;
 279         struct rdtgroup *rdtgrp;
 280
 281         callback = container_of(head, struct task_move_callback, work);
 282         rdtgrp = callback->rdtgrp;
 283
 284         /*
 285          * If resource group was deleted before this task work callback
 286          * was invoked, then assign the task to root group and free the
 287          * resource group.
 288          */
 289         if (atomic_dec_and_test(&rdtgrp->waitcount) &&
 290             (rdtgrp->flags & RDT_DELETED)) {
 291                 current->closid = 0;
 292                 kfree(rdtgrp);
 293         }
 294
 295         /* update PQR_ASSOC MSR to make resource group go into effect */
 296         intel_rdt_sched_in();
 297
 298         kfree(callback);
 299 }
 300
 301 static int __rdtgroup_move_task(struct task_struct *tsk,
 302                                 struct rdtgroup *rdtgrp)
 303 {
 304         struct task_move_callback *callback;
 305         int ret;
 306
 307         callback = kzalloc(sizeof(*callback), GFP_KERNEL);
 308         if (!callback)
 309                 return -ENOMEM;
 310         callback->work.func = move_myself;
 311         callback->rdtgrp = rdtgrp;
 312
 313         /*
 314          * Take a refcount, so rdtgrp cannot be freed before the
 315          * callback has been invoked.
 316          */
 317         atomic_inc(&rdtgrp->waitcount);
 318         ret = task_work_add(tsk, &callback->work, true);
 319         if (ret) {
 320                 /*
 321                  * Task is exiting. Drop the refcount and free the callback.
 322                  * No need to check the refcount as the group cannot be
 323                  * deleted before the write function unlocks rdtgroup_mutex.
 324                  */
 325                 atomic_dec(&rdtgrp->waitcount);
 326                 kfree(callback);
 327         } else {
 328                 tsk->closid = rdtgrp->closid;
 329         }
 330         return ret;
 331 }
 332
 333 static int rdtgroup_task_write_permission(struct task_struct *task,
 334                                           struct kernfs_open_file *of)
 335 {
 336         const struct cred *tcred = get_task_cred(task);
 337         const struct cred *cred = current_cred();
 338         int ret = 0;
 339
 340         /*
 341          * Even if we're attaching all tasks in the thread group, we only
 342          * need to check permissions on one of them.
 343          */
 344         if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 345             !uid_eq(cred->euid, tcred->uid) &&
 346             !uid_eq(cred->euid, tcred->suid))
 347                 ret = -EPERM;
 348
 349         put_cred(tcred);
 350         return ret;
 351 }
 352
 353 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
 354                               struct kernfs_open_file *of)
 355 {
 356         struct task_struct *tsk;
 357         int ret;
 358
 359         rcu_read_lock();
 360         if (pid) {
 361                 tsk = find_task_by_vpid(pid);
 362                 if (!tsk) {
 363                         rcu_read_unlock();
 364                         return -ESRCH;
 365                 }
 366         } else {
 367                 tsk = current;
 368         }
 369
 370         get_task_struct(tsk);
 371         rcu_read_unlock();
 372
 373         ret = rdtgroup_task_write_permission(tsk, of);
 374         if (!ret)
 375                 ret = __rdtgroup_move_task(tsk, rdtgrp);
 376
 377         put_task_struct(tsk);
 378         return ret;
 379 }
 380
 381 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
 382                                     char *buf, size_t nbytes, loff_t off)
 383 {
 384         struct rdtgroup *rdtgrp;
 385         int ret = 0;
 386         pid_t pid;
 387
 388         if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 389                 return -EINVAL;
 390         rdtgrp = rdtgroup_kn_lock_live(of->kn);
 391
 392         if (rdtgrp)
 393                 ret = rdtgroup_move_task(pid, rdtgrp, of);
 394         else
 395                 ret = -ENOENT;
 396
 397         rdtgroup_kn_unlock(of->kn);
 398
 399         return ret ?: nbytes;
 400 }
 401
 402 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 403 {
 404         struct task_struct *p, *t;
 405
 406         rcu_read_lock();
 407         for_each_process_thread(p, t) {
 408                 if (t->closid == r->closid)
 409                         seq_printf(s, "%d\n", t->pid);
 410         }
 411         rcu_read_unlock();
 412 }
 413
 414 static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 415                                struct seq_file *s, void *v)
 416 {
 417         struct rdtgroup *rdtgrp;
 418         int ret = 0;
 419
 420         rdtgrp = rdtgroup_kn_lock_live(of->kn);
 421         if (rdtgrp)
 422                 show_rdt_tasks(rdtgrp, s);
 423         else
 424                 ret = -ENOENT;
 425         rdtgroup_kn_unlock(of->kn);
 426
 427         return ret;
 428 }
 429
/*
 * Files in each rdtgroup. The table is instantiated for the root directory
 * in rdtgroup_setup_root() and for every new group in rdtgroup_mkdir().
 */
static struct rftype rdtgroup_base_files[] = {
	{
		/* CPU mask owned by the group */
		.name		= "cpus",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_cpus_write,
		.seq_show	= rdtgroup_cpus_show,
	},
	{
		/* pids of the tasks assigned to the group */
		.name		= "tasks",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_tasks_write,
		.seq_show	= rdtgroup_tasks_show,
	},
	{
		/* handlers for this file are defined outside this file */
		.name		= "schemata",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_schemata_write,
		.seq_show	= rdtgroup_schemata_show,
	},
};
 454
 455 static int rdt_num_closids_show(struct kernfs_open_file *of,
 456                                 struct seq_file *seq, void *v)
 457 {
 458         struct rdt_resource *r = of->kn->parent->priv;
 459
 460         seq_printf(seq, "%d\n", r->num_closid);
 461
 462         return 0;
 463 }
 464
 465 static int rdt_cbm_mask_show(struct kernfs_open_file *of,
 466                              struct seq_file *seq, void *v)
 467 {
 468         struct rdt_resource *r = of->kn->parent->priv;
 469
 470         seq_printf(seq, "%x\n", r->max_cbm);
 471
 472         return 0;
 473 }
 474
 475 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
 476                              struct seq_file *seq, void *v)
 477 {
 478         struct rdt_resource *r = of->kn->parent->priv;
 479
 480         seq_printf(seq, "%d\n", r->min_cbm_bits);
 481
 482         return 0;
 483 }
 484
/*
 * rdtgroup information files for one cache resource. All entries are
 * read-only; rdtgroup_create_info_dir() instantiates them in each
 * per-resource directory under "info".
 */
static struct rftype res_info_files[] = {
	{
		.name		= "num_closids",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_num_closids_show,
	},
	{
		.name		= "cbm_mask",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_cbm_mask_show,
	},
	{
		.name		= "min_cbm_bits",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_min_cbm_bits_show,
	},
};
 506
 507 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
 508 {
 509         struct kernfs_node *kn_subdir;
 510         struct rdt_resource *r;
 511         int ret;
 512
 513         /* create the directory */
 514         kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
 515         if (IS_ERR(kn_info))
 516                 return PTR_ERR(kn_info);
 517         kernfs_get(kn_info);
 518
 519         for_each_enabled_rdt_resource(r) {
 520                 kn_subdir = kernfs_create_dir(kn_info, r->name,
 521                                               kn_info->mode, r);
 522                 if (IS_ERR(kn_subdir)) {
 523                         ret = PTR_ERR(kn_subdir);
 524                         goto out_destroy;
 525                 }
 526                 kernfs_get(kn_subdir);
 527                 ret = rdtgroup_kn_set_ugid(kn_subdir);
 528                 if (ret)
 529                         goto out_destroy;
 530                 ret = rdtgroup_add_files(kn_subdir, res_info_files,
 531                                          ARRAY_SIZE(res_info_files));
 532                 if (ret)
 533                         goto out_destroy;
 534                 kernfs_activate(kn_subdir);
 535         }
 536
 537         /*
 538          * This extra ref will be put in kernfs_remove() and guarantees
 539          * that @rdtgrp->kn is always accessible.
 540          */
 541         kernfs_get(kn_info);
 542
 543         ret = rdtgroup_kn_set_ugid(kn_info);
 544         if (ret)
 545                 goto out_destroy;
 546
 547         kernfs_activate(kn_info);
 548
 549         return 0;
 550
 551 out_destroy:
 552         kernfs_remove(kn_info);
 553         return ret;
 554 }
 555
 556 static void l3_qos_cfg_update(void *arg)
 557 {
 558         bool *enable = arg;
 559
 560         wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
 561 }
 562
 563 static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
 564 {
 565         cpumask_var_t cpu_mask;
 566         struct rdt_domain *d;
 567         int cpu;
 568
 569         if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
 570                 return -ENOMEM;
 571
 572         list_for_each_entry(d, &r->domains, list) {
 573                 /* Pick one CPU from each domain instance to update MSR */
 574                 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
 575         }
 576         cpu = get_cpu();
 577         /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
 578         if (cpumask_test_cpu(cpu, cpu_mask))
 579                 l3_qos_cfg_update(&enable);
 580         /* Update QOS_CFG MSR on all other cpus in cpu_mask. */
 581         smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1);
 582         put_cpu();
 583
 584         free_cpumask_var(cpu_mask);
 585
 586         return 0;
 587 }
 588
 589 static int cdp_enable(void)
 590 {
 591         struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA];
 592         struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE];
 593         struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
 594         int ret;
 595
 596         if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable)
 597                 return -EINVAL;
 598
 599         ret = set_l3_qos_cfg(r_l3, true);
 600         if (!ret) {
 601                 r_l3->enabled = false;
 602                 r_l3data->enabled = true;
 603                 r_l3code->enabled = true;
 604         }
 605         return ret;
 606 }
 607
 608 static void cdp_disable(void)
 609 {
 610         struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
 611
 612         r->enabled = r->capable;
 613
 614         if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) {
 615                 rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false;
 616                 rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false;
 617                 set_l3_qos_cfg(r, false);
 618         }
 619 }
 620
 621 static int parse_rdtgroupfs_options(char *data)
 622 {
 623         char *token, *o = data;
 624         int ret = 0;
 625
 626         while ((token = strsep(&o, ",")) != NULL) {
 627                 if (!*token)
 628                         return -EINVAL;
 629
 630                 if (!strcmp(token, "cdp"))
 631                         ret = cdp_enable();
 632         }
 633
 634         return ret;
 635 }
 636
 637 /*
 638  * We don't allow rdtgroup directories to be created anywhere
 639  * except the root directory. Thus when looking for the rdtgroup
 640  * structure for a kernfs node we are either looking at a directory,
 641  * in which case the rdtgroup structure is pointed at by the "priv"
 642  * field, otherwise we have a file, and need only look to the parent
 643  * to find the rdtgroup.
 644  */
 645 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
 646 {
 647         if (kernfs_type(kn) == KERNFS_DIR) {
 648                 /*
 649                  * All the resource directories use "kn->priv"
 650                  * to point to the "struct rdtgroup" for the
 651                  * resource. "info" and its subdirectories don't
 652                  * have rdtgroup structures, so return NULL here.
 653                  */
 654                 if (kn == kn_info || kn->parent == kn_info)
 655                         return NULL;
 656                 else
 657                         return kn->priv;
 658         } else {
 659                 return kn->parent->priv;
 660         }
 661 }
 662
 663 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
 664 {
 665         struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
 666
 667         if (!rdtgrp)
 668                 return NULL;
 669
 670         atomic_inc(&rdtgrp->waitcount);
 671         kernfs_break_active_protection(kn);
 672
 673         mutex_lock(&rdtgroup_mutex);
 674
 675         /* Was this group deleted while we waited? */
 676         if (rdtgrp->flags & RDT_DELETED)
 677                 return NULL;
 678
 679         return rdtgrp;
 680 }
 681
 682 void rdtgroup_kn_unlock(struct kernfs_node *kn)
 683 {
 684         struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
 685
 686         if (!rdtgrp)
 687                 return;
 688
 689         mutex_unlock(&rdtgroup_mutex);
 690
 691         if (atomic_dec_and_test(&rdtgrp->waitcount) &&
 692             (rdtgrp->flags & RDT_DELETED)) {
 693                 kernfs_unbreak_active_protection(kn);
 694                 kernfs_put(kn);
 695                 kfree(rdtgrp);
 696         } else {
 697                 kernfs_unbreak_active_protection(kn);
 698         }
 699 }
 700
 701 static struct dentry *rdt_mount(struct file_system_type *fs_type,
 702                                 int flags, const char *unused_dev_name,
 703                                 void *data)
 704 {
 705         struct dentry *dentry;
 706         int ret;
 707
 708         mutex_lock(&rdtgroup_mutex);
 709         /*
 710          * resctrl file system can only be mounted once.
 711          */
 712         if (static_branch_unlikely(&rdt_enable_key)) {
 713                 dentry = ERR_PTR(-EBUSY);
 714                 goto out;
 715         }
 716
 717         ret = parse_rdtgroupfs_options(data);
 718         if (ret) {
 719                 dentry = ERR_PTR(ret);
 720                 goto out_cdp;
 721         }
 722
 723         closid_init();
 724
 725         ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
 726         if (ret) {
 727                 dentry = ERR_PTR(ret);
 728                 goto out_cdp;
 729         }
 730
 731         dentry = kernfs_mount(fs_type, flags, rdt_root,
 732                               RDTGROUP_SUPER_MAGIC, NULL);
 733         if (IS_ERR(dentry))
 734                 goto out_cdp;
 735
 736         static_branch_enable(&rdt_enable_key);
 737         goto out;
 738
 739 out_cdp:
 740         cdp_disable();
 741 out:
 742         mutex_unlock(&rdtgroup_mutex);
 743
 744         return dentry;
 745 }
 746
 747 static int reset_all_cbms(struct rdt_resource *r)
 748 {
 749         struct msr_param msr_param;
 750         cpumask_var_t cpu_mask;
 751         struct rdt_domain *d;
 752         int i, cpu;
 753
 754         if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
 755                 return -ENOMEM;
 756
 757         msr_param.res = r;
 758         msr_param.low = 0;
 759         msr_param.high = r->num_closid;
 760
 761         /*
 762          * Disable resource control for this resource by setting all
 763          * CBMs in all domains to the maximum mask value. Pick one CPU
 764          * from each domain to update the MSRs below.
 765          */
 766         list_for_each_entry(d, &r->domains, list) {
 767                 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
 768
 769                 for (i = 0; i < r->num_closid; i++)
 770                         d->cbm[i] = r->max_cbm;
 771         }
 772         cpu = get_cpu();
 773         /* Update CBM on this cpu if it's in cpu_mask. */
 774         if (cpumask_test_cpu(cpu, cpu_mask))
 775                 rdt_cbm_update(&msr_param);
 776         /* Update CBM on all other cpus in cpu_mask. */
 777         smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
 778         put_cpu();
 779
 780         free_cpumask_var(cpu_mask);
 781
 782         return 0;
 783 }
 784
 785 /*
 786  * MSR_IA32_PQR_ASSOC is scoped per logical CPU, so all updates
 787  * are always in thread context.
 788  */
 789 static void rdt_reset_pqr_assoc_closid(void *v)
 790 {
 791         struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 792
 793         state->closid = 0;
 794         wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
 795 }
 796
 797 /*
 798  * Forcibly remove all of subdirectories under root.
 799  */
 800 static void rmdir_all_sub(void)
 801 {
 802         struct rdtgroup *rdtgrp, *tmp;
 803         struct task_struct *p, *t;
 804
 805         /* move all tasks to default resource group */
 806         read_lock(&tasklist_lock);
 807         for_each_process_thread(p, t)
 808                 t->closid = 0;
 809         read_unlock(&tasklist_lock);
 810
 811         get_cpu();
 812         /* Reset PQR_ASSOC MSR on this cpu. */
 813         rdt_reset_pqr_assoc_closid(NULL);
 814         /* Reset PQR_ASSOC MSR on the rest of cpus. */
 815         smp_call_function_many(cpu_online_mask, rdt_reset_pqr_assoc_closid,
 816                                NULL, 1);
 817         put_cpu();
 818         list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
 819                 /* Remove each rdtgroup other than root */
 820                 if (rdtgrp == &rdtgroup_default)
 821                         continue;
 822                 kernfs_remove(rdtgrp->kn);
 823                 list_del(&rdtgrp->rdtgroup_list);
 824                 kfree(rdtgrp);
 825         }
 826         kernfs_remove(kn_info);
 827 }
 828
 829 static void rdt_kill_sb(struct super_block *sb)
 830 {
 831         struct rdt_resource *r;
 832
 833         mutex_lock(&rdtgroup_mutex);
 834
 835         /*Put everything back to default values. */
 836         for_each_enabled_rdt_resource(r)
 837                 reset_all_cbms(r);
 838         cdp_disable();
 839         rmdir_all_sub();
 840         static_branch_disable(&rdt_enable_key);
 841         kernfs_kill_sb(sb);
 842         mutex_unlock(&rdtgroup_mutex);
 843 }
 844
/* Filesystem type "resctrl"; registered by rdtgroup_init() */
static struct file_system_type rdt_fs_type = {
	.name    = "resctrl",
	.mount   = rdt_mount,
	.kill_sb = rdt_kill_sb,
};
 850
 851 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 852                           umode_t mode)
 853 {
 854         struct rdtgroup *parent, *rdtgrp;
 855         struct kernfs_node *kn;
 856         int ret, closid;
 857
 858         /* Only allow mkdir in the root directory */
 859         if (parent_kn != rdtgroup_default.kn)
 860                 return -EPERM;
 861
 862         /* Do not accept '\n' to avoid unparsable situation. */
 863         if (strchr(name, '\n'))
 864                 return -EINVAL;
 865
 866         parent = rdtgroup_kn_lock_live(parent_kn);
 867         if (!parent) {
 868                 ret = -ENODEV;
 869                 goto out_unlock;
 870         }
 871
 872         ret = closid_alloc();
 873         if (ret < 0)
 874                 goto out_unlock;
 875         closid = ret;
 876
 877         /* allocate the rdtgroup. */
 878         rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
 879         if (!rdtgrp) {
 880                 ret = -ENOSPC;
 881                 goto out_closid_free;
 882         }
 883         rdtgrp->closid = closid;
 884         list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
 885
 886         /* kernfs creates the directory for rdtgrp */
 887         kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp);
 888         if (IS_ERR(kn)) {
 889                 ret = PTR_ERR(kn);
 890                 goto out_cancel_ref;
 891         }
 892         rdtgrp->kn = kn;
 893
 894         /*
 895          * kernfs_remove() will drop the reference count on "kn" which
 896          * will free it. But we still need it to stick around for the
 897          * rdtgroup_kn_unlock(kn} call below. Take one extra reference
 898          * here, which will be dropped inside rdtgroup_kn_unlock().
 899          */
 900         kernfs_get(kn);
 901
 902         ret = rdtgroup_kn_set_ugid(kn);
 903         if (ret)
 904                 goto out_destroy;
 905
 906         ret = rdtgroup_add_files(kn, rdtgroup_base_files,
 907                                  ARRAY_SIZE(rdtgroup_base_files));
 908         if (ret)
 909                 goto out_destroy;
 910
 911         kernfs_activate(kn);
 912
 913         ret = 0;
 914         goto out_unlock;
 915
 916 out_destroy:
 917         kernfs_remove(rdtgrp->kn);
 918 out_cancel_ref:
 919         list_del(&rdtgrp->rdtgroup_list);
 920         kfree(rdtgrp);
 921 out_closid_free:
 922         closid_free(closid);
 923 out_unlock:
 924         rdtgroup_kn_unlock(parent_kn);
 925         return ret;
 926 }
 927
 928 static int rdtgroup_rmdir(struct kernfs_node *kn)
 929 {
 930         struct task_struct *p, *t;
 931         struct rdtgroup *rdtgrp;
 932         int cpu, ret = 0;
 933
 934         rdtgrp = rdtgroup_kn_lock_live(kn);
 935         if (!rdtgrp) {
 936                 rdtgroup_kn_unlock(kn);
 937                 return -EPERM;
 938         }
 939
 940         /* Give any tasks back to the default group */
 941         read_lock(&tasklist_lock);
 942         for_each_process_thread(p, t) {
 943                 if (t->closid == rdtgrp->closid)
 944                         t->closid = 0;
 945         }
 946         read_unlock(&tasklist_lock);
 947
 948         /* Give any CPUs back to the default group */
 949         cpumask_or(&rdtgroup_default.cpu_mask,
 950                    &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
 951         for_each_cpu(cpu, &rdtgrp->cpu_mask)
 952                 per_cpu(cpu_closid, cpu) = 0;
 953
 954         rdtgrp->flags = RDT_DELETED;
 955         closid_free(rdtgrp->closid);
 956         list_del(&rdtgrp->rdtgroup_list);
 957
 958         /*
 959          * one extra hold on this, will drop when we kfree(rdtgrp)
 960          * in rdtgroup_kn_unlock()
 961          */
 962         kernfs_get(kn);
 963         kernfs_remove(rdtgrp->kn);
 964
 965         rdtgroup_kn_unlock(kn);
 966
 967         return ret;
 968 }
 969
/* Directory syscalls handled for the kernfs root created in rdtgroup_setup_root() */
static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
	.mkdir	= rdtgroup_mkdir,
	.rmdir	= rdtgroup_rmdir,
};
 974
 975 static int __init rdtgroup_setup_root(void)
 976 {
 977         int ret;
 978
 979         rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
 980                                       KERNFS_ROOT_CREATE_DEACTIVATED,
 981                                       &rdtgroup_default);
 982         if (IS_ERR(rdt_root))
 983                 return PTR_ERR(rdt_root);
 984
 985         mutex_lock(&rdtgroup_mutex);
 986
 987         rdtgroup_default.closid = 0;
 988         list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
 989
 990         ret = rdtgroup_add_files(rdt_root->kn, rdtgroup_base_files,
 991                                  ARRAY_SIZE(rdtgroup_base_files));
 992         if (ret) {
 993                 kernfs_destroy_root(rdt_root);
 994                 goto out;
 995         }
 996
 997         rdtgroup_default.kn = rdt_root->kn;
 998         kernfs_activate(rdtgroup_default.kn);
 999
1000 out:
1001         mutex_unlock(&rdtgroup_mutex);
1002
1003         return ret;
1004 }
1005
1006 /*
1007  * rdtgroup_init - rdtgroup initialization
1008  *
1009  * Setup resctrl file system including set up root, create mount point,
1010  * register rdtgroup filesystem, and initialize files under root directory.
1011  *
1012  * Return: 0 on success or -errno
1013  */
1014 int __init rdtgroup_init(void)
1015 {
1016         int ret = 0;
1017
1018         ret = rdtgroup_setup_root();
1019         if (ret)
1020                 return ret;
1021
1022         ret = sysfs_create_mount_point(fs_kobj, "resctrl");
1023         if (ret)
1024                 goto cleanup_root;
1025
1026         ret = register_filesystem(&rdt_fs_type);
1027         if (ret)
1028                 goto cleanup_mountpoint;
1029
1030         return 0;
1031
1032 cleanup_mountpoint:
1033         sysfs_remove_mount_point(fs_kobj, "resctrl");
1034 cleanup_root:
1035         kernfs_destroy_root(rdt_root);
1036
1037         return ret;
1038 }