mm/hugetlb_cgroup.c

   1 /*
   2  *
   3  * Copyright IBM Corporation, 2012
   4  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
   5  *
   6  * Cgroup v2
   7  * Copyright (C) 2019 Red Hat, Inc.
   8  * Author: Giuseppe Scrivano <gscrivan@redhat.com>
   9  *
  10  * This program is free software; you can redistribute it and/or modify it
  11  * under the terms of version 2.1 of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation.
  13  *
  14  * This program is distributed in the hope that it would be useful, but
  15  * WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17  *
  18  */
  19
  20 #include <linux/cgroup.h>
  21 #include <linux/page_counter.h>
  22 #include <linux/slab.h>
  23 #include <linux/hugetlb.h>
  24 #include <linux/hugetlb_cgroup.h>
  25
/*
 * Encoding of cftype->private as used by the control files in this file:
 * MEMFILE_PRIVATE() places @x (the hstate index) above bit 16 and ORs in
 * @val (a RES_* attribute); MEMFILE_IDX() and MEMFILE_ATTR() extract the
 * two 16-bit fields again.
 */
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_IDX(val)        (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)       ((val) & 0xffff)

/* Map a pointer to hugepage[idx] back to its enclosing struct hugetlb_cgroup. */
#define hugetlb_cgroup_from_counter(counter, idx)                   \
        container_of(counter, struct hugetlb_cgroup, hugepage[idx])

/* Assigned by hugetlb_cgroup_css_alloc() when it is called without a parent. */
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
  34
  35 static inline struct page_counter *
  36 __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
  37                                      bool rsvd)
  38 {
  39         if (rsvd)
  40                 return &h_cg->rsvd_hugepage[idx];
  41         return &h_cg->hugepage[idx];
  42 }
  43
  44 static inline struct page_counter *
  45 hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
  46 {
  47         return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
  48 }
  49
  50 static inline struct page_counter *
  51 hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
  52 {
  53         return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
  54 }
  55
  56 static inline
  57 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
  58 {
  59         return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
  60 }
  61
  62 static inline
  63 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
  64 {
  65         return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
  66 }
  67
  68 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
  69 {
  70         return (h_cg == root_h_cgroup);
  71 }
  72
  73 static inline struct hugetlb_cgroup *
  74 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
  75 {
  76         return hugetlb_cgroup_from_css(h_cg->css.parent);
  77 }
  78
  79 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
  80 {
  81         int idx;
  82
  83         for (idx = 0; idx < hugetlb_max_hstate; idx++) {
  84                 if (page_counter_read(
  85                                 hugetlb_cgroup_counter_from_cgroup(h_cg, idx)))
  86                         return true;
  87         }
  88         return false;
  89 }
  90
  91 static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
  92                                 struct hugetlb_cgroup *parent_h_cgroup)
  93 {
  94         int idx;
  95
  96         for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
  97                 struct page_counter *fault_parent = NULL;
  98                 struct page_counter *rsvd_parent = NULL;
  99                 unsigned long limit;
 100                 int ret;
 101
 102                 if (parent_h_cgroup) {
 103                         fault_parent = hugetlb_cgroup_counter_from_cgroup(
 104                                 parent_h_cgroup, idx);
 105                         rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
 106                                 parent_h_cgroup, idx);
 107                 }
 108                 page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
 109                                                                      idx),
 110                                   fault_parent);
 111                 page_counter_init(
 112                         hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
 113                         rsvd_parent);
 114
 115                 limit = round_down(PAGE_COUNTER_MAX,
 116                                    pages_per_huge_page(&hstates[idx]));
 117
 118                 ret = page_counter_set_max(
 119                         hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
 120                         limit);
 121                 VM_BUG_ON(ret);
 122                 ret = page_counter_set_max(
 123                         hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
 124                         limit);
 125                 VM_BUG_ON(ret);
 126         }
 127 }
 128
 129 static struct cgroup_subsys_state *
 130 hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 131 {
 132         struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
 133         struct hugetlb_cgroup *h_cgroup;
 134
 135         h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
 136         if (!h_cgroup)
 137                 return ERR_PTR(-ENOMEM);
 138
 139         if (!parent_h_cgroup)
 140                 root_h_cgroup = h_cgroup;
 141
 142         hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
 143         return &h_cgroup->css;
 144 }
 145
 146 static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
 147 {
 148         struct hugetlb_cgroup *h_cgroup;
 149
 150         h_cgroup = hugetlb_cgroup_from_css(css);
 151         kfree(h_cgroup);
 152 }
 153
 154 /*
 155  * Should be called with hugetlb_lock held.
 156  * Since we are holding hugetlb_lock, pages cannot get moved from
 157  * active list or uncharged from the cgroup, So no need to get
 158  * page reference and test for page active here. This function
 159  * cannot fail.
 160  */
 161 static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
 162                                        struct page *page)
 163 {
 164         unsigned int nr_pages;
 165         struct page_counter *counter;
 166         struct hugetlb_cgroup *page_hcg;
 167         struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
 168
 169         page_hcg = hugetlb_cgroup_from_page(page);
 170         /*
 171          * We can have pages in active list without any cgroup
 172          * ie, hugepage with less than 3 pages. We can safely
 173          * ignore those pages.
 174          */
 175         if (!page_hcg || page_hcg != h_cg)
 176                 goto out;
 177
 178         nr_pages = compound_nr(page);
 179         if (!parent) {
 180                 parent = root_h_cgroup;
 181                 /* root has no limit */
 182                 page_counter_charge(&parent->hugepage[idx], nr_pages);
 183         }
 184         counter = &h_cg->hugepage[idx];
 185         /* Take the pages off the local counter */
 186         page_counter_cancel(counter, nr_pages);
 187
 188         set_hugetlb_cgroup(page, parent);
 189 out:
 190         return;
 191 }
 192
 193 /*
 194  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 195  * the parent cgroup.
 196  */
 197 static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
 198 {
 199         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 200         struct hstate *h;
 201         struct page *page;
 202         int idx;
 203
 204         do {
 205                 idx = 0;
 206                 for_each_hstate(h) {
 207                         spin_lock_irq(&hugetlb_lock);
 208                         list_for_each_entry(page, &h->hugepage_activelist, lru)
 209                                 hugetlb_cgroup_move_parent(idx, h_cg, page);
 210
 211                         spin_unlock_irq(&hugetlb_lock);
 212                         idx++;
 213                 }
 214                 cond_resched();
 215         } while (hugetlb_cgroup_have_usage(h_cg));
 216 }
 217
 218 static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
 219                                  enum hugetlb_memory_event event)
 220 {
 221         atomic_long_inc(&hugetlb->events_local[idx][event]);
 222         cgroup_file_notify(&hugetlb->events_local_file[idx]);
 223
 224         do {
 225                 atomic_long_inc(&hugetlb->events[idx][event]);
 226                 cgroup_file_notify(&hugetlb->events_file[idx]);
 227         } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
 228                  !hugetlb_cgroup_is_root(hugetlb));
 229 }
 230
 231 static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 232                                           struct hugetlb_cgroup **ptr,
 233                                           bool rsvd)
 234 {
 235         int ret = 0;
 236         struct page_counter *counter;
 237         struct hugetlb_cgroup *h_cg = NULL;
 238
 239         if (hugetlb_cgroup_disabled())
 240                 goto done;
 241         /*
 242          * We don't charge any cgroup if the compound page have less
 243          * than 3 pages.
 244          */
 245         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
 246                 goto done;
 247 again:
 248         rcu_read_lock();
 249         h_cg = hugetlb_cgroup_from_task(current);
 250         if (!css_tryget(&h_cg->css)) {
 251                 rcu_read_unlock();
 252                 goto again;
 253         }
 254         rcu_read_unlock();
 255
 256         if (!page_counter_try_charge(
 257                     __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
 258                     nr_pages, &counter)) {
 259                 ret = -ENOMEM;
 260                 hugetlb_event(h_cg, idx, HUGETLB_MAX);
 261                 css_put(&h_cg->css);
 262                 goto done;
 263         }
 264         /* Reservations take a reference to the css because they do not get
 265          * reparented.
 266          */
 267         if (!rsvd)
 268                 css_put(&h_cg->css);
 269 done:
 270         *ptr = h_cg;
 271         return ret;
 272 }
 273
 274 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 275                                  struct hugetlb_cgroup **ptr)
 276 {
 277         return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
 278 }
 279
 280 int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
 281                                       struct hugetlb_cgroup **ptr)
 282 {
 283         return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
 284 }
 285
 286 /* Should be called with hugetlb_lock held */
 287 static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 288                                            struct hugetlb_cgroup *h_cg,
 289                                            struct page *page, bool rsvd)
 290 {
 291         if (hugetlb_cgroup_disabled() || !h_cg)
 292                 return;
 293
 294         __set_hugetlb_cgroup(page, h_cg, rsvd);
 295         return;
 296 }
 297
 298 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 299                                   struct hugetlb_cgroup *h_cg,
 300                                   struct page *page)
 301 {
 302         __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
 303 }
 304
 305 void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
 306                                        struct hugetlb_cgroup *h_cg,
 307                                        struct page *page)
 308 {
 309         __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
 310 }
 311
 312 /*
 313  * Should be called with hugetlb_lock held
 314  */
 315 static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 316                                            struct page *page, bool rsvd)
 317 {
 318         struct hugetlb_cgroup *h_cg;
 319
 320         if (hugetlb_cgroup_disabled())
 321                 return;
 322         lockdep_assert_held(&hugetlb_lock);
 323         h_cg = __hugetlb_cgroup_from_page(page, rsvd);
 324         if (unlikely(!h_cg))
 325                 return;
 326         __set_hugetlb_cgroup(page, NULL, rsvd);
 327
 328         page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 329                                                                    rsvd),
 330                               nr_pages);
 331
 332         if (rsvd)
 333                 css_put(&h_cg->css);
 334
 335         return;
 336 }
 337
 338 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 339                                   struct page *page)
 340 {
 341         __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
 342 }
 343
 344 void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
 345                                        struct page *page)
 346 {
 347         __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
 348 }
 349
 350 static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 351                                              struct hugetlb_cgroup *h_cg,
 352                                              bool rsvd)
 353 {
 354         if (hugetlb_cgroup_disabled() || !h_cg)
 355                 return;
 356
 357         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
 358                 return;
 359
 360         page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 361                                                                    rsvd),
 362                               nr_pages);
 363
 364         if (rsvd)
 365                 css_put(&h_cg->css);
 366 }
 367
 368 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 369                                     struct hugetlb_cgroup *h_cg)
 370 {
 371         __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
 372 }
 373
 374 void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
 375                                          struct hugetlb_cgroup *h_cg)
 376 {
 377         __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
 378 }
 379
 380 void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
 381                                      unsigned long end)
 382 {
 383         if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
 384             !resv->css)
 385                 return;
 386
 387         page_counter_uncharge(resv->reservation_counter,
 388                               (end - start) * resv->pages_per_hpage);
 389         css_put(resv->css);
 390 }
 391
 392 void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
 393                                          struct file_region *rg,
 394                                          unsigned long nr_pages,
 395                                          bool region_del)
 396 {
 397         if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
 398                 return;
 399
 400         if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 &&
 401             !resv->reservation_counter) {
 402                 page_counter_uncharge(rg->reservation_counter,
 403                                       nr_pages * resv->pages_per_hpage);
 404                 /*
 405                  * Only do css_put(rg->css) when we delete the entire region
 406                  * because one file_region must hold exactly one css reference.
 407                  */
 408                 if (region_del)
 409                         css_put(rg->css);
 410         }
 411 }
 412
/*
 * Attribute selectors stored in the low 16 bits of cftype->private (see
 * MEMFILE_ATTR()). The RSVD variants refer to rsvd_hugepage[], the others
 * to hugepage[].
 */
enum {
        RES_USAGE,              /* current counter value */
        RES_RSVD_USAGE,
        RES_LIMIT,              /* counter->max */
        RES_RSVD_LIMIT,
        RES_MAX_USAGE,          /* counter->watermark */
        RES_RSVD_MAX_USAGE,
        RES_FAILCNT,            /* counter->failcnt */
        RES_RSVD_FAILCNT,
};
 423
 424 static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
 425                                    struct cftype *cft)
 426 {
 427         struct page_counter *counter;
 428         struct page_counter *rsvd_counter;
 429         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 430
 431         counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
 432         rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];
 433
 434         switch (MEMFILE_ATTR(cft->private)) {
 435         case RES_USAGE:
 436                 return (u64)page_counter_read(counter) * PAGE_SIZE;
 437         case RES_RSVD_USAGE:
 438                 return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
 439         case RES_LIMIT:
 440                 return (u64)counter->max * PAGE_SIZE;
 441         case RES_RSVD_LIMIT:
 442                 return (u64)rsvd_counter->max * PAGE_SIZE;
 443         case RES_MAX_USAGE:
 444                 return (u64)counter->watermark * PAGE_SIZE;
 445         case RES_RSVD_MAX_USAGE:
 446                 return (u64)rsvd_counter->watermark * PAGE_SIZE;
 447         case RES_FAILCNT:
 448                 return counter->failcnt;
 449         case RES_RSVD_FAILCNT:
 450                 return rsvd_counter->failcnt;
 451         default:
 452                 BUG();
 453         }
 454 }
 455
 456 static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
 457 {
 458         int idx;
 459         u64 val;
 460         struct cftype *cft = seq_cft(seq);
 461         unsigned long limit;
 462         struct page_counter *counter;
 463         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
 464
 465         idx = MEMFILE_IDX(cft->private);
 466         counter = &h_cg->hugepage[idx];
 467
 468         limit = round_down(PAGE_COUNTER_MAX,
 469                            pages_per_huge_page(&hstates[idx]));
 470
 471         switch (MEMFILE_ATTR(cft->private)) {
 472         case RES_RSVD_USAGE:
 473                 counter = &h_cg->rsvd_hugepage[idx];
 474                 fallthrough;
 475         case RES_USAGE:
 476                 val = (u64)page_counter_read(counter);
 477                 seq_printf(seq, "%llu\n", val * PAGE_SIZE);
 478                 break;
 479         case RES_RSVD_LIMIT:
 480                 counter = &h_cg->rsvd_hugepage[idx];
 481                 fallthrough;
 482         case RES_LIMIT:
 483                 val = (u64)counter->max;
 484                 if (val == limit)
 485                         seq_puts(seq, "max\n");
 486                 else
 487                         seq_printf(seq, "%llu\n", val * PAGE_SIZE);
 488                 break;
 489         default:
 490                 BUG();
 491         }
 492
 493         return 0;
 494 }
 495
 496 static DEFINE_MUTEX(hugetlb_limit_mutex);
 497
 498 static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
 499                                     char *buf, size_t nbytes, loff_t off,
 500                                     const char *max)
 501 {
 502         int ret, idx;
 503         unsigned long nr_pages;
 504         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 505         bool rsvd = false;
 506
 507         if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
 508                 return -EINVAL;
 509
 510         buf = strstrip(buf);
 511         ret = page_counter_memparse(buf, max, &nr_pages);
 512         if (ret)
 513                 return ret;
 514
 515         idx = MEMFILE_IDX(of_cft(of)->private);
 516         nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));
 517
 518         switch (MEMFILE_ATTR(of_cft(of)->private)) {
 519         case RES_RSVD_LIMIT:
 520                 rsvd = true;
 521                 fallthrough;
 522         case RES_LIMIT:
 523                 mutex_lock(&hugetlb_limit_mutex);
 524                 ret = page_counter_set_max(
 525                         __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
 526                         nr_pages);
 527                 mutex_unlock(&hugetlb_limit_mutex);
 528                 break;
 529         default:
 530                 ret = -EINVAL;
 531                 break;
 532         }
 533         return ret ?: nbytes;
 534 }
 535
 536 static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
 537                                            char *buf, size_t nbytes, loff_t off)
 538 {
 539         return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
 540 }
 541
 542 static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
 543                                         char *buf, size_t nbytes, loff_t off)
 544 {
 545         return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
 546 }
 547
 548 static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
 549                                     char *buf, size_t nbytes, loff_t off)
 550 {
 551         int ret = 0;
 552         struct page_counter *counter, *rsvd_counter;
 553         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 554
 555         counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
 556         rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];
 557
 558         switch (MEMFILE_ATTR(of_cft(of)->private)) {
 559         case RES_MAX_USAGE:
 560                 page_counter_reset_watermark(counter);
 561                 break;
 562         case RES_RSVD_MAX_USAGE:
 563                 page_counter_reset_watermark(rsvd_counter);
 564                 break;
 565         case RES_FAILCNT:
 566                 counter->failcnt = 0;
 567                 break;
 568         case RES_RSVD_FAILCNT:
 569                 rsvd_counter->failcnt = 0;
 570                 break;
 571         default:
 572                 ret = -EINVAL;
 573                 break;
 574         }
 575         return ret ?: nbytes;
 576 }
 577
 578 static char *mem_fmt(char *buf, int size, unsigned long hsize)
 579 {
 580         if (hsize >= (1UL << 30))
 581                 snprintf(buf, size, "%luGB", hsize >> 30);
 582         else if (hsize >= (1UL << 20))
 583                 snprintf(buf, size, "%luMB", hsize >> 20);
 584         else
 585                 snprintf(buf, size, "%luKB", hsize >> 10);
 586         return buf;
 587 }
 588
 589 static int __hugetlb_events_show(struct seq_file *seq, bool local)
 590 {
 591         int idx;
 592         long max;
 593         struct cftype *cft = seq_cft(seq);
 594         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
 595
 596         idx = MEMFILE_IDX(cft->private);
 597
 598         if (local)
 599                 max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
 600         else
 601                 max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);
 602
 603         seq_printf(seq, "max %lu\n", max);
 604
 605         return 0;
 606 }
 607
 608 static int hugetlb_events_show(struct seq_file *seq, void *v)
 609 {
 610         return __hugetlb_events_show(seq, false);
 611 }
 612
 613 static int hugetlb_events_local_show(struct seq_file *seq, void *v)
 614 {
 615         return __hugetlb_events_show(seq, true);
 616 }
 617
 618 static void __init __hugetlb_cgroup_file_dfl_init(int idx)
 619 {
 620         char buf[32];
 621         struct cftype *cft;
 622         struct hstate *h = &hstates[idx];
 623
 624         /* format the size */
 625         mem_fmt(buf, sizeof(buf), huge_page_size(h));
 626
 627         /* Add the limit file */
 628         cft = &h->cgroup_files_dfl[0];
 629         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
 630         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 631         cft->seq_show = hugetlb_cgroup_read_u64_max;
 632         cft->write = hugetlb_cgroup_write_dfl;
 633         cft->flags = CFTYPE_NOT_ON_ROOT;
 634
 635         /* Add the reservation limit file */
 636         cft = &h->cgroup_files_dfl[1];
 637         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
 638         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
 639         cft->seq_show = hugetlb_cgroup_read_u64_max;
 640         cft->write = hugetlb_cgroup_write_dfl;
 641         cft->flags = CFTYPE_NOT_ON_ROOT;
 642
 643         /* Add the current usage file */
 644         cft = &h->cgroup_files_dfl[2];
 645         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
 646         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
 647         cft->seq_show = hugetlb_cgroup_read_u64_max;
 648         cft->flags = CFTYPE_NOT_ON_ROOT;
 649
 650         /* Add the current reservation usage file */
 651         cft = &h->cgroup_files_dfl[3];
 652         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
 653         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
 654         cft->seq_show = hugetlb_cgroup_read_u64_max;
 655         cft->flags = CFTYPE_NOT_ON_ROOT;
 656
 657         /* Add the events file */
 658         cft = &h->cgroup_files_dfl[4];
 659         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
 660         cft->private = MEMFILE_PRIVATE(idx, 0);
 661         cft->seq_show = hugetlb_events_show;
 662         cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
 663         cft->flags = CFTYPE_NOT_ON_ROOT;
 664
 665         /* Add the events.local file */
 666         cft = &h->cgroup_files_dfl[5];
 667         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
 668         cft->private = MEMFILE_PRIVATE(idx, 0);
 669         cft->seq_show = hugetlb_events_local_show;
 670         cft->file_offset = offsetof(struct hugetlb_cgroup,
 671                                     events_local_file[idx]);
 672         cft->flags = CFTYPE_NOT_ON_ROOT;
 673
 674         /* NULL terminate the last cft */
 675         cft = &h->cgroup_files_dfl[6];
 676         memset(cft, 0, sizeof(*cft));
 677
 678         WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
 679                                        h->cgroup_files_dfl));
 680 }
 681
 682 static void __init __hugetlb_cgroup_file_legacy_init(int idx)
 683 {
 684         char buf[32];
 685         struct cftype *cft;
 686         struct hstate *h = &hstates[idx];
 687
 688         /* format the size */
 689         mem_fmt(buf, sizeof(buf), huge_page_size(h));
 690
 691         /* Add the limit file */
 692         cft = &h->cgroup_files_legacy[0];
 693         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
 694         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 695         cft->read_u64 = hugetlb_cgroup_read_u64;
 696         cft->write = hugetlb_cgroup_write_legacy;
 697
 698         /* Add the reservation limit file */
 699         cft = &h->cgroup_files_legacy[1];
 700         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
 701         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
 702         cft->read_u64 = hugetlb_cgroup_read_u64;
 703         cft->write = hugetlb_cgroup_write_legacy;
 704
 705         /* Add the usage file */
 706         cft = &h->cgroup_files_legacy[2];
 707         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
 708         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
 709         cft->read_u64 = hugetlb_cgroup_read_u64;
 710
 711         /* Add the reservation usage file */
 712         cft = &h->cgroup_files_legacy[3];
 713         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
 714         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
 715         cft->read_u64 = hugetlb_cgroup_read_u64;
 716
 717         /* Add the MAX usage file */
 718         cft = &h->cgroup_files_legacy[4];
 719         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
 720         cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
 721         cft->write = hugetlb_cgroup_reset;
 722         cft->read_u64 = hugetlb_cgroup_read_u64;
 723
 724         /* Add the MAX reservation usage file */
 725         cft = &h->cgroup_files_legacy[5];
 726         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
 727         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
 728         cft->write = hugetlb_cgroup_reset;
 729         cft->read_u64 = hugetlb_cgroup_read_u64;
 730
 731         /* Add the failcntfile */
 732         cft = &h->cgroup_files_legacy[6];
 733         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
 734         cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
 735         cft->write = hugetlb_cgroup_reset;
 736         cft->read_u64 = hugetlb_cgroup_read_u64;
 737
 738         /* Add the reservation failcntfile */
 739         cft = &h->cgroup_files_legacy[7];
 740         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
 741         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
 742         cft->write = hugetlb_cgroup_reset;
 743         cft->read_u64 = hugetlb_cgroup_read_u64;
 744
 745         /* NULL terminate the last cft */
 746         cft = &h->cgroup_files_legacy[8];
 747         memset(cft, 0, sizeof(*cft));
 748
 749         WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
 750                                           h->cgroup_files_legacy));
 751 }
 752
 753 static void __init __hugetlb_cgroup_file_init(int idx)
 754 {
 755         __hugetlb_cgroup_file_dfl_init(idx);
 756         __hugetlb_cgroup_file_legacy_init(idx);
 757 }
 758
 759 void __init hugetlb_cgroup_file_init(void)
 760 {
 761         struct hstate *h;
 762
 763         for_each_hstate(h) {
 764                 /*
 765                  * Add cgroup control files only if the huge page consists
 766                  * of more than two normal pages. This is because we use
 767                  * page[2].private for storing cgroup details.
 768                  */
 769                 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
 770                         __hugetlb_cgroup_file_init(hstate_index(h));
 771         }
 772 }
 773
 774 /*
 775  * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 776  * when we migrate hugepages
 777  */
 778 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 779 {
 780         struct hugetlb_cgroup *h_cg;
 781         struct hugetlb_cgroup *h_cg_rsvd;
 782         struct hstate *h = page_hstate(oldhpage);
 783
 784         if (hugetlb_cgroup_disabled())
 785                 return;
 786
 787         spin_lock_irq(&hugetlb_lock);
 788         h_cg = hugetlb_cgroup_from_page(oldhpage);
 789         h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
 790         set_hugetlb_cgroup(oldhpage, NULL);
 791         set_hugetlb_cgroup_rsvd(oldhpage, NULL);
 792
 793         /* move the h_cg details to new cgroup */
 794         set_hugetlb_cgroup(newhpage, h_cg);
 795         set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
 796         list_move(&newhpage->lru, &h->hugepage_activelist);
 797         spin_unlock_irq(&hugetlb_lock);
 798         return;
 799 }
 800
/*
 * Empty cftype table. The per-hstate control files are not listed here;
 * they are added from hugetlb_cgroup_file_init() through
 * cgroup_add_dfl_cftypes() and cgroup_add_legacy_cftypes().
 */
static struct cftype hugetlb_files[] = {
        {} /* terminate */
};

/*
 * The hugetlb cgroup subsystem descriptor: hooks up the css alloc,
 * offline and free callbacks defined in this file, with the empty table
 * above as both the default and the legacy cftype list.
 */
struct cgroup_subsys hugetlb_cgrp_subsys = {
        .css_alloc      = hugetlb_cgroup_css_alloc,
        .css_offline    = hugetlb_cgroup_css_offline,
        .css_free       = hugetlb_cgroup_css_free,
        .dfl_cftypes    = hugetlb_files,
        .legacy_cftypes = hugetlb_files,
};