mm/hugetlb_cgroup.c

   1 /*
   2  *
   3  * Copyright IBM Corporation, 2012
   4  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
   5  *
   6  * Cgroup v2
   7  * Copyright (C) 2019 Red Hat, Inc.
   8  * Author: Giuseppe Scrivano <gscrivan@redhat.com>
   9  *
  10  * This program is free software; you can redistribute it and/or modify it
  11  * under the terms of version 2.1 of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation.
  13  *
  14  * This program is distributed in the hope that it would be useful, but
  15  * WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17  *
  18  */
  19
  20 #include <linux/cgroup.h>
  21 #include <linux/page_counter.h>
  22 #include <linux/slab.h>
  23 #include <linux/hugetlb.h>
  24 #include <linux/hugetlb_cgroup.h>
  25
  26 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
  27 #define MEMFILE_IDX(val)        (((val) >> 16) & 0xffff)
  28 #define MEMFILE_ATTR(val)       ((val) & 0xffff)
  29
  30 #define hugetlb_cgroup_from_counter(counter, idx)                   \
  31         container_of(counter, struct hugetlb_cgroup, hugepage[idx])
  32
  33 static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
  34
  35 static inline struct page_counter *
  36 __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
  37                                      bool rsvd)
  38 {
  39         if (rsvd)
  40                 return &h_cg->rsvd_hugepage[idx];
  41         return &h_cg->hugepage[idx];
  42 }
  43
  44 static inline struct page_counter *
  45 hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
  46 {
  47         return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
  48 }
  49
  50 static inline struct page_counter *
  51 hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
  52 {
  53         return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
  54 }
  55
  56 static inline
  57 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
  58 {
  59         return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
  60 }
  61
  62 static inline
  63 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
  64 {
  65         return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
  66 }
  67
  68 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
  69 {
  70         return (h_cg == root_h_cgroup);
  71 }
  72
  73 static inline struct hugetlb_cgroup *
  74 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
  75 {
  76         return hugetlb_cgroup_from_css(h_cg->css.parent);
  77 }
  78
  79 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
  80 {
  81         int idx;
  82
  83         for (idx = 0; idx < hugetlb_max_hstate; idx++) {
  84                 if (page_counter_read(
  85                                 hugetlb_cgroup_counter_from_cgroup(h_cg, idx)))
  86                         return true;
  87         }
  88         return false;
  89 }
  90
  91 static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
  92                                 struct hugetlb_cgroup *parent_h_cgroup)
  93 {
  94         int idx;
  95
  96         for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
  97                 struct page_counter *fault_parent = NULL;
  98                 struct page_counter *rsvd_parent = NULL;
  99                 unsigned long limit;
 100                 int ret;
 101
 102                 if (parent_h_cgroup) {
 103                         fault_parent = hugetlb_cgroup_counter_from_cgroup(
 104                                 parent_h_cgroup, idx);
 105                         rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
 106                                 parent_h_cgroup, idx);
 107                 }
 108                 page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
 109                                                                      idx),
 110                                   fault_parent);
 111                 page_counter_init(
 112                         hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
 113                         rsvd_parent);
 114
 115                 limit = round_down(PAGE_COUNTER_MAX,
 116                                    1 << huge_page_order(&hstates[idx]));
 117
 118                 ret = page_counter_set_max(
 119                         hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
 120                         limit);
 121                 VM_BUG_ON(ret);
 122                 ret = page_counter_set_max(
 123                         hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
 124                         limit);
 125                 VM_BUG_ON(ret);
 126         }
 127 }
 128
 129 static struct cgroup_subsys_state *
 130 hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 131 {
 132         struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
 133         struct hugetlb_cgroup *h_cgroup;
 134
 135         h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
 136         if (!h_cgroup)
 137                 return ERR_PTR(-ENOMEM);
 138
 139         if (!parent_h_cgroup)
 140                 root_h_cgroup = h_cgroup;
 141
 142         hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
 143         return &h_cgroup->css;
 144 }
 145
 146 static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
 147 {
 148         struct hugetlb_cgroup *h_cgroup;
 149
 150         h_cgroup = hugetlb_cgroup_from_css(css);
 151         kfree(h_cgroup);
 152 }
 153
 154 /*
 155  * Should be called with hugetlb_lock held.
 156  * Since we are holding hugetlb_lock, pages cannot get moved from
 157  * active list or uncharged from the cgroup, So no need to get
 158  * page reference and test for page active here. This function
 159  * cannot fail.
 160  */
 161 static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
 162                                        struct page *page)
 163 {
 164         unsigned int nr_pages;
 165         struct page_counter *counter;
 166         struct hugetlb_cgroup *page_hcg;
 167         struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
 168
 169         page_hcg = hugetlb_cgroup_from_page(page);
 170         /*
 171          * We can have pages in active list without any cgroup
 172          * ie, hugepage with less than 3 pages. We can safely
 173          * ignore those pages.
 174          */
 175         if (!page_hcg || page_hcg != h_cg)
 176                 goto out;
 177
 178         nr_pages = compound_nr(page);
 179         if (!parent) {
 180                 parent = root_h_cgroup;
 181                 /* root has no limit */
 182                 page_counter_charge(&parent->hugepage[idx], nr_pages);
 183         }
 184         counter = &h_cg->hugepage[idx];
 185         /* Take the pages off the local counter */
 186         page_counter_cancel(counter, nr_pages);
 187
 188         set_hugetlb_cgroup(page, parent);
 189 out:
 190         return;
 191 }
 192
 193 /*
 194  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 195  * the parent cgroup.
 196  */
 197 static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
 198 {
 199         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 200         struct hstate *h;
 201         struct page *page;
 202         int idx;
 203
 204         do {
 205                 idx = 0;
 206                 for_each_hstate(h) {
 207                         spin_lock(&hugetlb_lock);
 208                         list_for_each_entry(page, &h->hugepage_activelist, lru)
 209                                 hugetlb_cgroup_move_parent(idx, h_cg, page);
 210
 211                         spin_unlock(&hugetlb_lock);
 212                         idx++;
 213                 }
 214                 cond_resched();
 215         } while (hugetlb_cgroup_have_usage(h_cg));
 216 }
 217
 218 static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
 219                                  enum hugetlb_memory_event event)
 220 {
 221         atomic_long_inc(&hugetlb->events_local[idx][event]);
 222         cgroup_file_notify(&hugetlb->events_local_file[idx]);
 223
 224         do {
 225                 atomic_long_inc(&hugetlb->events[idx][event]);
 226                 cgroup_file_notify(&hugetlb->events_file[idx]);
 227         } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
 228                  !hugetlb_cgroup_is_root(hugetlb));
 229 }
 230
 231 static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 232                                           struct hugetlb_cgroup **ptr,
 233                                           bool rsvd)
 234 {
 235         int ret = 0;
 236         struct page_counter *counter;
 237         struct hugetlb_cgroup *h_cg = NULL;
 238
 239         if (hugetlb_cgroup_disabled())
 240                 goto done;
 241         /*
 242          * We don't charge any cgroup if the compound page have less
 243          * than 3 pages.
 244          */
 245         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
 246                 goto done;
 247 again:
 248         rcu_read_lock();
 249         h_cg = hugetlb_cgroup_from_task(current);
 250         if (!css_tryget(&h_cg->css)) {
 251                 rcu_read_unlock();
 252                 goto again;
 253         }
 254         rcu_read_unlock();
 255
 256         if (!page_counter_try_charge(
 257                     __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
 258                     nr_pages, &counter)) {
 259                 ret = -ENOMEM;
 260                 hugetlb_event(h_cg, idx, HUGETLB_MAX);
 261                 css_put(&h_cg->css);
 262                 goto done;
 263         }
 264         /* Reservations take a reference to the css because they do not get
 265          * reparented.
 266          */
 267         if (!rsvd)
 268                 css_put(&h_cg->css);
 269 done:
 270         *ptr = h_cg;
 271         return ret;
 272 }
 273
 274 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 275                                  struct hugetlb_cgroup **ptr)
 276 {
 277         return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
 278 }
 279
 280 int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
 281                                       struct hugetlb_cgroup **ptr)
 282 {
 283         return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
 284 }
 285
 286 /* Should be called with hugetlb_lock held */
 287 static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 288                                            struct hugetlb_cgroup *h_cg,
 289                                            struct page *page, bool rsvd)
 290 {
 291         if (hugetlb_cgroup_disabled() || !h_cg)
 292                 return;
 293
 294         __set_hugetlb_cgroup(page, h_cg, rsvd);
 295         return;
 296 }
 297
 298 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 299                                   struct hugetlb_cgroup *h_cg,
 300                                   struct page *page)
 301 {
 302         __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
 303 }
 304
 305 void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
 306                                        struct hugetlb_cgroup *h_cg,
 307                                        struct page *page)
 308 {
 309         __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
 310 }
 311
 312 /*
 313  * Should be called with hugetlb_lock held
 314  */
 315 static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 316                                            struct page *page, bool rsvd)
 317 {
 318         struct hugetlb_cgroup *h_cg;
 319
 320         if (hugetlb_cgroup_disabled())
 321                 return;
 322         lockdep_assert_held(&hugetlb_lock);
 323         h_cg = __hugetlb_cgroup_from_page(page, rsvd);
 324         if (unlikely(!h_cg))
 325                 return;
 326         __set_hugetlb_cgroup(page, NULL, rsvd);
 327
 328         page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 329                                                                    rsvd),
 330                               nr_pages);
 331
 332         if (rsvd)
 333                 css_put(&h_cg->css);
 334
 335         return;
 336 }
 337
 338 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 339                                   struct page *page)
 340 {
 341         __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
 342 }
 343
 344 void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
 345                                        struct page *page)
 346 {
 347         __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
 348 }
 349
 350 static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 351                                              struct hugetlb_cgroup *h_cg,
 352                                              bool rsvd)
 353 {
 354         if (hugetlb_cgroup_disabled() || !h_cg)
 355                 return;
 356
 357         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
 358                 return;
 359
 360         page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 361                                                                    rsvd),
 362                               nr_pages);
 363
 364         if (rsvd)
 365                 css_put(&h_cg->css);
 366 }
 367
 368 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 369                                     struct hugetlb_cgroup *h_cg)
 370 {
 371         __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
 372 }
 373
 374 void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
 375                                          struct hugetlb_cgroup *h_cg)
 376 {
 377         __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
 378 }
 379
 380 void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
 381                                      unsigned long end)
 382 {
 383         if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
 384             !resv->css)
 385                 return;
 386
 387         page_counter_uncharge(resv->reservation_counter,
 388                               (end - start) * resv->pages_per_hpage);
 389         css_put(resv->css);
 390 }
 391
 392 void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
 393                                          struct file_region *rg,
 394                                          unsigned long nr_pages)
 395 {
 396         if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
 397                 return;
 398
 399         if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 &&
 400             !resv->reservation_counter) {
 401                 page_counter_uncharge(rg->reservation_counter,
 402                                       nr_pages * resv->pages_per_hpage);
 403                 css_put(rg->css);
 404         }
 405 }
 406
 407 enum {
 408         RES_USAGE,
 409         RES_RSVD_USAGE,
 410         RES_LIMIT,
 411         RES_RSVD_LIMIT,
 412         RES_MAX_USAGE,
 413         RES_RSVD_MAX_USAGE,
 414         RES_FAILCNT,
 415         RES_RSVD_FAILCNT,
 416 };
 417
 418 static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
 419                                    struct cftype *cft)
 420 {
 421         struct page_counter *counter;
 422         struct page_counter *rsvd_counter;
 423         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 424
 425         counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
 426         rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];
 427
 428         switch (MEMFILE_ATTR(cft->private)) {
 429         case RES_USAGE:
 430                 return (u64)page_counter_read(counter) * PAGE_SIZE;
 431         case RES_RSVD_USAGE:
 432                 return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
 433         case RES_LIMIT:
 434                 return (u64)counter->max * PAGE_SIZE;
 435         case RES_RSVD_LIMIT:
 436                 return (u64)rsvd_counter->max * PAGE_SIZE;
 437         case RES_MAX_USAGE:
 438                 return (u64)counter->watermark * PAGE_SIZE;
 439         case RES_RSVD_MAX_USAGE:
 440                 return (u64)rsvd_counter->watermark * PAGE_SIZE;
 441         case RES_FAILCNT:
 442                 return counter->failcnt;
 443         case RES_RSVD_FAILCNT:
 444                 return rsvd_counter->failcnt;
 445         default:
 446                 BUG();
 447         }
 448 }
 449
 450 static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
 451 {
 452         int idx;
 453         u64 val;
 454         struct cftype *cft = seq_cft(seq);
 455         unsigned long limit;
 456         struct page_counter *counter;
 457         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
 458
 459         idx = MEMFILE_IDX(cft->private);
 460         counter = &h_cg->hugepage[idx];
 461
 462         limit = round_down(PAGE_COUNTER_MAX,
 463                            1 << huge_page_order(&hstates[idx]));
 464
 465         switch (MEMFILE_ATTR(cft->private)) {
 466         case RES_RSVD_USAGE:
 467                 counter = &h_cg->rsvd_hugepage[idx];
 468                 fallthrough;
 469         case RES_USAGE:
 470                 val = (u64)page_counter_read(counter);
 471                 seq_printf(seq, "%llu\n", val * PAGE_SIZE);
 472                 break;
 473         case RES_RSVD_LIMIT:
 474                 counter = &h_cg->rsvd_hugepage[idx];
 475                 fallthrough;
 476         case RES_LIMIT:
 477                 val = (u64)counter->max;
 478                 if (val == limit)
 479                         seq_puts(seq, "max\n");
 480                 else
 481                         seq_printf(seq, "%llu\n", val * PAGE_SIZE);
 482                 break;
 483         default:
 484                 BUG();
 485         }
 486
 487         return 0;
 488 }
 489
 490 static DEFINE_MUTEX(hugetlb_limit_mutex);
 491
 492 static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
 493                                     char *buf, size_t nbytes, loff_t off,
 494                                     const char *max)
 495 {
 496         int ret, idx;
 497         unsigned long nr_pages;
 498         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 499         bool rsvd = false;
 500
 501         if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
 502                 return -EINVAL;
 503
 504         buf = strstrip(buf);
 505         ret = page_counter_memparse(buf, max, &nr_pages);
 506         if (ret)
 507                 return ret;
 508
 509         idx = MEMFILE_IDX(of_cft(of)->private);
 510         nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));
 511
 512         switch (MEMFILE_ATTR(of_cft(of)->private)) {
 513         case RES_RSVD_LIMIT:
 514                 rsvd = true;
 515                 fallthrough;
 516         case RES_LIMIT:
 517                 mutex_lock(&hugetlb_limit_mutex);
 518                 ret = page_counter_set_max(
 519                         __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
 520                         nr_pages);
 521                 mutex_unlock(&hugetlb_limit_mutex);
 522                 break;
 523         default:
 524                 ret = -EINVAL;
 525                 break;
 526         }
 527         return ret ?: nbytes;
 528 }
 529
 530 static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
 531                                            char *buf, size_t nbytes, loff_t off)
 532 {
 533         return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
 534 }
 535
 536 static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
 537                                         char *buf, size_t nbytes, loff_t off)
 538 {
 539         return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
 540 }
 541
 542 static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
 543                                     char *buf, size_t nbytes, loff_t off)
 544 {
 545         int ret = 0;
 546         struct page_counter *counter, *rsvd_counter;
 547         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 548
 549         counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
 550         rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];
 551
 552         switch (MEMFILE_ATTR(of_cft(of)->private)) {
 553         case RES_MAX_USAGE:
 554                 page_counter_reset_watermark(counter);
 555                 break;
 556         case RES_RSVD_MAX_USAGE:
 557                 page_counter_reset_watermark(rsvd_counter);
 558                 break;
 559         case RES_FAILCNT:
 560                 counter->failcnt = 0;
 561                 break;
 562         case RES_RSVD_FAILCNT:
 563                 rsvd_counter->failcnt = 0;
 564                 break;
 565         default:
 566                 ret = -EINVAL;
 567                 break;
 568         }
 569         return ret ?: nbytes;
 570 }
 571
 572 static char *mem_fmt(char *buf, int size, unsigned long hsize)
 573 {
 574         if (hsize >= (1UL << 30))
 575                 snprintf(buf, size, "%luGB", hsize >> 30);
 576         else if (hsize >= (1UL << 20))
 577                 snprintf(buf, size, "%luMB", hsize >> 20);
 578         else
 579                 snprintf(buf, size, "%luKB", hsize >> 10);
 580         return buf;
 581 }
 582
 583 static int __hugetlb_events_show(struct seq_file *seq, bool local)
 584 {
 585         int idx;
 586         long max;
 587         struct cftype *cft = seq_cft(seq);
 588         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
 589
 590         idx = MEMFILE_IDX(cft->private);
 591
 592         if (local)
 593                 max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
 594         else
 595                 max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);
 596
 597         seq_printf(seq, "max %lu\n", max);
 598
 599         return 0;
 600 }
 601
 602 static int hugetlb_events_show(struct seq_file *seq, void *v)
 603 {
 604         return __hugetlb_events_show(seq, false);
 605 }
 606
 607 static int hugetlb_events_local_show(struct seq_file *seq, void *v)
 608 {
 609         return __hugetlb_events_show(seq, true);
 610 }
 611
 612 static void __init __hugetlb_cgroup_file_dfl_init(int idx)
 613 {
 614         char buf[32];
 615         struct cftype *cft;
 616         struct hstate *h = &hstates[idx];
 617
 618         /* format the size */
 619         mem_fmt(buf, sizeof(buf), huge_page_size(h));
 620
 621         /* Add the limit file */
 622         cft = &h->cgroup_files_dfl[0];
 623         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
 624         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 625         cft->seq_show = hugetlb_cgroup_read_u64_max;
 626         cft->write = hugetlb_cgroup_write_dfl;
 627         cft->flags = CFTYPE_NOT_ON_ROOT;
 628
 629         /* Add the reservation limit file */
 630         cft = &h->cgroup_files_dfl[1];
 631         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
 632         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
 633         cft->seq_show = hugetlb_cgroup_read_u64_max;
 634         cft->write = hugetlb_cgroup_write_dfl;
 635         cft->flags = CFTYPE_NOT_ON_ROOT;
 636
 637         /* Add the current usage file */
 638         cft = &h->cgroup_files_dfl[2];
 639         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
 640         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
 641         cft->seq_show = hugetlb_cgroup_read_u64_max;
 642         cft->flags = CFTYPE_NOT_ON_ROOT;
 643
 644         /* Add the current reservation usage file */
 645         cft = &h->cgroup_files_dfl[3];
 646         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
 647         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
 648         cft->seq_show = hugetlb_cgroup_read_u64_max;
 649         cft->flags = CFTYPE_NOT_ON_ROOT;
 650
 651         /* Add the events file */
 652         cft = &h->cgroup_files_dfl[4];
 653         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
 654         cft->private = MEMFILE_PRIVATE(idx, 0);
 655         cft->seq_show = hugetlb_events_show;
 656         cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
 657         cft->flags = CFTYPE_NOT_ON_ROOT;
 658
 659         /* Add the events.local file */
 660         cft = &h->cgroup_files_dfl[5];
 661         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
 662         cft->private = MEMFILE_PRIVATE(idx, 0);
 663         cft->seq_show = hugetlb_events_local_show;
 664         cft->file_offset = offsetof(struct hugetlb_cgroup,
 665                                     events_local_file[idx]);
 666         cft->flags = CFTYPE_NOT_ON_ROOT;
 667
 668         /* NULL terminate the last cft */
 669         cft = &h->cgroup_files_dfl[6];
 670         memset(cft, 0, sizeof(*cft));
 671
 672         WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
 673                                        h->cgroup_files_dfl));
 674 }
 675
 676 static void __init __hugetlb_cgroup_file_legacy_init(int idx)
 677 {
 678         char buf[32];
 679         struct cftype *cft;
 680         struct hstate *h = &hstates[idx];
 681
 682         /* format the size */
 683         mem_fmt(buf, sizeof(buf), huge_page_size(h));
 684
 685         /* Add the limit file */
 686         cft = &h->cgroup_files_legacy[0];
 687         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
 688         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 689         cft->read_u64 = hugetlb_cgroup_read_u64;
 690         cft->write = hugetlb_cgroup_write_legacy;
 691
 692         /* Add the reservation limit file */
 693         cft = &h->cgroup_files_legacy[1];
 694         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
 695         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
 696         cft->read_u64 = hugetlb_cgroup_read_u64;
 697         cft->write = hugetlb_cgroup_write_legacy;
 698
 699         /* Add the usage file */
 700         cft = &h->cgroup_files_legacy[2];
 701         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
 702         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
 703         cft->read_u64 = hugetlb_cgroup_read_u64;
 704
 705         /* Add the reservation usage file */
 706         cft = &h->cgroup_files_legacy[3];
 707         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
 708         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
 709         cft->read_u64 = hugetlb_cgroup_read_u64;
 710
 711         /* Add the MAX usage file */
 712         cft = &h->cgroup_files_legacy[4];
 713         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
 714         cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
 715         cft->write = hugetlb_cgroup_reset;
 716         cft->read_u64 = hugetlb_cgroup_read_u64;
 717
 718         /* Add the MAX reservation usage file */
 719         cft = &h->cgroup_files_legacy[5];
 720         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
 721         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
 722         cft->write = hugetlb_cgroup_reset;
 723         cft->read_u64 = hugetlb_cgroup_read_u64;
 724
 725         /* Add the failcntfile */
 726         cft = &h->cgroup_files_legacy[6];
 727         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
 728         cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
 729         cft->write = hugetlb_cgroup_reset;
 730         cft->read_u64 = hugetlb_cgroup_read_u64;
 731
 732         /* Add the reservation failcntfile */
 733         cft = &h->cgroup_files_legacy[7];
 734         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
 735         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
 736         cft->write = hugetlb_cgroup_reset;
 737         cft->read_u64 = hugetlb_cgroup_read_u64;
 738
 739         /* NULL terminate the last cft */
 740         cft = &h->cgroup_files_legacy[8];
 741         memset(cft, 0, sizeof(*cft));
 742
 743         WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
 744                                           h->cgroup_files_legacy));
 745 }
 746
 747 static void __init __hugetlb_cgroup_file_init(int idx)
 748 {
 749         __hugetlb_cgroup_file_dfl_init(idx);
 750         __hugetlb_cgroup_file_legacy_init(idx);
 751 }
 752
 753 void __init hugetlb_cgroup_file_init(void)
 754 {
 755         struct hstate *h;
 756
 757         for_each_hstate(h) {
 758                 /*
 759                  * Add cgroup control files only if the huge page consists
 760                  * of more than two normal pages. This is because we use
 761                  * page[2].private for storing cgroup details.
 762                  */
 763                 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
 764                         __hugetlb_cgroup_file_init(hstate_index(h));
 765         }
 766 }
 767
 768 /*
 769  * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 770  * when we migrate hugepages
 771  */
 772 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 773 {
 774         struct hugetlb_cgroup *h_cg;
 775         struct hugetlb_cgroup *h_cg_rsvd;
 776         struct hstate *h = page_hstate(oldhpage);
 777
 778         if (hugetlb_cgroup_disabled())
 779                 return;
 780
 781         VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
 782         spin_lock(&hugetlb_lock);
 783         h_cg = hugetlb_cgroup_from_page(oldhpage);
 784         h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
 785         set_hugetlb_cgroup(oldhpage, NULL);
 786         set_hugetlb_cgroup_rsvd(oldhpage, NULL);
 787
 788         /* move the h_cg details to new cgroup */
 789         set_hugetlb_cgroup(newhpage, h_cg);
 790         set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
 791         list_move(&newhpage->lru, &h->hugepage_activelist);
 792         spin_unlock(&hugetlb_lock);
 793         return;
 794 }
 795
 796 static struct cftype hugetlb_files[] = {
 797         {} /* terminate */
 798 };
 799
 800 struct cgroup_subsys hugetlb_cgrp_subsys = {
 801         .css_alloc      = hugetlb_cgroup_css_alloc,
 802         .css_offline    = hugetlb_cgroup_css_offline,
 803         .css_free       = hugetlb_cgroup_css_free,
 804         .dfl_cftypes    = hugetlb_files,
 805         .legacy_cftypes = hugetlb_files,
 806 };