include/linux/memcontrol.h

   1 /* memcontrol.h - Memory Controller
   2  *
   3  * Copyright IBM Corporation, 2007
   4  * Author Balbir Singh <balbir@linux.vnet.ibm.com>
   5  *
   6  * Copyright 2007 OpenVZ SWsoft Inc
   7  * Author: Pavel Emelianov <xemul@openvz.org>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  */
  19
  20 #ifndef _LINUX_MEMCONTROL_H
  21 #define _LINUX_MEMCONTROL_H
  22 #include <linux/cgroup.h>
  23 #include <linux/vm_event_item.h>
  24 #include <linux/hardirq.h>
  25 #include <linux/jump_label.h>
  26 #include <linux/page_counter.h>
  27 #include <linux/vmpressure.h>
  28 #include <linux/eventfd.h>
  29 #include <linux/mm.h>
  30 #include <linux/vmstat.h>
  31 #include <linux/writeback.h>
  32 #include <linux/page-flags.h>
  33
  34 struct mem_cgroup;
  35 struct page;
  36 struct mm_struct;
  37 struct kmem_cache;
  38
  39 /* Cgroup-specific page state, on top of universal node page state.
      * The first item starts at NR_VM_NODE_STAT_ITEMS, so node_stat_item and
      * memcg_stat_item values share one index space. */
  40 enum memcg_stat_item {
  41         MEMCG_CACHE = NR_VM_NODE_STAT_ITEMS,
  42         MEMCG_RSS,
  43         MEMCG_RSS_HUGE,
  44         MEMCG_SWAP,
  45         MEMCG_SOCK,
  46         /* XXX: why are these zone and not node counters? */
  47         MEMCG_KERNEL_STACK_KB,
  48         MEMCG_NR_STAT,          /* item count; sizes the memcg stat arrays */
  49 };
  50
  51 enum memcg_memory_event {      /* index into mem_cgroup::memory_events[] */
  52         MEMCG_LOW,
  53         MEMCG_HIGH,
  54         MEMCG_MAX,
  55         MEMCG_OOM,
  56         MEMCG_NR_MEMORY_EVENTS, /* event count; sizes memory_events[] */
  57 };
  58
  59 struct mem_cgroup_reclaim_cookie {     /* third argument of mem_cgroup_iter() */
  60         pg_data_t *pgdat;
  61         int priority;
  62         unsigned int generation;
  63 };
  64
  65 #ifdef CONFIG_MEMCG
  66
  67 #define MEM_CGROUP_ID_SHIFT     16
  68 #define MEM_CGROUP_ID_MAX       USHRT_MAX
  69
  70 struct mem_cgroup_id {
  71         int id;                 /* value returned by mem_cgroup_id() */
  72         atomic_t ref;           /* NOTE(review): presumably a refcount on the ID - confirm */
  73 };
  74
  75 /*
  76  * Per memcg event counter is incremented at every pagein/pageout. With THP,
  77  * it will be incremented by the number of pages. This counter is used
  78  * to trigger some periodic events. This is straightforward and better
  79  * than using jiffies etc. to handle periodic memcg event.
  80  */
  81 enum mem_cgroup_events_target {
  82         MEM_CGROUP_TARGET_THRESH,
  83         MEM_CGROUP_TARGET_SOFTLIMIT,
  84         MEM_CGROUP_TARGET_NUMAINFO,
  85         MEM_CGROUP_NTARGETS,    /* target count; sizes mem_cgroup_stat_cpu::targets[] */
  86 };
  87
  88 struct mem_cgroup_stat_cpu {   /* per-cpu data behind mem_cgroup::stat_cpu */
  89         long count[MEMCG_NR_STAT];              /* deltas batched by __mod_memcg_state() */
  90         unsigned long events[NR_VM_EVENT_ITEMS]; /* deltas batched by __count_memcg_events() */
  91         unsigned long nr_page_events;
  92         unsigned long targets[MEM_CGROUP_NTARGETS];
  93 };
  94
  95 struct mem_cgroup_reclaim_iter {       /* one per priority in mem_cgroup_per_node::iter[] */
  96         struct mem_cgroup *position;
  97         /* scan generation, increased every round-trip */
  98         unsigned int generation;
  99 };
 100
 101 struct lruvec_stat {           /* per-cpu deltas batched by __mod_lruvec_state() */
 102         long count[NR_VM_NODE_STAT_ITEMS];
 103 };
 104
 105 /*
 106  * per-node information in memory controller.
 107  */
 108 struct mem_cgroup_per_node {
 109         struct lruvec           lruvec;
 110
 111         struct lruvec_stat __percpu *lruvec_stat_cpu;
 112         atomic_long_t           lruvec_stat[NR_VM_NODE_STAT_ITEMS];
 113
 114         unsigned long           lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 115
 116         struct mem_cgroup_reclaim_iter  iter[DEF_PRIORITY + 1];
 117
 118         struct rb_node          tree_node;      /* RB tree node */
 119         unsigned long           usage_in_excess;/* Set to the value by which */
 120                                                 /* the soft limit is exceeded*/
 121         bool                    on_tree;
 122         bool                    congested;      /* memcg has many dirty pages */
 123                                                 /* backed by a congested BDI */
 124
 125         struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
 126                                                 /* use container_of        */
 127 };
 128
 129 struct mem_cgroup_threshold {  /* element type of mem_cgroup_threshold_ary::entries[] */
 130         struct eventfd_ctx *eventfd;
 131         unsigned long threshold;
 132 };
 133
 134 /* For threshold */
 135 struct mem_cgroup_threshold_ary {
 136         /* An array index points to threshold just below or equal to usage. */
 137         int current_threshold;
 138         /* Size of entries[] */
 139         unsigned int size;
 140         /* Array of thresholds */
 141         struct mem_cgroup_threshold entries[0];
 142 };
 143
 144 struct mem_cgroup_thresholds { /* type of mem_cgroup::thresholds and ::memsw_thresholds */
 145         /* Primary thresholds array */
 146         struct mem_cgroup_threshold_ary *primary;
 147         /*
 148          * Spare threshold array.
 149          * This is needed to make mem_cgroup_unregister_event() "never fail".
 150          * It must be able to store at least primary->size - 1 entries.
 151          */
 152         struct mem_cgroup_threshold_ary *spare;
 153 };
 154
 155 enum memcg_kmem_state {        /* values stored in mem_cgroup::kmem_state */
 156         KMEM_NONE,
 157         KMEM_ALLOCATED,
 158         KMEM_ONLINE,
 159 };
 160
 161 /*
 162  * The memory controller data structure. The memory controller controls both
 163  * page cache and RSS per cgroup. We would eventually like to provide
 164  * statistics based on the statistics developed by Rik Van Riel for clock-pro,
 165  * to help the administrator determine what knobs to tune.
 166  */
 167 struct mem_cgroup {
 168         struct cgroup_subsys_state css;
 169
 170         /* Private memcg ID. Used to ID objects that outlive the cgroup */
 171         struct mem_cgroup_id id;
 172
 173         /* Accounted resources */
 174         struct page_counter memory;
 175         struct page_counter swap;
 176
 177         /* Legacy consumer-oriented counters */
 178         struct page_counter memsw;
 179         struct page_counter kmem;
 180         struct page_counter tcpmem;
 181
 182         /* Normal memory consumption range */
 183         unsigned long low;
 184         unsigned long high;
 185
 186         /* Range enforcement for interrupt charges */
 187         struct work_struct high_work;
 188
 189         unsigned long soft_limit;
 190
 191         /* vmpressure notifications */
 192         struct vmpressure vmpressure;
 193
 194         /*
 195          * Should the accounting and control be hierarchical, per subtree?
 196          */
 197         bool use_hierarchy;
 198
 199         /* protected by memcg_oom_lock */
 200         bool            oom_lock;
 201         int             under_oom;
 202
 203         int     swappiness;
 204         /* OOM-Killer disable */
 205         int             oom_kill_disable;
 206
 207         /* memory.events */
 208         atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
 209         struct cgroup_file events_file;
 210
 211         /* protect arrays of thresholds */
 212         struct mutex thresholds_lock;
 213
 214         /* thresholds for memory usage. RCU-protected */
 215         struct mem_cgroup_thresholds thresholds;
 216
 217         /* thresholds for mem+swap usage. RCU-protected */
 218         struct mem_cgroup_thresholds memsw_thresholds;
 219
 220         /* For oom notifier event fd */
 221         struct list_head oom_notify;
 222
 223         /*
 224          * Should we move charges of a task when a task is moved into this
 225          * mem_cgroup ? And what type of charges should we move ?
 226          */
 227         unsigned long move_charge_at_immigrate;
 228         /*
 229          * set > 0 if pages under this cgroup are moving to other cgroup.
 230          */
 231         atomic_t                moving_account;
 232         /* taken only while moving_account > 0 */
 233         spinlock_t              move_lock;
 234         struct task_struct      *move_lock_task;
 235         unsigned long           move_lock_flags;
 236
 237         /* memory.stat */
 238         struct mem_cgroup_stat_cpu __percpu *stat_cpu;
 239         atomic_long_t           stat[MEMCG_NR_STAT];
 240         atomic_long_t           events[NR_VM_EVENT_ITEMS];
 241
 242         unsigned long           socket_pressure;
 243
 244         /* Legacy tcp memory accounting */
 245         bool                    tcpmem_active;
 246         int                     tcpmem_pressure;
 247
 248 #ifndef CONFIG_SLOB
 249         /* Index in the kmem_cache->memcg_params.memcg_caches array */
 250         int kmemcg_id;
 251         enum memcg_kmem_state kmem_state;
 252         struct list_head kmem_caches;
 253 #endif
 254
 255         int last_scanned_node;
 256 #if MAX_NUMNODES > 1
 257         nodemask_t      scan_nodes;
 258         atomic_t        numainfo_events;
 259         atomic_t        numainfo_updating;
 260 #endif
 261
 262 #ifdef CONFIG_CGROUP_WRITEBACK
 263         struct list_head cgwb_list;
 264         struct wb_domain cgwb_domain;
 265 #endif
 266
 267         /* List of events which userspace want to receive */
 268         struct list_head event_list;
 269         spinlock_t event_list_lock;
 270
 271         struct mem_cgroup_per_node *nodeinfo[0];
 272         /* WARNING: nodeinfo must be the last member here */
 273 };
 274
 275 /*
 276  * size of first charge trial. "32" comes from vmscan.c's magic value.
 277  * TODO: maybe necessary to use big numbers in big irons.
 278  */
 279 #define MEMCG_CHARGE_BATCH 32U
 280
 281 extern struct mem_cgroup *root_mem_cgroup;
 282
 283 static inline bool mem_cgroup_disabled(void)
 284 {
 285         return !cgroup_subsys_enabled(memory_cgrp_subsys);
 286 }
 287
 288 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
 289
 290 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 291                           gfp_t gfp_mask, struct mem_cgroup **memcgp,
 292                           bool compound);
 293 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 294                               bool lrucare, bool compound);
 295 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
 296                 bool compound);
 297 void mem_cgroup_uncharge(struct page *page);
 298 void mem_cgroup_uncharge_list(struct list_head *page_list);
 299
 300 void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
 301
 302 static struct mem_cgroup_per_node *
 303 mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
 304 {
 305         return memcg->nodeinfo[nid];
 306 }
 307
 308 /**
 309  * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
 310  * @node: node of the wanted lruvec
 311  * @memcg: memcg of the wanted lruvec
 312  *
 313  * Returns the lru list vector holding pages for a given @node or a given
 314  * @memcg and @zone. This can be the node lruvec, if the memory controller
 315  * is disabled.
 316  */
 317 static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
 318                                 struct mem_cgroup *memcg)
 319 {
 320         struct mem_cgroup_per_node *mz;
 321         struct lruvec *lruvec;
 322
 323         if (mem_cgroup_disabled()) {
 324                 lruvec = node_lruvec(pgdat);
 325                 goto out;
 326         }
 327
 328         mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
 329         lruvec = &mz->lruvec;
 330 out:
 331         /*
 332          * Since a node can be onlined after the mem_cgroup was created,
 333          * we have to be prepared to initialize lruvec->pgdat here;
 334          * and if offlined then reonlined, we need to reinitialize it.
 335          */
 336         if (unlikely(lruvec->pgdat != pgdat))
 337                 lruvec->pgdat = pgdat;
 338         return lruvec;
 339 }
 340
 341 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
 342
 343 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
 344 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 345
 346 static inline
 347 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 348         return css ? container_of(css, struct mem_cgroup, css) : NULL;
 349 }
 350
 351 #define mem_cgroup_from_counter(counter, member)        \
 352         container_of(counter, struct mem_cgroup, member)
 353
 354 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
 355                                    struct mem_cgroup *,
 356                                    struct mem_cgroup_reclaim_cookie *);
 357 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 358 int mem_cgroup_scan_tasks(struct mem_cgroup *,
 359                           int (*)(struct task_struct *, void *), void *);
 360
 361 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 362 {
 363         if (mem_cgroup_disabled())
 364                 return 0;
 365
 366         return memcg->id.id;
 367 }
 368 struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
 369
 370 static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
 371 {
 372         struct mem_cgroup_per_node *mz;
 373
 374         if (mem_cgroup_disabled())
 375                 return NULL;
 376
 377         mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 378         return mz->memcg;
 379 }
 380
 381 /**
 382  * parent_mem_cgroup - find the accounting parent of a memcg
 383  * @memcg: memcg whose parent to find
 384  *
 385  * Returns the parent memcg, or NULL if this is the root or the memory
 386  * controller is in legacy no-hierarchy mode.
 387  */
 388 static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 389 {
 390         if (!memcg->memory.parent)
 391                 return NULL;
 392         return mem_cgroup_from_counter(memcg->memory.parent, memory);
 393 }
 394
 395 static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
 396                               struct mem_cgroup *root)
 397 {
 398         if (root == memcg)
 399                 return true;
 400         if (!root->use_hierarchy)
 401                 return false;
 402         return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
 403 }
 404
 405 static inline bool mm_match_cgroup(struct mm_struct *mm,
 406                                    struct mem_cgroup *memcg)
 407 {
 408         struct mem_cgroup *task_memcg;
 409         bool match = false;
 410
 411         rcu_read_lock();
 412         task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 413         if (task_memcg)
 414                 match = mem_cgroup_is_descendant(task_memcg, memcg);
 415         rcu_read_unlock();
 416         return match;
 417 }
 418
 419 struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
 420 ino_t page_cgroup_ino(struct page *page);
 421
 422 static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 423 {
 424         if (mem_cgroup_disabled())
 425                 return true;
 426         return !!(memcg->css.flags & CSS_ONLINE);
 427 }
 428
 429 /*
 430  * For memory reclaim.
 431  */
 432 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 433
 434 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 435                 int zid, int nr_pages);
 436
 437 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 438                                            int nid, unsigned int lru_mask);
 439
 440 static inline
 441 unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 442 {
 443         struct mem_cgroup_per_node *mz;
 444         unsigned long nr_pages = 0;
 445         int zid;
 446
 447         mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 448         for (zid = 0; zid < MAX_NR_ZONES; zid++)
 449                 nr_pages += mz->lru_zone_size[zid][lru];
 450         return nr_pages;
 451 }
 452
 453 static inline
 454 unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
 455                 enum lru_list lru, int zone_idx)
 456 {
 457         struct mem_cgroup_per_node *mz;
 458
 459         mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 460         return mz->lru_zone_size[zone_idx][lru];
 461 }
 462
 463 void mem_cgroup_handle_over_high(void);
 464
 465 unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg);
 466
 467 void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 468                                 struct task_struct *p);
 469
 470 static inline void mem_cgroup_oom_enable(void)
 471 {
 472         WARN_ON(current->memcg_may_oom);
 473         current->memcg_may_oom = 1;
 474 }
 475
 476 static inline void mem_cgroup_oom_disable(void)
 477 {
 478         WARN_ON(!current->memcg_may_oom);
 479         current->memcg_may_oom = 0;
 480 }
 481
 482 static inline bool task_in_memcg_oom(struct task_struct *p)
 483 {
 484         return p->memcg_in_oom;
 485 }
 486
 487 bool mem_cgroup_oom_synchronize(bool wait);
 488
 489 #ifdef CONFIG_MEMCG_SWAP
 490 extern int do_swap_account;
 491 #endif
 492
 493 struct mem_cgroup *lock_page_memcg(struct page *page);
 494 void __unlock_page_memcg(struct mem_cgroup *memcg);
 495 void unlock_page_memcg(struct page *page);
 496
 497 /* idx can be of type enum memcg_stat_item or node_stat_item */
 498 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 499                                              int idx)
 500 {
 501         long x = atomic_long_read(&memcg->stat[idx]);
 502 #ifdef CONFIG_SMP
 503         if (x < 0)
 504                 x = 0;
 505 #endif
 506         return x;
 507 }
 508
 509 /* idx can be of type enum memcg_stat_item or node_stat_item */
 510 static inline void __mod_memcg_state(struct mem_cgroup *memcg,
 511                                      int idx, int val)
 512 {
 513         long x;
 514
 515         if (mem_cgroup_disabled())
 516                 return;
 517
 518         x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
 519         if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 520                 atomic_long_add(x, &memcg->stat[idx]);
 521                 x = 0;
 522         }
 523         __this_cpu_write(memcg->stat_cpu->count[idx], x);
 524 }
 525
 /*
  * Variant of __mod_memcg_state() that disables local interrupts around
  * the update. idx can be of type enum memcg_stat_item or node_stat_item.
  */
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
                                    int idx, int val)
 {
         unsigned long irq_flags;

         local_irq_save(irq_flags);
         __mod_memcg_state(memcg, idx, val);
         local_irq_restore(irq_flags);
 }
 536
 537 /**
 538  * mod_memcg_page_state - update page state statistics
 539  * @page: the page
 540  * @idx: page state item to account
 541  * @val: number of pages (positive or negative)
 542  *
 543  * The @page must be locked or the caller must use lock_page_memcg()
 544  * to prevent double accounting when the page is concurrently being
 545  * moved to another memcg:
 546  *
 547  *   lock_page(page) or lock_page_memcg(page)
 548  *   if (TestClearPageState(page))
 549  *     mod_memcg_page_state(page, state, -1);
 550  *   unlock_page(page) or unlock_page_memcg(page)
 551  *
 552  * Kernel pages are an exception to this, since they'll never move.
 553  */
 554 static inline void __mod_memcg_page_state(struct page *page,
 555                                           int idx, int val)
 556 {
 557         if (page->mem_cgroup)
 558                 __mod_memcg_state(page->mem_cgroup, idx, val);
 559 }
 560
 561 static inline void mod_memcg_page_state(struct page *page,
 562                                         int idx, int val)
 563 {
 564         if (page->mem_cgroup)
 565                 mod_memcg_state(page->mem_cgroup, idx, val);
 566 }
 567
 568 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 569                                               enum node_stat_item idx)
 570 {
 571         struct mem_cgroup_per_node *pn;
 572         long x;
 573
 574         if (mem_cgroup_disabled())
 575                 return node_page_state(lruvec_pgdat(lruvec), idx);
 576
 577         pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 578         x = atomic_long_read(&pn->lruvec_stat[idx]);
 579 #ifdef CONFIG_SMP
 580         if (x < 0)
 581                 x = 0;
 582 #endif
 583         return x;
 584 }
 585
 586 static inline void __mod_lruvec_state(struct lruvec *lruvec,
 587                                       enum node_stat_item idx, int val)
 588 {
 589         struct mem_cgroup_per_node *pn;
 590         long x;
 591
 592         /* Update node */
 593         __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
 594
 595         if (mem_cgroup_disabled())
 596                 return;
 597
 598         pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 599
 600         /* Update memcg */
 601         __mod_memcg_state(pn->memcg, idx, val);
 602
 603         /* Update lruvec */
 604         x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
 605         if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 606                 atomic_long_add(x, &pn->lruvec_stat[idx]);
 607                 x = 0;
 608         }
 609         __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 610 }
 611
 612 static inline void mod_lruvec_state(struct lruvec *lruvec,
 613                                     enum node_stat_item idx, int val)
 614 {
 615         unsigned long flags;
 616
 617         local_irq_save(flags);
 618         __mod_lruvec_state(lruvec, idx, val);
 619         local_irq_restore(flags);
 620 }
 621
 622 static inline void __mod_lruvec_page_state(struct page *page,
 623                                            enum node_stat_item idx, int val)
 624 {
 625         pg_data_t *pgdat = page_pgdat(page);
 626         struct lruvec *lruvec;
 627
 628         /* Untracked pages have no memcg, no lruvec. Update only the node */
 629         if (!page->mem_cgroup) {
 630                 __mod_node_page_state(pgdat, idx, val);
 631                 return;
 632         }
 633
 634         lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
 635         __mod_lruvec_state(lruvec, idx, val);
 636 }
 637
 638 static inline void mod_lruvec_page_state(struct page *page,
 639                                          enum node_stat_item idx, int val)
 640 {
 641         unsigned long flags;
 642
 643         local_irq_save(flags);
 644         __mod_lruvec_page_state(page, idx, val);
 645         local_irq_restore(flags);
 646 }
 647
 648 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 649                                                 gfp_t gfp_mask,
 650                                                 unsigned long *total_scanned);
 651
 652 static inline void __count_memcg_events(struct mem_cgroup *memcg,
 653                                         enum vm_event_item idx,
 654                                         unsigned long count)
 655 {
 656         unsigned long x;
 657
 658         if (mem_cgroup_disabled())
 659                 return;
 660
 661         x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
 662         if (unlikely(x > MEMCG_CHARGE_BATCH)) {
 663                 atomic_long_add(x, &memcg->events[idx]);
 664                 x = 0;
 665         }
 666         __this_cpu_write(memcg->stat_cpu->events[idx], x);
 667 }
 668
 669 static inline void count_memcg_events(struct mem_cgroup *memcg,
 670                                       enum vm_event_item idx,
 671                                       unsigned long count)
 672 {
 673         unsigned long flags;
 674
 675         local_irq_save(flags);
 676         __count_memcg_events(memcg, idx, count);
 677         local_irq_restore(flags);
 678 }
 679
 680 static inline void count_memcg_page_event(struct page *page,
 681                                           enum vm_event_item idx)
 682 {
 683         if (page->mem_cgroup)
 684                 count_memcg_events(page->mem_cgroup, idx, 1);
 685 }
 686
 687 static inline void count_memcg_event_mm(struct mm_struct *mm,
 688                                         enum vm_event_item idx)
 689 {
 690         struct mem_cgroup *memcg;
 691
 692         if (mem_cgroup_disabled())
 693                 return;
 694
 695         rcu_read_lock();
 696         memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 697         if (likely(memcg)) {
 698                 count_memcg_events(memcg, idx, 1);
 699                 if (idx == OOM_KILL)
 700                         cgroup_file_notify(&memcg->events_file);
 701         }
 702         rcu_read_unlock();
 703 }
 704
 705 static inline void memcg_memory_event(struct mem_cgroup *memcg,
 706                                       enum memcg_memory_event event)
 707 {
 708         atomic_long_inc(&memcg->memory_events[event]);
 709         cgroup_file_notify(&memcg->events_file);
 710 }
 711
 712 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 713 void mem_cgroup_split_huge_fixup(struct page *head);
 714 #endif
 715
 716 #else /* CONFIG_MEMCG */
 717
 718 #define MEM_CGROUP_ID_SHIFT     0
 719 #define MEM_CGROUP_ID_MAX       0
 720
 721 struct mem_cgroup;
 722
 /* !CONFIG_MEMCG stub: the controller is always reported as disabled. */
 static inline bool
 mem_cgroup_disabled(void)
 {
         return true;
 }
 727
 728 static inline void memcg_memory_event(struct mem_cgroup *memcg,
 729                                       enum memcg_memory_event event)
 730 {
 731 }
 732
 /* !CONFIG_MEMCG stub: always false. */
 static inline bool
 mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
 {
         return false;
 }
 738
 739 static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 740                                         gfp_t gfp_mask,
 741                                         struct mem_cgroup **memcgp,
 742                                         bool compound)
 743 {
 744         *memcgp = NULL;
 745         return 0;
 746 }
 747
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
                          bool lrucare, bool compound)
 {
 }
 753
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
                          bool compound)
 {
 }
 759
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_uncharge(struct page *page)
 {
 }
 763
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_uncharge_list(struct list_head *page_list)
 {
 }
 767
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_migrate(struct page *old, struct page *new)
 {
 }
 771
 /* !CONFIG_MEMCG stub: always the node lruvec of @pgdat. */
 static inline struct lruvec *
 mem_cgroup_lruvec(struct pglist_data *pgdat, struct mem_cgroup *memcg)
 {
         return node_lruvec(pgdat);
 }
 777
 778 static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
 779                                                     struct pglist_data *pgdat)
 780 {
 781         return &pgdat->lruvec;
 782 }
 783
 /* !CONFIG_MEMCG stub: every mm matches. */
 static inline bool
 mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg)
 {
         return true;
 }
 789
 /* !CONFIG_MEMCG stub: every task is considered a member. */
 static inline bool
 task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
 {
         return true;
 }
 795
 /* !CONFIG_MEMCG stub: there is nothing to iterate over, returns NULL. */
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev,
                 struct mem_cgroup_reclaim_cookie *reclaim)
 {
         return NULL;
 }
 803
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_iter_break(struct mem_cgroup *root, struct mem_cgroup *prev)
 {
 }
 808
 /* !CONFIG_MEMCG stub: @fn is never called, returns 0. */
 static inline int
 mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
                       int (*fn)(struct task_struct *, void *), void *arg)
 {
         return 0;
 }
 814
 /* !CONFIG_MEMCG stub: the ID is always 0. */
 static inline unsigned short
 mem_cgroup_id(struct mem_cgroup *memcg)
 {
         return 0;
 }
 819
 /* !CONFIG_MEMCG stub: warns once on a non-zero @id and returns NULL. */
 static inline struct mem_cgroup *
 mem_cgroup_from_id(unsigned short id)
 {
         WARN_ON_ONCE(id);

         /* XXX: This should always return root_mem_cgroup */
         return NULL;
 }
 826
 /* !CONFIG_MEMCG stub: no lruvec has a memcg. */
 static inline struct mem_cgroup *
 lruvec_memcg(struct lruvec *lruvec)
 {
         return NULL;
 }
 831
 /* !CONFIG_MEMCG stub: always online. */
 static inline bool
 mem_cgroup_online(struct mem_cgroup *memcg)
 {
         return true;
 }
 836
 837 static inline unsigned long
 838 mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 839 {
 840         return 0;
 841 }
 842 static inline
 843 unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
 844                 enum lru_list lru, int zone_idx)
 845 {
 846         return 0;
 847 }
 848
 /* !CONFIG_MEMCG stub: always 0. */
 static inline
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                            int nid, unsigned int lru_mask)
 {
         return 0;
 }
 855
 /* !CONFIG_MEMCG stub: always 0. */
 static inline unsigned long
 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
         return 0;
 }
 860
 /* !CONFIG_MEMCG stub: prints nothing. */
 static inline void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                              struct task_struct *p)
 {
 }
 865
 /* !CONFIG_MEMCG stub: takes no lock and returns NULL. */
 static inline struct mem_cgroup *
 lock_page_memcg(struct page *page)
 {
         return NULL;
 }
 870
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 __unlock_page_memcg(struct mem_cgroup *memcg)
 {
 }
 874
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 unlock_page_memcg(struct page *page)
 {
 }
 878
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_handle_over_high(void)
 {
 }
 882
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_oom_enable(void)
 {
 }
 886
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mem_cgroup_oom_disable(void)
 {
 }
 890
 /* !CONFIG_MEMCG stub: no task is ever in memcg OOM. */
 static inline bool
 task_in_memcg_oom(struct task_struct *p)
 {
         return false;
 }
 895
 /* !CONFIG_MEMCG stub: always false, whatever @wait is. */
 static inline bool
 mem_cgroup_oom_synchronize(bool wait)
 {
         return false;
 }
 900
 /* !CONFIG_MEMCG stub: every counter reads as 0. */
 static inline unsigned long
 memcg_page_state(struct mem_cgroup *memcg, int idx)
 {
         return 0;
 }
 906
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 __mod_memcg_state(struct mem_cgroup *memcg, int idx, int nr)
 {
 }
 912
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mod_memcg_state(struct mem_cgroup *memcg, int idx, int nr)
 {
 }
 918
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 __mod_memcg_page_state(struct page *page, int idx, int nr)
 {
 }
 924
 /* !CONFIG_MEMCG stub: no-op. */
 static inline void
 mod_memcg_page_state(struct page *page, int idx, int nr)
 {
 }
 930
 931 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 932                                               enum node_stat_item idx)
 933 {
 934         return node_page_state(lruvec_pgdat(lruvec), idx);
 935 }
 936
 937 static inline void __mod_lruvec_state(struct lruvec *lruvec,
 938                                       enum node_stat_item idx, int val)
 939 {
 940         __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
 941 }
 942
 943 static inline void mod_lruvec_state(struct lruvec *lruvec,
 944                                     enum node_stat_item idx, int val)
 945 {
 946         mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
 947 }
 948
 949 static inline void __mod_lruvec_page_state(struct page *page,
 950                                            enum node_stat_item idx, int val)
 951 {
 952         __mod_node_page_state(page_pgdat(page), idx, val);
 953 }
 954
 955 static inline void mod_lruvec_page_state(struct page *page,
 956                                          enum node_stat_item idx, int val)
 957 {
 958         mod_node_page_state(page_pgdat(page), idx, val);
 959 }
 960
 961 static inline
 962 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 963                                             gfp_t gfp_mask,
 964                                             unsigned long *total_scanned)
 965 {
 966         return 0;
 967 }
 968
 969 static inline void mem_cgroup_split_huge_fixup(struct page *head)
 970 {
 971 }
 972
 973 static inline void count_memcg_events(struct mem_cgroup *memcg,
 974                                       enum vm_event_item idx,
 975                                       unsigned long count)
 976 {
 977 }
 978
 979 static inline void count_memcg_page_event(struct page *page,
 980                                           int idx)
 981 {
 982 }
 983
 984 static inline
 985 void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 986 {
 987 }
 988 #endif /* CONFIG_MEMCG */
 989
 990 /* idx can be of type enum memcg_stat_item or node_stat_item */
 991 static inline void __inc_memcg_state(struct mem_cgroup *memcg,
 992                                      int idx)
 993 {
 994         __mod_memcg_state(memcg, idx, 1);
 995 }
 996
 997 /* idx can be of type enum memcg_stat_item or node_stat_item */
 998 static inline void __dec_memcg_state(struct mem_cgroup *memcg,
 999                                      int idx)
1000 {
1001         __mod_memcg_state(memcg, idx, -1);
1002 }
1003
/*
 * Add 1 to counter @idx for @page through __mod_memcg_page_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void __inc_memcg_page_state(struct page *page, int idx)
{
        __mod_memcg_page_state(page, idx, 1);
}
1010
/*
 * Subtract 1 from counter @idx for @page through __mod_memcg_page_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void __dec_memcg_page_state(struct page *page, int idx)
{
        __mod_memcg_page_state(page, idx, -1);
}
1017
1018 static inline void __inc_lruvec_state(struct lruvec *lruvec,
1019                                       enum node_stat_item idx)
1020 {
1021         __mod_lruvec_state(lruvec, idx, 1);
1022 }
1023
1024 static inline void __dec_lruvec_state(struct lruvec *lruvec,
1025                                       enum node_stat_item idx)
1026 {
1027         __mod_lruvec_state(lruvec, idx, -1);
1028 }
1029
1030 static inline void __inc_lruvec_page_state(struct page *page,
1031                                            enum node_stat_item idx)
1032 {
1033         __mod_lruvec_page_state(page, idx, 1);
1034 }
1035
1036 static inline void __dec_lruvec_page_state(struct page *page,
1037                                            enum node_stat_item idx)
1038 {
1039         __mod_lruvec_page_state(page, idx, -1);
1040 }
1041
/*
 * Add 1 to counter @idx of @memcg through mod_memcg_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void inc_memcg_state(struct mem_cgroup *memcg, int idx)
{
        mod_memcg_state(memcg, idx, 1);
}
1048
/*
 * Subtract 1 from counter @idx of @memcg through mod_memcg_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void dec_memcg_state(struct mem_cgroup *memcg, int idx)
{
        mod_memcg_state(memcg, idx, -1);
}
1055
/*
 * Add 1 to counter @idx for @page through mod_memcg_page_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void inc_memcg_page_state(struct page *page, int idx)
{
        mod_memcg_page_state(page, idx, 1);
}
1062
/*
 * Subtract 1 from counter @idx for @page through mod_memcg_page_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void dec_memcg_page_state(struct page *page, int idx)
{
        mod_memcg_page_state(page, idx, -1);
}
1069
1070 static inline void inc_lruvec_state(struct lruvec *lruvec,
1071                                     enum node_stat_item idx)
1072 {
1073         mod_lruvec_state(lruvec, idx, 1);
1074 }
1075
1076 static inline void dec_lruvec_state(struct lruvec *lruvec,
1077                                     enum node_stat_item idx)
1078 {
1079         mod_lruvec_state(lruvec, idx, -1);
1080 }
1081
1082 static inline void inc_lruvec_page_state(struct page *page,
1083                                          enum node_stat_item idx)
1084 {
1085         mod_lruvec_page_state(page, idx, 1);
1086 }
1087
1088 static inline void dec_lruvec_page_state(struct page *page,
1089                                          enum node_stat_item idx)
1090 {
1091         mod_lruvec_page_state(page, idx, -1);
1092 }
1093
1094 #ifdef CONFIG_CGROUP_WRITEBACK
1095
/*
 * Prototypes only; the definitions are not in this header.  Note that
 * mem_cgroup_cgwb_list() has no stub in the #else branch of this
 * conditional, so it is declared only when CONFIG_CGROUP_WRITEBACK is set.
 */
struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
/*
 * Takes four unsigned long out-pointers.
 * NOTE(review): presumably each is filled in by the implementation; the
 * meaning and units of the values are not visible here -- confirm there.
 */
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
                         unsigned long *pheadroom, unsigned long *pdirty,
                         unsigned long *pwriteback);
1101
1102 #else   /* CONFIG_CGROUP_WRITEBACK */
1103
/* !CONFIG_CGROUP_WRITEBACK stub: there is no domain to hand out, so NULL. */
static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
{
        return NULL;
}
1108
/*
 * !CONFIG_CGROUP_WRITEBACK stub: does nothing.  In particular none of the
 * four out-pointers is written, so callers' variables keep their old values.
 */
static inline void
mem_cgroup_wb_stats(struct bdi_writeback *wb,
                    unsigned long *pfilepages, unsigned long *pheadroom,
                    unsigned long *pdirty, unsigned long *pwriteback)
{
}
1116
1117 #endif  /* CONFIG_CGROUP_WRITEBACK */
1118
/* Forward declaration: only pointers to struct sock are used below. */
struct sock;
/*
 * Socket-memory charge/uncharge prototypes.  They are declared ahead of the
 * CONFIG_MEMCG conditional that follows; the definitions are not in this
 * header.
 * NOTE(review): the meaning of the bool result of the charge call is not
 * visible here -- confirm against the implementation.
 */
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
1122 #ifdef CONFIG_MEMCG
/* Static key tested by mem_cgroup_sockets_enabled; defined outside this header. */
extern struct static_key_false memcg_sockets_enabled_key;
/* Evaluates the key above with static_branch_unlikely(). */
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
/* Per-socket alloc/free hooks; prototypes only in this branch. */
void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
1127 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
1128 {
1129         if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
1130                 return true;
1131         do {
1132                 if (time_before(jiffies, memcg->socket_pressure))
1133                         return true;
1134         } while ((memcg = parent_mem_cgroup(memcg)));
1135         return false;
1136 }
1137 #else
/* #else branch of CONFIG_MEMCG: constant 0, so any test of it is false. */
#define mem_cgroup_sockets_enabled 0
/*
 * No-op stub for the #else branch of CONFIG_MEMCG: @sk is ignored.
 * (No ';' after the body: a stray one is an empty file-scope declaration,
 * which ISO C does not permit.)
 */
static inline void mem_cgroup_sk_alloc(struct sock *sk) { }
/*
 * No-op stub for the #else branch of CONFIG_MEMCG: @sk is ignored.
 * (No ';' after the body: a stray one is an empty file-scope declaration,
 * which ISO C does not permit.)
 */
static inline void mem_cgroup_sk_free(struct sock *sk) { }
/* Stub for the #else branch of CONFIG_MEMCG: never reports pressure. */
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
        return false;
}
1145 #endif
1146
/*
 * kmem accounting prototypes.  They are declared ahead of the
 * CONFIG_MEMCG && !CONFIG_SLOB conditional that follows; the definitions
 * are not in this header.
 * NOTE(review): the int returns presumably follow the 0 / negative-errno
 * convention -- confirm against the implementation.
 */
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
void memcg_kmem_put_cache(struct kmem_cache *cachep);
int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
                            struct mem_cgroup *memcg);
int memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
void memcg_kmem_uncharge(struct page *page, int order);
1153
1154 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
/* Static key read by memcg_kmem_enabled(); defined outside this header. */
extern struct static_key_false memcg_kmem_enabled_key;
extern struct workqueue_struct *memcg_kmem_cache_wq;

/* Upper bound used by for_each_memcg_cache_index(). */
extern int memcg_nr_cache_ids;
/*
 * NOTE(review): the get/put pair presumably brackets code that relies on
 * memcg_nr_cache_ids staying stable -- confirm against the implementation.
 */
void memcg_get_cache_ids(void);
void memcg_put_cache_ids(void);
1161
/*
 * Helper macro to loop through all memcg-specific cache indices, i.e. every
 * value of _idx from 0 up to (but not including) memcg_nr_cache_ids.
 * Callers must still check whether the cache at each index is valid (it is
 * either valid or NULL).  The slab_mutex must be held while looping through
 * those caches.
 */
#define for_each_memcg_cache_index(_idx)        \
        for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++)
1169
1170 static inline bool memcg_kmem_enabled(void)
1171 {
1172         return static_branch_unlikely(&memcg_kmem_enabled_key);
1173 }
1174
1175 /*
1176  * helper for accessing a memcg's index. It will be used as an index in the
1177  * child cache array in kmem_cache, and also to derive its name. This function
1178  * will return -1 when this is not a kmem-limited memcg.
1179  */
1180 static inline int memcg_cache_id(struct mem_cgroup *memcg)
1181 {
1182         return memcg ? memcg->kmemcg_id : -1;
1183 }
1184
1185 #else
/*
 * Stub for the #else branch: the loop condition is NULL, i.e. always false,
 * so the loop body never runs and _idx is never touched.
 */
#define for_each_memcg_cache_index(_idx)        \
        for (; NULL; )
1188
/* Stub for the #else branch: kmem accounting is reported as disabled. */
static inline bool memcg_kmem_enabled(void)
{
        return false;
}
1193
/* Stub for the #else branch: every memcg (NULL included) maps to -1. */
static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
        return -1;
}
1198
/* No-op stub for the #else branch. */
static inline void memcg_get_cache_ids(void)
{
        /* intentionally empty */
}
1202
/* No-op stub for the #else branch. */
static inline void memcg_put_cache_ids(void)
{
        /* intentionally empty */
}
1206
1207 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
1208
1209 #endif /* _LINUX_MEMCONTROL_H */