Merge remote-tracking branch 'regulator/for-next' into tmp
[linux-2.6-microblaze.git] / mm / memcontrol.c
index 2058b8d..7a4bd8b 100644 (file)
@@ -2297,28 +2297,41 @@ static void high_work_func(struct work_struct *work)
  #define MEMCG_DELAY_SCALING_SHIFT 14
 
 /*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
  */
-void mem_cgroup_handle_over_high(void)
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+                                         unsigned int nr_pages)
 {
-       unsigned long usage, high, clamped_high;
-       unsigned long pflags;
-       unsigned long penalty_jiffies, overage;
-       unsigned int nr_pages = current->memcg_nr_pages_over_high;
-       struct mem_cgroup *memcg;
+       unsigned long penalty_jiffies;
+       u64 max_overage = 0;
 
-       if (likely(!nr_pages))
-               return;
+       do {
+               unsigned long usage, high;
+               u64 overage;
 
-       memcg = get_mem_cgroup_from_mm(current->mm);
-       reclaim_high(memcg, nr_pages, GFP_KERNEL);
-       current->memcg_nr_pages_over_high = 0;
+               usage = page_counter_read(&memcg->memory);
+               high = READ_ONCE(memcg->high);
+
+               /*
+                * Prevent division by 0 in overage calculation by acting as if
+                * it was a threshold of 1 page
+                */
+               high = max(high, 1UL);
+
+               overage = usage - high;
+               overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+               overage = div64_u64(overage, high);
+
+               if (overage > max_overage)
+                       max_overage = overage;
+       } while ((memcg = parent_mem_cgroup(memcg)) &&
+                !mem_cgroup_is_root(memcg));
+
+       if (!max_overage)
+               return 0;
 
        /*
-        * memory.high is breached and reclaim is unable to keep up. Throttle
-        * allocators proactively to slow down excessive growth.
-        *
         * We use overage compared to memory.high to calculate the number of
         * jiffies to sleep (penalty_jiffies). Ideally this value should be
         * fairly lenient on small overages, and increasingly harsh when the
@@ -2326,24 +2339,9 @@ void mem_cgroup_handle_over_high(void)
         * its crazy behaviour, so we exponentially increase the delay based on
         * overage amount.
         */
-
-       usage = page_counter_read(&memcg->memory);
-       high = READ_ONCE(memcg->high);
-
-       if (usage <= high)
-               goto out;
-
-       /*
-        * Prevent division by 0 in overage calculation by acting as if it was a
-        * threshold of 1 page
-        */
-       clamped_high = max(high, 1UL);
-
-       overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
-                         clamped_high);
-
-       penalty_jiffies = ((u64)overage * overage * HZ)
-               >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+       penalty_jiffies = max_overage * max_overage * HZ;
+       penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
+       penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;
 
        /*
         * Factor in the task's own contribution to the overage, such that four
@@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void)
         * application moving forwards and also permit diagnostics, albeit
         * extremely slowly.
         */
-       penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+       return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+       unsigned long penalty_jiffies;
+       unsigned long pflags;
+       unsigned int nr_pages = current->memcg_nr_pages_over_high;
+       struct mem_cgroup *memcg;
+
+       if (likely(!nr_pages))
+               return;
+
+       memcg = get_mem_cgroup_from_mm(current->mm);
+       reclaim_high(memcg, nr_pages, GFP_KERNEL);
+       current->memcg_nr_pages_over_high = 0;
+
+       /*
+        * memory.high is breached and reclaim is unable to keep up. Throttle
+        * allocators proactively to slow down excessive growth.
+        */
+       penalty_jiffies = calculate_high_delay(memcg, nr_pages);
 
        /*
         * Don't sleep if the amount of jiffies this memcg owes us is so low
@@ -4027,7 +4050,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
        struct mem_cgroup_thresholds *thresholds;
        struct mem_cgroup_threshold_ary *new;
        unsigned long usage;
-       int i, j, size;
+       int i, j, size, entries;
 
        mutex_lock(&memcg->thresholds_lock);
 
@@ -4047,14 +4070,20 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
        __mem_cgroup_threshold(memcg, type == _MEMSWAP);
 
        /* Calculate new number of threshold */
-       size = 0;
+       size = entries = 0;
        for (i = 0; i < thresholds->primary->size; i++) {
                if (thresholds->primary->entries[i].eventfd != eventfd)
                        size++;
+               else
+                       entries++;
        }
 
        new = thresholds->spare;
 
+       /* If no items related to eventfd have been cleared, nothing to do */
+       if (!entries)
+               goto unlock;
+
        /* Set thresholds array to NULL if we don't have thresholds */
        if (!size) {
                kfree(new);