mm/vmalloc: fallback to a single page allocator
[linux-2.6-microblaze.git] / mm / backing-dev.c
index eca555f..271f2ca 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/mm.h>
+#include <linux/sched/mm.h>
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/writeback.h>
@@ -370,12 +371,16 @@ static void wb_exit(struct bdi_writeback *wb)
 #include <linux/memcontrol.h>
 
 /*
- * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, and memcg->cgwb_list.
- * bdi->cgwb_tree is also RCU protected.
+ * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, offline_cgwbs and
+ * memcg->cgwb_list.  bdi->cgwb_tree is also RCU protected.
  */
 static DEFINE_SPINLOCK(cgwb_lock);
 static struct workqueue_struct *cgwb_release_wq;
 
+static LIST_HEAD(offline_cgwbs);
+static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
+static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
+
 static void cgwb_release_workfn(struct work_struct *work)
 {
        struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@@ -394,7 +399,13 @@ static void cgwb_release_workfn(struct work_struct *work)
 
        fprop_local_destroy_percpu(&wb->memcg_completions);
        percpu_ref_exit(&wb->refcnt);
+
+       spin_lock_irq(&cgwb_lock);
+       list_del(&wb->offline_node);
+       spin_unlock_irq(&cgwb_lock);
+
        wb_exit(wb);
+       WARN_ON_ONCE(!list_empty(&wb->b_attached));
        kfree_rcu(wb, rcu);
 }
 
@@ -412,6 +423,7 @@ static void cgwb_kill(struct bdi_writeback *wb)
        WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
        list_del(&wb->memcg_node);
        list_del(&wb->blkcg_node);
+       list_add(&wb->offline_node, &offline_cgwbs);
        percpu_ref_kill(&wb->refcnt);
 }
 
@@ -471,6 +483,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
        wb->memcg_css = memcg_css;
        wb->blkcg_css = blkcg_css;
+       INIT_LIST_HEAD(&wb->b_attached);
        INIT_WORK(&wb->release_work, cgwb_release_workfn);
        set_bit(WB_registered, &wb->state);
 
@@ -578,7 +591,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 {
        struct bdi_writeback *wb;
 
-       might_sleep_if(gfpflags_allow_blocking(gfp));
+       might_alloc(gfp);
 
        if (!memcg_css->parent)
                return &bdi->wb;
@@ -632,6 +645,54 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
        mutex_unlock(&bdi->cgwb_release_mutex);
 }
 
+/*
+ * cleanup_offline_cgwbs_workfn - try to release dying cgwbs
+ * @work: the work item (unused)
+ *
+ * Try to release dying cgwbs by switching attached inodes to the nearest
+ * living ancestor's writeback.  Visited wbs are parked on a local list and
+ * spliced back onto the tail of offline_cgwbs to guarantee forward progress.
+ */
+static void cleanup_offline_cgwbs_workfn(struct work_struct *work)
+{
+       struct bdi_writeback *wb;
+       LIST_HEAD(processed);
+
+       spin_lock_irq(&cgwb_lock);
+
+       while (!list_empty(&offline_cgwbs)) {
+               wb = list_first_entry(&offline_cgwbs, struct bdi_writeback,
+                                     offline_node);
+               list_move(&wb->offline_node, &processed);
+
+               /*
+                * Switching the inodes of a dirty wb would effectively lift
+                * its bandwidth restrictions, which isn't the goal.  Skip it
+                * until the next run, when all io will likely be completed;
+                * inodes re-dirtied in the meantime should eventually be
+                * switched to a new cgwb.
+                */
+               if (wb_has_dirty_io(wb))
+                       continue;
+
+               if (!wb_tryget(wb))
+                       continue;
+
+               /* cgwb_lock is dropped around cleanup_offline_cgwb() */
+               spin_unlock_irq(&cgwb_lock);
+               while (cleanup_offline_cgwb(wb))
+                       cond_resched();
+               spin_lock_irq(&cgwb_lock);
+
+               wb_put(wb);
+       }
+
+       if (!list_empty(&processed))
+               list_splice_tail(&processed, &offline_cgwbs);
+
+       spin_unlock_irq(&cgwb_lock);
+}
+
 /**
  * wb_memcg_offline - kill all wb's associated with a memcg being offlined
  * @memcg: memcg being offlined
@@ -648,6 +709,8 @@ void wb_memcg_offline(struct mem_cgroup *memcg)
                cgwb_kill(wb);
        memcg_cgwb_list->next = NULL;   /* prevent new wb's */
        spin_unlock_irq(&cgwb_lock);
+
+       queue_work(system_unbound_wq, &cleanup_offline_cgwbs_work);
 }
 
 /**