+/*
+ * Determine to which zone to online memory dynamically based on user
+ * configuration and system stats. We care about the following ratio:
+ *
+ * MOVABLE : KERNEL
+ *
+ * Whereby MOVABLE is memory in ZONE_MOVABLE and KERNEL is memory in
+ * one of the kernel zones. CMA pages inside one of the kernel zones really
+ * behaves like ZONE_MOVABLE, so we treat them accordingly.
+ *
+ * We don't allow for hotplugged memory in a KERNEL zone to increase the
+ * amount of MOVABLE memory we can have, so we end up with:
+ *
+ * MOVABLE : KERNEL_EARLY
+ *
+ * Whereby KERNEL_EARLY is memory in one of the kernel zones, available sinze
+ * boot. We base our calculation on KERNEL_EARLY internally, because:
+ *
+ * a) Hotplugged memory in one of the kernel zones can sometimes still get
+ * hotunplugged, especially when hot(un)plugging individual memory blocks.
+ * There is no coordination across memory devices, therefore "automatic"
+ * hotunplugging, as implemented in hypervisors, could result in zone
+ * imbalances.
+ * b) Early/boot memory in one of the kernel zones can usually not get
+ * hotunplugged again (e.g., no firmware interface to unplug, fragmented
+ * with unmovable allocations). While there are corner cases where it might
+ * still work, it is barely relevant in practice.
+ *
+ * Exceptions are dynamic memory groups, which allow for more MOVABLE
+ * memory within the same memory group -- because in that case, there is
+ * coordination within the single memory device managed by a single driver.
+ *
+ * We rely on "present pages" instead of "managed pages", as the latter is
+ * highly unreliable and dynamic in virtualized environments, and does not
+ * consider boot time allocations. For example, memory ballooning adjusts the
+ * managed pages when inflating/deflating the balloon, and balloon compaction
+ * can even migrate inflated pages between zones.
+ *
+ * Using "present pages" is better but some things to keep in mind are:
+ *
+ * a) Some memblock allocations, such as for the crashkernel area, are
+ * effectively unused by the kernel, yet they account to "present pages".
+ * Fortunately, these allocations are comparatively small in relevant setups
+ * (e.g., fraction of system memory).
+ * b) Some hotplugged memory blocks in virtualized environments, esecially
+ * hotplugged by virtio-mem, look like they are completely present, however,
+ * only parts of the memory block are actually currently usable.
+ * "present pages" is an upper limit that can get reached at runtime. As
+ * we base our calculations on KERNEL_EARLY, this is not an issue.
+ */
+static struct zone *auto_movable_zone_for_pfn(int nid,
+ struct memory_group *group,
+ unsigned long pfn,
+ unsigned long nr_pages)
+{
+ unsigned long online_pages = 0, max_pages, end_pfn;
+ struct page *page;
+
+ if (!auto_movable_ratio)
+ goto kernel_zone;
+
+ if (group && !group->is_dynamic) {
+ max_pages = group->s.max_pages;
+ online_pages = group->present_movable_pages;
+
+ /* If anything is !MOVABLE online the rest !MOVABLE. */
+ if (group->present_kernel_pages)
+ goto kernel_zone;
+ } else if (!group || group->d.unit_pages == nr_pages) {
+ max_pages = nr_pages;
+ } else {
+ max_pages = group->d.unit_pages;
+ /*
+ * Take a look at all online sections in the current unit.
+ * We can safely assume that all pages within a section belong
+ * to the same zone, because dynamic memory groups only deal
+ * with hotplugged memory.
+ */
+ pfn = ALIGN_DOWN(pfn, group->d.unit_pages);
+ end_pfn = pfn + group->d.unit_pages;
+ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ page = pfn_to_online_page(pfn);
+ if (!page)
+ continue;
+ /* If anything is !MOVABLE online the rest !MOVABLE. */
+ if (page_zonenum(page) != ZONE_MOVABLE)
+ goto kernel_zone;
+ online_pages += PAGES_PER_SECTION;
+ }
+ }
+
+ /*
+ * Online MOVABLE if we could *currently* online all remaining parts
+ * MOVABLE. We expect to (add+) online them immediately next, so if
+ * nobody interferes, all will be MOVABLE if possible.
+ */
+ nr_pages = max_pages - online_pages;
+ if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages))
+ goto kernel_zone;
+
+#ifdef CONFIG_NUMA
+ if (auto_movable_numa_aware &&
+ !auto_movable_can_online_movable(nid, group, nr_pages))
+ goto kernel_zone;
+#endif /* CONFIG_NUMA */
+
+ return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
+kernel_zone:
+ return default_kernel_zone_for_pfn(nid, pfn, nr_pages);
+}
+