node: Link memory nodes to their compute nodes
[linux-2.6-microblaze.git] / drivers / base / node.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Basic Node interface support
4  */
5
6 #include <linux/module.h>
7 #include <linux/init.h>
8 #include <linux/mm.h>
9 #include <linux/memory.h>
10 #include <linux/vmstat.h>
11 #include <linux/notifier.h>
12 #include <linux/node.h>
13 #include <linux/hugetlb.h>
14 #include <linux/compaction.h>
15 #include <linux/cpumask.h>
16 #include <linux/topology.h>
17 #include <linux/nodemask.h>
18 #include <linux/cpu.h>
19 #include <linux/device.h>
20 #include <linux/pm_runtime.h>
21 #include <linux/swap.h>
22 #include <linux/slab.h>
23
/*
 * Subsystem that node devices are attached to: register_node() stores it in
 * each node device's ->bus and register_node_type() registers it.
 */
static struct bus_type node_subsys = {
        .name = "node",
        .dev_name = "node",
};
28
29
30 static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
31 {
32         ssize_t n;
33         cpumask_var_t mask;
34         struct node *node_dev = to_node(dev);
35
36         /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
37         BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
38
39         if (!alloc_cpumask_var(&mask, GFP_KERNEL))
40                 return 0;
41
42         cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
43         n = cpumap_print_to_pagebuf(list, buf, mask);
44         free_cpumask_var(mask);
45
46         return n;
47 }
48
49 static inline ssize_t node_read_cpumask(struct device *dev,
50                                 struct device_attribute *attr, char *buf)
51 {
52         return node_read_cpumap(dev, false, buf);
53 }
54 static inline ssize_t node_read_cpulist(struct device *dev,
55                                 struct device_attribute *attr, char *buf)
56 {
57         return node_read_cpumap(dev, true, buf);
58 }
59
/* Read-only attributes "cpumap" and "cpulist"; neither has a store callback. */
static DEVICE_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
62
/**
 * struct node_access_nodes - Access class device to hold user visible
 *                            relationships to other nodes.
 * @dev:        Device for this memory access class
 * @list_node:  List element in the node's access list
 * @access:     The access class rank
 */
struct node_access_nodes {
        struct device           dev;
        struct list_head        list_node;
        unsigned                access;
};
/* Map an embedded struct device back to its containing node_access_nodes. */
#define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)
76
/* Attribute list of the "initiators" group; it holds no attributes here. */
static struct attribute *node_init_access_node_attrs[] = {
        NULL,
};

/* Attribute list of the "targets" group; it holds no attributes here. */
static struct attribute *node_targ_access_node_attrs[] = {
        NULL,
};

/*
 * Named group "initiators"; register_memory_node_under_compute_node() adds
 * links to it.
 */
static const struct attribute_group initiators = {
        .name   = "initiators",
        .attrs  = node_init_access_node_attrs,
};

/*
 * Named group "targets"; register_memory_node_under_compute_node() adds
 * links to it.
 */
static const struct attribute_group targets = {
        .name   = "targets",
        .attrs  = node_targ_access_node_attrs,
};

/* Groups assigned to every access class device by node_init_node_access(). */
static const struct attribute_group *node_access_node_groups[] = {
        &initiators,
        &targets,
        NULL,
};
100
101 static void node_remove_accesses(struct node *node)
102 {
103         struct node_access_nodes *c, *cnext;
104
105         list_for_each_entry_safe(c, cnext, &node->access_list, list_node) {
106                 list_del(&c->list_node);
107                 device_unregister(&c->dev);
108         }
109 }
110
/* Device release callback: free the node_access_nodes that embeds @dev. */
static void node_access_release(struct device *dev)
{
        struct node_access_nodes *access_node = to_access_nodes(dev);

        kfree(access_node);
}
115
116 static struct node_access_nodes *node_init_node_access(struct node *node,
117                                                        unsigned access)
118 {
119         struct node_access_nodes *access_node;
120         struct device *dev;
121
122         list_for_each_entry(access_node, &node->access_list, list_node)
123                 if (access_node->access == access)
124                         return access_node;
125
126         access_node = kzalloc(sizeof(*access_node), GFP_KERNEL);
127         if (!access_node)
128                 return NULL;
129
130         access_node->access = access;
131         dev = &access_node->dev;
132         dev->parent = &node->dev;
133         dev->release = node_access_release;
134         dev->groups = node_access_node_groups;
135         if (dev_set_name(dev, "access%u", access))
136                 goto free;
137
138         if (device_register(dev))
139                 goto free_name;
140
141         pm_runtime_no_callbacks(dev);
142         list_add_tail(&access_node->list_node, &node->access_list);
143         return access_node;
144 free_name:
145         kfree_const(dev->kobj.name);
146 free:
147         kfree(access_node);
148         return NULL;
149 }
150
151 #define K(x) ((x) << (PAGE_SHIFT - 10))
152 static ssize_t node_read_meminfo(struct device *dev,
153                         struct device_attribute *attr, char *buf)
154 {
155         int n;
156         int nid = dev->id;
157         struct pglist_data *pgdat = NODE_DATA(nid);
158         struct sysinfo i;
159         unsigned long sreclaimable, sunreclaimable;
160
161         si_meminfo_node(&i, nid);
162         sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
163         sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
164         n = sprintf(buf,
165                        "Node %d MemTotal:       %8lu kB\n"
166                        "Node %d MemFree:        %8lu kB\n"
167                        "Node %d MemUsed:        %8lu kB\n"
168                        "Node %d Active:         %8lu kB\n"
169                        "Node %d Inactive:       %8lu kB\n"
170                        "Node %d Active(anon):   %8lu kB\n"
171                        "Node %d Inactive(anon): %8lu kB\n"
172                        "Node %d Active(file):   %8lu kB\n"
173                        "Node %d Inactive(file): %8lu kB\n"
174                        "Node %d Unevictable:    %8lu kB\n"
175                        "Node %d Mlocked:        %8lu kB\n",
176                        nid, K(i.totalram),
177                        nid, K(i.freeram),
178                        nid, K(i.totalram - i.freeram),
179                        nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
180                                 node_page_state(pgdat, NR_ACTIVE_FILE)),
181                        nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
182                                 node_page_state(pgdat, NR_INACTIVE_FILE)),
183                        nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
184                        nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
185                        nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
186                        nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
187                        nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
188                        nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
189
190 #ifdef CONFIG_HIGHMEM
191         n += sprintf(buf + n,
192                        "Node %d HighTotal:      %8lu kB\n"
193                        "Node %d HighFree:       %8lu kB\n"
194                        "Node %d LowTotal:       %8lu kB\n"
195                        "Node %d LowFree:        %8lu kB\n",
196                        nid, K(i.totalhigh),
197                        nid, K(i.freehigh),
198                        nid, K(i.totalram - i.totalhigh),
199                        nid, K(i.freeram - i.freehigh));
200 #endif
201         n += sprintf(buf + n,
202                        "Node %d Dirty:          %8lu kB\n"
203                        "Node %d Writeback:      %8lu kB\n"
204                        "Node %d FilePages:      %8lu kB\n"
205                        "Node %d Mapped:         %8lu kB\n"
206                        "Node %d AnonPages:      %8lu kB\n"
207                        "Node %d Shmem:          %8lu kB\n"
208                        "Node %d KernelStack:    %8lu kB\n"
209                        "Node %d PageTables:     %8lu kB\n"
210                        "Node %d NFS_Unstable:   %8lu kB\n"
211                        "Node %d Bounce:         %8lu kB\n"
212                        "Node %d WritebackTmp:   %8lu kB\n"
213                        "Node %d KReclaimable:   %8lu kB\n"
214                        "Node %d Slab:           %8lu kB\n"
215                        "Node %d SReclaimable:   %8lu kB\n"
216                        "Node %d SUnreclaim:     %8lu kB\n"
217 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
218                        "Node %d AnonHugePages:  %8lu kB\n"
219                        "Node %d ShmemHugePages: %8lu kB\n"
220                        "Node %d ShmemPmdMapped: %8lu kB\n"
221 #endif
222                         ,
223                        nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
224                        nid, K(node_page_state(pgdat, NR_WRITEBACK)),
225                        nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
226                        nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
227                        nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
228                        nid, K(i.sharedram),
229                        nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
230                        nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
231                        nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
232                        nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
233                        nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
234                        nid, K(sreclaimable +
235                               node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
236                        nid, K(sreclaimable + sunreclaimable),
237                        nid, K(sreclaimable),
238                        nid, K(sunreclaimable)
239 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
240                        ,
241                        nid, K(node_page_state(pgdat, NR_ANON_THPS) *
242                                        HPAGE_PMD_NR),
243                        nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
244                                        HPAGE_PMD_NR),
245                        nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
246                                        HPAGE_PMD_NR)
247 #endif
248                        );
249         n += hugetlb_report_node_meminfo(nid, buf + n);
250         return n;
251 }
252
253 #undef K
254 static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
255
256 static ssize_t node_read_numastat(struct device *dev,
257                                 struct device_attribute *attr, char *buf)
258 {
259         return sprintf(buf,
260                        "numa_hit %lu\n"
261                        "numa_miss %lu\n"
262                        "numa_foreign %lu\n"
263                        "interleave_hit %lu\n"
264                        "local_node %lu\n"
265                        "other_node %lu\n",
266                        sum_zone_numa_state(dev->id, NUMA_HIT),
267                        sum_zone_numa_state(dev->id, NUMA_MISS),
268                        sum_zone_numa_state(dev->id, NUMA_FOREIGN),
269                        sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
270                        sum_zone_numa_state(dev->id, NUMA_LOCAL),
271                        sum_zone_numa_state(dev->id, NUMA_OTHER));
272 }
273 static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
274
275 static ssize_t node_read_vmstat(struct device *dev,
276                                 struct device_attribute *attr, char *buf)
277 {
278         int nid = dev->id;
279         struct pglist_data *pgdat = NODE_DATA(nid);
280         int i;
281         int n = 0;
282
283         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
284                 n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
285                              sum_zone_node_page_state(nid, i));
286
287 #ifdef CONFIG_NUMA
288         for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
289                 n += sprintf(buf+n, "%s %lu\n",
290                              vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
291                              sum_zone_numa_state(nid, i));
292 #endif
293
294         for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
295                 n += sprintf(buf+n, "%s %lu\n",
296                              vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
297                              NR_VM_NUMA_STAT_ITEMS],
298                              node_page_state(pgdat, i));
299
300         return n;
301 }
302 static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
303
304 static ssize_t node_read_distance(struct device *dev,
305                         struct device_attribute *attr, char *buf)
306 {
307         int nid = dev->id;
308         int len = 0;
309         int i;
310
311         /*
312          * buf is currently PAGE_SIZE in length and each node needs 4 chars
313          * at the most (distance + space or newline).
314          */
315         BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
316
317         for_each_online_node(i)
318                 len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i));
319
320         len += sprintf(buf + len, "\n");
321         return len;
322 }
323 static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL);
324
/*
 * Attribute files of every node device.  ATTRIBUTE_GROUPS() below provides
 * node_dev_groups, which register_node() assigns to the device's ->groups.
 */
static struct attribute *node_dev_attrs[] = {
        &dev_attr_cpumap.attr,
        &dev_attr_cpulist.attr,
        &dev_attr_meminfo.attr,
        &dev_attr_numastat.attr,
        &dev_attr_distance.attr,
        &dev_attr_vmstat.attr,
        NULL
};
ATTRIBUTE_GROUPS(node_dev);
335
336 #ifdef CONFIG_HUGETLBFS
/*
 * hugetlbfs per node attributes registration interface:
 * When/if hugetlb[fs] subsystem initializes [sometime after this module],
 * it will register its per node attributes for all online nodes with
 * memory.  It will also call register_hugetlbfs_with_node(), below, to
 * register its attribute registration functions with this node driver.
 * Once these hooks have been initialized, the node driver will call into
 * the hugetlb module to [un]register attributes for hot-plugged nodes.
 */
/* Hooks set by register_hugetlbfs_with_node(); checked for NULL before use. */
static node_registration_func_t __hugetlb_register_node;
static node_registration_func_t __hugetlb_unregister_node;
348
349 static inline bool hugetlb_register_node(struct node *node)
350 {
351         if (__hugetlb_register_node &&
352                         node_state(node->dev.id, N_MEMORY)) {
353                 __hugetlb_register_node(node);
354                 return true;
355         }
356         return false;
357 }
358
359 static inline void hugetlb_unregister_node(struct node *node)
360 {
361         if (__hugetlb_unregister_node)
362                 __hugetlb_unregister_node(node);
363 }
364
365 void register_hugetlbfs_with_node(node_registration_func_t doregister,
366                                   node_registration_func_t unregister)
367 {
368         __hugetlb_register_node   = doregister;
369         __hugetlb_unregister_node = unregister;
370 }
371 #else
/*
 * !CONFIG_HUGETLBFS stubs.  The register stub returns bool, like the
 * CONFIG_HUGETLBFS variant, so callers see one signature in both builds;
 * false means nothing was registered.
 */
static inline bool hugetlb_register_node(struct node *node)
{
        return false;
}

static inline void hugetlb_unregister_node(struct node *node) {}
375 #endif
376
/* Device release callback for a node device: frees the struct node. */
static void node_device_release(struct device *dev)
{
        struct node *node = to_node(dev);

#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
        /*
         * node->node_work is only scheduled when a memory section on this
         * node goes online or offline.  By the time we get here all memory
         * on the node is offline, so nothing new will be queued.
         *
         * The work item lives inside the node, so wait for any pending
         * run to finish before the memory is released.
         */
        flush_work(&node->node_work);
#endif
        kfree(node);
}
395
396 /*
397  * register_node - Setup a sysfs device for a node.
398  * @num - Node number to use when creating the device.
399  *
400  * Initialize and register the node device.
401  */
402 static int register_node(struct node *node, int num)
403 {
404         int error;
405
406         node->dev.id = num;
407         node->dev.bus = &node_subsys;
408         node->dev.release = node_device_release;
409         node->dev.groups = node_dev_groups;
410         error = device_register(&node->dev);
411
412         if (error)
413                 put_device(&node->dev);
414         else {
415                 hugetlb_register_node(node);
416
417                 compaction_register_node(node);
418         }
419         return error;
420 }
421
422 /**
423  * unregister_node - unregister a node device
424  * @node: node going away
425  *
426  * Unregisters a node device @node.  All the devices on the node must be
427  * unregistered before calling this function.
428  */
429 void unregister_node(struct node *node)
430 {
431         hugetlb_unregister_node(node);          /* no-op, if memoryless node */
432         node_remove_accesses(node);
433         device_unregister(&node->dev);
434 }
435
/*
 * Node structures indexed by node id.  Slots are filled by
 * __register_one_node() and reset to NULL by unregister_one_node().
 */
struct node *node_devices[MAX_NUMNODES];
437
438 /*
439  * register cpu under node
440  */
441 int register_cpu_under_node(unsigned int cpu, unsigned int nid)
442 {
443         int ret;
444         struct device *obj;
445
446         if (!node_online(nid))
447                 return 0;
448
449         obj = get_cpu_device(cpu);
450         if (!obj)
451                 return 0;
452
453         ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
454                                 &obj->kobj,
455                                 kobject_name(&obj->kobj));
456         if (ret)
457                 return ret;
458
459         return sysfs_create_link(&obj->kobj,
460                                  &node_devices[nid]->dev.kobj,
461                                  kobject_name(&node_devices[nid]->dev.kobj));
462 }
463
464 /**
465  * register_memory_node_under_compute_node - link memory node to its compute
466  *                                           node for a given access class.
467  * @mem_node:   Memory node number
468  * @cpu_node:   Cpu  node number
469  * @access:     Access class to register
470  *
471  * Description:
472  *      For use with platforms that may have separate memory and compute nodes.
473  *      This function will export node relationships linking which memory
474  *      initiator nodes can access memory targets at a given ranked access
475  *      class.
476  */
477 int register_memory_node_under_compute_node(unsigned int mem_nid,
478                                             unsigned int cpu_nid,
479                                             unsigned access)
480 {
481         struct node *init_node, *targ_node;
482         struct node_access_nodes *initiator, *target;
483         int ret;
484
485         if (!node_online(cpu_nid) || !node_online(mem_nid))
486                 return -ENODEV;
487
488         init_node = node_devices[cpu_nid];
489         targ_node = node_devices[mem_nid];
490         initiator = node_init_node_access(init_node, access);
491         target = node_init_node_access(targ_node, access);
492         if (!initiator || !target)
493                 return -ENOMEM;
494
495         ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets",
496                                       &targ_node->dev.kobj,
497                                       dev_name(&targ_node->dev));
498         if (ret)
499                 return ret;
500
501         ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators",
502                                       &init_node->dev.kobj,
503                                       dev_name(&init_node->dev));
504         if (ret)
505                 goto err;
506
507         return 0;
508  err:
509         sysfs_remove_link_from_group(&initiator->dev.kobj, "targets",
510                                      dev_name(&targ_node->dev));
511         return ret;
512 }
513
514 int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
515 {
516         struct device *obj;
517
518         if (!node_online(nid))
519                 return 0;
520
521         obj = get_cpu_device(cpu);
522         if (!obj)
523                 return 0;
524
525         sysfs_remove_link(&node_devices[nid]->dev.kobj,
526                           kobject_name(&obj->kobj));
527         sysfs_remove_link(&obj->kobj,
528                           kobject_name(&node_devices[nid]->dev.kobj));
529
530         return 0;
531 }
532
533 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
534 static int __ref get_nid_for_pfn(unsigned long pfn)
535 {
536         if (!pfn_valid_within(pfn))
537                 return -1;
538 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
539         if (system_state < SYSTEM_RUNNING)
540                 return early_pfn_to_nid(pfn);
541 #endif
542         return pfn_to_nid(pfn);
543 }
544
545 /* register memory section under specified node if it spans that node */
546 int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
547 {
548         int ret, nid = *(int *)arg;
549         unsigned long pfn, sect_start_pfn, sect_end_pfn;
550
551         mem_blk->nid = nid;
552
553         sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
554         sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
555         sect_end_pfn += PAGES_PER_SECTION - 1;
556         for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
557                 int page_nid;
558
559                 /*
560                  * memory block could have several absent sections from start.
561                  * skip pfn range from absent section
562                  */
563                 if (!pfn_present(pfn)) {
564                         pfn = round_down(pfn + PAGES_PER_SECTION,
565                                          PAGES_PER_SECTION) - 1;
566                         continue;
567                 }
568
569                 /*
570                  * We need to check if page belongs to nid only for the boot
571                  * case, during hotplug we know that all pages in the memory
572                  * block belong to the same node.
573                  */
574                 if (system_state == SYSTEM_BOOTING) {
575                         page_nid = get_nid_for_pfn(pfn);
576                         if (page_nid < 0)
577                                 continue;
578                         if (page_nid != nid)
579                                 continue;
580                 }
581                 ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
582                                         &mem_blk->dev.kobj,
583                                         kobject_name(&mem_blk->dev.kobj));
584                 if (ret)
585                         return ret;
586
587                 return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
588                                 &node_devices[nid]->dev.kobj,
589                                 kobject_name(&node_devices[nid]->dev.kobj));
590         }
591         /* mem section does not span the specified node */
592         return 0;
593 }
594
595 /* unregister memory section under all nodes that it spans */
596 int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
597                                     unsigned long phys_index)
598 {
599         NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL);
600         unsigned long pfn, sect_start_pfn, sect_end_pfn;
601
602         if (!mem_blk) {
603                 NODEMASK_FREE(unlinked_nodes);
604                 return -EFAULT;
605         }
606         if (!unlinked_nodes)
607                 return -ENOMEM;
608         nodes_clear(*unlinked_nodes);
609
610         sect_start_pfn = section_nr_to_pfn(phys_index);
611         sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
612         for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
613                 int nid;
614
615                 nid = get_nid_for_pfn(pfn);
616                 if (nid < 0)
617                         continue;
618                 if (!node_online(nid))
619                         continue;
620                 if (node_test_and_set(nid, *unlinked_nodes))
621                         continue;
622                 sysfs_remove_link(&node_devices[nid]->dev.kobj,
623                          kobject_name(&mem_blk->dev.kobj));
624                 sysfs_remove_link(&mem_blk->dev.kobj,
625                          kobject_name(&node_devices[nid]->dev.kobj));
626         }
627         NODEMASK_FREE(unlinked_nodes);
628         return 0;
629 }
630
631 int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
632 {
633         return walk_memory_range(start_pfn, end_pfn, (void *)&nid,
634                                         register_mem_sect_under_node);
635 }
636
637 #ifdef CONFIG_HUGETLBFS
638 /*
639  * Handle per node hstate attribute [un]registration on transistions
640  * to/from memoryless state.
641  */
642 static void node_hugetlb_work(struct work_struct *work)
643 {
644         struct node *node = container_of(work, struct node, node_work);
645
646         /*
647          * We only get here when a node transitions to/from memoryless state.
648          * We can detect which transition occurred by examining whether the
649          * node has memory now.  hugetlb_register_node() already check this
650          * so we try to register the attributes.  If that fails, then the
651          * node has transitioned to memoryless, try to unregister the
652          * attributes.
653          */
654         if (!hugetlb_register_node(node))
655                 hugetlb_unregister_node(node);
656 }
657
658 static void init_node_hugetlb_work(int nid)
659 {
660         INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
661 }
662
663 static int node_memory_callback(struct notifier_block *self,
664                                 unsigned long action, void *arg)
665 {
666         struct memory_notify *mnb = arg;
667         int nid = mnb->status_change_nid;
668
669         switch (action) {
670         case MEM_ONLINE:
671         case MEM_OFFLINE:
672                 /*
673                  * offload per node hstate [un]registration to a work thread
674                  * when transitioning to/from memoryless state.
675                  */
676                 if (nid != NUMA_NO_NODE)
677                         schedule_work(&node_devices[nid]->node_work);
678                 break;
679
680         case MEM_GOING_ONLINE:
681         case MEM_GOING_OFFLINE:
682         case MEM_CANCEL_ONLINE:
683         case MEM_CANCEL_OFFLINE:
684         default:
685                 break;
686         }
687
688         return NOTIFY_OK;
689 }
690 #endif  /* CONFIG_HUGETLBFS */
691 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
692
693 #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
694     !defined(CONFIG_HUGETLBFS)
/*
 * Stub used unless both CONFIG_MEMORY_HOTPLUG_SPARSE and CONFIG_HUGETLBFS
 * are set: ignores the notification and returns NOTIFY_OK.
 */
static inline int node_memory_callback(struct notifier_block *self,
                                unsigned long action, void *arg)
{
        return NOTIFY_OK;
}

/* Matching stub: there is no per-node work item to initialise. */
static void init_node_hugetlb_work(int nid) { }
702
703 #endif
704
705 int __register_one_node(int nid)
706 {
707         int error;
708         int cpu;
709
710         node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
711         if (!node_devices[nid])
712                 return -ENOMEM;
713
714         error = register_node(node_devices[nid], nid);
715
716         /* link cpu under this node */
717         for_each_present_cpu(cpu) {
718                 if (cpu_to_node(cpu) == nid)
719                         register_cpu_under_node(cpu, nid);
720         }
721
722         INIT_LIST_HEAD(&node_devices[nid]->access_list);
723         /* initialize work queue for memory hot plug */
724         init_node_hugetlb_work(nid);
725
726         return error;
727 }
728
729 void unregister_one_node(int nid)
730 {
731         if (!node_devices[nid])
732                 return;
733
734         unregister_node(node_devices[nid]);
735         node_devices[nid] = NULL;
736 }
737
738 /*
739  * node states attributes
740  */
741
742 static ssize_t print_nodes_state(enum node_states state, char *buf)
743 {
744         int n;
745
746         n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
747                       nodemask_pr_args(&node_states[state]));
748         buf[n++] = '\n';
749         buf[n] = '\0';
750         return n;
751 }
752
/* A device attribute paired with the node state that it prints. */
struct node_attr {
        struct device_attribute attr;   /* embedded; container_of() recovers us */
        enum node_states state;         /* index into node_states[] */
};
757
758 static ssize_t show_node_state(struct device *dev,
759                                struct device_attribute *attr, char *buf)
760 {
761         struct node_attr *na = container_of(attr, struct node_attr, attr);
762         return print_nodes_state(na->state, buf);
763 }
764
/* Build a read-only (0444) node_attr that show_node_state() serves. */
#define _NODE_ATTR(name, state) \
        { __ATTR(name, 0444, show_node_state, NULL), state }

/* One attribute per node state, indexed by the state itself. */
static struct node_attr node_state_attr[] = {
        [N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
        [N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
        [N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
#ifdef CONFIG_HIGHMEM
        [N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
#endif
        [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
        [N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
};
778
/* NULL-terminated list of the node state attributes defined above. */
static struct attribute *node_state_attrs[] = {
        &node_state_attr[N_POSSIBLE].attr.attr,
        &node_state_attr[N_ONLINE].attr.attr,
        &node_state_attr[N_NORMAL_MEMORY].attr.attr,
#ifdef CONFIG_HIGHMEM
        &node_state_attr[N_HIGH_MEMORY].attr.attr,
#endif
        &node_state_attr[N_MEMORY].attr.attr,
        &node_state_attr[N_CPU].attr.attr,
        NULL
};

/* Unnamed group holding the node state attributes. */
static struct attribute_group memory_root_attr_group = {
        .attrs = node_state_attrs,
};

/* Group list handed to subsys_system_register() in register_node_type(). */
static const struct attribute_group *cpu_root_attr_groups[] = {
        &memory_root_attr_group,
        NULL,
};
799
800 #define NODE_CALLBACK_PRI       2       /* lower than SLAB */
801 static int __init register_node_type(void)
802 {
803         int ret;
804
805         BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
806         BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
807
808         ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
809         if (!ret) {
810                 static struct notifier_block node_memory_callback_nb = {
811                         .notifier_call = node_memory_callback,
812                         .priority = NODE_CALLBACK_PRI,
813                 };
814                 register_hotmemory_notifier(&node_memory_callback_nb);
815         }
816
817         /*
818          * Note:  we're not going to unregister the node class if we fail
819          * to register the node state class attribute files.
820          */
821         return ret;
822 }
823 postcore_initcall(register_node_type);