device-dax: make align a per-device property
[linux-2.6-microblaze.git] / drivers / dax / bus.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
3 #include <linux/memremap.h>
4 #include <linux/device.h>
5 #include <linux/mutex.h>
6 #include <linux/list.h>
7 #include <linux/slab.h>
8 #include <linux/dax.h>
9 #include <linux/io.h>
10 #include "dax-private.h"
11 #include "bus.h"
12
13 static struct class *dax_class;
14
15 static DEFINE_MUTEX(dax_bus_lock);
16
17 #define DAX_NAME_LEN 30
18 struct dax_id {
19         struct list_head list;
20         char dev_name[DAX_NAME_LEN];
21 };
22
23 static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
24 {
25         /*
26          * We only ever expect to handle device-dax instances, i.e. the
27          * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
28          */
29         return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
30 }
31
32 static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
33 {
34         return container_of(drv, struct dax_device_driver, drv);
35 }
36
37 static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
38                 const char *dev_name)
39 {
40         struct dax_id *dax_id;
41
42         lockdep_assert_held(&dax_bus_lock);
43
44         list_for_each_entry(dax_id, &dax_drv->ids, list)
45                 if (sysfs_streq(dax_id->dev_name, dev_name))
46                         return dax_id;
47         return NULL;
48 }
49
50 static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
51 {
52         int match;
53
54         mutex_lock(&dax_bus_lock);
55         match = !!__dax_match_id(dax_drv, dev_name(dev));
56         mutex_unlock(&dax_bus_lock);
57
58         return match;
59 }
60
/* Operation requested through the new_id / remove_id driver attributes. */
enum id_action {
	ID_REMOVE = 0,
	ID_ADD = 1,
};
65
66 static ssize_t do_id_store(struct device_driver *drv, const char *buf,
67                 size_t count, enum id_action action)
68 {
69         struct dax_device_driver *dax_drv = to_dax_drv(drv);
70         unsigned int region_id, id;
71         char devname[DAX_NAME_LEN];
72         struct dax_id *dax_id;
73         ssize_t rc = count;
74         int fields;
75
76         fields = sscanf(buf, "dax%d.%d", &region_id, &id);
77         if (fields != 2)
78                 return -EINVAL;
79         sprintf(devname, "dax%d.%d", region_id, id);
80         if (!sysfs_streq(buf, devname))
81                 return -EINVAL;
82
83         mutex_lock(&dax_bus_lock);
84         dax_id = __dax_match_id(dax_drv, buf);
85         if (!dax_id) {
86                 if (action == ID_ADD) {
87                         dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
88                         if (dax_id) {
89                                 strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
90                                 list_add(&dax_id->list, &dax_drv->ids);
91                         } else
92                                 rc = -ENOMEM;
93                 } else
94                         /* nothing to remove */;
95         } else if (action == ID_REMOVE) {
96                 list_del(&dax_id->list);
97                 kfree(dax_id);
98         } else
99                 /* dax_id already added */;
100         mutex_unlock(&dax_bus_lock);
101
102         if (rc < 0)
103                 return rc;
104         if (action == ID_ADD)
105                 rc = driver_attach(drv);
106         if (rc)
107                 return rc;
108         return count;
109 }
110
111 static ssize_t new_id_store(struct device_driver *drv, const char *buf,
112                 size_t count)
113 {
114         return do_id_store(drv, buf, count, ID_ADD);
115 }
116 static DRIVER_ATTR_WO(new_id);
117
118 static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
119                 size_t count)
120 {
121         return do_id_store(drv, buf, count, ID_REMOVE);
122 }
123 static DRIVER_ATTR_WO(remove_id);
124
125 static struct attribute *dax_drv_attrs[] = {
126         &driver_attr_new_id.attr,
127         &driver_attr_remove_id.attr,
128         NULL,
129 };
130 ATTRIBUTE_GROUPS(dax_drv);
131
132 static int dax_bus_match(struct device *dev, struct device_driver *drv);
133
134 static bool is_static(struct dax_region *dax_region)
135 {
136         return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
137 }
138
139 static u64 dev_dax_size(struct dev_dax *dev_dax)
140 {
141         u64 size = 0;
142         int i;
143
144         device_lock_assert(&dev_dax->dev);
145
146         for (i = 0; i < dev_dax->nr_range; i++)
147                 size += range_len(&dev_dax->ranges[i].range);
148
149         return size;
150 }
151
152 static int dax_bus_probe(struct device *dev)
153 {
154         struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
155         struct dev_dax *dev_dax = to_dev_dax(dev);
156         struct dax_region *dax_region = dev_dax->region;
157         int rc;
158
159         if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0)
160                 return -ENXIO;
161
162         rc = dax_drv->probe(dev_dax);
163
164         if (rc || is_static(dax_region))
165                 return rc;
166
167         /*
168          * Track new seed creation only after successful probe of the
169          * previous seed.
170          */
171         if (dax_region->seed == dev)
172                 dax_region->seed = NULL;
173
174         return 0;
175 }
176
177 static int dax_bus_remove(struct device *dev)
178 {
179         struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
180         struct dev_dax *dev_dax = to_dev_dax(dev);
181
182         return dax_drv->remove(dev_dax);
183 }
184
185 static struct bus_type dax_bus_type = {
186         .name = "dax",
187         .uevent = dax_bus_uevent,
188         .match = dax_bus_match,
189         .probe = dax_bus_probe,
190         .remove = dax_bus_remove,
191         .drv_groups = dax_drv_groups,
192 };
193
194 static int dax_bus_match(struct device *dev, struct device_driver *drv)
195 {
196         struct dax_device_driver *dax_drv = to_dax_drv(drv);
197
198         /*
199          * All but the 'device-dax' driver, which has 'match_always'
200          * set, requires an exact id match.
201          */
202         if (dax_drv->match_always)
203                 return 1;
204
205         return dax_match_id(dax_drv, dev);
206 }
207
208 /*
209  * Rely on the fact that drvdata is set before the attributes are
210  * registered, and that the attributes are unregistered before drvdata
211  * is cleared to assume that drvdata is always valid.
212  */
213 static ssize_t id_show(struct device *dev,
214                 struct device_attribute *attr, char *buf)
215 {
216         struct dax_region *dax_region = dev_get_drvdata(dev);
217
218         return sprintf(buf, "%d\n", dax_region->id);
219 }
220 static DEVICE_ATTR_RO(id);
221
222 static ssize_t region_size_show(struct device *dev,
223                 struct device_attribute *attr, char *buf)
224 {
225         struct dax_region *dax_region = dev_get_drvdata(dev);
226
227         return sprintf(buf, "%llu\n", (unsigned long long)
228                         resource_size(&dax_region->res));
229 }
230 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
231                 region_size_show, NULL);
232
233 static ssize_t align_show(struct device *dev,
234                 struct device_attribute *attr, char *buf)
235 {
236         struct dax_region *dax_region = dev_get_drvdata(dev);
237
238         return sprintf(buf, "%u\n", dax_region->align);
239 }
240 static DEVICE_ATTR_RO(align);
241
/*
 * Iterate over the child resources of a dax_region's resource, following
 * the ->sibling links starting at ->res.child.
 *
 * @dax_region: pointer to the region whose children are walked
 * @cursor:     struct resource pointer lvalue used as the loop cursor
 *
 * The cursor parameter must not be spelled "res": the body dereferences
 * the region's ->res member, and a parameter of that name would be
 * substituted into the member access as well, so the macro would only
 * compile when the caller's variable happened to be called "res".
 */
#define for_each_dax_region_resource(dax_region, cursor) \
	for ((cursor) = (dax_region)->res.child; (cursor); \
			(cursor) = (cursor)->sibling)
244
245 static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
246 {
247         resource_size_t size = resource_size(&dax_region->res);
248         struct resource *res;
249
250         device_lock_assert(dax_region->dev);
251
252         for_each_dax_region_resource(dax_region, res)
253                 size -= resource_size(res);
254         return size;
255 }
256
257 static ssize_t available_size_show(struct device *dev,
258                 struct device_attribute *attr, char *buf)
259 {
260         struct dax_region *dax_region = dev_get_drvdata(dev);
261         unsigned long long size;
262
263         device_lock(dev);
264         size = dax_region_avail_size(dax_region);
265         device_unlock(dev);
266
267         return sprintf(buf, "%llu\n", size);
268 }
269 static DEVICE_ATTR_RO(available_size);
270
271 static ssize_t seed_show(struct device *dev,
272                 struct device_attribute *attr, char *buf)
273 {
274         struct dax_region *dax_region = dev_get_drvdata(dev);
275         struct device *seed;
276         ssize_t rc;
277
278         if (is_static(dax_region))
279                 return -EINVAL;
280
281         device_lock(dev);
282         seed = dax_region->seed;
283         rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : "");
284         device_unlock(dev);
285
286         return rc;
287 }
288 static DEVICE_ATTR_RO(seed);
289
290 static ssize_t create_show(struct device *dev,
291                 struct device_attribute *attr, char *buf)
292 {
293         struct dax_region *dax_region = dev_get_drvdata(dev);
294         struct device *youngest;
295         ssize_t rc;
296
297         if (is_static(dax_region))
298                 return -EINVAL;
299
300         device_lock(dev);
301         youngest = dax_region->youngest;
302         rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : "");
303         device_unlock(dev);
304
305         return rc;
306 }
307
308 static ssize_t create_store(struct device *dev, struct device_attribute *attr,
309                 const char *buf, size_t len)
310 {
311         struct dax_region *dax_region = dev_get_drvdata(dev);
312         unsigned long long avail;
313         ssize_t rc;
314         int val;
315
316         if (is_static(dax_region))
317                 return -EINVAL;
318
319         rc = kstrtoint(buf, 0, &val);
320         if (rc)
321                 return rc;
322         if (val != 1)
323                 return -EINVAL;
324
325         device_lock(dev);
326         avail = dax_region_avail_size(dax_region);
327         if (avail == 0)
328                 rc = -ENOSPC;
329         else {
330                 struct dev_dax_data data = {
331                         .dax_region = dax_region,
332                         .size = 0,
333                         .id = -1,
334                 };
335                 struct dev_dax *dev_dax = devm_create_dev_dax(&data);
336
337                 if (IS_ERR(dev_dax))
338                         rc = PTR_ERR(dev_dax);
339                 else {
340                         /*
341                          * In support of crafting multiple new devices
342                          * simultaneously multiple seeds can be created,
343                          * but only the first one that has not been
344                          * successfully bound is tracked as the region
345                          * seed.
346                          */
347                         if (!dax_region->seed)
348                                 dax_region->seed = &dev_dax->dev;
349                         dax_region->youngest = &dev_dax->dev;
350                         rc = len;
351                 }
352         }
353         device_unlock(dev);
354
355         return rc;
356 }
357 static DEVICE_ATTR_RW(create);
358
359 void kill_dev_dax(struct dev_dax *dev_dax)
360 {
361         struct dax_device *dax_dev = dev_dax->dax_dev;
362         struct inode *inode = dax_inode(dax_dev);
363
364         kill_dax(dax_dev);
365         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
366 }
367 EXPORT_SYMBOL_GPL(kill_dev_dax);
368
369 static void free_dev_dax_ranges(struct dev_dax *dev_dax)
370 {
371         struct dax_region *dax_region = dev_dax->region;
372         int i;
373
374         device_lock_assert(dax_region->dev);
375         for (i = 0; i < dev_dax->nr_range; i++) {
376                 struct range *range = &dev_dax->ranges[i].range;
377
378                 __release_region(&dax_region->res, range->start,
379                                 range_len(range));
380         }
381         dev_dax->nr_range = 0;
382 }
383
/*
 * Action callback (see its use with devm_release_action()): kill the
 * dax device, give its ranges back to the region, then delete the
 * device and drop a reference on it. @dev is the struct device embedded
 * in the dev_dax.
 */
static void unregister_dev_dax(void *dev)
{
	struct device *target = dev;
	struct dev_dax *dev_dax = to_dev_dax(target);

	dev_dbg(target, "%s\n", __func__);

	kill_dev_dax(dev_dax);
	free_dev_dax_ranges(dev_dax);
	device_del(target);
	put_device(target);
}
395
396 /* a return value >= 0 indicates this invocation invalidated the id */
397 static int __free_dev_dax_id(struct dev_dax *dev_dax)
398 {
399         struct dax_region *dax_region = dev_dax->region;
400         struct device *dev = &dev_dax->dev;
401         int rc = dev_dax->id;
402
403         device_lock_assert(dev);
404
405         if (is_static(dax_region) || dev_dax->id < 0)
406                 return -1;
407         ida_free(&dax_region->ida, dev_dax->id);
408         dev_dax->id = -1;
409         return rc;
410 }
411
412 static int free_dev_dax_id(struct dev_dax *dev_dax)
413 {
414         struct device *dev = &dev_dax->dev;
415         int rc;
416
417         device_lock(dev);
418         rc = __free_dev_dax_id(dev_dax);
419         device_unlock(dev);
420         return rc;
421 }
422
423 static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
424                 const char *buf, size_t len)
425 {
426         struct dax_region *dax_region = dev_get_drvdata(dev);
427         struct dev_dax *dev_dax;
428         struct device *victim;
429         bool do_del = false;
430         int rc;
431
432         if (is_static(dax_region))
433                 return -EINVAL;
434
435         victim = device_find_child_by_name(dax_region->dev, buf);
436         if (!victim)
437                 return -ENXIO;
438
439         device_lock(dev);
440         device_lock(victim);
441         dev_dax = to_dev_dax(victim);
442         if (victim->driver || dev_dax_size(dev_dax))
443                 rc = -EBUSY;
444         else {
445                 /*
446                  * Invalidate the device so it does not become active
447                  * again, but always preserve device-id-0 so that
448                  * /sys/bus/dax/ is guaranteed to be populated while any
449                  * dax_region is registered.
450                  */
451                 if (dev_dax->id > 0) {
452                         do_del = __free_dev_dax_id(dev_dax) >= 0;
453                         rc = len;
454                         if (dax_region->seed == victim)
455                                 dax_region->seed = NULL;
456                         if (dax_region->youngest == victim)
457                                 dax_region->youngest = NULL;
458                 } else
459                         rc = -EBUSY;
460         }
461         device_unlock(victim);
462
463         /* won the race to invalidate the device, clean it up */
464         if (do_del)
465                 devm_release_action(dev, unregister_dev_dax, victim);
466         device_unlock(dev);
467         put_device(victim);
468
469         return rc;
470 }
471 static DEVICE_ATTR_WO(delete);
472
473 static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
474                 int n)
475 {
476         struct device *dev = container_of(kobj, struct device, kobj);
477         struct dax_region *dax_region = dev_get_drvdata(dev);
478
479         if (is_static(dax_region))
480                 if (a == &dev_attr_available_size.attr
481                                 || a == &dev_attr_create.attr
482                                 || a == &dev_attr_seed.attr
483                                 || a == &dev_attr_delete.attr)
484                         return 0;
485         return a->mode;
486 }
487
488 static struct attribute *dax_region_attributes[] = {
489         &dev_attr_available_size.attr,
490         &dev_attr_region_size.attr,
491         &dev_attr_align.attr,
492         &dev_attr_create.attr,
493         &dev_attr_seed.attr,
494         &dev_attr_delete.attr,
495         &dev_attr_id.attr,
496         NULL,
497 };
498
499 static const struct attribute_group dax_region_attribute_group = {
500         .name = "dax_region",
501         .attrs = dax_region_attributes,
502         .is_visible = dax_region_visible,
503 };
504
505 static const struct attribute_group *dax_region_attribute_groups[] = {
506         &dax_region_attribute_group,
507         NULL,
508 };
509
510 static void dax_region_free(struct kref *kref)
511 {
512         struct dax_region *dax_region;
513
514         dax_region = container_of(kref, struct dax_region, kref);
515         kfree(dax_region);
516 }
517
518 void dax_region_put(struct dax_region *dax_region)
519 {
520         kref_put(&dax_region->kref, dax_region_free);
521 }
522 EXPORT_SYMBOL_GPL(dax_region_put);
523
524 static void dax_region_unregister(void *region)
525 {
526         struct dax_region *dax_region = region;
527
528         sysfs_remove_groups(&dax_region->dev->kobj,
529                         dax_region_attribute_groups);
530         dax_region_put(dax_region);
531 }
532
533 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
534                 struct range *range, int target_node, unsigned int align,
535                 unsigned long flags)
536 {
537         struct dax_region *dax_region;
538
539         /*
540          * The DAX core assumes that it can store its private data in
541          * parent->driver_data. This WARN is a reminder / safeguard for
542          * developers of device-dax drivers.
543          */
544         if (dev_get_drvdata(parent)) {
545                 dev_WARN(parent, "dax core failed to setup private data\n");
546                 return NULL;
547         }
548
549         if (!IS_ALIGNED(range->start, align)
550                         || !IS_ALIGNED(range_len(range), align))
551                 return NULL;
552
553         dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
554         if (!dax_region)
555                 return NULL;
556
557         dev_set_drvdata(parent, dax_region);
558         kref_init(&dax_region->kref);
559         dax_region->id = region_id;
560         dax_region->align = align;
561         dax_region->dev = parent;
562         dax_region->target_node = target_node;
563         ida_init(&dax_region->ida);
564         dax_region->res = (struct resource) {
565                 .start = range->start,
566                 .end = range->end,
567                 .flags = IORESOURCE_MEM | flags,
568         };
569
570         if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
571                 kfree(dax_region);
572                 return NULL;
573         }
574
575         kref_get(&dax_region->kref);
576         if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
577                 return NULL;
578         return dax_region;
579 }
580 EXPORT_SYMBOL_GPL(alloc_dax_region);
581
582 static void dax_mapping_release(struct device *dev)
583 {
584         struct dax_mapping *mapping = to_dax_mapping(dev);
585         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
586
587         ida_free(&dev_dax->ida, mapping->id);
588         kfree(mapping);
589 }
590
591 static void unregister_dax_mapping(void *data)
592 {
593         struct device *dev = data;
594         struct dax_mapping *mapping = to_dax_mapping(dev);
595         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
596         struct dax_region *dax_region = dev_dax->region;
597
598         dev_dbg(dev, "%s\n", __func__);
599
600         device_lock_assert(dax_region->dev);
601
602         dev_dax->ranges[mapping->range_id].mapping = NULL;
603         mapping->range_id = -1;
604
605         device_del(dev);
606         put_device(dev);
607 }
608
609 static struct dev_dax_range *get_dax_range(struct device *dev)
610 {
611         struct dax_mapping *mapping = to_dax_mapping(dev);
612         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
613         struct dax_region *dax_region = dev_dax->region;
614
615         device_lock(dax_region->dev);
616         if (mapping->range_id < 0) {
617                 device_unlock(dax_region->dev);
618                 return NULL;
619         }
620
621         return &dev_dax->ranges[mapping->range_id];
622 }
623
624 static void put_dax_range(struct dev_dax_range *dax_range)
625 {
626         struct dax_mapping *mapping = dax_range->mapping;
627         struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent);
628         struct dax_region *dax_region = dev_dax->region;
629
630         device_unlock(dax_region->dev);
631 }
632
633 static ssize_t start_show(struct device *dev,
634                 struct device_attribute *attr, char *buf)
635 {
636         struct dev_dax_range *dax_range;
637         ssize_t rc;
638
639         dax_range = get_dax_range(dev);
640         if (!dax_range)
641                 return -ENXIO;
642         rc = sprintf(buf, "%#llx\n", dax_range->range.start);
643         put_dax_range(dax_range);
644
645         return rc;
646 }
647 static DEVICE_ATTR(start, 0400, start_show, NULL);
648
649 static ssize_t end_show(struct device *dev,
650                 struct device_attribute *attr, char *buf)
651 {
652         struct dev_dax_range *dax_range;
653         ssize_t rc;
654
655         dax_range = get_dax_range(dev);
656         if (!dax_range)
657                 return -ENXIO;
658         rc = sprintf(buf, "%#llx\n", dax_range->range.end);
659         put_dax_range(dax_range);
660
661         return rc;
662 }
663 static DEVICE_ATTR(end, 0400, end_show, NULL);
664
665 static ssize_t pgoff_show(struct device *dev,
666                 struct device_attribute *attr, char *buf)
667 {
668         struct dev_dax_range *dax_range;
669         ssize_t rc;
670
671         dax_range = get_dax_range(dev);
672         if (!dax_range)
673                 return -ENXIO;
674         rc = sprintf(buf, "%#lx\n", dax_range->pgoff);
675         put_dax_range(dax_range);
676
677         return rc;
678 }
679 static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);
680
681 static struct attribute *dax_mapping_attributes[] = {
682         &dev_attr_start.attr,
683         &dev_attr_end.attr,
684         &dev_attr_page_offset.attr,
685         NULL,
686 };
687
688 static const struct attribute_group dax_mapping_attribute_group = {
689         .attrs = dax_mapping_attributes,
690 };
691
692 static const struct attribute_group *dax_mapping_attribute_groups[] = {
693         &dax_mapping_attribute_group,
694         NULL,
695 };
696
697 static struct device_type dax_mapping_type = {
698         .release = dax_mapping_release,
699         .groups = dax_mapping_attribute_groups,
700 };
701
702 static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
703 {
704         struct dax_region *dax_region = dev_dax->region;
705         struct dax_mapping *mapping;
706         struct device *dev;
707         int rc;
708
709         device_lock_assert(dax_region->dev);
710
711         if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
712                                 "region disabled\n"))
713                 return -ENXIO;
714
715         mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
716         if (!mapping)
717                 return -ENOMEM;
718         mapping->range_id = range_id;
719         mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
720         if (mapping->id < 0) {
721                 kfree(mapping);
722                 return -ENOMEM;
723         }
724         dev_dax->ranges[range_id].mapping = mapping;
725         dev = &mapping->dev;
726         device_initialize(dev);
727         dev->parent = &dev_dax->dev;
728         dev->type = &dax_mapping_type;
729         dev_set_name(dev, "mapping%d", mapping->id);
730         rc = device_add(dev);
731         if (rc) {
732                 put_device(dev);
733                 return rc;
734         }
735
736         rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
737                         dev);
738         if (rc)
739                 return rc;
740         return 0;
741 }
742
743 static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
744                 resource_size_t size)
745 {
746         struct dax_region *dax_region = dev_dax->region;
747         struct resource *res = &dax_region->res;
748         struct device *dev = &dev_dax->dev;
749         struct dev_dax_range *ranges;
750         unsigned long pgoff = 0;
751         struct resource *alloc;
752         int i, rc;
753
754         device_lock_assert(dax_region->dev);
755
756         /* handle the seed alloc special case */
757         if (!size) {
758                 if (dev_WARN_ONCE(dev, dev_dax->nr_range,
759                                         "0-size allocation must be first\n"))
760                         return -EBUSY;
761                 /* nr_range == 0 is elsewhere special cased as 0-size device */
762                 return 0;
763         }
764
765         ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
766                         * (dev_dax->nr_range + 1), GFP_KERNEL);
767         if (!ranges)
768                 return -ENOMEM;
769
770         alloc = __request_region(res, start, size, dev_name(dev), 0);
771         if (!alloc) {
772                 /*
773                  * If this was an empty set of ranges nothing else
774                  * will release @ranges, so do it now.
775                  */
776                 if (!dev_dax->nr_range) {
777                         kfree(ranges);
778                         ranges = NULL;
779                 }
780                 dev_dax->ranges = ranges;
781                 return -ENOMEM;
782         }
783
784         for (i = 0; i < dev_dax->nr_range; i++)
785                 pgoff += PHYS_PFN(range_len(&ranges[i].range));
786         dev_dax->ranges = ranges;
787         ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
788                 .pgoff = pgoff,
789                 .range = {
790                         .start = alloc->start,
791                         .end = alloc->end,
792                 },
793         };
794
795         dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
796                         &alloc->start, &alloc->end);
797         /*
798          * A dev_dax instance must be registered before mapping device
799          * children can be added. Defer to devm_create_dev_dax() to add
800          * the initial mapping device.
801          */
802         if (!device_is_registered(&dev_dax->dev))
803                 return 0;
804
805         rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
806         if (rc) {
807                 dev_dbg(dev, "delete range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
808                                 &alloc->start, &alloc->end);
809                 dev_dax->nr_range--;
810                 __release_region(res, alloc->start, resource_size(alloc));
811                 return rc;
812         }
813
814         return 0;
815 }
816
817 static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
818 {
819         int last_range = dev_dax->nr_range - 1;
820         struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
821         struct dax_region *dax_region = dev_dax->region;
822         bool is_shrink = resource_size(res) > size;
823         struct range *range = &dax_range->range;
824         struct device *dev = &dev_dax->dev;
825         int rc;
826
827         device_lock_assert(dax_region->dev);
828
829         if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
830                 return -EINVAL;
831
832         rc = adjust_resource(res, range->start, size);
833         if (rc)
834                 return rc;
835
836         *range = (struct range) {
837                 .start = range->start,
838                 .end = range->start + size - 1,
839         };
840
841         dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
842                         last_range, (unsigned long long) range->start,
843                         (unsigned long long) range->end);
844
845         return 0;
846 }
847
848 static ssize_t size_show(struct device *dev,
849                 struct device_attribute *attr, char *buf)
850 {
851         struct dev_dax *dev_dax = to_dev_dax(dev);
852         unsigned long long size;
853
854         device_lock(dev);
855         size = dev_dax_size(dev_dax);
856         device_unlock(dev);
857
858         return sprintf(buf, "%llu\n", size);
859 }
860
861 static bool alloc_is_aligned(struct dax_region *dax_region,
862                 resource_size_t size)
863 {
864         /*
865          * The minimum mapping granularity for a device instance is a
866          * single subsection, unless the arch says otherwise.
867          */
868         return IS_ALIGNED(size, max_t(unsigned long, dax_region->align,
869                                 memremap_compat_align()));
870 }
871
872 static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
873 {
874         resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
875         struct dax_region *dax_region = dev_dax->region;
876         struct device *dev = &dev_dax->dev;
877         int i;
878
879         for (i = dev_dax->nr_range - 1; i >= 0; i--) {
880                 struct range *range = &dev_dax->ranges[i].range;
881                 struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
882                 struct resource *adjust = NULL, *res;
883                 resource_size_t shrink;
884
885                 shrink = min_t(u64, to_shrink, range_len(range));
886                 if (shrink >= range_len(range)) {
887                         devm_release_action(dax_region->dev,
888                                         unregister_dax_mapping, &mapping->dev);
889                         __release_region(&dax_region->res, range->start,
890                                         range_len(range));
891                         dev_dax->nr_range--;
892                         dev_dbg(dev, "delete range[%d]: %#llx:%#llx\n", i,
893                                         (unsigned long long) range->start,
894                                         (unsigned long long) range->end);
895                         to_shrink -= shrink;
896                         if (!to_shrink)
897                                 break;
898                         continue;
899                 }
900
901                 for_each_dax_region_resource(dax_region, res)
902                         if (strcmp(res->name, dev_name(dev)) == 0
903                                         && res->start == range->start) {
904                                 adjust = res;
905                                 break;
906                         }
907
908                 if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
909                                         "failed to find matching resource\n"))
910                         return -ENXIO;
911                 return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
912                                 - shrink);
913         }
914         return 0;
915 }
916
917 /*
918  * Only allow adjustments that preserve the relative pgoff of existing
919  * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
920  */
921 static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
922 {
923         struct dev_dax_range *last;
924         int i;
925
926         if (dev_dax->nr_range == 0)
927                 return false;
928         if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
929                 return false;
930         last = &dev_dax->ranges[dev_dax->nr_range - 1];
931         if (last->range.start != res->start || last->range.end != res->end)
932                 return false;
933         for (i = 0; i < dev_dax->nr_range - 1; i++) {
934                 struct dev_dax_range *dax_range = &dev_dax->ranges[i];
935
936                 if (dax_range->pgoff > last->pgoff)
937                         return false;
938         }
939
940         return true;
941 }
942
943 static ssize_t dev_dax_resize(struct dax_region *dax_region,
944                 struct dev_dax *dev_dax, resource_size_t size)
945 {
946         resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
947         resource_size_t dev_size = dev_dax_size(dev_dax);
948         struct resource *region_res = &dax_region->res;
949         struct device *dev = &dev_dax->dev;
950         struct resource *res, *first;
951         resource_size_t alloc = 0;
952         int rc;
953
954         if (dev->driver)
955                 return -EBUSY;
956         if (size == dev_size)
957                 return 0;
958         if (size > dev_size && size - dev_size > avail)
959                 return -ENOSPC;
960         if (size < dev_size)
961                 return dev_dax_shrink(dev_dax, size);
962
963         to_alloc = size - dev_size;
964         if (dev_WARN_ONCE(dev, !alloc_is_aligned(dax_region, to_alloc),
965                         "resize of %pa misaligned\n", &to_alloc))
966                 return -ENXIO;
967
968         /*
969          * Expand the device into the unused portion of the region. This
970          * may involve adjusting the end of an existing resource, or
971          * allocating a new resource.
972          */
973 retry:
974         first = region_res->child;
975         if (!first)
976                 return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);
977
978         rc = -ENOSPC;
979         for (res = first; res; res = res->sibling) {
980                 struct resource *next = res->sibling;
981
982                 /* space at the beginning of the region */
983                 if (res == first && res->start > dax_region->res.start) {
984                         alloc = min(res->start - dax_region->res.start, to_alloc);
985                         rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
986                         break;
987                 }
988
989                 alloc = 0;
990                 /* space between allocations */
991                 if (next && next->start > res->end + 1)
992                         alloc = min(next->start - (res->end + 1), to_alloc);
993
994                 /* space at the end of the region */
995                 if (!alloc && !next && res->end < region_res->end)
996                         alloc = min(region_res->end - res->end, to_alloc);
997
998                 if (!alloc)
999                         continue;
1000
1001                 if (adjust_ok(dev_dax, res)) {
1002                         rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
1003                         break;
1004                 }
1005                 rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
1006                 break;
1007         }
1008         if (rc)
1009                 return rc;
1010         to_alloc -= alloc;
1011         if (to_alloc)
1012                 goto retry;
1013         return 0;
1014 }
1015
1016 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
1017                 const char *buf, size_t len)
1018 {
1019         ssize_t rc;
1020         unsigned long long val;
1021         struct dev_dax *dev_dax = to_dev_dax(dev);
1022         struct dax_region *dax_region = dev_dax->region;
1023
1024         rc = kstrtoull(buf, 0, &val);
1025         if (rc)
1026                 return rc;
1027
1028         if (!alloc_is_aligned(dax_region, val)) {
1029                 dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
1030                 return -EINVAL;
1031         }
1032
1033         device_lock(dax_region->dev);
1034         if (!dax_region->dev->driver) {
1035                 device_unlock(dax_region->dev);
1036                 return -ENXIO;
1037         }
1038         device_lock(dev);
1039         rc = dev_dax_resize(dax_region, dev_dax, val);
1040         device_unlock(dev);
1041         device_unlock(dax_region->dev);
1042
1043         return rc == 0 ? len : rc;
1044 }
1045 static DEVICE_ATTR_RW(size);
1046
1047 static int dev_dax_target_node(struct dev_dax *dev_dax)
1048 {
1049         struct dax_region *dax_region = dev_dax->region;
1050
1051         return dax_region->target_node;
1052 }
1053
1054 static ssize_t target_node_show(struct device *dev,
1055                 struct device_attribute *attr, char *buf)
1056 {
1057         struct dev_dax *dev_dax = to_dev_dax(dev);
1058
1059         return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
1060 }
1061 static DEVICE_ATTR_RO(target_node);
1062
1063 static ssize_t resource_show(struct device *dev,
1064                 struct device_attribute *attr, char *buf)
1065 {
1066         struct dev_dax *dev_dax = to_dev_dax(dev);
1067         struct dax_region *dax_region = dev_dax->region;
1068         unsigned long long start;
1069
1070         if (dev_dax->nr_range < 1)
1071                 start = dax_region->res.start;
1072         else
1073                 start = dev_dax->ranges[0].range.start;
1074
1075         return sprintf(buf, "%#llx\n", start);
1076 }
1077 static DEVICE_ATTR(resource, 0400, resource_show, NULL);
1078
1079 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1080                 char *buf)
1081 {
1082         /*
1083          * We only ever expect to handle device-dax instances, i.e. the
1084          * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
1085          */
1086         return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
1087 }
1088 static DEVICE_ATTR_RO(modalias);
1089
1090 static ssize_t numa_node_show(struct device *dev,
1091                 struct device_attribute *attr, char *buf)
1092 {
1093         return sprintf(buf, "%d\n", dev_to_node(dev));
1094 }
1095 static DEVICE_ATTR_RO(numa_node);
1096
1097 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
1098 {
1099         struct device *dev = container_of(kobj, struct device, kobj);
1100         struct dev_dax *dev_dax = to_dev_dax(dev);
1101         struct dax_region *dax_region = dev_dax->region;
1102
1103         if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
1104                 return 0;
1105         if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
1106                 return 0;
1107         if (a == &dev_attr_size.attr && is_static(dax_region))
1108                 return 0444;
1109         return a->mode;
1110 }
1111
1112 static struct attribute *dev_dax_attributes[] = {
1113         &dev_attr_modalias.attr,
1114         &dev_attr_size.attr,
1115         &dev_attr_target_node.attr,
1116         &dev_attr_resource.attr,
1117         &dev_attr_numa_node.attr,
1118         NULL,
1119 };
1120
1121 static const struct attribute_group dev_dax_attribute_group = {
1122         .attrs = dev_dax_attributes,
1123         .is_visible = dev_dax_visible,
1124 };
1125
1126 static const struct attribute_group *dax_attribute_groups[] = {
1127         &dev_dax_attribute_group,
1128         NULL,
1129 };
1130
1131 static void dev_dax_release(struct device *dev)
1132 {
1133         struct dev_dax *dev_dax = to_dev_dax(dev);
1134         struct dax_region *dax_region = dev_dax->region;
1135         struct dax_device *dax_dev = dev_dax->dax_dev;
1136
1137         put_dax(dax_dev);
1138         free_dev_dax_id(dev_dax);
1139         dax_region_put(dax_region);
1140         kfree(dev_dax->ranges);
1141         kfree(dev_dax->pgmap);
1142         kfree(dev_dax);
1143 }
1144
1145 static const struct device_type dev_dax_type = {
1146         .release = dev_dax_release,
1147         .groups = dax_attribute_groups,
1148 };
1149
1150 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
1151 {
1152         struct dax_region *dax_region = data->dax_region;
1153         struct device *parent = dax_region->dev;
1154         struct dax_device *dax_dev;
1155         struct dev_dax *dev_dax;
1156         struct inode *inode;
1157         struct device *dev;
1158         int rc;
1159
1160         dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
1161         if (!dev_dax)
1162                 return ERR_PTR(-ENOMEM);
1163
1164         if (is_static(dax_region)) {
1165                 if (dev_WARN_ONCE(parent, data->id < 0,
1166                                 "dynamic id specified to static region\n")) {
1167                         rc = -EINVAL;
1168                         goto err_id;
1169                 }
1170
1171                 dev_dax->id = data->id;
1172         } else {
1173                 if (dev_WARN_ONCE(parent, data->id >= 0,
1174                                 "static id specified to dynamic region\n")) {
1175                         rc = -EINVAL;
1176                         goto err_id;
1177                 }
1178
1179                 rc = ida_alloc(&dax_region->ida, GFP_KERNEL);
1180                 if (rc < 0)
1181                         goto err_id;
1182                 dev_dax->id = rc;
1183         }
1184
1185         dev_dax->region = dax_region;
1186         dev = &dev_dax->dev;
1187         device_initialize(dev);
1188         dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
1189
1190         rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
1191         if (rc)
1192                 goto err_range;
1193
1194         if (data->pgmap) {
1195                 dev_WARN_ONCE(parent, !is_static(dax_region),
1196                         "custom dev_pagemap requires a static dax_region\n");
1197
1198                 dev_dax->pgmap = kmemdup(data->pgmap,
1199                                 sizeof(struct dev_pagemap), GFP_KERNEL);
1200                 if (!dev_dax->pgmap) {
1201                         rc = -ENOMEM;
1202                         goto err_pgmap;
1203                 }
1204         }
1205
1206         /*
1207          * No 'host' or dax_operations since there is no access to this
1208          * device outside of mmap of the resulting character device.
1209          */
1210         dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
1211         if (IS_ERR(dax_dev)) {
1212                 rc = PTR_ERR(dax_dev);
1213                 goto err_alloc_dax;
1214         }
1215
1216         /* a device_dax instance is dead while the driver is not attached */
1217         kill_dax(dax_dev);
1218
1219         dev_dax->dax_dev = dax_dev;
1220         dev_dax->target_node = dax_region->target_node;
1221         dev_dax->align = dax_region->align;
1222         ida_init(&dev_dax->ida);
1223         kref_get(&dax_region->kref);
1224
1225         inode = dax_inode(dax_dev);
1226         dev->devt = inode->i_rdev;
1227         if (data->subsys == DEV_DAX_BUS)
1228                 dev->bus = &dax_bus_type;
1229         else
1230                 dev->class = dax_class;
1231         dev->parent = parent;
1232         dev->type = &dev_dax_type;
1233
1234         rc = device_add(dev);
1235         if (rc) {
1236                 kill_dev_dax(dev_dax);
1237                 put_device(dev);
1238                 return ERR_PTR(rc);
1239         }
1240
1241         rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
1242         if (rc)
1243                 return ERR_PTR(rc);
1244
1245         /* register mapping device for the initial allocation range */
1246         if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
1247                 rc = devm_register_dax_mapping(dev_dax, 0);
1248                 if (rc)
1249                         return ERR_PTR(rc);
1250         }
1251
1252         return dev_dax;
1253
1254 err_alloc_dax:
1255         kfree(dev_dax->pgmap);
1256 err_pgmap:
1257         free_dev_dax_ranges(dev_dax);
1258 err_range:
1259         free_dev_dax_id(dev_dax);
1260 err_id:
1261         kfree(dev_dax);
1262
1263         return ERR_PTR(rc);
1264 }
1265 EXPORT_SYMBOL_GPL(devm_create_dev_dax);
1266
1267 static int match_always_count;
1268
1269 int __dax_driver_register(struct dax_device_driver *dax_drv,
1270                 struct module *module, const char *mod_name)
1271 {
1272         struct device_driver *drv = &dax_drv->drv;
1273         int rc = 0;
1274
1275         INIT_LIST_HEAD(&dax_drv->ids);
1276         drv->owner = module;
1277         drv->name = mod_name;
1278         drv->mod_name = mod_name;
1279         drv->bus = &dax_bus_type;
1280
1281         /* there can only be one default driver */
1282         mutex_lock(&dax_bus_lock);
1283         match_always_count += dax_drv->match_always;
1284         if (match_always_count > 1) {
1285                 match_always_count--;
1286                 WARN_ON(1);
1287                 rc = -EINVAL;
1288         }
1289         mutex_unlock(&dax_bus_lock);
1290         if (rc)
1291                 return rc;
1292         return driver_register(drv);
1293 }
1294 EXPORT_SYMBOL_GPL(__dax_driver_register);
1295
1296 void dax_driver_unregister(struct dax_device_driver *dax_drv)
1297 {
1298         struct device_driver *drv = &dax_drv->drv;
1299         struct dax_id *dax_id, *_id;
1300
1301         mutex_lock(&dax_bus_lock);
1302         match_always_count -= dax_drv->match_always;
1303         list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
1304                 list_del(&dax_id->list);
1305                 kfree(dax_id);
1306         }
1307         mutex_unlock(&dax_bus_lock);
1308         driver_unregister(drv);
1309 }
1310 EXPORT_SYMBOL_GPL(dax_driver_unregister);
1311
1312 int __init dax_bus_init(void)
1313 {
1314         int rc;
1315
1316         if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
1317                 dax_class = class_create(THIS_MODULE, "dax");
1318                 if (IS_ERR(dax_class))
1319                         return PTR_ERR(dax_class);
1320         }
1321
1322         rc = bus_register(&dax_bus_type);
1323         if (rc)
1324                 class_destroy(dax_class);
1325         return rc;
1326 }
1327
1328 void __exit dax_bus_exit(void)
1329 {
1330         bus_unregister(&dax_bus_type);
1331         class_destroy(dax_class);
1332 }