device-dax: Fix range release
[linux-2.6-microblaze.git] / drivers / dax / bus.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
3 #include <linux/memremap.h>
4 #include <linux/device.h>
5 #include <linux/mutex.h>
6 #include <linux/list.h>
7 #include <linux/slab.h>
8 #include <linux/dax.h>
9 #include <linux/io.h>
10 #include "dax-private.h"
11 #include "bus.h"
12
13 static struct class *dax_class;
14
15 static DEFINE_MUTEX(dax_bus_lock);
16
17 #define DAX_NAME_LEN 30
18 struct dax_id {
19         struct list_head list;
20         char dev_name[DAX_NAME_LEN];
21 };
22
23 static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
24 {
25         /*
26          * We only ever expect to handle device-dax instances, i.e. the
27          * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
28          */
29         return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
30 }
31
32 static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
33 {
34         return container_of(drv, struct dax_device_driver, drv);
35 }
36
37 static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
38                 const char *dev_name)
39 {
40         struct dax_id *dax_id;
41
42         lockdep_assert_held(&dax_bus_lock);
43
44         list_for_each_entry(dax_id, &dax_drv->ids, list)
45                 if (sysfs_streq(dax_id->dev_name, dev_name))
46                         return dax_id;
47         return NULL;
48 }
49
50 static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
51 {
52         int match;
53
54         mutex_lock(&dax_bus_lock);
55         match = !!__dax_match_id(dax_drv, dev_name(dev));
56         mutex_unlock(&dax_bus_lock);
57
58         return match;
59 }
60
/* Operation requested from do_id_store(). */
enum id_action {
	ID_REMOVE,	/* drop a device name from the driver's id list */
	ID_ADD,		/* add a device name to the driver's id list */
};
65
66 static ssize_t do_id_store(struct device_driver *drv, const char *buf,
67                 size_t count, enum id_action action)
68 {
69         struct dax_device_driver *dax_drv = to_dax_drv(drv);
70         unsigned int region_id, id;
71         char devname[DAX_NAME_LEN];
72         struct dax_id *dax_id;
73         ssize_t rc = count;
74         int fields;
75
76         fields = sscanf(buf, "dax%d.%d", &region_id, &id);
77         if (fields != 2)
78                 return -EINVAL;
79         sprintf(devname, "dax%d.%d", region_id, id);
80         if (!sysfs_streq(buf, devname))
81                 return -EINVAL;
82
83         mutex_lock(&dax_bus_lock);
84         dax_id = __dax_match_id(dax_drv, buf);
85         if (!dax_id) {
86                 if (action == ID_ADD) {
87                         dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
88                         if (dax_id) {
89                                 strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
90                                 list_add(&dax_id->list, &dax_drv->ids);
91                         } else
92                                 rc = -ENOMEM;
93                 } else
94                         /* nothing to remove */;
95         } else if (action == ID_REMOVE) {
96                 list_del(&dax_id->list);
97                 kfree(dax_id);
98         } else
99                 /* dax_id already added */;
100         mutex_unlock(&dax_bus_lock);
101
102         if (rc < 0)
103                 return rc;
104         if (action == ID_ADD)
105                 rc = driver_attach(drv);
106         if (rc)
107                 return rc;
108         return count;
109 }
110
111 static ssize_t new_id_store(struct device_driver *drv, const char *buf,
112                 size_t count)
113 {
114         return do_id_store(drv, buf, count, ID_ADD);
115 }
116 static DRIVER_ATTR_WO(new_id);
117
118 static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
119                 size_t count)
120 {
121         return do_id_store(drv, buf, count, ID_REMOVE);
122 }
123 static DRIVER_ATTR_WO(remove_id);
124
125 static struct attribute *dax_drv_attrs[] = {
126         &driver_attr_new_id.attr,
127         &driver_attr_remove_id.attr,
128         NULL,
129 };
130 ATTRIBUTE_GROUPS(dax_drv);
131
/* Forward declaration: dax_bus_type refers to it before the definition. */
static int dax_bus_match(struct device *dev, struct device_driver *drv);
133
134 static bool is_static(struct dax_region *dax_region)
135 {
136         return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
137 }
138
139 static u64 dev_dax_size(struct dev_dax *dev_dax)
140 {
141         u64 size = 0;
142         int i;
143
144         device_lock_assert(&dev_dax->dev);
145
146         for (i = 0; i < dev_dax->nr_range; i++)
147                 size += range_len(&dev_dax->ranges[i].range);
148
149         return size;
150 }
151
152 static int dax_bus_probe(struct device *dev)
153 {
154         struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
155         struct dev_dax *dev_dax = to_dev_dax(dev);
156         struct dax_region *dax_region = dev_dax->region;
157         int rc;
158
159         if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0)
160                 return -ENXIO;
161
162         rc = dax_drv->probe(dev_dax);
163
164         if (rc || is_static(dax_region))
165                 return rc;
166
167         /*
168          * Track new seed creation only after successful probe of the
169          * previous seed.
170          */
171         if (dax_region->seed == dev)
172                 dax_region->seed = NULL;
173
174         return 0;
175 }
176
177 static int dax_bus_remove(struct device *dev)
178 {
179         struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
180         struct dev_dax *dev_dax = to_dev_dax(dev);
181
182         return dax_drv->remove(dev_dax);
183 }
184
185 static struct bus_type dax_bus_type = {
186         .name = "dax",
187         .uevent = dax_bus_uevent,
188         .match = dax_bus_match,
189         .probe = dax_bus_probe,
190         .remove = dax_bus_remove,
191         .drv_groups = dax_drv_groups,
192 };
193
194 static int dax_bus_match(struct device *dev, struct device_driver *drv)
195 {
196         struct dax_device_driver *dax_drv = to_dax_drv(drv);
197
198         /*
199          * All but the 'device-dax' driver, which has 'match_always'
200          * set, requires an exact id match.
201          */
202         if (dax_drv->match_always)
203                 return 1;
204
205         return dax_match_id(dax_drv, dev);
206 }
207
208 /*
209  * Rely on the fact that drvdata is set before the attributes are
210  * registered, and that the attributes are unregistered before drvdata
211  * is cleared to assume that drvdata is always valid.
212  */
213 static ssize_t id_show(struct device *dev,
214                 struct device_attribute *attr, char *buf)
215 {
216         struct dax_region *dax_region = dev_get_drvdata(dev);
217
218         return sprintf(buf, "%d\n", dax_region->id);
219 }
220 static DEVICE_ATTR_RO(id);
221
222 static ssize_t region_size_show(struct device *dev,
223                 struct device_attribute *attr, char *buf)
224 {
225         struct dax_region *dax_region = dev_get_drvdata(dev);
226
227         return sprintf(buf, "%llu\n", (unsigned long long)
228                         resource_size(&dax_region->res));
229 }
230 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
231                 region_size_show, NULL);
232
233 static ssize_t region_align_show(struct device *dev,
234                 struct device_attribute *attr, char *buf)
235 {
236         struct dax_region *dax_region = dev_get_drvdata(dev);
237
238         return sprintf(buf, "%u\n", dax_region->align);
239 }
240 static struct device_attribute dev_attr_region_align =
241                 __ATTR(align, 0400, region_align_show, NULL);
242
/* Iterate @res over the child resources of @dax_region's resource. */
#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)
245
246 static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
247 {
248         resource_size_t size = resource_size(&dax_region->res);
249         struct resource *res;
250
251         device_lock_assert(dax_region->dev);
252
253         for_each_dax_region_resource(dax_region, res)
254                 size -= resource_size(res);
255         return size;
256 }
257
258 static ssize_t available_size_show(struct device *dev,
259                 struct device_attribute *attr, char *buf)
260 {
261         struct dax_region *dax_region = dev_get_drvdata(dev);
262         unsigned long long size;
263
264         device_lock(dev);
265         size = dax_region_avail_size(dax_region);
266         device_unlock(dev);
267
268         return sprintf(buf, "%llu\n", size);
269 }
270 static DEVICE_ATTR_RO(available_size);
271
272 static ssize_t seed_show(struct device *dev,
273                 struct device_attribute *attr, char *buf)
274 {
275         struct dax_region *dax_region = dev_get_drvdata(dev);
276         struct device *seed;
277         ssize_t rc;
278
279         if (is_static(dax_region))
280                 return -EINVAL;
281
282         device_lock(dev);
283         seed = dax_region->seed;
284         rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : "");
285         device_unlock(dev);
286
287         return rc;
288 }
289 static DEVICE_ATTR_RO(seed);
290
291 static ssize_t create_show(struct device *dev,
292                 struct device_attribute *attr, char *buf)
293 {
294         struct dax_region *dax_region = dev_get_drvdata(dev);
295         struct device *youngest;
296         ssize_t rc;
297
298         if (is_static(dax_region))
299                 return -EINVAL;
300
301         device_lock(dev);
302         youngest = dax_region->youngest;
303         rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : "");
304         device_unlock(dev);
305
306         return rc;
307 }
308
309 static ssize_t create_store(struct device *dev, struct device_attribute *attr,
310                 const char *buf, size_t len)
311 {
312         struct dax_region *dax_region = dev_get_drvdata(dev);
313         unsigned long long avail;
314         ssize_t rc;
315         int val;
316
317         if (is_static(dax_region))
318                 return -EINVAL;
319
320         rc = kstrtoint(buf, 0, &val);
321         if (rc)
322                 return rc;
323         if (val != 1)
324                 return -EINVAL;
325
326         device_lock(dev);
327         avail = dax_region_avail_size(dax_region);
328         if (avail == 0)
329                 rc = -ENOSPC;
330         else {
331                 struct dev_dax_data data = {
332                         .dax_region = dax_region,
333                         .size = 0,
334                         .id = -1,
335                 };
336                 struct dev_dax *dev_dax = devm_create_dev_dax(&data);
337
338                 if (IS_ERR(dev_dax))
339                         rc = PTR_ERR(dev_dax);
340                 else {
341                         /*
342                          * In support of crafting multiple new devices
343                          * simultaneously multiple seeds can be created,
344                          * but only the first one that has not been
345                          * successfully bound is tracked as the region
346                          * seed.
347                          */
348                         if (!dax_region->seed)
349                                 dax_region->seed = &dev_dax->dev;
350                         dax_region->youngest = &dev_dax->dev;
351                         rc = len;
352                 }
353         }
354         device_unlock(dev);
355
356         return rc;
357 }
358 static DEVICE_ATTR_RW(create);
359
360 void kill_dev_dax(struct dev_dax *dev_dax)
361 {
362         struct dax_device *dax_dev = dev_dax->dax_dev;
363         struct inode *inode = dax_inode(dax_dev);
364
365         kill_dax(dax_dev);
366         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
367 }
368 EXPORT_SYMBOL_GPL(kill_dev_dax);
369
370 static void trim_dev_dax_range(struct dev_dax *dev_dax)
371 {
372         int i = dev_dax->nr_range - 1;
373         struct range *range = &dev_dax->ranges[i].range;
374         struct dax_region *dax_region = dev_dax->region;
375
376         device_lock_assert(dax_region->dev);
377         dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
378                 (unsigned long long)range->start,
379                 (unsigned long long)range->end);
380
381         __release_region(&dax_region->res, range->start, range_len(range));
382         if (--dev_dax->nr_range == 0) {
383                 kfree(dev_dax->ranges);
384                 dev_dax->ranges = NULL;
385         }
386 }
387
388 static void free_dev_dax_ranges(struct dev_dax *dev_dax)
389 {
390         while (dev_dax->nr_range)
391                 trim_dev_dax_range(dev_dax);
392 }
393
/*
 * Tear down a dev_dax given its struct device (passed as void *; it is
 * used with devm_release_action() elsewhere in this file): kill the dax
 * device, release all of its ranges, then delete the device and drop a
 * reference.
 */
static void unregister_dev_dax(void *dev)
{
	struct device *victim = dev;
	struct dev_dax *dev_dax = to_dev_dax(victim);

	dev_dbg(victim, "%s\n", __func__);

	kill_dev_dax(dev_dax);
	free_dev_dax_ranges(dev_dax);
	device_del(victim);
	put_device(victim);
}
405
406 /* a return value >= 0 indicates this invocation invalidated the id */
407 static int __free_dev_dax_id(struct dev_dax *dev_dax)
408 {
409         struct dax_region *dax_region = dev_dax->region;
410         struct device *dev = &dev_dax->dev;
411         int rc = dev_dax->id;
412
413         device_lock_assert(dev);
414
415         if (is_static(dax_region) || dev_dax->id < 0)
416                 return -1;
417         ida_free(&dax_region->ida, dev_dax->id);
418         dev_dax->id = -1;
419         return rc;
420 }
421
422 static int free_dev_dax_id(struct dev_dax *dev_dax)
423 {
424         struct device *dev = &dev_dax->dev;
425         int rc;
426
427         device_lock(dev);
428         rc = __free_dev_dax_id(dev_dax);
429         device_unlock(dev);
430         return rc;
431 }
432
433 static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
434                 const char *buf, size_t len)
435 {
436         struct dax_region *dax_region = dev_get_drvdata(dev);
437         struct dev_dax *dev_dax;
438         struct device *victim;
439         bool do_del = false;
440         int rc;
441
442         if (is_static(dax_region))
443                 return -EINVAL;
444
445         victim = device_find_child_by_name(dax_region->dev, buf);
446         if (!victim)
447                 return -ENXIO;
448
449         device_lock(dev);
450         device_lock(victim);
451         dev_dax = to_dev_dax(victim);
452         if (victim->driver || dev_dax_size(dev_dax))
453                 rc = -EBUSY;
454         else {
455                 /*
456                  * Invalidate the device so it does not become active
457                  * again, but always preserve device-id-0 so that
458                  * /sys/bus/dax/ is guaranteed to be populated while any
459                  * dax_region is registered.
460                  */
461                 if (dev_dax->id > 0) {
462                         do_del = __free_dev_dax_id(dev_dax) >= 0;
463                         rc = len;
464                         if (dax_region->seed == victim)
465                                 dax_region->seed = NULL;
466                         if (dax_region->youngest == victim)
467                                 dax_region->youngest = NULL;
468                 } else
469                         rc = -EBUSY;
470         }
471         device_unlock(victim);
472
473         /* won the race to invalidate the device, clean it up */
474         if (do_del)
475                 devm_release_action(dev, unregister_dev_dax, victim);
476         device_unlock(dev);
477         put_device(victim);
478
479         return rc;
480 }
481 static DEVICE_ATTR_WO(delete);
482
483 static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
484                 int n)
485 {
486         struct device *dev = container_of(kobj, struct device, kobj);
487         struct dax_region *dax_region = dev_get_drvdata(dev);
488
489         if (is_static(dax_region))
490                 if (a == &dev_attr_available_size.attr
491                                 || a == &dev_attr_create.attr
492                                 || a == &dev_attr_seed.attr
493                                 || a == &dev_attr_delete.attr)
494                         return 0;
495         return a->mode;
496 }
497
498 static struct attribute *dax_region_attributes[] = {
499         &dev_attr_available_size.attr,
500         &dev_attr_region_size.attr,
501         &dev_attr_region_align.attr,
502         &dev_attr_create.attr,
503         &dev_attr_seed.attr,
504         &dev_attr_delete.attr,
505         &dev_attr_id.attr,
506         NULL,
507 };
508
509 static const struct attribute_group dax_region_attribute_group = {
510         .name = "dax_region",
511         .attrs = dax_region_attributes,
512         .is_visible = dax_region_visible,
513 };
514
515 static const struct attribute_group *dax_region_attribute_groups[] = {
516         &dax_region_attribute_group,
517         NULL,
518 };
519
520 static void dax_region_free(struct kref *kref)
521 {
522         struct dax_region *dax_region;
523
524         dax_region = container_of(kref, struct dax_region, kref);
525         kfree(dax_region);
526 }
527
528 void dax_region_put(struct dax_region *dax_region)
529 {
530         kref_put(&dax_region->kref, dax_region_free);
531 }
532 EXPORT_SYMBOL_GPL(dax_region_put);
533
534 static void dax_region_unregister(void *region)
535 {
536         struct dax_region *dax_region = region;
537
538         sysfs_remove_groups(&dax_region->dev->kobj,
539                         dax_region_attribute_groups);
540         dax_region_put(dax_region);
541 }
542
543 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
544                 struct range *range, int target_node, unsigned int align,
545                 unsigned long flags)
546 {
547         struct dax_region *dax_region;
548
549         /*
550          * The DAX core assumes that it can store its private data in
551          * parent->driver_data. This WARN is a reminder / safeguard for
552          * developers of device-dax drivers.
553          */
554         if (dev_get_drvdata(parent)) {
555                 dev_WARN(parent, "dax core failed to setup private data\n");
556                 return NULL;
557         }
558
559         if (!IS_ALIGNED(range->start, align)
560                         || !IS_ALIGNED(range_len(range), align))
561                 return NULL;
562
563         dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
564         if (!dax_region)
565                 return NULL;
566
567         dev_set_drvdata(parent, dax_region);
568         kref_init(&dax_region->kref);
569         dax_region->id = region_id;
570         dax_region->align = align;
571         dax_region->dev = parent;
572         dax_region->target_node = target_node;
573         ida_init(&dax_region->ida);
574         dax_region->res = (struct resource) {
575                 .start = range->start,
576                 .end = range->end,
577                 .flags = IORESOURCE_MEM | flags,
578         };
579
580         if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
581                 kfree(dax_region);
582                 return NULL;
583         }
584
585         kref_get(&dax_region->kref);
586         if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
587                 return NULL;
588         return dax_region;
589 }
590 EXPORT_SYMBOL_GPL(alloc_dax_region);
591
592 static void dax_mapping_release(struct device *dev)
593 {
594         struct dax_mapping *mapping = to_dax_mapping(dev);
595         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
596
597         ida_free(&dev_dax->ida, mapping->id);
598         kfree(mapping);
599 }
600
601 static void unregister_dax_mapping(void *data)
602 {
603         struct device *dev = data;
604         struct dax_mapping *mapping = to_dax_mapping(dev);
605         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
606         struct dax_region *dax_region = dev_dax->region;
607
608         dev_dbg(dev, "%s\n", __func__);
609
610         device_lock_assert(dax_region->dev);
611
612         dev_dax->ranges[mapping->range_id].mapping = NULL;
613         mapping->range_id = -1;
614
615         device_del(dev);
616         put_device(dev);
617 }
618
619 static struct dev_dax_range *get_dax_range(struct device *dev)
620 {
621         struct dax_mapping *mapping = to_dax_mapping(dev);
622         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
623         struct dax_region *dax_region = dev_dax->region;
624
625         device_lock(dax_region->dev);
626         if (mapping->range_id < 0) {
627                 device_unlock(dax_region->dev);
628                 return NULL;
629         }
630
631         return &dev_dax->ranges[mapping->range_id];
632 }
633
634 static void put_dax_range(struct dev_dax_range *dax_range)
635 {
636         struct dax_mapping *mapping = dax_range->mapping;
637         struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent);
638         struct dax_region *dax_region = dev_dax->region;
639
640         device_unlock(dax_region->dev);
641 }
642
643 static ssize_t start_show(struct device *dev,
644                 struct device_attribute *attr, char *buf)
645 {
646         struct dev_dax_range *dax_range;
647         ssize_t rc;
648
649         dax_range = get_dax_range(dev);
650         if (!dax_range)
651                 return -ENXIO;
652         rc = sprintf(buf, "%#llx\n", dax_range->range.start);
653         put_dax_range(dax_range);
654
655         return rc;
656 }
657 static DEVICE_ATTR(start, 0400, start_show, NULL);
658
659 static ssize_t end_show(struct device *dev,
660                 struct device_attribute *attr, char *buf)
661 {
662         struct dev_dax_range *dax_range;
663         ssize_t rc;
664
665         dax_range = get_dax_range(dev);
666         if (!dax_range)
667                 return -ENXIO;
668         rc = sprintf(buf, "%#llx\n", dax_range->range.end);
669         put_dax_range(dax_range);
670
671         return rc;
672 }
673 static DEVICE_ATTR(end, 0400, end_show, NULL);
674
675 static ssize_t pgoff_show(struct device *dev,
676                 struct device_attribute *attr, char *buf)
677 {
678         struct dev_dax_range *dax_range;
679         ssize_t rc;
680
681         dax_range = get_dax_range(dev);
682         if (!dax_range)
683                 return -ENXIO;
684         rc = sprintf(buf, "%#lx\n", dax_range->pgoff);
685         put_dax_range(dax_range);
686
687         return rc;
688 }
689 static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);
690
691 static struct attribute *dax_mapping_attributes[] = {
692         &dev_attr_start.attr,
693         &dev_attr_end.attr,
694         &dev_attr_page_offset.attr,
695         NULL,
696 };
697
698 static const struct attribute_group dax_mapping_attribute_group = {
699         .attrs = dax_mapping_attributes,
700 };
701
702 static const struct attribute_group *dax_mapping_attribute_groups[] = {
703         &dax_mapping_attribute_group,
704         NULL,
705 };
706
707 static struct device_type dax_mapping_type = {
708         .release = dax_mapping_release,
709         .groups = dax_mapping_attribute_groups,
710 };
711
712 static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
713 {
714         struct dax_region *dax_region = dev_dax->region;
715         struct dax_mapping *mapping;
716         struct device *dev;
717         int rc;
718
719         device_lock_assert(dax_region->dev);
720
721         if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
722                                 "region disabled\n"))
723                 return -ENXIO;
724
725         mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
726         if (!mapping)
727                 return -ENOMEM;
728         mapping->range_id = range_id;
729         mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
730         if (mapping->id < 0) {
731                 kfree(mapping);
732                 return -ENOMEM;
733         }
734         dev_dax->ranges[range_id].mapping = mapping;
735         dev = &mapping->dev;
736         device_initialize(dev);
737         dev->parent = &dev_dax->dev;
738         dev->type = &dax_mapping_type;
739         dev_set_name(dev, "mapping%d", mapping->id);
740         rc = device_add(dev);
741         if (rc) {
742                 put_device(dev);
743                 return rc;
744         }
745
746         rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
747                         dev);
748         if (rc)
749                 return rc;
750         return 0;
751 }
752
753 static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
754                 resource_size_t size)
755 {
756         struct dax_region *dax_region = dev_dax->region;
757         struct resource *res = &dax_region->res;
758         struct device *dev = &dev_dax->dev;
759         struct dev_dax_range *ranges;
760         unsigned long pgoff = 0;
761         struct resource *alloc;
762         int i, rc;
763
764         device_lock_assert(dax_region->dev);
765
766         /* handle the seed alloc special case */
767         if (!size) {
768                 if (dev_WARN_ONCE(dev, dev_dax->nr_range,
769                                         "0-size allocation must be first\n"))
770                         return -EBUSY;
771                 /* nr_range == 0 is elsewhere special cased as 0-size device */
772                 return 0;
773         }
774
775         ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
776                         * (dev_dax->nr_range + 1), GFP_KERNEL);
777         if (!ranges)
778                 return -ENOMEM;
779
780         alloc = __request_region(res, start, size, dev_name(dev), 0);
781         if (!alloc) {
782                 /*
783                  * If this was an empty set of ranges nothing else
784                  * will release @ranges, so do it now.
785                  */
786                 if (!dev_dax->nr_range) {
787                         kfree(ranges);
788                         ranges = NULL;
789                 }
790                 dev_dax->ranges = ranges;
791                 return -ENOMEM;
792         }
793
794         for (i = 0; i < dev_dax->nr_range; i++)
795                 pgoff += PHYS_PFN(range_len(&ranges[i].range));
796         dev_dax->ranges = ranges;
797         ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
798                 .pgoff = pgoff,
799                 .range = {
800                         .start = alloc->start,
801                         .end = alloc->end,
802                 },
803         };
804
805         dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
806                         &alloc->start, &alloc->end);
807         /*
808          * A dev_dax instance must be registered before mapping device
809          * children can be added. Defer to devm_create_dev_dax() to add
810          * the initial mapping device.
811          */
812         if (!device_is_registered(&dev_dax->dev))
813                 return 0;
814
815         rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
816         if (rc)
817                 trim_dev_dax_range(dev_dax);
818
819         return rc;
820 }
821
822 static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
823 {
824         int last_range = dev_dax->nr_range - 1;
825         struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
826         struct dax_region *dax_region = dev_dax->region;
827         bool is_shrink = resource_size(res) > size;
828         struct range *range = &dax_range->range;
829         struct device *dev = &dev_dax->dev;
830         int rc;
831
832         device_lock_assert(dax_region->dev);
833
834         if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
835                 return -EINVAL;
836
837         rc = adjust_resource(res, range->start, size);
838         if (rc)
839                 return rc;
840
841         *range = (struct range) {
842                 .start = range->start,
843                 .end = range->start + size - 1,
844         };
845
846         dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
847                         last_range, (unsigned long long) range->start,
848                         (unsigned long long) range->end);
849
850         return 0;
851 }
852
853 static ssize_t size_show(struct device *dev,
854                 struct device_attribute *attr, char *buf)
855 {
856         struct dev_dax *dev_dax = to_dev_dax(dev);
857         unsigned long long size;
858
859         device_lock(dev);
860         size = dev_dax_size(dev_dax);
861         device_unlock(dev);
862
863         return sprintf(buf, "%llu\n", size);
864 }
865
866 static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
867 {
868         /*
869          * The minimum mapping granularity for a device instance is a
870          * single subsection, unless the arch says otherwise.
871          */
872         return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
873 }
874
875 static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
876 {
877         resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
878         struct dax_region *dax_region = dev_dax->region;
879         struct device *dev = &dev_dax->dev;
880         int i;
881
882         for (i = dev_dax->nr_range - 1; i >= 0; i--) {
883                 struct range *range = &dev_dax->ranges[i].range;
884                 struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
885                 struct resource *adjust = NULL, *res;
886                 resource_size_t shrink;
887
888                 shrink = min_t(u64, to_shrink, range_len(range));
889                 if (shrink >= range_len(range)) {
890                         devm_release_action(dax_region->dev,
891                                         unregister_dax_mapping, &mapping->dev);
892                         trim_dev_dax_range(dev_dax);
893                         to_shrink -= shrink;
894                         if (!to_shrink)
895                                 break;
896                         continue;
897                 }
898
899                 for_each_dax_region_resource(dax_region, res)
900                         if (strcmp(res->name, dev_name(dev)) == 0
901                                         && res->start == range->start) {
902                                 adjust = res;
903                                 break;
904                         }
905
906                 if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
907                                         "failed to find matching resource\n"))
908                         return -ENXIO;
909                 return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
910                                 - shrink);
911         }
912         return 0;
913 }
914
915 /*
916  * Only allow adjustments that preserve the relative pgoff of existing
917  * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
918  */
919 static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
920 {
921         struct dev_dax_range *last;
922         int i;
923
924         if (dev_dax->nr_range == 0)
925                 return false;
926         if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
927                 return false;
928         last = &dev_dax->ranges[dev_dax->nr_range - 1];
929         if (last->range.start != res->start || last->range.end != res->end)
930                 return false;
931         for (i = 0; i < dev_dax->nr_range - 1; i++) {
932                 struct dev_dax_range *dax_range = &dev_dax->ranges[i];
933
934                 if (dax_range->pgoff > last->pgoff)
935                         return false;
936         }
937
938         return true;
939 }
940
941 static ssize_t dev_dax_resize(struct dax_region *dax_region,
942                 struct dev_dax *dev_dax, resource_size_t size)
943 {
944         resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
945         resource_size_t dev_size = dev_dax_size(dev_dax);
946         struct resource *region_res = &dax_region->res;
947         struct device *dev = &dev_dax->dev;
948         struct resource *res, *first;
949         resource_size_t alloc = 0;
950         int rc;
951
952         if (dev->driver)
953                 return -EBUSY;
954         if (size == dev_size)
955                 return 0;
956         if (size > dev_size && size - dev_size > avail)
957                 return -ENOSPC;
958         if (size < dev_size)
959                 return dev_dax_shrink(dev_dax, size);
960
961         to_alloc = size - dev_size;
962         if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
963                         "resize of %pa misaligned\n", &to_alloc))
964                 return -ENXIO;
965
966         /*
967          * Expand the device into the unused portion of the region. This
968          * may involve adjusting the end of an existing resource, or
969          * allocating a new resource.
970          */
971 retry:
972         first = region_res->child;
973         if (!first)
974                 return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);
975
976         rc = -ENOSPC;
977         for (res = first; res; res = res->sibling) {
978                 struct resource *next = res->sibling;
979
980                 /* space at the beginning of the region */
981                 if (res == first && res->start > dax_region->res.start) {
982                         alloc = min(res->start - dax_region->res.start, to_alloc);
983                         rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
984                         break;
985                 }
986
987                 alloc = 0;
988                 /* space between allocations */
989                 if (next && next->start > res->end + 1)
990                         alloc = min(next->start - (res->end + 1), to_alloc);
991
992                 /* space at the end of the region */
993                 if (!alloc && !next && res->end < region_res->end)
994                         alloc = min(region_res->end - res->end, to_alloc);
995
996                 if (!alloc)
997                         continue;
998
999                 if (adjust_ok(dev_dax, res)) {
1000                         rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
1001                         break;
1002                 }
1003                 rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
1004                 break;
1005         }
1006         if (rc)
1007                 return rc;
1008         to_alloc -= alloc;
1009         if (to_alloc)
1010                 goto retry;
1011         return 0;
1012 }
1013
1014 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
1015                 const char *buf, size_t len)
1016 {
1017         ssize_t rc;
1018         unsigned long long val;
1019         struct dev_dax *dev_dax = to_dev_dax(dev);
1020         struct dax_region *dax_region = dev_dax->region;
1021
1022         rc = kstrtoull(buf, 0, &val);
1023         if (rc)
1024                 return rc;
1025
1026         if (!alloc_is_aligned(dev_dax, val)) {
1027                 dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
1028                 return -EINVAL;
1029         }
1030
1031         device_lock(dax_region->dev);
1032         if (!dax_region->dev->driver) {
1033                 device_unlock(dax_region->dev);
1034                 return -ENXIO;
1035         }
1036         device_lock(dev);
1037         rc = dev_dax_resize(dax_region, dev_dax, val);
1038         device_unlock(dev);
1039         device_unlock(dax_region->dev);
1040
1041         return rc == 0 ? len : rc;
1042 }
1043 static DEVICE_ATTR_RW(size);
1044
1045 static ssize_t range_parse(const char *opt, size_t len, struct range *range)
1046 {
1047         unsigned long long addr = 0;
1048         char *start, *end, *str;
1049         ssize_t rc = EINVAL;
1050
1051         str = kstrdup(opt, GFP_KERNEL);
1052         if (!str)
1053                 return rc;
1054
1055         end = str;
1056         start = strsep(&end, "-");
1057         if (!start || !end)
1058                 goto err;
1059
1060         rc = kstrtoull(start, 16, &addr);
1061         if (rc)
1062                 goto err;
1063         range->start = addr;
1064
1065         rc = kstrtoull(end, 16, &addr);
1066         if (rc)
1067                 goto err;
1068         range->end = addr;
1069
1070 err:
1071         kfree(str);
1072         return rc;
1073 }
1074
1075 static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
1076                 const char *buf, size_t len)
1077 {
1078         struct dev_dax *dev_dax = to_dev_dax(dev);
1079         struct dax_region *dax_region = dev_dax->region;
1080         size_t to_alloc;
1081         struct range r;
1082         ssize_t rc;
1083
1084         rc = range_parse(buf, len, &r);
1085         if (rc)
1086                 return rc;
1087
1088         rc = -ENXIO;
1089         device_lock(dax_region->dev);
1090         if (!dax_region->dev->driver) {
1091                 device_unlock(dax_region->dev);
1092                 return rc;
1093         }
1094         device_lock(dev);
1095
1096         to_alloc = range_len(&r);
1097         if (alloc_is_aligned(dev_dax, to_alloc))
1098                 rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
1099         device_unlock(dev);
1100         device_unlock(dax_region->dev);
1101
1102         return rc == 0 ? len : rc;
1103 }
1104 static DEVICE_ATTR_WO(mapping);
1105
1106 static ssize_t align_show(struct device *dev,
1107                 struct device_attribute *attr, char *buf)
1108 {
1109         struct dev_dax *dev_dax = to_dev_dax(dev);
1110
1111         return sprintf(buf, "%d\n", dev_dax->align);
1112 }
1113
1114 static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
1115 {
1116         struct device *dev = &dev_dax->dev;
1117         int i;
1118
1119         for (i = 0; i < dev_dax->nr_range; i++) {
1120                 size_t len = range_len(&dev_dax->ranges[i].range);
1121
1122                 if (!alloc_is_aligned(dev_dax, len)) {
1123                         dev_dbg(dev, "%s: align %u invalid for range %d\n",
1124                                 __func__, dev_dax->align, i);
1125                         return -EINVAL;
1126                 }
1127         }
1128
1129         return 0;
1130 }
1131
1132 static ssize_t align_store(struct device *dev, struct device_attribute *attr,
1133                 const char *buf, size_t len)
1134 {
1135         struct dev_dax *dev_dax = to_dev_dax(dev);
1136         struct dax_region *dax_region = dev_dax->region;
1137         unsigned long val, align_save;
1138         ssize_t rc;
1139
1140         rc = kstrtoul(buf, 0, &val);
1141         if (rc)
1142                 return -ENXIO;
1143
1144         if (!dax_align_valid(val))
1145                 return -EINVAL;
1146
1147         device_lock(dax_region->dev);
1148         if (!dax_region->dev->driver) {
1149                 device_unlock(dax_region->dev);
1150                 return -ENXIO;
1151         }
1152
1153         device_lock(dev);
1154         if (dev->driver) {
1155                 rc = -EBUSY;
1156                 goto out_unlock;
1157         }
1158
1159         align_save = dev_dax->align;
1160         dev_dax->align = val;
1161         rc = dev_dax_validate_align(dev_dax);
1162         if (rc)
1163                 dev_dax->align = align_save;
1164 out_unlock:
1165         device_unlock(dev);
1166         device_unlock(dax_region->dev);
1167         return rc == 0 ? len : rc;
1168 }
1169 static DEVICE_ATTR_RW(align);
1170
1171 static int dev_dax_target_node(struct dev_dax *dev_dax)
1172 {
1173         struct dax_region *dax_region = dev_dax->region;
1174
1175         return dax_region->target_node;
1176 }
1177
1178 static ssize_t target_node_show(struct device *dev,
1179                 struct device_attribute *attr, char *buf)
1180 {
1181         struct dev_dax *dev_dax = to_dev_dax(dev);
1182
1183         return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
1184 }
1185 static DEVICE_ATTR_RO(target_node);
1186
1187 static ssize_t resource_show(struct device *dev,
1188                 struct device_attribute *attr, char *buf)
1189 {
1190         struct dev_dax *dev_dax = to_dev_dax(dev);
1191         struct dax_region *dax_region = dev_dax->region;
1192         unsigned long long start;
1193
1194         if (dev_dax->nr_range < 1)
1195                 start = dax_region->res.start;
1196         else
1197                 start = dev_dax->ranges[0].range.start;
1198
1199         return sprintf(buf, "%#llx\n", start);
1200 }
1201 static DEVICE_ATTR(resource, 0400, resource_show, NULL);
1202
1203 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1204                 char *buf)
1205 {
1206         /*
1207          * We only ever expect to handle device-dax instances, i.e. the
1208          * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
1209          */
1210         return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
1211 }
1212 static DEVICE_ATTR_RO(modalias);
1213
1214 static ssize_t numa_node_show(struct device *dev,
1215                 struct device_attribute *attr, char *buf)
1216 {
1217         return sprintf(buf, "%d\n", dev_to_node(dev));
1218 }
1219 static DEVICE_ATTR_RO(numa_node);
1220
1221 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
1222 {
1223         struct device *dev = container_of(kobj, struct device, kobj);
1224         struct dev_dax *dev_dax = to_dev_dax(dev);
1225         struct dax_region *dax_region = dev_dax->region;
1226
1227         if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
1228                 return 0;
1229         if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
1230                 return 0;
1231         if (a == &dev_attr_mapping.attr && is_static(dax_region))
1232                 return 0;
1233         if ((a == &dev_attr_align.attr ||
1234              a == &dev_attr_size.attr) && is_static(dax_region))
1235                 return 0444;
1236         return a->mode;
1237 }
1238
1239 static struct attribute *dev_dax_attributes[] = {
1240         &dev_attr_modalias.attr,
1241         &dev_attr_size.attr,
1242         &dev_attr_mapping.attr,
1243         &dev_attr_target_node.attr,
1244         &dev_attr_align.attr,
1245         &dev_attr_resource.attr,
1246         &dev_attr_numa_node.attr,
1247         NULL,
1248 };
1249
1250 static const struct attribute_group dev_dax_attribute_group = {
1251         .attrs = dev_dax_attributes,
1252         .is_visible = dev_dax_visible,
1253 };
1254
1255 static const struct attribute_group *dax_attribute_groups[] = {
1256         &dev_dax_attribute_group,
1257         NULL,
1258 };
1259
1260 static void dev_dax_release(struct device *dev)
1261 {
1262         struct dev_dax *dev_dax = to_dev_dax(dev);
1263         struct dax_region *dax_region = dev_dax->region;
1264         struct dax_device *dax_dev = dev_dax->dax_dev;
1265
1266         put_dax(dax_dev);
1267         free_dev_dax_id(dev_dax);
1268         dax_region_put(dax_region);
1269         kfree(dev_dax->pgmap);
1270         kfree(dev_dax);
1271 }
1272
1273 static const struct device_type dev_dax_type = {
1274         .release = dev_dax_release,
1275         .groups = dax_attribute_groups,
1276 };
1277
1278 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
1279 {
1280         struct dax_region *dax_region = data->dax_region;
1281         struct device *parent = dax_region->dev;
1282         struct dax_device *dax_dev;
1283         struct dev_dax *dev_dax;
1284         struct inode *inode;
1285         struct device *dev;
1286         int rc;
1287
1288         dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
1289         if (!dev_dax)
1290                 return ERR_PTR(-ENOMEM);
1291
1292         if (is_static(dax_region)) {
1293                 if (dev_WARN_ONCE(parent, data->id < 0,
1294                                 "dynamic id specified to static region\n")) {
1295                         rc = -EINVAL;
1296                         goto err_id;
1297                 }
1298
1299                 dev_dax->id = data->id;
1300         } else {
1301                 if (dev_WARN_ONCE(parent, data->id >= 0,
1302                                 "static id specified to dynamic region\n")) {
1303                         rc = -EINVAL;
1304                         goto err_id;
1305                 }
1306
1307                 rc = ida_alloc(&dax_region->ida, GFP_KERNEL);
1308                 if (rc < 0)
1309                         goto err_id;
1310                 dev_dax->id = rc;
1311         }
1312
1313         dev_dax->region = dax_region;
1314         dev = &dev_dax->dev;
1315         device_initialize(dev);
1316         dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
1317
1318         rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
1319         if (rc)
1320                 goto err_range;
1321
1322         if (data->pgmap) {
1323                 dev_WARN_ONCE(parent, !is_static(dax_region),
1324                         "custom dev_pagemap requires a static dax_region\n");
1325
1326                 dev_dax->pgmap = kmemdup(data->pgmap,
1327                                 sizeof(struct dev_pagemap), GFP_KERNEL);
1328                 if (!dev_dax->pgmap) {
1329                         rc = -ENOMEM;
1330                         goto err_pgmap;
1331                 }
1332         }
1333
1334         /*
1335          * No 'host' or dax_operations since there is no access to this
1336          * device outside of mmap of the resulting character device.
1337          */
1338         dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
1339         if (IS_ERR(dax_dev)) {
1340                 rc = PTR_ERR(dax_dev);
1341                 goto err_alloc_dax;
1342         }
1343
1344         /* a device_dax instance is dead while the driver is not attached */
1345         kill_dax(dax_dev);
1346
1347         dev_dax->dax_dev = dax_dev;
1348         dev_dax->target_node = dax_region->target_node;
1349         dev_dax->align = dax_region->align;
1350         ida_init(&dev_dax->ida);
1351         kref_get(&dax_region->kref);
1352
1353         inode = dax_inode(dax_dev);
1354         dev->devt = inode->i_rdev;
1355         if (data->subsys == DEV_DAX_BUS)
1356                 dev->bus = &dax_bus_type;
1357         else
1358                 dev->class = dax_class;
1359         dev->parent = parent;
1360         dev->type = &dev_dax_type;
1361
1362         rc = device_add(dev);
1363         if (rc) {
1364                 kill_dev_dax(dev_dax);
1365                 put_device(dev);
1366                 return ERR_PTR(rc);
1367         }
1368
1369         rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
1370         if (rc)
1371                 return ERR_PTR(rc);
1372
1373         /* register mapping device for the initial allocation range */
1374         if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
1375                 rc = devm_register_dax_mapping(dev_dax, 0);
1376                 if (rc)
1377                         return ERR_PTR(rc);
1378         }
1379
1380         return dev_dax;
1381
1382 err_alloc_dax:
1383         kfree(dev_dax->pgmap);
1384 err_pgmap:
1385         free_dev_dax_ranges(dev_dax);
1386 err_range:
1387         free_dev_dax_id(dev_dax);
1388 err_id:
1389         kfree(dev_dax);
1390
1391         return ERR_PTR(rc);
1392 }
1393 EXPORT_SYMBOL_GPL(devm_create_dev_dax);
1394
1395 static int match_always_count;
1396
1397 int __dax_driver_register(struct dax_device_driver *dax_drv,
1398                 struct module *module, const char *mod_name)
1399 {
1400         struct device_driver *drv = &dax_drv->drv;
1401         int rc = 0;
1402
1403         INIT_LIST_HEAD(&dax_drv->ids);
1404         drv->owner = module;
1405         drv->name = mod_name;
1406         drv->mod_name = mod_name;
1407         drv->bus = &dax_bus_type;
1408
1409         /* there can only be one default driver */
1410         mutex_lock(&dax_bus_lock);
1411         match_always_count += dax_drv->match_always;
1412         if (match_always_count > 1) {
1413                 match_always_count--;
1414                 WARN_ON(1);
1415                 rc = -EINVAL;
1416         }
1417         mutex_unlock(&dax_bus_lock);
1418         if (rc)
1419                 return rc;
1420         return driver_register(drv);
1421 }
1422 EXPORT_SYMBOL_GPL(__dax_driver_register);
1423
1424 void dax_driver_unregister(struct dax_device_driver *dax_drv)
1425 {
1426         struct device_driver *drv = &dax_drv->drv;
1427         struct dax_id *dax_id, *_id;
1428
1429         mutex_lock(&dax_bus_lock);
1430         match_always_count -= dax_drv->match_always;
1431         list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
1432                 list_del(&dax_id->list);
1433                 kfree(dax_id);
1434         }
1435         mutex_unlock(&dax_bus_lock);
1436         driver_unregister(drv);
1437 }
1438 EXPORT_SYMBOL_GPL(dax_driver_unregister);
1439
1440 int __init dax_bus_init(void)
1441 {
1442         int rc;
1443
1444         if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
1445                 dax_class = class_create(THIS_MODULE, "dax");
1446                 if (IS_ERR(dax_class))
1447                         return PTR_ERR(dax_class);
1448         }
1449
1450         rc = bus_register(&dax_bus_type);
1451         if (rc)
1452                 class_destroy(dax_class);
1453         return rc;
1454 }
1455
1456 void __exit dax_bus_exit(void)
1457 {
1458         bus_unregister(&dax_bus_type);
1459         class_destroy(dax_class);
1460 }