device-dax: Avoid an unnecessary check in alloc_dev_dax_range()
[linux-2.6-microblaze.git] / drivers / dax / bus.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
3 #include <linux/memremap.h>
4 #include <linux/device.h>
5 #include <linux/mutex.h>
6 #include <linux/list.h>
7 #include <linux/slab.h>
8 #include <linux/dax.h>
9 #include <linux/io.h>
10 #include "dax-private.h"
11 #include "bus.h"
12
13 static struct class *dax_class;
14
15 static DEFINE_MUTEX(dax_bus_lock);
16
17 #define DAX_NAME_LEN 30
18 struct dax_id {
19         struct list_head list;
20         char dev_name[DAX_NAME_LEN];
21 };
22
23 static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
24 {
25         /*
26          * We only ever expect to handle device-dax instances, i.e. the
27          * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
28          */
29         return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
30 }
31
32 static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
33 {
34         return container_of(drv, struct dax_device_driver, drv);
35 }
36
37 static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
38                 const char *dev_name)
39 {
40         struct dax_id *dax_id;
41
42         lockdep_assert_held(&dax_bus_lock);
43
44         list_for_each_entry(dax_id, &dax_drv->ids, list)
45                 if (sysfs_streq(dax_id->dev_name, dev_name))
46                         return dax_id;
47         return NULL;
48 }
49
50 static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
51 {
52         int match;
53
54         mutex_lock(&dax_bus_lock);
55         match = !!__dax_match_id(dax_drv, dev_name(dev));
56         mutex_unlock(&dax_bus_lock);
57
58         return match;
59 }
60
/* Operation requested through the driver's new_id / remove_id attributes. */
enum id_action {
	ID_REMOVE,	/* drop a matching entry from the id list */
	ID_ADD,		/* add an entry, then try to attach the driver */
};
65
66 static ssize_t do_id_store(struct device_driver *drv, const char *buf,
67                 size_t count, enum id_action action)
68 {
69         struct dax_device_driver *dax_drv = to_dax_drv(drv);
70         unsigned int region_id, id;
71         char devname[DAX_NAME_LEN];
72         struct dax_id *dax_id;
73         ssize_t rc = count;
74         int fields;
75
76         fields = sscanf(buf, "dax%d.%d", &region_id, &id);
77         if (fields != 2)
78                 return -EINVAL;
79         sprintf(devname, "dax%d.%d", region_id, id);
80         if (!sysfs_streq(buf, devname))
81                 return -EINVAL;
82
83         mutex_lock(&dax_bus_lock);
84         dax_id = __dax_match_id(dax_drv, buf);
85         if (!dax_id) {
86                 if (action == ID_ADD) {
87                         dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
88                         if (dax_id) {
89                                 strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
90                                 list_add(&dax_id->list, &dax_drv->ids);
91                         } else
92                                 rc = -ENOMEM;
93                 } else
94                         /* nothing to remove */;
95         } else if (action == ID_REMOVE) {
96                 list_del(&dax_id->list);
97                 kfree(dax_id);
98         } else
99                 /* dax_id already added */;
100         mutex_unlock(&dax_bus_lock);
101
102         if (rc < 0)
103                 return rc;
104         if (action == ID_ADD)
105                 rc = driver_attach(drv);
106         if (rc)
107                 return rc;
108         return count;
109 }
110
111 static ssize_t new_id_store(struct device_driver *drv, const char *buf,
112                 size_t count)
113 {
114         return do_id_store(drv, buf, count, ID_ADD);
115 }
116 static DRIVER_ATTR_WO(new_id);
117
118 static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
119                 size_t count)
120 {
121         return do_id_store(drv, buf, count, ID_REMOVE);
122 }
123 static DRIVER_ATTR_WO(remove_id);
124
125 static struct attribute *dax_drv_attrs[] = {
126         &driver_attr_new_id.attr,
127         &driver_attr_remove_id.attr,
128         NULL,
129 };
130 ATTRIBUTE_GROUPS(dax_drv);
131
132 static int dax_bus_match(struct device *dev, struct device_driver *drv);
133
134 static bool is_static(struct dax_region *dax_region)
135 {
136         return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
137 }
138
139 static u64 dev_dax_size(struct dev_dax *dev_dax)
140 {
141         u64 size = 0;
142         int i;
143
144         device_lock_assert(&dev_dax->dev);
145
146         for (i = 0; i < dev_dax->nr_range; i++)
147                 size += range_len(&dev_dax->ranges[i].range);
148
149         return size;
150 }
151
152 static int dax_bus_probe(struct device *dev)
153 {
154         struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
155         struct dev_dax *dev_dax = to_dev_dax(dev);
156         struct dax_region *dax_region = dev_dax->region;
157         int rc;
158
159         if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0)
160                 return -ENXIO;
161
162         rc = dax_drv->probe(dev_dax);
163
164         if (rc || is_static(dax_region))
165                 return rc;
166
167         /*
168          * Track new seed creation only after successful probe of the
169          * previous seed.
170          */
171         if (dax_region->seed == dev)
172                 dax_region->seed = NULL;
173
174         return 0;
175 }
176
177 static int dax_bus_remove(struct device *dev)
178 {
179         struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
180         struct dev_dax *dev_dax = to_dev_dax(dev);
181
182         return dax_drv->remove(dev_dax);
183 }
184
185 static struct bus_type dax_bus_type = {
186         .name = "dax",
187         .uevent = dax_bus_uevent,
188         .match = dax_bus_match,
189         .probe = dax_bus_probe,
190         .remove = dax_bus_remove,
191         .drv_groups = dax_drv_groups,
192 };
193
194 static int dax_bus_match(struct device *dev, struct device_driver *drv)
195 {
196         struct dax_device_driver *dax_drv = to_dax_drv(drv);
197
198         /*
199          * All but the 'device-dax' driver, which has 'match_always'
200          * set, requires an exact id match.
201          */
202         if (dax_drv->match_always)
203                 return 1;
204
205         return dax_match_id(dax_drv, dev);
206 }
207
208 /*
209  * Rely on the fact that drvdata is set before the attributes are
210  * registered, and that the attributes are unregistered before drvdata
211  * is cleared to assume that drvdata is always valid.
212  */
213 static ssize_t id_show(struct device *dev,
214                 struct device_attribute *attr, char *buf)
215 {
216         struct dax_region *dax_region = dev_get_drvdata(dev);
217
218         return sprintf(buf, "%d\n", dax_region->id);
219 }
220 static DEVICE_ATTR_RO(id);
221
222 static ssize_t region_size_show(struct device *dev,
223                 struct device_attribute *attr, char *buf)
224 {
225         struct dax_region *dax_region = dev_get_drvdata(dev);
226
227         return sprintf(buf, "%llu\n", (unsigned long long)
228                         resource_size(&dax_region->res));
229 }
230 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
231                 region_size_show, NULL);
232
233 static ssize_t region_align_show(struct device *dev,
234                 struct device_attribute *attr, char *buf)
235 {
236         struct dax_region *dax_region = dev_get_drvdata(dev);
237
238         return sprintf(buf, "%u\n", dax_region->align);
239 }
240 static struct device_attribute dev_attr_region_align =
241                 __ATTR(align, 0400, region_align_show, NULL);
242
/*
 * Walk the child resources of @dax_region's resource; @iter is the
 * caller's 'struct resource *' cursor.
 *
 * The cursor parameter must not be spelled 'res': the preprocessor
 * would also substitute it into the '->res' member access, making the
 * macro usable only with a variable literally named 'res'.
 */
#define for_each_dax_region_resource(dax_region, iter) \
	for ((iter) = (dax_region)->res.child; (iter); (iter) = (iter)->sibling)
245
246 static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
247 {
248         resource_size_t size = resource_size(&dax_region->res);
249         struct resource *res;
250
251         device_lock_assert(dax_region->dev);
252
253         for_each_dax_region_resource(dax_region, res)
254                 size -= resource_size(res);
255         return size;
256 }
257
258 static ssize_t available_size_show(struct device *dev,
259                 struct device_attribute *attr, char *buf)
260 {
261         struct dax_region *dax_region = dev_get_drvdata(dev);
262         unsigned long long size;
263
264         device_lock(dev);
265         size = dax_region_avail_size(dax_region);
266         device_unlock(dev);
267
268         return sprintf(buf, "%llu\n", size);
269 }
270 static DEVICE_ATTR_RO(available_size);
271
272 static ssize_t seed_show(struct device *dev,
273                 struct device_attribute *attr, char *buf)
274 {
275         struct dax_region *dax_region = dev_get_drvdata(dev);
276         struct device *seed;
277         ssize_t rc;
278
279         if (is_static(dax_region))
280                 return -EINVAL;
281
282         device_lock(dev);
283         seed = dax_region->seed;
284         rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : "");
285         device_unlock(dev);
286
287         return rc;
288 }
289 static DEVICE_ATTR_RO(seed);
290
291 static ssize_t create_show(struct device *dev,
292                 struct device_attribute *attr, char *buf)
293 {
294         struct dax_region *dax_region = dev_get_drvdata(dev);
295         struct device *youngest;
296         ssize_t rc;
297
298         if (is_static(dax_region))
299                 return -EINVAL;
300
301         device_lock(dev);
302         youngest = dax_region->youngest;
303         rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : "");
304         device_unlock(dev);
305
306         return rc;
307 }
308
309 static ssize_t create_store(struct device *dev, struct device_attribute *attr,
310                 const char *buf, size_t len)
311 {
312         struct dax_region *dax_region = dev_get_drvdata(dev);
313         unsigned long long avail;
314         ssize_t rc;
315         int val;
316
317         if (is_static(dax_region))
318                 return -EINVAL;
319
320         rc = kstrtoint(buf, 0, &val);
321         if (rc)
322                 return rc;
323         if (val != 1)
324                 return -EINVAL;
325
326         device_lock(dev);
327         avail = dax_region_avail_size(dax_region);
328         if (avail == 0)
329                 rc = -ENOSPC;
330         else {
331                 struct dev_dax_data data = {
332                         .dax_region = dax_region,
333                         .size = 0,
334                         .id = -1,
335                 };
336                 struct dev_dax *dev_dax = devm_create_dev_dax(&data);
337
338                 if (IS_ERR(dev_dax))
339                         rc = PTR_ERR(dev_dax);
340                 else {
341                         /*
342                          * In support of crafting multiple new devices
343                          * simultaneously multiple seeds can be created,
344                          * but only the first one that has not been
345                          * successfully bound is tracked as the region
346                          * seed.
347                          */
348                         if (!dax_region->seed)
349                                 dax_region->seed = &dev_dax->dev;
350                         dax_region->youngest = &dev_dax->dev;
351                         rc = len;
352                 }
353         }
354         device_unlock(dev);
355
356         return rc;
357 }
358 static DEVICE_ATTR_RW(create);
359
360 void kill_dev_dax(struct dev_dax *dev_dax)
361 {
362         struct dax_device *dax_dev = dev_dax->dax_dev;
363         struct inode *inode = dax_inode(dax_dev);
364
365         kill_dax(dax_dev);
366         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
367 }
368 EXPORT_SYMBOL_GPL(kill_dev_dax);
369
370 static void trim_dev_dax_range(struct dev_dax *dev_dax)
371 {
372         int i = dev_dax->nr_range - 1;
373         struct range *range = &dev_dax->ranges[i].range;
374         struct dax_region *dax_region = dev_dax->region;
375
376         device_lock_assert(dax_region->dev);
377         dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
378                 (unsigned long long)range->start,
379                 (unsigned long long)range->end);
380
381         __release_region(&dax_region->res, range->start, range_len(range));
382         if (--dev_dax->nr_range == 0) {
383                 kfree(dev_dax->ranges);
384                 dev_dax->ranges = NULL;
385         }
386 }
387
388 static void free_dev_dax_ranges(struct dev_dax *dev_dax)
389 {
390         while (dev_dax->nr_range)
391                 trim_dev_dax_range(dev_dax);
392 }
393
/*
 * Teardown action for a dev_dax (@dev is its struct device): kill the
 * device, release its ranges, then delete it and drop a reference.
 */
static void unregister_dev_dax(void *dev)
{
	struct device *target = dev;
	struct dev_dax *dev_dax = to_dev_dax(target);

	dev_dbg(target, "%s\n", __func__);

	kill_dev_dax(dev_dax);
	free_dev_dax_ranges(dev_dax);
	device_del(target);
	put_device(target);
}
405
406 /* a return value >= 0 indicates this invocation invalidated the id */
407 static int __free_dev_dax_id(struct dev_dax *dev_dax)
408 {
409         struct dax_region *dax_region = dev_dax->region;
410         struct device *dev = &dev_dax->dev;
411         int rc = dev_dax->id;
412
413         device_lock_assert(dev);
414
415         if (is_static(dax_region) || dev_dax->id < 0)
416                 return -1;
417         ida_free(&dax_region->ida, dev_dax->id);
418         dev_dax->id = -1;
419         return rc;
420 }
421
422 static int free_dev_dax_id(struct dev_dax *dev_dax)
423 {
424         struct device *dev = &dev_dax->dev;
425         int rc;
426
427         device_lock(dev);
428         rc = __free_dev_dax_id(dev_dax);
429         device_unlock(dev);
430         return rc;
431 }
432
433 static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
434                 const char *buf, size_t len)
435 {
436         struct dax_region *dax_region = dev_get_drvdata(dev);
437         struct dev_dax *dev_dax;
438         struct device *victim;
439         bool do_del = false;
440         int rc;
441
442         if (is_static(dax_region))
443                 return -EINVAL;
444
445         victim = device_find_child_by_name(dax_region->dev, buf);
446         if (!victim)
447                 return -ENXIO;
448
449         device_lock(dev);
450         device_lock(victim);
451         dev_dax = to_dev_dax(victim);
452         if (victim->driver || dev_dax_size(dev_dax))
453                 rc = -EBUSY;
454         else {
455                 /*
456                  * Invalidate the device so it does not become active
457                  * again, but always preserve device-id-0 so that
458                  * /sys/bus/dax/ is guaranteed to be populated while any
459                  * dax_region is registered.
460                  */
461                 if (dev_dax->id > 0) {
462                         do_del = __free_dev_dax_id(dev_dax) >= 0;
463                         rc = len;
464                         if (dax_region->seed == victim)
465                                 dax_region->seed = NULL;
466                         if (dax_region->youngest == victim)
467                                 dax_region->youngest = NULL;
468                 } else
469                         rc = -EBUSY;
470         }
471         device_unlock(victim);
472
473         /* won the race to invalidate the device, clean it up */
474         if (do_del)
475                 devm_release_action(dev, unregister_dev_dax, victim);
476         device_unlock(dev);
477         put_device(victim);
478
479         return rc;
480 }
481 static DEVICE_ATTR_WO(delete);
482
483 static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
484                 int n)
485 {
486         struct device *dev = container_of(kobj, struct device, kobj);
487         struct dax_region *dax_region = dev_get_drvdata(dev);
488
489         if (is_static(dax_region))
490                 if (a == &dev_attr_available_size.attr
491                                 || a == &dev_attr_create.attr
492                                 || a == &dev_attr_seed.attr
493                                 || a == &dev_attr_delete.attr)
494                         return 0;
495         return a->mode;
496 }
497
498 static struct attribute *dax_region_attributes[] = {
499         &dev_attr_available_size.attr,
500         &dev_attr_region_size.attr,
501         &dev_attr_region_align.attr,
502         &dev_attr_create.attr,
503         &dev_attr_seed.attr,
504         &dev_attr_delete.attr,
505         &dev_attr_id.attr,
506         NULL,
507 };
508
509 static const struct attribute_group dax_region_attribute_group = {
510         .name = "dax_region",
511         .attrs = dax_region_attributes,
512         .is_visible = dax_region_visible,
513 };
514
515 static const struct attribute_group *dax_region_attribute_groups[] = {
516         &dax_region_attribute_group,
517         NULL,
518 };
519
520 static void dax_region_free(struct kref *kref)
521 {
522         struct dax_region *dax_region;
523
524         dax_region = container_of(kref, struct dax_region, kref);
525         kfree(dax_region);
526 }
527
528 void dax_region_put(struct dax_region *dax_region)
529 {
530         kref_put(&dax_region->kref, dax_region_free);
531 }
532 EXPORT_SYMBOL_GPL(dax_region_put);
533
534 static void dax_region_unregister(void *region)
535 {
536         struct dax_region *dax_region = region;
537
538         sysfs_remove_groups(&dax_region->dev->kobj,
539                         dax_region_attribute_groups);
540         dax_region_put(dax_region);
541 }
542
543 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
544                 struct range *range, int target_node, unsigned int align,
545                 unsigned long flags)
546 {
547         struct dax_region *dax_region;
548
549         /*
550          * The DAX core assumes that it can store its private data in
551          * parent->driver_data. This WARN is a reminder / safeguard for
552          * developers of device-dax drivers.
553          */
554         if (dev_get_drvdata(parent)) {
555                 dev_WARN(parent, "dax core failed to setup private data\n");
556                 return NULL;
557         }
558
559         if (!IS_ALIGNED(range->start, align)
560                         || !IS_ALIGNED(range_len(range), align))
561                 return NULL;
562
563         dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
564         if (!dax_region)
565                 return NULL;
566
567         dev_set_drvdata(parent, dax_region);
568         kref_init(&dax_region->kref);
569         dax_region->id = region_id;
570         dax_region->align = align;
571         dax_region->dev = parent;
572         dax_region->target_node = target_node;
573         ida_init(&dax_region->ida);
574         dax_region->res = (struct resource) {
575                 .start = range->start,
576                 .end = range->end,
577                 .flags = IORESOURCE_MEM | flags,
578         };
579
580         if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
581                 kfree(dax_region);
582                 return NULL;
583         }
584
585         kref_get(&dax_region->kref);
586         if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
587                 return NULL;
588         return dax_region;
589 }
590 EXPORT_SYMBOL_GPL(alloc_dax_region);
591
592 static void dax_mapping_release(struct device *dev)
593 {
594         struct dax_mapping *mapping = to_dax_mapping(dev);
595         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
596
597         ida_free(&dev_dax->ida, mapping->id);
598         kfree(mapping);
599 }
600
601 static void unregister_dax_mapping(void *data)
602 {
603         struct device *dev = data;
604         struct dax_mapping *mapping = to_dax_mapping(dev);
605         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
606         struct dax_region *dax_region = dev_dax->region;
607
608         dev_dbg(dev, "%s\n", __func__);
609
610         device_lock_assert(dax_region->dev);
611
612         dev_dax->ranges[mapping->range_id].mapping = NULL;
613         mapping->range_id = -1;
614
615         device_del(dev);
616         put_device(dev);
617 }
618
619 static struct dev_dax_range *get_dax_range(struct device *dev)
620 {
621         struct dax_mapping *mapping = to_dax_mapping(dev);
622         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
623         struct dax_region *dax_region = dev_dax->region;
624
625         device_lock(dax_region->dev);
626         if (mapping->range_id < 0) {
627                 device_unlock(dax_region->dev);
628                 return NULL;
629         }
630
631         return &dev_dax->ranges[mapping->range_id];
632 }
633
634 static void put_dax_range(struct dev_dax_range *dax_range)
635 {
636         struct dax_mapping *mapping = dax_range->mapping;
637         struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent);
638         struct dax_region *dax_region = dev_dax->region;
639
640         device_unlock(dax_region->dev);
641 }
642
643 static ssize_t start_show(struct device *dev,
644                 struct device_attribute *attr, char *buf)
645 {
646         struct dev_dax_range *dax_range;
647         ssize_t rc;
648
649         dax_range = get_dax_range(dev);
650         if (!dax_range)
651                 return -ENXIO;
652         rc = sprintf(buf, "%#llx\n", dax_range->range.start);
653         put_dax_range(dax_range);
654
655         return rc;
656 }
657 static DEVICE_ATTR(start, 0400, start_show, NULL);
658
659 static ssize_t end_show(struct device *dev,
660                 struct device_attribute *attr, char *buf)
661 {
662         struct dev_dax_range *dax_range;
663         ssize_t rc;
664
665         dax_range = get_dax_range(dev);
666         if (!dax_range)
667                 return -ENXIO;
668         rc = sprintf(buf, "%#llx\n", dax_range->range.end);
669         put_dax_range(dax_range);
670
671         return rc;
672 }
673 static DEVICE_ATTR(end, 0400, end_show, NULL);
674
675 static ssize_t pgoff_show(struct device *dev,
676                 struct device_attribute *attr, char *buf)
677 {
678         struct dev_dax_range *dax_range;
679         ssize_t rc;
680
681         dax_range = get_dax_range(dev);
682         if (!dax_range)
683                 return -ENXIO;
684         rc = sprintf(buf, "%#lx\n", dax_range->pgoff);
685         put_dax_range(dax_range);
686
687         return rc;
688 }
689 static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);
690
691 static struct attribute *dax_mapping_attributes[] = {
692         &dev_attr_start.attr,
693         &dev_attr_end.attr,
694         &dev_attr_page_offset.attr,
695         NULL,
696 };
697
698 static const struct attribute_group dax_mapping_attribute_group = {
699         .attrs = dax_mapping_attributes,
700 };
701
702 static const struct attribute_group *dax_mapping_attribute_groups[] = {
703         &dax_mapping_attribute_group,
704         NULL,
705 };
706
707 static struct device_type dax_mapping_type = {
708         .release = dax_mapping_release,
709         .groups = dax_mapping_attribute_groups,
710 };
711
712 static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
713 {
714         struct dax_region *dax_region = dev_dax->region;
715         struct dax_mapping *mapping;
716         struct device *dev;
717         int rc;
718
719         device_lock_assert(dax_region->dev);
720
721         if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
722                                 "region disabled\n"))
723                 return -ENXIO;
724
725         mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
726         if (!mapping)
727                 return -ENOMEM;
728         mapping->range_id = range_id;
729         mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
730         if (mapping->id < 0) {
731                 kfree(mapping);
732                 return -ENOMEM;
733         }
734         dev_dax->ranges[range_id].mapping = mapping;
735         dev = &mapping->dev;
736         device_initialize(dev);
737         dev->parent = &dev_dax->dev;
738         dev->type = &dax_mapping_type;
739         dev_set_name(dev, "mapping%d", mapping->id);
740         rc = device_add(dev);
741         if (rc) {
742                 put_device(dev);
743                 return rc;
744         }
745
746         rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
747                         dev);
748         if (rc)
749                 return rc;
750         return 0;
751 }
752
753 static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
754                 resource_size_t size)
755 {
756         struct dax_region *dax_region = dev_dax->region;
757         struct resource *res = &dax_region->res;
758         struct device *dev = &dev_dax->dev;
759         struct dev_dax_range *ranges;
760         unsigned long pgoff = 0;
761         struct resource *alloc;
762         int i, rc;
763
764         device_lock_assert(dax_region->dev);
765
766         /* handle the seed alloc special case */
767         if (!size) {
768                 if (dev_WARN_ONCE(dev, dev_dax->nr_range,
769                                         "0-size allocation must be first\n"))
770                         return -EBUSY;
771                 /* nr_range == 0 is elsewhere special cased as 0-size device */
772                 return 0;
773         }
774
775         alloc = __request_region(res, start, size, dev_name(dev), 0);
776         if (!alloc)
777                 return -ENOMEM;
778
779         ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
780                         * (dev_dax->nr_range + 1), GFP_KERNEL);
781         if (!ranges) {
782                 __release_region(res, alloc->start, resource_size(alloc));
783                 return -ENOMEM;
784         }
785
786         for (i = 0; i < dev_dax->nr_range; i++)
787                 pgoff += PHYS_PFN(range_len(&ranges[i].range));
788         dev_dax->ranges = ranges;
789         ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
790                 .pgoff = pgoff,
791                 .range = {
792                         .start = alloc->start,
793                         .end = alloc->end,
794                 },
795         };
796
797         dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
798                         &alloc->start, &alloc->end);
799         /*
800          * A dev_dax instance must be registered before mapping device
801          * children can be added. Defer to devm_create_dev_dax() to add
802          * the initial mapping device.
803          */
804         if (!device_is_registered(&dev_dax->dev))
805                 return 0;
806
807         rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
808         if (rc)
809                 trim_dev_dax_range(dev_dax);
810
811         return rc;
812 }
813
814 static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
815 {
816         int last_range = dev_dax->nr_range - 1;
817         struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
818         struct dax_region *dax_region = dev_dax->region;
819         bool is_shrink = resource_size(res) > size;
820         struct range *range = &dax_range->range;
821         struct device *dev = &dev_dax->dev;
822         int rc;
823
824         device_lock_assert(dax_region->dev);
825
826         if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
827                 return -EINVAL;
828
829         rc = adjust_resource(res, range->start, size);
830         if (rc)
831                 return rc;
832
833         *range = (struct range) {
834                 .start = range->start,
835                 .end = range->start + size - 1,
836         };
837
838         dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
839                         last_range, (unsigned long long) range->start,
840                         (unsigned long long) range->end);
841
842         return 0;
843 }
844
845 static ssize_t size_show(struct device *dev,
846                 struct device_attribute *attr, char *buf)
847 {
848         struct dev_dax *dev_dax = to_dev_dax(dev);
849         unsigned long long size;
850
851         device_lock(dev);
852         size = dev_dax_size(dev_dax);
853         device_unlock(dev);
854
855         return sprintf(buf, "%llu\n", size);
856 }
857
858 static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
859 {
860         /*
861          * The minimum mapping granularity for a device instance is a
862          * single subsection, unless the arch says otherwise.
863          */
864         return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
865 }
866
867 static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
868 {
869         resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
870         struct dax_region *dax_region = dev_dax->region;
871         struct device *dev = &dev_dax->dev;
872         int i;
873
874         for (i = dev_dax->nr_range - 1; i >= 0; i--) {
875                 struct range *range = &dev_dax->ranges[i].range;
876                 struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
877                 struct resource *adjust = NULL, *res;
878                 resource_size_t shrink;
879
880                 shrink = min_t(u64, to_shrink, range_len(range));
881                 if (shrink >= range_len(range)) {
882                         devm_release_action(dax_region->dev,
883                                         unregister_dax_mapping, &mapping->dev);
884                         trim_dev_dax_range(dev_dax);
885                         to_shrink -= shrink;
886                         if (!to_shrink)
887                                 break;
888                         continue;
889                 }
890
891                 for_each_dax_region_resource(dax_region, res)
892                         if (strcmp(res->name, dev_name(dev)) == 0
893                                         && res->start == range->start) {
894                                 adjust = res;
895                                 break;
896                         }
897
898                 if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
899                                         "failed to find matching resource\n"))
900                         return -ENXIO;
901                 return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
902                                 - shrink);
903         }
904         return 0;
905 }
906
907 /*
908  * Only allow adjustments that preserve the relative pgoff of existing
909  * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
910  */
911 static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
912 {
913         struct dev_dax_range *last;
914         int i;
915
916         if (dev_dax->nr_range == 0)
917                 return false;
918         if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
919                 return false;
920         last = &dev_dax->ranges[dev_dax->nr_range - 1];
921         if (last->range.start != res->start || last->range.end != res->end)
922                 return false;
923         for (i = 0; i < dev_dax->nr_range - 1; i++) {
924                 struct dev_dax_range *dax_range = &dev_dax->ranges[i];
925
926                 if (dax_range->pgoff > last->pgoff)
927                         return false;
928         }
929
930         return true;
931 }
932
933 static ssize_t dev_dax_resize(struct dax_region *dax_region,
934                 struct dev_dax *dev_dax, resource_size_t size)
935 {
936         resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
937         resource_size_t dev_size = dev_dax_size(dev_dax);
938         struct resource *region_res = &dax_region->res;
939         struct device *dev = &dev_dax->dev;
940         struct resource *res, *first;
941         resource_size_t alloc = 0;
942         int rc;
943
944         if (dev->driver)
945                 return -EBUSY;
946         if (size == dev_size)
947                 return 0;
948         if (size > dev_size && size - dev_size > avail)
949                 return -ENOSPC;
950         if (size < dev_size)
951                 return dev_dax_shrink(dev_dax, size);
952
953         to_alloc = size - dev_size;
954         if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
955                         "resize of %pa misaligned\n", &to_alloc))
956                 return -ENXIO;
957
958         /*
959          * Expand the device into the unused portion of the region. This
960          * may involve adjusting the end of an existing resource, or
961          * allocating a new resource.
962          */
963 retry:
964         first = region_res->child;
965         if (!first)
966                 return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);
967
968         rc = -ENOSPC;
969         for (res = first; res; res = res->sibling) {
970                 struct resource *next = res->sibling;
971
972                 /* space at the beginning of the region */
973                 if (res == first && res->start > dax_region->res.start) {
974                         alloc = min(res->start - dax_region->res.start, to_alloc);
975                         rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
976                         break;
977                 }
978
979                 alloc = 0;
980                 /* space between allocations */
981                 if (next && next->start > res->end + 1)
982                         alloc = min(next->start - (res->end + 1), to_alloc);
983
984                 /* space at the end of the region */
985                 if (!alloc && !next && res->end < region_res->end)
986                         alloc = min(region_res->end - res->end, to_alloc);
987
988                 if (!alloc)
989                         continue;
990
991                 if (adjust_ok(dev_dax, res)) {
992                         rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
993                         break;
994                 }
995                 rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
996                 break;
997         }
998         if (rc)
999                 return rc;
1000         to_alloc -= alloc;
1001         if (to_alloc)
1002                 goto retry;
1003         return 0;
1004 }
1005
1006 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
1007                 const char *buf, size_t len)
1008 {
1009         ssize_t rc;
1010         unsigned long long val;
1011         struct dev_dax *dev_dax = to_dev_dax(dev);
1012         struct dax_region *dax_region = dev_dax->region;
1013
1014         rc = kstrtoull(buf, 0, &val);
1015         if (rc)
1016                 return rc;
1017
1018         if (!alloc_is_aligned(dev_dax, val)) {
1019                 dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
1020                 return -EINVAL;
1021         }
1022
1023         device_lock(dax_region->dev);
1024         if (!dax_region->dev->driver) {
1025                 device_unlock(dax_region->dev);
1026                 return -ENXIO;
1027         }
1028         device_lock(dev);
1029         rc = dev_dax_resize(dax_region, dev_dax, val);
1030         device_unlock(dev);
1031         device_unlock(dax_region->dev);
1032
1033         return rc == 0 ? len : rc;
1034 }
1035 static DEVICE_ATTR_RW(size);
1036
1037 static ssize_t range_parse(const char *opt, size_t len, struct range *range)
1038 {
1039         unsigned long long addr = 0;
1040         char *start, *end, *str;
1041         ssize_t rc = EINVAL;
1042
1043         str = kstrdup(opt, GFP_KERNEL);
1044         if (!str)
1045                 return rc;
1046
1047         end = str;
1048         start = strsep(&end, "-");
1049         if (!start || !end)
1050                 goto err;
1051
1052         rc = kstrtoull(start, 16, &addr);
1053         if (rc)
1054                 goto err;
1055         range->start = addr;
1056
1057         rc = kstrtoull(end, 16, &addr);
1058         if (rc)
1059                 goto err;
1060         range->end = addr;
1061
1062 err:
1063         kfree(str);
1064         return rc;
1065 }
1066
1067 static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
1068                 const char *buf, size_t len)
1069 {
1070         struct dev_dax *dev_dax = to_dev_dax(dev);
1071         struct dax_region *dax_region = dev_dax->region;
1072         size_t to_alloc;
1073         struct range r;
1074         ssize_t rc;
1075
1076         rc = range_parse(buf, len, &r);
1077         if (rc)
1078                 return rc;
1079
1080         rc = -ENXIO;
1081         device_lock(dax_region->dev);
1082         if (!dax_region->dev->driver) {
1083                 device_unlock(dax_region->dev);
1084                 return rc;
1085         }
1086         device_lock(dev);
1087
1088         to_alloc = range_len(&r);
1089         if (alloc_is_aligned(dev_dax, to_alloc))
1090                 rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
1091         device_unlock(dev);
1092         device_unlock(dax_region->dev);
1093
1094         return rc == 0 ? len : rc;
1095 }
1096 static DEVICE_ATTR_WO(mapping);
1097
1098 static ssize_t align_show(struct device *dev,
1099                 struct device_attribute *attr, char *buf)
1100 {
1101         struct dev_dax *dev_dax = to_dev_dax(dev);
1102
1103         return sprintf(buf, "%d\n", dev_dax->align);
1104 }
1105
1106 static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
1107 {
1108         struct device *dev = &dev_dax->dev;
1109         int i;
1110
1111         for (i = 0; i < dev_dax->nr_range; i++) {
1112                 size_t len = range_len(&dev_dax->ranges[i].range);
1113
1114                 if (!alloc_is_aligned(dev_dax, len)) {
1115                         dev_dbg(dev, "%s: align %u invalid for range %d\n",
1116                                 __func__, dev_dax->align, i);
1117                         return -EINVAL;
1118                 }
1119         }
1120
1121         return 0;
1122 }
1123
1124 static ssize_t align_store(struct device *dev, struct device_attribute *attr,
1125                 const char *buf, size_t len)
1126 {
1127         struct dev_dax *dev_dax = to_dev_dax(dev);
1128         struct dax_region *dax_region = dev_dax->region;
1129         unsigned long val, align_save;
1130         ssize_t rc;
1131
1132         rc = kstrtoul(buf, 0, &val);
1133         if (rc)
1134                 return -ENXIO;
1135
1136         if (!dax_align_valid(val))
1137                 return -EINVAL;
1138
1139         device_lock(dax_region->dev);
1140         if (!dax_region->dev->driver) {
1141                 device_unlock(dax_region->dev);
1142                 return -ENXIO;
1143         }
1144
1145         device_lock(dev);
1146         if (dev->driver) {
1147                 rc = -EBUSY;
1148                 goto out_unlock;
1149         }
1150
1151         align_save = dev_dax->align;
1152         dev_dax->align = val;
1153         rc = dev_dax_validate_align(dev_dax);
1154         if (rc)
1155                 dev_dax->align = align_save;
1156 out_unlock:
1157         device_unlock(dev);
1158         device_unlock(dax_region->dev);
1159         return rc == 0 ? len : rc;
1160 }
1161 static DEVICE_ATTR_RW(align);
1162
1163 static int dev_dax_target_node(struct dev_dax *dev_dax)
1164 {
1165         struct dax_region *dax_region = dev_dax->region;
1166
1167         return dax_region->target_node;
1168 }
1169
1170 static ssize_t target_node_show(struct device *dev,
1171                 struct device_attribute *attr, char *buf)
1172 {
1173         struct dev_dax *dev_dax = to_dev_dax(dev);
1174
1175         return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
1176 }
1177 static DEVICE_ATTR_RO(target_node);
1178
1179 static ssize_t resource_show(struct device *dev,
1180                 struct device_attribute *attr, char *buf)
1181 {
1182         struct dev_dax *dev_dax = to_dev_dax(dev);
1183         struct dax_region *dax_region = dev_dax->region;
1184         unsigned long long start;
1185
1186         if (dev_dax->nr_range < 1)
1187                 start = dax_region->res.start;
1188         else
1189                 start = dev_dax->ranges[0].range.start;
1190
1191         return sprintf(buf, "%#llx\n", start);
1192 }
1193 static DEVICE_ATTR(resource, 0400, resource_show, NULL);
1194
1195 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1196                 char *buf)
1197 {
1198         /*
1199          * We only ever expect to handle device-dax instances, i.e. the
1200          * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
1201          */
1202         return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
1203 }
1204 static DEVICE_ATTR_RO(modalias);
1205
1206 static ssize_t numa_node_show(struct device *dev,
1207                 struct device_attribute *attr, char *buf)
1208 {
1209         return sprintf(buf, "%d\n", dev_to_node(dev));
1210 }
1211 static DEVICE_ATTR_RO(numa_node);
1212
1213 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
1214 {
1215         struct device *dev = container_of(kobj, struct device, kobj);
1216         struct dev_dax *dev_dax = to_dev_dax(dev);
1217         struct dax_region *dax_region = dev_dax->region;
1218
1219         if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
1220                 return 0;
1221         if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
1222                 return 0;
1223         if (a == &dev_attr_mapping.attr && is_static(dax_region))
1224                 return 0;
1225         if ((a == &dev_attr_align.attr ||
1226              a == &dev_attr_size.attr) && is_static(dax_region))
1227                 return 0444;
1228         return a->mode;
1229 }
1230
1231 static struct attribute *dev_dax_attributes[] = {
1232         &dev_attr_modalias.attr,
1233         &dev_attr_size.attr,
1234         &dev_attr_mapping.attr,
1235         &dev_attr_target_node.attr,
1236         &dev_attr_align.attr,
1237         &dev_attr_resource.attr,
1238         &dev_attr_numa_node.attr,
1239         NULL,
1240 };
1241
1242 static const struct attribute_group dev_dax_attribute_group = {
1243         .attrs = dev_dax_attributes,
1244         .is_visible = dev_dax_visible,
1245 };
1246
1247 static const struct attribute_group *dax_attribute_groups[] = {
1248         &dev_dax_attribute_group,
1249         NULL,
1250 };
1251
1252 static void dev_dax_release(struct device *dev)
1253 {
1254         struct dev_dax *dev_dax = to_dev_dax(dev);
1255         struct dax_region *dax_region = dev_dax->region;
1256         struct dax_device *dax_dev = dev_dax->dax_dev;
1257
1258         put_dax(dax_dev);
1259         free_dev_dax_id(dev_dax);
1260         dax_region_put(dax_region);
1261         kfree(dev_dax->pgmap);
1262         kfree(dev_dax);
1263 }
1264
1265 static const struct device_type dev_dax_type = {
1266         .release = dev_dax_release,
1267         .groups = dax_attribute_groups,
1268 };
1269
1270 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
1271 {
1272         struct dax_region *dax_region = data->dax_region;
1273         struct device *parent = dax_region->dev;
1274         struct dax_device *dax_dev;
1275         struct dev_dax *dev_dax;
1276         struct inode *inode;
1277         struct device *dev;
1278         int rc;
1279
1280         dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
1281         if (!dev_dax)
1282                 return ERR_PTR(-ENOMEM);
1283
1284         if (is_static(dax_region)) {
1285                 if (dev_WARN_ONCE(parent, data->id < 0,
1286                                 "dynamic id specified to static region\n")) {
1287                         rc = -EINVAL;
1288                         goto err_id;
1289                 }
1290
1291                 dev_dax->id = data->id;
1292         } else {
1293                 if (dev_WARN_ONCE(parent, data->id >= 0,
1294                                 "static id specified to dynamic region\n")) {
1295                         rc = -EINVAL;
1296                         goto err_id;
1297                 }
1298
1299                 rc = ida_alloc(&dax_region->ida, GFP_KERNEL);
1300                 if (rc < 0)
1301                         goto err_id;
1302                 dev_dax->id = rc;
1303         }
1304
1305         dev_dax->region = dax_region;
1306         dev = &dev_dax->dev;
1307         device_initialize(dev);
1308         dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
1309
1310         rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
1311         if (rc)
1312                 goto err_range;
1313
1314         if (data->pgmap) {
1315                 dev_WARN_ONCE(parent, !is_static(dax_region),
1316                         "custom dev_pagemap requires a static dax_region\n");
1317
1318                 dev_dax->pgmap = kmemdup(data->pgmap,
1319                                 sizeof(struct dev_pagemap), GFP_KERNEL);
1320                 if (!dev_dax->pgmap) {
1321                         rc = -ENOMEM;
1322                         goto err_pgmap;
1323                 }
1324         }
1325
1326         /*
1327          * No 'host' or dax_operations since there is no access to this
1328          * device outside of mmap of the resulting character device.
1329          */
1330         dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
1331         if (IS_ERR(dax_dev)) {
1332                 rc = PTR_ERR(dax_dev);
1333                 goto err_alloc_dax;
1334         }
1335
1336         /* a device_dax instance is dead while the driver is not attached */
1337         kill_dax(dax_dev);
1338
1339         dev_dax->dax_dev = dax_dev;
1340         dev_dax->target_node = dax_region->target_node;
1341         dev_dax->align = dax_region->align;
1342         ida_init(&dev_dax->ida);
1343         kref_get(&dax_region->kref);
1344
1345         inode = dax_inode(dax_dev);
1346         dev->devt = inode->i_rdev;
1347         if (data->subsys == DEV_DAX_BUS)
1348                 dev->bus = &dax_bus_type;
1349         else
1350                 dev->class = dax_class;
1351         dev->parent = parent;
1352         dev->type = &dev_dax_type;
1353
1354         rc = device_add(dev);
1355         if (rc) {
1356                 kill_dev_dax(dev_dax);
1357                 put_device(dev);
1358                 return ERR_PTR(rc);
1359         }
1360
1361         rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
1362         if (rc)
1363                 return ERR_PTR(rc);
1364
1365         /* register mapping device for the initial allocation range */
1366         if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
1367                 rc = devm_register_dax_mapping(dev_dax, 0);
1368                 if (rc)
1369                         return ERR_PTR(rc);
1370         }
1371
1372         return dev_dax;
1373
1374 err_alloc_dax:
1375         kfree(dev_dax->pgmap);
1376 err_pgmap:
1377         free_dev_dax_ranges(dev_dax);
1378 err_range:
1379         free_dev_dax_id(dev_dax);
1380 err_id:
1381         kfree(dev_dax);
1382
1383         return ERR_PTR(rc);
1384 }
1385 EXPORT_SYMBOL_GPL(devm_create_dev_dax);
1386
1387 static int match_always_count;
1388
1389 int __dax_driver_register(struct dax_device_driver *dax_drv,
1390                 struct module *module, const char *mod_name)
1391 {
1392         struct device_driver *drv = &dax_drv->drv;
1393         int rc = 0;
1394
1395         INIT_LIST_HEAD(&dax_drv->ids);
1396         drv->owner = module;
1397         drv->name = mod_name;
1398         drv->mod_name = mod_name;
1399         drv->bus = &dax_bus_type;
1400
1401         /* there can only be one default driver */
1402         mutex_lock(&dax_bus_lock);
1403         match_always_count += dax_drv->match_always;
1404         if (match_always_count > 1) {
1405                 match_always_count--;
1406                 WARN_ON(1);
1407                 rc = -EINVAL;
1408         }
1409         mutex_unlock(&dax_bus_lock);
1410         if (rc)
1411                 return rc;
1412         return driver_register(drv);
1413 }
1414 EXPORT_SYMBOL_GPL(__dax_driver_register);
1415
1416 void dax_driver_unregister(struct dax_device_driver *dax_drv)
1417 {
1418         struct device_driver *drv = &dax_drv->drv;
1419         struct dax_id *dax_id, *_id;
1420
1421         mutex_lock(&dax_bus_lock);
1422         match_always_count -= dax_drv->match_always;
1423         list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
1424                 list_del(&dax_id->list);
1425                 kfree(dax_id);
1426         }
1427         mutex_unlock(&dax_bus_lock);
1428         driver_unregister(drv);
1429 }
1430 EXPORT_SYMBOL_GPL(dax_driver_unregister);
1431
1432 int __init dax_bus_init(void)
1433 {
1434         int rc;
1435
1436         if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
1437                 dax_class = class_create(THIS_MODULE, "dax");
1438                 if (IS_ERR(dax_class))
1439                         return PTR_ERR(dax_class);
1440         }
1441
1442         rc = bus_register(&dax_bus_type);
1443         if (rc)
1444                 class_destroy(dax_class);
1445         return rc;
1446 }
1447
1448 void __exit dax_bus_exit(void)
1449 {
1450         bus_unregister(&dax_bus_type);
1451         class_destroy(dax_class);
1452 }