Merge branch 'next-general' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris...
[linux-2.6-microblaze.git] / drivers / lightnvm / pblk-gc.c
1 /*
2  * Copyright (C) 2016 CNEX Labs
3  * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4  *                  Matias Bjorling <matias@cnexlabs.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License version
8  * 2 as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * pblk-gc.c - pblk's garbage collector
16  */
17
18 #include "pblk.h"
19 #include <linux/delay.h>
20
21 static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
22 {
23         if (gc_rq->data)
24                 vfree(gc_rq->data);
25         kfree(gc_rq);
26 }
27
28 static int pblk_gc_write(struct pblk *pblk)
29 {
30         struct pblk_gc *gc = &pblk->gc;
31         struct pblk_gc_rq *gc_rq, *tgc_rq;
32         LIST_HEAD(w_list);
33
34         spin_lock(&gc->w_lock);
35         if (list_empty(&gc->w_list)) {
36                 spin_unlock(&gc->w_lock);
37                 return 1;
38         }
39
40         list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
41         gc->w_entries = 0;
42         spin_unlock(&gc->w_lock);
43
44         list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
45                 pblk_write_gc_to_cache(pblk, gc_rq);
46                 list_del(&gc_rq->list);
47                 kref_put(&gc_rq->line->ref, pblk_line_put);
48                 pblk_gc_free_gc_rq(gc_rq);
49         }
50
51         return 0;
52 }
53
54 static void pblk_gc_writer_kick(struct pblk_gc *gc)
55 {
56         wake_up_process(gc->gc_writer_ts);
57 }
58
59 static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
60 {
61         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
62         struct list_head *move_list;
63
64         spin_lock(&line->lock);
65         WARN_ON(line->state != PBLK_LINESTATE_GC);
66         line->state = PBLK_LINESTATE_CLOSED;
67         move_list = pblk_line_gc_list(pblk, line);
68         spin_unlock(&line->lock);
69
70         if (move_list) {
71                 spin_lock(&l_mg->gc_lock);
72                 list_add_tail(&line->list, move_list);
73                 spin_unlock(&l_mg->gc_lock);
74         }
75 }
76
77 static void pblk_gc_line_ws(struct work_struct *work)
78 {
79         struct pblk_line_ws *gc_rq_ws = container_of(work,
80                                                 struct pblk_line_ws, ws);
81         struct pblk *pblk = gc_rq_ws->pblk;
82         struct nvm_tgt_dev *dev = pblk->dev;
83         struct nvm_geo *geo = &dev->geo;
84         struct pblk_gc *gc = &pblk->gc;
85         struct pblk_line *line = gc_rq_ws->line;
86         struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
87         int ret;
88
89         up(&gc->gc_sem);
90
91         gc_rq->data = vmalloc(gc_rq->nr_secs * geo->csecs);
92         if (!gc_rq->data) {
93                 pr_err("pblk: could not GC line:%d (%d/%d)\n",
94                                         line->id, *line->vsc, gc_rq->nr_secs);
95                 goto out;
96         }
97
98         /* Read from GC victim block */
99         ret = pblk_submit_read_gc(pblk, gc_rq);
100         if (ret) {
101                 pr_err("pblk: failed GC read in line:%d (err:%d)\n",
102                                                                 line->id, ret);
103                 goto out;
104         }
105
106         if (!gc_rq->secs_to_gc)
107                 goto out;
108
109 retry:
110         spin_lock(&gc->w_lock);
111         if (gc->w_entries >= PBLK_GC_RQ_QD) {
112                 spin_unlock(&gc->w_lock);
113                 pblk_gc_writer_kick(&pblk->gc);
114                 usleep_range(128, 256);
115                 goto retry;
116         }
117         gc->w_entries++;
118         list_add_tail(&gc_rq->list, &gc->w_list);
119         spin_unlock(&gc->w_lock);
120
121         pblk_gc_writer_kick(&pblk->gc);
122
123         kfree(gc_rq_ws);
124         return;
125
126 out:
127         pblk_gc_free_gc_rq(gc_rq);
128         kref_put(&line->ref, pblk_line_put);
129         kfree(gc_rq_ws);
130 }
131
132 static void pblk_gc_line_prepare_ws(struct work_struct *work)
133 {
134         struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
135                                                                         ws);
136         struct pblk *pblk = line_ws->pblk;
137         struct pblk_line *line = line_ws->line;
138         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
139         struct pblk_line_meta *lm = &pblk->lm;
140         struct pblk_gc *gc = &pblk->gc;
141         struct line_emeta *emeta_buf;
142         struct pblk_line_ws *gc_rq_ws;
143         struct pblk_gc_rq *gc_rq;
144         __le64 *lba_list;
145         unsigned long *invalid_bitmap;
146         int sec_left, nr_secs, bit;
147         int ret;
148
149         invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
150         if (!invalid_bitmap)
151                 goto fail_free_ws;
152
153         emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
154                                                                 GFP_KERNEL);
155         if (!emeta_buf) {
156                 pr_err("pblk: cannot use GC emeta\n");
157                 goto fail_free_bitmap;
158         }
159
160         ret = pblk_line_read_emeta(pblk, line, emeta_buf);
161         if (ret) {
162                 pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
163                 goto fail_free_emeta;
164         }
165
166         /* If this read fails, it means that emeta is corrupted. For now, leave
167          * the line untouched. TODO: Implement a recovery routine that scans and
168          * moves all sectors on the line.
169          */
170
171         ret = pblk_recov_check_emeta(pblk, emeta_buf);
172         if (ret) {
173                 pr_err("pblk: inconsistent emeta (line %d)\n", line->id);
174                 goto fail_free_emeta;
175         }
176
177         lba_list = emeta_to_lbas(pblk, emeta_buf);
178         if (!lba_list) {
179                 pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
180                 goto fail_free_emeta;
181         }
182
183         spin_lock(&line->lock);
184         bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
185         sec_left = pblk_line_vsc(line);
186         spin_unlock(&line->lock);
187
188         if (sec_left < 0) {
189                 pr_err("pblk: corrupted GC line (%d)\n", line->id);
190                 goto fail_free_emeta;
191         }
192
193         bit = -1;
194 next_rq:
195         gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
196         if (!gc_rq)
197                 goto fail_free_emeta;
198
199         nr_secs = 0;
200         do {
201                 bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
202                                                                 bit + 1);
203                 if (bit > line->emeta_ssec)
204                         break;
205
206                 gc_rq->paddr_list[nr_secs] = bit;
207                 gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
208         } while (nr_secs < pblk->max_write_pgs);
209
210         if (unlikely(!nr_secs)) {
211                 kfree(gc_rq);
212                 goto out;
213         }
214
215         gc_rq->nr_secs = nr_secs;
216         gc_rq->line = line;
217
218         gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
219         if (!gc_rq_ws)
220                 goto fail_free_gc_rq;
221
222         gc_rq_ws->pblk = pblk;
223         gc_rq_ws->line = line;
224         gc_rq_ws->priv = gc_rq;
225
226         /* The write GC path can be much slower than the read GC one due to
227          * the budget imposed by the rate-limiter. Balance in case that we get
228          * back pressure from the write GC path.
229          */
230         while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000)))
231                 io_schedule();
232
233         kref_get(&line->ref);
234
235         INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
236         queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);
237
238         sec_left -= nr_secs;
239         if (sec_left > 0)
240                 goto next_rq;
241
242 out:
243         pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
244         kfree(line_ws);
245         kfree(invalid_bitmap);
246
247         kref_put(&line->ref, pblk_line_put);
248         atomic_dec(&gc->read_inflight_gc);
249
250         return;
251
252 fail_free_gc_rq:
253         kfree(gc_rq);
254 fail_free_emeta:
255         pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
256 fail_free_bitmap:
257         kfree(invalid_bitmap);
258 fail_free_ws:
259         kfree(line_ws);
260
261         pblk_put_line_back(pblk, line);
262         kref_put(&line->ref, pblk_line_put);
263         atomic_dec(&gc->read_inflight_gc);
264
265         pr_err("pblk: Failed to GC line %d\n", line->id);
266 }
267
268 static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
269 {
270         struct pblk_gc *gc = &pblk->gc;
271         struct pblk_line_ws *line_ws;
272
273         pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
274
275         line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
276         if (!line_ws)
277                 return -ENOMEM;
278
279         line_ws->pblk = pblk;
280         line_ws->line = line;
281
282         atomic_inc(&gc->pipeline_gc);
283         INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
284         queue_work(gc->gc_reader_wq, &line_ws->ws);
285
286         return 0;
287 }
288
289 static void pblk_gc_reader_kick(struct pblk_gc *gc)
290 {
291         wake_up_process(gc->gc_reader_ts);
292 }
293
294 static void pblk_gc_kick(struct pblk *pblk)
295 {
296         struct pblk_gc *gc = &pblk->gc;
297
298         pblk_gc_writer_kick(gc);
299         pblk_gc_reader_kick(gc);
300
301         /* If we're shutting down GC, let's not start it up again */
302         if (gc->gc_enabled) {
303                 wake_up_process(gc->gc_ts);
304                 mod_timer(&gc->gc_timer,
305                           jiffies + msecs_to_jiffies(GC_TIME_MSECS));
306         }
307 }
308
309 static int pblk_gc_read(struct pblk *pblk)
310 {
311         struct pblk_gc *gc = &pblk->gc;
312         struct pblk_line *line;
313
314         spin_lock(&gc->r_lock);
315         if (list_empty(&gc->r_list)) {
316                 spin_unlock(&gc->r_lock);
317                 return 1;
318         }
319
320         line = list_first_entry(&gc->r_list, struct pblk_line, list);
321         list_del(&line->list);
322         spin_unlock(&gc->r_lock);
323
324         pblk_gc_kick(pblk);
325
326         if (pblk_gc_line(pblk, line))
327                 pr_err("pblk: failed to GC line %d\n", line->id);
328
329         return 0;
330 }
331
332 static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
333                                                  struct list_head *group_list)
334 {
335         struct pblk_line *line, *victim;
336         int line_vsc, victim_vsc;
337
338         victim = list_first_entry(group_list, struct pblk_line, list);
339         list_for_each_entry(line, group_list, list) {
340                 line_vsc = le32_to_cpu(*line->vsc);
341                 victim_vsc = le32_to_cpu(*victim->vsc);
342                 if (line_vsc < victim_vsc)
343                         victim = line;
344         }
345
346         return victim;
347 }
348
349 static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
350 {
351         unsigned int nr_blocks_free, nr_blocks_need;
352
353         nr_blocks_need = pblk_rl_high_thrs(rl);
354         nr_blocks_free = pblk_rl_nr_free_blks(rl);
355
356         /* This is not critical, no need to take lock here */
357         return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
358 }
359
360 void pblk_gc_free_full_lines(struct pblk *pblk)
361 {
362         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
363         struct pblk_gc *gc = &pblk->gc;
364         struct pblk_line *line;
365
366         do {
367                 spin_lock(&l_mg->gc_lock);
368                 if (list_empty(&l_mg->gc_full_list)) {
369                         spin_unlock(&l_mg->gc_lock);
370                         return;
371                 }
372
373                 line = list_first_entry(&l_mg->gc_full_list,
374                                                         struct pblk_line, list);
375
376                 spin_lock(&line->lock);
377                 WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
378                 line->state = PBLK_LINESTATE_GC;
379                 spin_unlock(&line->lock);
380
381                 list_del(&line->list);
382                 spin_unlock(&l_mg->gc_lock);
383
384                 atomic_inc(&gc->pipeline_gc);
385                 kref_put(&line->ref, pblk_line_put);
386         } while (1);
387 }
388
389 /*
390  * Lines with no valid sectors will be returned to the free list immediately. If
391  * GC is activated - either because the free block count is under the determined
392  * threshold, or because it is being forced from user space - only lines with a
393  * high count of invalid sectors will be recycled.
394  */
395 static void pblk_gc_run(struct pblk *pblk)
396 {
397         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
398         struct pblk_gc *gc = &pblk->gc;
399         struct pblk_line *line;
400         struct list_head *group_list;
401         bool run_gc;
402         int read_inflight_gc, gc_group = 0, prev_group = 0;
403
404         pblk_gc_free_full_lines(pblk);
405
406         run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
407         if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD))
408                 return;
409
410 next_gc_group:
411         group_list = l_mg->gc_lists[gc_group++];
412
413         do {
414                 spin_lock(&l_mg->gc_lock);
415                 if (list_empty(group_list)) {
416                         spin_unlock(&l_mg->gc_lock);
417                         break;
418                 }
419
420                 line = pblk_gc_get_victim_line(pblk, group_list);
421
422                 spin_lock(&line->lock);
423                 WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
424                 line->state = PBLK_LINESTATE_GC;
425                 spin_unlock(&line->lock);
426
427                 list_del(&line->list);
428                 spin_unlock(&l_mg->gc_lock);
429
430                 spin_lock(&gc->r_lock);
431                 list_add_tail(&line->list, &gc->r_list);
432                 spin_unlock(&gc->r_lock);
433
434                 read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc);
435                 pblk_gc_reader_kick(gc);
436
437                 prev_group = 1;
438
439                 /* No need to queue up more GC lines than we can handle */
440                 run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
441                 if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD)
442                         break;
443         } while (1);
444
445         if (!prev_group && pblk->rl.rb_state > gc_group &&
446                                                 gc_group < PBLK_GC_NR_LISTS)
447                 goto next_gc_group;
448 }
449
450 static void pblk_gc_timer(struct timer_list *t)
451 {
452         struct pblk *pblk = from_timer(pblk, t, gc.gc_timer);
453
454         pblk_gc_kick(pblk);
455 }
456
457 static int pblk_gc_ts(void *data)
458 {
459         struct pblk *pblk = data;
460
461         while (!kthread_should_stop()) {
462                 pblk_gc_run(pblk);
463                 set_current_state(TASK_INTERRUPTIBLE);
464                 io_schedule();
465         }
466
467         return 0;
468 }
469
470 static int pblk_gc_writer_ts(void *data)
471 {
472         struct pblk *pblk = data;
473
474         while (!kthread_should_stop()) {
475                 if (!pblk_gc_write(pblk))
476                         continue;
477                 set_current_state(TASK_INTERRUPTIBLE);
478                 io_schedule();
479         }
480
481         return 0;
482 }
483
484 static int pblk_gc_reader_ts(void *data)
485 {
486         struct pblk *pblk = data;
487         struct pblk_gc *gc = &pblk->gc;
488
489         while (!kthread_should_stop()) {
490                 if (!pblk_gc_read(pblk))
491                         continue;
492                 set_current_state(TASK_INTERRUPTIBLE);
493                 io_schedule();
494         }
495
496 #ifdef CONFIG_NVM_DEBUG
497         pr_info("pblk: flushing gc pipeline, %d lines left\n",
498                 atomic_read(&gc->pipeline_gc));
499 #endif
500
501         do {
502                 if (!atomic_read(&gc->pipeline_gc))
503                         break;
504
505                 schedule();
506         } while (1);
507
508         return 0;
509 }
510
511 static void pblk_gc_start(struct pblk *pblk)
512 {
513         pblk->gc.gc_active = 1;
514         pr_debug("pblk: gc start\n");
515 }
516
517 void pblk_gc_should_start(struct pblk *pblk)
518 {
519         struct pblk_gc *gc = &pblk->gc;
520
521         if (gc->gc_enabled && !gc->gc_active) {
522                 pblk_gc_start(pblk);
523                 pblk_gc_kick(pblk);
524         }
525 }
526
527 void pblk_gc_should_stop(struct pblk *pblk)
528 {
529         struct pblk_gc *gc = &pblk->gc;
530
531         if (gc->gc_active && !gc->gc_forced)
532                 gc->gc_active = 0;
533 }
534
535 void pblk_gc_should_kick(struct pblk *pblk)
536 {
537         pblk_rl_update_rates(&pblk->rl);
538 }
539
540 void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
541                               int *gc_active)
542 {
543         struct pblk_gc *gc = &pblk->gc;
544
545         spin_lock(&gc->lock);
546         *gc_enabled = gc->gc_enabled;
547         *gc_active = gc->gc_active;
548         spin_unlock(&gc->lock);
549 }
550
551 int pblk_gc_sysfs_force(struct pblk *pblk, int force)
552 {
553         struct pblk_gc *gc = &pblk->gc;
554
555         if (force < 0 || force > 1)
556                 return -EINVAL;
557
558         spin_lock(&gc->lock);
559         gc->gc_forced = force;
560
561         if (force)
562                 gc->gc_enabled = 1;
563         else
564                 gc->gc_enabled = 0;
565         spin_unlock(&gc->lock);
566
567         pblk_gc_should_start(pblk);
568
569         return 0;
570 }
571
572 int pblk_gc_init(struct pblk *pblk)
573 {
574         struct pblk_gc *gc = &pblk->gc;
575         int ret;
576
577         gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
578         if (IS_ERR(gc->gc_ts)) {
579                 pr_err("pblk: could not allocate GC main kthread\n");
580                 return PTR_ERR(gc->gc_ts);
581         }
582
583         gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
584                                                         "pblk-gc-writer-ts");
585         if (IS_ERR(gc->gc_writer_ts)) {
586                 pr_err("pblk: could not allocate GC writer kthread\n");
587                 ret = PTR_ERR(gc->gc_writer_ts);
588                 goto fail_free_main_kthread;
589         }
590
591         gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
592                                                         "pblk-gc-reader-ts");
593         if (IS_ERR(gc->gc_reader_ts)) {
594                 pr_err("pblk: could not allocate GC reader kthread\n");
595                 ret = PTR_ERR(gc->gc_reader_ts);
596                 goto fail_free_writer_kthread;
597         }
598
599         timer_setup(&gc->gc_timer, pblk_gc_timer, 0);
600         mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
601
602         gc->gc_active = 0;
603         gc->gc_forced = 0;
604         gc->gc_enabled = 1;
605         gc->w_entries = 0;
606         atomic_set(&gc->read_inflight_gc, 0);
607         atomic_set(&gc->pipeline_gc, 0);
608
609         /* Workqueue that reads valid sectors from a line and submit them to the
610          * GC writer to be recycled.
611          */
612         gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
613                         WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
614         if (!gc->gc_line_reader_wq) {
615                 pr_err("pblk: could not allocate GC line reader workqueue\n");
616                 ret = -ENOMEM;
617                 goto fail_free_reader_kthread;
618         }
619
620         /* Workqueue that prepare lines for GC */
621         gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
622                                         WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
623         if (!gc->gc_reader_wq) {
624                 pr_err("pblk: could not allocate GC reader workqueue\n");
625                 ret = -ENOMEM;
626                 goto fail_free_reader_line_wq;
627         }
628
629         spin_lock_init(&gc->lock);
630         spin_lock_init(&gc->w_lock);
631         spin_lock_init(&gc->r_lock);
632
633         sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);
634
635         INIT_LIST_HEAD(&gc->w_list);
636         INIT_LIST_HEAD(&gc->r_list);
637
638         return 0;
639
640 fail_free_reader_line_wq:
641         destroy_workqueue(gc->gc_line_reader_wq);
642 fail_free_reader_kthread:
643         kthread_stop(gc->gc_reader_ts);
644 fail_free_writer_kthread:
645         kthread_stop(gc->gc_writer_ts);
646 fail_free_main_kthread:
647         kthread_stop(gc->gc_ts);
648
649         return ret;
650 }
651
652 void pblk_gc_exit(struct pblk *pblk)
653 {
654         struct pblk_gc *gc = &pblk->gc;
655
656         gc->gc_enabled = 0;
657         del_timer_sync(&gc->gc_timer);
658         gc->gc_active = 0;
659
660         if (gc->gc_ts)
661                 kthread_stop(gc->gc_ts);
662
663         if (gc->gc_reader_ts)
664                 kthread_stop(gc->gc_reader_ts);
665
666         flush_workqueue(gc->gc_reader_wq);
667         destroy_workqueue(gc->gc_reader_wq);
668
669         flush_workqueue(gc->gc_line_reader_wq);
670         destroy_workqueue(gc->gc_line_reader_wq);
671
672         if (gc->gc_writer_ts)
673                 kthread_stop(gc->gc_writer_ts);
674 }