0f501f938d1cf526364663e9d89ccd980208d3fc
[linux-2.6-microblaze.git] / fs / gfs2 / recovery.c
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9
10 #include <linux/module.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/crc32.h>
17 #include <linux/crc32c.h>
18 #include <linux/ktime.h>
19
20 #include "gfs2.h"
21 #include "incore.h"
22 #include "bmap.h"
23 #include "glock.h"
24 #include "glops.h"
25 #include "log.h"
26 #include "lops.h"
27 #include "meta_io.h"
28 #include "recovery.h"
29 #include "super.h"
30 #include "util.h"
31 #include "dir.h"
32
/* Workqueue on which gfs2_recover_journal() queues each journal's jd_work. */
struct workqueue_struct *gfs_recovery_wq;
34
35 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
36                            struct buffer_head **bh)
37 {
38         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
39         struct gfs2_glock *gl = ip->i_gl;
40         int new = 0;
41         u64 dblock;
42         u32 extlen;
43         int error;
44
45         error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
46         if (error)
47                 return error;
48         if (!dblock) {
49                 gfs2_consist_inode(ip);
50                 return -EIO;
51         }
52
53         *bh = gfs2_meta_ra(gl, dblock, extlen);
54
55         return error;
56 }
57
58 int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
59 {
60         struct list_head *head = &jd->jd_revoke_list;
61         struct gfs2_revoke_replay *rr;
62         int found = 0;
63
64         list_for_each_entry(rr, head, rr_list) {
65                 if (rr->rr_blkno == blkno) {
66                         found = 1;
67                         break;
68                 }
69         }
70
71         if (found) {
72                 rr->rr_where = where;
73                 return 0;
74         }
75
76         rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
77         if (!rr)
78                 return -ENOMEM;
79
80         rr->rr_blkno = blkno;
81         rr->rr_where = where;
82         list_add(&rr->rr_list, head);
83
84         return 1;
85 }
86
87 int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
88 {
89         struct gfs2_revoke_replay *rr;
90         int wrap, a, b, revoke;
91         int found = 0;
92
93         list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) {
94                 if (rr->rr_blkno == blkno) {
95                         found = 1;
96                         break;
97                 }
98         }
99
100         if (!found)
101                 return 0;
102
103         wrap = (rr->rr_where < jd->jd_replay_tail);
104         a = (jd->jd_replay_tail < where);
105         b = (where < rr->rr_where);
106         revoke = (wrap) ? (a || b) : (a && b);
107
108         return revoke;
109 }
110
111 void gfs2_revoke_clean(struct gfs2_jdesc *jd)
112 {
113         struct list_head *head = &jd->jd_revoke_list;
114         struct gfs2_revoke_replay *rr;
115
116         while (!list_empty(head)) {
117                 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
118                 list_del(&rr->rr_list);
119                 kfree(rr);
120         }
121 }
122
123 /**
124  * get_log_header - read the log header for a given segment
125  * @jd: the journal
126  * @blk: the block to look at
127  * @lh: the log header to return
128  *
129  * Read the log header for a given segement in a given journal.  Do a few
130  * sanity checks on it.
131  *
132  * Returns: 0 on success,
133  *          1 if the header was invalid or incomplete,
134  *          errno on error
135  */
136
137 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
138                           struct gfs2_log_header_host *head)
139 {
140         struct gfs2_log_header *lh;
141         struct buffer_head *bh;
142         u32 hash, crc;
143         int error;
144
145         error = gfs2_replay_read_block(jd, blk, &bh);
146         if (error)
147                 return error;
148         lh = (void *)bh->b_data;
149
150         hash = crc32(~0, lh, LH_V1_SIZE - 4);
151         hash = ~crc32_le_shift(hash, 4);  /* assume lh_hash is zero */
152
153         crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
154                      bh->b_size - LH_V1_SIZE - 4);
155
156         error = lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
157                 lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
158                 be32_to_cpu(lh->lh_blkno) != blk ||
159                 be32_to_cpu(lh->lh_hash) != hash ||
160                 (lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc);
161
162         brelse(bh);
163
164         if (!error) {
165                 head->lh_sequence = be64_to_cpu(lh->lh_sequence);
166                 head->lh_flags = be32_to_cpu(lh->lh_flags);
167                 head->lh_tail = be32_to_cpu(lh->lh_tail);
168                 head->lh_blkno = be32_to_cpu(lh->lh_blkno);
169         }
170         return error;
171 }
172
173 /**
174  * find_good_lh - find a good log header
175  * @jd: the journal
176  * @blk: the segment to start searching from
177  * @lh: the log header to fill in
178  * @forward: if true search forward in the log, else search backward
179  *
180  * Call get_log_header() to get a log header for a segment, but if the
181  * segment is bad, either scan forward or backward until we find a good one.
182  *
183  * Returns: errno
184  */
185
186 static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
187                         struct gfs2_log_header_host *head)
188 {
189         unsigned int orig_blk = *blk;
190         int error;
191
192         for (;;) {
193                 error = get_log_header(jd, *blk, head);
194                 if (error <= 0)
195                         return error;
196
197                 if (++*blk == jd->jd_blocks)
198                         *blk = 0;
199
200                 if (*blk == orig_blk) {
201                         gfs2_consist_inode(GFS2_I(jd->jd_inode));
202                         return -EIO;
203                 }
204         }
205 }
206
207 /**
208  * jhead_scan - make sure we've found the head of the log
209  * @jd: the journal
210  * @head: this is filled in with the log descriptor of the head
211  *
212  * At this point, seg and lh should be either the head of the log or just
213  * before.  Scan forward until we find the head.
214  *
215  * Returns: errno
216  */
217
218 static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
219 {
220         unsigned int blk = head->lh_blkno;
221         struct gfs2_log_header_host lh;
222         int error;
223
224         for (;;) {
225                 if (++blk == jd->jd_blocks)
226                         blk = 0;
227
228                 error = get_log_header(jd, blk, &lh);
229                 if (error < 0)
230                         return error;
231                 if (error == 1)
232                         continue;
233
234                 if (lh.lh_sequence == head->lh_sequence) {
235                         gfs2_consist_inode(GFS2_I(jd->jd_inode));
236                         return -EIO;
237                 }
238                 if (lh.lh_sequence < head->lh_sequence)
239                         break;
240
241                 *head = lh;
242         }
243
244         return 0;
245 }
246
247 /**
248  * gfs2_find_jhead - find the head of a log
249  * @jd: the journal
250  * @head: the log descriptor for the head of the log is returned here
251  *
252  * Do a binary search of a journal and find the valid log entry with the
253  * highest sequence number.  (i.e. the log head)
254  *
255  * Returns: errno
256  */
257
258 int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
259 {
260         struct gfs2_log_header_host lh_1, lh_m;
261         u32 blk_1, blk_2, blk_m;
262         int error;
263
264         blk_1 = 0;
265         blk_2 = jd->jd_blocks - 1;
266
267         for (;;) {
268                 blk_m = (blk_1 + blk_2) / 2;
269
270                 error = find_good_lh(jd, &blk_1, &lh_1);
271                 if (error)
272                         return error;
273
274                 error = find_good_lh(jd, &blk_m, &lh_m);
275                 if (error)
276                         return error;
277
278                 if (blk_1 == blk_m || blk_m == blk_2)
279                         break;
280
281                 if (lh_1.lh_sequence <= lh_m.lh_sequence)
282                         blk_1 = blk_m;
283                 else
284                         blk_2 = blk_m;
285         }
286
287         error = jhead_scan(jd, &lh_1);
288         if (error)
289                 return error;
290
291         *head = lh_1;
292
293         return error;
294 }
295
296 /**
297  * foreach_descriptor - go through the active part of the log
298  * @jd: the journal
299  * @start: the first log header in the active region
300  * @end: the last log header (don't process the contents of this entry))
301  *
302  * Call a given function once for every log descriptor in the active
303  * portion of the log.
304  *
305  * Returns: errno
306  */
307
308 static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
309                               unsigned int end, int pass)
310 {
311         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
312         struct buffer_head *bh;
313         struct gfs2_log_descriptor *ld;
314         int error = 0;
315         u32 length;
316         __be64 *ptr;
317         unsigned int offset = sizeof(struct gfs2_log_descriptor);
318         offset += sizeof(__be64) - 1;
319         offset &= ~(sizeof(__be64) - 1);
320
321         while (start != end) {
322                 error = gfs2_replay_read_block(jd, start, &bh);
323                 if (error)
324                         return error;
325                 if (gfs2_meta_check(sdp, bh)) {
326                         brelse(bh);
327                         return -EIO;
328                 }
329                 ld = (struct gfs2_log_descriptor *)bh->b_data;
330                 length = be32_to_cpu(ld->ld_length);
331
332                 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
333                         struct gfs2_log_header_host lh;
334                         error = get_log_header(jd, start, &lh);
335                         if (!error) {
336                                 gfs2_replay_incr_blk(jd, &start);
337                                 brelse(bh);
338                                 continue;
339                         }
340                         if (error == 1) {
341                                 gfs2_consist_inode(GFS2_I(jd->jd_inode));
342                                 error = -EIO;
343                         }
344                         brelse(bh);
345                         return error;
346                 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
347                         brelse(bh);
348                         return -EIO;
349                 }
350                 ptr = (__be64 *)(bh->b_data + offset);
351                 error = lops_scan_elements(jd, start, ld, ptr, pass);
352                 if (error) {
353                         brelse(bh);
354                         return error;
355                 }
356
357                 while (length--)
358                         gfs2_replay_incr_blk(jd, &start);
359
360                 brelse(bh);
361         }
362
363         return 0;
364 }
365
366 /**
367  * clean_journal - mark a dirty journal as being clean
368  * @jd: the journal
369  * @head: the head journal to start from
370  *
371  * Returns: errno
372  */
373
374 static void clean_journal(struct gfs2_jdesc *jd,
375                           struct gfs2_log_header_host *head)
376 {
377         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
378
379         sdp->sd_log_flush_head = head->lh_blkno;
380         gfs2_replay_incr_blk(jd, &sdp->sd_log_flush_head);
381         gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0,
382                               GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
383                               REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
384 }
385
386
387 static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
388                                unsigned int message)
389 {
390         char env_jid[20];
391         char env_status[20];
392         char *envp[] = { env_jid, env_status, NULL };
393         struct lm_lockstruct *ls = &sdp->sd_lockstruct;
394
395         ls->ls_recover_jid_done = jid;
396         ls->ls_recover_jid_status = message;
397         sprintf(env_jid, "JID=%u", jid);
398         sprintf(env_status, "RECOVERY=%s",
399                 message == LM_RD_SUCCESS ? "Done" : "Failed");
400         kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
401
402         if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
403                 sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
404 }
405
406 void gfs2_recover_func(struct work_struct *work)
407 {
408         struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
409         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
410         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
411         struct gfs2_log_header_host head;
412         struct gfs2_holder j_gh, ji_gh, thaw_gh;
413         ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
414         int ro = 0;
415         unsigned int pass;
416         int error = 0;
417         int jlocked = 0;
418
419         t_start = ktime_get();
420         if (sdp->sd_args.ar_spectator)
421                 goto fail;
422         if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
423                 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
424                         jd->jd_jid);
425                 jlocked = 1;
426                 /* Acquire the journal lock so we can do recovery */
427
428                 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
429                                           LM_ST_EXCLUSIVE,
430                                           LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
431                                           &j_gh);
432                 switch (error) {
433                 case 0:
434                         break;
435
436                 case GLR_TRYFAILED:
437                         fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
438                         error = 0;
439
440                 default:
441                         goto fail;
442                 };
443
444                 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
445                                            LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
446                 if (error)
447                         goto fail_gunlock_j;
448         } else {
449                 fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
450         }
451
452         t_jlck = ktime_get();
453         fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
454
455         error = gfs2_jdesc_check(jd);
456         if (error)
457                 goto fail_gunlock_ji;
458
459         error = gfs2_find_jhead(jd, &head);
460         if (error)
461                 goto fail_gunlock_ji;
462         t_jhd = ktime_get();
463
464         if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
465                 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
466                         jd->jd_jid);
467
468                 /* Acquire a shared hold on the freeze lock */
469
470                 error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
471                                            LM_FLAG_NOEXP | LM_FLAG_PRIORITY,
472                                            &thaw_gh);
473                 if (error)
474                         goto fail_gunlock_ji;
475
476                 if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
477                         ro = 1;
478                 } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
479                         if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
480                                 ro = 1;
481                 } else {
482                         if (sb_rdonly(sdp->sd_vfs)) {
483                                 /* check if device itself is read-only */
484                                 ro = bdev_read_only(sdp->sd_vfs->s_bdev);
485                                 if (!ro) {
486                                         fs_info(sdp, "recovery required on "
487                                                 "read-only filesystem.\n");
488                                         fs_info(sdp, "write access will be "
489                                                 "enabled during recovery.\n");
490                                 }
491                         }
492                 }
493
494                 if (ro) {
495                         fs_warn(sdp, "jid=%u: Can't replay: read-only block "
496                                 "device\n", jd->jd_jid);
497                         error = -EROFS;
498                         goto fail_gunlock_thaw;
499                 }
500
501                 t_tlck = ktime_get();
502                 fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
503
504                 for (pass = 0; pass < 2; pass++) {
505                         lops_before_scan(jd, &head, pass);
506                         error = foreach_descriptor(jd, head.lh_tail,
507                                                    head.lh_blkno, pass);
508                         lops_after_scan(jd, error, pass);
509                         if (error)
510                                 goto fail_gunlock_thaw;
511                 }
512
513                 clean_journal(jd, &head);
514
515                 gfs2_glock_dq_uninit(&thaw_gh);
516                 t_rep = ktime_get();
517                 fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
518                         "jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
519                         jd->jd_jid, ktime_ms_delta(t_rep, t_start),
520                         ktime_ms_delta(t_jlck, t_start),
521                         ktime_ms_delta(t_jhd, t_jlck),
522                         ktime_ms_delta(t_tlck, t_jhd),
523                         ktime_ms_delta(t_rep, t_tlck));
524         }
525
526         gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
527
528         if (jlocked) {
529                 gfs2_glock_dq_uninit(&ji_gh);
530                 gfs2_glock_dq_uninit(&j_gh);
531         }
532
533         fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
534         goto done;
535
536 fail_gunlock_thaw:
537         gfs2_glock_dq_uninit(&thaw_gh);
538 fail_gunlock_ji:
539         if (jlocked) {
540                 gfs2_glock_dq_uninit(&ji_gh);
541 fail_gunlock_j:
542                 gfs2_glock_dq_uninit(&j_gh);
543         }
544
545         fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
546 fail:
547         jd->jd_recover_error = error;
548         gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
549 done:
550         clear_bit(JDF_RECOVERY, &jd->jd_flags);
551         smp_mb__after_atomic();
552         wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
553 }
554
555 int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
556 {
557         int rv;
558
559         if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
560                 return -EBUSY;
561
562         /* we have JDF_RECOVERY, queue should always succeed */
563         rv = queue_work(gfs_recovery_wq, &jd->jd_work);
564         BUG_ON(!rv);
565
566         if (wait)
567                 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
568                             TASK_UNINTERRUPTIBLE);
569
570         return wait ? jd->jd_recover_error : 0;
571 }
572