Merge tag 'tag-chrome-platform-for-v4.21' of git://git.kernel.org/pub/scm/linux/kerne...
[linux-2.6-microblaze.git] / fs / gfs2 / lops.c
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/mempool.h>
16 #include <linux/gfs2_ondisk.h>
17 #include <linux/bio.h>
18 #include <linux/fs.h>
19 #include <linux/list_sort.h>
20 #include <linux/blkdev.h>
21
22 #include "bmap.h"
23 #include "dir.h"
24 #include "gfs2.h"
25 #include "incore.h"
26 #include "inode.h"
27 #include "glock.h"
28 #include "log.h"
29 #include "lops.h"
30 #include "meta_io.h"
31 #include "recovery.h"
32 #include "rgrp.h"
33 #include "trans.h"
34 #include "util.h"
35 #include "trace_gfs2.h"
36
37 /**
38  * gfs2_pin - Pin a buffer in memory
39  * @sdp: The superblock
40  * @bh: The buffer to be pinned
41  *
42  * The log lock must be held when calling this function
43  */
44 void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
45 {
46         struct gfs2_bufdata *bd;
47
48         BUG_ON(!current->journal_info);
49
50         clear_buffer_dirty(bh);
51         if (test_set_buffer_pinned(bh))
52                 gfs2_assert_withdraw(sdp, 0);
53         if (!buffer_uptodate(bh))
54                 gfs2_io_error_bh_wd(sdp, bh);
55         bd = bh->b_private;
56         /* If this buffer is in the AIL and it has already been written
57          * to in-place disk block, remove it from the AIL.
58          */
59         spin_lock(&sdp->sd_ail_lock);
60         if (bd->bd_tr)
61                 list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
62         spin_unlock(&sdp->sd_ail_lock);
63         get_bh(bh);
64         atomic_inc(&sdp->sd_log_pinned);
65         trace_gfs2_pin(bd, 1);
66 }
67
68 static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
69 {
70         return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
71 }
72
73 static void maybe_release_space(struct gfs2_bufdata *bd)
74 {
75         struct gfs2_glock *gl = bd->bd_gl;
76         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
77         struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
78         unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
79         struct gfs2_bitmap *bi = rgd->rd_bits + index;
80
81         if (bi->bi_clone == NULL)
82                 return;
83         if (sdp->sd_args.ar_discard)
84                 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
85         memcpy(bi->bi_clone + bi->bi_offset,
86                bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
87         clear_bit(GBF_FULL, &bi->bi_flags);
88         rgd->rd_free_clone = rgd->rd_free;
89         rgd->rd_extfail_pt = rgd->rd_free;
90 }
91
92 /**
93  * gfs2_unpin - Unpin a buffer
94  * @sdp: the filesystem the buffer belongs to
95  * @bh: The buffer to unpin
96  * @ai:
97  * @flags: The inode dirty flags
98  *
99  */
100
101 static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
102                        struct gfs2_trans *tr)
103 {
104         struct gfs2_bufdata *bd = bh->b_private;
105
106         BUG_ON(!buffer_uptodate(bh));
107         BUG_ON(!buffer_pinned(bh));
108
109         lock_buffer(bh);
110         mark_buffer_dirty(bh);
111         clear_buffer_pinned(bh);
112
113         if (buffer_is_rgrp(bd))
114                 maybe_release_space(bd);
115
116         spin_lock(&sdp->sd_ail_lock);
117         if (bd->bd_tr) {
118                 list_del(&bd->bd_ail_st_list);
119                 brelse(bh);
120         } else {
121                 struct gfs2_glock *gl = bd->bd_gl;
122                 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
123                 atomic_inc(&gl->gl_ail_count);
124         }
125         bd->bd_tr = tr;
126         list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
127         spin_unlock(&sdp->sd_ail_lock);
128
129         clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
130         trace_gfs2_pin(bd, 0);
131         unlock_buffer(bh);
132         atomic_dec(&sdp->sd_log_pinned);
133 }
134
135 static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
136 {
137         BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
138                (sdp->sd_log_flush_head != sdp->sd_log_head));
139
140         if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
141                 sdp->sd_log_flush_head = 0;
142 }
143
144 u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
145 {
146         unsigned int lbn = sdp->sd_log_flush_head;
147         struct gfs2_journal_extent *je;
148         u64 block;
149
150         list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) {
151                 if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) {
152                         block = je->dblock + lbn - je->lblock;
153                         gfs2_log_incr_head(sdp);
154                         return block;
155                 }
156         }
157
158         return -1;
159 }
160
161 /**
162  * gfs2_end_log_write_bh - end log write of pagecache data with buffers
163  * @sdp: The superblock
164  * @bvec: The bio_vec
165  * @error: The i/o status
166  *
167  * This finds the relevant buffers and unlocks them and sets the
168  * error flag according to the status of the i/o request. This is
169  * used when the log is writing data which has an in-place version
170  * that is pinned in the pagecache.
171  */
172
173 static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
174                                   blk_status_t error)
175 {
176         struct buffer_head *bh, *next;
177         struct page *page = bvec->bv_page;
178         unsigned size;
179
180         bh = page_buffers(page);
181         size = bvec->bv_len;
182         while (bh_offset(bh) < bvec->bv_offset)
183                 bh = bh->b_this_page;
184         do {
185                 if (error)
186                         mark_buffer_write_io_error(bh);
187                 unlock_buffer(bh);
188                 next = bh->b_this_page;
189                 size -= bh->b_size;
190                 brelse(bh);
191                 bh = next;
192         } while(bh && size);
193 }
194
195 /**
196  * gfs2_end_log_write - end of i/o to the log
197  * @bio: The bio
198  *
199  * Each bio_vec contains either data from the pagecache or data
200  * relating to the log itself. Here we iterate over the bio_vec
201  * array, processing both kinds of data.
202  *
203  */
204
205 static void gfs2_end_log_write(struct bio *bio)
206 {
207         struct gfs2_sbd *sdp = bio->bi_private;
208         struct bio_vec *bvec;
209         struct page *page;
210         int i;
211
212         if (bio->bi_status) {
213                 fs_err(sdp, "Error %d writing to journal, jid=%u\n",
214                        bio->bi_status, sdp->sd_jdesc->jd_jid);
215                 wake_up(&sdp->sd_logd_waitq);
216         }
217
218         bio_for_each_segment_all(bvec, bio, i) {
219                 page = bvec->bv_page;
220                 if (page_has_buffers(page))
221                         gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
222                 else
223                         mempool_free(page, gfs2_page_pool);
224         }
225
226         bio_put(bio);
227         if (atomic_dec_and_test(&sdp->sd_log_in_flight))
228                 wake_up(&sdp->sd_log_flush_wait);
229 }
230
231 /**
232  * gfs2_log_submit_bio - Submit any pending log bio
233  * @biop: Address of the bio pointer
234  * @opf: REQ_OP | op_flags
235  *
236  * Submit any pending part-built or full bio to the block device. If
237  * there is no pending bio, then this is a no-op.
238  */
239
240 void gfs2_log_submit_bio(struct bio **biop, int opf)
241 {
242         struct bio *bio = *biop;
243         if (bio) {
244                 struct gfs2_sbd *sdp = bio->bi_private;
245                 atomic_inc(&sdp->sd_log_in_flight);
246                 bio->bi_opf = opf;
247                 submit_bio(bio);
248                 *biop = NULL;
249         }
250 }
251
252 /**
253  * gfs2_log_alloc_bio - Allocate a bio
254  * @sdp: The super block
255  * @blkno: The device block number we want to write to
256  * @end_io: The bi_end_io callback
257  *
258  * Allocate a new bio, initialize it with the given parameters and return it.
259  *
260  * Returns: The newly allocated bio
261  */
262
263 static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
264                                       bio_end_io_t *end_io)
265 {
266         struct super_block *sb = sdp->sd_vfs;
267         struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
268
269         bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
270         bio_set_dev(bio, sb->s_bdev);
271         bio->bi_end_io = end_io;
272         bio->bi_private = sdp;
273
274         return bio;
275 }
276
277 /**
278  * gfs2_log_get_bio - Get cached log bio, or allocate a new one
279  * @sdp: The super block
280  * @blkno: The device block number we want to write to
281  * @bio: The bio to get or allocate
282  * @op: REQ_OP
283  * @end_io: The bi_end_io callback
284  * @flush: Always flush the current bio and allocate a new one?
285  *
286  * If there is a cached bio, then if the next block number is sequential
287  * with the previous one, return it, otherwise flush the bio to the
288  * device. If there is no cached bio, or we just flushed it, then
289  * allocate a new one.
290  *
291  * Returns: The bio to use for log writes
292  */
293
294 static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
295                                     struct bio **biop, int op,
296                                     bio_end_io_t *end_io, bool flush)
297 {
298         struct bio *bio = *biop;
299
300         if (bio) {
301                 u64 nblk;
302
303                 nblk = bio_end_sector(bio);
304                 nblk >>= sdp->sd_fsb2bb_shift;
305                 if (blkno == nblk && !flush)
306                         return bio;
307                 gfs2_log_submit_bio(biop, op);
308         }
309
310         *biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
311         return *biop;
312 }
313
314 /**
315  * gfs2_log_write - write to log
316  * @sdp: the filesystem
317  * @page: the page to write
318  * @size: the size of the data to write
319  * @offset: the offset within the page 
320  * @blkno: block number of the log entry
321  *
322  * Try and add the page segment to the current bio. If that fails,
323  * submit the current bio to the device and create a new one, and
324  * then add the page segment to that.
325  */
326
327 void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
328                     unsigned size, unsigned offset, u64 blkno)
329 {
330         struct bio *bio;
331         int ret;
332
333         bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio, REQ_OP_WRITE,
334                                gfs2_end_log_write, false);
335         ret = bio_add_page(bio, page, size, offset);
336         if (ret == 0) {
337                 bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio,
338                                        REQ_OP_WRITE, gfs2_end_log_write, true);
339                 ret = bio_add_page(bio, page, size, offset);
340                 WARN_ON(ret == 0);
341         }
342 }
343
344 /**
345  * gfs2_log_write_bh - write a buffer's content to the log
346  * @sdp: The super block
347  * @bh: The buffer pointing to the in-place location
348  * 
349  * This writes the content of the buffer to the next available location
350  * in the log. The buffer will be unlocked once the i/o to the log has
351  * completed.
352  */
353
354 static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
355 {
356         gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh),
357                        gfs2_log_bmap(sdp));
358 }
359
360 /**
361  * gfs2_log_write_page - write one block stored in a page, into the log
362  * @sdp: The superblock
363  * @page: The struct page
364  *
365  * This writes the first block-sized part of the page into the log. Note
366  * that the page must have been allocated from the gfs2_page_pool mempool
367  * and that after this has been called, ownership has been transferred and
368  * the page may be freed at any time.
369  */
370
371 void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
372 {
373         struct super_block *sb = sdp->sd_vfs;
374         gfs2_log_write(sdp, page, sb->s_blocksize, 0,
375                        gfs2_log_bmap(sdp));
376 }
377
378 /**
379  * gfs2_end_log_read - end I/O callback for reads from the log
380  * @bio: The bio
381  *
382  * Simply unlock the pages in the bio. The main thread will wait on them and
383  * process them in order as necessary.
384  */
385
386 static void gfs2_end_log_read(struct bio *bio)
387 {
388         struct page *page;
389         struct bio_vec *bvec;
390         int i;
391
392         bio_for_each_segment_all(bvec, bio, i) {
393                 page = bvec->bv_page;
394                 if (bio->bi_status) {
395                         int err = blk_status_to_errno(bio->bi_status);
396
397                         SetPageError(page);
398                         mapping_set_error(page->mapping, err);
399                 }
400                 unlock_page(page);
401         }
402
403         bio_put(bio);
404 }
405
406 /**
407  * gfs2_jhead_pg_srch - Look for the journal head in a given page.
408  * @jd: The journal descriptor
409  * @page: The page to look in
410  *
411  * Returns: 1 if found, 0 otherwise.
412  */
413
414 static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
415                               struct gfs2_log_header_host *head,
416                               struct page *page)
417 {
418         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
419         struct gfs2_log_header_host uninitialized_var(lh);
420         void *kaddr = kmap_atomic(page);
421         unsigned int offset;
422         bool ret = false;
423
424         for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
425                 if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
426                         if (lh.lh_sequence > head->lh_sequence)
427                                 *head = lh;
428                         else {
429                                 ret = true;
430                                 break;
431                         }
432                 }
433         }
434         kunmap_atomic(kaddr);
435         return ret;
436 }
437
438 /**
439  * gfs2_jhead_process_page - Search/cleanup a page
440  * @jd: The journal descriptor
441  * @index: Index of the page to look into
442  * @done: If set, perform only cleanup, else search and set if found.
443  *
444  * Find the page with 'index' in the journal's mapping. Search the page for
445  * the journal head if requested (cleanup == false). Release refs on the
446  * page so the page cache can reclaim it (put_page() twice). We grabbed a
447  * reference on this page two times, first when we did a find_or_create_page()
448  * to obtain the page to add it to the bio and second when we do a
449  * find_get_page() here to get the page to wait on while I/O on it is being
450  * completed.
451  * This function is also used to free up a page we might've grabbed but not
452  * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
453  * submitted the I/O, but we already found the jhead so we only need to drop
454  * our references to the page.
455  */
456
457 static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
458                                     struct gfs2_log_header_host *head,
459                                     bool *done)
460 {
461         struct page *page;
462
463         page = find_get_page(jd->jd_inode->i_mapping, index);
464         wait_on_page_locked(page);
465
466         if (PageError(page))
467                 *done = true;
468
469         if (!*done)
470                 *done = gfs2_jhead_pg_srch(jd, head, page);
471
472         put_page(page); /* Once for find_get_page */
473         put_page(page); /* Once more for find_or_create_page */
474 }
475
476 /**
477  * gfs2_find_jhead - find the head of a log
478  * @jd: The journal descriptor
479  * @head: The log descriptor for the head of the log is returned here
480  *
481  * Do a search of a journal by reading it in large chunks using bios and find
482  * the valid log entry with the highest sequence number.  (i.e. the log head)
483  *
484  * Returns: 0 on success, errno otherwise
485  */
486
487 int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
488 {
489         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
490         struct address_space *mapping = jd->jd_inode->i_mapping;
491         struct gfs2_journal_extent *je;
492         u32 block, read_idx = 0, submit_idx = 0, index = 0;
493         int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
494         int blocks_per_page = 1 << shift, sz, ret = 0;
495         struct bio *bio = NULL;
496         struct page *page;
497         bool done = false;
498         errseq_t since;
499
500         memset(head, 0, sizeof(*head));
501         if (list_empty(&jd->extent_list))
502                 gfs2_map_journal_extents(sdp, jd);
503
504         since = filemap_sample_wb_err(mapping);
505         list_for_each_entry(je, &jd->extent_list, list) {
506                 for (block = 0; block < je->blocks; block += blocks_per_page) {
507                         index = (je->lblock + block) >> shift;
508
509                         page = find_or_create_page(mapping, index, GFP_NOFS);
510                         if (!page) {
511                                 ret = -ENOMEM;
512                                 done = true;
513                                 goto out;
514                         }
515
516                         if (bio) {
517                                 sz = bio_add_page(bio, page, PAGE_SIZE, 0);
518                                 if (sz == PAGE_SIZE)
519                                         goto page_added;
520                                 submit_idx = index;
521                                 submit_bio(bio);
522                                 bio = NULL;
523                         }
524
525                         bio = gfs2_log_alloc_bio(sdp,
526                                                  je->dblock + (index << shift),
527                                                  gfs2_end_log_read);
528                         bio->bi_opf = REQ_OP_READ;
529                         sz = bio_add_page(bio, page, PAGE_SIZE, 0);
530                         gfs2_assert_warn(sdp, sz == PAGE_SIZE);
531
532 page_added:
533                         if (submit_idx <= read_idx + BIO_MAX_PAGES) {
534                                 /* Keep at least one bio in flight */
535                                 continue;
536                         }
537
538                         gfs2_jhead_process_page(jd, read_idx++, head, &done);
539                         if (done)
540                                 goto out;  /* found */
541                 }
542         }
543
544 out:
545         if (bio)
546                 submit_bio(bio);
547         while (read_idx <= index)
548                 gfs2_jhead_process_page(jd, read_idx++, head, &done);
549
550         if (!ret)
551                 ret = filemap_check_wb_err(mapping, since);
552
553         return ret;
554 }
555
556 static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
557                                       u32 ld_length, u32 ld_data1)
558 {
559         struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
560         struct gfs2_log_descriptor *ld = page_address(page);
561         clear_page(ld);
562         ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
563         ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
564         ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
565         ld->ld_type = cpu_to_be32(ld_type);
566         ld->ld_length = cpu_to_be32(ld_length);
567         ld->ld_data1 = cpu_to_be32(ld_data1);
568         ld->ld_data2 = 0;
569         return page;
570 }
571
572 static void gfs2_check_magic(struct buffer_head *bh)
573 {
574         void *kaddr;
575         __be32 *ptr;
576
577         clear_buffer_escaped(bh);
578         kaddr = kmap_atomic(bh->b_page);
579         ptr = kaddr + bh_offset(bh);
580         if (*ptr == cpu_to_be32(GFS2_MAGIC))
581                 set_buffer_escaped(bh);
582         kunmap_atomic(kaddr);
583 }
584
585 static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b)
586 {
587         struct gfs2_bufdata *bda, *bdb;
588
589         bda = list_entry(a, struct gfs2_bufdata, bd_list);
590         bdb = list_entry(b, struct gfs2_bufdata, bd_list);
591
592         if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
593                 return -1;
594         if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
595                 return 1;
596         return 0;
597 }
598
599 static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
600                                 unsigned int total, struct list_head *blist,
601                                 bool is_databuf)
602 {
603         struct gfs2_log_descriptor *ld;
604         struct gfs2_bufdata *bd1 = NULL, *bd2;
605         struct page *page;
606         unsigned int num;
607         unsigned n;
608         __be64 *ptr;
609
610         gfs2_log_lock(sdp);
611         list_sort(NULL, blist, blocknr_cmp);
612         bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
613         while(total) {
614                 num = total;
615                 if (total > limit)
616                         num = limit;
617                 gfs2_log_unlock(sdp);
618                 page = gfs2_get_log_desc(sdp,
619                                          is_databuf ? GFS2_LOG_DESC_JDATA :
620                                          GFS2_LOG_DESC_METADATA, num + 1, num);
621                 ld = page_address(page);
622                 gfs2_log_lock(sdp);
623                 ptr = (__be64 *)(ld + 1);
624
625                 n = 0;
626                 list_for_each_entry_continue(bd1, blist, bd_list) {
627                         *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
628                         if (is_databuf) {
629                                 gfs2_check_magic(bd1->bd_bh);
630                                 *ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
631                         }
632                         if (++n >= num)
633                                 break;
634                 }
635
636                 gfs2_log_unlock(sdp);
637                 gfs2_log_write_page(sdp, page);
638                 gfs2_log_lock(sdp);
639
640                 n = 0;
641                 list_for_each_entry_continue(bd2, blist, bd_list) {
642                         get_bh(bd2->bd_bh);
643                         gfs2_log_unlock(sdp);
644                         lock_buffer(bd2->bd_bh);
645
646                         if (buffer_escaped(bd2->bd_bh)) {
647                                 void *kaddr;
648                                 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
649                                 ptr = page_address(page);
650                                 kaddr = kmap_atomic(bd2->bd_bh->b_page);
651                                 memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
652                                        bd2->bd_bh->b_size);
653                                 kunmap_atomic(kaddr);
654                                 *(__be32 *)ptr = 0;
655                                 clear_buffer_escaped(bd2->bd_bh);
656                                 unlock_buffer(bd2->bd_bh);
657                                 brelse(bd2->bd_bh);
658                                 gfs2_log_write_page(sdp, page);
659                         } else {
660                                 gfs2_log_write_bh(sdp, bd2->bd_bh);
661                         }
662                         gfs2_log_lock(sdp);
663                         if (++n >= num)
664                                 break;
665                 }
666
667                 BUG_ON(total < num);
668                 total -= num;
669         }
670         gfs2_log_unlock(sdp);
671 }
672
673 static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
674 {
675         unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
676         unsigned int nbuf;
677         if (tr == NULL)
678                 return;
679         nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
680         gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
681 }
682
683 static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
684 {
685         struct list_head *head;
686         struct gfs2_bufdata *bd;
687
688         if (tr == NULL)
689                 return;
690
691         head = &tr->tr_buf;
692         while (!list_empty(head)) {
693                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
694                 list_del_init(&bd->bd_list);
695                 gfs2_unpin(sdp, bd->bd_bh, tr);
696         }
697 }
698
699 static void buf_lo_before_scan(struct gfs2_jdesc *jd,
700                                struct gfs2_log_header_host *head, int pass)
701 {
702         if (pass != 0)
703                 return;
704
705         jd->jd_found_blocks = 0;
706         jd->jd_replayed_blocks = 0;
707 }
708
709 static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
710                                 struct gfs2_log_descriptor *ld, __be64 *ptr,
711                                 int pass)
712 {
713         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
714         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
715         struct gfs2_glock *gl = ip->i_gl;
716         unsigned int blks = be32_to_cpu(ld->ld_data1);
717         struct buffer_head *bh_log, *bh_ip;
718         u64 blkno;
719         int error = 0;
720
721         if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
722                 return 0;
723
724         gfs2_replay_incr_blk(jd, &start);
725
726         for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
727                 blkno = be64_to_cpu(*ptr++);
728
729                 jd->jd_found_blocks++;
730
731                 if (gfs2_revoke_check(jd, blkno, start))
732                         continue;
733
734                 error = gfs2_replay_read_block(jd, start, &bh_log);
735                 if (error)
736                         return error;
737
738                 bh_ip = gfs2_meta_new(gl, blkno);
739                 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
740
741                 if (gfs2_meta_check(sdp, bh_ip))
742                         error = -EIO;
743                 else
744                         mark_buffer_dirty(bh_ip);
745
746                 brelse(bh_log);
747                 brelse(bh_ip);
748
749                 if (error)
750                         break;
751
752                 jd->jd_replayed_blocks++;
753         }
754
755         return error;
756 }
757
758 /**
759  * gfs2_meta_sync - Sync all buffers associated with a glock
760  * @gl: The glock
761  *
762  */
763
764 static void gfs2_meta_sync(struct gfs2_glock *gl)
765 {
766         struct address_space *mapping = gfs2_glock2aspace(gl);
767         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
768         int error;
769
770         if (mapping == NULL)
771                 mapping = &sdp->sd_aspace;
772
773         filemap_fdatawrite(mapping);
774         error = filemap_fdatawait(mapping);
775
776         if (error)
777                 gfs2_io_error(gl->gl_name.ln_sbd);
778 }
779
780 static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
781 {
782         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
783         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
784
785         if (error) {
786                 gfs2_meta_sync(ip->i_gl);
787                 return;
788         }
789         if (pass != 1)
790                 return;
791
792         gfs2_meta_sync(ip->i_gl);
793
794         fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
795                 jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
796 }
797
798 static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
799 {
800         struct gfs2_meta_header *mh;
801         unsigned int offset;
802         struct list_head *head = &sdp->sd_log_le_revoke;
803         struct gfs2_bufdata *bd;
804         struct page *page;
805         unsigned int length;
806
807         gfs2_write_revokes(sdp);
808         if (!sdp->sd_log_num_revoke)
809                 return;
810
811         length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
812         page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
813         offset = sizeof(struct gfs2_log_descriptor);
814
815         list_for_each_entry(bd, head, bd_list) {
816                 sdp->sd_log_num_revoke--;
817
818                 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
819
820                         gfs2_log_write_page(sdp, page);
821                         page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
822                         mh = page_address(page);
823                         clear_page(mh);
824                         mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
825                         mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
826                         mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
827                         offset = sizeof(struct gfs2_meta_header);
828                 }
829
830                 *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
831                 offset += sizeof(u64);
832         }
833         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
834
835         gfs2_log_write_page(sdp, page);
836 }
837
838 static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
839 {
840         struct list_head *head = &sdp->sd_log_le_revoke;
841         struct gfs2_bufdata *bd;
842         struct gfs2_glock *gl;
843
844         while (!list_empty(head)) {
845                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
846                 list_del_init(&bd->bd_list);
847                 gl = bd->bd_gl;
848                 atomic_dec(&gl->gl_revokes);
849                 clear_bit(GLF_LFLUSH, &gl->gl_flags);
850                 kmem_cache_free(gfs2_bufdata_cachep, bd);
851         }
852 }
853
854 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
855                                   struct gfs2_log_header_host *head, int pass)
856 {
857         if (pass != 0)
858                 return;
859
860         jd->jd_found_revokes = 0;
861         jd->jd_replay_tail = head->lh_tail;
862 }
863
864 static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
865                                    struct gfs2_log_descriptor *ld, __be64 *ptr,
866                                    int pass)
867 {
868         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
869         unsigned int blks = be32_to_cpu(ld->ld_length);
870         unsigned int revokes = be32_to_cpu(ld->ld_data1);
871         struct buffer_head *bh;
872         unsigned int offset;
873         u64 blkno;
874         int first = 1;
875         int error;
876
877         if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
878                 return 0;
879
880         offset = sizeof(struct gfs2_log_descriptor);
881
882         for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
883                 error = gfs2_replay_read_block(jd, start, &bh);
884                 if (error)
885                         return error;
886
887                 if (!first)
888                         gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
889
890                 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
891                         blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
892
893                         error = gfs2_revoke_add(jd, blkno, start);
894                         if (error < 0) {
895                                 brelse(bh);
896                                 return error;
897                         }
898                         else if (error)
899                                 jd->jd_found_revokes++;
900
901                         if (!--revokes)
902                                 break;
903                         offset += sizeof(u64);
904                 }
905
906                 brelse(bh);
907                 offset = sizeof(struct gfs2_meta_header);
908                 first = 0;
909         }
910
911         return 0;
912 }
913
914 static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
915 {
916         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
917
918         if (error) {
919                 gfs2_revoke_clean(jd);
920                 return;
921         }
922         if (pass != 1)
923                 return;
924
925         fs_info(sdp, "jid=%u: Found %u revoke tags\n",
926                 jd->jd_jid, jd->jd_found_revokes);
927
928         gfs2_revoke_clean(jd);
929 }
930
931 /**
932  * databuf_lo_before_commit - Scan the data buffers, writing as we go
933  *
934  */
935
936 static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
937 {
938         unsigned int limit = databuf_limit(sdp);
939         unsigned int nbuf;
940         if (tr == NULL)
941                 return;
942         nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
943         gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
944 }
945
946 static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
947                                     struct gfs2_log_descriptor *ld,
948                                     __be64 *ptr, int pass)
949 {
950         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
951         struct gfs2_glock *gl = ip->i_gl;
952         unsigned int blks = be32_to_cpu(ld->ld_data1);
953         struct buffer_head *bh_log, *bh_ip;
954         u64 blkno;
955         u64 esc;
956         int error = 0;
957
958         if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
959                 return 0;
960
961         gfs2_replay_incr_blk(jd, &start);
962         for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
963                 blkno = be64_to_cpu(*ptr++);
964                 esc = be64_to_cpu(*ptr++);
965
966                 jd->jd_found_blocks++;
967
968                 if (gfs2_revoke_check(jd, blkno, start))
969                         continue;
970
971                 error = gfs2_replay_read_block(jd, start, &bh_log);
972                 if (error)
973                         return error;
974
975                 bh_ip = gfs2_meta_new(gl, blkno);
976                 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
977
978                 /* Unescape */
979                 if (esc) {
980                         __be32 *eptr = (__be32 *)bh_ip->b_data;
981                         *eptr = cpu_to_be32(GFS2_MAGIC);
982                 }
983                 mark_buffer_dirty(bh_ip);
984
985                 brelse(bh_log);
986                 brelse(bh_ip);
987
988                 jd->jd_replayed_blocks++;
989         }
990
991         return error;
992 }
993
994 /* FIXME: sort out accounting for log blocks etc. */
995
996 static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
997 {
998         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
999         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
1000
1001         if (error) {
1002                 gfs2_meta_sync(ip->i_gl);
1003                 return;
1004         }
1005         if (pass != 1)
1006                 return;
1007
1008         /* data sync? */
1009         gfs2_meta_sync(ip->i_gl);
1010
1011         fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
1012                 jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
1013 }
1014
1015 static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
1016 {
1017         struct list_head *head;
1018         struct gfs2_bufdata *bd;
1019
1020         if (tr == NULL)
1021                 return;
1022
1023         head = &tr->tr_databuf;
1024         while (!list_empty(head)) {
1025                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
1026                 list_del_init(&bd->bd_list);
1027                 gfs2_unpin(sdp, bd->bd_bh, tr);
1028         }
1029 }
1030
1031
1032 const struct gfs2_log_operations gfs2_buf_lops = {
1033         .lo_before_commit = buf_lo_before_commit,
1034         .lo_after_commit = buf_lo_after_commit,
1035         .lo_before_scan = buf_lo_before_scan,
1036         .lo_scan_elements = buf_lo_scan_elements,
1037         .lo_after_scan = buf_lo_after_scan,
1038         .lo_name = "buf",
1039 };
1040
1041 const struct gfs2_log_operations gfs2_revoke_lops = {
1042         .lo_before_commit = revoke_lo_before_commit,
1043         .lo_after_commit = revoke_lo_after_commit,
1044         .lo_before_scan = revoke_lo_before_scan,
1045         .lo_scan_elements = revoke_lo_scan_elements,
1046         .lo_after_scan = revoke_lo_after_scan,
1047         .lo_name = "revoke",
1048 };
1049
1050 const struct gfs2_log_operations gfs2_databuf_lops = {
1051         .lo_before_commit = databuf_lo_before_commit,
1052         .lo_after_commit = databuf_lo_after_commit,
1053         .lo_scan_elements = databuf_lo_scan_elements,
1054         .lo_after_scan = databuf_lo_after_scan,
1055         .lo_name = "databuf",
1056 };
1057
1058 const struct gfs2_log_operations *gfs2_log_ops[] = {
1059         &gfs2_databuf_lops,
1060         &gfs2_buf_lops,
1061         &gfs2_revoke_lops,
1062         NULL,
1063 };
1064