gfs2: Do proper error checking for go_sync family of glops functions
author: Bob Peterson <rpeterso@redhat.com>
Wed, 13 Nov 2019 20:09:28 +0000 (14:09 -0600)
committer: Bob Peterson <rpeterso@redhat.com>
Thu, 27 Feb 2020 13:53:18 +0000 (07:53 -0600)
Before this patch, function do_xmote would try to sync out the glock
dirty data by calling the appropriate glops function XXX_go_sync()
but it did not check for a good return code. If the sync was not
possible due to an I/O error or some other failure, do_xmote would
continue on, call go_inval, and release the glock to other cluster nodes.
When those nodes go to replay the journal, they may already be holding
glocks for the journal records that should have been synced, but were
not due to the ignored error.

This patch introduces proper error code checking to the go_sync
family of glops functions.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
fs/gfs2/glock.c
fs/gfs2/glops.c
fs/gfs2/incore.h

index 73cb5bc..0bfa58e 100644 (file)
@@ -602,8 +602,20 @@ __acquires(&gl->gl_lockref.lock)
            (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
                clear_bit(GLF_BLOCKING, &gl->gl_flags);
        spin_unlock(&gl->gl_lockref.lock);
-       if (glops->go_sync)
-               glops->go_sync(gl);
+       if (glops->go_sync) {
+               ret = glops->go_sync(gl);
+               /* If we had a problem syncing (due to io errors or whatever),
+                * we should not invalidate the metadata or tell dlm to
+                * release the glock to other nodes.
+                */
+               if (ret) {
+                       if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
+                               fs_err(sdp, "Error %d syncing glock \n", ret);
+                               gfs2_dump_glock(NULL, gl, true);
+                       }
+                       return;
+               }
+       }
        if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
                /*
                 * The call to go_sync should have cleared out the ail list.
index bbbcae8..9e9c7a4 100644 (file)
@@ -82,10 +82,11 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
 }
 
 
-static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
 {
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
        struct gfs2_trans tr;
+       int ret;
 
        memset(&tr, 0, sizeof(tr));
        INIT_LIST_HEAD(&tr.tr_buf);
@@ -116,7 +117,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
                        goto flush;
                if (log_in_flight)
                        log_flush_wait(sdp);
-               return;
+               return 0;
        }
 
        /* A shortened, inline version of gfs2_trans_begin()
@@ -124,8 +125,9 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
          * on the stack */
        tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes);
        tr.tr_ip = _RET_IP_;
-       if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0)
-               return;
+       ret = gfs2_log_reserve(sdp, tr.tr_reserved);
+       if (ret < 0)
+               return ret;
        WARN_ON_ONCE(current->journal_info);
        current->journal_info = &tr;
 
@@ -135,6 +137,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
 flush:
        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                       GFS2_LFC_AIL_EMPTY_GL);
+       return 0;
 }
 
 void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
@@ -168,7 +171,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
  * return to caller to demote/unlock the glock until I/O is complete.
  */
 
-static void rgrp_go_sync(struct gfs2_glock *gl)
+static int rgrp_go_sync(struct gfs2_glock *gl)
 {
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
        struct address_space *mapping = &sdp->sd_aspace;
@@ -176,21 +179,24 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
        int error;
 
        if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
-               return;
+               return 0;
        GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
 
        gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
                       GFS2_LFC_RGRP_GO_SYNC);
        filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
        error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
+       WARN_ON_ONCE(error);
        mapping_set_error(mapping, error);
-       gfs2_ail_empty_gl(gl);
+       if (!error)
+               error = gfs2_ail_empty_gl(gl);
 
        spin_lock(&gl->gl_lockref.lock);
        rgd = gl->gl_object;
        if (rgd)
                gfs2_free_clones(rgd);
        spin_unlock(&gl->gl_lockref.lock);
+       return error;
 }
 
 /**
@@ -257,12 +263,12 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip)
  *
  */
 
-static void inode_go_sync(struct gfs2_glock *gl)
+static int inode_go_sync(struct gfs2_glock *gl)
 {
        struct gfs2_inode *ip = gfs2_glock2inode(gl);
        int isreg = ip && S_ISREG(ip->i_inode.i_mode);
        struct address_space *metamapping = gfs2_glock2aspace(gl);
-       int error;
+       int error = 0;
 
        if (isreg) {
                if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
@@ -295,6 +301,7 @@ static void inode_go_sync(struct gfs2_glock *gl)
 
 out:
        gfs2_clear_glop_pending(ip);
+       return error;
 }
 
 /**
@@ -515,7 +522,7 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
  *
  */
 
-static void freeze_go_sync(struct gfs2_glock *gl)
+static int freeze_go_sync(struct gfs2_glock *gl)
 {
        int error = 0;
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
@@ -529,7 +536,7 @@ static void freeze_go_sync(struct gfs2_glock *gl)
                                error);
                        if (gfs2_withdrawn(sdp)) {
                                atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
-                               return;
+                               return 0;
                        }
                        gfs2_assert_withdraw(sdp, 0);
                }
@@ -537,6 +544,7 @@ static void freeze_go_sync(struct gfs2_glock *gl)
                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
                               GFS2_LFC_FREEZE_GO_SYNC);
        }
+       return 0;
 }
 
 /**
index 8cd564b..04549a8 100644 (file)
@@ -234,7 +234,7 @@ struct lm_lockname {
 
 
 struct gfs2_glock_operations {
-       void (*go_sync) (struct gfs2_glock *gl);
+       int (*go_sync) (struct gfs2_glock *gl);
        int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
        void (*go_inval) (struct gfs2_glock *gl, int flags);
        int (*go_demote_ok) (const struct gfs2_glock *gl);