fs: dlm: change posix lock sigint handling
author Alexander Aring <aahringo@redhat.com>
Wed, 22 Jun 2022 18:45:09 +0000 (14:45 -0400)
committer David Teigland <teigland@redhat.com>
Fri, 24 Jun 2022 16:53:05 +0000 (11:53 -0500)
This patch changes the handling of a plock operation that was interrupted
while waiting for a user space reply from dlm_controld.  (This is not
the lock blocking state, i.e. locks_lock_file_wait().)

Currently, when an op is interrupted while waiting on user space, the
op is removed.  When the user space result later arrives, a kernel
message is logged: "dev_write no op...".  This can be seen from a test
such as "stress-ng --fcntl 100" and interrupting it with ctrl-c.

Now, leave the op in place when interrupted and remove it when the
result arrives (the result will be ignored).  With this change, the
logged message is not expected to appear, and would indicate a bug.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
fs/dlm/plock.c

index cf7bba4..737f185 100644 (file)
@@ -29,6 +29,8 @@ struct plock_async_data {
 struct plock_op {
        struct list_head list;
        int done;
+       /* if lock op got interrupted while waiting dlm_controld reply */
+       bool sigint;
        struct dlm_plock_info info;
        /* if set indicates async handling */
        struct plock_async_data *data;
@@ -157,16 +159,24 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
        rv = wait_event_interruptible(recv_wq, (op->done != 0));
        if (rv == -ERESTARTSYS) {
                spin_lock(&ops_lock);
-               list_del(&op->list);
+               /* recheck under ops_lock if we got a done != 0,
+                * if so this interrupt case should be ignored
+                */
+               if (op->done != 0) {
+                       spin_unlock(&ops_lock);
+                       goto do_lock_wait;
+               }
+
+               op->sigint = true;
                spin_unlock(&ops_lock);
                log_debug(ls, "%s: wait interrupted %x %llx pid %d",
                          __func__, ls->ls_global_id,
                          (unsigned long long)number, op->info.pid);
-               dlm_release_plock_op(op);
-               do_unlock_close(&op->info);
                goto out;
        }
 
+do_lock_wait:
+
        WARN_ON(!list_empty(&op->list));
 
        rv = op->info.rv;
@@ -421,6 +431,19 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
                if (iter->info.fsid == info.fsid &&
                    iter->info.number == info.number &&
                    iter->info.owner == info.owner) {
+                       if (iter->sigint) {
+                               list_del(&iter->list);
+                               spin_unlock(&ops_lock);
+
+                               pr_debug("%s: sigint cleanup %x %llx pid %d",
+                                         __func__, iter->info.fsid,
+                                         (unsigned long long)iter->info.number,
+                                         iter->info.pid);
+                               do_unlock_close(&iter->info);
+                               memcpy(&iter->info, &info, sizeof(info));
+                               dlm_release_plock_op(iter);
+                               return count;
+                       }
                        list_del_init(&iter->list);
                        memcpy(&iter->info, &info, sizeof(info));
                        if (iter->data)