[linux-2.6-microblaze.git] fs/gfs2/glock.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
5  */
6
7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9 #include <linux/sched.h>
10 #include <linux/slab.h>
11 #include <linux/spinlock.h>
12 #include <linux/buffer_head.h>
13 #include <linux/delay.h>
14 #include <linux/sort.h>
15 #include <linux/hash.h>
16 #include <linux/jhash.h>
17 #include <linux/kallsyms.h>
18 #include <linux/gfs2_ondisk.h>
19 #include <linux/list.h>
20 #include <linux/wait.h>
21 #include <linux/module.h>
22 #include <linux/uaccess.h>
23 #include <linux/seq_file.h>
24 #include <linux/debugfs.h>
25 #include <linux/kthread.h>
26 #include <linux/freezer.h>
27 #include <linux/workqueue.h>
28 #include <linux/jiffies.h>
29 #include <linux/rcupdate.h>
30 #include <linux/rculist_bl.h>
31 #include <linux/bit_spinlock.h>
32 #include <linux/percpu.h>
33 #include <linux/list_sort.h>
34 #include <linux/lockref.h>
35 #include <linux/rhashtable.h>
36
37 #include "gfs2.h"
38 #include "incore.h"
39 #include "glock.h"
40 #include "glops.h"
41 #include "inode.h"
42 #include "lops.h"
43 #include "meta_io.h"
44 #include "quota.h"
45 #include "super.h"
46 #include "util.h"
47 #include "bmap.h"
48 #define CREATE_TRACE_POINTS
49 #include "trace_gfs2.h"
50
51 struct gfs2_glock_iter {
52         struct gfs2_sbd *sdp;           /* incore superblock           */
53         struct rhashtable_iter hti;     /* rhashtable iterator         */
54         struct gfs2_glock *gl;          /* current glock struct        */
55         loff_t last_pos;                /* last position               */
56 };
57
58 typedef void (*glock_examiner) (struct gfs2_glock * gl);
59
60 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
61
62 static struct dentry *gfs2_root;
63 static struct workqueue_struct *glock_workqueue;
64 struct workqueue_struct *gfs2_delete_workqueue;
65 static LIST_HEAD(lru_list);
66 static atomic_t lru_count = ATOMIC_INIT(0);
67 static DEFINE_SPINLOCK(lru_lock);
68
69 #define GFS2_GL_HASH_SHIFT      15
70 #define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)
71
72 static const struct rhashtable_params ht_parms = {
73         .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
74         .key_len = offsetofend(struct lm_lockname, ln_type),
75         .key_offset = offsetof(struct gfs2_glock, gl_name),
76         .head_offset = offsetof(struct gfs2_glock, gl_node),
77 };
78
79 static struct rhashtable gl_hash_table;
80
81 #define GLOCK_WAIT_TABLE_BITS 12
82 #define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
83 static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;
84
85 struct wait_glock_queue {
86         struct lm_lockname *name;
87         wait_queue_entry_t wait;
88 };
89
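/*
 * Wake function for entries on the glock wait table: only wake waiters whose
 * lock name matches the name passed in @key.
 */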
90 static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
91                                int sync, void *key)
92 {
93         struct wait_glock_queue *wait_glock =
94                 container_of(wait, struct wait_glock_queue, wait);
95         struct lm_lockname *wait_name = wait_glock->name;
96         struct lm_lockname *wake_name = key;
97
98         if (wake_name->ln_sbd != wait_name->ln_sbd ||
99             wake_name->ln_number != wait_name->ln_number ||
100             wake_name->ln_type != wait_name->ln_type)
101                 return 0;
102         return autoremove_wake_function(wait, mode, sync, key);
103 }
104
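/* Map a lock name to its bucket in the shared glock wait table. */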
105 static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
106 {
107         u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);
108
109         return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
110 }
111
112 /**
113  * wake_up_glock  -  Wake up waiters on a glock
114  * @gl: the glock
115  */
116 static void wake_up_glock(struct gfs2_glock *gl)
117 {
118         wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);
119
120         if (waitqueue_active(wq))
121                 __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
122 }
123
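/* RCU callback that frees a glock once concurrent RCU lookups can no longer find it. */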
124 static void gfs2_glock_dealloc(struct rcu_head *rcu)
125 {
126         struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
127
128         if (gl->gl_ops->go_flags & GLOF_ASPACE) {
129                 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
130         } else {
131                 kfree(gl->gl_lksb.sb_lvbptr);
132                 kmem_cache_free(gfs2_glock_cachep, gl);
133         }
134 }
135
136 /**
137  * glock_blocked_by_withdraw - determine if we can still use a glock
138  * @gl: the glock
139  *
140  * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
141  * when we're withdrawn. For example, to maintain metadata integrity, we should
142  * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like
143  * iopen or the transaction glocks may be safely used because none of their
144  * metadata goes through the journal. So in general, we should disallow all
145  * glocks that are journaled, and allow all the others. One exception is:
146  * we need to allow our active journal to be promoted and demoted so others
147  * may recover it and we can reacquire it when they're done.
148  */
149 static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
150 {
151         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
152
153         if (likely(!gfs2_withdrawn(sdp)))
154                 return false;
155         if (gl->gl_ops->go_flags & GLOF_NONDISK)
156                 return false;
157         if (!sdp->sd_jdesc ||
158             gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
159                 return false;
160         return true;
161 }
162
163 void gfs2_glock_free(struct gfs2_glock *gl)
164 {
165         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
166
167         BUG_ON(atomic_read(&gl->gl_revokes));
168         rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
169         smp_mb();
170         wake_up_glock(gl);
171         call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
172         if (atomic_dec_and_test(&sdp->sd_glock_disposal))
173                 wake_up(&sdp->sd_glock_wait);
174 }
175
176 /**
177  * gfs2_glock_hold() - increment reference count on glock
178  * @gl: The glock to hold
179  *
180  */
181
182 void gfs2_glock_hold(struct gfs2_glock *gl)
183 {
184         GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
185         lockref_get(&gl->gl_lockref);
186 }
187
188 /**
189  * demote_ok - Check to see if it's ok to unlock a glock
190  * @gl: the glock
191  *
192  * Returns: 1 if it's ok
193  */
194
195 static int demote_ok(const struct gfs2_glock *gl)
196 {
197         const struct gfs2_glock_operations *glops = gl->gl_ops;
198
199         if (gl->gl_state == LM_ST_UNLOCKED)
200                 return 0;
201         if (!list_empty(&gl->gl_holders))
202                 return 0;
203         if (glops->go_demote_ok)
204                 return glops->go_demote_ok(gl);
205         return 1;
206 }
207
208
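/*
 * Add the glock to the LRU list (or move it to the tail if it is already
 * there), making it the most recently used entry.
 */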
209 void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
210 {
211         if (!(gl->gl_ops->go_flags & GLOF_LRU))
212                 return;
213
214         spin_lock(&lru_lock);
215
216         list_del(&gl->gl_lru);
217         list_add_tail(&gl->gl_lru, &lru_list);
218
219         if (!test_bit(GLF_LRU, &gl->gl_flags)) {
220                 set_bit(GLF_LRU, &gl->gl_flags);
221                 atomic_inc(&lru_count);
222         }
223
224         spin_unlock(&lru_lock);
225 }
226
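/* Remove the glock from the LRU list, if it is currently on it. */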
227 static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
228 {
229         if (!(gl->gl_ops->go_flags & GLOF_LRU))
230                 return;
231
232         spin_lock(&lru_lock);
233         if (test_bit(GLF_LRU, &gl->gl_flags)) {
234                 list_del_init(&gl->gl_lru);
235                 atomic_dec(&lru_count);
236                 clear_bit(GLF_LRU, &gl->gl_flags);
237         }
238         spin_unlock(&lru_lock);
239 }
240
241 /*
242  * Enqueue the glock on the work queue.  Passes one glock reference on to the
243  * work queue.
244  */
245 static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
246         if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) {
247                 /*
248                  * We are holding the lockref spinlock, and the work was still
249                  * queued above.  The queued work (glock_work_func) takes that
250                  * spinlock before dropping its glock reference(s), so it
251                  * cannot have dropped them in the meantime.
252                  */
253                 GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
254                 gl->gl_lockref.count--;
255         }
256 }
257
258 static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
259         spin_lock(&gl->gl_lockref.lock);
260         __gfs2_glock_queue_work(gl, delay);
261         spin_unlock(&gl->gl_lockref.lock);
262 }
263
264 static void __gfs2_glock_put(struct gfs2_glock *gl)
265 {
266         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
267         struct address_space *mapping = gfs2_glock2aspace(gl);
268
269         lockref_mark_dead(&gl->gl_lockref);
270
271         gfs2_glock_remove_from_lru(gl);
272         spin_unlock(&gl->gl_lockref.lock);
273         GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
274         GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp));
275         trace_gfs2_glock_put(gl);
276         sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
277 }
278
279 /*
280  * Cause the glock to be put in work queue context.
281  */
282 void gfs2_glock_queue_put(struct gfs2_glock *gl)
283 {
284         gfs2_glock_queue_work(gl, 0);
285 }
286
287 /**
288  * gfs2_glock_put() - Decrement reference count on glock
289  * @gl: The glock to put
290  *
291  */
292
293 void gfs2_glock_put(struct gfs2_glock *gl)
294 {
295         if (lockref_put_or_lock(&gl->gl_lockref))
296                 return;
297
298         __gfs2_glock_put(gl);
299 }
300
301 /**
302  * may_grant - check if it's ok to grant a new lock
303  * @gl: The glock
304  * @gh: The lock request which we wish to grant
305  *
306  * Returns: true if it's ok to grant the lock
307  */
308
309 static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
310 {
311         const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
312         if ((gh->gh_state == LM_ST_EXCLUSIVE ||
313              gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
314                 return 0;
315         if (gl->gl_state == gh->gh_state)
316                 return 1;
317         if (gh->gh_flags & GL_EXACT)
318                 return 0;
319         if (gl->gl_state == LM_ST_EXCLUSIVE) {
320                 if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
321                         return 1;
322                 if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
323                         return 1;
324         }
325         if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
326                 return 1;
327         return 0;
328 }
329
330 static void gfs2_holder_wake(struct gfs2_holder *gh)
331 {
332         clear_bit(HIF_WAIT, &gh->gh_iflags);
333         smp_mb__after_atomic();
334         wake_up_bit(&gh->gh_iflags, HIF_WAIT);
335         if (gh->gh_flags & GL_ASYNC) {
336                 struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;
337
338                 wake_up(&sdp->sd_async_glock_wait);
339         }
340 }
341
342 /**
343  * do_error - Something unexpected has happened during a lock request
344  *
345  */
346
347 static void do_error(struct gfs2_glock *gl, const int ret)
348 {
349         struct gfs2_holder *gh, *tmp;
350
351         list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
352                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
353                         continue;
354                 if (ret & LM_OUT_ERROR)
355                         gh->gh_error = -EIO;
356                 else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
357                         gh->gh_error = GLR_TRYFAILED;
358                 else
359                         continue;
360                 list_del_init(&gh->gh_list);
361                 trace_gfs2_glock_queue(gh, 0);
362                 gfs2_holder_wake(gh);
363         }
364 }
365
366 /**
367  * do_promote - promote as many requests as possible on the current queue
368  * @gl: The glock
369  *
370  * Returns: 1 if there is a blocked holder at the head of the list, or 2
371  *          if a type-specific operation is underway.
372  */
373
374 static int do_promote(struct gfs2_glock *gl)
375 __releases(&gl->gl_lockref.lock)
376 __acquires(&gl->gl_lockref.lock)
377 {
378         const struct gfs2_glock_operations *glops = gl->gl_ops;
379         struct gfs2_holder *gh, *tmp;
380         int ret;
381
382 restart:
383         list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
384                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
385                         continue;
386                 if (may_grant(gl, gh)) {
387                         if (gh->gh_list.prev == &gl->gl_holders &&
388                             glops->go_lock) {
389                                 spin_unlock(&gl->gl_lockref.lock);
390                                 /* FIXME: eliminate this eventually */
391                                 ret = glops->go_lock(gh);
392                                 spin_lock(&gl->gl_lockref.lock);
393                                 if (ret) {
394                                         if (ret == 1)
395                                                 return 2;
396                                         gh->gh_error = ret;
397                                         list_del_init(&gh->gh_list);
398                                         trace_gfs2_glock_queue(gh, 0);
399                                         gfs2_holder_wake(gh);
400                                         goto restart;
401                                 }
402                                 set_bit(HIF_HOLDER, &gh->gh_iflags);
403                                 trace_gfs2_promote(gh, 1);
404                                 gfs2_holder_wake(gh);
405                                 goto restart;
406                         }
407                         set_bit(HIF_HOLDER, &gh->gh_iflags);
408                         trace_gfs2_promote(gh, 0);
409                         gfs2_holder_wake(gh);
410                         continue;
411                 }
412                 if (gh->gh_list.prev == &gl->gl_holders)
413                         return 1;
414                 do_error(gl, 0);
415                 break;
416         }
417         return 0;
418 }
419
420 /**
421  * find_first_waiter - find the first gh that's waiting for the glock
422  * @gl: the glock
423  */
424
425 static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
426 {
427         struct gfs2_holder *gh;
428
429         list_for_each_entry(gh, &gl->gl_holders, gh_list) {
430                 if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
431                         return gh;
432         }
433         return NULL;
434 }
435
436 /**
437  * state_change - record that the glock is now in a different state
438  * @gl: the glock
439  * @new_state: the new state
440  *
441  */
442
443 static void state_change(struct gfs2_glock *gl, unsigned int new_state)
444 {
445         int held1, held2;
446
447         held1 = (gl->gl_state != LM_ST_UNLOCKED);
448         held2 = (new_state != LM_ST_UNLOCKED);
449
450         if (held1 != held2) {
451                 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
452                 if (held2)
453                         gl->gl_lockref.count++;
454                 else
455                         gl->gl_lockref.count--;
456         }
457         if (held1 && held2 && list_empty(&gl->gl_holders))
458                 clear_bit(GLF_QUEUED, &gl->gl_flags);
459
460         if (new_state != gl->gl_target)
461                 /* shorten our minimum hold time */
462                 gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
463                                        GL_GLOCK_MIN_HOLD);
464         gl->gl_state = new_state;
465         gl->gl_tchange = jiffies;
466 }
467
468 static void gfs2_demote_wake(struct gfs2_glock *gl)
469 {
470         gl->gl_demote_state = LM_ST_EXCLUSIVE;
471         clear_bit(GLF_DEMOTE, &gl->gl_flags);
472         smp_mb__after_atomic();
473         wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
474 }
475
476 /**
477  * finish_xmote - The DLM has replied to one of our lock requests
478  * @gl: The glock
479  * @ret: The status from the DLM
480  *
481  */
482
483 static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
484 {
485         const struct gfs2_glock_operations *glops = gl->gl_ops;
486         struct gfs2_holder *gh;
487         unsigned state = ret & LM_OUT_ST_MASK;
488         int rv;
489
490         spin_lock(&gl->gl_lockref.lock);
491         trace_gfs2_glock_state_change(gl, state);
492         state_change(gl, state);
493         gh = find_first_waiter(gl);
494
495         /* Demote to UN request arrived during demote to SH or DF */
496         if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
497             state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
498                 gl->gl_target = LM_ST_UNLOCKED;
499
500         /* Check for state != intended state */
501         if (unlikely(state != gl->gl_target)) {
502                 if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
503                         /* move to back of queue and try next entry */
504                         if (ret & LM_OUT_CANCELED) {
505                                 if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
506                                         list_move_tail(&gh->gh_list, &gl->gl_holders);
507                                 gh = find_first_waiter(gl);
508                                 gl->gl_target = gh->gh_state;
509                                 goto retry;
510                         }
511                         /* Some error or failed "try lock" - report it */
512                         if ((ret & LM_OUT_ERROR) ||
513                             (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
514                                 gl->gl_target = gl->gl_state;
515                                 do_error(gl, ret);
516                                 goto out;
517                         }
518                 }
519                 switch(state) {
520                 /* Unlocked due to conversion deadlock, try again */
521                 case LM_ST_UNLOCKED:
522 retry:
523                         do_xmote(gl, gh, gl->gl_target);
524                         break;
525                 /* Conversion fails, unlock and try again */
526                 case LM_ST_SHARED:
527                 case LM_ST_DEFERRED:
528                         do_xmote(gl, gh, LM_ST_UNLOCKED);
529                         break;
530                 default: /* Everything else */
531                         fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
532                                gl->gl_target, state);
533                         GLOCK_BUG_ON(gl, 1);
534                 }
535                 spin_unlock(&gl->gl_lockref.lock);
536                 return;
537         }
538
539         /* Fast path - we got what we asked for */
540         if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
541                 gfs2_demote_wake(gl);
542         if (state != LM_ST_UNLOCKED) {
543                 if (glops->go_xmote_bh) {
544                         spin_unlock(&gl->gl_lockref.lock);
545                         rv = glops->go_xmote_bh(gl, gh);
546                         spin_lock(&gl->gl_lockref.lock);
547                         if (rv) {
548                                 do_error(gl, rv);
549                                 goto out;
550                         }
551                 }
552                 rv = do_promote(gl);
553                 if (rv == 2)
554                         goto out_locked;
555         }
556 out:
557         clear_bit(GLF_LOCK, &gl->gl_flags);
558 out_locked:
559         spin_unlock(&gl->gl_lockref.lock);
560 }
561
562 /**
563  * do_xmote - Calls the DLM to change the state of a lock
564  * @gl: The glock
565  * @gh: The holder (only for promotes)
566  * @target: The target lock state
567  *
568  */
569
570 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
571 __releases(&gl->gl_lockref.lock)
572 __acquires(&gl->gl_lockref.lock)
573 {
574         const struct gfs2_glock_operations *glops = gl->gl_ops;
575         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
576         unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
577         int ret;
578
579         if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
580             gh && !(gh->gh_flags & LM_FLAG_NOEXP))
581                 return;
582         lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
583                       LM_FLAG_PRIORITY);
584         GLOCK_BUG_ON(gl, gl->gl_state == target);
585         GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
586         if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
587             glops->go_inval) {
588                 /*
589                  * If another process is already doing the invalidate, let that
590                  * finish first.  The glock state machine will get back to this
591                  * holder again later.
592                  */
593                 if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
594                                      &gl->gl_flags))
595                         return;
596                 do_error(gl, 0); /* Fail queued try locks */
597         }
598         gl->gl_req = target;
599         set_bit(GLF_BLOCKING, &gl->gl_flags);
600         if ((gl->gl_req == LM_ST_UNLOCKED) ||
601             (gl->gl_state == LM_ST_EXCLUSIVE) ||
602             (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
603                 clear_bit(GLF_BLOCKING, &gl->gl_flags);
604         spin_unlock(&gl->gl_lockref.lock);
605         if (glops->go_sync)
606                 glops->go_sync(gl);
607         if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
608                 /*
609                  * The call to go_sync should have cleared out the ail list.
610                  * If there are still items, we have a problem. We ought to
611                  * withdraw, but we can't because the withdraw code also uses
612                  * glocks. Warn about the error, dump the glock, then fall
613                  * through and wait for logd to do the withdraw for us.
614                  */
615                 if ((atomic_read(&gl->gl_ail_count) != 0) &&
616                     (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
617                         gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count));
618                         gfs2_dump_glock(NULL, gl, true);
619                 }
620                 glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
621                 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
622         }
623
624         gfs2_glock_hold(gl);
625         /*
626          * Check for an error encountered since we called go_sync and go_inval.
627          * If so, we can't withdraw from the glock code because the withdraw
628          * code itself uses glocks (see function signal_our_withdraw) to
629          * change the mount to read-only. Most importantly, we must not call
630          * dlm to unlock the glock until the journal is in a known good state
631          * (after journal replay) otherwise other nodes may use the object
632          * (rgrp or dinode) and then later, journal replay will corrupt the
633          * file system. The best we can do here is wait for the logd daemon
634          * to see sd_log_error and withdraw, and in the meantime, requeue the
635          * work for later.
636          *
637          * However, if we're just unlocking the lock (say, for unmount, when
638          * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
639          * then it's okay to tell dlm to unlock it.
640          */
641         if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
642                 gfs2_withdraw_delayed(sdp);
643         if (glock_blocked_by_withdraw(gl)) {
644                 if (target != LM_ST_UNLOCKED ||
645                     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) {
646                         gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
647                         goto out;
648                 }
649         }
650
651         if (sdp->sd_lockstruct.ls_ops->lm_lock) {
652                 /* lock_dlm */
653                 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
654                 if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
655                     target == LM_ST_UNLOCKED &&
656                     test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
657                         finish_xmote(gl, target);
658                         gfs2_glock_queue_work(gl, 0);
659                 } else if (ret) {
660                         fs_err(sdp, "lm_lock ret %d\n", ret);
661                         GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
662                 }
663         } else { /* lock_nolock */
664                 finish_xmote(gl, target);
665                 gfs2_glock_queue_work(gl, 0);
666         }
667 out:
668         spin_lock(&gl->gl_lockref.lock);
669 }
670
671 /**
672  * find_first_holder - find the first "holder" gh
673  * @gl: the glock
674  */
675
676 static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
677 {
678         struct gfs2_holder *gh;
679
680         if (!list_empty(&gl->gl_holders)) {
681                 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
682                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
683                         return gh;
684         }
685         return NULL;
686 }
687
688 /**
689  * run_queue - do all outstanding tasks related to a glock
690  * @gl: The glock in question
691  * @nonblock: True if we must not block in run_queue
692  *
693  */
694
695 static void run_queue(struct gfs2_glock *gl, const int nonblock)
696 __releases(&gl->gl_lockref.lock)
697 __acquires(&gl->gl_lockref.lock)
698 {
699         struct gfs2_holder *gh = NULL;
700         int ret;
701
702         if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
703                 return;
704
705         GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
706
707         if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
708             gl->gl_demote_state != gl->gl_state) {
709                 if (find_first_holder(gl))
710                         goto out_unlock;
711                 if (nonblock)
712                         goto out_sched;
713                 smp_mb();
714                 if (atomic_read(&gl->gl_revokes) != 0)
715                         goto out_sched;
716                 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
717                 GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
718                 gl->gl_target = gl->gl_demote_state;
719         } else {
720                 if (test_bit(GLF_DEMOTE, &gl->gl_flags))
721                         gfs2_demote_wake(gl);
722                 ret = do_promote(gl);
723                 if (ret == 0)
724                         goto out_unlock;
725                 if (ret == 2)
726                         goto out;
727                 gh = find_first_waiter(gl);
728                 gl->gl_target = gh->gh_state;
729                 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
730                         do_error(gl, 0); /* Fail queued try locks */
731         }
732         do_xmote(gl, gh, gl->gl_target);
733 out:
734         return;
735
736 out_sched:
737         clear_bit(GLF_LOCK, &gl->gl_flags);
738         smp_mb__after_atomic();
739         gl->gl_lockref.count++;
740         __gfs2_glock_queue_work(gl, 0);
741         return;
742
743 out_unlock:
744         clear_bit(GLF_LOCK, &gl->gl_flags);
745         smp_mb__after_atomic();
746         return;
747 }
748
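/*
 * Work function queued in response to an iopen glock callback: look up the
 * unlinked inode and prune its dentries so that it can be evicted and its
 * blocks deallocated.
 */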
749 static void delete_work_func(struct work_struct *work)
750 {
751         struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
752         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
753         struct inode *inode;
754         u64 no_addr = gl->gl_name.ln_number;
755
756         /* If someone's using this glock to create a new dinode, the block must
757            have been freed by another node, then re-used, in which case our
758            iopen callback is too late after the fact. Ignore it. */
759         if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
760                 goto out;
761
762         inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
763         if (!IS_ERR_OR_NULL(inode)) {
764                 d_prune_aliases(inode);
765                 iput(inode);
766         }
767 out:
768         gfs2_glock_put(gl);
769 }
770
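/*
 * Glock workqueue function: process a pending DLM reply, apply a pending
 * demote once the minimum hold time has expired, run the queue of waiting
 * holders, and drop the references that were handed over with the work.
 */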
771 static void glock_work_func(struct work_struct *work)
772 {
773         unsigned long delay = 0;
774         struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
775         unsigned int drop_refs = 1;
776
777         if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
778                 finish_xmote(gl, gl->gl_reply);
779                 drop_refs++;
780         }
781         spin_lock(&gl->gl_lockref.lock);
782         if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
783             gl->gl_state != LM_ST_UNLOCKED &&
784             gl->gl_demote_state != LM_ST_EXCLUSIVE) {
785                 unsigned long holdtime, now = jiffies;
786
787                 holdtime = gl->gl_tchange + gl->gl_hold_time;
788                 if (time_before(now, holdtime))
789                         delay = holdtime - now;
790
791                 if (!delay) {
792                         clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
793                         set_bit(GLF_DEMOTE, &gl->gl_flags);
794                 }
795         }
796         run_queue(gl, 0);
797         if (delay) {
798                 /* Keep one glock reference for the work we requeue. */
799                 drop_refs--;
800                 if (gl->gl_name.ln_type != LM_TYPE_INODE)
801                         delay = 0;
802                 __gfs2_glock_queue_work(gl, delay);
803         }
804
805         /*
806          * Drop the remaining glock references manually here. (Mind that
807          * __gfs2_glock_queue_work depends on the lockref spinlock being held
808          * here as well.)
809          */
810         gl->gl_lockref.count -= drop_refs;
811         if (!gl->gl_lockref.count) {
812                 __gfs2_glock_put(gl);
813                 return;
814         }
815         spin_unlock(&gl->gl_lockref.lock);
816 }
817
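/*
 * Look up a glock by name, optionally inserting @new if no glock with that
 * name exists.  If a matching glock is found but is being freed, wait on the
 * glock wait table until it is gone and retry.  Returns the existing glock
 * with a reference held, NULL if nothing was found (or @new was inserted),
 * or an ERR_PTR on insertion failure.
 */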
818 static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
819                                             struct gfs2_glock *new)
820 {
821         struct wait_glock_queue wait;
822         wait_queue_head_t *wq = glock_waitqueue(name);
823         struct gfs2_glock *gl;
824
825         wait.name = name;
826         init_wait(&wait.wait);
827         wait.wait.func = glock_wake_function;
828
829 again:
830         prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
831         rcu_read_lock();
832         if (new) {
833                 gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
834                         &new->gl_node, ht_parms);
835                 if (IS_ERR(gl))
836                         goto out;
837         } else {
838                 gl = rhashtable_lookup_fast(&gl_hash_table,
839                         name, ht_parms);
840         }
841         if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
842                 rcu_read_unlock();
843                 schedule();
844                 goto again;
845         }
846 out:
847         rcu_read_unlock();
848         finish_wait(wq, &wait.wait);
849         return gl;
850 }
851
852 /**
853  * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
854  * @sdp: The GFS2 superblock
855  * @number: the lock number
856  * @glops: The glock_operations to use
857  * @create: If 0, don't create the glock if it doesn't exist
858  * @glp: the glock is returned here
859  *
860  * This does not lock a glock, just finds/creates structures for one.
861  *
862  * Returns: errno
863  */
864
865 int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
866                    const struct gfs2_glock_operations *glops, int create,
867                    struct gfs2_glock **glp)
868 {
869         struct super_block *s = sdp->sd_vfs;
870         struct lm_lockname name = { .ln_number = number,
871                                     .ln_type = glops->go_type,
872                                     .ln_sbd = sdp };
873         struct gfs2_glock *gl, *tmp;
874         struct address_space *mapping;
875         struct kmem_cache *cachep;
876         int ret = 0;
877
878         gl = find_insert_glock(&name, NULL);
879         if (gl) {
880                 *glp = gl;
881                 return 0;
882         }
883         if (!create)
884                 return -ENOENT;
885
886         if (glops->go_flags & GLOF_ASPACE)
887                 cachep = gfs2_glock_aspace_cachep;
888         else
889                 cachep = gfs2_glock_cachep;
890         gl = kmem_cache_alloc(cachep, GFP_NOFS);
891         if (!gl)
892                 return -ENOMEM;
893
894         memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
895
896         if (glops->go_flags & GLOF_LVB) {
897                 gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
898                 if (!gl->gl_lksb.sb_lvbptr) {
899                         kmem_cache_free(cachep, gl);
900                         return -ENOMEM;
901                 }
902         }
903
904         atomic_inc(&sdp->sd_glock_disposal);
905         gl->gl_node.next = NULL;
906         gl->gl_flags = 0;
907         gl->gl_name = name;
908         gl->gl_lockref.count = 1;
909         gl->gl_state = LM_ST_UNLOCKED;
910         gl->gl_target = LM_ST_UNLOCKED;
911         gl->gl_demote_state = LM_ST_EXCLUSIVE;
912         gl->gl_ops = glops;
913         gl->gl_dstamp = 0;
914         preempt_disable();
915         /* We use the global stats to estimate the initial per-glock stats */
916         gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
917         preempt_enable();
918         gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
919         gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
920         gl->gl_tchange = jiffies;
921         gl->gl_object = NULL;
922         gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
923         INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
924         INIT_WORK(&gl->gl_delete, delete_work_func);
925
926         mapping = gfs2_glock2aspace(gl);
927         if (mapping) {
928                 mapping->a_ops = &gfs2_meta_aops;
929                 mapping->host = s->s_bdev->bd_inode;
930                 mapping->flags = 0;
931                 mapping_set_gfp_mask(mapping, GFP_NOFS);
932                 mapping->private_data = NULL;
933                 mapping->writeback_index = 0;
934         }
935
936         tmp = find_insert_glock(&name, gl);
937         if (!tmp) {
938                 *glp = gl;
939                 goto out;
940         }
941         if (IS_ERR(tmp)) {
942                 ret = PTR_ERR(tmp);
943                 goto out_free;
944         }
945         *glp = tmp;
946
947 out_free:
948         kfree(gl->gl_lksb.sb_lvbptr);
949         kmem_cache_free(cachep, gl);
950         atomic_dec(&sdp->sd_glock_disposal);
951
952 out:
953         return ret;
954 }
955
956 /**
957  * gfs2_holder_init - initialize a struct gfs2_holder in the default way
958  * @gl: the glock
959  * @state: the state we're requesting
960  * @flags: the modifier flags
961  * @gh: the holder structure
962  *
963  */
964
965 void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
966                       struct gfs2_holder *gh)
967 {
968         INIT_LIST_HEAD(&gh->gh_list);
969         gh->gh_gl = gl;
970         gh->gh_ip = _RET_IP_;
971         gh->gh_owner_pid = get_pid(task_pid(current));
972         gh->gh_state = state;
973         gh->gh_flags = flags;
974         gh->gh_error = 0;
975         gh->gh_iflags = 0;
976         gfs2_glock_hold(gl);
977 }
978
979 /**
980  * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
981  * @state: the state we're requesting
982  * @flags: the modifier flags
983  * @gh: the holder structure
984  *
985  * Don't mess with the glock.
986  *
987  */
988
989 void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
990 {
991         gh->gh_state = state;
992         gh->gh_flags = flags;
993         gh->gh_iflags = 0;
994         gh->gh_ip = _RET_IP_;
995         put_pid(gh->gh_owner_pid);
996         gh->gh_owner_pid = get_pid(task_pid(current));
997 }
998
999 /**
1000  * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
1001  * @gh: the holder structure
1002  *
1003  */
1004
1005 void gfs2_holder_uninit(struct gfs2_holder *gh)
1006 {
1007         put_pid(gh->gh_owner_pid);
1008         gfs2_glock_put(gh->gh_gl);
1009         gfs2_holder_mark_uninitialized(gh);
1010         gh->gh_ip = 0;
1011 }
1012
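/*
 * If a lock request took more than a second to complete, lengthen the glock's
 * minimum hold time (capped at GL_GLOCK_MAX_HOLD).
 */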
1013 static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
1014                                         unsigned long start_time)
1015 {
1016         /* Have we waited longer than a second? */
1017         if (time_after(jiffies, start_time + HZ)) {
1018                 /* Lengthen the minimum hold time. */
1019                 gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
1020                                        GL_GLOCK_MAX_HOLD);
1021         }
1022 }
1023
1024 /**
1025  * gfs2_glock_wait - wait on a glock acquisition
1026  * @gh: the glock holder
1027  *
1028  * Returns: 0 on success
1029  */
1030
1031 int gfs2_glock_wait(struct gfs2_holder *gh)
1032 {
1033         unsigned long start_time = jiffies;
1034
1035         might_sleep();
1036         wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
1037         gfs2_glock_update_hold_time(gh->gh_gl, start_time);
1038         return gh->gh_error;
1039 }
1040
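/* Return 1 while any holder in @ghs is still waiting for a DLM reply. */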
1041 static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
1042 {
1043         int i;
1044
1045         for (i = 0; i < num_gh; i++)
1046                 if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
1047                         return 1;
1048         return 0;
1049 }
1050
1051 /**
1052  * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
1053  * @num_gh: the number of holders in the array
1054  * @ghs: the glock holder array
1055  *
1056  * Returns: 0 on success, meaning all glocks have been granted and are held.
1057  *          -ESTALE if the request timed out, meaning all glocks were released,
1058  *          and the caller should retry the operation.
1059  */
1060
1061 int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
1062 {
1063         struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
1064         int i, ret = 0, timeout = 0;
1065         unsigned long start_time = jiffies;
1066         bool keep_waiting;
1067
1068         might_sleep();
1069         /*
1070          * Total up the (minimum hold time * 2) of all glocks and use that to
1071          * determine the max amount of time we should wait.
1072          */
1073         for (i = 0; i < num_gh; i++)
1074                 timeout += ghs[i].gh_gl->gl_hold_time << 1;
1075
1076 wait_for_dlm:
1077         if (!wait_event_timeout(sdp->sd_async_glock_wait,
1078                                 !glocks_pending(num_gh, ghs), timeout))
1079                 ret = -ESTALE; /* request timed out. */
1080
1081         /*
1082          * If dlm granted all our requests, we need to adjust the glock
1083          * minimum hold time values according to how long we waited.
1084          *
1085          * If our request timed out, we need to repeatedly release any held
1086          * glocks we acquired thus far to allow dlm to acquire the remaining
1087          * glocks without deadlocking.  We cannot currently cancel outstanding
1088          * glock acquisitions.
1089          *
1090          * The HIF_WAIT bit tells us which requests still need a response from
1091          * dlm.
1092          *
1093          * If dlm sent us any errors, we return the first error we find.
1094          */
1095         keep_waiting = false;
1096         for (i = 0; i < num_gh; i++) {
1097                 /* Skip holders we have already dequeued below. */
1098                 if (!gfs2_holder_queued(&ghs[i]))
1099                         continue;
1100                 /* Skip holders with a pending DLM response. */
1101                 if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) {
1102                         keep_waiting = true;
1103                         continue;
1104                 }
1105
1106                 if (test_bit(HIF_HOLDER, &ghs[i].gh_iflags)) {
1107                         if (ret == -ESTALE)
1108                                 gfs2_glock_dq(&ghs[i]);
1109                         else
1110                                 gfs2_glock_update_hold_time(ghs[i].gh_gl,
1111                                                             start_time);
1112                 }
1113                 if (!ret)
1114                         ret = ghs[i].gh_error;
1115         }
1116
1117         if (keep_waiting)
1118                 goto wait_for_dlm;
1119
1120         /*
1121          * At this point, we've either acquired all locks or released them all.
1122          */
1123         return ret;
1124 }
1125
1126 /**
1127  * handle_callback - process a demote request
1128  * @gl: the glock
1129  * @state: the state the caller wants us to change to
1130  *
1131  * There are only two requests that we are going to see in actual
1132  * practice: LM_ST_SHARED and LM_ST_UNLOCKED
1133  */
1134
1135 static void handle_callback(struct gfs2_glock *gl, unsigned int state,
1136                             unsigned long delay, bool remote)
1137 {
1138         int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
1139
1140         set_bit(bit, &gl->gl_flags);
1141         if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
1142                 gl->gl_demote_state = state;
1143                 gl->gl_demote_time = jiffies;
1144         } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
1145                         gl->gl_demote_state != state) {
1146                 gl->gl_demote_state = LM_ST_UNLOCKED;
1147         }
1148         if (gl->gl_ops->go_callback)
1149                 gl->gl_ops->go_callback(gl, remote);
1150         trace_gfs2_demote_rq(gl, remote);
1151 }
1152
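/*
 * Print to the seq_file if one is provided (debugfs glock dumps), otherwise
 * to the kernel log.
 */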
1153 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
1154 {
1155         struct va_format vaf;
1156         va_list args;
1157
1158         va_start(args, fmt);
1159
1160         if (seq) {
1161                 seq_vprintf(seq, fmt, args);
1162         } else {
1163                 vaf.fmt = fmt;
1164                 vaf.va = &args;
1165
1166                 pr_err("%pV", &vaf);
1167         }
1168
1169         va_end(args);
1170 }
1171
1172 /**
1173  * add_to_queue - Add a holder to the wait queue (but look for recursion)
1174  * @gh: the holder structure to add
1175  *
1176  * Eventually we should move the recursive locking trap to a
1177  * debugging option or something like that. This is the fast
1178  * path and needs to have the minimum number of distractions.
1179  *
1180  */
1181
1182 static inline void add_to_queue(struct gfs2_holder *gh)
1183 __releases(&gl->gl_lockref.lock)
1184 __acquires(&gl->gl_lockref.lock)
1185 {
1186         struct gfs2_glock *gl = gh->gh_gl;
1187         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1188         struct list_head *insert_pt = NULL;
1189         struct gfs2_holder *gh2;
1190         int try_futile = 0;
1191
1192         GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
1193         if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1194                 GLOCK_BUG_ON(gl, true);
1195
1196         if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1197                 if (test_bit(GLF_LOCK, &gl->gl_flags))
1198                         try_futile = !may_grant(gl, gh);
1199                 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
1200                         goto fail;
1201         }
1202
1203         list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
1204                 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
1205                     (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
1206                         goto trap_recursive;
1207                 if (try_futile &&
1208                     !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
1209 fail:
1210                         gh->gh_error = GLR_TRYFAILED;
1211                         gfs2_holder_wake(gh);
1212                         return;
1213                 }
1214                 if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
1215                         continue;
1216                 if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
1217                         insert_pt = &gh2->gh_list;
1218         }
1219         set_bit(GLF_QUEUED, &gl->gl_flags);
1220         trace_gfs2_glock_queue(gh, 1);
1221         gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
1222         gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
1223         if (likely(insert_pt == NULL)) {
1224                 list_add_tail(&gh->gh_list, &gl->gl_holders);
1225                 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
1226                         goto do_cancel;
1227                 return;
1228         }
1229         list_add_tail(&gh->gh_list, insert_pt);
1230 do_cancel:
1231         gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
1232         if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
1233                 spin_unlock(&gl->gl_lockref.lock);
1234                 if (sdp->sd_lockstruct.ls_ops->lm_cancel)
1235                         sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
1236                 spin_lock(&gl->gl_lockref.lock);
1237         }
1238         return;
1239
1240 trap_recursive:
1241         fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
1242         fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
1243         fs_err(sdp, "lock type: %d req lock state : %d\n",
1244                gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
1245         fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
1246         fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
1247         fs_err(sdp, "lock type: %d req lock state : %d\n",
1248                gh->gh_gl->gl_name.ln_type, gh->gh_state);
1249         gfs2_dump_glock(NULL, gl, true);
1250         BUG();
1251 }
1252
1253 /**
1254  * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1255  * @gh: the holder structure
1256  *
1257  * if (gh->gh_flags & GL_ASYNC), this never returns an error
1258  *
1259  * Returns: 0, GLR_TRYFAILED, or errno on failure
1260  */
1261
1262 int gfs2_glock_nq(struct gfs2_holder *gh)
1263 {
1264         struct gfs2_glock *gl = gh->gh_gl;
1265         int error = 0;
1266
1267         if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
1268                 return -EIO;
1269
1270         if (test_bit(GLF_LRU, &gl->gl_flags))
1271                 gfs2_glock_remove_from_lru(gl);
1272
1273         spin_lock(&gl->gl_lockref.lock);
1274         add_to_queue(gh);
1275         if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
1276                      test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
1277                 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1278                 gl->gl_lockref.count++;
1279                 __gfs2_glock_queue_work(gl, 0);
1280         }
1281         run_queue(gl, 1);
1282         spin_unlock(&gl->gl_lockref.lock);
1283
1284         if (!(gh->gh_flags & GL_ASYNC))
1285                 error = gfs2_glock_wait(gh);
1286
1287         return error;
1288 }
1289
1290 /**
1291  * gfs2_glock_poll - poll to see if an async request has been completed
1292  * @gh: the holder
1293  *
1294  * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1295  */
1296
1297 int gfs2_glock_poll(struct gfs2_holder *gh)
1298 {
1299         return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
1300 }
1301
1302 /**
1303  * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1304  * @gh: the glock holder
1305  *
1306  */
1307
1308 void gfs2_glock_dq(struct gfs2_holder *gh)
1309 {
1310         struct gfs2_glock *gl = gh->gh_gl;
1311         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1312         unsigned delay = 0;
1313         int fast_path = 0;
1314
1315         spin_lock(&gl->gl_lockref.lock);
1316         /*
1317          * If we're in the process of file system withdraw, we cannot just
1318          * dequeue any glocks until our journal is recovered, lest we
1319          * introduce file system corruption. We need two exceptions to this
1320          * rule: We need to allow unlocking of nondisk glocks and the glock
1321          * for our own journal that needs recovery.
1322          */
1323         if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
1324             glock_blocked_by_withdraw(gl) &&
1325             gh->gh_gl != sdp->sd_jinode_gl) {
1326                 sdp->sd_glock_dqs_held++;
1327                 might_sleep();
1328                 wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
1329                             TASK_UNINTERRUPTIBLE);
1330         }
1331         if (gh->gh_flags & GL_NOCACHE)
1332                 handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1333
1334         list_del_init(&gh->gh_list);
1335         clear_bit(HIF_HOLDER, &gh->gh_iflags);
1336         if (find_first_holder(gl) == NULL) {
1337                 if (list_empty(&gl->gl_holders) &&
1338                     !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1339                     !test_bit(GLF_DEMOTE, &gl->gl_flags))
1340                         fast_path = 1;
1341         }
1342         if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
1343                 gfs2_glock_add_to_lru(gl);
1344
1345         trace_gfs2_glock_queue(gh, 0);
1346         if (unlikely(!fast_path)) {
1347                 gl->gl_lockref.count++;
1348                 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1349                     !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
1350                     gl->gl_name.ln_type == LM_TYPE_INODE)
1351                         delay = gl->gl_hold_time;
1352                 __gfs2_glock_queue_work(gl, delay);
1353         }
1354         spin_unlock(&gl->gl_lockref.lock);
1355 }
1356
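/* Dequeue the holder and wait until any pending demote of the glock has completed. */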
1357 void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1358 {
1359         struct gfs2_glock *gl = gh->gh_gl;
1360         gfs2_glock_dq(gh);
1361         might_sleep();
1362         wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
1363 }
1364
1365 /**
1366  * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
1367  * @gh: the holder structure
1368  *
1369  */
1370
1371 void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
1372 {
1373         gfs2_glock_dq(gh);
1374         gfs2_holder_uninit(gh);
1375 }
1376
1377 /**
1378  * gfs2_glock_nq_num - acquire a glock based on lock number
1379  * @sdp: the filesystem
1380  * @number: the lock number
1381  * @glops: the glock operations for the type of glock
1382  * @state: the state to acquire the glock in
1383  * @flags: modifier flags for the acquisition
1384  * @gh: the struct gfs2_holder
1385  *
1386  * Returns: errno
1387  */
1388
1389 int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
1390                       const struct gfs2_glock_operations *glops,
1391                       unsigned int state, u16 flags, struct gfs2_holder *gh)
1392 {
1393         struct gfs2_glock *gl;
1394         int error;
1395
1396         error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1397         if (!error) {
1398                 error = gfs2_glock_nq_init(gl, state, flags, gh);
1399                 gfs2_glock_put(gl);
1400         }
1401
1402         return error;
1403 }
1404
1405 /**
1406  * glock_compare - Compare two struct gfs2_holder structures for sorting
1407  * @arg_a: the first structure
1408  * @arg_b: the second structure
1409  *
1410  */
1411
1412 static int glock_compare(const void *arg_a, const void *arg_b)
1413 {
1414         const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
1415         const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
1416         const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1417         const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1418
1419         if (a->ln_number > b->ln_number)
1420                 return 1;
1421         if (a->ln_number < b->ln_number)
1422                 return -1;
1423         BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
1424         return 0;
1425 }
1426
1427 /**
1428  * nq_m_sync - synchronously acquire more than one glock in deadlock-free order
1429  * @num_gh: the number of structures
1430  * @ghs: an array of struct gfs2_holder structures
1431  *
1432  * Returns: 0 on success (all glocks acquired),
1433  *          errno on failure (no glocks acquired)
1434  */
1435
1436 static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1437                      struct gfs2_holder **p)
1438 {
1439         unsigned int x;
1440         int error = 0;
1441
1442         for (x = 0; x < num_gh; x++)
1443                 p[x] = &ghs[x];
1444
1445         sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1446
1447         for (x = 0; x < num_gh; x++) {
1448                 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1449
1450                 error = gfs2_glock_nq(p[x]);
1451                 if (error) {
1452                         while (x--)
1453                                 gfs2_glock_dq(p[x]);
1454                         break;
1455                 }
1456         }
1457
1458         return error;
1459 }
1460
1461 /**
1462  * gfs2_glock_nq_m - acquire multiple glocks
1463  * @num_gh: the number of structures
1464  * @ghs: an array of struct gfs2_holder structures
1465  *
1466  *
1467  * Returns: 0 on success (all glocks acquired),
1468  *          errno on failure (no glocks acquired)
1469  */
1470
1471 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1472 {
1473         struct gfs2_holder *tmp[4];
1474         struct gfs2_holder **pph = tmp;
1475         int error = 0;
1476
1477         switch(num_gh) {
1478         case 0:
1479                 return 0;
1480         case 1:
1481                 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1482                 return gfs2_glock_nq(ghs);
1483         default:
1484                 if (num_gh <= 4)
1485                         break;
1486                 pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
1487                                     GFP_NOFS);
1488                 if (!pph)
1489                         return -ENOMEM;
1490         }
1491
1492         error = nq_m_sync(num_gh, ghs, pph);
1493
1494         if (pph != tmp)
1495                 kfree(pph);
1496
1497         return error;
1498 }
1499
1500 /**
1501  * gfs2_glock_dq_m - release multiple glocks
1502  * @num_gh: the number of structures
1503  * @ghs: an array of struct gfs2_holder structures
1504  *
1505  */
1506
1507 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1508 {
1509         while (num_gh--)
1510                 gfs2_glock_dq(&ghs[num_gh]);
1511 }
1512
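/*
 * Process a (remote) demote request from the lock module.  For inode glocks,
 * delay the demote until the minimum hold time has expired.
 */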
1513 void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1514 {
1515         unsigned long delay = 0;
1516         unsigned long holdtime;
1517         unsigned long now = jiffies;
1518
1519         gfs2_glock_hold(gl);
1520         holdtime = gl->gl_tchange + gl->gl_hold_time;
1521         if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
1522             gl->gl_name.ln_type == LM_TYPE_INODE) {
1523                 if (time_before(now, holdtime))
1524                         delay = holdtime - now;
1525                 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
1526                         delay = gl->gl_hold_time;
1527         }
1528
1529         spin_lock(&gl->gl_lockref.lock);
1530         handle_callback(gl, state, delay, true);
1531         __gfs2_glock_queue_work(gl, delay);
1532         spin_unlock(&gl->gl_lockref.lock);
1533 }
1534
1535 /**
1536  * gfs2_should_freeze - Figure out if glock should be frozen
1537  * @gl: The glock in question
1538  *
1539  * Glocks are not frozen if (a) the result of the dlm operation is
1540  * an error, (b) the locking operation was an unlock operation, or
1541  * (c) there is a "noexp" flagged request anywhere in the queue.
1542  *
1543  * Returns: 1 if freezing should occur, 0 otherwise
1544  */
1545
1546 static int gfs2_should_freeze(const struct gfs2_glock *gl)
1547 {
1548         const struct gfs2_holder *gh;
1549
1550         if (gl->gl_reply & ~LM_OUT_ST_MASK)
1551                 return 0;
1552         if (gl->gl_target == LM_ST_UNLOCKED)
1553                 return 0;
1554
1555         list_for_each_entry(gh, &gl->gl_holders, gh_list) {
1556                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1557                         continue;
1558                 if (LM_FLAG_NOEXP & gh->gh_flags)
1559                         return 0;
1560         }
1561
1562         return 1;
1563 }
1564
1565 /**
1566  * gfs2_glock_complete - Callback used by locking
1567  * @gl: Pointer to the glock
1568  * @ret: The return value from the dlm
1569  *
1570  * The gl_reply field is protected by gl_lockref.lock, so it is ok to use
1571  * a bitfield shared with other glock state fields.
1572  */
1573
1574 void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1575 {
1576         struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
1577
1578         spin_lock(&gl->gl_lockref.lock);
1579         gl->gl_reply = ret;
1580
1581         if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
1582                 if (gfs2_should_freeze(gl)) {
1583                         set_bit(GLF_FROZEN, &gl->gl_flags);
1584                         spin_unlock(&gl->gl_lockref.lock);
1585                         return;
1586                 }
1587         }
1588
1589         gl->gl_lockref.count++;
1590         set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1591         __gfs2_glock_queue_work(gl, 0);
1592         spin_unlock(&gl->gl_lockref.lock);
1593 }
1594
1595 static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
1596 {
1597         struct gfs2_glock *gla, *glb;
1598
1599         gla = list_entry(a, struct gfs2_glock, gl_lru);
1600         glb = list_entry(b, struct gfs2_glock, gl_lru);
1601
1602         if (gla->gl_name.ln_number > glb->gl_name.ln_number)
1603                 return 1;
1604         if (gla->gl_name.ln_number < glb->gl_name.ln_number)
1605                 return -1;
1606
1607         return 0;
1608 }
1609
1610 /**
1611  * gfs2_dispose_glock_lru - Demote a list of glocks
1612  * @list: The list to dispose of
1613  *
1614  * Disposing of glocks may involve disk accesses, so here we sort the
1615  * glocks by number (i.e. the disk location of the inodes) so that any
1616  * such accesses are issued (mostly) in order.
1617  *
1618  * Must be called under the lru_lock, but may drop and retake this
1619  * lock. While the lru_lock is dropped, entries may vanish from the
1620  * list, but no new entries will appear on the list (since it is
1621  * private)
1622  */
1623
1624 static void gfs2_dispose_glock_lru(struct list_head *list)
1625 __releases(&lru_lock)
1626 __acquires(&lru_lock)
1627 {
1628         struct gfs2_glock *gl;
1629
1630         list_sort(NULL, list, glock_cmp);
1631
1632         while (!list_empty(list)) {
1633                 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1634                 list_del_init(&gl->gl_lru);
1635                 if (!spin_trylock(&gl->gl_lockref.lock)) {
1636 add_back_to_lru:
1637                         list_add(&gl->gl_lru, &lru_list);
1638                         set_bit(GLF_LRU, &gl->gl_flags);
1639                         atomic_inc(&lru_count);
1640                         continue;
1641                 }
1642                 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1643                         spin_unlock(&gl->gl_lockref.lock);
1644                         goto add_back_to_lru;
1645                 }
1646                 gl->gl_lockref.count++;
1647                 if (demote_ok(gl))
1648                         handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1649                 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
1650                 __gfs2_glock_queue_work(gl, 0);
1651                 spin_unlock(&gl->gl_lockref.lock);
1652                 cond_resched_lock(&lru_lock);
1653         }
1654 }
1655
1656 /**
1657  * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
1658  * @nr: The number of entries to scan
1659  *
1660  * This function selects the entries on the LRU which are able to
1661  * be demoted, and then kicks off the process by calling
1662  * gfs2_dispose_glock_lru() above.
1663  */
1664
1665 static long gfs2_scan_glock_lru(int nr)
1666 {
1667         struct gfs2_glock *gl;
1668         LIST_HEAD(skipped);
1669         LIST_HEAD(dispose);
1670         long freed = 0;
1671
1672         spin_lock(&lru_lock);
1673         while ((nr-- >= 0) && !list_empty(&lru_list)) {
1674                 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1675
1676                 /* Test for being demotable */
1677                 if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
1678                         list_move(&gl->gl_lru, &dispose);
1679                         atomic_dec(&lru_count);
1680                         clear_bit(GLF_LRU, &gl->gl_flags);
1681                         freed++;
1682                         continue;
1683                 }
1684
1685                 list_move(&gl->gl_lru, &skipped);
1686         }
1687         list_splice(&skipped, &lru_list);
1688         if (!list_empty(&dispose))
1689                 gfs2_dispose_glock_lru(&dispose);
1690         spin_unlock(&lru_lock);
1691
1692         return freed;
1693 }
1694
1695 static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
1696                                             struct shrink_control *sc)
1697 {
1698         if (!(sc->gfp_mask & __GFP_FS))
1699                 return SHRINK_STOP;
1700         return gfs2_scan_glock_lru(sc->nr_to_scan);
1701 }
1702
1703 static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
1704                                              struct shrink_control *sc)
1705 {
1706         return vfs_pressure_ratio(atomic_read(&lru_count));
1707 }
1708
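/*
 * The glock shrinker lets the VM reclaim memory under pressure by demoting
 * cached but unused glocks: count_objects reports the LRU length (scaled by
 * vfs_pressure_ratio()), and scan_objects demotes up to nr_to_scan entries
 * via gfs2_scan_glock_lru() above.
 */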
1709 static struct shrinker glock_shrinker = {
1710         .seeks = DEFAULT_SEEKS,
1711         .count_objects = gfs2_glock_shrink_count,
1712         .scan_objects = gfs2_glock_shrink_scan,
1713 };
1714
1715 /**
1716  * glock_hash_walk - Call a function for each glock in the hash table
1717  * @examiner: the function
1718  * @sdp: the filesystem; only glocks belonging to @sdp are examined
1720  *
1721  * Note that the function can be called multiple times on the same
1722  * object.  So the user must ensure that the function can cope with
1723  * that.
1724  */
1725
1726 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1727 {
1728         struct gfs2_glock *gl;
1729         struct rhashtable_iter iter;
1730
1731         rhashtable_walk_enter(&gl_hash_table, &iter);
1732
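        /*
         * rhashtable_walk_next() returns ERR_PTR(-EAGAIN) if the table is
         * being resized; when that happens, stop the walk, allow
         * rescheduling, and retry, which may revisit glocks that have
         * already been examined (see the note above).
         */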
1733         do {
1734                 rhashtable_walk_start(&iter);
1735
1736                 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
1737                         if (gl->gl_name.ln_sbd == sdp &&
1738                             lockref_get_not_dead(&gl->gl_lockref))
1739                                 examiner(gl);
1740
1741                 rhashtable_walk_stop(&iter);
1742         } while (cond_resched(), gl == ERR_PTR(-EAGAIN));
1743
1744         rhashtable_walk_exit(&iter);
1745 }
1746
1747 /**
1748  * thaw_glock - thaw out a glock which has an unprocessed reply waiting
1749  * @gl: The glock to thaw
1750  *
1751  */
1752
1753 static void thaw_glock(struct gfs2_glock *gl)
1754 {
1755         if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) {
1756                 gfs2_glock_put(gl);
1757                 return;
1758         }
1759         set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1760         gfs2_glock_queue_work(gl, 0);
1761 }
1762
1763 /**
1764  * clear_glock - look at a glock and see if we can free it from glock cache
1765  * @gl: the glock to look at
1766  *
1767  */
1768
1769 static void clear_glock(struct gfs2_glock *gl)
1770 {
1771         gfs2_glock_remove_from_lru(gl);
1772
1773         spin_lock(&gl->gl_lockref.lock);
1774         if (gl->gl_state != LM_ST_UNLOCKED)
1775                 handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1776         __gfs2_glock_queue_work(gl, 0);
1777         spin_unlock(&gl->gl_lockref.lock);
1778 }
1779
1780 /**
1781  * gfs2_glock_thaw - Thaw any frozen glocks
1782  * @sdp: The super block
1783  *
1784  */
1785
1786 void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1787 {
1788         glock_hash_walk(thaw_glock, sdp);
1789 }
1790
1791 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
1792 {
1793         spin_lock(&gl->gl_lockref.lock);
1794         gfs2_dump_glock(seq, gl, fsid);
1795         spin_unlock(&gl->gl_lockref.lock);
1796 }
1797
1798 static void dump_glock_func(struct gfs2_glock *gl)
1799 {
1800         dump_glock(NULL, gl, true);
1801 }
1802
1803 /**
1804  * gfs2_gl_hash_clear - Empty out the glock hash table
1805  * @sdp: the filesystem
1807  *
1808  * Called when unmounting the filesystem.
1809  */
1810
1811 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1812 {
1813         set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
1814         flush_workqueue(glock_workqueue);
1815         glock_hash_walk(clear_glock, sdp);
1816         flush_workqueue(glock_workqueue);
1817         wait_event_timeout(sdp->sd_glock_wait,
1818                            atomic_read(&sdp->sd_glock_disposal) == 0,
1819                            HZ * 600);
1820         glock_hash_walk(dump_glock_func, sdp);
1821 }
1822
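/*
 * gfs2_glock_finish_truncate - finish an interrupted truncate on an inode
 * @ip: the inode
 *
 * Resumes the truncate via gfs2_truncatei_resume(), then clears GLF_LOCK and
 * re-runs the glock state machine so that queued requests on the inode's
 * glock can make progress again.
 */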
1823 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
1824 {
1825         struct gfs2_glock *gl = ip->i_gl;
1826         int ret;
1827
1828         ret = gfs2_truncatei_resume(ip);
1829         gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0);
1830
1831         spin_lock(&gl->gl_lockref.lock);
1832         clear_bit(GLF_LOCK, &gl->gl_flags);
1833         run_queue(gl, 1);
1834         spin_unlock(&gl->gl_lockref.lock);
1835 }
1836
1837 static const char *state2str(unsigned state)
1838 {
1839         switch (state) {
1840         case LM_ST_UNLOCKED:
1841                 return "UN";
1842         case LM_ST_SHARED:
1843                 return "SH";
1844         case LM_ST_DEFERRED:
1845                 return "DF";
1846         case LM_ST_EXCLUSIVE:
1847                 return "EX";
1848         }
1849         return "??";
1850 }
1851
1852 static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
1853 {
1854         char *p = buf;
1855         if (flags & LM_FLAG_TRY)
1856                 *p++ = 't';
1857         if (flags & LM_FLAG_TRY_1CB)
1858                 *p++ = 'T';
1859         if (flags & LM_FLAG_NOEXP)
1860                 *p++ = 'e';
1861         if (flags & LM_FLAG_ANY)
1862                 *p++ = 'A';
1863         if (flags & LM_FLAG_PRIORITY)
1864                 *p++ = 'p';
1865         if (flags & GL_ASYNC)
1866                 *p++ = 'a';
1867         if (flags & GL_EXACT)
1868                 *p++ = 'E';
1869         if (flags & GL_NOCACHE)
1870                 *p++ = 'c';
1871         if (test_bit(HIF_HOLDER, &iflags))
1872                 *p++ = 'H';
1873         if (test_bit(HIF_WAIT, &iflags))
1874                 *p++ = 'W';
1875         if (test_bit(HIF_FIRST, &iflags))
1876                 *p++ = 'F';
1877         *p = 0;
1878         return buf;
1879 }
1880
1881 /**
1882  * dump_holder - print information about a glock holder
1883  * @seq: the seq_file struct
1884  * @gh: the glock holder
1885  * @fs_id_buf: pointer to file system id (if requested)
1886  *
1887  */
1888
1889 static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
1890                         const char *fs_id_buf)
1891 {
1892         struct task_struct *gh_owner = NULL;
1893         char flags_buf[32];
1894
1895         rcu_read_lock();
1896         if (gh->gh_owner_pid)
1897                 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1898         gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
1899                        fs_id_buf, state2str(gh->gh_state),
1900                        hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
1901                        gh->gh_error,
1902                        gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1903                        gh_owner ? gh_owner->comm : "(ended)",
1904                        (void *)gh->gh_ip);
1905         rcu_read_unlock();
1906 }
1907
1908 static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1909 {
1910         const unsigned long *gflags = &gl->gl_flags;
1911         char *p = buf;
1912
1913         if (test_bit(GLF_LOCK, gflags))
1914                 *p++ = 'l';
1915         if (test_bit(GLF_DEMOTE, gflags))
1916                 *p++ = 'D';
1917         if (test_bit(GLF_PENDING_DEMOTE, gflags))
1918                 *p++ = 'd';
1919         if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
1920                 *p++ = 'p';
1921         if (test_bit(GLF_DIRTY, gflags))
1922                 *p++ = 'y';
1923         if (test_bit(GLF_LFLUSH, gflags))
1924                 *p++ = 'f';
1925         if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
1926                 *p++ = 'i';
1927         if (test_bit(GLF_REPLY_PENDING, gflags))
1928                 *p++ = 'r';
1929         if (test_bit(GLF_INITIAL, gflags))
1930                 *p++ = 'I';
1931         if (test_bit(GLF_FROZEN, gflags))
1932                 *p++ = 'F';
1933         if (test_bit(GLF_QUEUED, gflags))
1934                 *p++ = 'q';
1935         if (test_bit(GLF_LRU, gflags))
1936                 *p++ = 'L';
1937         if (gl->gl_object)
1938                 *p++ = 'o';
1939         if (test_bit(GLF_BLOCKING, gflags))
1940                 *p++ = 'b';
1941         *p = 0;
1942         return buf;
1943 }
1944
1945 /**
1946  * gfs2_dump_glock - print information about a glock
1947  * @seq: The seq_file struct
1948  * @gl: the glock
1949  * @fsid: If true, also dump the file system id
1950  *
1951  * The file format is as follows:
1952  * One line per object, capital letters are used to indicate objects
1953  * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
1954  * other objects are indented by a single space and follow the glock to
1955  * which they are related. Fields are indicated by lower case letters
1956  * followed by a colon and the field value, except for strings which are in
1957  * [] so that it's possible to see if they are composed of spaces, for
1958  * example. The fields are n = number (id of the object), f = flags,
1959  * t = type, s = state, r = refcount, e = error, p = pid.
1960  *
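 * Fields not listed above: d = demote state / time since the demote request
 * (usec, 0 if no demote is pending), a = number of blocks on the AIL list,
 * v = outstanding revokes, m = minimum hold time (jiffies).  A purely
 * illustrative (hypothetical, not captured) example:
 *
 *   G:  s:SH n:2/20054 f:qo t:SH d:EX/0 a:0 v:0 r:3 m:200
 *    H: s:SH f:EH e:0 p:1329 [bash] gfs2_open+0x.../0x...
 *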
1961  */
1962
1963 void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
1964 {
1965         const struct gfs2_glock_operations *glops = gl->gl_ops;
1966         unsigned long long dtime;
1967         const struct gfs2_holder *gh;
1968         char gflags_buf[32];
1969         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1970         char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
1971
1972         memset(fs_id_buf, 0, sizeof(fs_id_buf));
1973         if (fsid && sdp) /* safety precaution */
1974                 sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
1975         dtime = jiffies - gl->gl_demote_time;
1976         dtime *= 1000000/HZ; /* demote time in uSec */
1977         if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1978                 dtime = 0;
1979         gfs2_print_dbg(seq, "%sG:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
1980                        "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
1981                   gl->gl_name.ln_type,
1982                   (unsigned long long)gl->gl_name.ln_number,
1983                   gflags2str(gflags_buf, gl),
1984                   state2str(gl->gl_target),
1985                   state2str(gl->gl_demote_state), dtime,
1986                   atomic_read(&gl->gl_ail_count),
1987                   atomic_read(&gl->gl_revokes),
1988                   (int)gl->gl_lockref.count, gl->gl_hold_time);
1989
1990         list_for_each_entry(gh, &gl->gl_holders, gh_list)
1991                 dump_holder(seq, gh, fs_id_buf);
1992
1993         if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
1994                 glops->go_dump(seq, gl, fs_id_buf);
1995 }
1996
1997 static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
1998 {
1999         struct gfs2_glock *gl = iter_ptr;
2000
2001         seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
2002                    gl->gl_name.ln_type,
2003                    (unsigned long long)gl->gl_name.ln_number,
2004                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
2005                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
2006                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
2007                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
2008                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
2009                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
2010                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
2011                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
2012         return 0;
2013 }
2014
2015 static const char *gfs2_gltype[] = {
2016         "type",
2017         "reserved",
2018         "nondisk",
2019         "inode",
2020         "rgrp",
2021         "meta",
2022         "iopen",
2023         "flock",
2024         "plock",
2025         "quota",
2026         "journal",
2027 };
2028
2029 static const char *gfs2_stype[] = {
2030         [GFS2_LKS_SRTT]         = "srtt",
2031         [GFS2_LKS_SRTTVAR]      = "srttvar",
2032         [GFS2_LKS_SRTTB]        = "srttb",
2033         [GFS2_LKS_SRTTVARB]     = "srttvarb",
2034         [GFS2_LKS_SIRT]         = "sirt",
2035         [GFS2_LKS_SIRTVAR]      = "sirtvar",
2036         [GFS2_LKS_DCOUNT]       = "dlm",
2037         [GFS2_LKS_QCOUNT]       = "queue",
2038 };
2039
2040 #define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
2041
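/*
 * Each line of the sbstats file shows one (glock type, statistic) pair with
 * one column per possible CPU.  The seq position encodes the pair as
 * index = pos >> 3 (row in gfs2_gltype[]) and subindex = pos & 7 (entry in
 * gfs2_stype[]); position 0 emits a header line listing the CPU numbers and
 * the remaining positions of the "type" row are skipped.
 */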
2042 static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
2043 {
2044         struct gfs2_sbd *sdp = seq->private;
2045         loff_t pos = *(loff_t *)iter_ptr;
2046         unsigned index = pos >> 3;
2047         unsigned subindex = pos & 0x07;
2048         int i;
2049
2050         if (index == 0 && subindex != 0)
2051                 return 0;
2052
2053         seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
2054                    (index == 0) ? "cpu": gfs2_stype[subindex]);
2055
2056         for_each_possible_cpu(i) {
2057                 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
2058
2059                 if (index == 0)
2060                         seq_printf(seq, " %15u", i);
2061                 else
2062                         seq_printf(seq, " %15llu", (unsigned long long)lkstats->
2063                                    lkstats[index - 1].stats[subindex]);
2064         }
2065         seq_putc(seq, '\n');
2066         return 0;
2067 }
2068
2069 int __init gfs2_glock_init(void)
2070 {
2071         int i, ret;
2072
2073         ret = rhashtable_init(&gl_hash_table, &ht_parms);
2074         if (ret < 0)
2075                 return ret;
2076
2077         glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
2078                                           WQ_HIGHPRI | WQ_FREEZABLE, 0);
2079         if (!glock_workqueue) {
2080                 rhashtable_destroy(&gl_hash_table);
2081                 return -ENOMEM;
2082         }
2083         gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
2084                                                 WQ_MEM_RECLAIM | WQ_FREEZABLE,
2085                                                 0);
2086         if (!gfs2_delete_workqueue) {
2087                 destroy_workqueue(glock_workqueue);
2088                 rhashtable_destroy(&gl_hash_table);
2089                 return -ENOMEM;
2090         }
2091
2092         ret = register_shrinker(&glock_shrinker);
2093         if (ret) {
2094                 destroy_workqueue(gfs2_delete_workqueue);
2095                 destroy_workqueue(glock_workqueue);
2096                 rhashtable_destroy(&gl_hash_table);
2097                 return ret;
2098         }
2099
2100         for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
2101                 init_waitqueue_head(glock_wait_table + i);
2102
2103         return 0;
2104 }
2105
2106 void gfs2_glock_exit(void)
2107 {
2108         unregister_shrinker(&glock_shrinker);
2109         rhashtable_destroy(&gl_hash_table);
2110         destroy_workqueue(glock_workqueue);
2111         destroy_workqueue(gfs2_delete_workqueue);
2112 }
2113
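/*
 * gfs2_glock_iter_next - move the seq iterator forward by @n glocks
 * @gi: the iterator
 * @n: how far to advance; 0 means stay on the current glock
 *
 * Only the glock that the iterator finally lands on has a reference taken;
 * glocks that are merely skipped over (while n > 1) are only checked for
 * not being dead.  If the walk returns -EAGAIN (hash table resize), the
 * iterator simply takes the next valid glock.
 */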
2114 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
2115 {
2116         struct gfs2_glock *gl = gi->gl;
2117
2118         if (gl) {
2119                 if (n == 0)
2120                         return;
2121                 if (!lockref_put_not_zero(&gl->gl_lockref))
2122                         gfs2_glock_queue_put(gl);
2123         }
2124         for (;;) {
2125                 gl = rhashtable_walk_next(&gi->hti);
2126                 if (IS_ERR_OR_NULL(gl)) {
2127                         if (gl == ERR_PTR(-EAGAIN)) {
2128                                 n = 1;
2129                                 continue;
2130                         }
2131                         gl = NULL;
2132                         break;
2133                 }
2134                 if (gl->gl_name.ln_sbd != gi->sdp)
2135                         continue;
2136                 if (n <= 1) {
2137                         if (!lockref_get_not_dead(&gl->gl_lockref))
2138                                 continue;
2139                         break;
2140                 } else {
2141                         if (__lockref_is_dead(&gl->gl_lockref))
2142                                 continue;
2143                         n--;
2144                 }
2145         }
2146         gi->gl = gl;
2147 }
2148
2149 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
2150         __acquires(RCU)
2151 {
2152         struct gfs2_glock_iter *gi = seq->private;
2153         loff_t n;
2154
2155         /*
2156          * We can either stay where we are, skip to the next hash table
2157          * entry, or start from the beginning.
2158          */
2159         if (*pos < gi->last_pos) {
2160                 rhashtable_walk_exit(&gi->hti);
2161                 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2162                 n = *pos + 1;
2163         } else {
2164                 n = *pos - gi->last_pos;
2165         }
2166
2167         rhashtable_walk_start(&gi->hti);
2168
2169         gfs2_glock_iter_next(gi, n);
2170         gi->last_pos = *pos;
2171         return gi->gl;
2172 }
2173
2174 static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
2175                                  loff_t *pos)
2176 {
2177         struct gfs2_glock_iter *gi = seq->private;
2178
2179         (*pos)++;
2180         gi->last_pos = *pos;
2181         gfs2_glock_iter_next(gi, 1);
2182         return gi->gl;
2183 }
2184
2185 static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
2186         __releases(RCU)
2187 {
2188         struct gfs2_glock_iter *gi = seq->private;
2189
2190         rhashtable_walk_stop(&gi->hti);
2191 }
2192
2193 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
2194 {
2195         dump_glock(seq, iter_ptr, false);
2196         return 0;
2197 }
2198
2199 static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
2200 {
2201         preempt_disable();
2202         if (*pos >= GFS2_NR_SBSTATS)
2203                 return NULL;
2204         return pos;
2205 }
2206
2207 static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
2208                                    loff_t *pos)
2209 {
2210         (*pos)++;
2211         if (*pos >= GFS2_NR_SBSTATS)
2212                 return NULL;
2213         return pos;
2214 }
2215
2216 static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
2217 {
2218         preempt_enable();
2219 }
2220
2221 static const struct seq_operations gfs2_glock_seq_ops = {
2222         .start = gfs2_glock_seq_start,
2223         .next  = gfs2_glock_seq_next,
2224         .stop  = gfs2_glock_seq_stop,
2225         .show  = gfs2_glock_seq_show,
2226 };
2227
2228 static const struct seq_operations gfs2_glstats_seq_ops = {
2229         .start = gfs2_glock_seq_start,
2230         .next  = gfs2_glock_seq_next,
2231         .stop  = gfs2_glock_seq_stop,
2232         .show  = gfs2_glstats_seq_show,
2233 };
2234
2235 static const struct seq_operations gfs2_sbstats_seq_ops = {
2236         .start = gfs2_sbstats_seq_start,
2237         .next  = gfs2_sbstats_seq_next,
2238         .stop  = gfs2_sbstats_seq_stop,
2239         .show  = gfs2_sbstats_seq_show,
2240 };
2241
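/*
 * Preallocate a generously sized seq_file buffer (64k, or the largest
 * allocation not considered "costly" by the page allocator, whichever is
 * smaller) so that dumping large glocks does not force seq_read() through
 * its grow-and-retry cycle on every read.
 */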
2242 #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
2243
2244 static int __gfs2_glocks_open(struct inode *inode, struct file *file,
2245                               const struct seq_operations *ops)
2246 {
2247         int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));
2248         if (ret == 0) {
2249                 struct seq_file *seq = file->private_data;
2250                 struct gfs2_glock_iter *gi = seq->private;
2251
2252                 gi->sdp = inode->i_private;
2253                 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
2254                 if (seq->buf)
2255                         seq->size = GFS2_SEQ_GOODSIZE;
2256                 /*
2257                  * Initially, we are "before" the first hash table entry; the
2258                  * first call to rhashtable_walk_next gets us the first entry.
2259                  */
2260                 gi->last_pos = -1;
2261                 gi->gl = NULL;
2262                 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2263         }
2264         return ret;
2265 }
2266
2267 static int gfs2_glocks_open(struct inode *inode, struct file *file)
2268 {
2269         return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
2270 }
2271
2272 static int gfs2_glocks_release(struct inode *inode, struct file *file)
2273 {
2274         struct seq_file *seq = file->private_data;
2275         struct gfs2_glock_iter *gi = seq->private;
2276
2277         if (gi->gl)
2278                 gfs2_glock_put(gi->gl);
2279         rhashtable_walk_exit(&gi->hti);
2280         return seq_release_private(inode, file);
2281 }
2282
2283 static int gfs2_glstats_open(struct inode *inode, struct file *file)
2284 {
2285         return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
2286 }
2287
2288 static int gfs2_sbstats_open(struct inode *inode, struct file *file)
2289 {
2290         int ret = seq_open(file, &gfs2_sbstats_seq_ops);
2291         if (ret == 0) {
2292                 struct seq_file *seq = file->private_data;
2293                 seq->private = inode->i_private;  /* sdp */
2294         }
2295         return ret;
2296 }
2297
2298 static const struct file_operations gfs2_glocks_fops = {
2299         .owner   = THIS_MODULE,
2300         .open    = gfs2_glocks_open,
2301         .read    = seq_read,
2302         .llseek  = seq_lseek,
2303         .release = gfs2_glocks_release,
2304 };
2305
2306 static const struct file_operations gfs2_glstats_fops = {
2307         .owner   = THIS_MODULE,
2308         .open    = gfs2_glstats_open,
2309         .read    = seq_read,
2310         .llseek  = seq_lseek,
2311         .release = gfs2_glocks_release,
2312 };
2313
2314 static const struct file_operations gfs2_sbstats_fops = {
2315         .owner   = THIS_MODULE,
2316         .open    = gfs2_sbstats_open,
2317         .read    = seq_read,
2318         .llseek  = seq_lseek,
2319         .release = seq_release,
2320 };
2321
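/*
 * The files created below appear per filesystem under the gfs2 debugfs
 * directory, i.e. <debugfs mountpoint>/gfs2/<table name>/{glocks, glstats,
 * sbstats}, typically under /sys/kernel/debug.
 */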
2322 void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
2323 {
2324         sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
2325
2326         debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2327                             &gfs2_glocks_fops);
2328
2329         debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2330                             &gfs2_glstats_fops);
2331
2332         debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2333                             &gfs2_sbstats_fops);
2334 }
2335
2336 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
2337 {
2338         debugfs_remove_recursive(sdp->debugfs_dir);
2339         sdp->debugfs_dir = NULL;
2340 }
2341
2342 void gfs2_register_debugfs(void)
2343 {
2344         gfs2_root = debugfs_create_dir("gfs2", NULL);
2345 }
2346
2347 void gfs2_unregister_debugfs(void)
2348 {
2349         debugfs_remove(gfs2_root);
2350         gfs2_root = NULL;
2351 }