fs/gfs2/glock.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
5  */
6
7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9 #include <linux/sched.h>
10 #include <linux/slab.h>
11 #include <linux/spinlock.h>
12 #include <linux/buffer_head.h>
13 #include <linux/delay.h>
14 #include <linux/sort.h>
15 #include <linux/hash.h>
16 #include <linux/jhash.h>
17 #include <linux/kallsyms.h>
18 #include <linux/gfs2_ondisk.h>
19 #include <linux/list.h>
20 #include <linux/wait.h>
21 #include <linux/module.h>
22 #include <linux/uaccess.h>
23 #include <linux/seq_file.h>
24 #include <linux/debugfs.h>
25 #include <linux/kthread.h>
26 #include <linux/freezer.h>
27 #include <linux/workqueue.h>
28 #include <linux/jiffies.h>
29 #include <linux/rcupdate.h>
30 #include <linux/rculist_bl.h>
31 #include <linux/bit_spinlock.h>
32 #include <linux/percpu.h>
33 #include <linux/list_sort.h>
34 #include <linux/lockref.h>
35 #include <linux/rhashtable.h>
36
37 #include "gfs2.h"
38 #include "incore.h"
39 #include "glock.h"
40 #include "glops.h"
41 #include "inode.h"
42 #include "lops.h"
43 #include "meta_io.h"
44 #include "quota.h"
45 #include "super.h"
46 #include "util.h"
47 #include "bmap.h"
48 #define CREATE_TRACE_POINTS
49 #include "trace_gfs2.h"
50
51 struct gfs2_glock_iter {
52         struct gfs2_sbd *sdp;           /* incore superblock           */
53         struct rhashtable_iter hti;     /* rhashtable iterator         */
54         struct gfs2_glock *gl;          /* current glock struct        */
55         loff_t last_pos;                /* last position               */
56 };
57
58 typedef void (*glock_examiner) (struct gfs2_glock * gl);
59
60 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
61
62 static struct dentry *gfs2_root;
63 static struct workqueue_struct *glock_workqueue;
64 struct workqueue_struct *gfs2_delete_workqueue;
65 static LIST_HEAD(lru_list);
66 static atomic_t lru_count = ATOMIC_INIT(0);
67 static DEFINE_SPINLOCK(lru_lock);
68
69 #define GFS2_GL_HASH_SHIFT      15
70 #define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)
71
72 static const struct rhashtable_params ht_parms = {
73         .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
74         .key_len = offsetofend(struct lm_lockname, ln_type),
75         .key_offset = offsetof(struct gfs2_glock, gl_name),
76         .head_offset = offsetof(struct gfs2_glock, gl_node),
77 };
78
79 static struct rhashtable gl_hash_table;
80
81 #define GLOCK_WAIT_TABLE_BITS 12
82 #define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
83 static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;
84
85 struct wait_glock_queue {
86         struct lm_lockname *name;
87         wait_queue_entry_t wait;
88 };
89
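/*
 * Wake-up callback for entries on the glock wait table.  Several lock
 * names can hash to the same wait queue head, so only wake a waiter if
 * the name being woken (the key) matches the name it is waiting for.
 */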
90 static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
91                                int sync, void *key)
92 {
93         struct wait_glock_queue *wait_glock =
94                 container_of(wait, struct wait_glock_queue, wait);
95         struct lm_lockname *wait_name = wait_glock->name;
96         struct lm_lockname *wake_name = key;
97
98         if (wake_name->ln_sbd != wait_name->ln_sbd ||
99             wake_name->ln_number != wait_name->ln_number ||
100             wake_name->ln_type != wait_name->ln_type)
101                 return 0;
102         return autoremove_wake_function(wait, mode, sync, key);
103 }
104
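/* Map a lock name to its wait queue head in the glock wait table. */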
105 static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
106 {
107         u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);
108
109         return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
110 }
111
112 /**
113  * wake_up_glock  -  Wake up waiters on a glock
114  * @gl: the glock
115  */
116 static void wake_up_glock(struct gfs2_glock *gl)
117 {
118         wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);
119
120         if (waitqueue_active(wq))
121                 __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
122 }
123
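/*
 * RCU callback that frees the glock memory once concurrent lockless
 * lookups are guaranteed to have finished.  Address space glocks come
 * from their own slab cache; other glocks may carry an LVB which is
 * freed here as well.
 */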
124 static void gfs2_glock_dealloc(struct rcu_head *rcu)
125 {
126         struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
127
128         if (gl->gl_ops->go_flags & GLOF_ASPACE) {
129                 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
130         } else {
131                 kfree(gl->gl_lksb.sb_lvbptr);
132                 kmem_cache_free(gfs2_glock_cachep, gl);
133         }
134 }
135
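/**
 * gfs2_glock_free - Remove a dead glock from the hash table and free it
 * @gl: The glock
 *
 * Wakes any process waiting in find_insert_glock() for this entry to
 * disappear, and releases the memory after an RCU grace period via
 * gfs2_glock_dealloc().
 */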
136 void gfs2_glock_free(struct gfs2_glock *gl)
137 {
138         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
139
140         BUG_ON(atomic_read(&gl->gl_revokes));
141         rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
142         smp_mb();
143         wake_up_glock(gl);
144         call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
145         if (atomic_dec_and_test(&sdp->sd_glock_disposal))
146                 wake_up(&sdp->sd_glock_wait);
147 }
148
149 /**
150  * gfs2_glock_hold() - increment reference count on glock
151  * @gl: The glock to hold
152  *
153  */
154
155 void gfs2_glock_hold(struct gfs2_glock *gl)
156 {
157         GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
158         lockref_get(&gl->gl_lockref);
159 }
160
161 /**
162  * demote_ok - Check to see if it's ok to unlock a glock
163  * @gl: the glock
164  *
165  * Returns: 1 if it's ok
166  */
167
168 static int demote_ok(const struct gfs2_glock *gl)
169 {
170         const struct gfs2_glock_operations *glops = gl->gl_ops;
171
172         if (gl->gl_state == LM_ST_UNLOCKED)
173                 return 0;
174         if (!list_empty(&gl->gl_holders))
175                 return 0;
176         if (glops->go_demote_ok)
177                 return glops->go_demote_ok(gl);
178         return 1;
179 }
180
181
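/*
 * Add a glock to (or move it to the tail of) the global LRU list.
 * Only glock types flagged GLOF_LRU take part in reclaim by the glock
 * shrinker.
 */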
182 void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
183 {
184         if (!(gl->gl_ops->go_flags & GLOF_LRU))
185                 return;
186
187         spin_lock(&lru_lock);
188
189         list_del(&gl->gl_lru);
190         list_add_tail(&gl->gl_lru, &lru_list);
191
192         if (!test_bit(GLF_LRU, &gl->gl_flags)) {
193                 set_bit(GLF_LRU, &gl->gl_flags);
194                 atomic_inc(&lru_count);
195         }
196
197         spin_unlock(&lru_lock);
198 }
199
200 static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
201 {
202         if (!(gl->gl_ops->go_flags & GLOF_LRU))
203                 return;
204
205         spin_lock(&lru_lock);
206         if (test_bit(GLF_LRU, &gl->gl_flags)) {
207                 list_del_init(&gl->gl_lru);
208                 atomic_dec(&lru_count);
209                 clear_bit(GLF_LRU, &gl->gl_flags);
210         }
211         spin_unlock(&lru_lock);
212 }
213
214 /*
215  * Enqueue the glock on the work queue.  Passes one glock reference on to the
216  * work queue.
217  */
218 static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
219         if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) {
220                 /*
221                  * We are holding the lockref spinlock, and the work was still
222                  * queued above.  The queued work (glock_work_func) takes that
223                  * spinlock before dropping its glock reference(s), so it
224                  * cannot have dropped them in the meantime.
225                  */
226                 GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
227                 gl->gl_lockref.count--;
228         }
229 }
230
231 static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
232         spin_lock(&gl->gl_lockref.lock);
233         __gfs2_glock_queue_work(gl, delay);
234         spin_unlock(&gl->gl_lockref.lock);
235 }
236
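/*
 * Final reference drop: called with gl_lockref.lock held (and drops it).
 * Marks the lockref dead, removes the glock from the LRU and hands it to
 * the lock module via lm_put_lock() for unlocking and freeing.
 */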
237 static void __gfs2_glock_put(struct gfs2_glock *gl)
238 {
239         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
240         struct address_space *mapping = gfs2_glock2aspace(gl);
241
242         lockref_mark_dead(&gl->gl_lockref);
243
244         gfs2_glock_remove_from_lru(gl);
245         spin_unlock(&gl->gl_lockref.lock);
246         GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
247         GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
248         trace_gfs2_glock_put(gl);
249         sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
250 }
251
252 /*
253  * Cause the glock to be put in work queue context.
254  */
255 void gfs2_glock_queue_put(struct gfs2_glock *gl)
256 {
257         gfs2_glock_queue_work(gl, 0);
258 }
259
260 /**
261  * gfs2_glock_put() - Decrement reference count on glock
262  * @gl: The glock to put
263  *
264  */
265
266 void gfs2_glock_put(struct gfs2_glock *gl)
267 {
268         if (lockref_put_or_lock(&gl->gl_lockref))
269                 return;
270
271         __gfs2_glock_put(gl);
272 }
273
274 /**
275  * may_grant - check if it's ok to grant a new lock
276  * @gl: The glock
277  * @gh: The lock request which we wish to grant
278  *
279  * Returns: true if it's ok to grant the lock
280  */
281
282 static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
283 {
284         const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
285         if ((gh->gh_state == LM_ST_EXCLUSIVE ||
286              gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
287                 return 0;
288         if (gl->gl_state == gh->gh_state)
289                 return 1;
290         if (gh->gh_flags & GL_EXACT)
291                 return 0;
292         if (gl->gl_state == LM_ST_EXCLUSIVE) {
293                 if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
294                         return 1;
295                 if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
296                         return 1;
297         }
298         if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
299                 return 1;
300         return 0;
301 }
302
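/*
 * Clear HIF_WAIT and wake the process sleeping in gfs2_glock_wait().
 * Async requests also wake sd_async_glock_wait, which is what
 * gfs2_glock_async_wait() sleeps on.
 */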
303 static void gfs2_holder_wake(struct gfs2_holder *gh)
304 {
305         clear_bit(HIF_WAIT, &gh->gh_iflags);
306         smp_mb__after_atomic();
307         wake_up_bit(&gh->gh_iflags, HIF_WAIT);
308         if (gh->gh_flags & GL_ASYNC) {
309                 struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;
310
311                 wake_up(&sdp->sd_async_glock_wait);
312         }
313 }
314
315 /**
316  * do_error - Something unexpected has happened during a lock request
317  * @gl: The glock
 * @ret: The DLM status of the request, or zero to fail queued "try" locks
 *
318  */
319
320 static void do_error(struct gfs2_glock *gl, const int ret)
321 {
322         struct gfs2_holder *gh, *tmp;
323
324         list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
325                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
326                         continue;
327                 if (ret & LM_OUT_ERROR)
328                         gh->gh_error = -EIO;
329                 else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
330                         gh->gh_error = GLR_TRYFAILED;
331                 else
332                         continue;
333                 list_del_init(&gh->gh_list);
334                 trace_gfs2_glock_queue(gh, 0);
335                 gfs2_holder_wake(gh);
336         }
337 }
338
339 /**
340  * do_promote - promote as many requests as possible on the current queue
341  * @gl: The glock
342  * 
343  * Returns: 1 if there is a blocked holder at the head of the list, or 2
344  *          if a type specific operation is underway.
345  */
346
347 static int do_promote(struct gfs2_glock *gl)
348 __releases(&gl->gl_lockref.lock)
349 __acquires(&gl->gl_lockref.lock)
350 {
351         const struct gfs2_glock_operations *glops = gl->gl_ops;
352         struct gfs2_holder *gh, *tmp;
353         int ret;
354
355 restart:
356         list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
357                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
358                         continue;
359                 if (may_grant(gl, gh)) {
360                         if (gh->gh_list.prev == &gl->gl_holders &&
361                             glops->go_lock) {
362                                 spin_unlock(&gl->gl_lockref.lock);
363                                 /* FIXME: eliminate this eventually */
364                                 ret = glops->go_lock(gh);
365                                 spin_lock(&gl->gl_lockref.lock);
366                                 if (ret) {
367                                         if (ret == 1)
368                                                 return 2;
369                                         gh->gh_error = ret;
370                                         list_del_init(&gh->gh_list);
371                                         trace_gfs2_glock_queue(gh, 0);
372                                         gfs2_holder_wake(gh);
373                                         goto restart;
374                                 }
375                                 set_bit(HIF_HOLDER, &gh->gh_iflags);
376                                 trace_gfs2_promote(gh, 1);
377                                 gfs2_holder_wake(gh);
378                                 goto restart;
379                         }
380                         set_bit(HIF_HOLDER, &gh->gh_iflags);
381                         trace_gfs2_promote(gh, 0);
382                         gfs2_holder_wake(gh);
383                         continue;
384                 }
385                 if (gh->gh_list.prev == &gl->gl_holders)
386                         return 1;
387                 do_error(gl, 0);
388                 break;
389         }
390         return 0;
391 }
392
393 /**
394  * find_first_waiter - find the first gh that's waiting for the glock
395  * @gl: the glock
396  */
397
398 static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
399 {
400         struct gfs2_holder *gh;
401
402         list_for_each_entry(gh, &gl->gl_holders, gh_list) {
403                 if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
404                         return gh;
405         }
406         return NULL;
407 }
408
409 /**
410  * state_change - record that the glock is now in a different state
411  * @gl: the glock
412  * @new_state: the new state
413  *
414  */
415
416 static void state_change(struct gfs2_glock *gl, unsigned int new_state)
417 {
418         int held1, held2;
419
420         held1 = (gl->gl_state != LM_ST_UNLOCKED);
421         held2 = (new_state != LM_ST_UNLOCKED);
422
423         if (held1 != held2) {
424                 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
425                 if (held2)
426                         gl->gl_lockref.count++;
427                 else
428                         gl->gl_lockref.count--;
429         }
430         if (held1 && held2 && list_empty(&gl->gl_holders))
431                 clear_bit(GLF_QUEUED, &gl->gl_flags);
432
433         if (new_state != gl->gl_target)
434                 /* shorten our minimum hold time */
435                 gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
436                                        GL_GLOCK_MIN_HOLD);
437         gl->gl_state = new_state;
438         gl->gl_tchange = jiffies;
439 }
440
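/*
 * A demote request has been dealt with: reset gl_demote_state to the
 * "no demote pending" value and wake anyone waiting for GLF_DEMOTE to
 * clear (see gfs2_glock_dq_wait()).
 */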
441 static void gfs2_demote_wake(struct gfs2_glock *gl)
442 {
443         gl->gl_demote_state = LM_ST_EXCLUSIVE;
444         clear_bit(GLF_DEMOTE, &gl->gl_flags);
445         smp_mb__after_atomic();
446         wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
447 }
448
449 /**
450  * finish_xmote - The DLM has replied to one of our lock requests
451  * @gl: The glock
452  * @ret: The status from the DLM
453  *
454  */
455
456 static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
457 {
458         const struct gfs2_glock_operations *glops = gl->gl_ops;
459         struct gfs2_holder *gh;
460         unsigned state = ret & LM_OUT_ST_MASK;
461         int rv;
462
463         spin_lock(&gl->gl_lockref.lock);
464         trace_gfs2_glock_state_change(gl, state);
465         state_change(gl, state);
466         gh = find_first_waiter(gl);
467
468         /* Demote to UN request arrived during demote to SH or DF */
469         if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
470             state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
471                 gl->gl_target = LM_ST_UNLOCKED;
472
473         /* Check for state != intended state */
474         if (unlikely(state != gl->gl_target)) {
475                 if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
476                         /* move to back of queue and try next entry */
477                         if (ret & LM_OUT_CANCELED) {
478                                 if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
479                                         list_move_tail(&gh->gh_list, &gl->gl_holders);
480                                 gh = find_first_waiter(gl);
481                                 gl->gl_target = gh->gh_state;
482                                 goto retry;
483                         }
484                         /* Some error or failed "try lock" - report it */
485                         if ((ret & LM_OUT_ERROR) ||
486                             (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
487                                 gl->gl_target = gl->gl_state;
488                                 do_error(gl, ret);
489                                 goto out;
490                         }
491                 }
492                 switch(state) {
493                 /* Unlocked due to conversion deadlock, try again */
494                 case LM_ST_UNLOCKED:
495 retry:
496                         do_xmote(gl, gh, gl->gl_target);
497                         break;
498                 /* Conversion fails, unlock and try again */
499                 case LM_ST_SHARED:
500                 case LM_ST_DEFERRED:
501                         do_xmote(gl, gh, LM_ST_UNLOCKED);
502                         break;
503                 default: /* Everything else */
504                         fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
505                                gl->gl_target, state);
506                         GLOCK_BUG_ON(gl, 1);
507                 }
508                 spin_unlock(&gl->gl_lockref.lock);
509                 return;
510         }
511
512         /* Fast path - we got what we asked for */
513         if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
514                 gfs2_demote_wake(gl);
515         if (state != LM_ST_UNLOCKED) {
516                 if (glops->go_xmote_bh) {
517                         spin_unlock(&gl->gl_lockref.lock);
518                         rv = glops->go_xmote_bh(gl, gh);
519                         spin_lock(&gl->gl_lockref.lock);
520                         if (rv) {
521                                 do_error(gl, rv);
522                                 goto out;
523                         }
524                 }
525                 rv = do_promote(gl);
526                 if (rv == 2)
527                         goto out_locked;
528         }
529 out:
530         clear_bit(GLF_LOCK, &gl->gl_flags);
531 out_locked:
532         spin_unlock(&gl->gl_lockref.lock);
533 }
534
535 /**
536  * do_xmote - Calls the DLM to change the state of a lock
537  * @gl: The lock state
538  * @gh: The holder (only for promotes)
539  * @target: The target lock state
540  *
541  */
542
543 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
544 __releases(&gl->gl_lockref.lock)
545 __acquires(&gl->gl_lockref.lock)
546 {
547         const struct gfs2_glock_operations *glops = gl->gl_ops;
548         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
549         unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
550         int ret;
551
552         if (unlikely(gfs2_withdrawn(sdp)) &&
553             target != LM_ST_UNLOCKED)
554                 return;
555         lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
556                       LM_FLAG_PRIORITY);
557         GLOCK_BUG_ON(gl, gl->gl_state == target);
558         GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
559         if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
560             glops->go_inval) {
561                 /*
562                  * If another process is already doing the invalidate, let that
563                  * finish first.  The glock state machine will get back to this
564                  * holder again later.
565                  */
566                 if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
567                                      &gl->gl_flags))
568                         return;
569                 do_error(gl, 0); /* Fail queued try locks */
570         }
571         gl->gl_req = target;
572         set_bit(GLF_BLOCKING, &gl->gl_flags);
573         if ((gl->gl_req == LM_ST_UNLOCKED) ||
574             (gl->gl_state == LM_ST_EXCLUSIVE) ||
575             (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
576                 clear_bit(GLF_BLOCKING, &gl->gl_flags);
577         spin_unlock(&gl->gl_lockref.lock);
578         if (glops->go_sync)
579                 glops->go_sync(gl);
580         if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
581                 glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
582         clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
583
584         gfs2_glock_hold(gl);
585         if (sdp->sd_lockstruct.ls_ops->lm_lock) {
586                 /* lock_dlm */
587                 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
588                 if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
589                     target == LM_ST_UNLOCKED &&
590                     test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
591                         finish_xmote(gl, target);
592                         gfs2_glock_queue_work(gl, 0);
593                 }
594                 else if (ret) {
595                         fs_err(sdp, "lm_lock ret %d\n", ret);
596                         GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
597                 }
598         } else { /* lock_nolock */
599                 finish_xmote(gl, target);
600                 gfs2_glock_queue_work(gl, 0);
601         }
602
603         spin_lock(&gl->gl_lockref.lock);
604 }
605
606 /**
607  * find_first_holder - find the first "holder" gh
608  * @gl: the glock
609  */
610
611 static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
612 {
613         struct gfs2_holder *gh;
614
615         if (!list_empty(&gl->gl_holders)) {
616                 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
617                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
618                         return gh;
619         }
620         return NULL;
621 }
622
623 /**
624  * run_queue - do all outstanding tasks related to a glock
625  * @gl: The glock in question
626  * @nonblock: True if we must not block in run_queue
627  *
628  */
629
630 static void run_queue(struct gfs2_glock *gl, const int nonblock)
631 __releases(&gl->gl_lockref.lock)
632 __acquires(&gl->gl_lockref.lock)
633 {
634         struct gfs2_holder *gh = NULL;
635         int ret;
636
637         if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
638                 return;
639
640         GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
641
642         if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
643             gl->gl_demote_state != gl->gl_state) {
644                 if (find_first_holder(gl))
645                         goto out_unlock;
646                 if (nonblock)
647                         goto out_sched;
648                 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
649                 GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
650                 gl->gl_target = gl->gl_demote_state;
651         } else {
652                 if (test_bit(GLF_DEMOTE, &gl->gl_flags))
653                         gfs2_demote_wake(gl);
654                 ret = do_promote(gl);
655                 if (ret == 0)
656                         goto out_unlock;
657                 if (ret == 2)
658                         goto out;
659                 gh = find_first_waiter(gl);
660                 gl->gl_target = gh->gh_state;
661                 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
662                         do_error(gl, 0); /* Fail queued try locks */
663         }
664         do_xmote(gl, gh, gl->gl_target);
665 out:
666         return;
667
668 out_sched:
669         clear_bit(GLF_LOCK, &gl->gl_flags);
670         smp_mb__after_atomic();
671         gl->gl_lockref.count++;
672         __gfs2_glock_queue_work(gl, 0);
673         return;
674
675 out_unlock:
676         clear_bit(GLF_LOCK, &gl->gl_flags);
677         smp_mb__after_atomic();
678         return;
679 }
680
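/*
 * Deferred work scheduled from the iopen glock callback: look up the
 * apparently unlinked inode by block number and prune its dcache
 * aliases so that the final iput() can evict and delete it.
 */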
681 static void delete_work_func(struct work_struct *work)
682 {
683         struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
684         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
685         struct inode *inode;
686         u64 no_addr = gl->gl_name.ln_number;
687
688         /* If someone's using this glock to create a new dinode, the block must
689            have been freed by another node, then re-used, in which case our
690            iopen callback is too late after the fact. Ignore it. */
691         if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
692                 goto out;
693
694         inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
695         if (!IS_ERR_OR_NULL(inode)) {
696                 d_prune_aliases(inode);
697                 iput(inode);
698         }
699 out:
700         gfs2_glock_put(gl);
701 }
702
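/*
 * glock_work_func - run the glock state machine from the glock workqueue
 *
 * Handles a pending DLM reply (GLF_REPLY_PENDING), turns an expired
 * GLF_PENDING_DEMOTE into a real demote once the minimum hold time is
 * up, runs the holder queue, and finally drops the reference(s) that
 * were passed in along with the work item.
 */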
703 static void glock_work_func(struct work_struct *work)
704 {
705         unsigned long delay = 0;
706         struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
707         unsigned int drop_refs = 1;
708
709         if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
710                 finish_xmote(gl, gl->gl_reply);
711                 drop_refs++;
712         }
713         spin_lock(&gl->gl_lockref.lock);
714         if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
715             gl->gl_state != LM_ST_UNLOCKED &&
716             gl->gl_demote_state != LM_ST_EXCLUSIVE) {
717                 unsigned long holdtime, now = jiffies;
718
719                 holdtime = gl->gl_tchange + gl->gl_hold_time;
720                 if (time_before(now, holdtime))
721                         delay = holdtime - now;
722
723                 if (!delay) {
724                         clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
725                         set_bit(GLF_DEMOTE, &gl->gl_flags);
726                 }
727         }
728         run_queue(gl, 0);
729         if (delay) {
730                 /* Keep one glock reference for the work we requeue. */
731                 drop_refs--;
732                 if (gl->gl_name.ln_type != LM_TYPE_INODE)
733                         delay = 0;
734                 __gfs2_glock_queue_work(gl, delay);
735         }
736
737         /*
738          * Drop the remaining glock references manually here. (Mind that
739          * __gfs2_glock_queue_work depends on the lockref spinlock being held
740          * here as well.)
741          */
742         gl->gl_lockref.count -= drop_refs;
743         if (!gl->gl_lockref.count) {
744                 __gfs2_glock_put(gl);
745                 return;
746         }
747         spin_unlock(&gl->gl_lockref.lock);
748 }
749
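/*
 * Look up a glock by name, optionally inserting @new if no entry exists.
 * If a matching entry is found but is already dead (gfs2_glock_free() is
 * pending), wait on the glock wait table for it to disappear and retry.
 *
 * Returns the existing glock with a reference held, NULL if nothing was
 * found (or @new was inserted), or an ERR_PTR() on insertion failure.
 */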
750 static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
751                                             struct gfs2_glock *new)
752 {
753         struct wait_glock_queue wait;
754         wait_queue_head_t *wq = glock_waitqueue(name);
755         struct gfs2_glock *gl;
756
757         wait.name = name;
758         init_wait(&wait.wait);
759         wait.wait.func = glock_wake_function;
760
761 again:
762         prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
763         rcu_read_lock();
764         if (new) {
765                 gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
766                         &new->gl_node, ht_parms);
767                 if (IS_ERR(gl))
768                         goto out;
769         } else {
770                 gl = rhashtable_lookup_fast(&gl_hash_table,
771                         name, ht_parms);
772         }
773         if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
774                 rcu_read_unlock();
775                 schedule();
776                 goto again;
777         }
778 out:
779         rcu_read_unlock();
780         finish_wait(wq, &wait.wait);
781         return gl;
782 }
783
784 /**
785  * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
786  * @sdp: The GFS2 superblock
787  * @number: the lock number
788  * @glops: The glock_operations to use
789  * @create: If 0, don't create the glock if it doesn't exist
790  * @glp: the glock is returned here
791  *
792  * This does not lock a glock, just finds/creates structures for one.
793  *
794  * Returns: errno
795  */
796
797 int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
798                    const struct gfs2_glock_operations *glops, int create,
799                    struct gfs2_glock **glp)
800 {
801         struct super_block *s = sdp->sd_vfs;
802         struct lm_lockname name = { .ln_number = number,
803                                     .ln_type = glops->go_type,
804                                     .ln_sbd = sdp };
805         struct gfs2_glock *gl, *tmp;
806         struct address_space *mapping;
807         struct kmem_cache *cachep;
808         int ret = 0;
809
810         gl = find_insert_glock(&name, NULL);
811         if (gl) {
812                 *glp = gl;
813                 return 0;
814         }
815         if (!create)
816                 return -ENOENT;
817
818         if (glops->go_flags & GLOF_ASPACE)
819                 cachep = gfs2_glock_aspace_cachep;
820         else
821                 cachep = gfs2_glock_cachep;
822         gl = kmem_cache_alloc(cachep, GFP_NOFS);
823         if (!gl)
824                 return -ENOMEM;
825
826         memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
827
828         if (glops->go_flags & GLOF_LVB) {
829                 gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
830                 if (!gl->gl_lksb.sb_lvbptr) {
831                         kmem_cache_free(cachep, gl);
832                         return -ENOMEM;
833                 }
834         }
835
836         atomic_inc(&sdp->sd_glock_disposal);
837         gl->gl_node.next = NULL;
838         gl->gl_flags = 0;
839         gl->gl_name = name;
840         gl->gl_lockref.count = 1;
841         gl->gl_state = LM_ST_UNLOCKED;
842         gl->gl_target = LM_ST_UNLOCKED;
843         gl->gl_demote_state = LM_ST_EXCLUSIVE;
844         gl->gl_ops = glops;
845         gl->gl_dstamp = 0;
846         preempt_disable();
847         /* We use the global stats to estimate the initial per-glock stats */
848         gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
849         preempt_enable();
850         gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
851         gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
852         gl->gl_tchange = jiffies;
853         gl->gl_object = NULL;
854         gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
855         INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
856         INIT_WORK(&gl->gl_delete, delete_work_func);
857
858         mapping = gfs2_glock2aspace(gl);
859         if (mapping) {
860                 mapping->a_ops = &gfs2_meta_aops;
861                 mapping->host = s->s_bdev->bd_inode;
862                 mapping->flags = 0;
863                 mapping_set_gfp_mask(mapping, GFP_NOFS);
864                 mapping->private_data = NULL;
865                 mapping->writeback_index = 0;
866         }
867
868         tmp = find_insert_glock(&name, gl);
869         if (!tmp) {
870                 *glp = gl;
871                 goto out;
872         }
873         if (IS_ERR(tmp)) {
874                 ret = PTR_ERR(tmp);
875                 goto out_free;
876         }
877         *glp = tmp;
878
879 out_free:
880         kfree(gl->gl_lksb.sb_lvbptr);
881         kmem_cache_free(cachep, gl);
882         atomic_dec(&sdp->sd_glock_disposal);
883
884 out:
885         return ret;
886 }
887
888 /**
889  * gfs2_holder_init - initialize a struct gfs2_holder in the default way
890  * @gl: the glock
891  * @state: the state we're requesting
892  * @flags: the modifier flags
893  * @gh: the holder structure
894  *
895  */
896
897 void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
898                       struct gfs2_holder *gh)
899 {
900         INIT_LIST_HEAD(&gh->gh_list);
901         gh->gh_gl = gl;
902         gh->gh_ip = _RET_IP_;
903         gh->gh_owner_pid = get_pid(task_pid(current));
904         gh->gh_state = state;
905         gh->gh_flags = flags;
906         gh->gh_error = 0;
907         gh->gh_iflags = 0;
908         gfs2_glock_hold(gl);
909 }
910
911 /**
912  * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
913  * @state: the state we're requesting
914  * @flags: the modifier flags
915  * @gh: the holder structure
916  *
917  * Don't mess with the glock.
918  *
919  */
920
921 void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
922 {
923         gh->gh_state = state;
924         gh->gh_flags = flags;
925         gh->gh_iflags = 0;
926         gh->gh_ip = _RET_IP_;
927         put_pid(gh->gh_owner_pid);
928         gh->gh_owner_pid = get_pid(task_pid(current));
929 }
930
931 /**
932  * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
933  * @gh: the holder structure
934  *
935  */
936
937 void gfs2_holder_uninit(struct gfs2_holder *gh)
938 {
939         put_pid(gh->gh_owner_pid);
940         gfs2_glock_put(gh->gh_gl);
941         gfs2_holder_mark_uninitialized(gh);
942         gh->gh_ip = 0;
943 }
944
945 static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
946                                         unsigned long start_time)
947 {
948         /* Have we waited longer than a second? */
949         if (time_after(jiffies, start_time + HZ)) {
950                 /* Lengthen the minimum hold time. */
951                 gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
952                                        GL_GLOCK_MAX_HOLD);
953         }
954 }
955
956 /**
957  * gfs2_glock_wait - wait on a glock acquisition
958  * @gh: the glock holder
959  *
960  * Returns: 0 on success
961  */
962
963 int gfs2_glock_wait(struct gfs2_holder *gh)
964 {
965         unsigned long start_time = jiffies;
966
967         might_sleep();
968         wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
969         gfs2_glock_update_hold_time(gh->gh_gl, start_time);
970         return gh->gh_error;
971 }
972
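/*
 * Return true while any holder in the array still has HIF_WAIT set,
 * i.e. is still waiting for a reply from the DLM.
 */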
973 static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
974 {
975         int i;
976
977         for (i = 0; i < num_gh; i++)
978                 if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
979                         return 1;
980         return 0;
981 }
982
983 /**
984  * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
985  * @num_gh: the number of holders in the array
986  * @ghs: the glock holder array
987  *
988  * Returns: 0 on success, meaning all glocks have been granted and are held.
989  *          -ESTALE if the request timed out, meaning all glocks were released,
990  *          and the caller should retry the operation.
991  */
992
993 int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
994 {
995         struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
996         int i, ret = 0, timeout = 0;
997         unsigned long start_time = jiffies;
998         bool keep_waiting;
999
1000         might_sleep();
1001         /*
1002          * Total up the (minimum hold time * 2) of all glocks and use that to
1003          * determine the max amount of time we should wait.
1004          */
1005         for (i = 0; i < num_gh; i++)
1006                 timeout += ghs[i].gh_gl->gl_hold_time << 1;
1007
1008 wait_for_dlm:
1009         if (!wait_event_timeout(sdp->sd_async_glock_wait,
1010                                 !glocks_pending(num_gh, ghs), timeout))
1011                 ret = -ESTALE; /* request timed out. */
1012
1013         /*
1014          * If dlm granted all our requests, we need to adjust the glock
1015          * minimum hold time values according to how long we waited.
1016          *
1017          * If our request timed out, we need to repeatedly release any held
1018          * glocks we acquired thus far to allow dlm to acquire the remaining
1019          * glocks without deadlocking.  We cannot currently cancel outstanding
1020          * glock acquisitions.
1021          *
1022          * The HIF_WAIT bit tells us which requests still need a response from
1023          * dlm.
1024          *
1025          * If dlm sent us any errors, we return the first error we find.
1026          */
1027         keep_waiting = false;
1028         for (i = 0; i < num_gh; i++) {
1029                 /* Skip holders we have already dequeued below. */
1030                 if (!gfs2_holder_queued(&ghs[i]))
1031                         continue;
1032                 /* Skip holders with a pending DLM response. */
1033                 if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) {
1034                         keep_waiting = true;
1035                         continue;
1036                 }
1037
1038                 if (test_bit(HIF_HOLDER, &ghs[i].gh_iflags)) {
1039                         if (ret == -ESTALE)
1040                                 gfs2_glock_dq(&ghs[i]);
1041                         else
1042                                 gfs2_glock_update_hold_time(ghs[i].gh_gl,
1043                                                             start_time);
1044                 }
1045                 if (!ret)
1046                         ret = ghs[i].gh_error;
1047         }
1048
1049         if (keep_waiting)
1050                 goto wait_for_dlm;
1051
1052         /*
1053          * At this point, we've either acquired all locks or released them all.
1054          */
1055         return ret;
1056 }
1057
1058 /**
1059  * handle_callback - process a demote request
1060  * @gl: the glock
1061  * @state: the state the caller wants us to change to
1062  *
1063  * There are only two requests that we are going to see in actual
1064  * practice: LM_ST_SHARED and LM_ST_UNLOCKED
1065  */
1066
1067 static void handle_callback(struct gfs2_glock *gl, unsigned int state,
1068                             unsigned long delay, bool remote)
1069 {
1070         int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
1071
1072         set_bit(bit, &gl->gl_flags);
1073         if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
1074                 gl->gl_demote_state = state;
1075                 gl->gl_demote_time = jiffies;
1076         } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
1077                         gl->gl_demote_state != state) {
1078                 gl->gl_demote_state = LM_ST_UNLOCKED;
1079         }
1080         if (gl->gl_ops->go_callback)
1081                 gl->gl_ops->go_callback(gl, remote);
1082         trace_gfs2_demote_rq(gl, remote);
1083 }
1084
1085 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
1086 {
1087         struct va_format vaf;
1088         va_list args;
1089
1090         va_start(args, fmt);
1091
1092         if (seq) {
1093                 seq_vprintf(seq, fmt, args);
1094         } else {
1095                 vaf.fmt = fmt;
1096                 vaf.va = &args;
1097
1098                 pr_err("%pV", &vaf);
1099         }
1100
1101         va_end(args);
1102 }
1103
1104 /**
1105  * add_to_queue - Add a holder to the wait queue (but look for recursion)
1106  * @gh: the holder structure to add
1107  *
1108  * Eventually we should move the recursive locking trap to a
1109  * debugging option or something like that. This is the fast
1110  * path and needs to have the minimum number of distractions.
1111  * 
1112  */
1113
1114 static inline void add_to_queue(struct gfs2_holder *gh)
1115 __releases(&gl->gl_lockref.lock)
1116 __acquires(&gl->gl_lockref.lock)
1117 {
1118         struct gfs2_glock *gl = gh->gh_gl;
1119         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1120         struct list_head *insert_pt = NULL;
1121         struct gfs2_holder *gh2;
1122         int try_futile = 0;
1123
1124         GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
1125         if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1126                 GLOCK_BUG_ON(gl, true);
1127
1128         if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1129                 if (test_bit(GLF_LOCK, &gl->gl_flags))
1130                         try_futile = !may_grant(gl, gh);
1131                 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
1132                         goto fail;
1133         }
1134
1135         list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
1136                 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
1137                     (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
1138                         goto trap_recursive;
1139                 if (try_futile &&
1140                     !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
1141 fail:
1142                         gh->gh_error = GLR_TRYFAILED;
1143                         gfs2_holder_wake(gh);
1144                         return;
1145                 }
1146                 if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
1147                         continue;
1148                 if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
1149                         insert_pt = &gh2->gh_list;
1150         }
1151         set_bit(GLF_QUEUED, &gl->gl_flags);
1152         trace_gfs2_glock_queue(gh, 1);
1153         gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
1154         gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
1155         if (likely(insert_pt == NULL)) {
1156                 list_add_tail(&gh->gh_list, &gl->gl_holders);
1157                 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
1158                         goto do_cancel;
1159                 return;
1160         }
1161         list_add_tail(&gh->gh_list, insert_pt);
1162 do_cancel:
1163         gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
1164         if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
1165                 spin_unlock(&gl->gl_lockref.lock);
1166                 if (sdp->sd_lockstruct.ls_ops->lm_cancel)
1167                         sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
1168                 spin_lock(&gl->gl_lockref.lock);
1169         }
1170         return;
1171
1172 trap_recursive:
1173         fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
1174         fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
1175         fs_err(sdp, "lock type: %d req lock state : %d\n",
1176                gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
1177         fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
1178         fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
1179         fs_err(sdp, "lock type: %d req lock state : %d\n",
1180                gh->gh_gl->gl_name.ln_type, gh->gh_state);
1181         gfs2_dump_glock(NULL, gl, true);
1182         BUG();
1183 }
1184
1185 /**
1186  * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1187  * @gh: the holder structure
1188  *
1189  * if (gh->gh_flags & GL_ASYNC), this never returns an error
1190  *
1191  * Returns: 0, GLR_TRYFAILED, or errno on failure
1192  */
1193
1194 int gfs2_glock_nq(struct gfs2_holder *gh)
1195 {
1196         struct gfs2_glock *gl = gh->gh_gl;
1197         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1198         int error = 0;
1199
1200         if (unlikely(gfs2_withdrawn(sdp)))
1201                 return -EIO;
1202
1203         if (test_bit(GLF_LRU, &gl->gl_flags))
1204                 gfs2_glock_remove_from_lru(gl);
1205
1206         spin_lock(&gl->gl_lockref.lock);
1207         add_to_queue(gh);
1208         if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
1209                      test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
1210                 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1211                 gl->gl_lockref.count++;
1212                 __gfs2_glock_queue_work(gl, 0);
1213         }
1214         run_queue(gl, 1);
1215         spin_unlock(&gl->gl_lockref.lock);
1216
1217         if (!(gh->gh_flags & GL_ASYNC))
1218                 error = gfs2_glock_wait(gh);
1219
1220         return error;
1221 }
1222
1223 /**
1224  * gfs2_glock_poll - poll to see if an async request has been completed
1225  * @gh: the holder
1226  *
1227  * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1228  */
1229
1230 int gfs2_glock_poll(struct gfs2_holder *gh)
1231 {
1232         return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
1233 }
1234
1235 /**
1236  * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1237  * @gh: the glock holder
1238  *
1239  */
1240
1241 void gfs2_glock_dq(struct gfs2_holder *gh)
1242 {
1243         struct gfs2_glock *gl = gh->gh_gl;
1244         const struct gfs2_glock_operations *glops = gl->gl_ops;
1245         unsigned delay = 0;
1246         int fast_path = 0;
1247
1248         spin_lock(&gl->gl_lockref.lock);
1249         if (gh->gh_flags & GL_NOCACHE)
1250                 handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1251
1252         list_del_init(&gh->gh_list);
1253         clear_bit(HIF_HOLDER, &gh->gh_iflags);
1254         if (find_first_holder(gl) == NULL) {
1255                 if (glops->go_unlock) {
1256                         GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
1257                         spin_unlock(&gl->gl_lockref.lock);
1258                         glops->go_unlock(gh);
1259                         spin_lock(&gl->gl_lockref.lock);
1260                         clear_bit(GLF_LOCK, &gl->gl_flags);
1261                 }
1262                 if (list_empty(&gl->gl_holders) &&
1263                     !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1264                     !test_bit(GLF_DEMOTE, &gl->gl_flags))
1265                         fast_path = 1;
1266         }
1267         if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
1268                 gfs2_glock_add_to_lru(gl);
1269
1270         trace_gfs2_glock_queue(gh, 0);
1271         if (unlikely(!fast_path)) {
1272                 gl->gl_lockref.count++;
1273                 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1274                     !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
1275                     gl->gl_name.ln_type == LM_TYPE_INODE)
1276                         delay = gl->gl_hold_time;
1277                 __gfs2_glock_queue_work(gl, delay);
1278         }
1279         spin_unlock(&gl->gl_lockref.lock);
1280 }
1281
1282 void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1283 {
1284         struct gfs2_glock *gl = gh->gh_gl;
1285         gfs2_glock_dq(gh);
1286         might_sleep();
1287         wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
1288 }
1289
1290 /**
1291  * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
1292  * @gh: the holder structure
1293  *
1294  */
1295
1296 void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
1297 {
1298         gfs2_glock_dq(gh);
1299         gfs2_holder_uninit(gh);
1300 }
1301
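/*
 * Example usage (illustrative sketch only): a typical caller that already
 * holds a reference to a glock "gl" takes and releases it like this:
 *
 *	struct gfs2_holder gh;
 *	int error;
 *
 *	gfs2_holder_init(gl, LM_ST_SHARED, 0, &gh);
 *	error = gfs2_glock_nq(&gh);
 *	if (!error) {
 *		...			(use the object the glock protects)
 *		gfs2_glock_dq_uninit(&gh);
 *	} else {
 *		gfs2_holder_uninit(&gh);
 *	}
 *
 * gfs2_glock_nq_init() (see glock.h) combines the init and nq steps, and
 * gfs2_glock_dq_uninit() above combines the dq and uninit steps.
 */
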
1302 /**
1303  * gfs2_glock_nq_num - acquire a glock based on lock number
1304  * @sdp: the filesystem
1305  * @number: the lock number
1306  * @glops: the glock operations for the type of glock
1307  * @state: the state to acquire the glock in
1308  * @flags: modifier flags for the acquisition
1309  * @gh: the struct gfs2_holder
1310  *
1311  * Returns: errno
1312  */
1313
1314 int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
1315                       const struct gfs2_glock_operations *glops,
1316                       unsigned int state, u16 flags, struct gfs2_holder *gh)
1317 {
1318         struct gfs2_glock *gl;
1319         int error;
1320
1321         error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1322         if (!error) {
1323                 error = gfs2_glock_nq_init(gl, state, flags, gh);
1324                 gfs2_glock_put(gl);
1325         }
1326
1327         return error;
1328 }
1329
1330 /**
1331  * glock_compare - Compare two struct gfs2_glock structures for sorting
1332  * @arg_a: the first structure
1333  * @arg_b: the second structure
1334  *
1335  */
1336
1337 static int glock_compare(const void *arg_a, const void *arg_b)
1338 {
1339         const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
1340         const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
1341         const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1342         const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1343
1344         if (a->ln_number > b->ln_number)
1345                 return 1;
1346         if (a->ln_number < b->ln_number)
1347                 return -1;
1348         BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
1349         return 0;
1350 }
1351
1352 /**
1353  * nq_m_sync - synchronously acquire more than one glock in deadlock-free order
1354  * @num_gh: the number of structures
1355  * @ghs: an array of struct gfs2_holder structures
1356  *
1357  * Returns: 0 on success (all glocks acquired),
1358  *          errno on failure (no glocks acquired)
1359  */
1360
1361 static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1362                      struct gfs2_holder **p)
1363 {
1364         unsigned int x;
1365         int error = 0;
1366
1367         for (x = 0; x < num_gh; x++)
1368                 p[x] = &ghs[x];
1369
1370         sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1371
1372         for (x = 0; x < num_gh; x++) {
1373                 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1374
1375                 error = gfs2_glock_nq(p[x]);
1376                 if (error) {
1377                         while (x--)
1378                                 gfs2_glock_dq(p[x]);
1379                         break;
1380                 }
1381         }
1382
1383         return error;
1384 }
1385
1386 /**
1387  * gfs2_glock_nq_m - acquire multiple glocks
1388  * @num_gh: the number of structures
1389  * @ghs: an array of struct gfs2_holder structures
1390  *
1391  *
1392  * Returns: 0 on success (all glocks acquired),
1393  *          errno on failure (no glocks acquired)
1394  */
1395
1396 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1397 {
1398         struct gfs2_holder *tmp[4];
1399         struct gfs2_holder **pph = tmp;
1400         int error = 0;
1401
1402         switch(num_gh) {
1403         case 0:
1404                 return 0;
1405         case 1:
1406                 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1407                 return gfs2_glock_nq(ghs);
1408         default:
1409                 if (num_gh <= 4)
1410                         break;
1411                 pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
1412                                     GFP_NOFS);
1413                 if (!pph)
1414                         return -ENOMEM;
1415         }
1416
1417         error = nq_m_sync(num_gh, ghs, pph);
1418
1419         if (pph != tmp)
1420                 kfree(pph);
1421
1422         return error;
1423 }
1424
1425 /**
1426  * gfs2_glock_dq_m - release multiple glocks
1427  * @num_gh: the number of structures
1428  * @ghs: an array of struct gfs2_holder structures
1429  *
1430  */
1431
1432 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1433 {
1434         while (num_gh--)
1435                 gfs2_glock_dq(&ghs[num_gh]);
1436 }
1437
1438 void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1439 {
1440         unsigned long delay = 0;
1441         unsigned long holdtime;
1442         unsigned long now = jiffies;
1443
1444         gfs2_glock_hold(gl);
1445         holdtime = gl->gl_tchange + gl->gl_hold_time;
1446         if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
1447             gl->gl_name.ln_type == LM_TYPE_INODE) {
1448                 if (time_before(now, holdtime))
1449                         delay = holdtime - now;
1450                 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
1451                         delay = gl->gl_hold_time;
1452         }
1453
1454         spin_lock(&gl->gl_lockref.lock);
1455         handle_callback(gl, state, delay, true);
1456         __gfs2_glock_queue_work(gl, delay);
1457         spin_unlock(&gl->gl_lockref.lock);
1458 }
1459
1460 /**
1461  * gfs2_should_freeze - Figure out if glock should be frozen
1462  * @gl: The glock in question
1463  *
1464  * Glocks are not frozen if (a) the result of the dlm operation is
1465  * an error, (b) the locking operation was an unlock operation, or
1466  * (c) there is a "noexp" flagged request anywhere in the queue
1467  *
1468  * Returns: 1 if freezing should occur, 0 otherwise
1469  */
1470
1471 static int gfs2_should_freeze(const struct gfs2_glock *gl)
1472 {
1473         const struct gfs2_holder *gh;
1474
1475         if (gl->gl_reply & ~LM_OUT_ST_MASK)
1476                 return 0;
1477         if (gl->gl_target == LM_ST_UNLOCKED)
1478                 return 0;
1479
1480         list_for_each_entry(gh, &gl->gl_holders, gh_list) {
1481                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1482                         continue;
1483                 if (LM_FLAG_NOEXP & gh->gh_flags)
1484                         return 0;
1485         }
1486
1487         return 1;
1488 }
1489
1490 /**
1491  * gfs2_glock_complete - Callback used by locking
1492  * @gl: Pointer to the glock
1493  * @ret: The return value from the dlm
1494  *
1495  * The gl_reply field is under the gl_lockref.lock lock so that it is ok
1496  * to use a bitfield shared with other glock state fields.
1497  */
1498
1499 void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1500 {
1501         struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
1502
1503         spin_lock(&gl->gl_lockref.lock);
1504         gl->gl_reply = ret;
1505
1506         if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
1507                 if (gfs2_should_freeze(gl)) {
1508                         set_bit(GLF_FROZEN, &gl->gl_flags);
1509                         spin_unlock(&gl->gl_lockref.lock);
1510                         return;
1511                 }
1512         }
1513
1514         gl->gl_lockref.count++;
1515         set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1516         __gfs2_glock_queue_work(gl, 0);
1517         spin_unlock(&gl->gl_lockref.lock);
1518 }
1519
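/*
 * list_sort() comparison callback: order glocks by lock number so that
 * any disk accesses made while disposing of them are issued in a mostly
 * sequential order (see gfs2_dispose_glock_lru() below).
 */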
1520 static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
1521 {
1522         struct gfs2_glock *gla, *glb;
1523
1524         gla = list_entry(a, struct gfs2_glock, gl_lru);
1525         glb = list_entry(b, struct gfs2_glock, gl_lru);
1526
1527         if (gla->gl_name.ln_number > glb->gl_name.ln_number)
1528                 return 1;
1529         if (gla->gl_name.ln_number < glb->gl_name.ln_number)
1530                 return -1;
1531
1532         return 0;
1533 }
1534
1535 /**
1536  * gfs2_dispose_glock_lru - Demote a list of glocks
1537  * @list: The list to dispose of
1538  *
1539  * Disposing of glocks may involve disk accesses, so here we sort
1540  * the glocks by number (i.e. disk location of the inodes) so that if
1541  * there are any such accesses, they'll be sent in order (mostly).
1542  *
1543  * Must be called under the lru_lock, but may drop and retake this
1544  * lock. While the lru_lock is dropped, entries may vanish from the
1545  * list, but no new entries will appear on the list (since it is
1546  * private)
1547  */
1548
1549 static void gfs2_dispose_glock_lru(struct list_head *list)
1550 __releases(&lru_lock)
1551 __acquires(&lru_lock)
1552 {
1553         struct gfs2_glock *gl;
1554
1555         list_sort(NULL, list, glock_cmp);
1556
1557         while(!list_empty(list)) {
1558                 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1559                 list_del_init(&gl->gl_lru);
1560                 if (!spin_trylock(&gl->gl_lockref.lock)) {
1561 add_back_to_lru:
1562                         list_add(&gl->gl_lru, &lru_list);
1563                         set_bit(GLF_LRU, &gl->gl_flags);
1564                         atomic_inc(&lru_count);
1565                         continue;
1566                 }
1567                 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1568                         spin_unlock(&gl->gl_lockref.lock);
1569                         goto add_back_to_lru;
1570                 }
1571                 gl->gl_lockref.count++;
1572                 if (demote_ok(gl))
1573                         handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1574                 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
1575                 __gfs2_glock_queue_work(gl, 0);
1576                 spin_unlock(&gl->gl_lockref.lock);
1577                 cond_resched_lock(&lru_lock);
1578         }
1579 }
1580
1581 /**
1582  * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
1583  * @nr: The number of entries to scan
1584  *
1585  * This function selects the entries on the LRU which are able to
1586  * be demoted, and then kicks off the process by calling
1587  * gfs2_dispose_glock_lru() above.
1588  */
1589
1590 static long gfs2_scan_glock_lru(int nr)
1591 {
1592         struct gfs2_glock *gl;
1593         LIST_HEAD(skipped);
1594         LIST_HEAD(dispose);
1595         long freed = 0;
1596
1597         spin_lock(&lru_lock);
1598         while ((nr-- >= 0) && !list_empty(&lru_list)) {
1599                 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1600
1601                 /* Test for being demotable */
1602                 if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
1603                         list_move(&gl->gl_lru, &dispose);
1604                         atomic_dec(&lru_count);
1605                         clear_bit(GLF_LRU, &gl->gl_flags);
1606                         freed++;
1607                         continue;
1608                 }
1609
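                /* Glock is busy (GLF_LOCK set); skip it for this scan. */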
1610                 list_move(&gl->gl_lru, &skipped);
1611         }
1612         list_splice(&skipped, &lru_list);
1613         if (!list_empty(&dispose))
1614                 gfs2_dispose_glock_lru(&dispose);
1615         spin_unlock(&lru_lock);
1616
1617         return freed;
1618 }
1619
1620 static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
1621                                             struct shrink_control *sc)
1622 {
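        /*
         * Demoting glocks can recurse into the filesystem (e.g. writeback),
         * so refuse to shrink when the allocation context does not allow
         * fs recursion (__GFP_FS clear).
         */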
1623         if (!(sc->gfp_mask & __GFP_FS))
1624                 return SHRINK_STOP;
1625         return gfs2_scan_glock_lru(sc->nr_to_scan);
1626 }
1627
1628 static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
1629                                              struct shrink_control *sc)
1630 {
1631         return vfs_pressure_ratio(atomic_read(&lru_count));
1632 }
1633
1634 static struct shrinker glock_shrinker = {
1635         .seeks = DEFAULT_SEEKS,
1636         .count_objects = gfs2_glock_shrink_count,
1637         .scan_objects = gfs2_glock_shrink_scan,
1638 };
1639
1640 /**
1641  * glock_hash_walk - Call a function for each glock belonging to a filesystem
1642  * @examiner: the function
1643  * @sdp: the filesystem
1645  *
1646  * Note that the function can be called multiple times on the same
1647  * object.  So the user must ensure that the function can cope with
1648  * that.
1649  */
1650
1651 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1652 {
1653         struct gfs2_glock *gl;
1654         struct rhashtable_iter iter;
1655
1656         rhashtable_walk_enter(&gl_hash_table, &iter);
1657
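        /*
         * rhashtable_walk_next() may return ERR_PTR(-EAGAIN) if the table
         * was resized during the walk; in that case the walk is restarted,
         * which is why the examiner may see the same glock more than once.
         */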
1658         do {
1659                 rhashtable_walk_start(&iter);
1660
1661                 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
1662                         if (gl->gl_name.ln_sbd == sdp &&
1663                             lockref_get_not_dead(&gl->gl_lockref))
1664                                 examiner(gl);
1665
1666                 rhashtable_walk_stop(&iter);
1667         } while (cond_resched(), gl == ERR_PTR(-EAGAIN));
1668
1669         rhashtable_walk_exit(&iter);
1670 }
1671
1672 /**
1673  * thaw_glock - thaw out a glock which has an unprocessed reply waiting
1674  * @gl: The glock to thaw
1675  *
1676  */
1677
1678 static void thaw_glock(struct gfs2_glock *gl)
1679 {
1680         if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) {
1681                 gfs2_glock_put(gl);
1682                 return;
1683         }
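        /*
         * A DLM reply was saved in gl_reply while locks were blocked;
         * queue the glock work so the state machine can process it now.
         */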
1684         set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1685         gfs2_glock_queue_work(gl, 0);
1686 }
1687
1688 /**
1689  * clear_glock - queue a demote to unlocked so the glock can be freed
1690  * @gl: the glock to look at
1691  *
1692  */
1693
1694 static void clear_glock(struct gfs2_glock *gl)
1695 {
1696         gfs2_glock_remove_from_lru(gl);
1697
1698         spin_lock(&gl->gl_lockref.lock);
1699         if (gl->gl_state != LM_ST_UNLOCKED)
1700                 handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1701         __gfs2_glock_queue_work(gl, 0);
1702         spin_unlock(&gl->gl_lockref.lock);
1703 }
1704
1705 /**
1706  * gfs2_glock_thaw - Thaw any frozen glocks
1707  * @sdp: The super block
1708  *
1709  */
1710
1711 void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1712 {
1713         glock_hash_walk(thaw_glock, sdp);
1714 }
1715
1716 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
1717 {
1718         spin_lock(&gl->gl_lockref.lock);
1719         gfs2_dump_glock(seq, gl, fsid);
1720         spin_unlock(&gl->gl_lockref.lock);
1721 }
1722
1723 static void dump_glock_func(struct gfs2_glock *gl)
1724 {
1725         dump_glock(NULL, gl, true);
1726 }
1727
1728 /**
1729  * gfs2_gl_hash_clear - Empty out the glock hash table
1730  * @sdp: the filesystem
1732  *
1733  * Called when unmounting the filesystem.
1734  */
1735
1736 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1737 {
1738         set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
1739         flush_workqueue(glock_workqueue);
1740         glock_hash_walk(clear_glock, sdp);
1741         flush_workqueue(glock_workqueue);
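        /* Wait up to ten minutes for all glocks to be disposed of. */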
1742         wait_event_timeout(sdp->sd_glock_wait,
1743                            atomic_read(&sdp->sd_glock_disposal) == 0,
1744                            HZ * 600);
1745         glock_hash_walk(dump_glock_func, sdp);
1746 }
1747
1748 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
1749 {
1750         struct gfs2_glock *gl = ip->i_gl;
1751         int ret;
1752
1753         ret = gfs2_truncatei_resume(ip);
1754         gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0);
1755
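        /*
         * The interrupted truncate has now been resumed; release the glock
         * state machine (GLF_LOCK) and process any holders or demote
         * requests that queued up in the meantime.
         */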
1756         spin_lock(&gl->gl_lockref.lock);
1757         clear_bit(GLF_LOCK, &gl->gl_flags);
1758         run_queue(gl, 1);
1759         spin_unlock(&gl->gl_lockref.lock);
1760 }
1761
1762 static const char *state2str(unsigned state)
1763 {
1764         switch (state) {
1765         case LM_ST_UNLOCKED:
1766                 return "UN";
1767         case LM_ST_SHARED:
1768                 return "SH";
1769         case LM_ST_DEFERRED:
1770                 return "DF";
1771         case LM_ST_EXCLUSIVE:
1772                 return "EX";
1773         }
1774         return "??";
1775 }
1776
1777 static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
1778 {
1779         char *p = buf;
1780         if (flags & LM_FLAG_TRY)
1781                 *p++ = 't';
1782         if (flags & LM_FLAG_TRY_1CB)
1783                 *p++ = 'T';
1784         if (flags & LM_FLAG_NOEXP)
1785                 *p++ = 'e';
1786         if (flags & LM_FLAG_ANY)
1787                 *p++ = 'A';
1788         if (flags & LM_FLAG_PRIORITY)
1789                 *p++ = 'p';
1790         if (flags & GL_ASYNC)
1791                 *p++ = 'a';
1792         if (flags & GL_EXACT)
1793                 *p++ = 'E';
1794         if (flags & GL_NOCACHE)
1795                 *p++ = 'c';
1796         if (test_bit(HIF_HOLDER, &iflags))
1797                 *p++ = 'H';
1798         if (test_bit(HIF_WAIT, &iflags))
1799                 *p++ = 'W';
1800         if (test_bit(HIF_FIRST, &iflags))
1801                 *p++ = 'F';
1802         *p = 0;
1803         return buf;
1804 }
1805
1806 /**
1807  * dump_holder - print information about a glock holder
1808  * @seq: the seq_file struct
1809  * @gh: the glock holder
1810  * @fs_id_buf: pointer to file system id (if requested)
1811  *
1812  */
1813
1814 static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
1815                         const char *fs_id_buf)
1816 {
1817         struct task_struct *gh_owner = NULL;
1818         char flags_buf[32];
1819
1820         rcu_read_lock();
1821         if (gh->gh_owner_pid)
1822                 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1823         gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
1824                        fs_id_buf, state2str(gh->gh_state),
1825                        hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
1826                        gh->gh_error,
1827                        gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1828                        gh_owner ? gh_owner->comm : "(ended)",
1829                        (void *)gh->gh_ip);
1830         rcu_read_unlock();
1831 }
1832
1833 static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1834 {
1835         const unsigned long *gflags = &gl->gl_flags;
1836         char *p = buf;
1837
1838         if (test_bit(GLF_LOCK, gflags))
1839                 *p++ = 'l';
1840         if (test_bit(GLF_DEMOTE, gflags))
1841                 *p++ = 'D';
1842         if (test_bit(GLF_PENDING_DEMOTE, gflags))
1843                 *p++ = 'd';
1844         if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
1845                 *p++ = 'p';
1846         if (test_bit(GLF_DIRTY, gflags))
1847                 *p++ = 'y';
1848         if (test_bit(GLF_LFLUSH, gflags))
1849                 *p++ = 'f';
1850         if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
1851                 *p++ = 'i';
1852         if (test_bit(GLF_REPLY_PENDING, gflags))
1853                 *p++ = 'r';
1854         if (test_bit(GLF_INITIAL, gflags))
1855                 *p++ = 'I';
1856         if (test_bit(GLF_FROZEN, gflags))
1857                 *p++ = 'F';
1858         if (test_bit(GLF_QUEUED, gflags))
1859                 *p++ = 'q';
1860         if (test_bit(GLF_LRU, gflags))
1861                 *p++ = 'L';
1862         if (gl->gl_object)
1863                 *p++ = 'o';
1864         if (test_bit(GLF_BLOCKING, gflags))
1865                 *p++ = 'b';
1866         *p = 0;
1867         return buf;
1868 }
1869
1870 /**
1871  * gfs2_dump_glock - print information about a glock
1872  * @seq: The seq_file struct
1873  * @gl: the glock
1874  * @fsid: If true, also dump the file system id
1875  *
1876  * The file format is as follows:
1877  * One line per object; capital letters indicate the object type:
1878  * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
1879  * other objects are indented by a single space and follow the glock to
1880  * which they are related. Fields are indicated by lower case letters
1881  * followed by a colon and the field value, except for strings, which are
1882  * enclosed in [] so that it's possible to see if they are composed of
1883  * spaces, for example. The fields are: n = number (id of the object),
1884  * f = flags, t = type, s = state, r = refcount, e = error, p = pid.
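 * Example of the output (values are illustrative only):
 *
 *   G:  s:SH n:2/3c4b f:Iqb t:SH d:EX/0 a:0 v:0 r:3 m:200
 *    H: s:SH f:EH e:0 p:1742 [cat] gfs2_inode_lookup+0x11a/0x2a0 [gfs2]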
1885  *
1886  */
1887
1888 void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
1889 {
1890         const struct gfs2_glock_operations *glops = gl->gl_ops;
1891         unsigned long long dtime;
1892         const struct gfs2_holder *gh;
1893         char gflags_buf[32];
1894         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1895         char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
1896
1897         memset(fs_id_buf, 0, sizeof(fs_id_buf));
1898         if (fsid && sdp) /* safety precaution */
1899                 sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
1900         dtime = jiffies - gl->gl_demote_time;
1901         dtime *= 1000000/HZ; /* demote time in uSec */
1902         if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1903                 dtime = 0;
1904         gfs2_print_dbg(seq, "%sG:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
1905                        "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
1906                   gl->gl_name.ln_type,
1907                   (unsigned long long)gl->gl_name.ln_number,
1908                   gflags2str(gflags_buf, gl),
1909                   state2str(gl->gl_target),
1910                   state2str(gl->gl_demote_state), dtime,
1911                   atomic_read(&gl->gl_ail_count),
1912                   atomic_read(&gl->gl_revokes),
1913                   (int)gl->gl_lockref.count, gl->gl_hold_time);
1914
1915         list_for_each_entry(gh, &gl->gl_holders, gh_list)
1916                 dump_holder(seq, gh, fs_id_buf);
1917
1918         if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
1919                 glops->go_dump(seq, gl, fs_id_buf);
1920 }
1921
1922 static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
1923 {
1924         struct gfs2_glock *gl = iter_ptr;
1925
1926         seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
1927                    gl->gl_name.ln_type,
1928                    (unsigned long long)gl->gl_name.ln_number,
1929                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
1930                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
1931                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
1932                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
1933                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
1934                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
1935                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
1936                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
1937         return 0;
1938 }
1939
1940 static const char *gfs2_gltype[] = {
1941         "type",
1942         "reserved",
1943         "nondisk",
1944         "inode",
1945         "rgrp",
1946         "meta",
1947         "iopen",
1948         "flock",
1949         "plock",
1950         "quota",
1951         "journal",
1952 };
1953
1954 static const char *gfs2_stype[] = {
1955         [GFS2_LKS_SRTT]         = "srtt",
1956         [GFS2_LKS_SRTTVAR]      = "srttvar",
1957         [GFS2_LKS_SRTTB]        = "srttb",
1958         [GFS2_LKS_SRTTVARB]     = "srttvarb",
1959         [GFS2_LKS_SIRT]         = "sirt",
1960         [GFS2_LKS_SIRTVAR]      = "sirtvar",
1961         [GFS2_LKS_DCOUNT]       = "dlm",
1962         [GFS2_LKS_QCOUNT]       = "queue",
1963 };
1964
1965 #define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
1966
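/*
 * Each glock type occupies eight consecutive positions in the sbstats
 * file, one per statistic in gfs2_stype[]; hence index = pos >> 3 and
 * subindex = pos & 7 below.  Index 0 is the header row of CPU numbers.
 */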
1967 static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
1968 {
1969         struct gfs2_sbd *sdp = seq->private;
1970         loff_t pos = *(loff_t *)iter_ptr;
1971         unsigned index = pos >> 3;
1972         unsigned subindex = pos & 0x07;
1973         int i;
1974
1975         if (index == 0 && subindex != 0)
1976                 return 0;
1977
1978         seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
1979                    (index == 0) ? "cpu": gfs2_stype[subindex]);
1980
1981         for_each_possible_cpu(i) {
1982                 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
1983
1984                 if (index == 0)
1985                         seq_printf(seq, " %15u", i);
1986                 else
1987                         seq_printf(seq, " %15llu", (unsigned long long)lkstats->
1988                                    lkstats[index - 1].stats[subindex]);
1989         }
1990         seq_putc(seq, '\n');
1991         return 0;
1992 }
1993
1994 int __init gfs2_glock_init(void)
1995 {
1996         int i, ret;
1997
1998         ret = rhashtable_init(&gl_hash_table, &ht_parms);
1999         if (ret < 0)
2000                 return ret;
2001
2002         glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
2003                                           WQ_HIGHPRI | WQ_FREEZABLE, 0);
2004         if (!glock_workqueue) {
2005                 rhashtable_destroy(&gl_hash_table);
2006                 return -ENOMEM;
2007         }
2008         gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
2009                                                 WQ_MEM_RECLAIM | WQ_FREEZABLE,
2010                                                 0);
2011         if (!gfs2_delete_workqueue) {
2012                 destroy_workqueue(glock_workqueue);
2013                 rhashtable_destroy(&gl_hash_table);
2014                 return -ENOMEM;
2015         }
2016
2017         ret = register_shrinker(&glock_shrinker);
2018         if (ret) {
2019                 destroy_workqueue(gfs2_delete_workqueue);
2020                 destroy_workqueue(glock_workqueue);
2021                 rhashtable_destroy(&gl_hash_table);
2022                 return ret;
2023         }
2024
2025         for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
2026                 init_waitqueue_head(glock_wait_table + i);
2027
2028         return 0;
2029 }
2030
2031 void gfs2_glock_exit(void)
2032 {
2033         unregister_shrinker(&glock_shrinker);
2034         rhashtable_destroy(&gl_hash_table);
2035         destroy_workqueue(glock_workqueue);
2036         destroy_workqueue(gfs2_delete_workqueue);
2037 }
2038
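/*
 * Advance the iterator by @n positions.  @n == 0 means stay on the
 * current glock; otherwise the reference on the current glock (if any)
 * is dropped and the walk skips glocks belonging to other filesystems
 * as well as dead glocks until the n'th live glock is found and a
 * reference taken.  If the hash table is resized mid-walk (-EAGAIN),
 * @n is reset to 1 and the next live glock is used.
 */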
2039 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
2040 {
2041         struct gfs2_glock *gl = gi->gl;
2042
2043         if (gl) {
2044                 if (n == 0)
2045                         return;
2046                 if (!lockref_put_not_zero(&gl->gl_lockref))
2047                         gfs2_glock_queue_put(gl);
2048         }
2049         for (;;) {
2050                 gl = rhashtable_walk_next(&gi->hti);
2051                 if (IS_ERR_OR_NULL(gl)) {
2052                         if (gl == ERR_PTR(-EAGAIN)) {
2053                                 n = 1;
2054                                 continue;
2055                         }
2056                         gl = NULL;
2057                         break;
2058                 }
2059                 if (gl->gl_name.ln_sbd != gi->sdp)
2060                         continue;
2061                 if (n <= 1) {
2062                         if (!lockref_get_not_dead(&gl->gl_lockref))
2063                                 continue;
2064                         break;
2065                 } else {
2066                         if (__lockref_is_dead(&gl->gl_lockref))
2067                                 continue;
2068                         n--;
2069                 }
2070         }
2071         gi->gl = gl;
2072 }
2073
2074 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
2075         __acquires(RCU)
2076 {
2077         struct gfs2_glock_iter *gi = seq->private;
2078         loff_t n;
2079
2080         /*
2081          * We can either stay where we are, skip to the next hash table
2082          * entry, or start from the beginning.
2083          */
2084         if (*pos < gi->last_pos) {
2085                 rhashtable_walk_exit(&gi->hti);
2086                 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2087                 n = *pos + 1;
2088         } else {
2089                 n = *pos - gi->last_pos;
2090         }
2091
2092         rhashtable_walk_start(&gi->hti);
2093
2094         gfs2_glock_iter_next(gi, n);
2095         gi->last_pos = *pos;
2096         return gi->gl;
2097 }
2098
2099 static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
2100                                  loff_t *pos)
2101 {
2102         struct gfs2_glock_iter *gi = seq->private;
2103
2104         (*pos)++;
2105         gi->last_pos = *pos;
2106         gfs2_glock_iter_next(gi, 1);
2107         return gi->gl;
2108 }
2109
2110 static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
2111         __releases(RCU)
2112 {
2113         struct gfs2_glock_iter *gi = seq->private;
2114
2115         rhashtable_walk_stop(&gi->hti);
2116 }
2117
2118 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
2119 {
2120         dump_glock(seq, iter_ptr, false);
2121         return 0;
2122 }
2123
2124 static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
2125 {
2126         preempt_disable();
2127         if (*pos >= GFS2_NR_SBSTATS)
2128                 return NULL;
2129         return pos;
2130 }
2131
2132 static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
2133                                    loff_t *pos)
2134 {
2135         (*pos)++;
2136         if (*pos >= GFS2_NR_SBSTATS)
2137                 return NULL;
2138         return pos;
2139 }
2140
2141 static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
2142 {
2143         preempt_enable();
2144 }
2145
2146 static const struct seq_operations gfs2_glock_seq_ops = {
2147         .start = gfs2_glock_seq_start,
2148         .next  = gfs2_glock_seq_next,
2149         .stop  = gfs2_glock_seq_stop,
2150         .show  = gfs2_glock_seq_show,
2151 };
2152
2153 static const struct seq_operations gfs2_glstats_seq_ops = {
2154         .start = gfs2_glock_seq_start,
2155         .next  = gfs2_glock_seq_next,
2156         .stop  = gfs2_glock_seq_stop,
2157         .show  = gfs2_glstats_seq_show,
2158 };
2159
2160 static const struct seq_operations gfs2_sbstats_seq_ops = {
2161         .start = gfs2_sbstats_seq_start,
2162         .next  = gfs2_sbstats_seq_next,
2163         .stop  = gfs2_sbstats_seq_stop,
2164         .show  = gfs2_sbstats_seq_show,
2165 };
2166
2167 #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
2168
2169 static int __gfs2_glocks_open(struct inode *inode, struct file *file,
2170                               const struct seq_operations *ops)
2171 {
2172         int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));
2173         if (ret == 0) {
2174                 struct seq_file *seq = file->private_data;
2175                 struct gfs2_glock_iter *gi = seq->private;
2176
2177                 gi->sdp = inode->i_private;
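                /*
                 * Try to allocate a larger-than-default seq_file buffer up
                 * front so that most dumps complete without reallocating;
                 * if the allocation fails, seq_read() falls back to its
                 * usual page-sized buffer.
                 */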
2178                 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
2179                 if (seq->buf)
2180                         seq->size = GFS2_SEQ_GOODSIZE;
2181                 /*
2182                  * Initially, we are "before" the first hash table entry; the
2183                  * first call to rhashtable_walk_next gets us the first entry.
2184                  */
2185                 gi->last_pos = -1;
2186                 gi->gl = NULL;
2187                 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2188         }
2189         return ret;
2190 }
2191
2192 static int gfs2_glocks_open(struct inode *inode, struct file *file)
2193 {
2194         return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
2195 }
2196
2197 static int gfs2_glocks_release(struct inode *inode, struct file *file)
2198 {
2199         struct seq_file *seq = file->private_data;
2200         struct gfs2_glock_iter *gi = seq->private;
2201
2202         if (gi->gl)
2203                 gfs2_glock_put(gi->gl);
2204         rhashtable_walk_exit(&gi->hti);
2205         return seq_release_private(inode, file);
2206 }
2207
2208 static int gfs2_glstats_open(struct inode *inode, struct file *file)
2209 {
2210         return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
2211 }
2212
2213 static int gfs2_sbstats_open(struct inode *inode, struct file *file)
2214 {
2215         int ret = seq_open(file, &gfs2_sbstats_seq_ops);
2216         if (ret == 0) {
2217                 struct seq_file *seq = file->private_data;
2218                 seq->private = inode->i_private;  /* sdp */
2219         }
2220         return ret;
2221 }
2222
2223 static const struct file_operations gfs2_glocks_fops = {
2224         .owner   = THIS_MODULE,
2225         .open    = gfs2_glocks_open,
2226         .read    = seq_read,
2227         .llseek  = seq_lseek,
2228         .release = gfs2_glocks_release,
2229 };
2230
2231 static const struct file_operations gfs2_glstats_fops = {
2232         .owner   = THIS_MODULE,
2233         .open    = gfs2_glstats_open,
2234         .read    = seq_read,
2235         .llseek  = seq_lseek,
2236         .release = gfs2_glocks_release,
2237 };
2238
2239 static const struct file_operations gfs2_sbstats_fops = {
2240         .owner   = THIS_MODULE,
2241         .open    = gfs2_sbstats_open,
2242         .read    = seq_read,
2243         .llseek  = seq_lseek,
2244         .release = seq_release,
2245 };
2246
2247 void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
2248 {
2249         sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
2250
2251         debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2252                             &gfs2_glocks_fops);
2253
2254         debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2255                             &gfs2_glstats_fops);
2256
2257         debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2258                             &gfs2_sbstats_fops);
2259 }
2260
2261 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
2262 {
2263         debugfs_remove_recursive(sdp->debugfs_dir);
2264         sdp->debugfs_dir = NULL;
2265 }
2266
2267 void gfs2_register_debugfs(void)
2268 {
2269         gfs2_root = debugfs_create_dir("gfs2", NULL);
2270 }
2271
2272 void gfs2_unregister_debugfs(void)
2273 {
2274         debugfs_remove(gfs2_root);
2275         gfs2_root = NULL;
2276 }