/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2010, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ldlm/ldlm_lock.c
 *
 * Author: Peter Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LDLM

#include <linux/libcfs/libcfs.h>
#include <linux/lustre_intent.h>

#include <obd_class.h>
#include "ldlm_internal.h"

/* lock types */
char *ldlm_lockname[] = {
        [0]             = "--",
        [LCK_EX]        = "EX",
        [LCK_PW]        = "PW",
        [LCK_PR]        = "PR",
        [LCK_CW]        = "CW",
        [LCK_CR]        = "CR",
        [LCK_NL]        = "NL",
        [LCK_GROUP]     = "GROUP",
        [LCK_COS]       = "COS",
};
EXPORT_SYMBOL(ldlm_lockname);
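
/*
 * The mode abbreviations above expand to: EX exclusive, PW protected
 * write, PR protected read, CW concurrent write, CR concurrent read,
 * NL null, GROUP group mode, COS commit-on-sharing.
 */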

char *ldlm_typename[] = {
        [LDLM_PLAIN]    = "PLN",
        [LDLM_EXTENT]   = "EXT",
        [LDLM_FLOCK]    = "FLK",
        [LDLM_IBITS]    = "IBT",
};
EXPORT_SYMBOL(ldlm_typename);

static ldlm_policy_wire_to_local_t ldlm_policy_wire18_to_local[] = {
        [LDLM_PLAIN - LDLM_MIN_TYPE]    = ldlm_plain_policy_wire_to_local,
        [LDLM_EXTENT - LDLM_MIN_TYPE]   = ldlm_extent_policy_wire_to_local,
        [LDLM_FLOCK - LDLM_MIN_TYPE]    = ldlm_flock_policy_wire18_to_local,
        [LDLM_IBITS - LDLM_MIN_TYPE]    = ldlm_ibits_policy_wire_to_local,
};

static ldlm_policy_wire_to_local_t ldlm_policy_wire21_to_local[] = {
        [LDLM_PLAIN - LDLM_MIN_TYPE]    = ldlm_plain_policy_wire_to_local,
        [LDLM_EXTENT - LDLM_MIN_TYPE]   = ldlm_extent_policy_wire_to_local,
        [LDLM_FLOCK - LDLM_MIN_TYPE]    = ldlm_flock_policy_wire21_to_local,
        [LDLM_IBITS - LDLM_MIN_TYPE]    = ldlm_ibits_policy_wire_to_local,
};

static ldlm_policy_local_to_wire_t ldlm_policy_local_to_wire[] = {
        [LDLM_PLAIN - LDLM_MIN_TYPE]    = ldlm_plain_policy_local_to_wire,
        [LDLM_EXTENT - LDLM_MIN_TYPE]   = ldlm_extent_policy_local_to_wire,
        [LDLM_FLOCK - LDLM_MIN_TYPE]    = ldlm_flock_policy_local_to_wire,
        [LDLM_IBITS - LDLM_MIN_TYPE]    = ldlm_ibits_policy_local_to_wire,
};

/**
 * Converts lock policy from local format to on the wire lock_desc format
 */
void ldlm_convert_policy_to_wire(ldlm_type_t type,
                                 const ldlm_policy_data_t *lpolicy,
                                 ldlm_wire_policy_data_t *wpolicy)
{
        ldlm_policy_local_to_wire_t convert;

        convert = ldlm_policy_local_to_wire[type - LDLM_MIN_TYPE];

        convert(lpolicy, wpolicy);
}
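
/*
 * Illustrative use (a sketch, not a caller from this file): packing a
 * whole-file extent lock policy for the wire.
 *
 *      ldlm_policy_data_t lpolicy = {
 *              .l_extent = { .start = 0, .end = OBD_OBJECT_EOF },
 *      };
 *      ldlm_wire_policy_data_t wpolicy;
 *
 *      ldlm_convert_policy_to_wire(LDLM_EXTENT, &lpolicy, &wpolicy);
 */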

/**
 * Converts lock policy from on the wire lock_desc format to local format
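 *
 * Clients that did not negotiate OBD_CONNECT_FULL20 (i.e. pre-2.0
 * clients) still use the 1.8-era flock wire format, which is why two
 * wire-to-local conversion tables exist above; they differ only in
 * their flock entry.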
 */
void ldlm_convert_policy_to_local(struct obd_export *exp, ldlm_type_t type,
                                  const ldlm_wire_policy_data_t *wpolicy,
                                  ldlm_policy_data_t *lpolicy)
{
        ldlm_policy_wire_to_local_t convert;
        int new_client;

        /* some badness for 2.0.0 clients, but 2.0.0 isn't supported */
        new_client = (exp_connect_flags(exp) & OBD_CONNECT_FULL20) != 0;
        if (new_client)
                convert = ldlm_policy_wire21_to_local[type - LDLM_MIN_TYPE];
        else
                convert = ldlm_policy_wire18_to_local[type - LDLM_MIN_TYPE];

        convert(wpolicy, lpolicy);
}

char *ldlm_it2str(int it)
{
        switch (it) {
        case IT_OPEN:
                return "open";
        case IT_CREAT:
                return "creat";
        case (IT_OPEN | IT_CREAT):
                return "open|creat";
        case IT_READDIR:
                return "readdir";
        case IT_GETATTR:
                return "getattr";
        case IT_LOOKUP:
                return "lookup";
        case IT_UNLINK:
                return "unlink";
        case IT_GETXATTR:
                return "getxattr";
        case IT_LAYOUT:
                return "layout";
        default:
                CERROR("Unknown intent %d\n", it);
                return "UNKNOWN";
        }
}
EXPORT_SYMBOL(ldlm_it2str);

extern struct kmem_cache *ldlm_lock_slab;

void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg)
{
        ns->ns_policy = arg;
}
EXPORT_SYMBOL(ldlm_register_intent);

/*
 * REFCOUNTED LOCK OBJECTS
 */

/**
 * Get a reference on a lock.
 *
 * Lock refcounts, during creation:
 *   - one special one for allocation, dec'd only once in destroy
 *   - one for being a lock that's in-use
 *   - one for the addref associated with a new lock
 */
struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
{
        atomic_inc(&lock->l_refc);
        return lock;
}
EXPORT_SYMBOL(ldlm_lock_get);
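
/*
 * Note: every reference obtained here (usually through the
 * LDLM_LOCK_GET() wrapper) must eventually be balanced by an
 * ldlm_lock_put()/LDLM_LOCK_PUT() once the caller is done with the
 * lock.
 */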

/**
 * Release lock reference.
 *
 * Also frees the lock if it was last reference.
 */
void ldlm_lock_put(struct ldlm_lock *lock)
{
        LASSERT(lock->l_resource != LP_POISON);
        LASSERT(atomic_read(&lock->l_refc) > 0);
        if (atomic_dec_and_test(&lock->l_refc)) {
                struct ldlm_resource *res;

                LDLM_DEBUG(lock,
                           "final lock_put on destroyed lock, freeing it.");

                res = lock->l_resource;
                LASSERT(lock->l_flags & LDLM_FL_DESTROYED);
                LASSERT(list_empty(&lock->l_res_link));
                LASSERT(list_empty(&lock->l_pending_chain));

                lprocfs_counter_decr(ldlm_res_to_ns(res)->ns_stats,
                                     LDLM_NSS_LOCKS);
                lu_ref_del(&res->lr_reference, "lock", lock);
                ldlm_resource_putref(res);
                lock->l_resource = NULL;
                if (lock->l_export) {
                        class_export_lock_put(lock->l_export, lock);
                        lock->l_export = NULL;
                }

                if (lock->l_lvb_data != NULL)
                        OBD_FREE(lock->l_lvb_data, lock->l_lvb_len);

                ldlm_interval_free(ldlm_interval_detach(lock));
                lu_ref_fini(&lock->l_reference);
                OBD_FREE_RCU(lock, sizeof(*lock), &lock->l_handle);
        }
}
EXPORT_SYMBOL(ldlm_lock_put);

/**
 * Removes LDLM lock \a lock from LRU. Assumes LRU is already locked.
 */
int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
{
        int rc = 0;

        if (!list_empty(&lock->l_lru)) {
                struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);

                LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
                list_del_init(&lock->l_lru);
                lock->l_flags &= ~LDLM_FL_SKIPPED;
                LASSERT(ns->ns_nr_unused > 0);
                ns->ns_nr_unused--;
                rc = 1;
        }
        return rc;
}

/**
 * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first.
 */
int ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
{
        struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
        int rc;

        if (lock->l_flags & LDLM_FL_NS_SRV) {
                LASSERT(list_empty(&lock->l_lru));
                return 0;
        }

        spin_lock(&ns->ns_lock);
        rc = ldlm_lock_remove_from_lru_nolock(lock);
        spin_unlock(&ns->ns_lock);
        return rc;
}

/**
 * Adds LDLM lock \a lock to namespace LRU. Assumes LRU is already locked.
 */
void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
{
        struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);

        lock->l_last_used = cfs_time_current();
        LASSERT(list_empty(&lock->l_lru));
        LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
        list_add_tail(&lock->l_lru, &ns->ns_unused_list);
        LASSERT(ns->ns_nr_unused >= 0);
        ns->ns_nr_unused++;
}

/**
 * Adds LDLM lock \a lock to namespace LRU. Obtains necessary LRU locks
 * first.
 */
void ldlm_lock_add_to_lru(struct ldlm_lock *lock)
{
        struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);

        spin_lock(&ns->ns_lock);
        ldlm_lock_add_to_lru_nolock(lock);
        spin_unlock(&ns->ns_lock);
}

/**
 * Moves LDLM lock \a lock that is already in namespace LRU to the tail of
 * the LRU. Performs necessary LRU locking.
 */
void ldlm_lock_touch_in_lru(struct ldlm_lock *lock)
{
        struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);

        if (lock->l_flags & LDLM_FL_NS_SRV) {
                LASSERT(list_empty(&lock->l_lru));
                return;
        }

        spin_lock(&ns->ns_lock);
        if (!list_empty(&lock->l_lru)) {
                ldlm_lock_remove_from_lru_nolock(lock);
                ldlm_lock_add_to_lru_nolock(lock);
        }
        spin_unlock(&ns->ns_lock);
}
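
/*
 * All LRU manipulation above is serialized by ns->ns_lock: the _nolock
 * variants assume the caller already holds it, while the locked
 * variants take and drop it themselves.
 */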

/**
 * Helper to destroy a locked lock.
 *
 * Used by ldlm_lock_destroy and ldlm_lock_destroy_nolock.
 * Must be called with l_lock and lr_lock held.
 *
 * Does not actually free the lock data, but rather marks the lock as
 * destroyed by setting the LDLM_FL_DESTROYED flag.  Destroys the
 * handle->lock association too, so that the lock can no longer be found,
 * and removes the lock from the LRU list.  Actual lock freeing occurs
 * when the last lock reference goes away.
 *
 * Original comment (of some historical value):
 * This used to have a 'strict' flag, which recovery would use to mark an
 * in-use lock as needing-to-die.  Lest I am ever tempted to put it back, I
 * shall explain why it's gone: with the new hash table scheme, once you call
 * ldlm_lock_destroy, you can never drop your final references on this lock.
 * Because it's not in the hash table anymore.  -phil
 */
int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
{
        if (lock->l_readers || lock->l_writers) {
                LDLM_ERROR(lock, "lock still has references");
                LBUG();
        }

        if (!list_empty(&lock->l_res_link)) {
                LDLM_ERROR(lock, "lock still on resource");
                LBUG();
        }

        if (lock->l_flags & LDLM_FL_DESTROYED) {
                LASSERT(list_empty(&lock->l_lru));
                return 0;
        }
        lock->l_flags |= LDLM_FL_DESTROYED;

        if (lock->l_export && lock->l_export->exp_lock_hash) {
                /* NB: it's safe to call cfs_hash_del() even if the lock
                 * isn't in exp_lock_hash. */
                /* In the function below, .hs_keycmp resolves to
                 * ldlm_export_lock_keycmp() */
                /* coverity[overrun-buffer-val] */
                cfs_hash_del(lock->l_export->exp_lock_hash,
                             &lock->l_remote_handle, &lock->l_exp_hash);
        }

        ldlm_lock_remove_from_lru(lock);
        class_handle_unhash(&lock->l_handle);

#if 0
        /* Wake anyone waiting for this lock */
        /* FIXME: I should probably add yet another flag, instead of using
         * l_export to only call this on clients */
        if (lock->l_export)
                class_export_put(lock->l_export);
        lock->l_export = NULL;
        if (lock->l_export && lock->l_completion_ast)
                lock->l_completion_ast(lock, 0);
#endif
        return 1;
}

/**
 * Destroys a LDLM lock \a lock. Performs necessary locking first.
 */
void ldlm_lock_destroy(struct ldlm_lock *lock)
{
        int first;

        lock_res_and_lock(lock);
        first = ldlm_lock_destroy_internal(lock);
        unlock_res_and_lock(lock);

        /* drop reference from hashtable only for first destroy */
        if (first) {
                lu_ref_del(&lock->l_reference, "hash", lock);
                LDLM_LOCK_RELEASE(lock);
        }
}

/**
 * Destroys a LDLM lock \a lock that is already locked.
 */
void ldlm_lock_destroy_nolock(struct ldlm_lock *lock)
{
        int first;

        first = ldlm_lock_destroy_internal(lock);
        /* drop reference from hashtable only for first destroy */
        if (first) {
                lu_ref_del(&lock->l_reference, "hash", lock);
                LDLM_LOCK_RELEASE(lock);
        }
}

/* this is called by portals_handle2object with the handle lock taken */
static void lock_handle_addref(void *lock)
{
        LDLM_LOCK_GET((struct ldlm_lock *)lock);
}

static void lock_handle_free(void *lock, int size)
{
        LASSERT(size == sizeof(struct ldlm_lock));
        OBD_SLAB_FREE(lock, ldlm_lock_slab, size);
}

struct portals_handle_ops lock_handle_ops = {
        .hop_addref = lock_handle_addref,
        .hop_free   = lock_handle_free,
};

/**
 * Allocate and initialize new lock structure.
 *
 * usage: pass in a resource on which you have done ldlm_resource_get;
 *      the new lock will take over the refcount.
 * returns: lock with refcount 2 - one for current caller and one for remote
 */
static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
{
        struct ldlm_lock *lock;

        if (resource == NULL)
                LBUG();

        OBD_SLAB_ALLOC_PTR_GFP(lock, ldlm_lock_slab, __GFP_IO);
        if (lock == NULL)
                return NULL;

        spin_lock_init(&lock->l_lock);
        lock->l_resource = resource;
        lu_ref_add(&resource->lr_reference, "lock", lock);

        atomic_set(&lock->l_refc, 2);
        INIT_LIST_HEAD(&lock->l_res_link);
        INIT_LIST_HEAD(&lock->l_lru);
        INIT_LIST_HEAD(&lock->l_pending_chain);
        INIT_LIST_HEAD(&lock->l_bl_ast);
        INIT_LIST_HEAD(&lock->l_cp_ast);
        INIT_LIST_HEAD(&lock->l_rk_ast);
        init_waitqueue_head(&lock->l_waitq);
        lock->l_blocking_lock = NULL;
        INIT_LIST_HEAD(&lock->l_sl_mode);
        INIT_LIST_HEAD(&lock->l_sl_policy);
        INIT_HLIST_NODE(&lock->l_exp_hash);
        INIT_HLIST_NODE(&lock->l_exp_flock_hash);

        lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats,
                             LDLM_NSS_LOCKS);
        INIT_LIST_HEAD(&lock->l_handle.h_link);
        class_handle_hash(&lock->l_handle, &lock_handle_ops);

        lu_ref_init(&lock->l_reference);
        lu_ref_add(&lock->l_reference, "hash", lock);
        lock->l_callback_timeout = 0;

#if LUSTRE_TRACKS_LOCK_EXP_REFS
        INIT_LIST_HEAD(&lock->l_exp_refs_link);
        lock->l_exp_refs_nr = 0;
        lock->l_exp_refs_target = NULL;
#endif
        INIT_LIST_HEAD(&lock->l_exp_list);

        return lock;
}
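
/*
 * Expected call pattern (a sketch based on the usage comment above; a
 * real caller's error handling may differ):
 *
 *      res = ldlm_resource_get(ns, NULL, res_id, type, 1);
 *      lock = ldlm_lock_new(res);  (takes over the resource reference)
 *      if (lock == NULL)
 *              ldlm_resource_putref(res);  (ref is still ours on failure)
 */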

/**
 * Moves LDLM lock \a lock to another resource.
 * This is used on the client when the server returns some other lock than
 * the one requested (typically as a result of an intent operation).
 */
int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
                              const struct ldlm_res_id *new_resid)
{
        struct ldlm_resource *oldres = lock->l_resource;
        struct ldlm_resource *newres;
        int type;

        LASSERT(ns_is_client(ns));

        lock_res_and_lock(lock);
        if (memcmp(new_resid, &lock->l_resource->lr_name,
                   sizeof(lock->l_resource->lr_name)) == 0) {
                /* Nothing to do */
                unlock_res_and_lock(lock);
                return 0;
        }

        LASSERT(new_resid->name[0] != 0);

        /* This function assumes that the lock isn't on any lists */
        LASSERT(list_empty(&lock->l_res_link));

        type = oldres->lr_type;
        unlock_res_and_lock(lock);

        newres = ldlm_resource_get(ns, NULL, new_resid, type, 1);
        if (newres == NULL)
                return -ENOMEM;

        lu_ref_add(&newres->lr_reference, "lock", lock);
        /*
         * To flip the lock from the old to the new resource, lock, oldres and
         * newres have to be locked. Resource spin-locks are nested within
         * lock->l_lock, and are taken in the memory address order to avoid
         * dead-locks.
         */
        spin_lock(&lock->l_lock);
        oldres = lock->l_resource;
        if (oldres < newres) {
                lock_res(oldres);
                lock_res_nested(newres, LRT_NEW);
        } else {
                lock_res(newres);
                lock_res_nested(oldres, LRT_NEW);
        }
        LASSERT(memcmp(new_resid, &oldres->lr_name,
                       sizeof(oldres->lr_name)) != 0);
        lock->l_resource = newres;
        unlock_res(oldres);
        unlock_res_and_lock(lock);

        /* ...and the flowers are still standing! */
        lu_ref_del(&oldres->lr_reference, "lock", lock);
        ldlm_resource_putref(oldres);

        return 0;
}
EXPORT_SYMBOL(ldlm_lock_change_resource);

/** \defgroup ldlm_handles LDLM HANDLES
 * Ways to get hold of locks without any addresses.
 * @{
 */

/**
 * Fills in handle for LDLM lock \a lock into supplied \a lockh.
 * Does not take any references.
 */
void ldlm_lock2handle(const struct ldlm_lock *lock, struct lustre_handle *lockh)
{
        lockh->cookie = lock->l_handle.h_cookie;
}
EXPORT_SYMBOL(ldlm_lock2handle);
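
/*
 * A handle is just the 64-bit cookie, so it can safely cross address
 * spaces and the wire. Illustrative round trip (sketch):
 *
 *      struct lustre_handle lockh;
 *
 *      ldlm_lock2handle(lock, &lockh);
 *      ...
 *      lock = ldlm_handle2lock(&lockh);  (takes a new reference, or NULL)
 *      if (lock != NULL)
 *              LDLM_LOCK_PUT(lock);
 */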

/**
 * Obtain a lock reference by handle.
 *
 * If \a flags is nonzero: atomically get the lock and set the flags;
 * return NULL if any of the flags is already set.
 */
struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
                                     __u64 flags)
{
        struct ldlm_lock *lock;

        LASSERT(handle);

        lock = class_handle2object(handle->cookie);
        if (lock == NULL)
                return NULL;

        /* It's unlikely but possible that someone marked the lock as
         * destroyed after we did handle2object on it */
        if (flags == 0 && ((lock->l_flags & LDLM_FL_DESTROYED) == 0)) {
                lu_ref_add(&lock->l_reference, "handle", current);
                return lock;
        }

        lock_res_and_lock(lock);

        LASSERT(lock->l_resource != NULL);

        lu_ref_add_atomic(&lock->l_reference, "handle", current);
        if (unlikely(lock->l_flags & LDLM_FL_DESTROYED)) {
                unlock_res_and_lock(lock);
                CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
                LDLM_LOCK_PUT(lock);
                return NULL;
        }

        if (flags && (lock->l_flags & flags)) {
                unlock_res_and_lock(lock);
                LDLM_LOCK_PUT(lock);
                return NULL;
        }

        if (flags)
                lock->l_flags |= flags;

        unlock_res_and_lock(lock);
        return lock;
}
EXPORT_SYMBOL(__ldlm_handle2lock);
/** @} ldlm_handles */

/**
 * Fill in "on the wire" representation for given LDLM lock into supplied
 * lock descriptor \a desc structure.
 */
void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
{
        struct obd_export *exp = lock->l_export ?: lock->l_conn_export;

        /* INODEBITS_INTEROP: If the other side does not support
         * inodebits, reply with a plain lock descriptor. */
        if ((lock->l_resource->lr_type == LDLM_IBITS) &&
            (exp && !(exp_connect_flags(exp) & OBD_CONNECT_IBITS))) {
                /* Make sure all the right bits are set in this lock we
                   are going to pass to client */
                LASSERTF(lock->l_policy_data.l_inodebits.bits ==
                         (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
                          MDS_INODELOCK_LAYOUT),
                         "Inappropriate inode lock bits during "
                         "conversion " LPU64 "\n",
                         lock->l_policy_data.l_inodebits.bits);

                ldlm_res2desc(lock->l_resource, &desc->l_resource);
                desc->l_resource.lr_type = LDLM_PLAIN;

                /* Convert "new" lock mode to something old client can
                   understand */
                if ((lock->l_req_mode == LCK_CR) ||
                    (lock->l_req_mode == LCK_CW))
                        desc->l_req_mode = LCK_PR;
                else
                        desc->l_req_mode = lock->l_req_mode;
                if ((lock->l_granted_mode == LCK_CR) ||
                    (lock->l_granted_mode == LCK_CW)) {
                        desc->l_granted_mode = LCK_PR;
                } else {
                        /* We never grant PW/EX locks to clients */
                        LASSERT((lock->l_granted_mode != LCK_PW) &&
                                (lock->l_granted_mode != LCK_EX));
                        desc->l_granted_mode = lock->l_granted_mode;
                }

                /* We do not copy policy here, because there is no
                   policy for plain locks */
        } else {
                ldlm_res2desc(lock->l_resource, &desc->l_resource);
                desc->l_req_mode = lock->l_req_mode;
                desc->l_granted_mode = lock->l_granted_mode;
                ldlm_convert_policy_to_wire(lock->l_resource->lr_type,
                                            &lock->l_policy_data,
                                            &desc->l_policy_data);
        }
}
EXPORT_SYMBOL(ldlm_lock2desc);

/**
 * Add a lock to list of conflicting locks to send AST to.
 *
 * Only add if we have not sent a blocking AST to the lock yet.
 */
void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
                           struct list_head *work_list)
{
        if ((lock->l_flags & LDLM_FL_AST_SENT) == 0) {
                LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
                lock->l_flags |= LDLM_FL_AST_SENT;
                /* If the enqueuing client said so, tell the AST recipient to
                 * discard dirty data, rather than writing back. */
                if (new->l_flags & LDLM_FL_AST_DISCARD_DATA)
                        lock->l_flags |= LDLM_FL_DISCARD_DATA;
                LASSERT(list_empty(&lock->l_bl_ast));
                list_add(&lock->l_bl_ast, work_list);
                LDLM_LOCK_GET(lock);
                LASSERT(lock->l_blocking_lock == NULL);
                lock->l_blocking_lock = LDLM_LOCK_GET(new);
        }
}

/**
 * Add a lock to list of just granted locks to send completion AST to.
 */
void ldlm_add_cp_work_item(struct ldlm_lock *lock, struct list_head *work_list)
{
        if ((lock->l_flags & LDLM_FL_CP_REQD) == 0) {
                lock->l_flags |= LDLM_FL_CP_REQD;
                LDLM_DEBUG(lock, "lock granted; sending completion AST.");
                LASSERT(list_empty(&lock->l_cp_ast));
                list_add(&lock->l_cp_ast, work_list);
                LDLM_LOCK_GET(lock);
        }
}

/**
 * Aggregator function to add AST work items into a list. Determines
 * what sort of an AST work needs to be done and calls the proper
 * adding function.
 * Must be called with lr_lock held.
 */
void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
                            struct list_head *work_list)
{
        check_res_locked(lock->l_resource);
        if (new)
                ldlm_add_bl_work_item(lock, new, work_list);
        else
                ldlm_add_cp_work_item(lock, work_list);
}

/**
 * Add specified reader/writer reference to LDLM lock with handle \a lockh.
 * r/w reference type is determined by \a mode.
 * Calls ldlm_lock_addref_internal.
 */
void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
{
        struct ldlm_lock *lock;

        lock = ldlm_handle2lock(lockh);
        LASSERT(lock != NULL);
        ldlm_lock_addref_internal(lock, mode);
        LDLM_LOCK_PUT(lock);
}
EXPORT_SYMBOL(ldlm_lock_addref);

/**
 * Helper function.
 * Add specified reader/writer reference to LDLM lock \a lock.
 * r/w reference type is determined by \a mode.
 * Removes lock from LRU if it is there.
 * Assumes the LDLM lock is already locked.
 */
void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
{
        ldlm_lock_remove_from_lru(lock);
        if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
                lock->l_readers++;
                lu_ref_add_atomic(&lock->l_reference, "reader", lock);
        }
        if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) {
                lock->l_writers++;
                lu_ref_add_atomic(&lock->l_reference, "writer", lock);
        }
        LDLM_LOCK_GET(lock);
        lu_ref_add_atomic(&lock->l_reference, "user", lock);
        LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
}

/**
 * Attempts to add reader/writer reference to a lock with handle \a lockh, and
 * fails if lock is already LDLM_FL_CBPENDING or destroyed.
 *
 * \retval 0 success, lock was addref-ed
 *
 * \retval -EAGAIN lock is being canceled.
 */
int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode)
{
        struct ldlm_lock *lock;
        int result;

        result = -EAGAIN;
        lock = ldlm_handle2lock(lockh);
        if (lock != NULL) {
                lock_res_and_lock(lock);
                if (lock->l_readers != 0 || lock->l_writers != 0 ||
                    !(lock->l_flags & LDLM_FL_CBPENDING)) {
                        ldlm_lock_addref_internal_nolock(lock, mode);
                        result = 0;
                }
                unlock_res_and_lock(lock);
                LDLM_LOCK_PUT(lock);
        }
        return result;
}
EXPORT_SYMBOL(ldlm_lock_addref_try);

/**
 * Add specified reader/writer reference to LDLM lock \a lock.
 * Locks LDLM lock and calls ldlm_lock_addref_internal_nolock to do the work.
 * Only called for local locks.
 */
void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
{
        lock_res_and_lock(lock);
        ldlm_lock_addref_internal_nolock(lock, mode);
        unlock_res_and_lock(lock);
}

/**
 * Removes reader/writer reference for LDLM lock \a lock.
 * Assumes LDLM lock is already locked.
 * Only called in ldlm_flock_destroy and for local locks.
 * Does NOT add lock to LRU if no r/w references are left, to accommodate
 * flock locks that cannot be placed in the LRU.
 */
void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
{
        LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
        if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
                LASSERT(lock->l_readers > 0);
                lu_ref_del(&lock->l_reference, "reader", lock);
                lock->l_readers--;
        }
        if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) {
                LASSERT(lock->l_writers > 0);
                lu_ref_del(&lock->l_reference, "writer", lock);
                lock->l_writers--;
        }

        lu_ref_del(&lock->l_reference, "user", lock);
        LDLM_LOCK_RELEASE(lock);    /* matches the LDLM_LOCK_GET() in addref */
}

/**
 * Removes reader/writer reference for LDLM lock \a lock.
 * Locks LDLM lock first.
 * If the lock is a client lock, its r/w refcount drops to zero and it is
 * not blocked, the lock is added to the namespace's LRU.
 * For blocked LDLM locks, if the r/w count drops to zero, the blocking AST
 * is run.
 */
void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
{
        struct ldlm_namespace *ns;

        lock_res_and_lock(lock);

        ns = ldlm_lock_to_ns(lock);

        ldlm_lock_decref_internal_nolock(lock, mode);

        if (lock->l_flags & LDLM_FL_LOCAL &&
            !lock->l_readers && !lock->l_writers) {
                /* If this is a local lock on a server namespace and this was
                 * the last reference, cancel the lock. */
                CDEBUG(D_INFO, "forcing cancel of local lock\n");
                lock->l_flags |= LDLM_FL_CBPENDING;
        }

        if (!lock->l_readers && !lock->l_writers &&
            (lock->l_flags & LDLM_FL_CBPENDING)) {
                /* If we received a blocking AST and this was the last
                 * reference, run the callback. */
                if ((lock->l_flags & LDLM_FL_NS_SRV) && lock->l_export)
                        CERROR("FL_CBPENDING set on non-local lock--just a warning\n");

                LDLM_DEBUG(lock, "final decref done on cbpending lock");

                LDLM_LOCK_GET(lock); /* dropped by bl thread */
                ldlm_lock_remove_from_lru(lock);
                unlock_res_and_lock(lock);

                if (lock->l_flags & LDLM_FL_FAIL_LOC)
                        OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);

                if ((lock->l_flags & LDLM_FL_ATOMIC_CB) ||
                    ldlm_bl_to_thread_lock(ns, NULL, lock) != 0)
                        ldlm_handle_bl_callback(ns, NULL, lock);
        } else if (ns_is_client(ns) &&
                   !lock->l_readers && !lock->l_writers &&
                   !(lock->l_flags & LDLM_FL_NO_LRU) &&
                   !(lock->l_flags & LDLM_FL_BL_AST)) {

                LDLM_DEBUG(lock, "add lock into lru list");

                /* If this is a client-side namespace and this was the last
                 * reference, put it on the LRU. */
                ldlm_lock_add_to_lru(lock);
                unlock_res_and_lock(lock);

                if (lock->l_flags & LDLM_FL_FAIL_LOC)
                        OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);

                /* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE
                 * are not supported by the server, otherwise, it is done on
                 * enqueue. */
                if (!exp_connect_cancelset(lock->l_conn_export) &&
                    !ns_connect_lru_resize(ns))
                        ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0);
        } else {
                LDLM_DEBUG(lock, "do not add lock into lru list");
                unlock_res_and_lock(lock);
        }
}

/**
 * Decrease reader/writer refcount for LDLM lock with handle \a lockh.
 */
void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
{
        struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);

        LASSERTF(lock != NULL, "Non-existing lock: "LPX64"\n", lockh->cookie);
        ldlm_lock_decref_internal(lock, mode);
        LDLM_LOCK_PUT(lock);
}
EXPORT_SYMBOL(ldlm_lock_decref);

/**
 * Decrease reader/writer refcount for LDLM lock with handle
 * \a lockh and mark it for subsequent cancellation once r/w refcount
 * drops to zero instead of putting into LRU.
 *
 * Typical usage is for GROUP locks which we cannot allow to be cached.
 */
void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
{
        struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);

        LASSERT(lock != NULL);

        LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
        lock_res_and_lock(lock);
        lock->l_flags |= LDLM_FL_CBPENDING;
        unlock_res_and_lock(lock);
        ldlm_lock_decref_internal(lock, mode);
        LDLM_LOCK_PUT(lock);
}
EXPORT_SYMBOL(ldlm_lock_decref_and_cancel);
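
/*
 * Typical pairing (sketch): a reference taken by handle must be dropped
 * with the same mode.
 *
 *      ldlm_lock_addref(&lockh, LCK_PR);
 *      ... access data covered by the lock ...
 *      ldlm_lock_decref(&lockh, LCK_PR);
 */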

struct sl_insert_point {
        struct list_head *res_link;
        struct list_head *mode_link;
        struct list_head *policy_link;
};

/**
 * Finds a position to insert the new lock into granted lock list.
 *
 * Used for locks eligible for skiplist optimization.
 *
 * Parameters:
 *      queue [input]:  the granted list where search acts on;
 *      req [input]:    the lock whose position to be located;
 *      prev [output]:  positions within 3 lists to insert \a req to
 * Return Value:
 *      filled \a prev
 * NOTE: called by
 *  - ldlm_grant_lock_with_skiplist
 */
static void search_granted_lock(struct list_head *queue,
                                struct ldlm_lock *req,
                                struct sl_insert_point *prev)
{
        struct list_head *tmp;
        struct ldlm_lock *lock, *mode_end, *policy_end;

        list_for_each(tmp, queue) {
                lock = list_entry(tmp, struct ldlm_lock, l_res_link);

                mode_end = list_entry(lock->l_sl_mode.prev,
                                      struct ldlm_lock, l_sl_mode);

                if (lock->l_req_mode != req->l_req_mode) {
                        /* jump to last lock of mode group */
                        tmp = &mode_end->l_res_link;
                        continue;
                }

                /* suitable mode group is found */
                if (lock->l_resource->lr_type == LDLM_PLAIN) {
                        /* insert point is last lock of the mode group */
                        prev->res_link = &mode_end->l_res_link;
                        prev->mode_link = &mode_end->l_sl_mode;
                        prev->policy_link = &req->l_sl_policy;
                        return;
                } else if (lock->l_resource->lr_type == LDLM_IBITS) {
                        for (;;) {
                                policy_end =
                                        list_entry(lock->l_sl_policy.prev,
                                                   struct ldlm_lock,
                                                   l_sl_policy);

                                if (lock->l_policy_data.l_inodebits.bits ==
                                    req->l_policy_data.l_inodebits.bits) {
                                        /* insert point is last lock of
                                         * the policy group */
                                        prev->res_link =
                                                &policy_end->l_res_link;
                                        prev->mode_link =
                                                &policy_end->l_sl_mode;
                                        prev->policy_link =
                                                &policy_end->l_sl_policy;
                                        return;
                                }

                                if (policy_end == mode_end)
                                        /* done with mode group */
                                        break;

                                /* go to next policy group within mode group */
                                tmp = policy_end->l_res_link.next;
                                lock = list_entry(tmp, struct ldlm_lock,
                                                  l_res_link);
                        }  /* loop over policy groups within the mode group */

                        /* insert point is last lock of the mode group,
                         * new policy group is started */
                        prev->res_link = &mode_end->l_res_link;
                        prev->mode_link = &mode_end->l_sl_mode;
                        prev->policy_link = &req->l_sl_policy;
                        return;
                } else {
                        LDLM_ERROR(lock, "is not LDLM_PLAIN or LDLM_IBITS lock");
                        LBUG();
                }
        }

        /* insert point is last lock on the queue,
         * new mode group and new policy group are started */
        prev->res_link = queue->prev;
        prev->mode_link = &req->l_sl_mode;
        prev->policy_link = &req->l_sl_policy;
}
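
/*
 * Shape of the granted list that search_granted_lock() exploits
 * (illustration): locks sit in l_res_link order, grouped by mode and,
 * for IBITS locks, sub-grouped by inodebits policy within each mode
 * group. l_sl_mode links the first and last lock of a mode group, and
 * l_sl_policy the first and last lock of a policy group, so the search
 * hops over whole groups via mode_end/policy_end instead of visiting
 * every lock.
 */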

/**
 * Add a lock into resource granted list after a position described by
 * \a prev.
 */
static void ldlm_granted_list_add_lock(struct ldlm_lock *lock,
                                       struct sl_insert_point *prev)
{
        struct ldlm_resource *res = lock->l_resource;

        check_res_locked(res);

        ldlm_resource_dump(D_INFO, res);
        LDLM_DEBUG(lock, "About to add lock:");

        if (lock->l_flags & LDLM_FL_DESTROYED) {
                CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
                return;
        }

        LASSERT(list_empty(&lock->l_res_link));
        LASSERT(list_empty(&lock->l_sl_mode));
        LASSERT(list_empty(&lock->l_sl_policy));

        /*
         * lock->link == prev->link means this lock is the first of its
         * group.  Don't re-add it to itself to suppress kernel warnings.
         */
        if (&lock->l_res_link != prev->res_link)
                list_add(&lock->l_res_link, prev->res_link);
        if (&lock->l_sl_mode != prev->mode_link)
                list_add(&lock->l_sl_mode, prev->mode_link);
        if (&lock->l_sl_policy != prev->policy_link)
                list_add(&lock->l_sl_policy, prev->policy_link);
}

/**
 * Add a lock to granted list on a resource maintaining skiplist
 * correctness.
 */
static void ldlm_grant_lock_with_skiplist(struct ldlm_lock *lock)
{
        struct sl_insert_point prev;

        LASSERT(lock->l_req_mode == lock->l_granted_mode);

        search_granted_lock(&lock->l_resource->lr_granted, lock, &prev);
        ldlm_granted_list_add_lock(lock, &prev);
}

/**
 * Perform lock granting bookkeeping.
 *
 * Includes putting the lock into granted list and updating lock mode.
 * NOTE: called by
 *  - ldlm_lock_enqueue
 *  - ldlm_reprocess_queue
 *  - ldlm_lock_convert
 *
 * must be called with lr_lock held
 */
void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list)
{
        struct ldlm_resource *res = lock->l_resource;

        check_res_locked(res);

        lock->l_granted_mode = lock->l_req_mode;
        if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS)
                ldlm_grant_lock_with_skiplist(lock);
        else if (res->lr_type == LDLM_EXTENT)
                ldlm_extent_add_lock(res, lock);
        else
                ldlm_resource_add_lock(res, &res->lr_granted, lock);

        if (lock->l_granted_mode < res->lr_most_restr)
                res->lr_most_restr = lock->l_granted_mode;

        if (work_list && lock->l_completion_ast != NULL)
                ldlm_add_ast_work_item(lock, NULL, work_list);

        ldlm_pool_add(&ldlm_res_to_ns(res)->ns_pool, lock);
}

/**
 * Search for a lock with given properties in a queue.
 *
 * \retval a referenced lock or NULL.  See the flag descriptions in the
 * comment above ldlm_lock_match().
 */
static struct ldlm_lock *search_queue(struct list_head *queue,
                                      ldlm_mode_t *mode,
                                      ldlm_policy_data_t *policy,
                                      struct ldlm_lock *old_lock,
                                      __u64 flags, int unref)
{
        struct ldlm_lock *lock;
        struct list_head *tmp;

        list_for_each(tmp, queue) {
                ldlm_mode_t match;

                lock = list_entry(tmp, struct ldlm_lock, l_res_link);

                if (lock == old_lock)
                        break;

                /* Check if this lock can be matched.
                 * Used by LU-2919 (exclusive open) for open lease lock */
                if (ldlm_is_excl(lock))
                        continue;

                /* llite sometimes wants to match locks that will be
                 * canceled when their users drop, but we allow it to match
                 * if it passes in CBPENDING and the lock still has users.
                 * this is generally only going to be used by children
                 * whose parents already hold a lock so forward progress
                 * can still happen. */
                if (lock->l_flags & LDLM_FL_CBPENDING &&
                    !(flags & LDLM_FL_CBPENDING))
                        continue;
                if (!unref && (lock->l_flags & LDLM_FL_CBPENDING) &&
                    lock->l_readers == 0 && lock->l_writers == 0)
                        continue;

                if (!(lock->l_req_mode & *mode))
                        continue;
                match = lock->l_req_mode;

                if (lock->l_resource->lr_type == LDLM_EXTENT &&
                    (lock->l_policy_data.l_extent.start >
                     policy->l_extent.start ||
                     lock->l_policy_data.l_extent.end < policy->l_extent.end))
                        continue;

                if (unlikely(match == LCK_GROUP) &&
                    lock->l_resource->lr_type == LDLM_EXTENT &&
                    lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
                        continue;

                /* We match if we have an existing lock with the same or a
                   wider set of bits. */
                if (lock->l_resource->lr_type == LDLM_IBITS &&
                     ((lock->l_policy_data.l_inodebits.bits &
                      policy->l_inodebits.bits) !=
                      policy->l_inodebits.bits))
                        continue;

                if (!unref && (lock->l_flags & LDLM_FL_GONE_MASK))
                        continue;

                if ((flags & LDLM_FL_LOCAL_ONLY) &&
                    !(lock->l_flags & LDLM_FL_LOCAL))
                        continue;

                if (flags & LDLM_FL_TEST_LOCK) {
                        LDLM_LOCK_GET(lock);
                        ldlm_lock_touch_in_lru(lock);
                } else {
                        ldlm_lock_addref_internal_nolock(lock, match);
                }
                *mode = match;
                return lock;
        }

        return NULL;
}
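
/*
 * Reference semantics of a successful search above: with
 * LDLM_FL_TEST_LOCK the lock is only pinned (LDLM_LOCK_GET) and touched
 * in the LRU; otherwise a real reader/writer reference of the matched
 * mode is taken via ldlm_lock_addref_internal_nolock().
 */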

void ldlm_lock_fail_match_locked(struct ldlm_lock *lock)
{
        if ((lock->l_flags & LDLM_FL_FAIL_NOTIFIED) == 0) {
                lock->l_flags |= LDLM_FL_FAIL_NOTIFIED;
                wake_up_all(&lock->l_waitq);
        }
}
EXPORT_SYMBOL(ldlm_lock_fail_match_locked);

void ldlm_lock_fail_match(struct ldlm_lock *lock)
{
        lock_res_and_lock(lock);
        ldlm_lock_fail_match_locked(lock);
        unlock_res_and_lock(lock);
}
EXPORT_SYMBOL(ldlm_lock_fail_match);

/**
 * Mark lock as "matchable" by OST.
 *
 * Used to prevent certain races in LOV/OSC where the lock is granted, but LVB
 * is not yet valid.
 * Assumes LDLM lock is already locked.
 */
void ldlm_lock_allow_match_locked(struct ldlm_lock *lock)
{
        lock->l_flags |= LDLM_FL_LVB_READY;
        wake_up_all(&lock->l_waitq);
}
EXPORT_SYMBOL(ldlm_lock_allow_match_locked);

/**
 * Mark lock as "matchable" by OST.
 * Locks the lock and then \see ldlm_lock_allow_match_locked
 */
void ldlm_lock_allow_match(struct ldlm_lock *lock)
{
        lock_res_and_lock(lock);
        ldlm_lock_allow_match_locked(lock);
        unlock_res_and_lock(lock);
}
EXPORT_SYMBOL(ldlm_lock_allow_match);

/**
 * Attempt to find a lock with specified properties.
 *
 * Typically returns a reference to matched lock unless LDLM_FL_TEST_LOCK is
 * set in \a flags
 *
 * Can be called in two ways:
 *
 * If 'ns' is NULL, then lockh describes an existing lock that we want to look
 * for a duplicate of.
 *
 * Otherwise, all of the fields must be filled in, to match against.
 *
 * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
 *     server (i.e., connh is NULL)
 * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
 *     list will be considered
 * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
 *     to be canceled can still be matched as long as they still have reader
 *     or writer references
 * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
 *     just tell us if we would have matched.
 *
 * \retval 1 if it finds an already-existing lock that is compatible; in this
 * case, lockh is filled in with an addref()ed lock
 *
 * We also check security context, and if that fails we simply return 0 (to
 * keep caller code unchanged), the context failure will be discovered by
 * caller sometime later.
 */
ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
                            const struct ldlm_res_id *res_id, ldlm_type_t type,
                            ldlm_policy_data_t *policy, ldlm_mode_t mode,
                            struct lustre_handle *lockh, int unref)
{
        struct ldlm_resource *res;
        struct ldlm_lock *lock, *old_lock = NULL;
        int rc = 0;

        if (ns == NULL) {
                old_lock = ldlm_handle2lock(lockh);
                LASSERT(old_lock);

                ns = ldlm_lock_to_ns(old_lock);
                res_id = &old_lock->l_resource->lr_name;
                type = old_lock->l_resource->lr_type;
                mode = old_lock->l_req_mode;
        }

        res = ldlm_resource_get(ns, NULL, res_id, type, 0);
        if (res == NULL) {
                LASSERT(old_lock == NULL);
                return 0;
        }

        LDLM_RESOURCE_ADDREF(res);
        lock_res(res);

        lock = search_queue(&res->lr_granted, &mode, policy, old_lock,
                            flags, unref);
        if (lock != NULL)
                GOTO(out, rc = 1);
        if (flags & LDLM_FL_BLOCK_GRANTED)
                GOTO(out, rc = 0);
        lock = search_queue(&res->lr_converting, &mode, policy, old_lock,
                            flags, unref);
        if (lock != NULL)
                GOTO(out, rc = 1);
        lock = search_queue(&res->lr_waiting, &mode, policy, old_lock,
                            flags, unref);
        if (lock != NULL)
                GOTO(out, rc = 1);

 out:
        unlock_res(res);
        LDLM_RESOURCE_DELREF(res);
        ldlm_resource_putref(res);

        if (lock) {
                ldlm_lock2handle(lock, lockh);
                if ((flags & LDLM_FL_LVB_READY) &&
                    (!(lock->l_flags & LDLM_FL_LVB_READY))) {
                        __u64 wait_flags = LDLM_FL_LVB_READY |
                                LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED;
                        struct l_wait_info lwi;

                        if (lock->l_completion_ast) {
                                int err = lock->l_completion_ast(lock,
                                                          LDLM_FL_WAIT_NOREPROC,
                                                                 NULL);
                                if (err) {
                                        if (flags & LDLM_FL_TEST_LOCK)
                                                LDLM_LOCK_RELEASE(lock);
                                        else
                                                ldlm_lock_decref_internal(lock,
                                                                          mode);
                                        rc = 0;
                                        goto out2;
                                }
                        }

                        lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout),
                                               NULL, LWI_ON_SIGNAL_NOOP, NULL);

                        /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
                        l_wait_event(lock->l_waitq,
                                     lock->l_flags & wait_flags,
                                     &lwi);
                        if (!(lock->l_flags & LDLM_FL_LVB_READY)) {
                                if (flags & LDLM_FL_TEST_LOCK)
                                        LDLM_LOCK_RELEASE(lock);
                                else
                                        ldlm_lock_decref_internal(lock, mode);
                                rc = 0;
                        }
                }
        }
 out2:
        if (rc) {
                LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")",
                           (type == LDLM_PLAIN || type == LDLM_IBITS) ?
                                res_id->name[2] : policy->l_extent.start,
                           (type == LDLM_PLAIN || type == LDLM_IBITS) ?
                                res_id->name[3] : policy->l_extent.end);

                /* check user's security context */
                if (lock->l_conn_export &&
                    sptlrpc_import_check_ctx(
                                class_exp2cliimp(lock->l_conn_export))) {
                        if (!(flags & LDLM_FL_TEST_LOCK))
                                ldlm_lock_decref_internal(lock, mode);
                        rc = 0;
                }

                if (flags & LDLM_FL_TEST_LOCK)
                        LDLM_LOCK_RELEASE(lock);

        } else if (!(flags & LDLM_FL_TEST_LOCK)) { /* less verbose for test-only */
                LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
                                  LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
                                  type, mode, res_id->name[0], res_id->name[1],
                                  (type == LDLM_PLAIN || type == LDLM_IBITS) ?
                                        res_id->name[2] : policy->l_extent.start,
                                  (type == LDLM_PLAIN || type == LDLM_IBITS) ?
                                        res_id->name[3] : policy->l_extent.end);
        }
        if (old_lock)
                LDLM_LOCK_PUT(old_lock);

        return rc ? mode : 0;
}
EXPORT_SYMBOL(ldlm_lock_match);
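
/*
 * Illustrative call (a sketch; res_id and policy are whatever the
 * caller cares about): test whether a compatible read lock is already
 * cached without taking a reference on it.
 *
 *      mode = ldlm_lock_match(ns, LDLM_FL_TEST_LOCK, &res_id,
 *                             LDLM_EXTENT, &policy, LCK_PR | LCK_PW,
 *                             &lockh, 0);
 *      if (mode != 0)
 *              ... a compatible lock exists ...
 */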
1387
1388 ldlm_mode_t ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
1389                                         __u64 *bits)
1390 {
1391         struct ldlm_lock *lock;
1392         ldlm_mode_t mode = 0;
1393
1394         lock = ldlm_handle2lock(lockh);
1395         if (lock != NULL) {
1396                 lock_res_and_lock(lock);
1397                 if (lock->l_flags & LDLM_FL_GONE_MASK)
1398                         GOTO(out, mode);
1399
1400                 if (lock->l_flags & LDLM_FL_CBPENDING &&
1401                     lock->l_readers == 0 && lock->l_writers == 0)
1402                         GOTO(out, mode);
1403
1404                 if (bits)
1405                         *bits = lock->l_policy_data.l_inodebits.bits;
1406                 mode = lock->l_granted_mode;
1407                 ldlm_lock_addref_internal_nolock(lock, mode);
1408         }
1409
1410 out:
1411         if (lock != NULL) {
1412                 unlock_res_and_lock(lock);
1413                 LDLM_LOCK_PUT(lock);
1414         }
1415         return mode;
1416 }
1417 EXPORT_SYMBOL(ldlm_revalidate_lock_handle);

/**
 * Unpack the lock value block (LVB) carried in request capsule \a pill into
 * the buffer \a data.  The caller must guarantee that the buffer is large
 * enough.
 */
int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
                  enum req_location loc, void *data, int size)
{
        void *lvb;

        LASSERT(data != NULL);
        LASSERT(size >= 0);

        switch (lock->l_lvb_type) {
        case LVB_T_OST:
                if (size == sizeof(struct ost_lvb)) {
                        if (loc == RCL_CLIENT)
                                lvb = req_capsule_client_swab_get(pill,
                                                &RMF_DLM_LVB,
                                                lustre_swab_ost_lvb);
                        else
                                lvb = req_capsule_server_swab_get(pill,
                                                &RMF_DLM_LVB,
                                                lustre_swab_ost_lvb);
                        if (unlikely(lvb == NULL)) {
                                LDLM_ERROR(lock, "no LVB");
                                return -EPROTO;
                        }

                        memcpy(data, lvb, size);
                } else if (size == sizeof(struct ost_lvb_v1)) {
                        struct ost_lvb *olvb = data;

                        if (loc == RCL_CLIENT)
                                lvb = req_capsule_client_swab_get(pill,
                                                &RMF_DLM_LVB,
                                                lustre_swab_ost_lvb_v1);
                        else
                                lvb = req_capsule_server_sized_swab_get(pill,
                                                &RMF_DLM_LVB, size,
                                                lustre_swab_ost_lvb_v1);
                        if (unlikely(lvb == NULL)) {
                                LDLM_ERROR(lock, "no LVB");
                                return -EPROTO;
                        }

                        memcpy(data, lvb, size);
                        olvb->lvb_mtime_ns = 0;
                        olvb->lvb_atime_ns = 0;
                        olvb->lvb_ctime_ns = 0;
                } else {
                        LDLM_ERROR(lock, "Reply contained unexpected ost LVB size %d",
                                   size);
                        return -EINVAL;
                }
                break;
        case LVB_T_LQUOTA:
                if (size == sizeof(struct lquota_lvb)) {
                        if (loc == RCL_CLIENT)
                                lvb = req_capsule_client_swab_get(pill,
                                                &RMF_DLM_LVB,
                                                lustre_swab_lquota_lvb);
                        else
                                lvb = req_capsule_server_swab_get(pill,
                                                &RMF_DLM_LVB,
                                                lustre_swab_lquota_lvb);
                        if (unlikely(lvb == NULL)) {
                                LDLM_ERROR(lock, "no LVB");
                                return -EPROTO;
                        }

                        memcpy(data, lvb, size);
                } else {
                        LDLM_ERROR(lock, "Reply contained unexpected lquota LVB size %d",
                                   size);
                        return -EINVAL;
                }
                break;
        case LVB_T_LAYOUT:
                if (size == 0)
                        break;

                if (loc == RCL_CLIENT)
                        lvb = req_capsule_client_get(pill, &RMF_DLM_LVB);
                else
                        lvb = req_capsule_server_get(pill, &RMF_DLM_LVB);
                if (unlikely(lvb == NULL)) {
                        LDLM_ERROR(lock, "no LVB");
                        return -EPROTO;
                }

                memcpy(data, lvb, size);
                break;
        default:
                LDLM_ERROR(lock, "Unknown LVB type: %d\n", lock->l_lvb_type);
                dump_stack();
                return -EINVAL;
        }

        return 0;
}
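
/*
 * A minimal usage sketch for ldlm_fill_lvb(), assuming a client-side caller
 * parsing a reply, with "req" and "lock" coming from that hypothetical
 * caller's context:
 *
 *	struct ost_lvb lvb;
 *	int rc;
 *
 *	rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, &lvb, sizeof(lvb));
 *	if (rc != 0)
 *		return rc;
 *
 * rc is -EPROTO when the reply carried no LVB and -EINVAL on a size mismatch.
 */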

/**
 * Create and fill in a new LDLM lock with the specified properties.
 * Returns a referenced lock on success, NULL on failure.
 */
struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
                                   const struct ldlm_res_id *res_id,
                                   ldlm_type_t type,
                                   ldlm_mode_t mode,
                                   const struct ldlm_callback_suite *cbs,
                                   void *data, __u32 lvb_len,
                                   enum lvb_type lvb_type)
{
        struct ldlm_lock *lock;
        struct ldlm_resource *res;

        res = ldlm_resource_get(ns, NULL, res_id, type, 1);
        if (res == NULL)
                return NULL;

        lock = ldlm_lock_new(res);

        if (lock == NULL)
                return NULL;

        lock->l_req_mode = mode;
        lock->l_ast_data = data;
        lock->l_pid = current_pid();
        if (ns_is_server(ns))
                lock->l_flags |= LDLM_FL_NS_SRV;
        if (cbs) {
                lock->l_blocking_ast = cbs->lcs_blocking;
                lock->l_completion_ast = cbs->lcs_completion;
                lock->l_glimpse_ast = cbs->lcs_glimpse;
        }

        lock->l_tree_node = NULL;
        /* if this is an extent lock, allocate the interval tree node */
        if (type == LDLM_EXTENT) {
                if (ldlm_interval_alloc(lock) == NULL)
                        GOTO(out, 0);
        }

        if (lvb_len) {
                lock->l_lvb_len = lvb_len;
                OBD_ALLOC(lock->l_lvb_data, lvb_len);
                if (lock->l_lvb_data == NULL)
                        GOTO(out, 0);
        }

        lock->l_lvb_type = lvb_type;
        if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK))
                GOTO(out, 0);

        return lock;

out:
        ldlm_lock_destroy(lock);
        LDLM_LOCK_RELEASE(lock);
        return NULL;
}
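
/*
 * A sketch of how a caller might create a plain lock with a callback suite.
 * Everything here (the namespace "ns", resource id "res_id" and the AST
 * functions) is hypothetical and only illustrates the calling convention:
 *
 *	const struct ldlm_callback_suite cbs = {
 *		.lcs_blocking   = my_blocking_ast,
 *		.lcs_completion = my_completion_ast,
 *	};
 *	struct ldlm_lock *lock;
 *
 *	lock = ldlm_lock_create(ns, &res_id, LDLM_PLAIN, LCK_PR,
 *				&cbs, NULL, 0, LVB_T_NONE);
 *	if (lock == NULL)
 *		return -ENOMEM;
 */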

/**
 * Enqueue (request) a lock.
 *
 * Does not block.  As a result of the enqueue the lock will be put on the
 * granted or waiting list.
 *
 * If the namespace has an intent policy set and the lock has the
 * LDLM_FL_HAS_INTENT flag set, skip all enqueueing and delegate lock
 * processing to the intent policy function.
 */
ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
                               struct ldlm_lock **lockp,
                               void *cookie, __u64 *flags)
{
        struct ldlm_lock *lock = *lockp;
        struct ldlm_resource *res = lock->l_resource;
        int local = ns_is_client(ldlm_res_to_ns(res));
        ldlm_error_t rc = ELDLM_OK;
        struct ldlm_interval *node = NULL;

        lock->l_last_activity = cfs_time_current_sec();
        /* policies are not executed on the client or during replay */
        if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
            && !local && ns->ns_policy) {
                rc = ns->ns_policy(ns, lockp, cookie, lock->l_req_mode, *flags,
                                   NULL);
                if (rc == ELDLM_LOCK_REPLACED) {
                        /* The lock that was returned has already been granted,
                         * and placed into lockp.  If it's not the same as the
                         * one we passed in, then destroy the old one and our
                         * work here is done. */
                        if (lock != *lockp) {
                                ldlm_lock_destroy(lock);
                                LDLM_LOCK_RELEASE(lock);
                        }
                        *flags |= LDLM_FL_LOCK_CHANGED;
                        return 0;
                } else if (rc != ELDLM_OK ||
                           (rc == ELDLM_OK && (*flags & LDLM_FL_INTENT_ONLY))) {
                        ldlm_lock_destroy(lock);
                        return rc;
                }
        }

        /* A replaying lock might already be in the granted list, so unlinking
         * the lock will cause the interval node to be freed.  We have to
         * allocate the interval node early; otherwise we can't regrant this
         * lock in the future. - jay */
        if (!local && (*flags & LDLM_FL_REPLAY) && res->lr_type == LDLM_EXTENT)
                OBD_SLAB_ALLOC_PTR_GFP(node, ldlm_interval_slab, __GFP_IO);

        lock_res_and_lock(lock);
        if (local && lock->l_req_mode == lock->l_granted_mode) {
                /* The server returned a blocked lock, but it was granted
                 * before we got a chance to actually enqueue it.  We don't
                 * need to do anything else. */
                *flags &= ~(LDLM_FL_BLOCK_GRANTED |
                            LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
                GOTO(out, ELDLM_OK);
        }

        ldlm_resource_unlink_lock(lock);
        if (res->lr_type == LDLM_EXTENT && lock->l_tree_node == NULL) {
                if (node == NULL) {
                        ldlm_lock_destroy_nolock(lock);
                        GOTO(out, rc = -ENOMEM);
                }

                INIT_LIST_HEAD(&node->li_group);
                ldlm_interval_attach(node, lock);
                node = NULL;
        }

        /* Some flags from the enqueue want to make it into the AST, via the
         * lock's l_flags. */
        lock->l_flags |= *flags & LDLM_FL_AST_DISCARD_DATA;

        /* This distinction between local lock trees is very important; a client
         * namespace only has information about locks taken by that client, and
         * thus doesn't have enough information to decide for itself if it can
         * be granted (below).  In this case, we do exactly what the server
         * tells us to do, as dictated by the 'flags'.
         *
         * We do exactly the same thing during recovery, when the server is
         * more or less trusting the clients not to lie.
         *
         * FIXME (bug 268): Detect obvious lies by checking compatibility in
         * granted/converting queues. */
        if (local) {
                if (*flags & LDLM_FL_BLOCK_CONV)
                        ldlm_resource_add_lock(res, &res->lr_converting, lock);
                else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                else
                        ldlm_grant_lock(lock, NULL);
                GOTO(out, ELDLM_OK);
        } else {
                CERROR("This is a client-side-only module; it cannot handle "
                       "LDLM_NAMESPACE_SERVER resource type locks.\n");
                LBUG();
        }

out:
        unlock_res_and_lock(lock);
        if (node)
                OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
        return rc;
}
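
/*
 * A sketch of a local (client-namespace) enqueue following
 * ldlm_lock_create(); "ns" and "lock" are assumed from a hypothetical
 * caller:
 *
 *	__u64 flags = 0;
 *	ldlm_error_t err;
 *
 *	err = ldlm_lock_enqueue(ns, &lock, NULL, &flags);
 *
 * With no LDLM_FL_BLOCK_* bits set in flags, a local lock is granted
 * directly via ldlm_grant_lock(); with LDLM_FL_BLOCK_WAIT or
 * LDLM_FL_BLOCK_GRANTED set it is parked on the resource's waiting list.
 */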

/**
 * Process a call to blocking AST callback for a lock in ast_work list
 */
static int
ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
{
        struct ldlm_cb_set_arg *arg = opaq;
        struct ldlm_lock_desc   d;
        int                     rc;
        struct ldlm_lock       *lock;

        if (list_empty(arg->list))
                return -ENOENT;

        lock = list_entry(arg->list->next, struct ldlm_lock, l_bl_ast);

        /* nobody should touch l_bl_ast */
        lock_res_and_lock(lock);
        list_del_init(&lock->l_bl_ast);

        LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
        LASSERT(lock->l_bl_ast_run == 0);
        LASSERT(lock->l_blocking_lock);
        lock->l_bl_ast_run++;
        unlock_res_and_lock(lock);

        ldlm_lock2desc(lock->l_blocking_lock, &d);

        rc = lock->l_blocking_ast(lock, &d, (void *)arg, LDLM_CB_BLOCKING);
        LDLM_LOCK_RELEASE(lock->l_blocking_lock);
        lock->l_blocking_lock = NULL;
        LDLM_LOCK_RELEASE(lock);

        return rc;
}

/**
 * Process a call to completion AST callback for a lock in ast_work list
 */
static int
ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
{
        struct ldlm_cb_set_arg   *arg = opaq;
        int                       rc = 0;
        struct ldlm_lock         *lock;
        ldlm_completion_callback  completion_callback;

        if (list_empty(arg->list))
                return -ENOENT;

        lock = list_entry(arg->list->next, struct ldlm_lock, l_cp_ast);

        /* It's possible to receive a completion AST before we've set
         * the l_completion_ast pointer: either because the AST arrived
         * before the reply, or simply because there's a small race
         * window between receiving the reply and finishing the local
         * enqueue. (bug 842)
         *
         * This can't happen with the blocking_ast, however, because we
         * will never call the local blocking_ast until we drop our
         * reader/writer reference, which we won't do until we get the
         * reply and finish enqueueing. */

        /* nobody should touch l_cp_ast */
        lock_res_and_lock(lock);
        list_del_init(&lock->l_cp_ast);
        LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
        /* save l_completion_ast since it can be changed by
         * mds_intent_policy(), see bug 14225 */
        completion_callback = lock->l_completion_ast;
        lock->l_flags &= ~LDLM_FL_CP_REQD;
        unlock_res_and_lock(lock);

        if (completion_callback != NULL)
                rc = completion_callback(lock, 0, (void *)arg);
        LDLM_LOCK_RELEASE(lock);

        return rc;
}

/**
 * Process a call to revocation AST callback for a lock in ast_work list
 */
static int
ldlm_work_revoke_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
{
        struct ldlm_cb_set_arg *arg = opaq;
        struct ldlm_lock_desc   desc;
        int                     rc;
        struct ldlm_lock       *lock;

        if (list_empty(arg->list))
                return -ENOENT;

        lock = list_entry(arg->list->next, struct ldlm_lock, l_rk_ast);
        list_del_init(&lock->l_rk_ast);

        /* the desc just pretends to be exclusive */
        ldlm_lock2desc(lock, &desc);
        desc.l_req_mode = LCK_EX;
        desc.l_granted_mode = 0;

        rc = lock->l_blocking_ast(lock, &desc, (void *)arg, LDLM_CB_BLOCKING);
        LDLM_LOCK_RELEASE(lock);

        return rc;
}

/**
 * Process a call to glimpse AST callback for a lock in ast_work list
 */
int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
{
        struct ldlm_cb_set_arg          *arg = opaq;
        struct ldlm_glimpse_work        *gl_work;
        struct ldlm_lock                *lock;
        int                              rc = 0;

        if (list_empty(arg->list))
                return -ENOENT;

        gl_work = list_entry(arg->list->next, struct ldlm_glimpse_work,
                             gl_list);
        list_del_init(&gl_work->gl_list);

        lock = gl_work->gl_lock;

        /* transfer the glimpse descriptor to ldlm_cb_set_arg */
        arg->gl_desc = gl_work->gl_desc;

        /* invoke the actual glimpse callback */
        if (lock->l_glimpse_ast(lock, (void *)arg) == 0)
                rc = 1;

        LDLM_LOCK_RELEASE(lock);

        if ((gl_work->gl_flags & LDLM_GL_WORK_NOFREE) == 0)
                OBD_FREE_PTR(gl_work);

        return rc;
}

/**
 * Process a list of locks in need of ASTs being sent.
 *
 * Used on server to send multiple ASTs together instead of sending one by
 * one.
 */
int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
                      ldlm_desc_ast_t ast_type)
{
        struct ldlm_cb_set_arg *arg;
        set_producer_func       work_ast_lock;
        int                     rc;

        if (list_empty(rpc_list))
                return 0;

        OBD_ALLOC_PTR(arg);
        if (arg == NULL)
                return -ENOMEM;

        atomic_set(&arg->restart, 0);
        arg->list = rpc_list;

        switch (ast_type) {
        case LDLM_WORK_BL_AST:
                arg->type = LDLM_BL_CALLBACK;
                work_ast_lock = ldlm_work_bl_ast_lock;
                break;
        case LDLM_WORK_CP_AST:
                arg->type = LDLM_CP_CALLBACK;
                work_ast_lock = ldlm_work_cp_ast_lock;
                break;
        case LDLM_WORK_REVOKE_AST:
                arg->type = LDLM_BL_CALLBACK;
                work_ast_lock = ldlm_work_revoke_ast_lock;
                break;
        case LDLM_WORK_GL_AST:
                arg->type = LDLM_GL_CALLBACK;
                work_ast_lock = ldlm_work_gl_ast_lock;
                break;
        default:
                LBUG();
        }

        /* We create a ptlrpc request set with the flow control extension.
         * This request set will use the work_ast_lock function to produce new
         * requests and will send a new request each time one completes, in
         * order to keep the number of requests in flight at
         * ns_max_parallel_ast. */
        arg->set = ptlrpc_prep_fcset(ns->ns_max_parallel_ast ? : UINT_MAX,
                                     work_ast_lock, arg);
        if (arg->set == NULL)
                GOTO(out, rc = -ENOMEM);

        ptlrpc_set_wait(arg->set);
        ptlrpc_set_destroy(arg->set);

        rc = atomic_read(&arg->restart) ? -ERESTART : 0;
        GOTO(out, rc);
out:
        OBD_FREE_PTR(arg);
        return rc;
}
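
/*
 * A sketch of draining a list of blocked locks collected on their l_bl_ast
 * entries; "ns" and the way rpc_list was populated are assumed from a
 * hypothetical caller:
 *
 *	LIST_HEAD(rpc_list);
 *	int rc;
 *
 *	... link locks onto rpc_list via lock->l_bl_ast ...
 *	rc = ldlm_run_ast_work(ns, &rpc_list, LDLM_WORK_BL_AST);
 *
 * rc is -ERESTART when at least one AST bumped the restart counter, asking
 * the caller to reprocess.
 */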

static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
{
        ldlm_reprocess_all(res);
        return LDLM_ITER_CONTINUE;
}

static int ldlm_reprocess_res(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                              struct hlist_node *hnode, void *arg)
{
        struct ldlm_resource *res = cfs_hash_object(hs, hnode);
        int rc;

        rc = reprocess_one_queue(res, arg);

        return rc == LDLM_ITER_STOP;
}

/**
 * Iterate through all resources on a namespace attempting to grant waiting
 * locks.
 */
void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
{
        if (ns != NULL) {
                cfs_hash_for_each_nolock(ns->ns_rs_hash,
                                         ldlm_reprocess_res, NULL);
        }
}
EXPORT_SYMBOL(ldlm_reprocess_all_ns);

/**
 * Try to grant all waiting locks on a resource.
 *
 * On a server this calls ldlm_reprocess_queue() on the converting and
 * waiting queues, typically after some resource locks are cancelled, to see
 * if anything could be granted as a result of the cancellation.  In this
 * client-side-only module it is a no-op for client namespaces.
 */
void ldlm_reprocess_all(struct ldlm_resource *res)
{
        LIST_HEAD(rpc_list);

        if (!ns_is_client(ldlm_res_to_ns(res))) {
                CERROR("This is a client-side-only module; it cannot handle "
                       "LDLM_NAMESPACE_SERVER resource type locks.\n");
                LBUG();
        }
}

/**
 * Helper function to call blocking AST for LDLM lock \a lock in a
 * "cancelling" mode.
 */
void ldlm_cancel_callback(struct ldlm_lock *lock)
{
        check_res_locked(lock->l_resource);
        if (!(lock->l_flags & LDLM_FL_CANCEL)) {
                lock->l_flags |= LDLM_FL_CANCEL;
                if (lock->l_blocking_ast) {
                        unlock_res_and_lock(lock);
                        lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
                                             LDLM_CB_CANCELING);
                        lock_res_and_lock(lock);
                } else {
                        LDLM_DEBUG(lock, "no blocking ast");
                }
        }
        lock->l_flags |= LDLM_FL_BL_DONE;
}

/**
 * Remove skiplist-enabled LDLM lock \a req from granted list
 */
void ldlm_unlink_lock_skiplist(struct ldlm_lock *req)
{
        if (req->l_resource->lr_type != LDLM_PLAIN &&
            req->l_resource->lr_type != LDLM_IBITS)
                return;

        list_del_init(&req->l_sl_policy);
        list_del_init(&req->l_sl_mode);
}

/**
 * Attempts to cancel LDLM lock \a lock that has no reader/writer references.
 */
void ldlm_lock_cancel(struct ldlm_lock *lock)
{
        struct ldlm_resource *res;
        struct ldlm_namespace *ns;

        lock_res_and_lock(lock);

        res = lock->l_resource;
        ns  = ldlm_res_to_ns(res);

        /* Please do not, no matter how tempting, remove this LBUG without
         * talking to me first. -phik */
        if (lock->l_readers || lock->l_writers) {
                LDLM_ERROR(lock, "lock still has references");
                LBUG();
        }

        if (lock->l_flags & LDLM_FL_WAITED)
                ldlm_del_waiting_lock(lock);

        /* Run the cancel callback (the blocking AST in "cancelling" mode);
         * this may drop and re-take the resource lock. */
        ldlm_cancel_callback(lock);

        /* Yes, second time, just in case it was added again while we were
         * running with no res lock in ldlm_cancel_callback */
        if (lock->l_flags & LDLM_FL_WAITED)
                ldlm_del_waiting_lock(lock);

        ldlm_resource_unlink_lock(lock);
        ldlm_lock_destroy_nolock(lock);

        if (lock->l_granted_mode == lock->l_req_mode)
                ldlm_pool_del(&ns->ns_pool, lock);

        /* Make sure we will not be called again for the same lock, which is
         * possible if lock->l_granted_mode is not zeroed out. */
        lock->l_granted_mode = LCK_MINMODE;
        unlock_res_and_lock(lock);
}
EXPORT_SYMBOL(ldlm_lock_cancel);
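
/*
 * A sketch of cancelling a lock by handle once all references are dropped;
 * "lockh" is a hypothetical struct lustre_handle from the caller:
 *
 *	struct ldlm_lock *lock = ldlm_handle2lock(lockh);
 *
 *	if (lock != NULL) {
 *		ldlm_lock_cancel(lock);
 *		LDLM_LOCK_PUT(lock);
 *	}
 *
 * Note the LBUG() above: the lock must have no reader/writer references
 * left when ldlm_lock_cancel() is called.
 */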

/**
 * Set opaque data into the lock that only makes sense to the upper layer.
 */
int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
{
        struct ldlm_lock *lock = ldlm_handle2lock(lockh);
        int rc = -EINVAL;

        if (lock) {
                if (lock->l_ast_data == NULL)
                        lock->l_ast_data = data;
                if (lock->l_ast_data == data)
                        rc = 0;
                LDLM_LOCK_PUT(lock);
        }
        return rc;
}
EXPORT_SYMBOL(ldlm_lock_set_data);
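
/*
 * Note on the semantics above: the call succeeds (returns 0) either when
 * l_ast_data was unset and now holds \a data, or when it already held the
 * same \a data; it returns -EINVAL for a stale handle or when different
 * ast_data is already attached, so setting is effectively set-once.
 */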

struct export_cl_data {
        struct obd_export       *ecl_exp;
        int                      ecl_loop;
};

/**
 * Iterator function for ldlm_cancel_locks_for_export.
 * Cancels the locks passed to it.
 */
int ldlm_cancel_locks_for_export_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                    struct hlist_node *hnode, void *data)
{
        struct export_cl_data   *ecl = (struct export_cl_data *)data;
        struct obd_export       *exp = ecl->ecl_exp;
        struct ldlm_lock        *lock = cfs_hash_object(hs, hnode);
        struct ldlm_resource    *res;

        res = ldlm_resource_getref(lock->l_resource);
        LDLM_LOCK_GET(lock);

        LDLM_DEBUG(lock, "export %p", exp);
        ldlm_res_lvbo_update(res, NULL, 1);
        ldlm_lock_cancel(lock);
        ldlm_reprocess_all(res);
        ldlm_resource_putref(res);
        LDLM_LOCK_RELEASE(lock);

        ecl->ecl_loop++;
        /* log progress at every power-of-two iteration */
        if ((ecl->ecl_loop & -ecl->ecl_loop) == ecl->ecl_loop) {
                CDEBUG(D_INFO,
                       "Cancel lock %p for export %p (loop %d), still have "
                       "%d locks left on hash table.\n",
                       lock, exp, ecl->ecl_loop,
                       atomic_read(&hs->hs_count));
        }

        return 0;
}

/**
 * Cancel all locks for given export.
 *
 * Typically called on client disconnection/eviction
 */
void ldlm_cancel_locks_for_export(struct obd_export *exp)
{
        struct export_cl_data   ecl = {
                .ecl_exp        = exp,
                .ecl_loop       = 0,
        };

        cfs_hash_for_each_empty(exp->exp_lock_hash,
                                ldlm_cancel_locks_for_export_cb, &ecl);
}

/**
 * Downgrade an exclusive lock.
 *
 * A fast variant of ldlm_lock_convert for the conversion of exclusive
 * locks. The conversion is always successful.
 * Used by Commit on Sharing (COS) code.
 *
 * \param lock A lock to convert
 * \param new_mode new lock mode
 */
void ldlm_lock_downgrade(struct ldlm_lock *lock, int new_mode)
{
        LASSERT(lock->l_granted_mode & (LCK_PW | LCK_EX));
        LASSERT(new_mode == LCK_COS);

        lock_res_and_lock(lock);
        ldlm_resource_unlink_lock(lock);
        /*
         * Remove the lock from pool as it will be added again in
         * ldlm_grant_lock() called below.
         */
        ldlm_pool_del(&ldlm_lock_to_ns(lock)->ns_pool, lock);

        lock->l_req_mode = new_mode;
        ldlm_grant_lock(lock, NULL);
        unlock_res_and_lock(lock);
        ldlm_reprocess_all(lock->l_resource);
}
EXPORT_SYMBOL(ldlm_lock_downgrade);
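
/*
 * A sketch of a COS-style downgrade of a write lock; "lock" is assumed to
 * be a granted LCK_PW or LCK_EX lock held by a hypothetical caller:
 *
 *	ldlm_lock_downgrade(lock, LCK_COS);
 *
 * The LASSERTs above make the contract explicit: only exclusive modes may
 * be downgraded, and only to LCK_COS.
 */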

/**
 * Attempt to convert an already-granted lock to a different mode.
 *
 * While lock conversion is not currently used, future client-side
 * optimizations could take advantage of it to avoid discarding cached
 * pages on a file.
 */
struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
                                        __u32 *flags)
{
        LIST_HEAD(rpc_list);
        struct ldlm_resource *res;
        struct ldlm_namespace *ns;
        int granted = 0;
        struct ldlm_interval *node;

        /* Just return if mode is unchanged. */
        if (new_mode == lock->l_granted_mode) {
                *flags |= LDLM_FL_BLOCK_GRANTED;
                return lock->l_resource;
        }

        /* I can't check the type of the lock here because the lock's bitlock
         * is not held here, so do the allocation blindly. -jay */
        OBD_SLAB_ALLOC_PTR_GFP(node, ldlm_interval_slab, __GFP_IO);
        if (node == NULL)
                /* Actually, this causes EDEADLOCK to be returned */
                return NULL;

        LASSERTF((new_mode == LCK_PW && lock->l_granted_mode == LCK_PR),
                 "new_mode %u, granted %u\n", new_mode, lock->l_granted_mode);

        lock_res_and_lock(lock);

        res = lock->l_resource;
        ns  = ldlm_res_to_ns(res);

        lock->l_req_mode = new_mode;
        if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) {
                ldlm_resource_unlink_lock(lock);
        } else {
                ldlm_resource_unlink_lock(lock);
                if (res->lr_type == LDLM_EXTENT) {
                        /* FIXME: ugly code, I have to attach the lock to an
                         * interval node again since perhaps it will be granted
                         * soon */
                        INIT_LIST_HEAD(&node->li_group);
                        ldlm_interval_attach(node, lock);
                        node = NULL;
                }
        }

        /*
         * Remove the old lock from the pool before adding the lock with the
         * new mode below in ->policy()
         */
        ldlm_pool_del(&ns->ns_pool, lock);

        /* If this is a local resource, put it on the appropriate list. */
        if (ns_is_client(ldlm_res_to_ns(res))) {
                if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
                        ldlm_resource_add_lock(res, &res->lr_converting, lock);
                } else {
                        /* This should never happen, because of the way the
                         * server handles conversions. */
                        LDLM_ERROR(lock, "Erroneous flags %x on local lock\n",
                                   *flags);
                        LBUG();

                        ldlm_grant_lock(lock, &rpc_list);
                        granted = 1;
                        /* FIXME: completion handling not with lr_lock held! */
                        if (lock->l_completion_ast)
                                lock->l_completion_ast(lock, 0, NULL);
                }
        } else {
                CERROR("This is a client-side-only module; it cannot handle "
                       "LDLM_NAMESPACE_SERVER resource type locks.\n");
                LBUG();
        }
        unlock_res_and_lock(lock);

        if (granted)
                ldlm_run_ast_work(ns, &rpc_list, LDLM_WORK_CP_AST);
        if (node)
                OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
        return res;
}
EXPORT_SYMBOL(ldlm_lock_convert);
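
/*
 * A sketch of the only conversion the LASSERTF above permits, a granted
 * LCK_PR lock being converted to LCK_PW; "lock" and the initial flags come
 * from a hypothetical caller:
 *
 *	__u32 flags = LDLM_FL_BLOCK_GRANTED;
 *	struct ldlm_resource *res;
 *
 *	res = ldlm_lock_convert(lock, LCK_PW, &flags);
 *	if (res == NULL)
 *		return -EDEADLOCK;
 *
 * With a blocking flag set, the lock is parked on the resource's converting
 * list; a NULL return means the interval-node allocation failed.
 */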

/**
 * Print the description of the lock with lock handle \a lockh into the
 * debug log.
 *
 * Used when printing all locks on a resource for debug purposes.
 */
void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
{
        struct ldlm_lock *lock;

        if (!((libcfs_debug | D_ERROR) & level))
                return;

        lock = ldlm_handle2lock(lockh);
        if (lock == NULL)
                return;

        LDLM_DEBUG_LIMIT(level, lock, "###");

        LDLM_LOCK_PUT(lock);
}
EXPORT_SYMBOL(ldlm_lock_dump_handle);

/**
 * Print lock information with custom message into debug log.
 * Helper function.
 */
void _ldlm_lock_debug(struct ldlm_lock *lock,
                      struct libcfs_debug_msg_data *msgdata,
                      const char *fmt, ...)
{
        va_list args;
        struct obd_export *exp = lock->l_export;
        struct ldlm_resource *resource = lock->l_resource;
        char *nid = "local";

        va_start(args, fmt);

        if (exp && exp->exp_connection) {
                nid = libcfs_nid2str(exp->exp_connection->c_peer.nid);
        } else if (exp && exp->exp_obd != NULL) {
                struct obd_import *imp = exp->exp_obd->u.cli.cl_import;
                nid = libcfs_nid2str(imp->imp_connection->c_peer.nid);
        }

        if (resource == NULL) {
                libcfs_debug_vmsg2(msgdata, fmt, args,
                       " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
                       "res: \?\? rrc=\?\? type: \?\?\? flags: "LPX64" nid: %s "
                       "remote: "LPX64" expref: %d pid: %u timeout: %lu "
                       "lvb_type: %d\n",
                       lock,
                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
                       lock->l_readers, lock->l_writers,
                       ldlm_lockname[lock->l_granted_mode],
                       ldlm_lockname[lock->l_req_mode],
                       lock->l_flags, nid, lock->l_remote_handle.cookie,
                       exp ? atomic_read(&exp->exp_refcount) : -99,
                       lock->l_pid, lock->l_callback_timeout, lock->l_lvb_type);
                va_end(args);
                return;
        }

        switch (resource->lr_type) {
        case LDLM_EXTENT:
                libcfs_debug_vmsg2(msgdata, fmt, args,
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
                        "res: "DLDLMRES" rrc: %d type: %s ["LPU64"->"LPU64"] "
                        "(req "LPU64"->"LPU64") flags: "LPX64" nid: %s remote: "
                        LPX64" expref: %d pid: %u timeout: %lu lvb_type: %d\n",
                        ldlm_lock_to_ns_name(lock), lock,
                        lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
                        lock->l_readers, lock->l_writers,
                        ldlm_lockname[lock->l_granted_mode],
                        ldlm_lockname[lock->l_req_mode],
                        PLDLMRES(resource),
                        atomic_read(&resource->lr_refcount),
                        ldlm_typename[resource->lr_type],
                        lock->l_policy_data.l_extent.start,
                        lock->l_policy_data.l_extent.end,
                        lock->l_req_extent.start, lock->l_req_extent.end,
                        lock->l_flags, nid, lock->l_remote_handle.cookie,
                        exp ? atomic_read(&exp->exp_refcount) : -99,
                        lock->l_pid, lock->l_callback_timeout,
                        lock->l_lvb_type);
                break;

        case LDLM_FLOCK:
                libcfs_debug_vmsg2(msgdata, fmt, args,
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
                        "res: "DLDLMRES" rrc: %d type: %s pid: %d "
                        "["LPU64"->"LPU64"] flags: "LPX64" nid: %s "
                        "remote: "LPX64" expref: %d pid: %u timeout: %lu\n",
                        ldlm_lock_to_ns_name(lock), lock,
                        lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
                        lock->l_readers, lock->l_writers,
                        ldlm_lockname[lock->l_granted_mode],
                        ldlm_lockname[lock->l_req_mode],
                        PLDLMRES(resource),
                        atomic_read(&resource->lr_refcount),
                        ldlm_typename[resource->lr_type],
                        lock->l_policy_data.l_flock.pid,
                        lock->l_policy_data.l_flock.start,
                        lock->l_policy_data.l_flock.end,
                        lock->l_flags, nid, lock->l_remote_handle.cookie,
                        exp ? atomic_read(&exp->exp_refcount) : -99,
                        lock->l_pid, lock->l_callback_timeout);
                break;

        case LDLM_IBITS:
                libcfs_debug_vmsg2(msgdata, fmt, args,
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
                        "res: "DLDLMRES" bits "LPX64" rrc: %d type: %s "
                        "flags: "LPX64" nid: %s remote: "LPX64" expref: %d "
                        "pid: %u timeout: %lu lvb_type: %d\n",
                        ldlm_lock_to_ns_name(lock),
                        lock, lock->l_handle.h_cookie,
                        atomic_read(&lock->l_refc),
                        lock->l_readers, lock->l_writers,
                        ldlm_lockname[lock->l_granted_mode],
                        ldlm_lockname[lock->l_req_mode],
                        PLDLMRES(resource),
                        lock->l_policy_data.l_inodebits.bits,
                        atomic_read(&resource->lr_refcount),
                        ldlm_typename[resource->lr_type],
                        lock->l_flags, nid, lock->l_remote_handle.cookie,
                        exp ? atomic_read(&exp->exp_refcount) : -99,
                        lock->l_pid, lock->l_callback_timeout,
                        lock->l_lvb_type);
                break;

        default:
                libcfs_debug_vmsg2(msgdata, fmt, args,
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
                        "res: "DLDLMRES" rrc: %d type: %s flags: "LPX64" "
                        "nid: %s remote: "LPX64" expref: %d pid: %u "
                        "timeout: %lu lvb_type: %d\n",
                        ldlm_lock_to_ns_name(lock),
                        lock, lock->l_handle.h_cookie,
                        atomic_read(&lock->l_refc),
                        lock->l_readers, lock->l_writers,
                        ldlm_lockname[lock->l_granted_mode],
                        ldlm_lockname[lock->l_req_mode],
                        PLDLMRES(resource),
                        atomic_read(&resource->lr_refcount),
                        ldlm_typename[resource->lr_type],
                        lock->l_flags, nid, lock->l_remote_handle.cookie,
                        exp ? atomic_read(&exp->exp_refcount) : -99,
                        lock->l_pid, lock->l_callback_timeout,
                        lock->l_lvb_type);
                break;
        }
        va_end(args);
}
EXPORT_SYMBOL(_ldlm_lock_debug);