1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
3 *******************************************************************************
5 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
6 ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
9 *******************************************************************************
10 ******************************************************************************/
12 #include <linux/module.h>
14 #include "dlm_internal.h"
15 #include "lockspace.h"
25 #include "requestqueue.h"
/*
 * File-scope state for lockspace management.
 * ls_lock is held by dlm_new_lockspace() and dlm_release_lockspace()
 * around lockspace creation and teardown.
 */
30 static struct mutex ls_lock;
/* All known lockspaces, linked through dlm_ls.ls_list. */
31 static struct list_head lslist;
/* Held while lslist is walked or modified. */
32 static spinlock_t lslist_lock;
/* Task that dlm_scand_stop() hands to kthread_stop(). */
33 static struct task_struct * scand_task;
/*
 * sysfs store callback wired to the "control" attribute.
 * Parses an integer from buf with kstrtoint(), looks the lockspace up again
 * through dlm_find_lockspace_local() and calls dlm_put_lockspace() on it
 * afterwards.
 * NOTE(review): the statements that act on the parsed value, and the return
 * paths, are not visible in this excerpt.
 */
36 static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
40 int rc = kstrtoint(buf, 0, &n);
44 ls = dlm_find_lockspace_local(ls->ls_local_handle);
58 dlm_put_lockspace(ls);
/*
 * sysfs store callback wired to the "event_done" attribute.
 * Stores the integer parsed from buf in ls->ls_uevent_result, then sets
 * LSFL_UEVENT_WAIT and wakes ls_uevent_wait; do_uevent() sleeps on that
 * wait queue until the bit is set.
 */
62 static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
64 int rc = kstrtoint(buf, 0, &ls->ls_uevent_result);
68 set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
69 wake_up(&ls->ls_uevent_wait);
/*
 * sysfs show callback: formats ls->ls_global_id as unsigned decimal plus a
 * newline into buf (bounded by PAGE_SIZE) and returns the snprintf() result.
 * NOTE(review): sysfs show callbacks conventionally use sysfs_emit();
 * consider switching.
 */
73 static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
75 return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
/*
 * sysfs store callback for the "id" attribute: parses an unsigned integer
 * from buf directly into ls->ls_global_id with kstrtouint().
 * NOTE(review): the handling of rc is not visible in this excerpt.
 */
78 static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
80 int rc = kstrtouint(buf, 0, &ls->ls_global_id);
/*
 * sysfs show callback: prints the value of dlm_no_directory(ls) as unsigned
 * decimal plus a newline and returns the snprintf() result.
 */
87 static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
89 return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
/*
 * sysfs store callback for the "nodir" attribute: parses an integer from buf
 * and sets LSFL_NODIR in ls->ls_flags.
 * NOTE(review): the condition guarding set_bit() and the return paths are
 * not visible in this excerpt.
 */
92 static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
95 int rc = kstrtoint(buf, 0, &val);
100 set_bit(LSFL_NODIR, &ls->ls_flags);
/*
 * sysfs show callback: prints the value returned by dlm_recover_status(ls)
 * in hexadecimal plus a newline and returns the snprintf() result.
 */
104 static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
106 uint32_t status = dlm_recover_status(ls);
107 return snprintf(buf, PAGE_SIZE, "%x\n", status);
/*
 * sysfs show callback: prints ls->ls_recover_nodeid as signed decimal plus a
 * newline and returns the snprintf() result.
 */
110 static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
112 return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
/*
 * Members of the per-attribute descriptor (the struct header line is not
 * visible in this excerpt; the instances below use "struct dlm_attr").
 * attr is the embedded sysfs attribute; dlm_attr_show()/dlm_attr_store()
 * recover the descriptor from it with container_of() and dispatch to the
 * show/store callbacks, either of which may be NULL.
 */
116 struct attribute attr;
117 ssize_t (*show)(struct dlm_ls *, char *);
118 ssize_t (*store)(struct dlm_ls *, const char *, size_t);
/* "control": write-only for the owner, handled by dlm_control_store(). */
121 static struct dlm_attr dlm_attr_control = {
122 .attr = {.name = "control", .mode = S_IWUSR},
123 .store = dlm_control_store
/* "event_done": write-only for the owner, handled by dlm_event_store(). */
126 static struct dlm_attr dlm_attr_event = {
127 .attr = {.name = "event_done", .mode = S_IWUSR},
128 .store = dlm_event_store
/*
 * "id": world-readable, owner-writable.
 * NOTE(review): the .show initializer is not visible in this excerpt.
 */
131 static struct dlm_attr dlm_attr_id = {
132 .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
134 .store = dlm_id_store
/* "nodir": world-readable, owner-writable. */
137 static struct dlm_attr dlm_attr_nodir = {
138 .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
139 .show = dlm_nodir_show,
140 .store = dlm_nodir_store
/* "recover_status": read-only. */
143 static struct dlm_attr dlm_attr_recover_status = {
144 .attr = {.name = "recover_status", .mode = S_IRUGO},
145 .show = dlm_recover_status_show
/* "recover_nodeid": read-only. */
148 static struct dlm_attr dlm_attr_recover_nodeid = {
149 .attr = {.name = "recover_nodeid", .mode = S_IRUGO},
150 .show = dlm_recover_nodeid_show
/*
 * Attribute list from which ATTRIBUTE_GROUPS(dlm) below is built; dlm_ktype
 * references the result as dlm_groups.
 * NOTE(review): not every entry of the array is visible in this excerpt.
 */
153 static struct attribute *dlm_attrs[] = {
154 &dlm_attr_control.attr,
155 &dlm_attr_event.attr,
157 &dlm_attr_nodir.attr,
158 &dlm_attr_recover_status.attr,
159 &dlm_attr_recover_nodeid.attr,
162 ATTRIBUTE_GROUPS(dlm);
/*
 * Generic sysfs show entry point (see dlm_attr_ops).
 * Recovers the lockspace from its embedded kobject and the dlm_attr from its
 * embedded attribute, then forwards to the attribute's show callback.
 * Returns 0 when the attribute has no show callback.
 */
164 static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
167 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
168 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
169 return a->show ? a->show(ls, buf) : 0;
/*
 * Generic sysfs store entry point (see dlm_attr_ops).
 * Recovers the lockspace and the dlm_attr with container_of() and forwards
 * to the attribute's store callback. When the attribute has no store
 * callback the write is reported as fully consumed (returns len).
 */
172 static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
173 const char *buf, size_t len)
175 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
176 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
177 return a->store ? a->store(ls, buf, len) : len;
/*
 * Release callback installed in dlm_ktype; recovers the dlm_ls that embeds
 * the kobject k.
 * NOTE(review): the rest of the body is not visible here; comments in
 * new_lockspace() and release_lockspace() say the ls structure is freed once
 * the kobject is done with, so presumably this is where it is freed.
 */
180 static void lockspace_kobj_release(struct kobject *k)
182 struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
/* sysfs operations that route every attribute access through dlm_attr. */
186 static const struct sysfs_ops dlm_attr_ops = {
187 .show = dlm_attr_show,
188 .store = dlm_attr_store,
/*
 * kobject type used for ls->ls_kobj in new_lockspace(): default attribute
 * groups, the sysfs operations above and the release callback.
 */
191 static struct kobj_type dlm_ktype = {
192 .default_groups = dlm_groups,
193 .sysfs_ops = &dlm_attr_ops,
194 .release = lockspace_kobj_release,
/*
 * kset created in dlm_lockspace_init() and unregistered in
 * dlm_lockspace_exit(); new_lockspace() assigns it to ls->ls_kobj.kset.
 */
197 static struct kset *dlm_kset;
/*
 * Emit a KOBJ_ONLINE or KOBJ_OFFLINE uevent for the lockspace kobject and
 * block until LSFL_UEVENT_WAIT is set (dlm_event_store() sets it and wakes
 * the wait queue). The bit is cleared as part of the wait condition.
 * Returns ls->ls_uevent_result, the value written through "event_done".
 * NOTE(review): the branch choosing between the two uevents is not visible
 * in this excerpt; the log text suggests nonzero "in" means joining.
 */
199 static int do_uevent(struct dlm_ls *ls, int in)
202 kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
204 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
206 log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving");
208 /* dlm_controld will see the uevent, do the necessary group management
209 and then write to sysfs to wake us */
211 wait_event(ls->ls_uevent_wait,
212 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
214 log_rinfo(ls, "group event done %d", ls->ls_uevent_result);
216 return ls->ls_uevent_result;
/*
 * kset uevent callback (see dlm_uevent_ops): adds a LOCKSPACE=<ls_name>
 * variable to the environment of uevents sent for a lockspace kobject.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
219 static int dlm_uevent(struct kset *kset, struct kobject *kobj,
220 struct kobj_uevent_env *env)
222 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
224 add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
/* uevent operations passed to kset_create_and_add() in dlm_lockspace_init(). */
228 static const struct kset_uevent_ops dlm_uevent_ops = {
229 .uevent = dlm_uevent,
/*
 * Module-init time setup: initializes ls_lock, lslist and lslist_lock and
 * creates the "dlm" kset under kernel_kobj with dlm_uevent_ops.
 * A warning is printed when the kset cannot be created.
 * NOTE(review): the failure check and return values are not visible in this
 * excerpt.
 */
232 int __init dlm_lockspace_init(void)
235 mutex_init(&ls_lock);
236 INIT_LIST_HEAD(&lslist);
237 spin_lock_init(&lslist_lock);
239 dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
241 printk(KERN_WARNING "%s: can not create kset\n", __func__);
/* Unregisters the kset created by dlm_lockspace_init(). */
247 void dlm_lockspace_exit(void)
249 kset_unregister(dlm_kset);
/*
 * Walk lslist under lslist_lock looking for a lockspace whose ls_scan_time
 * is at least dlm_config.ci_scan_secs seconds in the past. The lock is
 * dropped both when a match is found and when the walk ends without one.
 * NOTE(review): the return statements are not visible in this excerpt;
 * dlm_scand() uses the result as the lockspace to scan.
 */
252 static struct dlm_ls *find_ls_to_scan(void)
256 spin_lock(&lslist_lock);
257 list_for_each_entry(ls, &lslist, ls_list) {
258 if (time_after_eq(jiffies, ls->ls_scan_time +
259 dlm_config.ci_scan_secs * HZ)) {
260 spin_unlock(&lslist_lock);
264 spin_unlock(&lslist_lock);
/*
 * Body of the "dlm_scand" kernel thread (started by dlm_scand_start()).
 * Until asked to stop, it picks a lockspace with find_ls_to_scan(). When
 * dlm_lock_recovery_try() succeeds it stamps ls_scan_time with jiffies, runs
 * dlm_scan_timeout() and dlm_scan_waiters(), and calls dlm_unlock_recovery().
 * ls_scan_time is also pushed forward by HZ, and the thread sleeps
 * interruptibly for ci_scan_secs seconds.
 * NOTE(review): the branch structure (else paths, continue statements) is
 * not fully visible in this excerpt, so exactly when the HZ bump and the
 * sleep happen should be confirmed against the full source.
 */
268 static int dlm_scand(void *data)
272 while (!kthread_should_stop()) {
273 ls = find_ls_to_scan();
275 if (dlm_lock_recovery_try(ls)) {
276 ls->ls_scan_time = jiffies;
278 dlm_scan_timeout(ls);
279 dlm_scan_waiters(ls);
280 dlm_unlock_recovery(ls);
282 ls->ls_scan_time += HZ;
286 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
/*
 * Starts the scan thread: runs dlm_scand() in a kthread named "dlm_scand".
 * NOTE(review): error handling and the assignment to scand_task are not
 * visible in this excerpt.
 */
291 static int dlm_scand_start(void)
293 struct task_struct *p;
296 p = kthread_run(dlm_scand, NULL, "dlm_scand");
/* Stops the scan thread by calling kthread_stop() on scand_task. */
304 static void dlm_scand_stop(void)
306 kthread_stop(scand_task);
/*
 * Look a lockspace up by its global id: walks lslist under lslist_lock and
 * matches on ls->ls_global_id.
 * NOTE(review): what happens on a match and the return statements are not
 * visible in this excerpt; callers in this file pair the sibling lookup
 * dlm_find_lockspace_local() with dlm_put_lockspace().
 */
309 struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
313 spin_lock(&lslist_lock);
315 list_for_each_entry(ls, &lslist, ls_list) {
316 if (ls->ls_global_id == id) {
323 spin_unlock(&lslist_lock);
/*
 * Look a lockspace up by its local handle: walks lslist under lslist_lock
 * and matches on ls->ls_local_handle (new_lockspace() sets that handle to
 * the dlm_ls pointer itself).
 * NOTE(review): what happens on a match and the return statements are not
 * visible in this excerpt; callers in this file pair this lookup with
 * dlm_put_lockspace().
 */
327 struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
331 spin_lock(&lslist_lock);
332 list_for_each_entry(ls, &lslist, ls_list) {
333 if (ls->ls_local_handle == lockspace) {
340 spin_unlock(&lslist_lock);
/*
 * Look a lockspace up by misc-device minor number: walks lslist under
 * lslist_lock and matches on ls->ls_device.minor.
 * NOTE(review): what happens on a match and the return statements are not
 * visible in this excerpt.
 */
344 struct dlm_ls *dlm_find_lockspace_device(int minor)
348 spin_lock(&lslist_lock);
349 list_for_each_entry(ls, &lslist, ls_list) {
350 if (ls->ls_device.minor == minor) {
357 spin_unlock(&lslist_lock);
/*
 * Counterpart of the dlm_find_lockspace_*() lookups, called by users in this
 * file once they are done with the lockspace. Runs under lslist_lock.
 * NOTE(review): the statement executed under the lock is not visible in
 * this excerpt; presumably it drops ls->ls_count, which remove_lockspace()
 * waits to see reach zero. Confirm against the full source.
 */
361 void dlm_put_lockspace(struct dlm_ls *ls)
363 spin_lock(&lslist_lock);
365 spin_unlock(&lslist_lock);
/*
 * Take ls off lslist once nobody uses it any more.
 * Under lslist_lock: when ls->ls_count is zero the entry is unlinked (with a
 * warning if ls_create_count is still nonzero) and the lock is dropped.
 * Otherwise the lock is dropped without unlinking.
 * NOTE(review): the retry/wait logic for the nonzero case is not visible in
 * this excerpt.
 */
368 static void remove_lockspace(struct dlm_ls *ls)
371 spin_lock(&lslist_lock);
372 if (ls->ls_count == 0) {
373 WARN_ON(ls->ls_create_count != 0);
374 list_del(&ls->ls_list);
375 spin_unlock(&lslist_lock);
378 spin_unlock(&lslist_lock);
/*
 * Start the threads shared by all lockspaces: the scan thread first, then
 * midcomms. A message is logged when either start call reports an error.
 * NOTE(review): the error checks, unwinding and return are not visible in
 * this excerpt.
 * NOTE(review): the second log message says "lowcomms" although the call
 * that failed is dlm_midcomms_start(); consider aligning the text.
 */
383 static int threads_start(void)
387 error = dlm_scand_start();
389 log_print("cannot start dlm_scand thread %d", error);
393 /* Thread for sending/receiving messages for all lockspaces */
394 error = dlm_midcomms_start();
396 log_print("cannot start dlm lowcomms %d", error);
/*
 * Create the lockspace called name, or reuse one with that name already on
 * lslist.
 *
 * Visible steps, in order:
 *  - validate the name length (1..DLM_LOCKSPACE_LEN) and lvblen (nonzero
 *    multiple of 8), take a module reference and require the user daemon;
 *  - report -EOPNOTSUPP through *ops_result when recovery callbacks are
 *    disabled in dlm_config, and compare the cluster names;
 *  - search lslist for an existing lockspace with the same name, bumping its
 *    ls_create_count;
 *  - otherwise allocate a dlm_ls with room for the name, initialize all of
 *    its lists, locks and tables, and add it to lslist;
 *  - start dlm_callback (only with DLM_LSFL_FS) and dlm_recoverd, register
 *    the kobject, send the join uevent and wait for the member list;
 *  - on failure, undo the steps above in reverse order.
 *
 * NOTE(review): many lines (error checks, goto labels, returns, the
 * DLM_LSFL_NEWEXCL branch) are not visible in this excerpt, so the exact
 * return values and which unwind step each failure jumps to should be
 * confirmed against the full source.
 */
408 static int new_lockspace(const char *name, const char *cluster,
409 uint32_t flags, int lvblen,
410 const struct dlm_lockspace_ops *ops, void *ops_arg,
411 int *ops_result, dlm_lockspace_t **lockspace)
416 int namelen = strlen(name);
418 if (namelen > DLM_LOCKSPACE_LEN || namelen == 0)
421 if (!lvblen || (lvblen % 8))
424 if (!try_module_get(THIS_MODULE))
427 if (!dlm_user_daemon_available()) {
428 log_print("dlm user daemon not available");
433 if (ops && ops_result) {
434 if (!dlm_config.ci_recover_callbacks)
435 *ops_result = -EOPNOTSUPP;
441 log_print("dlm cluster name '%s' is being used without an application provided cluster name",
442 dlm_config.ci_cluster_name);
444 if (dlm_config.ci_recover_callbacks && cluster &&
445 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
446 log_print("dlm cluster name '%s' does not match "
447 "the application cluster name '%s'",
448 dlm_config.ci_cluster_name, cluster);
/* Look for an existing lockspace with the same name (length, then bytes). */
455 spin_lock(&lslist_lock);
456 list_for_each_entry(ls, &lslist, ls_list) {
457 WARN_ON(ls->ls_create_count <= 0);
458 if (ls->ls_namelen != namelen)
460 if (memcmp(ls->ls_name, name, namelen))
462 if (flags & DLM_LSFL_NEWEXCL) {
466 ls->ls_create_count++;
471 spin_unlock(&lslist_lock);
/* No existing lockspace was used: allocate one with the name appended. */
478 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
481 memcpy(ls->ls_name, name, namelen);
482 ls->ls_namelen = namelen;
483 ls->ls_lvblen = lvblen;
486 ls->ls_scan_time = jiffies;
/* Caller-supplied ops are only honoured when recovery callbacks are on. */
488 if (ops && dlm_config.ci_recover_callbacks) {
490 ls->ls_ops_arg = ops_arg;
493 if (flags & DLM_LSFL_TIMEWARN)
494 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
496 /* ls_exflags are forced to match among nodes, and we don't
497 need to require all nodes to have some flags set */
498 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
/* Resource table: ci_rsbtbl_size buckets, each with keep/toss trees. */
501 size = dlm_config.ci_rsbtbl_size;
502 ls->ls_rsbtbl_size = size;
504 ls->ls_rsbtbl = vmalloc(array_size(size, sizeof(struct dlm_rsbtable)));
507 for (i = 0; i < size; i++) {
508 ls->ls_rsbtbl[i].keep.rb_node = NULL;
509 ls->ls_rsbtbl[i].toss.rb_node = NULL;
510 spin_lock_init(&ls->ls_rsbtbl[i].lock);
513 spin_lock_init(&ls->ls_remove_spin);
515 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
516 ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1,
518 if (!ls->ls_remove_names[i])
522 idr_init(&ls->ls_lkbidr);
523 spin_lock_init(&ls->ls_lkbidr_spin);
525 INIT_LIST_HEAD(&ls->ls_waiters);
526 mutex_init(&ls->ls_waiters_mutex);
527 INIT_LIST_HEAD(&ls->ls_orphans);
528 mutex_init(&ls->ls_orphans_mutex);
529 INIT_LIST_HEAD(&ls->ls_timeout);
530 mutex_init(&ls->ls_timeout_mutex);
532 INIT_LIST_HEAD(&ls->ls_new_rsb);
533 spin_lock_init(&ls->ls_new_rsb_spin);
535 INIT_LIST_HEAD(&ls->ls_nodes);
536 INIT_LIST_HEAD(&ls->ls_nodes_gone);
537 ls->ls_num_nodes = 0;
538 ls->ls_low_nodeid = 0;
539 ls->ls_total_weight = 0;
540 ls->ls_node_array = NULL;
542 memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
543 ls->ls_stub_rsb.res_ls = ls;
545 ls->ls_debug_rsb_dentry = NULL;
546 ls->ls_debug_waiters_dentry = NULL;
548 init_waitqueue_head(&ls->ls_uevent_wait);
549 ls->ls_uevent_result = 0;
550 init_completion(&ls->ls_members_done);
551 ls->ls_members_result = -1;
553 mutex_init(&ls->ls_cb_mutex);
554 INIT_LIST_HEAD(&ls->ls_cb_delay);
556 ls->ls_recoverd_task = NULL;
557 mutex_init(&ls->ls_recoverd_active);
558 spin_lock_init(&ls->ls_recover_lock);
559 spin_lock_init(&ls->ls_rcom_spin);
560 get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
561 ls->ls_recover_status = 0;
562 ls->ls_recover_seq = 0;
563 ls->ls_recover_args = NULL;
564 init_rwsem(&ls->ls_in_recovery);
565 init_rwsem(&ls->ls_recv_active);
566 INIT_LIST_HEAD(&ls->ls_requestqueue);
567 mutex_init(&ls->ls_requestqueue_mutex);
568 mutex_init(&ls->ls_clear_proc_locks);
570 ls->ls_recover_buf = kmalloc(LOWCOMMS_MAX_TX_BUFFER_LEN, GFP_NOFS);
571 if (!ls->ls_recover_buf)
575 ls->ls_num_slots = 0;
576 ls->ls_slots_size = 0;
579 INIT_LIST_HEAD(&ls->ls_recover_list);
580 spin_lock_init(&ls->ls_recover_list_lock);
581 idr_init(&ls->ls_recover_idr);
582 spin_lock_init(&ls->ls_recover_idr_lock);
583 ls->ls_recover_list_count = 0;
/* The local handle is the dlm_ls pointer itself. */
584 ls->ls_local_handle = ls;
585 init_waitqueue_head(&ls->ls_wait_general);
586 INIT_LIST_HEAD(&ls->ls_root_list);
587 init_rwsem(&ls->ls_root_sem);
/* Publish the new lockspace on lslist with a create count of one. */
589 spin_lock(&lslist_lock);
590 ls->ls_create_count = 1;
591 list_add(&ls->ls_list, &lslist);
592 spin_unlock(&lslist_lock);
594 if (flags & DLM_LSFL_FS) {
595 error = dlm_callback_start(ls);
597 log_error(ls, "can't start dlm_callback %d", error);
602 init_waitqueue_head(&ls->ls_recover_lock_wait);
605 * Once started, dlm_recoverd first looks for ls in lslist, then
606 * initializes ls_in_recovery as locked in "down" mode. We need
607 * to wait for the wakeup from dlm_recoverd because in_recovery
608 * has to start out in down mode.
611 error = dlm_recoverd_start(ls);
613 log_error(ls, "can't start dlm_recoverd %d", error);
617 wait_event(ls->ls_recover_lock_wait,
618 test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
620 /* let kobject handle freeing of ls if there's an error */
623 ls->ls_kobj.kset = dlm_kset;
624 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
628 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
630 /* This uevent triggers dlm_controld in userspace to add us to the
631 group of nodes that are members of this lockspace (managed by the
632 cluster infrastructure.) Once it's done that, it tells us who the
633 current lockspace members are (via configfs) and then tells the
634 lockspace to start running (via sysfs) in dlm_ls_start(). */
636 error = do_uevent(ls, 1);
640 wait_for_completion(&ls->ls_members_done);
641 error = ls->ls_members_result;
645 dlm_create_debug_file(ls);
647 log_rinfo(ls, "join complete");
/* Error unwinding: release what was set up above, newest first. */
653 dlm_clear_members(ls);
654 kfree(ls->ls_node_array);
656 dlm_recoverd_stop(ls);
658 dlm_callback_stop(ls);
660 spin_lock(&lslist_lock);
661 list_del(&ls->ls_list);
662 spin_unlock(&lslist_lock);
663 idr_destroy(&ls->ls_recover_idr);
664 kfree(ls->ls_recover_buf);
666 idr_destroy(&ls->ls_lkbidr);
668 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
669 kfree(ls->ls_remove_names[i]);
670 vfree(ls->ls_rsbtbl);
673 kobject_put(&ls->ls_kobj);
/* Drop the module reference taken by try_module_get() at the top. */
677 module_put(THIS_MODULE);
/*
 * Public entry point for creating (or joining) a lockspace.
 * Under ls_lock it starts the shared threads with threads_start() and then
 * calls new_lockspace() with the caller's arguments unchanged.
 * dlm_midcomms_shutdown() appears on one of the paths before the unlock.
 * NOTE(review): the conditions around threads_start(), the shutdown call
 * and the return value are not visible in this excerpt.
 */
681 int dlm_new_lockspace(const char *name, const char *cluster,
682 uint32_t flags, int lvblen,
683 const struct dlm_lockspace_ops *ops, void *ops_arg,
684 int *ops_result, dlm_lockspace_t **lockspace)
688 mutex_lock(&ls_lock);
690 error = threads_start();
694 error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
695 ops_result, lockspace);
702 dlm_midcomms_shutdown();
706 mutex_unlock(&ls_lock);
/*
 * idr_for_each() callback used by lockspace_busy() for force == 1.
 * Returns nonzero for an lkb whose lkb_nodeid is 0 and whose granted mode is
 * not DLM_LOCK_IV; id and data are unused.
 */
710 static int lkb_idr_is_local(int id, void *p, void *data)
712 struct dlm_lkb *lkb = p;
714 return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
/*
 * idr_for_each() callback used by lockspace_busy().
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * reports every lkb as a match.
 */
717 static int lkb_idr_is_any(int id, void *p, void *data)
/*
 * idr_for_each() callback used by release_lockspace() while tearing down the
 * lkb idr. Frees the lvb of an lkb that has one and is flagged
 * DLM_IFL_MSTCPY.
 * NOTE(review): the remaining statements (freeing the lkb itself, return
 * value) are not visible in this excerpt.
 */
722 static int lkb_idr_free(int id, void *p, void *data)
724 struct dlm_lkb *lkb = p;
726 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
727 dlm_free_lvb(lkb->lkb_lvbptr);
733 /* NOTE: We check the lkbidr here rather than the resource table.
734 This is because there may be LKBs queued as ASTs that have been unlinked
735 from their RSBs and are pending deletion once the AST has been delivered */
/*
 * Decide whether the lockspace still has lkbs that block its release.
 * Holding ls_lkbidr_spin, the lkb idr is walked with lkb_idr_is_any() or,
 * for force == 1, with lkb_idr_is_local(); rv receives the idr_for_each()
 * result.
 * NOTE(review): the first condition of the if chain, the final else and the
 * return are not visible in this excerpt.
 */
737 static int lockspace_busy(struct dlm_ls *ls, int force)
741 spin_lock(&ls->ls_lkbidr_spin);
743 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
744 } else if (force == 1) {
745 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
749 spin_unlock(&ls->ls_lkbidr_spin);
/*
 * Drop one creator reference on ls and, when it was the last one, tear the
 * lockspace down completely.
 *
 * Visible steps, in order:
 *  - ask lockspace_busy() whether lkbs still block the release;
 *  - under lslist_lock, either zero ls_create_count (last creator) or
 *    decrement it and keep the lockspace;
 *  - deregister the device, stop dlm_recoverd, shut down midcomms, stop the
 *    callback thread and take ls off lslist with remove_lockspace();
 *  - free the recovery idr and buffer, every lkb, every rsb on the keep/toss
 *    trees and the new_rsb list, the remove-name buffers, the request queue,
 *    the recover args and the member lists;
 *  - drop the kobject reference (which frees ls) and the module reference.
 *
 * NOTE(review): several conditions, the rsb free calls and the return
 * statements are not visible in this excerpt; confirm the return values and
 * the exact conditions for the uevent and the midcomms shutdown against the
 * full source.
 */
753 static int release_lockspace(struct dlm_ls *ls, int force)
759 busy = lockspace_busy(ls, force);
761 spin_lock(&lslist_lock);
762 if (ls->ls_create_count == 1) {
766 /* remove_lockspace takes ls off lslist */
767 ls->ls_create_count = 0;
770 } else if (ls->ls_create_count > 1) {
771 rv = --ls->ls_create_count;
775 spin_unlock(&lslist_lock);
778 log_debug(ls, "release_lockspace no remove %d", rv);
782 dlm_device_deregister(ls);
784 if (force < 3 && dlm_user_daemon_available())
787 dlm_recoverd_stop(ls);
791 dlm_midcomms_shutdown();
794 dlm_callback_stop(ls);
796 remove_lockspace(ls);
798 dlm_delete_debug_file(ls);
800 idr_destroy(&ls->ls_recover_idr);
801 kfree(ls->ls_recover_buf);
804 * Free all lkb's in idr
807 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
808 idr_destroy(&ls->ls_lkbidr);
811 * Free all rsb's on rsbtbl[] lists
814 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
815 while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
816 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
817 rb_erase(n, &ls->ls_rsbtbl[i].keep);
821 while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
822 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
823 rb_erase(n, &ls->ls_rsbtbl[i].toss);
828 vfree(ls->ls_rsbtbl);
830 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
831 kfree(ls->ls_remove_names[i]);
833 while (!list_empty(&ls->ls_new_rsb)) {
834 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
836 list_del(&rsb->res_hashchain);
841 * Free structures on any other lists
844 dlm_purge_requestqueue(ls);
845 kfree(ls->ls_recover_args);
846 dlm_clear_members(ls);
847 dlm_clear_members_gone(ls);
848 kfree(ls->ls_node_array);
849 log_rinfo(ls, "release_lockspace final free");
850 kobject_put(&ls->ls_kobj);
851 /* The ls structure will be freed when the kobject is done with */
853 module_put(THIS_MODULE);
858 * Called when a system has released all its locks and is not going to use the
859 * lockspace any longer. We free everything we're managing for this lockspace.
860 * Remaining nodes will go through the recovery process as if we'd died. The
861 * lockspace must continue to function as usual, participating in recoveries,
862 * until this returns.
864 * Force has 4 possible values:
865 * 0 - don't destroy lockspace if it has any LKBs
866 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
867 * 2 - destroy lockspace regardless of LKBs
868 * 3 - destroy lockspace as part of a forced shutdown
/*
 * Public entry point: resolves the opaque handle with
 * dlm_find_lockspace_local(), immediately calls dlm_put_lockspace() on the
 * result, and then runs release_lockspace() under ls_lock.
 * NOTE(review): the handling of a failed lookup, the code between the
 * release and the unlock, and the return value are not visible in this
 * excerpt.
 */
871 int dlm_release_lockspace(void *lockspace, int force)
876 ls = dlm_find_lockspace_local(lockspace);
879 dlm_put_lockspace(ls);
881 mutex_lock(&ls_lock);
882 error = release_lockspace(ls, force);
887 mutex_unlock(&ls_lock);
/*
 * Walk lslist under lslist_lock and test LSFL_RUNNING on each lockspace.
 * The lock is dropped inside the loop before an error is logged about the
 * missing userland control daemon, and again after the loop; the final
 * message reports count as the number of lockspaces the daemon left.
 * NOTE(review): the statements that stop a lockspace, restart the walk and
 * maintain count are not visible in this excerpt.
 */
892 void dlm_stop_lockspaces(void)
899 spin_lock(&lslist_lock);
900 list_for_each_entry(ls, &lslist, ls_list) {
901 if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) {
905 spin_unlock(&lslist_lock);
906 log_error(ls, "no userland control daemon, stopping lockspace");
910 spin_unlock(&lslist_lock);
913 log_print("dlm user daemon left %d lockspaces", count);