staging: lustre: move libcfs to lnet layer

author James Simmons <jsimmons@infradead.org>

Tue, 8 Mar 2016 22:35:26 +0000 (17:35 -0500)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 11 Mar 2016 01:48:53 +0000 (17:48 -0800)
author James Simmons <jsimmons@infradead.org>
Tue, 8 Mar 2016 22:35:26 +0000 (17:35 -0500)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 11 Mar 2016 01:48:53 +0000 (17:48 -0800)
diff --git a/drivers/staging/lustre/lnet/Makefile b/drivers/staging/lustre/lnet/Makefile

index f6f03e3..0a380fe 100644 (file)
--- a/drivers/staging/lustre/lnet/Makefile
+++ b/drivers/staging/lustre/lnet/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_LNET) += lnet/ klnds/ selftest/
+obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/
diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile

new file mode 100644 (file)

index 0000000..8c89455
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/Makefile
@@ -0,0 +1,17 @@
+# Build libcfs.o whenever CONFIG_LNET is enabled.
+obj-$(CONFIG_LNET) += libcfs.o
+
+# Objects whose sources live in the linux/ subdirectory; they are listed by
+# bare name here and get the linux/ prefix added further down.
+libcfs-linux-objs := linux-tracefile.o linux-debug.o
+libcfs-linux-objs += linux-prim.o linux-cpu.o
+libcfs-linux-objs += linux-curproc.o
+libcfs-linux-objs += linux-module.o
+libcfs-linux-objs += linux-crypto.o
+libcfs-linux-objs += linux-crypto-adler.o
+libcfs-linux-objs += linux-mem.o
+
+# Prepend linux/ to every object collected above.
+libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
+
+# Objects whose sources live directly in this directory.
+libcfs-all-objs := debug.o fail.o module.o tracefile.o \
+                  libcfs_string.o hash.o prng.o workitem.o \
+                  libcfs_cpu.o libcfs_mem.o libcfs_lock.o
+
+# Full object list that makes up libcfs.o.
+libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c

new file mode 100644 (file)

index 0000000..c90e510
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -0,0 +1,560 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/debug.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ */
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include "tracefile.h"
+
+static char debug_file_name[1024];
+
+unsigned int libcfs_subsystem_debug = ~0;
+EXPORT_SYMBOL(libcfs_subsystem_debug);
+module_param(libcfs_subsystem_debug, int, 0644);
+MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask");
+
+unsigned int libcfs_debug = (D_CANTMASK |
+                            D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
+EXPORT_SYMBOL(libcfs_debug);
+module_param(libcfs_debug, int, 0644);
+MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask");
+
+/*
+ * .set handler of the "debugmb" parameter type.
+ *
+ * Parses @val as an unsigned integer.  While the value stored behind
+ * kp->arg is still zero the number is only recorded there; otherwise it is
+ * passed to cfs_trace_set_debug_mb() and, on success, the stored value is
+ * refreshed from cfs_trace_get_debug_mb().
+ *
+ * Returns 0 on success, or the error reported by kstrtouint() or
+ * cfs_trace_set_debug_mb().
+ */
+static int libcfs_param_debug_mb_set(const char *val,
+                                    const struct kernel_param *kp)
+{
+       unsigned int *stored;
+       unsigned int mb;
+       int err;
+
+       err = kstrtouint(val, 0, &mb);
+       if (err < 0)
+               return err;
+
+       stored = (unsigned int *)kp->arg;
+       if (*stored == 0) {
+               /* nothing stored yet: remember the request and stop here */
+               *stored = mb;
+               return 0;
+       }
+
+       err = cfs_trace_set_debug_mb(mb);
+       if (err)
+               return err;
+
+       *stored = cfs_trace_get_debug_mb();
+       return 0;
+}
+
+/* While the debug_mb setting looks like an unsigned int, in fact
+ * it needs quite a bit of extra processing, so we define a special
+ * debugmb parameter type with corresponding methods to handle this case
+ */
+static struct kernel_param_ops param_ops_debugmb = {
+       .set = libcfs_param_debug_mb_set,
+       .get = param_get_uint,
+};
+
+#define param_check_debugmb(name, p) \
+               __param_check(name, p, unsigned int)
+
+static unsigned int libcfs_debug_mb;
+module_param(libcfs_debug_mb, debugmb, 0644);
+MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size.");
+
+unsigned int libcfs_printk = D_CANTMASK;
+module_param(libcfs_printk, uint, 0644);
+MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask");
+
+unsigned int libcfs_console_ratelimit = 1;
+module_param(libcfs_console_ratelimit, uint, 0644);
+MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)");
+
+/*
+ * Common setter for the console delay parameters.
+ *
+ * @val is parsed as a signed integer, converted with cfs_time_seconds() and
+ * divided by 100.  The result is stored through kp->arg only when it lies
+ * within [@min, @max].
+ *
+ * Returns 0 on success, -EINVAL when @val cannot be parsed or the converted
+ * value is out of range.
+ */
+static int param_set_delay_minmax(const char *val,
+                                 const struct kernel_param *kp,
+                                 long min, long max)
+{
+       int secs;
+       long delay;
+
+       if (kstrtoint(val, 0, &secs))
+               return -EINVAL;
+
+       delay = cfs_time_seconds(secs) / 100;
+       if (!(delay >= min && delay <= max))
+               return -EINVAL;
+
+       *(unsigned int *)kp->arg = delay;
+       return 0;
+}
+
+/*
+ * .get handler shared by the console delay parameters: multiplies the
+ * stored value by 100, converts it with cfs_duration_sec() and prints the
+ * result into @buffer as an unsigned decimal.
+ *
+ * Returns the number of characters written by sprintf().
+ */
+static int param_get_delay(char *buffer, const struct kernel_param *kp)
+{
+       const unsigned int *stored = kp->arg;
+       unsigned int secs = (unsigned int)cfs_duration_sec(*stored * 100);
+
+       return sprintf(buffer, "%u", secs);
+}
+
+unsigned int libcfs_console_max_delay;
+unsigned int libcfs_console_min_delay;
+
+/*
+ * .set handler for libcfs_console_max_delay: accepts a value only if the
+ * converted delay is between the current libcfs_console_min_delay and
+ * INT_MAX (see param_set_delay_minmax() for the conversion).
+ */
+static int param_set_console_max_delay(const char *val,
+                                      const struct kernel_param *kp)
+{
+       return param_set_delay_minmax(val, kp,
+                                     libcfs_console_min_delay, INT_MAX);
+}
+
+static struct kernel_param_ops param_ops_console_max_delay = {
+       .set = param_set_console_max_delay,
+       .get = param_get_delay,
+};
+
+#define param_check_console_max_delay(name, p) \
+               __param_check(name, p, unsigned int)
+
+module_param(libcfs_console_max_delay, console_max_delay, 0644);
+MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)");
+
+/*
+ * .set handler for libcfs_console_min_delay: accepts a value only if the
+ * converted delay is between 1 and the current libcfs_console_max_delay
+ * (see param_set_delay_minmax() for the conversion).
+ */
+static int param_set_console_min_delay(const char *val,
+                                      const struct kernel_param *kp)
+{
+       return param_set_delay_minmax(val, kp,
+                                     1, libcfs_console_max_delay);
+}
+
+static struct kernel_param_ops param_ops_console_min_delay = {
+       .set = param_set_console_min_delay,
+       .get = param_get_delay,
+};
+
+#define param_check_console_min_delay(name, p) \
+               __param_check(name, p, unsigned int)
+
+module_param(libcfs_console_min_delay, console_min_delay, 0644);
+MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)");
+
+/*
+ * Parse @val as an unsigned integer and store it through kp->arg when it
+ * lies within [@min, @max].
+ *
+ * Returns 0 on success, -EINVAL when @val is NULL, cannot be parsed, or is
+ * out of range.
+ */
+static int param_set_uint_minmax(const char *val,
+                                const struct kernel_param *kp,
+                                unsigned int min, unsigned int max)
+{
+       unsigned int parsed;
+
+       if (!val)
+               return -EINVAL;
+
+       if (kstrtouint(val, 0, &parsed) < 0)
+               return -EINVAL;
+
+       if (parsed < min || parsed > max)
+               return -EINVAL;
+
+       *(unsigned int *)kp->arg = parsed;
+       return 0;
+}
+
+/*
+ * .set handler of the "uintpos" parameter type: accepts any unsigned value
+ * of at least 1.  The -1 upper bound is converted to the largest
+ * unsigned int because the callee's max parameter is unsigned.
+ */
+static int param_set_uintpos(const char *val, const struct kernel_param *kp)
+{
+       return param_set_uint_minmax(val, kp, 1, -1);
+}
+
+static struct kernel_param_ops param_ops_uintpos = {
+       .set = param_set_uintpos,
+       .get = param_get_uint,
+};
+
+#define param_check_uintpos(name, p) \
+               __param_check(name, p, unsigned int)
+
+unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
+module_param(libcfs_console_backoff, uintpos, 0644);
+MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor");
+
+unsigned int libcfs_debug_binary = 1;
+
+unsigned int libcfs_stack = 3 * THREAD_SIZE / 4;
+EXPORT_SYMBOL(libcfs_stack);
+
+unsigned int libcfs_catastrophe;
+EXPORT_SYMBOL(libcfs_catastrophe);
+
+unsigned int libcfs_panic_on_lbug = 1;
+module_param(libcfs_panic_on_lbug, uint, 0644);
+MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
+
+static wait_queue_head_t debug_ctlwq;
+
+char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
+
+/* We need to pass a pointer here, but elsewhere this must be a const */
+static char *libcfs_debug_file_path;
+module_param(libcfs_debug_file_path, charp, 0644);
+MODULE_PARM_DESC(libcfs_debug_file_path,
+                "Path for dumping debug logs, set 'NONE' to prevent log dumping");
+
+int libcfs_panic_in_progress;
+
+/*
+ * Map a subsystem bit number (@subsys, 0..31) to its lower-case name, or
+ * return NULL when the bit has no name.
+ *
+ * libcfs_debug_token2mask() expects the returned string in lower-case.
+ *
+ * The shift is done on an unsigned constant: callers walk all 32 bits and
+ * shifting a signed 1 into bit 31 is undefined behaviour.
+ */
+static const char *
+libcfs_debug_subsys2str(int subsys)
+{
+       switch (1U << subsys) {
+       case S_UNDEFINED:
+               return "undefined";
+       case S_MDC:
+               return "mdc";
+       case S_MDS:
+               return "mds";
+       case S_OSC:
+               return "osc";
+       case S_OST:
+               return "ost";
+       case S_CLASS:
+               return "class";
+       case S_LOG:
+               return "log";
+       case S_LLITE:
+               return "llite";
+       case S_RPC:
+               return "rpc";
+       case S_LNET:
+               return "lnet";
+       case S_LND:
+               return "lnd";
+       case S_PINGER:
+               return "pinger";
+       case S_FILTER:
+               return "filter";
+       case S_ECHO:
+               return "echo";
+       case S_LDLM:
+               return "ldlm";
+       case S_LOV:
+               return "lov";
+       case S_LQUOTA:
+               return "lquota";
+       case S_OSD:
+               return "osd";
+       case S_LFSCK:
+               return "lfsck";
+       case S_LMV:
+               return "lmv";
+       case S_SEC:
+               return "sec";
+       case S_GSS:
+               return "gss";
+       case S_MGC:
+               return "mgc";
+       case S_MGS:
+               return "mgs";
+       case S_FID:
+               return "fid";
+       case S_FLD:
+               return "fld";
+       default:
+               return NULL;    /* unnamed bit */
+       }
+}
+
+/*
+ * Map a debug-mask bit number (@debug, 0..31) to its lower-case name, or
+ * return NULL when the bit has no name.
+ *
+ * libcfs_debug_token2mask() expects the returned string in lower-case.
+ *
+ * The shift is done on an unsigned constant: callers walk all 32 bits and
+ * shifting a signed 1 into bit 31 is undefined behaviour.
+ */
+static const char *
+libcfs_debug_dbg2str(int debug)
+{
+       switch (1U << debug) {
+       case D_TRACE:
+               return "trace";
+       case D_INODE:
+               return "inode";
+       case D_SUPER:
+               return "super";
+       case D_EXT2:
+               return "ext2";
+       case D_MALLOC:
+               return "malloc";
+       case D_CACHE:
+               return "cache";
+       case D_INFO:
+               return "info";
+       case D_IOCTL:
+               return "ioctl";
+       case D_NETERROR:
+               return "neterror";
+       case D_NET:
+               return "net";
+       case D_WARNING:
+               return "warning";
+       case D_BUFFS:
+               return "buffs";
+       case D_OTHER:
+               return "other";
+       case D_DENTRY:
+               return "dentry";
+       case D_NETTRACE:
+               return "nettrace";
+       case D_PAGE:
+               return "page";
+       case D_DLMTRACE:
+               return "dlmtrace";
+       case D_ERROR:
+               return "error";
+       case D_EMERG:
+               return "emerg";
+       case D_HA:
+               return "ha";
+       case D_RPCTRACE:
+               return "rpctrace";
+       case D_VFSTRACE:
+               return "vfstrace";
+       case D_READA:
+               return "reada";
+       case D_MMAP:
+               return "mmap";
+       case D_CONFIG:
+               return "config";
+       case D_CONSOLE:
+               return "console";
+       case D_QUOTA:
+               return "quota";
+       case D_SEC:
+               return "sec";
+       case D_LFSCK:
+               return "lfsck";
+       default:
+               return NULL;    /* unnamed bit */
+       }
+}
+
+/*
+ * Render @mask as text into @str, which has room for @size bytes.
+ *
+ * A zero mask is rendered as "0".  Otherwise the names of the set bits are
+ * emitted in ascending bit order, separated by single spaces; bits for which
+ * the lookup function returns NULL are skipped.  @is_subsys selects the
+ * subsystem name table, otherwise the debug name table is used.
+ *
+ * Output that does not fit is dropped, and the string is NUL-terminated
+ * whenever @size > 0.  With @size <= 0 nothing is written at all.
+ *
+ * Returns the length the complete text needs (excluding the NUL), which may
+ * be larger than or equal to @size when the output was truncated.
+ */
+int
+libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
+{
+       const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
+                                                libcfs_debug_dbg2str;
+       unsigned int bits = mask;
+       const char *token;
+       int len = 0;
+       int i;
+
+       if (bits == 0) {                        /* "0" */
+               if (size > 0)
+                       str[0] = '0';
+               len = 1;
+       } else {                                /* space-separated tokens */
+               for (i = 0; i < 32; i++) {
+                       /* unsigned shift: 1 << 31 overflows a signed int */
+                       if ((bits & (1U << i)) == 0)
+                               continue;
+
+                       token = fn(i);
+                       if (!token)           /* unused bit */
+                               continue;
+
+                       if (len > 0) {            /* separator? */
+                               if (len < size)
+                                       str[len] = ' ';
+                               len++;
+                       }
+
+                       /* keep counting past the end so the caller learns
+                        * the required length
+                        */
+                       while (*token != 0) {
+                               if (len < size)
+                                       str[len] = *token;
+                               token++;
+                               len++;
+                       }
+               }
+       }
+
+       /* terminate 'str'; never touch str[-1] when there is no room */
+       if (len < size)
+               str[len] = 0;
+       else if (size > 0)
+               str[size - 1] = 0;
+
+       return len;
+}
+
+/*
+ * Convert the textual mask @str into *@mask.
+ *
+ * A string that is entirely one integer (ignoring trailing whitespace) is
+ * taken as the numeric mask; anything else is handed to cfs_str2mask()
+ * together with the name lookup function selected by @is_subsys.
+ *
+ * Returns 0 for the numeric form, otherwise whatever cfs_str2mask() returns.
+ */
+int
+libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
+{
+       const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
+                                                libcfs_debug_dbg2str;
+       int      m = 0;
+       int      matched;
+       int      n;
+       int      t;
+
+       /* Allow a number for backwards compatibility */
+
+       /* n = length of str with trailing whitespace stripped */
+       for (n = strlen(str); n > 0; n--)
+               if (!isspace(str[n - 1]))
+                       break;
+       matched = n;
+       /* %n stores how many characters the integer conversion consumed */
+       t = sscanf(str, "%i%n", &m, &matched);
+       /* numeric only if the integer covers the whole trimmed string */
+       if (t >= 1 && matched == n) {
+               /* don't print warning for lctl set_param debug=0 or -1 */
+               if (m != 0 && m != -1)
+                       CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
+               *mask = m;
+               return 0;
+       }
+
+       /* NOTE(review): the last two arguments look like a minimum mask
+        * (D_CANTMASK for debug masks, none for subsystems) and the set of
+        * all valid bits - confirm against cfs_str2mask()
+        */
+       return cfs_str2mask(str, fn, mask, is_subsys ? 0 : D_CANTMASK,
+                           0xffffffff);
+}
+
+/**
+ * Dump the Lustre debug log to a file named after
+ * libcfs_debug_file_path_arr by calling cfs_tracefile_dump_all_pages().
+ *
+ * The file name is "<path>.<current time in seconds>.<arg>", where \a arg
+ * is printed as a long.  Nothing is dumped when the configured path starts
+ * with "NONE".  After the dump, libcfs_run_debug_log_upcall() is called
+ * with the file name.
+ */
+void libcfs_debug_dumplog_internal(void *arg)
+{
+       void *journal_info;
+
+       /* clear current->journal_info for the duration of the dump and
+        * restore it before returning
+        */
+       journal_info = current->journal_info;
+       current->journal_info = NULL;
+
+       /* only the first four characters are compared */
+       if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) {
+               snprintf(debug_file_name, sizeof(debug_file_name) - 1,
+                        "%s.%lld.%ld", libcfs_debug_file_path_arr,
+                        (s64)ktime_get_real_seconds(), (long_ptr_t)arg);
+               pr_alert("LustreError: dumping log to %s\n", debug_file_name);
+               cfs_tracefile_dump_all_pages(debug_file_name);
+               libcfs_run_debug_log_upcall(debug_file_name);
+       }
+
+       current->journal_info = journal_info;
+}
+
+/*
+ * Thread function started by libcfs_debug_dumplog(): performs the dump with
+ * @arg and then wakes up the waiters on debug_ctlwq.  Always returns 0.
+ */
+static int libcfs_debug_dumplog_thread(void *arg)
+{
+       libcfs_debug_dumplog_internal(arg);
+       wake_up(&debug_ctlwq);
+       return 0;
+}
+
+/*
+ * Dump the debug log from a separate kernel thread.
+ *
+ * Starts libcfs_debug_dumplog_thread() with the caller's pid as argument
+ * and sleeps (TASK_INTERRUPTIBLE) on debug_ctlwq until that thread issues
+ * its wake-up.  If the thread cannot be started an error is printed and the
+ * function returns without sleeping.
+ */
+void libcfs_debug_dumplog(void)
+{
+       wait_queue_t wait;
+       struct task_struct *dumper;
+
+       /* we're being careful to ensure that the kernel thread is
+        * able to set our state to running as it exits before we
+        * get to schedule()
+        */
+       init_waitqueue_entry(&wait, current);
+       set_current_state(TASK_INTERRUPTIBLE);
+       add_wait_queue(&debug_ctlwq, &wait);
+
+       /* the pid ends up in the dump file name */
+       dumper = kthread_run(libcfs_debug_dumplog_thread,
+                            (void *)(long)current_pid(),
+                            "libcfs_debug_dumper");
+       if (IS_ERR(dumper))
+               pr_err("LustreError: cannot start log dump thread: %ld\n",
+                      PTR_ERR(dumper));
+       else
+               schedule();
+
+       /* be sure to tear down even if kthread_run() failed */
+       remove_wait_queue(&debug_ctlwq, &wait);
+       set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL(libcfs_debug_dumplog);
+
+/*
+ * Initialise the debug subsystem.
+ *
+ * Sets up the dump wait queue, falls back to the default console delays
+ * when the configured ones are unset or inconsistent, copies a
+ * user-supplied dump path into libcfs_debug_file_path_arr, and starts the
+ * trace files with a page count derived from libcfs_debug_mb.  On success
+ * the panic notifier is registered and libcfs_debug_mb is refreshed from
+ * cfs_trace_get_debug_mb().
+ *
+ * @bufsize is not used by this function.
+ *
+ * Returns 0 on success or the error from cfs_tracefile_init().
+ */
+int libcfs_debug_init(unsigned long bufsize)
+{
+       unsigned int mb = libcfs_debug_mb;
+       unsigned int pages;
+       int err;
+
+       init_waitqueue_head(&debug_ctlwq);
+
+       /* not set by the user, or set to values that make no sense */
+       if (!libcfs_console_max_delay || !libcfs_console_min_delay ||
+           libcfs_console_min_delay >= libcfs_console_max_delay) {
+               libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
+               libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
+       }
+
+       if (libcfs_debug_file_path)
+               strlcpy(libcfs_debug_file_path_arr,
+                       libcfs_debug_file_path,
+                       sizeof(libcfs_debug_file_path_arr));
+
+       /* A usable libcfs_debug_mb is split evenly between the possible
+        * CPUs and converted from megabytes to pages; an invalid or
+        * uninitialized one falls back to TCD_MAX_PAGES.
+        */
+       if (mb <= cfs_trace_max_debug_mb() && mb >= num_possible_cpus())
+               pages = (mb / num_possible_cpus()) << (20 - PAGE_CACHE_SHIFT);
+       else
+               pages = TCD_MAX_PAGES;
+
+       err = cfs_tracefile_init(pages);
+       if (err)
+               return err;
+
+       libcfs_register_panic_notifier();
+       libcfs_debug_mb = cfs_trace_get_debug_mb();
+       return 0;
+}
+
+/*
+ * Undo libcfs_debug_init(): unregister the panic notifier and shut the
+ * trace files down.  Always returns 0.
+ */
+int libcfs_debug_cleanup(void)
+{
+       libcfs_unregister_panic_notifier();
+       cfs_tracefile_exit();
+       return 0;
+}
+
+/* Flush the trace pages via cfs_trace_flush_pages().  Always returns 0. */
+int libcfs_debug_clear_buffer(void)
+{
+       cfs_trace_flush_pages();
+       return 0;
+}
+
+/* Debug markers, although printed by S_LNET, should not be marked as such. */
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_UNDEFINED
+/*
+ * Insert a marker into the debug log: @text is printed as
+ * "DEBUG MARKER: <text>" through LCONSOLE(), framed by two D_TRACE lines of
+ * asterisks.  Always returns 0.
+ */
+int libcfs_debug_mark_buffer(const char *text)
+{
+       CDEBUG(D_TRACE,
+              "***************************************************\n");
+       LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
+       CDEBUG(D_TRACE,
+              "***************************************************\n");
+
+       return 0;
+}
+
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_LNET
diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c

new file mode 100644 (file)

index 0000000..dadaf76
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/fail.c
@@ -0,0 +1,139 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores,
+ * CA 94065 USA or visit www.oracle.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Oracle Corporation, Inc.
+ */
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+unsigned long cfs_fail_loc;
+EXPORT_SYMBOL(cfs_fail_loc);
+
+unsigned int cfs_fail_val;
+EXPORT_SYMBOL(cfs_fail_val);
+
+DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq);
+EXPORT_SYMBOL(cfs_race_waitq);
+
+int cfs_race_state;
+EXPORT_SYMBOL(cfs_race_state);
+
+/*
+ * Decide whether a fail point should trigger, based on the flag bits in
+ * cfs_fail_loc and on cfs_fail_val, and optionally update cfs_fail_loc.
+ *
+ * @id    fail point identifier; in this function it is only checked not to
+ *        carry CFS_FAIL_ONCE (matching it against cfs_fail_loc is
+ *        presumably done by the caller - confirm)
+ * @value compared with cfs_fail_val for CFS_FAIL_LOC_VALUE, or the new
+ *        bits/value for CFS_FAIL_LOC_ORSET / CFS_FAIL_LOC_RESET
+ * @set   one of CFS_FAIL_LOC_NOSET, CFS_FAIL_LOC_VALUE, CFS_FAIL_LOC_ORSET
+ *        or CFS_FAIL_LOC_RESET; anything else trips an assertion
+ *
+ * Returns 1 when the caller should fail, 0 otherwise.
+ */
+int __cfs_fail_check_set(__u32 id, __u32 value, int set)
+{
+       /* shared by the CFS_FAIL_SKIP and CFS_FAIL_SOME handling below */
+       static atomic_t cfs_fail_count = ATOMIC_INIT(0);
+
+       LASSERT(!(id & CFS_FAIL_ONCE));
+
+       /* a fail-once location that has already failed never fails again */
+       if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) ==
+           (CFS_FAILED | CFS_FAIL_ONCE)) {
+               atomic_set(&cfs_fail_count, 0); /* paranoia */
+               return 0;
+       }
+
+       /* Fail 1/cfs_fail_val times */
+       if (cfs_fail_loc & CFS_FAIL_RAND) {
+               if (cfs_fail_val < 2 || cfs_rand() % cfs_fail_val > 0)
+                       return 0;
+       }
+
+       /* Skip the first cfs_fail_val, then fail */
+       if (cfs_fail_loc & CFS_FAIL_SKIP) {
+               if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val)
+                       return 0;
+       }
+
+       /* check cfs_fail_val... */
+       if (set == CFS_FAIL_LOC_VALUE) {
+               /* a cfs_fail_val of -1 matches any value */
+               if (cfs_fail_val != -1 && cfs_fail_val != value)
+                       return 0;
+       }
+
+       /* Fail cfs_fail_val times, overridden by FAIL_ONCE */
+       if (cfs_fail_loc & CFS_FAIL_SOME &&
+           (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) {
+               int count = atomic_inc_return(&cfs_fail_count);
+
+               if (count >= cfs_fail_val) {
+                       set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
+                       atomic_set(&cfs_fail_count, 0);
+                       /* we lost the race to increment the counter */
+                       if (count > cfs_fail_val)
+                               return 0;
+               }
+       }
+
+       if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) &&
+           (value & CFS_FAIL_ONCE))
+               set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
+       /* Lost race to set CFS_FAILED_BIT. */
+       if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
+               /* If CFS_FAIL_ONCE is valid, only one process can fail,
+                * otherwise multi-process can fail at the same time.
+                */
+               if (cfs_fail_loc & CFS_FAIL_ONCE)
+                       return 0;
+       }
+
+       /* we are failing: apply the requested update to cfs_fail_loc */
+       switch (set) {
+       case CFS_FAIL_LOC_NOSET:
+       case CFS_FAIL_LOC_VALUE:
+               break;
+       case CFS_FAIL_LOC_ORSET:
+               /* OR in the new bits, except the two state flags */
+               cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE);
+               break;
+       case CFS_FAIL_LOC_RESET:
+               cfs_fail_loc = value;
+               break;
+       default:
+               LASSERTF(0, "called with bad set %u\n", set);
+               break;
+       }
+
+       return 1;
+}
+EXPORT_SYMBOL(__cfs_fail_check_set);
+
+/*
+ * Like __cfs_fail_check_set(), but when the fail point triggers and @ms is
+ * positive the caller additionally sleeps uninterruptibly for @ms
+ * milliseconds, logging before and after the sleep.
+ *
+ * Returns the result of __cfs_fail_check_set().
+ */
+int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
+{
+       int hit = __cfs_fail_check_set(id, value, set);
+
+       if (!hit)
+               return hit;
+       if (!likely(ms > 0))
+               return hit;
+
+       CERROR("cfs_fail_timeout id %x sleeping for %dms\n",
+              id, ms);
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(cfs_time_seconds(ms) / 1000);
+       CERROR("cfs_fail_timeout id %x awake\n", id);
+
+       return hit;
+}
+EXPORT_SYMBOL(__cfs_fail_timeout_set);
diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c

new file mode 100644 (file)

index 0000000..f60feb3
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/hash.c
@@ -0,0 +1,2085 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/hash.c
+ *
+ * Implement a hash class for hash process in lustre system.
+ *
+ * Author: YuZhangyong <yzy@clusterfs.com>
+ *
+ * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
+ * - Simplified API and improved documentation
+ * - Added per-hash feature flags:
+ *   * CFS_HASH_DEBUG additional validation
+ *   * CFS_HASH_REHASH dynamic rehashing
+ * - Added per-hash statistics
+ * - General performance enhancements
+ *
+ * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
+ * - move all stuff to libcfs
+ * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
+ * - ignore hs_rwlock if without CFS_HASH_REHASH setting
+ * - buckets are allocated one by one(instead of contiguous memory),
+ *   to avoid unnecessary cacheline conflict
+ *
+ * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
+ * - "bucket" is a group of hlist_head now, user can specify bucket size
+ *   by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share
+ *   one lock for reducing memory overhead.
+ *
+ * - support lockless hash, caller will take care of locks:
+ *   avoid lock overhead for hash tables that are already protected
+ *   by locking in the caller for another reason
+ *
+ * - support both spin_lock/rwlock for bucket:
+ *   overhead of spinlock contention is lower than read/write
+ *   contention of rwlock, so using spinlock to serialize operations on
+ *   bucket is more reasonable for those frequently changed hash tables
+ *
+ * - support one-single lock mode:
+ *   one lock to protect all hash operations to avoid overhead of
+ *   multiple locks if hash table is always small
+ *
+ * - removed a lot of unnecessary addref & decref on hash element:
+ *   addref & decref are atomic operations in many use-cases which
+ *   are expensive.
+ *
+ * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
+ *   some lustre use-cases require these functions to be strictly
+ *   non-blocking, we need to schedule required rehash on a different
+ *   thread on those cases.
+ *
+ * - safer rehash on large hash table
+ *   In old implementation, rehash function will exclusively lock the
+ *   hash table and finish rehash in one batch, it's dangerous on SMP
+ *   system because rehash millions of elements could take long time.
+ *   New implemented rehash can release lock and relax CPU in middle
+ *   of rehash, it's safe for another thread to search/change on the
+ *   hash table even while it is being rehashed.
+ *
+ * - support two different refcount modes
+ *   . hash table has refcount on element
+ *   . hash table doesn't change refcount on adding/removing element
+ *
+ * - support long name hash table (for param-tree)
+ *
+ * - fix a bug for cfs_hash_rehash_key:
+ *   in old implementation, cfs_hash_rehash_key could screw up the
+ *   hash-table because @key is overwritten without any protection.
+ *   Now we need user to define hs_keycpy for those rehash enabled
+ *   hash tables, cfs_hash_rehash_key will overwrite hash-key
+ *   inside lock by calling hs_keycpy.
+ *
+ * - better hash iteration:
+ *   Now we support both locked iteration & lockless iteration of hash
+ *   table. Also, user can break the iteration by return 1 in callback.
+ */
+#include <linux/seq_file.h>
+#include <linux/log2.h>
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
+static unsigned int warn_on_depth = 8;
+module_param(warn_on_depth, uint, 0644);
+MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high.");
+#endif
+
+struct cfs_wi_sched *cfs_sched_rehash;
+
+/* "No lock" hook: deliberately empty, both arguments are ignored. */
+static inline void
+cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {}
+
+/* "No lock" unlock hook: deliberately empty, both arguments are ignored. */
+static inline void
+cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {}
+
+/* Take the spinlock member of @lock; @exclusive is ignored. */
+static inline void
+cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive)
+       __acquires(&lock->spin)
+{
+       spin_lock(&lock->spin);
+}
+
+/* Release the spinlock member of @lock; @exclusive is ignored. */
+static inline void
+cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive)
+       __releases(&lock->spin)
+{
+       spin_unlock(&lock->spin);
+}
+
+/*
+ * Take the rwlock member of @lock: the write side when @exclusive is
+ * non-zero, the read side otherwise.
+ */
+static inline void
+cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive)
+       __acquires(&lock->rw)
+{
+       if (exclusive)
+               write_lock(&lock->rw);
+       else
+               read_lock(&lock->rw);
+}
+
+/*
+ * Release the rwlock member of @lock: the write side when @exclusive is
+ * non-zero, the read side otherwise.  @exclusive must match the value used
+ * when the lock was taken.
+ */
+static inline void
+cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive)
+       __releases(&lock->rw)
+{
+       if (exclusive)
+               write_unlock(&lock->rw);
+       else
+               read_unlock(&lock->rw);
+}
+
+/*
+ * Lock operation tables.  Each table pairs a table-level lock/unlock
+ * (hs_lock/hs_unlock) with a bucket-level lock/unlock
+ * (hs_bkt_lock/hs_bkt_unlock); cfs_hash_lock_setup() picks one per hash.
+ */
+
+/** No lock hash: every hook is a no-op */
+static struct cfs_hash_lock_ops cfs_hash_nl_lops = {
+       .hs_lock        = cfs_hash_nl_lock,
+       .hs_unlock      = cfs_hash_nl_unlock,
+       .hs_bkt_lock    = cfs_hash_nl_lock,
+       .hs_bkt_unlock  = cfs_hash_nl_unlock,
+};
+
+/** no bucket lock, one spinlock to protect everything */
+static struct cfs_hash_lock_ops cfs_hash_nbl_lops = {
+       .hs_lock        = cfs_hash_spin_lock,
+       .hs_unlock      = cfs_hash_spin_unlock,
+       .hs_bkt_lock    = cfs_hash_nl_lock,
+       .hs_bkt_unlock  = cfs_hash_nl_unlock,
+};
+
+/** spin bucket lock, rehash is enabled (table level uses the rwlock) */
+static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = {
+       .hs_lock        = cfs_hash_rw_lock,
+       .hs_unlock      = cfs_hash_rw_unlock,
+       .hs_bkt_lock    = cfs_hash_spin_lock,
+       .hs_bkt_unlock  = cfs_hash_spin_unlock,
+};
+
+/** rw bucket lock, rehash is enabled (table level uses the rwlock) */
+static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops = {
+       .hs_lock        = cfs_hash_rw_lock,
+       .hs_unlock      = cfs_hash_rw_unlock,
+       .hs_bkt_lock    = cfs_hash_rw_lock,
+       .hs_bkt_unlock  = cfs_hash_rw_unlock,
+};
+
+/** spin bucket lock, rehash is disabled (no table-level lock) */
+static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = {
+       .hs_lock        = cfs_hash_nl_lock,
+       .hs_unlock      = cfs_hash_nl_unlock,
+       .hs_bkt_lock    = cfs_hash_spin_lock,
+       .hs_bkt_unlock  = cfs_hash_spin_unlock,
+};
+
+/** rw bucket lock, rehash is disabled (no table-level lock) */
+static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = {
+       .hs_lock        = cfs_hash_nl_lock,
+       .hs_unlock      = cfs_hash_nl_unlock,
+       .hs_bkt_lock    = cfs_hash_rw_lock,
+       .hs_bkt_unlock  = cfs_hash_rw_unlock,
+};
+
+/**
+ * Pick the lock operation table for @hs from its flags and initialise the
+ * table-level lock where one is used: a spinlock for the no-bucket-lock
+ * case, a rwlock when rehash is enabled.  Any flag combination that names
+ * neither a rw nor a spin bucket lock ends in LBUG().
+ */
+static void
+cfs_hash_lock_setup(struct cfs_hash *hs)
+{
+       int rehash;
+
+       if (cfs_hash_with_no_lock(hs)) {
+               hs->hs_lops = &cfs_hash_nl_lops;
+               return;
+       }
+
+       if (cfs_hash_with_no_bktlock(hs)) {
+               spin_lock_init(&hs->hs_lock.spin);
+               hs->hs_lops = &cfs_hash_nbl_lops;
+               return;
+       }
+
+       /* per-bucket locking; only a rehashable table needs hs_lock.rw */
+       rehash = cfs_hash_with_rehash(hs);
+       if (rehash)
+               rwlock_init(&hs->hs_lock.rw);
+
+       if (cfs_hash_with_rw_bktlock(hs))
+               hs->hs_lops = rehash ? &cfs_hash_bkt_rw_lops :
+                                      &cfs_hash_nr_bkt_rw_lops;
+       else if (cfs_hash_with_spin_bktlock(hs))
+               hs->hs_lops = rehash ? &cfs_hash_bkt_spin_lops :
+                                      &cfs_hash_nr_bkt_spin_lops;
+       else
+               LBUG();
+}
+
+/**
+ * Simple hash head without depth tracking
+ * new element is always added to head of hlist
+ *
+ * cfs_hash_hh_hhead() treats a bucket's hsb_head[] storage as an array of
+ * these, indexed by bd_offset.
+ */
+struct cfs_hash_head {
+       struct hlist_head       hh_head;        /**< entries list */
+};
+
+/**
+ * hop_hhead_size method of cfs_hash_hh_hops: size in bytes of one
+ * struct cfs_hash_head.  @hs is not used.
+ */
+static int
+cfs_hash_hh_hhead_size(struct cfs_hash *hs)
+{
+       return sizeof(struct cfs_hash_head);
+}
+
+/**
+ * hop_hhead method of cfs_hash_hh_hops: view the bucket's hsb_head[]
+ * storage as an array of struct cfs_hash_head and return the hlist head
+ * found at index bd_offset.  @hs is not used.
+ */
+static struct hlist_head *
+cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
+{
+       struct cfs_hash_head *heads =
+               (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0];
+
+       return &(heads + bd->bd_offset)->hh_head;
+}
+
+/**
+ * hop_hnode_add method of cfs_hash_hh_hops: put @hnode at the front of the
+ * hlist selected by @bd.  Always returns -1 because this head flavour
+ * keeps no length counter.
+ */
+static int
+cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                     struct hlist_node *hnode)
+{
+       struct hlist_head *hhead = cfs_hash_hh_hhead(hs, bd);
+
+       hlist_add_head(hnode, hhead);
+       /* depth is unknown */
+       return -1;
+}
+
+/**
+ * hop_hnode_del method of cfs_hash_hh_hops: unlink @hnode and leave it
+ * re-initialised (hlist_del_init).  @hs and @bd are not used.  Always
+ * returns -1 because this head flavour keeps no length counter.
+ */
+static int
+cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                     struct hlist_node *hnode)
+{
+       hlist_del_init(hnode);
+       return -1; /* unknown depth */
+}
+
+/**
+ * Simple hash head with depth tracking
+ * new element is always added to head of hlist
+ *
+ * hd_depth is incremented by cfs_hash_hd_hnode_add() and decremented by
+ * cfs_hash_hd_hnode_del().
+ */
+struct cfs_hash_head_dep {
+       struct hlist_head       hd_head;        /**< entries list */
+       unsigned int            hd_depth;       /**< list length */
+};
+
+/**
+ * hop_hhead_size method of cfs_hash_hd_hops: size in bytes of one
+ * struct cfs_hash_head_dep.  @hs is not used.
+ */
+static int
+cfs_hash_hd_hhead_size(struct cfs_hash *hs)
+{
+       return sizeof(struct cfs_hash_head_dep);
+}
+
+/**
+ * hop_hhead method of cfs_hash_hd_hops: view the bucket's hsb_head[]
+ * storage as an array of struct cfs_hash_head_dep and return the hlist
+ * head found at index bd_offset.  @hs is not used.
+ */
+static struct hlist_head *
+cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
+{
+       struct cfs_hash_head_dep *heads =
+               (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0];
+
+       return &(heads + bd->bd_offset)->hd_head;
+}
+
+/**
+ * hop_hnode_add method of cfs_hash_hd_hops: put @hnode at the front of the
+ * hlist selected by @bd and return the list length after the insertion.
+ */
+static int
+cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                     struct hlist_node *hnode)
+{
+       struct hlist_head *hhead = cfs_hash_hd_hhead(hs, bd);
+       struct cfs_hash_head_dep *dep;
+
+       dep = container_of(hhead, struct cfs_hash_head_dep, hd_head);
+       hlist_add_head(hnode, hhead);
+       dep->hd_depth++;
+       return dep->hd_depth;
+}
+
+/**
+ * hop_hnode_del method of cfs_hash_hd_hops: unlink @hnode and return the
+ * length of the hlist selected by @bd after the removal.
+ */
+static int
+cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                     struct hlist_node *hnode)
+{
+       struct hlist_head *hhead = cfs_hash_hd_hhead(hs, bd);
+       struct cfs_hash_head_dep *dep;
+
+       dep = container_of(hhead, struct cfs_hash_head_dep, hd_head);
+       hlist_del_init(hnode);
+       dep->hd_depth--;
+       return dep->hd_depth;
+}
+
+/**
+ * double links hash head without depth tracking
+ * new element is always added to tail of hlist
+ *
+ * dh_tail is maintained by cfs_hash_dh_hnode_add()/cfs_hash_dh_hnode_del();
+ * a NULL dh_tail is treated as "list is empty" by the add path.
+ */
+struct cfs_hash_dhead {
+       struct hlist_head       dh_head;        /**< entries list */
+       struct hlist_node       *dh_tail;       /**< the last entry */
+};
+
+/**
+ * hop_hhead_size method of cfs_hash_dh_hops: size in bytes of one
+ * struct cfs_hash_dhead.  @hs is not used.
+ */
+static int
+cfs_hash_dh_hhead_size(struct cfs_hash *hs)
+{
+       return sizeof(struct cfs_hash_dhead);
+}
+
+/**
+ * hop_hhead method of cfs_hash_dh_hops: view the bucket's hsb_head[]
+ * storage as an array of struct cfs_hash_dhead and return the hlist head
+ * found at index bd_offset.  @hs is not used.
+ */
+static struct hlist_head *
+cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
+{
+       struct cfs_hash_dhead *heads =
+               (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0];
+
+       return &(heads + bd->bd_offset)->dh_head;
+}
+
+/**
+ * hop_hnode_add method of cfs_hash_dh_hops: append @hnode to the end of
+ * the hlist selected by @bd and record it as the new tail.  Always returns
+ * -1 because this head flavour keeps no length counter.
+ */
+static int
+cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                     struct hlist_node *hnode)
+{
+       struct hlist_head *hhead = cfs_hash_dh_hhead(hs, bd);
+       struct cfs_hash_dhead *dh;
+
+       dh = container_of(hhead, struct cfs_hash_dhead, dh_head);
+       if (!dh->dh_tail) {
+               /* empty list: the new node is also the first one */
+               hlist_add_head(hnode, hhead);
+       } else {
+               hlist_add_behind(hnode, dh->dh_tail);
+       }
+       dh->dh_tail = hnode;
+
+       /* depth is unknown */
+       return -1;
+}
+
+/**
+ * hop_hnode_del method of cfs_hash_dh_hops: unlink @hnd from the hlist
+ * selected by @bd, first moving the recorded tail back by one entry (or to
+ * NULL) when @hnd is the last node.  Always returns -1 because this head
+ * flavour keeps no length counter.
+ */
+static int
+cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                     struct hlist_node *hnd)
+{
+       struct hlist_head *hhead = cfs_hash_dh_hhead(hs, bd);
+       struct cfs_hash_dhead *dh;
+
+       dh = container_of(hhead, struct cfs_hash_dhead, dh_head);
+       if (!hnd->next) {
+               /* it's the tail; pprev tells whether it is also the head */
+               if (hnd->pprev == &dh->dh_head.first)
+                       dh->dh_tail = NULL;
+               else
+                       dh->dh_tail = container_of(hnd->pprev,
+                                                  struct hlist_node, next);
+       }
+       hlist_del_init(hnd);
+
+       /* depth is unknown */
+       return -1;
+}
+
+/**
+ * double links hash head with depth tracking
+ * new element is always added to tail of hlist
+ *
+ * dd_tail and dd_depth are maintained by cfs_hash_dd_hnode_add() and
+ * cfs_hash_dd_hnode_del(); a NULL dd_tail is treated as "list is empty" by
+ * the add path.
+ */
+struct cfs_hash_dhead_dep {
+       struct hlist_head       dd_head;        /**< entries list */
+       struct hlist_node       *dd_tail;       /**< the last entry */
+       unsigned int            dd_depth;       /**< list length */
+};
+
+/**
+ * hop_hhead_size method of cfs_hash_dd_hops: size in bytes of one
+ * struct cfs_hash_dhead_dep.  @hs is not used.
+ */
+static int
+cfs_hash_dd_hhead_size(struct cfs_hash *hs)
+{
+       return sizeof(struct cfs_hash_dhead_dep);
+}
+
+/**
+ * hop_hhead method of cfs_hash_dd_hops: view the bucket's hsb_head[]
+ * storage as an array of struct cfs_hash_dhead_dep and return the hlist
+ * head found at index bd_offset.  @hs is not used.
+ */
+static struct hlist_head *
+cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
+{
+       struct cfs_hash_dhead_dep *heads =
+               (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0];
+
+       return &(heads + bd->bd_offset)->dd_head;
+}
+
+/**
+ * hop_hnode_add method of cfs_hash_dd_hops: append @hnode to the end of
+ * the hlist selected by @bd, record it as the new tail and return the list
+ * length after the insertion.
+ */
+static int
+cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                     struct hlist_node *hnode)
+{
+       struct hlist_head *hhead = cfs_hash_dd_hhead(hs, bd);
+       struct cfs_hash_dhead_dep *dh;
+
+       dh = container_of(hhead, struct cfs_hash_dhead_dep, dd_head);
+       if (!dh->dd_tail) {
+               /* empty list: the new node is also the first one */
+               hlist_add_head(hnode, hhead);
+       } else {
+               hlist_add_behind(hnode, dh->dd_tail);
+       }
+       dh->dd_tail = hnode;
+
+       dh->dd_depth++;
+       return dh->dd_depth;
+}
+
+/**
+ * hop_hnode_del method of cfs_hash_dd_hops: unlink @hnd from the hlist
+ * selected by @bd, first moving the recorded tail back by one entry (or to
+ * NULL) when @hnd is the last node.  Returns the list length after the
+ * removal.
+ */
+static int
+cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                     struct hlist_node *hnd)
+{
+       struct hlist_head *hhead = cfs_hash_dd_hhead(hs, bd);
+       struct cfs_hash_dhead_dep *dh;
+
+       dh = container_of(hhead, struct cfs_hash_dhead_dep, dd_head);
+       if (!hnd->next) {
+               /* it's the tail; pprev tells whether it is also the head */
+               if (hnd->pprev == &dh->dd_head.first)
+                       dh->dd_tail = NULL;
+               else
+                       dh->dd_tail = container_of(hnd->pprev,
+                                                  struct hlist_node, next);
+       }
+       hlist_del_init(hnd);
+
+       dh->dd_depth--;
+       return dh->dd_depth;
+}
+
+/**
+ * hlist operations for struct cfs_hash_head: add to head, no depth
+ * tracking.  Chosen by cfs_hash_hlist_setup() when neither add-tail nor
+ * depth is requested.
+ */
+static struct cfs_hash_hlist_ops cfs_hash_hh_hops = {
+       .hop_hhead      = cfs_hash_hh_hhead,
+       .hop_hhead_size = cfs_hash_hh_hhead_size,
+       .hop_hnode_add  = cfs_hash_hh_hnode_add,
+       .hop_hnode_del  = cfs_hash_hh_hnode_del,
+};
+
+/**
+ * hlist operations for struct cfs_hash_head_dep: add to head, with depth
+ * tracking.  Chosen by cfs_hash_hlist_setup() when depth is requested
+ * without add-tail.
+ */
+static struct cfs_hash_hlist_ops cfs_hash_hd_hops = {
+       .hop_hhead      = cfs_hash_hd_hhead,
+       .hop_hhead_size = cfs_hash_hd_hhead_size,
+       .hop_hnode_add  = cfs_hash_hd_hnode_add,
+       .hop_hnode_del  = cfs_hash_hd_hnode_del,
+};
+
+/**
+ * hlist operations for struct cfs_hash_dhead: add to tail, no depth
+ * tracking.  Chosen by cfs_hash_hlist_setup() when add-tail is requested
+ * without depth.
+ */
+static struct cfs_hash_hlist_ops cfs_hash_dh_hops = {
+       .hop_hhead      = cfs_hash_dh_hhead,
+       .hop_hhead_size = cfs_hash_dh_hhead_size,
+       .hop_hnode_add  = cfs_hash_dh_hnode_add,
+       .hop_hnode_del  = cfs_hash_dh_hnode_del,
+};
+
+/**
+ * hlist operations for struct cfs_hash_dhead_dep: add to tail, with depth
+ * tracking.  Chosen by cfs_hash_hlist_setup() when both add-tail and depth
+ * are requested.
+ */
+static struct cfs_hash_hlist_ops cfs_hash_dd_hops = {
+       .hop_hhead      = cfs_hash_dd_hhead,
+       .hop_hhead_size = cfs_hash_dd_hhead_size,
+       .hop_hnode_add  = cfs_hash_dd_hnode_add,
+       .hop_hnode_del  = cfs_hash_dd_hnode_del,
+};
+
+/**
+ * Select the hlist operation table for @hs from two of its properties:
+ * whether new entries go to the tail and whether list depth is tracked.
+ */
+static void
+cfs_hash_hlist_setup(struct cfs_hash *hs)
+{
+       if (!cfs_hash_with_add_tail(hs)) {
+               /* add-to-head flavours */
+               if (cfs_hash_with_depth(hs))
+                       hs->hs_hops = &cfs_hash_hd_hops;
+               else
+                       hs->hs_hops = &cfs_hash_hh_hops;
+               return;
+       }
+
+       /* add-to-tail flavours */
+       if (cfs_hash_with_depth(hs))
+               hs->hs_hops = &cfs_hash_dd_hops;
+       else
+               hs->hs_hops = &cfs_hash_dh_hops;
+}
+
+/**
+ * Fill @bd with the bucket and in-bucket offset for @key, using bucket
+ * array @bkts of a table that is @bits wide.
+ *
+ * The low (bits - hs_bkt_bits) bits of the hash index select the bucket,
+ * the remaining high bits are the offset inside that bucket.  @bits must
+ * be either hs_cur_bits or hs_rehash_bits.
+ */
+static void
+cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts,
+                    unsigned int bits, const void *key, struct cfs_hash_bd *bd)
+{
+       unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1);
+       unsigned int shift;
+
+       LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits);
+
+       shift = bits - hs->hs_bkt_bits;
+       bd->bd_offset = index >> shift;
+       bd->bd_bucket = bkts[index & ((1U << shift) - 1)];
+}
+
+/**
+ * Find the bucket descriptor for @key: in hs_rehash_buckets when that
+ * array is set, in hs_buckets otherwise.
+ */
+void
+cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd)
+{
+       /* NB: caller should hold hs->hs_lock.rw if REHASH is set */
+       if (likely(!hs->hs_rehash_buckets)) {
+               cfs_hash_bd_from_key(hs, hs->hs_buckets,
+                                    hs->hs_cur_bits, key, bd);
+               return;
+       }
+
+       /* a rehash bucket array is present: place the key there */
+       LASSERT(hs->hs_rehash_bits != 0);
+       cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
+                            hs->hs_rehash_bits, key, bd);
+}
+EXPORT_SYMBOL(cfs_hash_bd_get);
+
+/**
+ * Record @dep_cur as the new maximum depth of @bd's bucket if it exceeds
+ * the current hsb_depmax.
+ *
+ * With CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 it additionally, when
+ * warn_on_depth is non-zero and @dep_cur exceeds both warn_on_depth and
+ * hs_dep_max, stores the depth and its location in @hs under hs_dep_lock
+ * and schedules hs_dep_wi on cfs_sched_rehash.
+ */
+static inline void
+cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur)
+{
+       /* common case: no new per-bucket maximum, nothing to do */
+       if (likely(dep_cur <= bd->bd_bucket->hsb_depmax))
+               return;
+
+       bd->bd_bucket->hsb_depmax = dep_cur;
+# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
+       /* warning disabled, or depth not above the threshold/previous max */
+       if (likely(warn_on_depth == 0 ||
+                  max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
+               return;
+
+       spin_lock(&hs->hs_dep_lock);
+       hs->hs_dep_max  = dep_cur;
+       hs->hs_dep_bkt  = bd->bd_bucket->hsb_index;
+       hs->hs_dep_off  = bd->bd_offset;
+       hs->hs_dep_bits = hs->hs_cur_bits;
+       spin_unlock(&hs->hs_dep_lock);
+
+       cfs_wi_schedule(cfs_sched_rehash, &hs->hs_dep_wi);
+# endif
+}
+
+/**
+ * Insert @hnode into the hlist selected by @bd and update the bucket
+ * bookkeeping: depth record, version (never left at 0) and item count.
+ * Also bumps hs_count when the table has a counter and takes an item
+ * reference with cfs_hash_get() unless the table is "no itemref".
+ */
+void
+cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                      struct hlist_node *hnode)
+{
+       struct cfs_hash_bucket *bkt;
+       int depth;
+
+       depth = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
+       cfs_hash_bd_dep_record(hs, bd, depth);
+
+       bkt = bd->bd_bucket;
+       /* skip version 0 on wrap-around */
+       if (unlikely(++bkt->hsb_version == 0))
+               bkt->hsb_version++;
+       bkt->hsb_count++;
+
+       if (cfs_hash_with_counter(hs))
+               atomic_inc(&hs->hs_count);
+       if (!cfs_hash_with_no_itemref(hs))
+               cfs_hash_get(hs, hnode);
+}
+EXPORT_SYMBOL(cfs_hash_bd_add_locked);
+
+/**
+ * Remove @hnode from the hlist selected by @bd and update the bucket
+ * bookkeeping: item count and version (never left at 0).  Also drops
+ * hs_count when the table has a counter and releases the item reference
+ * with cfs_hash_put_locked() unless the table is "no itemref".
+ */
+void
+cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                      struct hlist_node *hnode)
+{
+       struct cfs_hash_bucket *bkt;
+
+       hs->hs_hops->hop_hnode_del(hs, bd, hnode);
+
+       LASSERT(bd->bd_bucket->hsb_count > 0);
+       bkt = bd->bd_bucket;
+       bkt->hsb_count--;
+       /* skip version 0 on wrap-around */
+       if (unlikely(++bkt->hsb_version == 0))
+               bkt->hsb_version++;
+
+       if (cfs_hash_with_counter(hs)) {
+               LASSERT(atomic_read(&hs->hs_count) > 0);
+               atomic_dec(&hs->hs_count);
+       }
+       if (!cfs_hash_with_no_itemref(hs))
+               cfs_hash_put_locked(hs, hnode);
+}
+EXPORT_SYMBOL(cfs_hash_bd_del_locked);
+
+/**
+ * Move @hnode from the hlist selected by @bd_old to the one selected by
+ * @bd_new.  Nothing happens when the two descriptors compare equal.
+ * hs_count and the item refcount are left untouched; only the per-bucket
+ * count, version and depth record change.
+ */
+void
+cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
+                       struct cfs_hash_bd *bd_new, struct hlist_node *hnode)
+{
+       struct cfs_hash_bucket *obkt = bd_old->bd_bucket;
+       struct cfs_hash_bucket *nbkt = bd_new->bd_bucket;
+       int depth;
+
+       if (cfs_hash_bd_compare(bd_old, bd_new) == 0)
+               return;
+
+       /* call the hlist ops directly rather than cfs_hash_bd_del/add_locked
+        * so that no atomic or refcount operation is performed
+        */
+       hs->hs_hops->hop_hnode_del(hs, bd_old, hnode);
+       depth = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode);
+       cfs_hash_bd_dep_record(hs, bd_new, depth);
+
+       /* source bucket: one item less, new version (0 is skipped) */
+       LASSERT(obkt->hsb_count > 0);
+       obkt->hsb_count--;
+       if (unlikely(++obkt->hsb_version == 0))
+               obkt->hsb_version++;
+
+       /* destination bucket: one item more, new version (0 is skipped) */
+       nbkt->hsb_count++;
+       if (unlikely(++nbkt->hsb_version == 0))
+               nbkt->hsb_version++;
+}
+
+/**
+ * Bit flags that are combined into enum cfs_hash_lookup_intent values and
+ * tested individually by cfs_hash_bd_lookup_intent().
+ */
+enum {
+       /** always set, for sanity (avoid ZERO intent) */
+       CFS_HS_LOOKUP_MASK_FIND = BIT(0),
+       /** return entry with a ref */
+       CFS_HS_LOOKUP_MASK_REF  = BIT(1),
+       /** add entry if not existing */
+       CFS_HS_LOOKUP_MASK_ADD  = BIT(2),
+       /** delete entry, ignore other masks */
+       CFS_HS_LOOKUP_MASK_DEL  = BIT(3),
+};
+
+/**
+ * Lookup intents understood by cfs_hash_bd_lookup_intent(); each value is
+ * CFS_HS_LOOKUP_MASK_FIND combined with zero or more of the REF, ADD and
+ * DEL mask bits.
+ */
+enum cfs_hash_lookup_intent {
+       /** return item w/o refcount */
+       CFS_HS_LOOKUP_IT_PEEK    = CFS_HS_LOOKUP_MASK_FIND,
+       /** return item with refcount */
+       CFS_HS_LOOKUP_IT_FIND    = (CFS_HS_LOOKUP_MASK_FIND |
+                                   CFS_HS_LOOKUP_MASK_REF),
+       /** return item w/o refcount if existed, otherwise add */
+       CFS_HS_LOOKUP_IT_ADD     = (CFS_HS_LOOKUP_MASK_FIND |
+                                   CFS_HS_LOOKUP_MASK_ADD),
+       /** return item with refcount if existed, otherwise add */
+       CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND |
+                                   CFS_HS_LOOKUP_MASK_ADD),
+       /** delete if existed */
+       CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND |
+                                   CFS_HS_LOOKUP_MASK_DEL)
+};
+
+/**
+ * Search the hlist selected by @bd for @key and act on the result as
+ * requested by @intent:
+ * - on a hit with the DEL bit set, the entry is removed with
+ *   cfs_hash_bd_del_locked() and returned;
+ * - on any other hit the entry is returned, after cfs_hash_get() if the
+ *   REF bit is set;
+ * - on a miss, NULL is returned unless the ADD bit is set, in which case
+ *   @hnode (which must then be non-NULL) is inserted with
+ *   cfs_hash_bd_add_locked() and returned.
+ *
+ * When the ADD bit is clear and @hnode is non-NULL, only @hnode itself is
+ * accepted as a hit; other entries with an equal key are skipped.
+ */
+static struct hlist_node *
+cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                         const void *key, struct hlist_node *hnode,
+                         enum cfs_hash_lookup_intent intent)
+
+{
+       struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
+       struct hlist_node *ehnode;
+       struct hlist_node *match;
+       int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0;
+
+       /* with this function, we can avoid a lot of useless refcount ops,
+        * which are expensive atomic operations most time.
+        */
+       /* for ADD any entry with an equal key is a hit; otherwise a non-NULL
+        * hnode restricts the hit to that very node
+        */
+       match = intent_add ? NULL : hnode;
+       hlist_for_each(ehnode, hhead) {
+               if (!cfs_hash_keycmp(hs, key, ehnode))
+                       continue;
+
+               if (match && match != ehnode) /* can't match */
+                       continue;
+
+               /* match and ... */
+               if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) {
+                       cfs_hash_bd_del_locked(hs, bd, ehnode);
+                       return ehnode;
+               }
+
+               /* caller wants refcount? */
+               if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0)
+                       cfs_hash_get(hs, ehnode);
+               return ehnode;
+       }
+       /* no match item */
+       if (!intent_add)
+               return NULL;
+
+       LASSERT(hnode);
+       cfs_hash_bd_add_locked(hs, bd, hnode);
+       return hnode;
+}
+
+/**
+ * Look @key up in the hlist selected by @bd with intent
+ * CFS_HS_LOOKUP_IT_FIND: a hit is returned after cfs_hash_get() has been
+ * called on it; NULL is returned on a miss.
+ */
+struct hlist_node *
+cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                         const void *key)
+{
+       return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
+                                        CFS_HS_LOOKUP_IT_FIND);
+}
+EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
+
+/**
+ * Look @key up in the hlist selected by @bd with intent
+ * CFS_HS_LOOKUP_IT_PEEK: a hit is returned without cfs_hash_get() being
+ * called on it; NULL is returned on a miss.
+ */
+struct hlist_node *
+cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                       const void *key)
+{
+       return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
+                                        CFS_HS_LOOKUP_IT_PEEK);
+}
+EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
+
+/**
+ * Lock each distinct bucket referenced by @bds (at most @n descriptors,
+ * walked with cfs_hash_for_each_bd()), exclusively if @excl is set.
+ * Consecutive descriptors that share a bucket are locked only once.
+ */
+static void
+cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
+                      unsigned n, int excl)
+{
+       struct cfs_hash_bucket *prev = NULL;
+       /* unsigned like @n and like the index in the sibling multi_bd helpers */
+       unsigned int i;
+
+       /**
+        * bds must be sorted in ascending order of bd->bd_bucket->hsb_index.
+        * NB: it's possible that several bds point to the same bucket but
+        * have different bd::bd_offset, so take care to avoid locking the
+        * same bucket twice (deadlock).
+        */
+       cfs_hash_for_each_bd(bds, n, i) {
+               if (prev == bds[i].bd_bucket)
+                       continue;
+
+               LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index);
+               cfs_hash_bd_lock(hs, &bds[i], excl);
+               prev = bds[i].bd_bucket;
+       }
+}
+
+/**
+ * Unlock each distinct bucket referenced by @bds (at most @n descriptors,
+ * walked with cfs_hash_for_each_bd()); @excl must match the mode the
+ * buckets were locked with.  Consecutive descriptors that share a bucket
+ * are unlocked only once, mirroring cfs_hash_multi_bd_lock().
+ */
+static void
+cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
+                        unsigned n, int excl)
+{
+       struct cfs_hash_bucket *prev = NULL;
+       /* unsigned like @n and like the index in the sibling multi_bd helpers */
+       unsigned int i;
+
+       cfs_hash_for_each_bd(bds, n, i) {
+               if (prev != bds[i].bd_bucket) {
+                       cfs_hash_bd_unlock(hs, &bds[i], excl);
+                       prev = bds[i].bd_bucket;
+               }
+       }
+}
+
+/**
+ * Look @key up in each descriptor of @bds in turn and return the first
+ * hit, with a reference taken (CFS_HS_LOOKUP_IT_FIND); NULL if no
+ * descriptor holds the key.
+ */
+static struct hlist_node *
+cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
+                               unsigned n, const void *key)
+{
+       struct hlist_node *found;
+       unsigned i;
+
+       cfs_hash_for_each_bd(bds, n, i) {
+               found = cfs_hash_bd_lookup_locked(hs, &bds[i], key);
+               if (!found)
+                       continue;
+               return found;
+       }
+       return NULL;
+}
+
+/**
+ * Return the existing entry for @key found through @bds, or insert @hnode
+ * and return it when there is none.  An existing entry is returned with a
+ * reference unless @noref is set.
+ */
+static struct hlist_node *
+cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
+                                unsigned n, const void *key,
+                                struct hlist_node *hnode, int noref)
+{
+       struct hlist_node *ehnode;
+       int intent = CFS_HS_LOOKUP_IT_PEEK;
+       unsigned i;
+
+       LASSERT(hnode);
+       if (!noref)
+               intent |= CFS_HS_LOOKUP_MASK_REF;
+
+       cfs_hash_for_each_bd(bds, n, i) {
+               ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key,
+                                                  NULL, intent);
+               if (!ehnode)
+                       continue;
+               return ehnode;
+       }
+
+       if (i != 1) {
+               /* more than one bucket: let cfs_hash_bd_get() pick the
+                * bucket the new entry belongs to
+                */
+               struct cfs_hash_bd mybd;
+
+               cfs_hash_bd_get(hs, key, &mybd);
+               cfs_hash_bd_add_locked(hs, &mybd, hnode);
+       } else {
+               /* only one bucket */
+               cfs_hash_bd_add_locked(hs, &bds[0], hnode);
+       }
+
+       return hnode;
+}
+
+/**
+ * Delete the entry for @key from the first descriptor of @bds that holds
+ * it and return the removed node, or NULL if none matched.  @hnode is
+ * handed to cfs_hash_bd_lookup_intent() unchanged, which accepts only that
+ * node as a match when it is non-NULL.
+ */
+static struct hlist_node *
+cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
+                                unsigned n, const void *key,
+                                struct hlist_node *hnode)
+{
+       struct hlist_node *removed;
+       unsigned int i;
+
+       cfs_hash_for_each_bd(bds, n, i) {
+               removed = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode,
+                                                   CFS_HS_LOOKUP_IT_FINDDEL);
+               if (!removed)
+                       continue;
+               return removed;
+       }
+       return NULL;
+}
+
+/**
+ * Normalise a pair of bucket descriptors: a lone valid descriptor ends up
+ * in @bd1, two equal descriptors collapse into @bd1 (with @bd2 marked
+ * unused via a NULL bd_bucket), and two different descriptors are arranged
+ * so that cfs_hash_bd_compare(bd1, bd2) is not positive.
+ */
+static void
+cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
+{
+       int rc;
+
+       /* nothing in the second slot: already normalised */
+       if (!bd2->bd_bucket)
+               return;
+
+       /* only the second slot is used: move it to the first */
+       if (!bd1->bd_bucket) {
+               *bd1 = *bd2;
+               bd2->bd_bucket = NULL;
+               return;
+       }
+
+       rc = cfs_hash_bd_compare(bd1, bd2);
+       if (rc > 0)
+               swap(*bd1, *bd2); /* swap bd1 and bd2 */
+       else if (rc == 0)
+               bd2->bd_bucket = NULL;
+}
+
+/**
+ * Fill @bds[0] with the descriptor of @key in hs_buckets and, when a
+ * rehash bucket array is present, @bds[1] with its descriptor there; the
+ * pair is then normalised by cfs_hash_bd_order().  Without a rehash bucket
+ * array @bds[1] is marked unused (NULL bd_bucket).
+ */
+void
+cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
+                    struct cfs_hash_bd *bds)
+{
+       /* NB: caller should hold hs_lock.rw if REHASH is set */
+       cfs_hash_bd_from_key(hs, hs->hs_buckets,
+                            hs->hs_cur_bits, key, &bds[0]);
+       if (likely(!hs->hs_rehash_buckets)) {
+               /* no rehash or not rehashing */
+               bds[1].bd_bucket = NULL;
+       } else {
+               LASSERT(hs->hs_rehash_bits != 0);
+               cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
+                                    hs->hs_rehash_bits, key, &bds[1]);
+
+               cfs_hash_bd_order(&bds[0], &bds[1]);
+       }
+}
+
+/** Two-descriptor form of cfs_hash_multi_bd_lock(): lock the bucket(s) of @bds[0..1]. */
+void
+cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
+{
+       cfs_hash_multi_bd_lock(hs, bds, 2, excl);
+}
+
+/** Two-descriptor form of cfs_hash_multi_bd_unlock(): unlock the bucket(s) of @bds[0..1]. */
+void
+cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
+{
+       cfs_hash_multi_bd_unlock(hs, bds, 2, excl);
+}
+
+/**
+ * Two-descriptor form of cfs_hash_multi_bd_lookup_locked(): return the
+ * entry for @key found through @bds[0..1], or NULL.
+ */
+struct hlist_node *
+cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
+                              const void *key)
+{
+       return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key);
+}
+
+/**
+ * Two-descriptor form of cfs_hash_multi_bd_findadd_locked(): return the
+ * existing entry for @key, or insert and return @hnode when there is none.
+ */
+struct hlist_node *
+cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
+                               const void *key, struct hlist_node *hnode,
+                               int noref)
+{
+       return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key,
+                                               hnode, noref);
+}
+
+/**
+ * Two-descriptor form of cfs_hash_multi_bd_finddel_locked(): delete and
+ * return the entry for @key found through @bds[0..1], or return NULL.
+ */
+struct hlist_node *
+cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
+                               const void *key, struct hlist_node *hnode)
+{
+       return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode);
+}
+
+/**
+ * Free the non-NULL buckets in slots [@prev_size, @size) of @buckets, each
+ * @bkt_size bytes, and then the @size-slot pointer array itself.  Slots
+ * below @prev_size are not touched.
+ */
+static void
+cfs_hash_buckets_free(struct cfs_hash_bucket **buckets,
+                     int bkt_size, int prev_size, int size)
+{
+       int i;
+
+       for (i = prev_size; i < size; i++) {
+               if (!buckets[i])
+                       continue;
+               LIBCFS_FREE(buckets[i], bkt_size);
+       }
+
+       LIBCFS_FREE(buckets, sizeof(buckets[0]) * size);
+}
+
+/*
+ * Create or grow bucket memory. Return old_buckets if no allocation was
+ * needed, the newly allocated buckets if allocation was needed and
+ * successful, and NULL on error.
+ *
+ * The first min(old_size, new_size) bucket pointers are copied from
+ * @old_bkts into the new array; slots [old_size, new_size) receive freshly
+ * allocated and initialised buckets.  @old_bkts itself is never freed
+ * here.  On failure only the new array and the buckets allocated by this
+ * call are released.
+ */
+static struct cfs_hash_bucket **
+cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
+                        unsigned int old_size, unsigned int new_size)
+{
+       struct cfs_hash_bucket **new_bkts;
+       /* unsigned to match old_size/new_size, which it is compared against */
+       unsigned int i;
+
+       LASSERT(old_size == 0 || old_bkts);
+
+       if (old_bkts && old_size == new_size)
+               return old_bkts;
+
+       LIBCFS_ALLOC(new_bkts, sizeof(new_bkts[0]) * new_size);
+       if (!new_bkts)
+               return NULL;
+
+       if (old_bkts) {
+               memcpy(new_bkts, old_bkts,
+                      min(old_size, new_size) * sizeof(*old_bkts));
+       }
+
+       for (i = old_size; i < new_size; i++) {
+               struct hlist_head *hhead;
+               struct cfs_hash_bd bd;
+
+               LIBCFS_ALLOC(new_bkts[i], cfs_hash_bkt_size(hs));
+               if (!new_bkts[i]) {
+                       /* NOTE(review): the cleanup skips NULL slots, so it
+                        * relies on the not-yet-filled slots of new_bkts
+                        * reading as NULL - confirm LIBCFS_ALLOC zeroes
+                        */
+                       cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs),
+                                             old_size, new_size);
+                       return NULL;
+               }
+
+               new_bkts[i]->hsb_index   = i;
+               new_bkts[i]->hsb_version = 1;  /* shouldn't be zero */
+               new_bkts[i]->hsb_depmax  = -1; /* unknown */
+               bd.bd_bucket = new_bkts[i];
+               cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
+                       INIT_HLIST_HEAD(hhead);
+
+               /* no per-bucket lock to initialise in these two modes */
+               if (cfs_hash_with_no_lock(hs) ||
+                   cfs_hash_with_no_bktlock(hs))
+                       continue;
+
+               if (cfs_hash_with_rw_bktlock(hs))
+                       rwlock_init(&new_bkts[i]->hsb_lock.rw);
+               else if (cfs_hash_with_spin_bktlock(hs))
+                       spin_lock_init(&new_bkts[i]->hsb_lock.spin);
+               else
+                       LBUG(); /* invalid use-case */
+       }
+       return new_bkts;
+}
+
+/**
+ * Initialize new libcfs hash, where:
+ * @name     - Descriptive hash name
+ * @cur_bits - Initial hash table size, in bits
+ * @max_bits - Maximum allowed hash table resize, in bits
+ * @ops      - Registered hash table operations
+ * @flags    - CFS_HASH_REHASH enable dynamic hash resizing
+ *          - CFS_HASH_SORT enable chained hash sort
+ *
+ * NB: the parameters above are those of cfs_hash_create(), defined further
+ * down; the line below only forward-declares the rehash work item handler
+ * that cfs_hash_create() registers.
+ */
+static int cfs_hash_rehash_worker(cfs_workitem_t *wi);
+
+#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
+/**
+ * Work item handler for hs_dep_wi: print a console warning with the depth
+ * record stored in the hash, then clear hs_dep_bits to mark the work item
+ * as done.  Always returns 0.
+ */
+static int cfs_hash_dep_print(cfs_workitem_t *wi)
+{
+       struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_dep_wi);
+       int dep, bkt, off, bits;
+
+       /* take a snapshot of the record under hs_dep_lock */
+       spin_lock(&hs->hs_dep_lock);
+       bits = hs->hs_dep_bits;
+       off  = hs->hs_dep_off;
+       bkt  = hs->hs_dep_bkt;
+       dep  = hs->hs_dep_max;
+       spin_unlock(&hs->hs_dep_lock);
+
+       LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n",
+                     hs->hs_name, bits, dep, bkt, off);
+
+       /* mark as workitem done */
+       spin_lock(&hs->hs_dep_lock);
+       hs->hs_dep_bits = 0;
+       spin_unlock(&hs->hs_dep_lock);
+
+       return 0;
+}
+
+/**
+ * Set up depth reporting for @hs: initialise hs_dep_lock and bind
+ * hs_dep_wi to cfs_hash_dep_print().
+ */
+static void cfs_hash_depth_wi_init(struct cfs_hash *hs)
+{
+       spin_lock_init(&hs->hs_dep_lock);
+       cfs_wi_init(&hs->hs_dep_wi, hs, cfs_hash_dep_print);
+}
+
+/**
+ * Cancel the depth work item of @hs.  If cfs_wi_deschedule() reports
+ * success there is nothing more to do; otherwise poll hs_dep_bits, yielding
+ * with cond_resched() between checks, until cfs_hash_dep_print() has reset
+ * it to 0.
+ */
+static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs)
+{
+       int pending;
+
+       if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_dep_wi))
+               return;
+
+       for (;;) {
+               spin_lock(&hs->hs_dep_lock);
+               pending = hs->hs_dep_bits != 0;
+               spin_unlock(&hs->hs_dep_lock);
+
+               if (!pending)
+                       break;
+               cond_resched();
+       }
+}
+
+#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */
+
+/* depth reporting is compiled out at this debug level: empty stand-ins */
+static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {}
+static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {}
+
+#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */
+
+/**
+ * Allocate and initialise a hash table.
+ *
+ * @name        - copied into hs_name with strlcpy(), limited to
+ *                CFS_HASH_NAME_LEN, or CFS_HASH_BIGNAME_LEN when
+ *                CFS_HASH_BIGNAME is set in @flags
+ * @cur_bits    - initial table size in bits; also stored as hs_min_bits
+ * @max_bits    - largest table size in bits; must equal @cur_bits unless
+ *                CFS_HASH_REHASH is set
+ * @bkt_bits    - stored as hs_bkt_bits; must not exceed @cur_bits
+ * @extra_bytes - stored as hs_extra_bytes
+ * @min_theta, @max_theta - passed to __cfs_hash_set_theta(), only when the
+ *                table is created with rehash
+ * @ops         - operation table; hs_key, hs_hash, hs_object, hs_keycmp,
+ *                hs_get and hs_put_locked must be set, and hs_keycpy too
+ *                when CFS_HASH_REHASH_KEY is set
+ * @flags       - CFS_HASH_* flags; CFS_HASH_REHASH forces CFS_HASH_COUNTER
+ *                and may not be combined with CFS_HASH_NO_LOCK
+ *
+ * Returns the new table with hs_refcount set to 1, or NULL if the table or
+ * its bucket array could not be allocated.
+ */
+struct cfs_hash *
+cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
+               unsigned bkt_bits, unsigned extra_bytes,
+               unsigned min_theta, unsigned max_theta,
+               struct cfs_hash_ops *ops, unsigned flags)
+{
+       struct cfs_hash *hs;
+       int len;
+
+       CLASSERT(CFS_HASH_THETA_BITS < 15);
+
+       LASSERT(name);
+       LASSERT(ops->hs_key);
+       LASSERT(ops->hs_hash);
+       LASSERT(ops->hs_object);
+       LASSERT(ops->hs_keycmp);
+       LASSERT(ops->hs_get);
+       LASSERT(ops->hs_put_locked);
+
+       if ((flags & CFS_HASH_REHASH) != 0)
+               flags |= CFS_HASH_COUNTER; /* must have counter */
+
+       LASSERT(cur_bits > 0);
+       LASSERT(cur_bits >= bkt_bits);
+       LASSERT(max_bits >= cur_bits && max_bits < 31);
+       LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits));
+       LASSERT(ergo((flags & CFS_HASH_REHASH) != 0,
+                    (flags & CFS_HASH_NO_LOCK) == 0));
+       LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0, ops->hs_keycpy));
+
+       /* the name buffer is the tail of the structure; size it by flag */
+       len = (flags & CFS_HASH_BIGNAME) == 0 ?
+             CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
+       LIBCFS_ALLOC(hs, offsetof(struct cfs_hash, hs_name[len]));
+       if (!hs)
+               return NULL;
+
+       strlcpy(hs->hs_name, name, len);
+       hs->hs_flags = flags;
+
+       atomic_set(&hs->hs_refcount, 1);
+       atomic_set(&hs->hs_count, 0);
+
+       /* both setup helpers read hs_flags, which is set above */
+       cfs_hash_lock_setup(hs);
+       cfs_hash_hlist_setup(hs);
+
+       hs->hs_cur_bits = (__u8)cur_bits;
+       hs->hs_min_bits = (__u8)cur_bits;
+       hs->hs_max_bits = (__u8)max_bits;
+       hs->hs_bkt_bits = (__u8)bkt_bits;
+
+       hs->hs_ops         = ops;
+       hs->hs_extra_bytes = extra_bytes;
+       hs->hs_rehash_bits = 0;
+       cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker);
+       cfs_hash_depth_wi_init(hs);
+
+       if (cfs_hash_with_rehash(hs))
+               __cfs_hash_set_theta(hs, min_theta, max_theta);
+
+       hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0,
+                                                 CFS_HASH_NBKT(hs));
+       if (hs->hs_buckets)
+               return hs;
+
+       /* bucket allocation failed: release the table itself */
+       LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[len]));
+       return NULL;
+}
+EXPORT_SYMBOL(cfs_hash_create);
+
+/**
+ * Cleanup libcfs hash @hs.
+ *
+ * Called from cfs_hash_putref() when the last reference is dropped.  Marks
+ * the table as exiting, cancels rehash (if enabled) and the depth work
+ * item, removes every remaining item with cfs_hash_bd_del_locked()
+ * followed by cfs_hash_exit(), and finally frees the buckets and the table
+ * itself.  Asserts if items remain in a table created with the
+ * "assert empty" property.
+ */
+static void
+cfs_hash_destroy(struct cfs_hash *hs)
+{
+       struct hlist_node *hnode;
+       struct hlist_node *pos;
+       struct cfs_hash_bd bd;
+       int i;
+
+       LASSERT(hs);
+       LASSERT(!cfs_hash_is_exiting(hs) &&
+               !cfs_hash_is_iterating(hs));
+
+       /**
+        * prohibit further rehashes, don't need any lock because
+        * I'm the only (last) one can change it.
+        */
+       hs->hs_exiting = 1;
+       if (cfs_hash_with_rehash(hs))
+               cfs_hash_rehash_cancel(hs);
+
+       cfs_hash_depth_wi_cancel(hs);
+       /* rehash should be done/canceled */
+       LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets);
+
+       cfs_hash_for_each_bucket(hs, &bd, i) {
+               struct hlist_head *hhead;
+
+               LASSERT(bd.bd_bucket);
+               /* no need to take this lock, just for consistent code */
+               cfs_hash_bd_lock(hs, &bd, 1);
+
+               cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+                       hlist_for_each_safe(hnode, pos, hhead) {
+                               LASSERTF(!cfs_hash_with_assert_empty(hs),
+                                        "hash %s bucket %u(%u) is not empty: %u items left\n",
+                                        hs->hs_name, bd.bd_bucket->hsb_index,
+                                        bd.bd_offset, bd.bd_bucket->hsb_count);
+                               /* can't assert that the key is valid, because
+                                * we can interrupt rehash
+                                */
+                               cfs_hash_bd_del_locked(hs, &bd, hnode);
+                               cfs_hash_exit(hs, hnode);
+                       }
+               }
+               LASSERT(bd.bd_bucket->hsb_count == 0);
+               cfs_hash_bd_unlock(hs, &bd, 1);
+               cond_resched();
+       }
+
+       LASSERT(atomic_read(&hs->hs_count) == 0);
+
+       cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
+                             0, CFS_HASH_NBKT(hs));
+       /* i is reused as the name length, mirroring cfs_hash_create() */
+       i = cfs_hash_with_bigname(hs) ?
+           CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN;
+       LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[i]));
+}
+
+/**
+ * Take a reference on @hs unless its refcount has already dropped to zero.
+ * Returns @hs on success and NULL otherwise.
+ */
+struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs)
+{
+       return atomic_inc_not_zero(&hs->hs_refcount) ? hs : NULL;
+}
+EXPORT_SYMBOL(cfs_hash_getref);
+
+/**
+ * Drop a reference on @hs; the table is destroyed with cfs_hash_destroy()
+ * when this was the last one.
+ */
+void cfs_hash_putref(struct cfs_hash *hs)
+{
+       if (!atomic_dec_and_test(&hs->hs_refcount))
+               return;
+
+       cfs_hash_destroy(hs);
+}
+EXPORT_SYMBOL(cfs_hash_putref);
+
+/**
+ * Work out the table size (in bits) that @hs should be rehashed to.
+ *
+ * Returns a negative errno when a rehash cannot be started (-EOPNOTSUPP:
+ * not a rehashable/locked table, -ESRCH: exiting, -EALREADY: rehash in
+ * progress, -EAGAIN: being iterated), 0 when no resize is needed, or the
+ * new number of bits (current bits +1 to grow, -1 to shrink).
+ */
+static inline int
+cfs_hash_rehash_bits(struct cfs_hash *hs)
+{
+       if (cfs_hash_with_no_lock(hs) ||
+           !cfs_hash_with_rehash(hs))
+               return -EOPNOTSUPP;
+
+       if (unlikely(cfs_hash_is_exiting(hs)))
+               return -ESRCH;
+
+       if (unlikely(cfs_hash_is_rehashing(hs)))
+               return -EALREADY;
+
+       if (unlikely(cfs_hash_is_iterating(hs)))
+               return -EAGAIN;
+
+       /* XXX: need to handle case with max_theta != 2.0
+        *      and the case with min_theta != 0.5
+        */
+       /* grow: room left and load above the upper threshold */
+       if (hs->hs_cur_bits < hs->hs_max_bits &&
+           __cfs_hash_theta(hs) > hs->hs_max_theta)
+               return hs->hs_cur_bits + 1;
+
+       /* shrink: only if allowed, above the minimum size and load below
+        * the lower threshold
+        */
+       if (cfs_hash_with_shrink(hs) &&
+           hs->hs_cur_bits > hs->hs_min_bits &&
+           __cfs_hash_theta(hs) < hs->hs_min_theta)
+               return hs->hs_cur_bits - 1;
+
+       return 0;
+}
+
+/**
+ * Tell whether a rehash may be run inline by the caller.  It may not if:
+ * - user wants non-blocking change (add/del) on hash table
+ * - too many elements (hs_count has reached CFS_HASH_LOOP_HOG)
+ */
+static inline int
+cfs_hash_rehash_inline(struct cfs_hash *hs)
+{
+       if (cfs_hash_with_nblk_change(hs))
+               return 0;
+
+       return atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG;
+}
+
+/**
+ * Add item @hnode to libcfs hash @hs using @key.  The registered
+ * ops->hs_get function will be called when the item is added.
+ *
+ * @hnode must not already be hashed.  No check for an existing entry with
+ * the same key is made.  After the insertion a rehash is started if
+ * cfs_hash_rehash_bits() asks for a resize.
+ */
+void
+cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
+{
+       struct cfs_hash_bd bd;
+       int bits;
+
+       LASSERT(hlist_unhashed(hnode));
+
+       /* table lock shared, bucket lock exclusive */
+       cfs_hash_lock(hs, 0);
+       cfs_hash_bd_get_and_lock(hs, key, &bd, 1);
+
+       cfs_hash_key_validate(hs, key, hnode);
+       cfs_hash_bd_add_locked(hs, &bd, hnode);
+
+       cfs_hash_bd_unlock(hs, &bd, 1);
+
+       /* decide about resizing while the table lock is still held */
+       bits = cfs_hash_rehash_bits(hs);
+       cfs_hash_unlock(hs, 0);
+       if (bits > 0)
+               cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
+}
+EXPORT_SYMBOL(cfs_hash_add);
+
+/*
+ * Common helper for the "unique" insert variants: hand @hnode and @key
+ * to cfs_hash_dual_bd_findadd_locked() under the bucket locks and return
+ * whatever node it reports.  The result equals @hnode when the new item
+ * was added; @noref is passed through to the helper unchanged.
+ */
+static struct hlist_node *
+cfs_hash_find_or_add(struct cfs_hash *hs, const void *key,
+                    struct hlist_node *hnode, int noref)
+{
+       struct cfs_hash_bd bds[2];
+       struct hlist_node *found;
+       int want_bits;
+
+       LASSERT(hlist_unhashed(hnode));
+
+       cfs_hash_lock(hs, 0);
+
+       cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
+       cfs_hash_key_validate(hs, key, hnode);
+       found = cfs_hash_dual_bd_findadd_locked(hs, bds, key, hnode, noref);
+       cfs_hash_dual_bd_unlock(hs, bds, 1);
+
+       /* the rehash check is only needed when a new item was added */
+       want_bits = (found == hnode) ? cfs_hash_rehash_bits(hs) : 0;
+       cfs_hash_unlock(hs, 0);
+
+       if (want_bits > 0)
+               cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
+
+       return found;
+}
+
+/**
+ * Add item @hnode to libcfs hash @hs using @key, refusing duplicates.
+ * The registered ops->hs_get function will be called if the item was
+ * added.  Returns 0 on success or -EALREADY on key collisions.
+ */
+int
+cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
+                   struct hlist_node *hnode)
+{
+       if (cfs_hash_find_or_add(hs, key, hnode, 1) == hnode)
+               return 0;
+
+       return -EALREADY;
+}
+EXPORT_SYMBOL(cfs_hash_add_unique);
+
+/**
+ * Add item @hnode to libcfs hash @hs using @key.  If this @key
+ * already exists in the hash then ops->hs_get will be called on the
+ * conflicting entry and that entry will be returned to the caller.
+ * Otherwise ops->hs_get is called on the item which was added.
+ */
+void *
+cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
+                       struct hlist_node *hnode)
+{
+       struct hlist_node *found;
+
+       found = cfs_hash_find_or_add(hs, key, hnode, 0);
+       return cfs_hash_object(hs, found);
+}
+EXPORT_SYMBOL(cfs_hash_findadd_unique);
+
+/**
+ * Delete item @hnode from the libcfs hash @hs using @key.  The @key
+ * is required to ensure the correct hash bucket is locked since there
+ * is no direct linkage from the item to the bucket.  The object
+ * removed from the hash will be returned and ops->hs_put is called
+ * on the removed object.
+ *
+ * @hnode may be NULL, in which case the item is located by @key alone
+ * (this is how cfs_hash_del_key() uses this function).  Returns NULL
+ * when nothing matching was found.
+ *
+ * NOTE(review): when @hnode is non-NULL but already unhashed the
+ * removal step is skipped, yet the object for @hnode is still returned
+ * and a rehash check is made - confirm that callers expect this.
+ */
+void *
+cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
+{
+       void *obj = NULL;
+       int bits = 0;
+       struct cfs_hash_bd bds[2];
+
+       cfs_hash_lock(hs, 0);
+       cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
+
+       /* NB: do nothing if @hnode is not in hash table */
+       if (!hnode || !hlist_unhashed(hnode)) {
+               if (!bds[1].bd_bucket && hnode) {
+                       /* single candidate bucket and a known node:
+                        * unlink it directly, no search needed
+                        */
+                       cfs_hash_bd_del_locked(hs, &bds[0], hnode);
+               } else {
+                       /* search by @key (and @hnode when given); the
+                        * result replaces @hnode and may be NULL
+                        */
+                       hnode = cfs_hash_dual_bd_finddel_locked(hs, bds,
+                                                               key, hnode);
+               }
+       }
+
+       if (hnode) {
+               obj  = cfs_hash_object(hs, hnode);
+               bits = cfs_hash_rehash_bits(hs);
+       }
+
+       cfs_hash_dual_bd_unlock(hs, bds, 1);
+       cfs_hash_unlock(hs, 0);
+       /* start the rehash only after every lock has been dropped */
+       if (bits > 0)
+               cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
+
+       return obj;
+}
+EXPORT_SYMBOL(cfs_hash_del);
+
+/**
+ * Delete item given @key in libcfs hash @hs.  The first @key found in
+ * the hash will be removed; if the key exists multiple times in the hash
+ * @hs this function must be called once per key.  The removed object
+ * will be returned and ops->hs_put is called on the removed object.
+ */
+void *
+cfs_hash_del_key(struct cfs_hash *hs, const void *key)
+{
+       /* a NULL node makes cfs_hash_del() locate the item by @key */
+       void *removed = cfs_hash_del(hs, key, NULL);
+
+       return removed;
+}
+EXPORT_SYMBOL(cfs_hash_del_key);
+
+/**
+ * Lookup an item using @key in the libcfs hash @hs and return it.
+ * If the @key is found in the hash, ops->hs_get is called and the
+ * matching object is returned.  It is the caller's responsibility
+ * to call the counterpart ops->hs_put using the cfs_hash_put() macro
+ * when finished with the object.  If the @key was not found
+ * in the hash @hs NULL is returned.
+ */
+void *
+cfs_hash_lookup(struct cfs_hash *hs, const void *key)
+{
+       struct cfs_hash_bd bds[2];
+       struct hlist_node *match;
+       void *found;
+
+       cfs_hash_lock(hs, 0);
+       cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
+
+       match = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
+       found = match ? cfs_hash_object(hs, match) : NULL;
+
+       cfs_hash_dual_bd_unlock(hs, bds, 0);
+       cfs_hash_unlock(hs, 0);
+
+       return found;
+}
+EXPORT_SYMBOL(cfs_hash_lookup);
+
+/*
+ * Mark the start of an iteration over @hs.  For hashes without rehash
+ * support this is a no-op.  Otherwise the iterator count is raised and
+ * any rehash in progress is cancelled so that the iteration does not
+ * run against a table that is being resized.  Must be paired with
+ * cfs_hash_for_each_exit().
+ */
+static void
+cfs_hash_for_each_enter(struct cfs_hash *hs)
+{
+       LASSERT(!cfs_hash_is_exiting(hs));
+
+       if (!cfs_hash_with_rehash(hs))
+               return;
+       /*
+        * NB: this is a race on cfs_hash::hs_iterating, but it doesn't
+        * matter because it's just an unreliable signal to rehash-thread,
+        * rehash-thread will try to finish rehash ASAP when seeing this.
+        */
+       hs->hs_iterating = 1;
+
+       cfs_hash_lock(hs, 1);
+       hs->hs_iterators++;
+
+       /* NB: iteration is mostly called by service thread,
+        * we tend to cancel pending rehash-request, instead of
+        * blocking service thread, we will relaunch rehash request
+        * after iteration
+        */
+       if (cfs_hash_is_rehashing(hs))
+               cfs_hash_rehash_cancel_locked(hs);
+       cfs_hash_unlock(hs, 1);
+}
+
+/*
+ * Mark the end of an iteration over @hs (counterpart of
+ * cfs_hash_for_each_enter()).  Drops the iterator count, clears the
+ * hs_iterating hint once no iterator remains, and relaunches a rehash
+ * if the table asks for one.
+ */
+static void
+cfs_hash_for_each_exit(struct cfs_hash *hs)
+{
+       int remained;
+       int bits;
+
+       if (!cfs_hash_with_rehash(hs))
+               return;
+       cfs_hash_lock(hs, 1);
+       remained = --hs->hs_iterators;
+       bits = cfs_hash_rehash_bits(hs);
+       cfs_hash_unlock(hs, 1);
+       /* NB: this is a race on cfs_hash::hs_iterating; it is only an
+        * unreliable hint for the rehash thread, so the race is tolerated
+        */
+       if (remained == 0)
+               hs->hs_iterating = 0;
+       if (bits > 0) {
+               /* rehash inline only while the table is small; unlike
+                * cfs_hash_rehash_inline() the nblk_change flag is not
+                * consulted here
+                */
+               cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
+                                   CFS_HASH_LOOP_HOG);
+       }
+}
+
+/**
+ * For each item in the libcfs hash @hs call the passed callback @func
+ * and pass to it as an argument each hash item and the private @data.
+ * Iteration stops as soon as @func returns non-zero.  When @func is
+ * NULL no callback is made and only the per-bucket item counts are
+ * summed.  Returns the number of items visited (or counted).
+ *
+ * a) the function may sleep!
+ * b) during the callback:
+ *    . the bucket lock is held so the callback must never sleep.
+ *    . if @remove_safe is true, user can remove current item by
+ *      cfs_hash_bd_del_locked
+ */
+static __u64
+cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+                       void *data, int remove_safe)
+{
+       struct hlist_node *hnode;
+       struct hlist_node *pos;
+       struct cfs_hash_bd bd;
+       __u64 count = 0;
+       /* removal needs the bucket locked exclusively */
+       int excl = !!remove_safe;
+       /* items visited since the table lock was last released */
+       int loop = 0;
+       int i;
+
+       cfs_hash_for_each_enter(hs);
+
+       cfs_hash_lock(hs, 0);
+       LASSERT(!cfs_hash_is_rehashing(hs));
+
+       cfs_hash_for_each_bucket(hs, &bd, i) {
+               struct hlist_head *hhead;
+
+               cfs_hash_bd_lock(hs, &bd, excl);
+               if (!func) { /* only glimpse size */
+                       count += bd.bd_bucket->hsb_count;
+                       cfs_hash_bd_unlock(hs, &bd, excl);
+                       continue;
+               }
+
+               cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+                       /* _safe variant: @func may unlink @hnode */
+                       hlist_for_each_safe(hnode, pos, hhead) {
+                               cfs_hash_bucket_validate(hs, &bd, hnode);
+                               count++;
+                               loop++;
+                               if (func(hs, &bd, hnode, data)) {
+                                       cfs_hash_bd_unlock(hs, &bd, excl);
+                                       goto out;
+                               }
+                       }
+               }
+               cfs_hash_bd_unlock(hs, &bd, excl);
+               if (loop < CFS_HASH_LOOP_HOG)
+                       continue;
+               /* visited many items: release the table lock and give
+                * other tasks a chance to run before continuing
+                */
+               loop = 0;
+               cfs_hash_unlock(hs, 0);
+               cond_resched();
+               cfs_hash_lock(hs, 0);
+       }
+ out:
+       cfs_hash_unlock(hs, 0);
+
+       cfs_hash_for_each_exit(hs);
+       return count;
+}
+
+/* Argument bundle handed to cfs_hash_cond_del_locked() via @data */
+struct cfs_hash_cond_arg {
+       /* predicate: item is deleted when this returns true */
+       cfs_hash_cond_opt_cb_t  func;
+       /* opaque pointer forwarded to @func as its second argument */
+       void                    *arg;
+};
+
+/*
+ * Iteration callback used by cfs_hash_cond_del(): remove @hnode from
+ * @bd when the user predicate accepts the object.  Always returns 0 so
+ * that the iteration continues over the whole table.
+ */
+static int
+cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                        struct hlist_node *hnode, void *data)
+{
+       struct cfs_hash_cond_arg *cond = data;
+       void *obj = cfs_hash_object(hs, hnode);
+
+       if (!cond->func(obj, cond->arg))
+               return 0;
+
+       cfs_hash_bd_del_locked(hs, bd, hnode);
+       return 0;
+}
+
+/**
+ * Delete every item from the libcfs hash @hs for which @func returns
+ * true.  The bucket write lock is held while each bucket is walked so
+ * that no object can be referenced during the removal.
+ */
+void
+cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data)
+{
+       struct cfs_hash_cond_arg cond;
+
+       cond.func = func;
+       cond.arg = data;
+
+       cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &cond, 1);
+}
+EXPORT_SYMBOL(cfs_hash_cond_del);
+
+/**
+ * Call @func on each item of the libcfs hash @hs, passing @data along.
+ * This is cfs_hash_for_each_tight() with removal disabled: the bucket
+ * lock is held in non-exclusive mode during the callback, so @func must
+ * not sleep.  Iteration stops when @func returns non-zero.
+ */
+void
+cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+                 void *data)
+{
+       cfs_hash_for_each_tight(hs, func, data, 0);
+}
+EXPORT_SYMBOL(cfs_hash_for_each);
+
+/**
+ * Like cfs_hash_for_each(), but each bucket is locked exclusively so
+ * that @func may remove the current item with cfs_hash_bd_del_locked().
+ * The bucket lock is still held during the callback, so @func must not
+ * sleep.
+ */
+void
+cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+                      void *data)
+{
+       cfs_hash_for_each_tight(hs, func, data, 1);
+}
+EXPORT_SYMBOL(cfs_hash_for_each_safe);
+
+/*
+ * Iteration callback for cfs_hash_is_empty(): being called at all means
+ * the hash holds at least one item, so clear the caller's flag and stop.
+ */
+static int
+cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+             struct hlist_node *hnode, void *data)
+{
+       int *empty = data;
+
+       *empty = 0;
+       return 1; /* non-zero breaks the loop */
+}
+
+/**
+ * Returns 1 when the libcfs hash @hs holds no item, 0 otherwise.
+ */
+int
+cfs_hash_is_empty(struct cfs_hash *hs)
+{
+       int no_items = 1;
+
+       /* cfs_hash_peek() clears the flag on the first item it sees */
+       cfs_hash_for_each_tight(hs, cfs_hash_peek, &no_items, 0);
+       return no_items;
+}
+EXPORT_SYMBOL(cfs_hash_is_empty);
+
+/**
+ * Return the number of items in the libcfs hash @hs.  The global
+ * counter is used when the hash maintains one; otherwise the per-bucket
+ * counts are summed by walking every bucket.
+ */
+__u64
+cfs_hash_size_get(struct cfs_hash *hs)
+{
+       if (cfs_hash_with_counter(hs))
+               return atomic_read(&hs->hs_count);
+
+       /* a NULL callback makes the walk only add up bucket counts */
+       return cfs_hash_for_each_tight(hs, NULL, NULL, 0);
+}
+EXPORT_SYMBOL(cfs_hash_size_get);
+
+/*
+ * cfs_hash_for_each_relax:
+ * Iterate the hash table and call @func on each item without
+ * any lock held during the callback: a reference is taken on the item,
+ * the bucket and table locks are dropped around the call and re-taken
+ * afterwards.  Returns the number of times @func was called; iteration
+ * ends early when @func returns non-zero.
+ *
+ * This function can't guarantee to finish iteration
+ * if these features are enabled:
+ *
+ *  a. if rehash_key is enabled, an item can be moved from
+ *     one bucket to another bucket
+ *  b. user can remove non-zero-ref item from hash-table,
+ *     so the item can be removed from hash-table, even worse,
+ *     it's possible that user changed key and insert to another
+ *     hash bucket.
+ * there's no way for us to finish iteration correctly on previous
+ * two cases, so iteration has to be stopped on change.
+ */
+static int
+cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+                       void *data)
+{
+       struct hlist_node *hnode;
+       struct hlist_node *tmp;
+       struct cfs_hash_bd bd;
+       __u32 version;
+       int count = 0;
+       int stop_on_change;
+       /*
+        * Must start at 0: @rc is tested after the inner loop even when
+        * a chain is empty and @func was never called.
+        */
+       int rc = 0;
+       int i;
+
+       stop_on_change = cfs_hash_with_rehash_key(hs) ||
+                        !cfs_hash_with_no_itemref(hs) ||
+                        !hs->hs_ops->hs_put_locked;
+       cfs_hash_lock(hs, 0);
+       LASSERT(!cfs_hash_is_rehashing(hs));
+
+       cfs_hash_for_each_bucket(hs, &bd, i) {
+               struct hlist_head *hhead;
+
+               cfs_hash_bd_lock(hs, &bd, 0);
+               /* remember the bucket version to detect later changes */
+               version = cfs_hash_bd_version_get(&bd);
+
+               cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+                       for (hnode = hhead->first; hnode;) {
+                               cfs_hash_bucket_validate(hs, &bd, hnode);
+                               /* pin the item, then call back unlocked */
+                               cfs_hash_get(hs, hnode);
+                               cfs_hash_bd_unlock(hs, &bd, 0);
+                               cfs_hash_unlock(hs, 0);
+
+                               rc = func(hs, &bd, hnode, data);
+                               if (stop_on_change)
+                                       cfs_hash_put(hs, hnode);
+                               cond_resched();
+                               count++;
+
+                               cfs_hash_lock(hs, 0);
+                               cfs_hash_bd_lock(hs, &bd, 0);
+                               if (!stop_on_change) {
+                                       /* fetch the successor before the
+                                        * reference on @hnode is dropped
+                                        */
+                                       tmp = hnode->next;
+                                       cfs_hash_put_locked(hs, hnode);
+                                       hnode = tmp;
+                               } else { /* bucket changed? */
+                                       if (version !=
+                                           cfs_hash_bd_version_get(&bd))
+                                               break;
+                                       /* safe to continue because no change */
+                                       hnode = hnode->next;
+                               }
+                               if (rc) /* callback wants to break iteration */
+                                       break;
+                       }
+                       if (rc) /* callback wants to break iteration */
+                               break;
+               }
+               cfs_hash_bd_unlock(hs, &bd, 0);
+               if (rc) /* callback wants to break iteration */
+                       break;
+       }
+       cfs_hash_unlock(hs, 0);
+
+       return count;
+}
+
+/**
+ * Call @func on each item of the libcfs hash @hs with no hash lock held
+ * during the callback.  Returns -EOPNOTSUPP when the hash is lockless,
+ * allows key rehashing, does not carry the no-itemref flag, or lacks
+ * the get/put operations needed to pin items; returns 0 otherwise.
+ */
+int
+cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+                        void *data)
+{
+       if (cfs_hash_with_no_lock(hs) ||
+           cfs_hash_with_rehash_key(hs) ||
+           !cfs_hash_with_no_itemref(hs) ||
+           !hs->hs_ops->hs_get ||
+           (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
+               return -EOPNOTSUPP;
+
+       cfs_hash_for_each_enter(hs);
+       cfs_hash_for_each_relax(hs, func, data);
+       cfs_hash_for_each_exit(hs);
+
+       return 0;
+}
+EXPORT_SYMBOL(cfs_hash_for_each_nolock);
+
+/**
+ * For each hash bucket in the libcfs hash @hs call the passed callback
+ * @func until all the hash buckets are empty.  The passed callback @func
+ * or the previously registered callback hs->hs_put must remove the item
+ * from the hash.  You may either use the cfs_hash_del() or hlist_del()
+ * functions.  No rwlocks will be held during the callback @func, so it
+ * is safe to sleep if needed.  This function will not terminate until
+ * the hash is empty.  Note it is still possible to concurrently add new
+ * items into the hash.  It is the caller's responsibility to ensure
+ * the required locking is in place to prevent concurrent insertions.
+ *
+ * Returns 0, or -EOPNOTSUPP when the hash is lockless or lacks the
+ * get/put operations needed to pin items.
+ */
+int
+cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
+                       void *data)
+{
+       unsigned pass = 0;
+
+       if (cfs_hash_with_no_lock(hs) ||
+           !hs->hs_ops->hs_get ||
+           (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
+               return -EOPNOTSUPP;
+
+       cfs_hash_for_each_enter(hs);
+       for (;;) {
+               /* a pass that visits nothing means the hash is empty */
+               if (!cfs_hash_for_each_relax(hs, func, data))
+                       break;
+
+               CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
+                      hs->hs_name, pass++);
+       }
+       cfs_hash_for_each_exit(hs);
+
+       return 0;
+}
+EXPORT_SYMBOL(cfs_hash_for_each_empty);
+
+/**
+ * Call @func on each item of the single hash chain selected by @hindex
+ * in the libcfs hash @hs, passing @data along.  Nothing is visited when
+ * @hindex is out of range.  The bucket lock is held during the
+ * callback, and the walk stops when @func returns non-zero.
+ */
+void
+cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex,
+                       cfs_hash_for_each_cb_t func, void *data)
+{
+       struct hlist_head *hhead;
+       struct hlist_node *hnode;
+       struct cfs_hash_bd bd;
+
+       cfs_hash_for_each_enter(hs);
+       cfs_hash_lock(hs, 0);
+       /* checked under the table lock */
+       if (hindex >= CFS_HASH_NHLIST(hs))
+               goto out;
+
+       cfs_hash_bd_index_set(hs, hindex, &bd);
+
+       cfs_hash_bd_lock(hs, &bd, 0);
+       hhead = cfs_hash_bd_hhead(hs, &bd);
+       /* plain (non-safe) walk: @hnode->next is read after @func
+        * returns - NOTE(review): presumably @func must not unlink it
+        */
+       hlist_for_each(hnode, hhead) {
+               if (func(hs, &bd, hnode, data))
+                       break;
+       }
+       cfs_hash_bd_unlock(hs, &bd, 0);
+out:
+       cfs_hash_unlock(hs, 0);
+       cfs_hash_for_each_exit(hs);
+}
+EXPORT_SYMBOL(cfs_hash_hlist_for_each);
+
+/*
+ * For each item in the libcfs hash @hs which matches the @key call
+ * the passed callback @func and pass to it as an argument each hash
+ * item and the private @data. During the callback the bucket lock
+ * is held so the callback must never sleep.
+ */
+void
+cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
+                     cfs_hash_for_each_cb_t func, void *data)
+{
+       struct hlist_node *hnode;
+       struct cfs_hash_bd bds[2];
+       unsigned int i;
+
+       cfs_hash_lock(hs, 0);
+
+       cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
+
+       /* walk the chain of each bucket returned for @key */
+       cfs_hash_for_each_bd(bds, 2, i) {
+               struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);
+
+               hlist_for_each(hnode, hlist) {
+                       cfs_hash_bucket_validate(hs, &bds[i], hnode);
+
+                       if (cfs_hash_keycmp(hs, key, hnode)) {
+                               /* NOTE(review): this break leaves only
+                                * the current chain walk; whether the
+                                * next bucket is still visited depends
+                                * on cfs_hash_for_each_bd - confirm
+                                */
+                               if (func(hs, &bds[i], hnode, data))
+                                       break;
+                       }
+               }
+       }
+
+       cfs_hash_dual_bd_unlock(hs, bds, 0);
+       cfs_hash_unlock(hs, 0);
+}
+EXPORT_SYMBOL(cfs_hash_for_each_key);
+
+/**
+ * Rehash the libcfs hash @hs to the given @bits.  This can be used
+ * to grow the hash size when excessive chaining is detected, or to
+ * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
+ * flag is set in @hs the libcfs hash may be dynamically rehashed
+ * during addition or removal if the hash's theta value exceeds
+ * either the hs->hs_min_theta or hs->hs_max_theta values.  By default
+ * these values are tuned to keep the chained hash depth small, and
+ * this approach assumes a reasonably uniform hashing function.  The
+ * theta thresholds for @hs are tunable via cfs_hash_set_theta().
+ *
+ * cfs_hash_rehash_cancel_locked() itself cancels a rehash of @hs that
+ * is pending or running.  The caller must hold cfs_hash_lock(hs, 1);
+ * the lock is dropped and re-taken while waiting for a rehash that is
+ * already running to finish.
+ */
+void
+cfs_hash_rehash_cancel_locked(struct cfs_hash *hs)
+{
+       int i;
+
+       /* need hold cfs_hash_lock(hs, 1) */
+       LASSERT(cfs_hash_with_rehash(hs) &&
+               !cfs_hash_with_no_lock(hs));
+
+       if (!cfs_hash_is_rehashing(hs))
+               return;
+
+       /* NOTE(review): a non-zero return presumably means the work item
+        * was still queued and has been removed, so the request can be
+        * cleared right here - confirm against cfs_wi_deschedule()
+        */
+       if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_rehash_wi)) {
+               hs->hs_rehash_bits = 0;
+               return;
+       }
+
+       /* otherwise poll, with the lock released, until the rehash
+        * state clears
+        */
+       for (i = 2; cfs_hash_is_rehashing(hs); i++) {
+               cfs_hash_unlock(hs, 1);
+               /* raise console warning while waiting too long */
+               CDEBUG(is_power_of_2(i >> 3) ? D_WARNING : D_INFO,
+                      "hash %s is still rehashing, rescheded %d\n",
+                      hs->hs_name, i - 1);
+               cond_resched();
+               cfs_hash_lock(hs, 1);
+       }
+}
+
+/**
+ * Cancel a pending or running rehash of @hs.  Takes cfs_hash_lock(hs, 1)
+ * around cfs_hash_rehash_cancel_locked(), so the caller must not
+ * already hold that lock.
+ */
+void
+cfs_hash_rehash_cancel(struct cfs_hash *hs)
+{
+       cfs_hash_lock(hs, 1);
+       cfs_hash_rehash_cancel_locked(hs);
+       cfs_hash_unlock(hs, 1);
+}
+
+/**
+ * Start a rehash of @hs if cfs_hash_rehash_bits() asks for one.
+ *
+ * The decision is re-evaluated here under cfs_hash_lock(hs, 1).  With
+ * @do_rehash zero the work is queued on cfs_sched_rehash and the
+ * function returns at once; otherwise the rehash worker is run
+ * directly in the caller's context.
+ *
+ * Returns the value of cfs_hash_rehash_bits() when it is <= 0 (nothing
+ * to do, or a negative errno), 0 after queueing the work item, or the
+ * return value of cfs_hash_rehash_worker() for an inline rehash.
+ */
+int
+cfs_hash_rehash(struct cfs_hash *hs, int do_rehash)
+{
+       int rc;
+
+       LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs));
+
+       cfs_hash_lock(hs, 1);
+
+       rc = cfs_hash_rehash_bits(hs);
+       if (rc <= 0) {
+               cfs_hash_unlock(hs, 1);
+               return rc;
+       }
+
+       /* record the target size; the worker reads it from @hs */
+       hs->hs_rehash_bits = rc;
+       if (!do_rehash) {
+               /* launch and return */
+               cfs_wi_schedule(cfs_sched_rehash, &hs->hs_rehash_wi);
+               cfs_hash_unlock(hs, 1);
+               return 0;
+       }
+
+       /* rehash right now */
+       cfs_hash_unlock(hs, 1);
+
+       return cfs_hash_rehash_worker(&hs->hs_rehash_wi);
+}
+
+/*
+ * Move every item of bucket @old into the bucket it maps to in the
+ * rehash table (hs->hs_rehash_buckets / hs->hs_rehash_bits).
+ * Returns the number of items moved.
+ */
+static int
+cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old)
+{
+       struct cfs_hash_bd dst;
+       struct hlist_head *chain;
+       struct hlist_node *cur;
+       struct hlist_node *next;
+       int moved = 0;
+
+       /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */
+       cfs_hash_bd_for_each_hlist(hs, old, chain) {
+               hlist_for_each_safe(cur, next, chain) {
+                       /* ops->hs_key must be defined */
+                       void *key = cfs_hash_key(hs, cur);
+
+                       LASSERT(key);
+                       /* the item must currently sit in the right bucket */
+                       cfs_hash_bucket_validate(hs, old, cur);
+
+                       /* unlink from @old and relink into its new bucket */
+                       cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
+                                            hs->hs_rehash_bits, key, &dst);
+                       cfs_hash_bd_move_locked(hs, old, &dst, cur);
+                       moved++;
+               }
+       }
+
+       return moved;
+}
+
+/*
+ * Perform the rehash recorded in hs->hs_rehash_bits for the hash that
+ * embeds work item @wi.  Called either from the rehash scheduler or
+ * directly by cfs_hash_rehash().
+ *
+ * A bucket table of the new size is obtained, every item is moved into
+ * it under cfs_hash_lock(hs, 1), and the tables are swapped.  The
+ * rehash is abandoned when the allocation fails (-ENOMEM), when theta
+ * is back inside [hs_min_theta, hs_max_theta] (-EALREADY) or when the
+ * hash is exiting (-ESRCH).
+ *
+ * Returns 1 only when cfs_wi_exit() was called (the -ESRCH case),
+ * 0 otherwise.
+ */
+static int
+cfs_hash_rehash_worker(cfs_workitem_t *wi)
+{
+       struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_rehash_wi);
+       struct cfs_hash_bucket **bkts;
+       struct cfs_hash_bd bd;
+       unsigned int old_size;
+       unsigned int new_size;
+       int bsize;
+       int count = 0;
+       int rc = 0;
+       int i;
+
+       LASSERT(hs && cfs_hash_with_rehash(hs));
+
+       cfs_hash_lock(hs, 0);
+       LASSERT(cfs_hash_is_rehashing(hs));
+
+       old_size = CFS_HASH_NBKT(hs);
+       new_size = CFS_HASH_RH_NBKT(hs);
+
+       cfs_hash_unlock(hs, 0);
+
+       /*
+        * don't need hs::hs_rwlock for hs::hs_buckets,
+        * because nobody can change bkt-table except me.
+        */
+       bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets,
+                                       old_size, new_size);
+       cfs_hash_lock(hs, 1);
+       if (!bkts) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       if (bkts == hs->hs_buckets) {
+               bkts = NULL; /* do nothing */
+               goto out;
+       }
+
+       /* @rc temporarily holds theta: re-check under the exclusive
+        * lock whether resizing is still warranted
+        */
+       rc = __cfs_hash_theta(hs);
+       if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) {
+               /* free the new allocated bkt-table */
+               old_size = new_size;
+               new_size = CFS_HASH_NBKT(hs);
+               rc = -EALREADY;
+               goto out;
+       }
+
+       LASSERT(!hs->hs_rehash_buckets);
+       hs->hs_rehash_buckets = bkts;
+
+       rc = 0;
+       cfs_hash_for_each_bucket(hs, &bd, i) {
+               if (cfs_hash_is_exiting(hs)) {
+                       rc = -ESRCH;
+                       /* someone wants to destroy the hash, abort now */
+                       if (old_size < new_size) /* OK to free old bkt-table */
+                               break;
+                       /* it's shrinking, need free new bkt-table */
+                       hs->hs_rehash_buckets = NULL;
+                       old_size = new_size;
+                       new_size = CFS_HASH_NBKT(hs);
+                       goto out;
+               }
+
+               count += cfs_hash_rehash_bd(hs, &bd);
+               if (count < CFS_HASH_LOOP_HOG ||
+                   cfs_hash_is_iterating(hs)) { /* need to finish ASAP */
+                       continue;
+               }
+
+               /* moved many items: drop the lock briefly so others
+                * can make progress
+                */
+               count = 0;
+               cfs_hash_unlock(hs, 1);
+               cond_resched();
+               cfs_hash_lock(hs, 1);
+       }
+
+       hs->hs_rehash_count++;
+
+       /* swap tables: @bkts now refers to the table to be released */
+       bkts = hs->hs_buckets;
+       hs->hs_buckets = hs->hs_rehash_buckets;
+       hs->hs_rehash_buckets = NULL;
+
+       hs->hs_cur_bits = hs->hs_rehash_bits;
+out:
+       hs->hs_rehash_bits = 0;
+       if (rc == -ESRCH) /* never be scheduled again */
+               cfs_wi_exit(cfs_sched_rehash, wi);
+       /* read the bucket size while @hs is still safe to touch */
+       bsize = cfs_hash_bkt_size(hs);
+       cfs_hash_unlock(hs, 1);
+       /* can't refer to @hs anymore because it could be destroyed */
+       if (bkts)
+               cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
+       if (rc != 0)
+               CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc);
+       /* return 1 only if cfs_wi_exit is called */
+       return rc == -ESRCH;
+}
+
+/**
+ * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
+ * @old_key must be provided to locate the object's previous location
+ * in the hash, and the @new_key will be used to reinsert the object.
+ * Use this function instead of a cfs_hash_add() + cfs_hash_del()
+ * combo when it is critical that there is no window in time where the
+ * object is missing from the hash.  When an object is being rehashed
+ * the registered cfs_hash_get() and cfs_hash_put() functions will
+ * not be called.
+ *
+ * @hnode must currently be hashed.
+ */
+void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
+                        void *new_key, struct hlist_node *hnode)
+{
+       struct cfs_hash_bd bds[3];
+       struct cfs_hash_bd old_bds[2];
+       struct cfs_hash_bd new_bd;
+
+       LASSERT(!hlist_unhashed(hnode));
+
+       cfs_hash_lock(hs, 0);
+
+       cfs_hash_dual_bd_get(hs, old_key, old_bds);
+       cfs_hash_bd_get(hs, new_key, &new_bd);
+
+       /* gather all buckets involved so they can be locked as one
+        * ordered set
+        */
+       bds[0] = old_bds[0];
+       bds[1] = old_bds[1];
+       bds[2] = new_bd;
+
+       /* NB: bds[0] and bds[1] are ordered already */
+       cfs_hash_bd_order(&bds[1], &bds[2]);
+       cfs_hash_bd_order(&bds[0], &bds[1]);
+
+       cfs_hash_multi_bd_lock(hs, bds, 3, 1);
+       if (likely(!old_bds[1].bd_bucket)) {
+               /* only one old bucket: move the node straight across */
+               cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
+       } else {
+               /* two candidate old buckets: find and unlink the node,
+                * then add it to the new bucket
+                */
+               cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
+               cfs_hash_bd_add_locked(hs, &new_bd, hnode);
+       }
+       /* overwrite key inside locks, otherwise may screw up with
+        * other operations, i.e: rehash
+        */
+       cfs_hash_keycpy(hs, hnode, new_key);
+
+       cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
+       cfs_hash_unlock(hs, 0);
+}
+EXPORT_SYMBOL(cfs_hash_rehash_key);
+
+/**
+ * Print to @m the column header line matching the per-hash rows
+ * produced by cfs_hash_debug_str().  The name column is padded to
+ * CFS_HASH_BIGNAME_LEN characters.
+ */
+void cfs_hash_debug_header(struct seq_file *m)
+{
+       seq_printf(m, "%-*s   cur   min   max theta t-min t-max flags rehash   count  maxdep maxdepb distribution\n",
+                  CFS_HASH_BIGNAME_LEN, "name");
+}
+EXPORT_SYMBOL(cfs_hash_debug_header);
+
+/*
+ * Return the bucket table that covers every bucket of @hs: the rehash
+ * table while the hash is growing, the current table otherwise.
+ */
+static struct cfs_hash_bucket **
+cfs_hash_full_bkts(struct cfs_hash *hs)
+{
+       /* NB: caller should hold hs->hs_rwlock if REHASH is set */
+       if (hs->hs_rehash_buckets) {
+               LASSERT(hs->hs_rehash_bits != 0);
+               if (hs->hs_rehash_bits > hs->hs_cur_bits)
+                       return hs->hs_rehash_buckets;
+       }
+
+       return hs->hs_buckets;
+}
+
+/*
+ * Return the number of buckets in the table chosen by
+ * cfs_hash_full_bkts(): the rehash size while the hash is growing,
+ * the current size otherwise.
+ */
+static unsigned int
+cfs_hash_full_nbkt(struct cfs_hash *hs)
+{
+       /* NB: caller should hold hs->hs_rwlock if REHASH is set */
+       if (hs->hs_rehash_buckets) {
+               LASSERT(hs->hs_rehash_bits != 0);
+               if (hs->hs_rehash_bits > hs->hs_cur_bits)
+                       return CFS_HASH_RH_NBKT(hs);
+       }
+
+       return CFS_HASH_NBKT(hs);
+}
+
+/**
+ * Print one line of statistics for the libcfs hash @hs to @m: name,
+ * current/min/max table size, current and threshold theta values,
+ * flags, rehash count, item total, maximum chain depth and a depth
+ * histogram.  The columns match cfs_hash_debug_header().
+ */
+void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m)
+{
+       int dist[8] = { 0, };
+       int maxdep = -1;
+       int maxdepb = -1;
+       int total = 0;
+       int theta;
+       int i;
+
+       cfs_hash_lock(hs, 0);
+       theta = __cfs_hash_theta(hs);
+
+       seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d  0x%02x %6d ",
+                  CFS_HASH_BIGNAME_LEN, hs->hs_name,
+                  1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
+                  1 << hs->hs_max_bits,
+                  __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
+                  __cfs_hash_theta_int(hs->hs_min_theta),
+                  __cfs_hash_theta_frac(hs->hs_min_theta),
+                  __cfs_hash_theta_int(hs->hs_max_theta),
+                  __cfs_hash_theta_frac(hs->hs_max_theta),
+                  hs->hs_flags, hs->hs_rehash_count);
+
+       /*
+        * The distribution is a summary of the chained hash depth in
+        * each of the libcfs hash buckets.  Each bucket's hsb_count is
+        * divided by the hash theta value and used to generate a
+        * histogram of the hash distribution.  A uniform hash will
+        * result in all hash buckets being close to the average thus
+        * only the first few entries in the histogram will be non-zero.
+        * If your hash function results in a non-uniform hash this will
+        * be observable as outlier buckets in the distribution histogram.
+        *
+        * Uniform hash distribution:           128/128/0/0/0/0/0/0
+        * Non-Uniform hash distribution:       128/125/0/0/0/0/2/1
+        */
+       for (i = 0; i < cfs_hash_full_nbkt(hs); i++) {
+               struct cfs_hash_bd bd;
+
+               bd.bd_bucket = cfs_hash_full_bkts(hs)[i];
+               cfs_hash_bd_lock(hs, &bd, 0);
+               if (maxdep < bd.bd_bucket->hsb_depmax) {
+                       maxdep  = bd.bd_bucket->hsb_depmax;
+                       /* NOTE(review): ffz(~maxdep) looks like the
+                        * index of the lowest set bit of maxdep -
+                        * confirm this is the intended "maxdepb"
+                        */
+                       maxdepb = ffz(~maxdep);
+               }
+               total += bd.bd_bucket->hsb_count;
+               /* histogram slot: fls() of count/theta, capped at the
+                * last slot; max(theta, 1) avoids a division by zero
+                */
+               dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++;
+               cfs_hash_bd_unlock(hs, &bd, 0);
+       }
+
+       seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb);
+       for (i = 0; i < 8; i++)
+               seq_printf(m, "%d%c",  dist[i], (i == 7) ? '\n' : '/');
+
+       cfs_hash_unlock(hs, 0);
+}
+EXPORT_SYMBOL(cfs_hash_debug_str);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c

new file mode 100644 (file)

index 0000000..33352af
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -0,0 +1,227 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/** Global CPU partition table */
+struct cfs_cpt_table   *cfs_cpt_table __read_mostly;
+EXPORT_SYMBOL(cfs_cpt_table);
+
+#ifndef HAVE_LIBCFS_CPT
+
+#define CFS_CPU_VERSION_MAGIC     0xbabecafe
+
+struct cfs_cpt_table *
+cfs_cpt_table_alloc(unsigned int ncpt)
+{
+       struct cfs_cpt_table *cptab;
+
+       if (ncpt != 1) {
+               CERROR("Can't support cpu partition number %d\n", ncpt);
+               return NULL;
+       }
+
+       LIBCFS_ALLOC(cptab, sizeof(*cptab));
+       if (cptab) {
+               cptab->ctb_version = CFS_CPU_VERSION_MAGIC;
+               node_set(0, cptab->ctb_nodemask);
+               cptab->ctb_nparts  = ncpt;
+       }
+
+       return cptab;
+}
+EXPORT_SYMBOL(cfs_cpt_table_alloc);
+
+void
+cfs_cpt_table_free(struct cfs_cpt_table *cptab)
+{
+       LASSERT(cptab->ctb_version == CFS_CPU_VERSION_MAGIC);
+
+       LIBCFS_FREE(cptab, sizeof(*cptab));
+}
+EXPORT_SYMBOL(cfs_cpt_table_free);
+
+#ifdef CONFIG_SMP
+int
+cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+       int     rc;
+
+       rc = snprintf(buf, len, "%d\t: %d\n", 0, 0);
+       len -= rc;
+       if (len <= 0)
+               return -EFBIG;
+
+       return rc;
+}
+EXPORT_SYMBOL(cfs_cpt_table_print);
+#endif /* CONFIG_SMP */
+
+int
+cfs_cpt_number(struct cfs_cpt_table *cptab)
+{
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_number);
+
+int
+cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
+{
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_weight);
+
+int
+cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
+{
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_online);
+
+nodemask_t *
+cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
+{
+       return &cptab->ctb_nodemask;    /* @cpt unused: one mask per table */
+}
+EXPORT_SYMBOL(cfs_cpt_nodemask);
+
+int
+cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpu);
+
+void
+cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpu);
+
+int
+cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpumask);
+
+void
+cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
+
+int
+cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_node);
+
+void
+cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_node);
+
+int
+cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_nodemask);
+
+void
+cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
+
+void
+cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_clear);
+
+int
+cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
+{
+       return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_spread_node);
+
+int
+cfs_cpu_ht_nsiblings(int cpu)
+{
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpu_ht_nsiblings);
+
+int
+cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
+{
+       return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_current);
+
+int
+cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
+{
+       return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_of_cpu);
+
+int
+cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
+{
+       return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_bind);
+
+void
+cfs_cpu_fini(void)
+{
+       if (cfs_cpt_table) {
+               cfs_cpt_table_free(cfs_cpt_table);
+               cfs_cpt_table = NULL;
+       }
+}
+
+int
+cfs_cpu_init(void)
+{
+       cfs_cpt_table = cfs_cpt_table_alloc(1);
+
+       return cfs_cpt_table ? 0 : -1;
+}
+
+#endif /* HAVE_LIBCFS_CPT */
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c

new file mode 100644 (file)

index 0000000..2de9eea
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
@@ -0,0 +1,185 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * GPL HEADER END
+ */
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015 Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/** destroy cpu-partition lock, see libcfs_private.h for more detail */
+void
+cfs_percpt_lock_free(struct cfs_percpt_lock *pcl)
+{
+       LASSERT(pcl->pcl_locks);
+       LASSERT(!pcl->pcl_locked);
+
+       cfs_percpt_free(pcl->pcl_locks);
+       LIBCFS_FREE(pcl, sizeof(*pcl));
+}
+EXPORT_SYMBOL(cfs_percpt_lock_free);
+
+/**
+ * create cpu-partition lock, see libcfs_private.h for more detail.
+ *
+ * cpu-partition lock is designed for large-scale SMP systems, so we need to
+ * reduce cacheline conflicts as much as possible; that is the
+ * reason we always allocate cacheline-aligned memory blocks.
+ */
+struct cfs_percpt_lock *
+cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab)
+{
+       struct cfs_percpt_lock  *pcl;
+       spinlock_t              *lock;
+       int                     i;
+
+       /* NB: cptab can be NULL, pcl will be for HW CPUs in that case */
+       LIBCFS_ALLOC(pcl, sizeof(*pcl));
+       if (!pcl)
+               return NULL;
+
+       pcl->pcl_cptab = cptab;
+       pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock));
+       if (!pcl->pcl_locks) {
+               LIBCFS_FREE(pcl, sizeof(*pcl));
+               return NULL;
+       }
+
+       cfs_percpt_for_each(lock, i, pcl->pcl_locks)
+               spin_lock_init(lock);
+
+       return pcl;
+}
+EXPORT_SYMBOL(cfs_percpt_lock_alloc);
+
+/**
+ * lock a CPU partition
+ *
+ * \a index != CFS_PERCPT_LOCK_EX
+ *     hold private lock indexed by \a index
+ *
+ * \a index == CFS_PERCPT_LOCK_EX
+ *     exclusively lock @pcl and nobody can take private lock
+ */
+void
+cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
+       __acquires(pcl->pcl_locks)
+{
+       int     ncpt = cfs_cpt_number(pcl->pcl_cptab);
+       int     i;
+
+       LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt);
+
+       if (ncpt == 1) {
+               index = 0;
+       } else { /* serialize with exclusive lock */
+               while (pcl->pcl_locked)
+                       cpu_relax();
+       }
+
+       if (likely(index != CFS_PERCPT_LOCK_EX)) {
+               spin_lock(pcl->pcl_locks[index]);
+               return;
+       }
+
+       /* exclusive lock request */
+       for (i = 0; i < ncpt; i++) {
+               spin_lock(pcl->pcl_locks[i]);
+               if (i == 0) {
+                       LASSERT(!pcl->pcl_locked);
+                       /* nobody should take private lock after this
+                        * so I wouldn't starve for too long time
+                        */
+                       pcl->pcl_locked = 1;
+               }
+       }
+}
+EXPORT_SYMBOL(cfs_percpt_lock);
+
+/** unlock a CPU partition */
+void
+cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
+       __releases(pcl->pcl_locks)
+{
+       int     ncpt = cfs_cpt_number(pcl->pcl_cptab);
+       int     i;
+
+       index = ncpt == 1 ? 0 : index;
+
+       if (likely(index != CFS_PERCPT_LOCK_EX)) {
+               spin_unlock(pcl->pcl_locks[index]);
+               return;
+       }
+
+       for (i = ncpt - 1; i >= 0; i--) {
+               if (i == 0) {
+                       LASSERT(pcl->pcl_locked);
+                       pcl->pcl_locked = 0;
+               }
+               spin_unlock(pcl->pcl_locks[i]);
+       }
+}
+EXPORT_SYMBOL(cfs_percpt_unlock);
+
+/** free cpu-partition refcount */
+void
+cfs_percpt_atomic_free(atomic_t **refs)
+{
+       cfs_percpt_free(refs);
+}
+EXPORT_SYMBOL(cfs_percpt_atomic_free);
+
+/** allocate cpu-partition refcount with initial value @init_val */
+atomic_t **
+cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int init_val)
+{
+       atomic_t        **refs;
+       atomic_t        *ref;
+       int             i;
+
+       refs = cfs_percpt_alloc(cptab, sizeof(*ref));
+       if (!refs)
+               return NULL;
+
+       cfs_percpt_for_each(ref, i, refs)
+               atomic_set(ref, init_val);
+       return refs;
+}
+EXPORT_SYMBOL(cfs_percpt_atomic_alloc);
+
+/** return sum of cpu-partition refs */
+int
+cfs_percpt_atomic_summary(atomic_t **refs)
+{
+       atomic_t        *ref;
+       int             i;
+       int             val = 0;
+
+       cfs_percpt_for_each(ref, i, refs)
+               val += atomic_read(ref);
+
+       return val;
+}
+EXPORT_SYMBOL(cfs_percpt_atomic_summary);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c

new file mode 100644 (file)

index 0000000..c5a6951
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
@@ -0,0 +1,196 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+struct cfs_var_array {
+       unsigned int            va_count;       /* # of buffers */
+       unsigned int            va_size;        /* size of each var */
+       struct cfs_cpt_table    *va_cptab;      /* cpu partition table */
+       void                    *va_ptrs[0];    /* buffer addresses */
+};
+
+/*
+ * free per-cpu data, see more detail in cfs_percpt_alloc
+ */
+void
+cfs_percpt_free(void *vars)
+{
+       struct  cfs_var_array *arr;
+       int     i;
+
+       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+       for (i = 0; i < arr->va_count; i++) {
+               if (arr->va_ptrs[i])
+                       LIBCFS_FREE(arr->va_ptrs[i], arr->va_size);
+       }
+
+       LIBCFS_FREE(arr, offsetof(struct cfs_var_array,
+                                 va_ptrs[arr->va_count]));
+}
+EXPORT_SYMBOL(cfs_percpt_free);
+
+/*
+ * allocate per cpu-partition variables, returned value is an array of pointers,
+ * variables can be indexed by CPU partition ID, e.g.:
+ *
+ *     arr = cfs_percpt_alloc(cfs_cpu_pt, size);
+ *     then caller can access memory block for CPU partition 0 by arr[0],
+ *     memory block for CPU partition 1 by arr[1]...
+ *     memory block for CPU partition N by arr[N]...
+ *
+ * The size of each memory block is rounded up to be cacheline aligned.
+ */
+void *
+cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
+{
+       struct cfs_var_array    *arr;
+       int                     count;
+       int                     i;
+
+       count = cfs_cpt_number(cptab);
+
+       LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count]));
+       if (!arr)
+               return NULL;
+
+       size = L1_CACHE_ALIGN(size);
+       arr->va_size = size;
+       arr->va_count = count;
+       arr->va_cptab = cptab;
+
+       for (i = 0; i < count; i++) {
+               LIBCFS_CPT_ALLOC(arr->va_ptrs[i], cptab, i, size);
+               if (!arr->va_ptrs[i]) {
+                       cfs_percpt_free((void *)&arr->va_ptrs[0]);
+                       return NULL;
+               }
+       }
+
+       return (void *)&arr->va_ptrs[0];
+}
+EXPORT_SYMBOL(cfs_percpt_alloc);
+
+/*
+ * return number of CPUs (or number of elements in per-cpu data)
+ * according to cptab of @vars
+ */
+int
+cfs_percpt_number(void *vars)
+{
+       struct cfs_var_array *arr;
+
+       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+       return arr->va_count;
+}
+EXPORT_SYMBOL(cfs_percpt_number);
+
+/*
+ * return memory block shadowed from current CPU
+ */
+void *
+cfs_percpt_current(void *vars)
+{
+       struct cfs_var_array *arr;
+       int    cpt;
+
+       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+       cpt = cfs_cpt_current(arr->va_cptab, 0);
+       if (cpt < 0)
+               return NULL;
+
+       return arr->va_ptrs[cpt];
+}
+
+void *
+cfs_percpt_index(void *vars, int idx)
+{
+       struct cfs_var_array *arr;
+
+       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+       LASSERT(idx >= 0 && idx < arr->va_count);
+       return arr->va_ptrs[idx];
+}
+
+/*
+ * free variable array, see more detail in cfs_array_alloc
+ */
+void
+cfs_array_free(void *vars)
+{
+       struct cfs_var_array    *arr;
+       int                     i;
+
+       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+       for (i = 0; i < arr->va_count; i++) {
+               if (!arr->va_ptrs[i])
+                       continue;
+
+               LIBCFS_FREE(arr->va_ptrs[i], arr->va_size);
+       }
+       LIBCFS_FREE(arr, offsetof(struct cfs_var_array,
+                                 va_ptrs[arr->va_count]));
+}
+EXPORT_SYMBOL(cfs_array_free);
+
+/*
+ * allocate a variable array, returned value is an array of pointers.
+ * Caller can specify length of array by @count, @size is size of each
+ * memory block in array.
+ */
+void *
+cfs_array_alloc(int count, unsigned int size)
+{
+       struct cfs_var_array    *arr;
+       int                     i;
+
+       LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count]));
+       if (!arr)
+               return NULL;
+
+       arr->va_count   = count;
+       arr->va_size    = size;
+
+       for (i = 0; i < count; i++) {
+               LIBCFS_ALLOC(arr->va_ptrs[i], size);
+
+               if (!arr->va_ptrs[i]) {
+                       cfs_array_free((void *)&arr->va_ptrs[0]);
+                       return NULL;
+               }
+       }
+
+       return (void *)&arr->va_ptrs[0];
+}
+EXPORT_SYMBOL(cfs_array_alloc);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c

new file mode 100644 (file)

index 0000000..50ac153
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
@@ -0,0 +1,581 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2015 Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * String manipulation functions.
+ *
+ * libcfs/libcfs/libcfs_string.c
+ *
+ * Author: Nathan Rutman <nathan.rutman@sun.com>
+ */
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/* Convert a text string to a bitmask */
+int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
+                int *oldmask, int minmask, int allmask)
+{
+       const char *debugstr;
+       char op = '\0';
+       int newmask = minmask, i, len, found = 0;
+
+       /* <str> must be a list of tokens separated by whitespace
+        * and optionally an operator ('+' or '-').  If an operator
+        * appears first in <str>, '*oldmask' is used as the starting point
+        * (relative), otherwise minmask is used (absolute).  An operator
+        * applies to all following tokens up to the next operator.
+        */
+       while (*str != '\0') {
+               while (isspace(*str))
+                       str++;
+               if (*str == '\0')
+                       break;
+               if (*str == '+' || *str == '-') {
+                       op = *str++;
+                       if (!found)
+                               /* only if first token is relative */
+                               newmask = *oldmask;
+                       while (isspace(*str))
+                               str++;
+                       if (*str == '\0')  /* trailing op */
+                               return -EINVAL;
+               }
+
+               /* find token length */
+               len = 0;
+               while (str[len] != '\0' && !isspace(str[len]) &&
+                      str[len] != '+' && str[len] != '-')
+                       len++;
+
+               /* match token */
+               found = 0;
+               for (i = 0; i < 32; i++) {
+                       debugstr = bit2str(i);
+                       if (debugstr && strlen(debugstr) == len &&
+                           strncasecmp(str, debugstr, len) == 0) {
+                               if (op == '-')
+                                       newmask &= ~(1 << i);
+                               else
+                                       newmask |= (1 << i);
+                               found = 1;
+                               break;
+                       }
+               }
+               if (!found && len == 3 &&
+                   (strncasecmp(str, "ALL", len) == 0)) {
+                       if (op == '-')
+                               newmask = minmask;
+                       else
+                               newmask = allmask;
+                       found = 1;
+               }
+               if (!found) {
+                       CWARN("unknown mask '%.*s'.\n"
+                             "mask usage: [+|-]<all|type> ...\n", len, str);
+                       return -EINVAL;
+               }
+               str += len;
+       }
+
+       *oldmask = newmask;
+       return 0;
+}
+
+/* get the first string out of @str */
+char *cfs_firststr(char *str, size_t size)
+{
+       size_t i = 0;
+       char  *end;
+
+       /* trim leading spaces */
+       while (i < size && *str && isspace(*str)) {
+               ++i;
+               ++str;
+       }
+
+       /* string with all spaces */
+       if (*str == '\0')
+               goto out;
+
+       end = str;
+       while (i < size && *end != '\0' && !isspace(*end)) {
+               ++i;
+               ++end;
+       }
+
+       *end = '\0';
+out:
+       return str;
+}
+EXPORT_SYMBOL(cfs_firststr);
+
+char *
+cfs_trimwhite(char *str)
+{
+       char *end;
+
+       while (isspace(*str))
+               str++;
+
+       end = str + strlen(str);
+       while (end > str) {
+               if (!isspace(end[-1]))
+                       break;
+               end--;
+       }
+
+       *end = 0;
+       return str;
+}
+EXPORT_SYMBOL(cfs_trimwhite);
+
+/**
+ * Extracts tokens from strings.
+ *
+ * Looks for \a delim in string \a next, sets \a res to point to
+ * substring before the delimiter, sets \a next right after the found
+ * delimiter.
+ *
+ * \retval 1 if \a res points to a string of non-whitespace characters
+ * \retval 0 otherwise
+ */
+int
+cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
+{
+       char *end;
+
+       if (!next->ls_str)
+               return 0;
+
+       /* skip leading white spaces */
+       while (next->ls_len) {
+               if (!isspace(*next->ls_str))
+                       break;
+               next->ls_str++;
+               next->ls_len--;
+       }
+
+       if (next->ls_len == 0) /* whitespaces only */
+               return 0;
+
+       if (*next->ls_str == delim) {
+               /* first non-whitespace is the delimiter */
+               return 0;
+       }
+
+       res->ls_str = next->ls_str;
+       end = memchr(next->ls_str, delim, next->ls_len);
+       if (!end) {
+               /* there is no delimiter in the string */
+               end = next->ls_str + next->ls_len;
+               next->ls_str = NULL;
+       } else {
+               next->ls_str = end + 1;
+               next->ls_len -= (end - res->ls_str + 1);
+       }
+
+       /* skip ending whitespaces */
+       while (--end != res->ls_str) {
+               if (!isspace(*end))
+                       break;
+       }
+
+       res->ls_len = end - res->ls_str + 1;
+       return 1;
+}
+EXPORT_SYMBOL(cfs_gettok);
+
+/**
+ * Converts string to unsigned integer.
+ *
+ * Only decimal digits are accepted (the string is parsed in base 10).
+ *
+ * \retval 1 if first \a nob chars of \a str convert to a decimal
+ * integer in the range [\a min, \a max]
+ * \retval 0 otherwise
+ */
+int
+cfs_str2num_check(char *str, int nob, unsigned *num,
+                 unsigned min, unsigned max)
+{
+       bool all_numbers = true;
+       char *endp, cache;
+       int rc;
+
+       str = cfs_trimwhite(str);
+
+       /**
+        * kstrtouint can only handle strings composed
+        * of only numbers. We need to scan the string
+        * passed in for the first non-digit character
+        * and end the string at that location. If we
+        * don't find any non-digit character we still
+        * need to place a '\0' at position nob since
+        * we are not interested in the rest of the
+        * string which is longer than nob in size.
+        * After we are done the character at the
+        * position we placed '\0' must be restored.
+        */
+       for (endp = str; endp < str + nob; endp++) {
+               if (!isdigit(*endp)) {
+                       all_numbers = false;
+                       break;
+               }
+       }
+       cache = *endp;
+       *endp = '\0';
+
+       rc = kstrtouint(str, 10, num);
+       *endp = cache;
+       if (rc || !all_numbers)
+               return 0;
+
+       return (*num >= min && *num <= max);
+}
+EXPORT_SYMBOL(cfs_str2num_check);
+
+/**
+ * Parses \<range_expr\> token of the syntax. If \a bracketed is false,
+ * \a src should only have a single token which can be \<number\> or  \*
+ *
+ * On success \a *expr points to an allocated range_expr with initialized
+ * range_expr::re_lo, range_expr::re_hi and range_expr::re_stride if \a
+ * src parses to
+ * \<number\> |
+ * \<number\> '-' \<number\> |
+ * \<number\> '-' \<number\> '/' \<number\> (the stride must be non-zero)
+ * \retval 0 will be returned if it can be parsed, otherwise -EINVAL or
+ * -ENOMEM will be returned.
+ */
+static int
+cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
+                    int bracketed, struct cfs_range_expr **expr)
+{
+       struct cfs_range_expr   *re;
+       struct cfs_lstr         tok;
+
+       LIBCFS_ALLOC(re, sizeof(*re));
+       if (!re)
+               return -ENOMEM;
+
+       if (src->ls_len == 1 && src->ls_str[0] == '*') {
+               re->re_lo = min;
+               re->re_hi = max;
+               re->re_stride = 1;
+               goto out;
+       }
+
+       if (cfs_str2num_check(src->ls_str, src->ls_len,
+                             &re->re_lo, min, max)) {
+               /* <number> is parsed */
+               re->re_hi = re->re_lo;
+               re->re_stride = 1;
+               goto out;
+       }
+
+       if (!bracketed || !cfs_gettok(src, '-', &tok))
+               goto failed;
+
+       if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
+                              &re->re_lo, min, max))
+               goto failed;
+
+       /* <number> - */
+       if (cfs_str2num_check(src->ls_str, src->ls_len,
+                             &re->re_hi, min, max)) {
+               /* <number> - <number> is parsed */
+               re->re_stride = 1;
+               goto out;
+       }
+
+       /* go to check <number> '-' <number> '/' <number> */
+       if (!cfs_gettok(src, '/', &tok))
+               goto failed;
+
+       if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
+                              &re->re_hi, min, max))
+               goto failed;
+
+       /* stride: src is NULL without '/', and 0 would divide by zero */
+       if (!src->ls_str ||
+           !cfs_str2num_check(src->ls_str, src->ls_len,
+                              &re->re_stride, min, max) || !re->re_stride)
+               goto failed;
+
+ out:
+       *expr = re;
+       return 0;
+
+ failed:
+       LIBCFS_FREE(re, sizeof(*re));
+       return -EINVAL;
+}
+
+/**
+ * Print the range expression \a re into specified \a buffer.
+ * If \a bracketed is true, expression does not need additional
+ * brackets.
+ *
+ * \retval number of characters written
+ */
+static int
+cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr,
+                    bool bracketed)
+{
+       int i;
+       char s[] = "[";
+       char e[] = "]";
+
+       if (bracketed) {
+               s[0] = '\0';
+               e[0] = '\0';
+       }
+
+       if (expr->re_lo == expr->re_hi)
+               i = scnprintf(buffer, count, "%u", expr->re_lo);
+       else if (expr->re_stride == 1)
+               i = scnprintf(buffer, count, "%s%u-%u%s",
+                             s, expr->re_lo, expr->re_hi, e);
+       else
+               i = scnprintf(buffer, count, "%s%u-%u/%u%s",
+                             s, expr->re_lo, expr->re_hi, expr->re_stride, e);
+       return i;
+}
+
+/**
+ * Print a list of range expressions (\a expr_list) into specified \a buffer.
+ * If the list contains several expressions, separate them with comma
+ * and surround the list with brackets.
+ *
+ * \retval number of characters written
+ */
+int
+cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
+{
+       struct cfs_range_expr *expr;
+       int i = 0, j = 0;
+       int numexprs = 0;
+
+       if (count <= 0)
+               return 0;
+
+       list_for_each_entry(expr, &expr_list->el_exprs, re_link)
+               numexprs++;
+
+       if (numexprs > 1)
+               i += scnprintf(buffer + i, count - i, "[");
+
+       list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+               if (j++ != 0)
+                       i += scnprintf(buffer + i, count - i, ",");
+               i += cfs_range_expr_print(buffer + i, count - i, expr,
+                                         numexprs > 1);
+       }
+
+       if (numexprs > 1)
+               i += scnprintf(buffer + i, count - i, "]");
+
+       return i;
+}
+EXPORT_SYMBOL(cfs_expr_list_print);
+
+/**
+ * Matches value (\a value) against ranges expression list \a expr_list.
+ *
+ * \retval 1 if \a value matches
+ * \retval 0 otherwise
+ */
+int
+cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list)
+{
+       struct cfs_range_expr   *expr;
+
+       list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+               if (value >= expr->re_lo && value <= expr->re_hi &&
+                   ((value - expr->re_lo) % expr->re_stride) == 0)
+                       return 1;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(cfs_expr_list_match);
+
+/**
+ * Convert expression list (\a expr_list) to an array of all matched values
+ *
+ * \retval N N is total number of matched values, stored in \a *valpp
+ * \retval 0 if expression list is empty
+ * \retval < 0 for failure (-EINVAL if more than \a max values, -ENOMEM)
+ */
+int
+cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp)
+{
+       struct cfs_range_expr   *expr;
+       __u32                   *val;
+       int                     count = 0;
+       int                     i;
+
+       list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+               for (i = expr->re_lo; i <= expr->re_hi; i++) {
+                       if (((i - expr->re_lo) % expr->re_stride) == 0)
+                               count++;
+               }
+       }
+
+       if (count == 0) /* empty expression list */
+               return 0;
+
+       if (count > max) {
+               CERROR("Number of values %d exceeds max allowed %d\n",
+                      count, max);
+               return -EINVAL;
+       }
+
+       LIBCFS_ALLOC(val, sizeof(val[0]) * count);
+       if (!val)
+               return -ENOMEM;
+
+       count = 0;
+       list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+               for (i = expr->re_lo; i <= expr->re_hi; i++) {
+                       if (((i - expr->re_lo) % expr->re_stride) == 0)
+                               val[count++] = i;
+               }
+       }
+
+       *valpp = val;
+       return count;
+}
+EXPORT_SYMBOL(cfs_expr_list_values);
+
+/**
+ * Frees cfs_range_expr structures of \a expr_list.
+ *
+ * \retval none
+ */
+void
+cfs_expr_list_free(struct cfs_expr_list *expr_list)
+{
+       while (!list_empty(&expr_list->el_exprs)) {
+               struct cfs_range_expr *expr;
+
+               expr = list_entry(expr_list->el_exprs.next,
+                                 struct cfs_range_expr, re_link);
+               list_del(&expr->re_link);
+               LIBCFS_FREE(expr, sizeof(*expr));
+       }
+
+       LIBCFS_FREE(expr_list, sizeof(*expr_list));
+}
+EXPORT_SYMBOL(cfs_expr_list_free);
+
+/**
+ * Parses \<cfs_expr_list\> token of the syntax.
+ *
+ * \retval 0 if \a str parses to \<number\> | \<expr_list\>
+ * \retval -errno otherwise
+ */
+int
+cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
+                   struct cfs_expr_list **elpp)
+{
+       struct cfs_expr_list    *expr_list;
+       struct cfs_range_expr   *expr;
+       struct cfs_lstr         src;
+       int                     rc;
+
+       LIBCFS_ALLOC(expr_list, sizeof(*expr_list));
+       if (!expr_list)
+               return -ENOMEM;
+
+       src.ls_str = str;
+       src.ls_len = len;
+
+       INIT_LIST_HEAD(&expr_list->el_exprs);
+
+       if (src.ls_str[0] == '[' &&
+           src.ls_str[src.ls_len - 1] == ']') {
+               src.ls_str++;
+               src.ls_len -= 2;
+
+               rc = -EINVAL;
+               while (src.ls_str) {
+                       struct cfs_lstr tok;
+
+                       if (!cfs_gettok(&src, ',', &tok)) {
+                               rc = -EINVAL;
+                               break;
+                       }
+
+                       rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
+                       if (rc != 0)
+                               break;
+
+                       list_add_tail(&expr->re_link, &expr_list->el_exprs);
+               }
+       } else {
+               rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
+               if (rc == 0)
+                       list_add_tail(&expr->re_link, &expr_list->el_exprs);
+       }
+
+       if (rc != 0)
+               cfs_expr_list_free(expr_list);
+       else
+               *elpp = expr_list;
+
+       return rc;
+}
+EXPORT_SYMBOL(cfs_expr_list_parse);
+
+/**
+ * Frees cfs_expr_list structures of \a list.
+ *
+ * For each struct cfs_expr_list structure found on \a list it frees
+ * range_expr list attached to it and frees the cfs_expr_list itself.
+ *
+ * \retval none
+ */
+void
+cfs_expr_list_free_list(struct list_head *list)
+{
+       struct cfs_expr_list *el;
+
+       while (!list_empty(list)) {
+               el = list_entry(list->next, struct cfs_expr_list, el_link);
+               list_del(&el->el_link);
+               cfs_expr_list_free(el);
+       }
+}
+EXPORT_SYMBOL(cfs_expr_list_free_list);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c

new file mode 100644 (file)

index 0000000..389fb9e
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -0,0 +1,1040 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2012, 2015 Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include "../../../include/linux/libcfs/libcfs.h"
+
+#ifdef CONFIG_SMP
+
+/**
+ * Module parameter: requested number of CPU partitions.
+ *
+ *  0 : estimate best value based on cores or NUMA nodes
+ *      (cfs_cpt_table_create() substitutes cfs_cpt_num_estimate() for any
+ *      value <= 0)
+ *  1 : disable multiple partitions
+ * >1 : specify number of partitions; cfs_cpt_table_create() fails unless
+ *      the number of online CPUs is a multiple of this value
+ *
+ * cfs_cpu_init() only consults this when cpu_pattern is empty.
+ */
+static int     cpu_npartitions;
+module_param(cpu_npartitions, int, 0444);
+MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
+
+/**
+ * Module parameter: explicit CPU partition layout, parsed by
+ * cfs_cpt_table_create_pattern().
+ *
+ * e.g. "0[0,1,2,3] 1[4,5,6,7]": the number before each bracket is the CPU
+ *      partition ID, the numbers inside the bracket are processor IDs
+ *      (core or HT).
+ *
+ * e.g. "N 0[0,1] 1[2,3]": a leading 'N' (or 'n') means the numbers inside
+ *      the brackets are NUMA node IDs; the number before each bracket is
+ *      still the CPU partition ID.
+ *
+ * NB: if the user specifies cpu_pattern, cpu_npartitions is ignored.
+ *     The default empty string selects the cpu_npartitions path in
+ *     cfs_cpu_init().
+ */
+static char    *cpu_pattern = "";
+module_param(cpu_pattern, charp, 0444);
+MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
+
+/*
+ * File-global bookkeeping shared by all CPU partition tables.
+ * Zeroed and initialised by cfs_cpu_init(), torn down by cfs_cpu_fini().
+ */
+struct cfs_cpt_data {
+       /* serialize hotplug etc; guards cpt_version */
+       spinlock_t              cpt_lock;
+       /*
+        * reserved for hotplug: bumped by cfs_cpu_notify() on CPU
+        * online/dead events and snapshotted into each new table by
+        * cfs_cpt_table_alloc()
+        */
+       unsigned long           cpt_version;
+       /* mutex to protect cpt_cpumask */
+       struct mutex            cpt_mutex;
+       /*
+        * scratch buffer for set/unset_node (also borrowed by
+        * cfs_cpu_notify()); only touched with cpt_mutex held
+        */
+       cpumask_t               *cpt_cpumask;
+};
+
+/* the single instance of the bookkeeping above */
+static struct cfs_cpt_data     cpt_data;
+
+/**
+ * Release a CPU partition table together with every buffer attached to it.
+ *
+ * Each member pointer is checked before it is freed, so a table that was
+ * only partly built (the failure path of cfs_cpt_table_alloc()) can be
+ * passed in as well.
+ * NOTE(review): that relies on LIBCFS_ALLOC() returning zeroed memory so
+ * untouched pointers read as NULL - confirm against the macro definition.
+ */
+void
+cfs_cpt_table_free(struct cfs_cpt_table *cptab)
+{
+       int     idx;
+
+       /* CPU -> partition lookup array, one slot per possible CPU */
+       if (cptab->ctb_cpu2cpt) {
+               LIBCFS_FREE(cptab->ctb_cpu2cpt,
+                           num_possible_cpus() *
+                           sizeof(cptab->ctb_cpu2cpt[0]));
+       }
+
+       if (cptab->ctb_parts) {
+               /* per-partition masks first, then the partition array */
+               for (idx = 0; idx < cptab->ctb_nparts; idx++) {
+                       struct cfs_cpu_partition *part;
+
+                       part = &cptab->ctb_parts[idx];
+
+                       if (part->cpt_nodemask) {
+                               LIBCFS_FREE(part->cpt_nodemask,
+                                           sizeof(*part->cpt_nodemask));
+                       }
+
+                       if (part->cpt_cpumask) {
+                               LIBCFS_FREE(part->cpt_cpumask,
+                                           cpumask_size());
+                       }
+               }
+
+               LIBCFS_FREE(cptab->ctb_parts,
+                           cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
+       }
+
+       /* table-wide masks */
+       if (cptab->ctb_nodemask) {
+               LIBCFS_FREE(cptab->ctb_nodemask,
+                           sizeof(*cptab->ctb_nodemask));
+       }
+
+       if (cptab->ctb_cpumask) {
+               LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
+       }
+
+       LIBCFS_FREE(cptab, sizeof(*cptab));
+}
+EXPORT_SYMBOL(cfs_cpt_table_free);
+
+/**
+ * Allocate an empty CPU partition table with \a ncpt partitions.
+ *
+ * Every slot of the CPU -> partition map is initialised to -1 ("not
+ * assigned") and the table records the current hotplug version from
+ * cpt_data.
+ *
+ * \retval pointer to the new table, to be released with
+ *         cfs_cpt_table_free()
+ * \retval NULL if any allocation failed (partial state is freed)
+ */
+struct cfs_cpt_table *
+cfs_cpt_table_alloc(unsigned int ncpt)
+{
+       struct cfs_cpt_table *tab;
+       size_t  map_size;
+       int     idx;
+
+       LIBCFS_ALLOC(tab, sizeof(*tab));
+       if (!tab)
+               return NULL;
+
+       /* set early: cfs_cpt_table_free() sizes ctb_parts from it */
+       tab->ctb_nparts = ncpt;
+
+       LIBCFS_ALLOC(tab->ctb_cpumask, cpumask_size());
+       LIBCFS_ALLOC(tab->ctb_nodemask, sizeof(*tab->ctb_nodemask));
+       if (!(tab->ctb_cpumask && tab->ctb_nodemask))
+               goto failed;
+
+       map_size = num_possible_cpus() * sizeof(tab->ctb_cpu2cpt[0]);
+       LIBCFS_ALLOC(tab->ctb_cpu2cpt, map_size);
+       if (!tab->ctb_cpu2cpt)
+               goto failed;
+
+       /* all-ones bytes make every entry read back as -1 */
+       memset(tab->ctb_cpu2cpt, -1, map_size);
+
+       LIBCFS_ALLOC(tab->ctb_parts, ncpt * sizeof(tab->ctb_parts[0]));
+       if (!tab->ctb_parts)
+               goto failed;
+
+       for (idx = 0; idx < ncpt; idx++) {
+               struct cfs_cpu_partition *part = &tab->ctb_parts[idx];
+
+               LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
+               LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
+               if (!(part->cpt_cpumask && part->cpt_nodemask))
+                       goto failed;
+       }
+
+       /* Reserved for hotplug: remember which version we were built at */
+       spin_lock(&cpt_data.cpt_lock);
+       tab->ctb_version = cpt_data.cpt_version;
+       spin_unlock(&cpt_data.cpt_lock);
+
+       return tab;
+
+ failed:
+       cfs_cpt_table_free(tab);
+       return NULL;
+}
+EXPORT_SYMBOL(cfs_cpt_table_alloc);
+
+/**
+ * Format \a cptab into \a buf, one line per partition:
+ * "<partition id>\t: <cpu> <cpu> ...\n".
+ *
+ * The newline that ends each line overwrites snprintf()'s terminator, so
+ * callers should rely on the returned length rather than on a trailing
+ * NUL.
+ *
+ * \retval number of bytes written into \a buf
+ * \retval -EFBIG if \a len bytes are not enough
+ */
+int
+cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+       char    *pos = buf;
+       int     rc = 0;
+       int     cpt;
+       int     cpu;
+
+       for (cpt = 0; cpt < cptab->ctb_nparts; cpt++) {
+               /* nothing left for this partition's prefix */
+               if (len <= 0) {
+                       rc = -EFBIG;
+                       goto out;
+               }
+
+               rc = snprintf(pos, len, "%d\t: ", cpt);
+               len -= rc;
+               if (len <= 0) {
+                       rc = -EFBIG;
+                       goto out;
+               }
+               pos += rc;
+
+               for_each_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask) {
+                       rc = snprintf(pos, len, "%d ", cpu);
+                       len -= rc;
+                       if (len <= 0) {
+                               rc = -EFBIG;
+                               goto out;
+                       }
+                       pos += rc;
+               }
+
+               /* at least one byte is left here: replace the NUL */
+               *pos++ = '\n';
+               len--;
+       }
+
+ out:
+       return rc < 0 ? rc : pos - buf;
+}
+EXPORT_SYMBOL(cfs_cpt_table_print);
+
+/**
+ * Return the number of partitions \a cptab was allocated with.
+ */
+int
+cfs_cpt_number(struct cfs_cpt_table *cptab)
+{
+       int     nparts = cptab->ctb_nparts;
+
+       return nparts;
+}
+EXPORT_SYMBOL(cfs_cpt_number);
+
+/**
+ * Count the CPUs set in partition \a cpt of \a cptab, or in the whole
+ * table when \a cpt is CFS_CPT_ANY.
+ */
+int
+cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
+{
+       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+       if (cpt == CFS_CPT_ANY)
+               return cpumask_weight(cptab->ctb_cpumask);
+
+       return cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
+}
+EXPORT_SYMBOL(cfs_cpt_weight);
+
+/**
+ * Tell whether partition \a cpt of \a cptab (or the whole table for
+ * CFS_CPT_ANY) contains at least one CPU that is currently online.
+ *
+ * \retval 1 an online CPU was found
+ * \retval 0 otherwise
+ */
+int
+cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
+{
+       cpumask_t       *mask;
+
+       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+       if (cpt == CFS_CPT_ANY)
+               mask = cptab->ctb_cpumask;
+       else
+               mask = cptab->ctb_parts[cpt].cpt_cpumask;
+
+       /* cpumask_any_and() yields >= nr_cpu_ids when nothing intersects */
+       return cpumask_any_and(mask, cpu_online_mask) < nr_cpu_ids;
+}
+EXPORT_SYMBOL(cfs_cpt_online);
+
+/**
+ * Return the CPU mask of partition \a cpt, or the table-wide CPU mask
+ * when \a cpt is CFS_CPT_ANY. The mask still belongs to \a cptab.
+ */
+cpumask_t *
+cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+{
+       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+       if (cpt == CFS_CPT_ANY)
+               return cptab->ctb_cpumask;
+
+       return cptab->ctb_parts[cpt].cpt_cpumask;
+}
+EXPORT_SYMBOL(cfs_cpt_cpumask);
+
+/**
+ * Return the NUMA node mask of partition \a cpt, or the table-wide node
+ * mask when \a cpt is CFS_CPT_ANY. The mask still belongs to \a cptab.
+ */
+nodemask_t *
+cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
+{
+       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+       if (cpt == CFS_CPT_ANY)
+               return cptab->ctb_nodemask;
+
+       return cptab->ctb_parts[cpt].cpt_nodemask;
+}
+EXPORT_SYMBOL(cfs_cpt_nodemask);
+
+/**
+ * Assign online CPU \a cpu to partition \a cpt of \a cptab.
+ *
+ * Updates the CPU -> partition map, the table-wide and per-partition CPU
+ * masks, and marks the CPU's NUMA node in both node masks.
+ *
+ * \retval 1 the CPU was added
+ * \retval 0 \a cpu is out of range, offline, or already assigned to some
+ *           partition of this table
+ */
+int
+cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+       struct cfs_cpu_partition *part;
+       int     nid;
+
+       LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
+
+       if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+               CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
+               return 0;
+       }
+
+       if (cptab->ctb_cpu2cpt[cpu] != -1) {
+               CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
+                      cpu, cptab->ctb_cpu2cpt[cpu]);
+               return 0;
+       }
+
+       part = &cptab->ctb_parts[cpt];
+       cptab->ctb_cpu2cpt[cpu] = cpt;
+
+       /* the map said "unassigned", so neither mask may hold the CPU yet */
+       LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
+       LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
+
+       cpumask_set_cpu(cpu, cptab->ctb_cpumask);
+       cpumask_set_cpu(cpu, part->cpt_cpumask);
+
+       nid = cpu_to_node(cpu);
+
+       /* first CPU of this node in the CPT table */
+       if (!node_isset(nid, *cptab->ctb_nodemask))
+               node_set(nid, *cptab->ctb_nodemask);
+
+       /* first CPU of this node in the partition */
+       if (!node_isset(nid, *part->cpt_nodemask))
+               node_set(nid, *part->cpt_nodemask);
+
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpu);
+
+/**
+ * Remove CPU \a cpu from partition \a cpt of \a cptab.
+ *
+ * \a cpt may be CFS_CPT_ANY, in which case the partition is looked up in
+ * the CPU -> partition map. The call is a no-op (with a D_INFO message)
+ * when \a cpu is out of range, is not in the table, or is not in the
+ * partition that was named.
+ *
+ * After the CPU is cleared from the CPU masks, its NUMA node is cleared
+ * from a node mask as well if no remaining CPU of that mask lives on the
+ * same node.
+ */
+void
+cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+       int     node;
+       int     i;
+
+       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+       if (cpu < 0 || cpu >= nr_cpu_ids) {
+               CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
+               return;
+       }
+
+       if (cpt == CFS_CPT_ANY) {
+               /* caller doesn't know the partition ID */
+               cpt = cptab->ctb_cpu2cpt[cpu];
+               if (cpt < 0) { /* not set in this CPT-table */
+                       /* report the CPU id; cpt is just the negative
+                        * "unassigned" marker at this point
+                        */
+                       CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
+                              cpu, cptab);
+                       return;
+               }
+
+       } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
+               CDEBUG(D_INFO,
+                      "CPU %d is not in cpu-partition %d\n", cpu, cpt);
+               return;
+       }
+
+       LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
+       LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
+
+       cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+       cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
+       cptab->ctb_cpu2cpt[cpu] = -1;
+
+       node = cpu_to_node(cpu);
+
+       LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
+       LASSERT(node_isset(node, *cptab->ctb_nodemask));
+
+       for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
+               /* this CPT has other CPU belonging to this node? */
+               if (cpu_to_node(i) == node)
+                       break;
+       }
+
+       /* loop ran to completion: no CPU of @node is left in the CPT */
+       if (i >= nr_cpu_ids)
+               node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+
+       for_each_cpu(i, cptab->ctb_cpumask) {
+               /* this CPT-table has other CPU belonging to this node? */
+               if (cpu_to_node(i) == node)
+                       break;
+       }
+
+       /* loop ran to completion: no CPU of @node is left in the table */
+       if (i >= nr_cpu_ids)
+               node_clear(node, *cptab->ctb_nodemask);
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpu);
+
+/**
+ * Assign every CPU set in \a mask to partition \a cpt of \a cptab.
+ *
+ * Stops at the first CPU that cfs_cpt_set_cpu() refuses; CPUs added
+ * before that point stay assigned.
+ *
+ * \retval 1 all CPUs of \a mask were added
+ * \retval 0 \a mask holds no online CPU, or one CPU could not be added
+ */
+int
+cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+       int     cpu;
+
+       if (cpumask_empty(mask) ||
+           cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
+               CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
+                      cpt);
+               return 0;
+       }
+
+       for_each_cpu(cpu, mask) {
+               if (cfs_cpt_set_cpu(cptab, cpt, cpu))
+                       continue;
+
+               return 0;
+       }
+
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpumask);
+
+/**
+ * Remove every CPU set in \a mask from partition \a cpt of \a cptab by
+ * calling cfs_cpt_unset_cpu() on each of them.
+ */
+void
+cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+       int     cpu;
+
+       for_each_cpu(cpu, mask) {
+               cfs_cpt_unset_cpu(cptab, cpt, cpu);
+       }
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
+
+/**
+ * Assign all CPUs of NUMA node \a node to partition \a cpt of \a cptab.
+ *
+ * The node's CPU mask is copied into the shared scratch mask in cpt_data
+ * (under cpt_mutex) and passed to cfs_cpt_set_cpumask().
+ *
+ * \retval 1 success
+ * \retval 0 \a node is out of range or cfs_cpt_set_cpumask() failed
+ */
+int
+cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+       int     rc;
+
+       if (node < 0 || node >= MAX_NUMNODES) {
+               CDEBUG(D_INFO,
+                      "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
+               return 0;
+       }
+
+       mutex_lock(&cpt_data.cpt_mutex);
+
+       cpumask_copy(cpt_data.cpt_cpumask, cpumask_of_node(node));
+       rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
+
+       mutex_unlock(&cpt_data.cpt_mutex);
+
+       return rc;
+}
+EXPORT_SYMBOL(cfs_cpt_set_node);
+
+/**
+ * Remove all CPUs of NUMA node \a node from partition \a cpt of \a cptab.
+ *
+ * The node's CPU mask is copied into the shared scratch mask in cpt_data
+ * (under cpt_mutex) and passed to cfs_cpt_unset_cpumask(). An out-of-range
+ * \a node only produces a D_INFO message.
+ */
+void
+cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+       if (node < 0 || node >= MAX_NUMNODES) {
+               CDEBUG(D_INFO,
+                      "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
+               return;
+       }
+
+       mutex_lock(&cpt_data.cpt_mutex);
+
+       cpumask_copy(cpt_data.cpt_cpumask, cpumask_of_node(node));
+       cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
+
+       mutex_unlock(&cpt_data.cpt_mutex);
+}
+EXPORT_SYMBOL(cfs_cpt_unset_node);
+
+/**
+ * Assign the CPUs of every NUMA node set in \a mask to partition \a cpt.
+ *
+ * \retval 1 every node was added
+ * \retval 0 cfs_cpt_set_node() failed for one node; nodes handled before
+ *           it stay assigned
+ */
+int
+cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+       int     nid;
+
+       for_each_node_mask(nid, *mask) {
+               if (cfs_cpt_set_node(cptab, cpt, nid))
+                       continue;
+
+               return 0;
+       }
+
+       return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_nodemask);
+
+/**
+ * Remove the CPUs of every NUMA node set in \a mask from partition \a cpt
+ * by calling cfs_cpt_unset_node() on each node.
+ */
+void
+cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+       int     nid;
+
+       for_each_node_mask(nid, *mask) {
+               cfs_cpt_unset_node(cptab, cpt, nid);
+       }
+}
+EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
+
+/**
+ * Remove all CPUs from partition \a cpt of \a cptab, or from every
+ * partition when \a cpt is CFS_CPT_ANY.
+ */
+void
+cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
+{
+       int     first = cpt;
+       int     last = cpt;
+       int     cpu;
+
+       if (cpt == CFS_CPT_ANY) {
+               /* walk the whole table */
+               first = 0;
+               last = cptab->ctb_nparts - 1;
+       }
+
+       for (cpt = first; cpt <= last; cpt++) {
+               for_each_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask) {
+                       cfs_cpt_unset_cpu(cptab, cpt, cpu);
+               }
+       }
+}
+EXPORT_SYMBOL(cfs_cpt_clear);
+
+/**
+ * Pick a NUMA node for partition \a cpt of \a cptab in round-robin order.
+ *
+ * Each call advances a rotor counter (per partition, or table-wide when
+ * \a cpt is outside [0, ctb_nparts)) and returns the node at position
+ * "rotor modulo number-of-nodes" in the corresponding node mask.
+ *
+ * \retval HW node id taken from the selected node mask
+ */
+int
+cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
+{
+       nodemask_t      *mask;
+       int             weight;
+       /*
+        * Kept unsigned on purpose: the counters only ever grow, and a
+        * signed copy would turn negative once they pass INT_MAX, making
+        * the modulo below negative so that no node matches and we LBUG().
+        */
+       unsigned int    rotor;
+       int             node;
+
+       /* convert CPU partition ID to HW node id */
+
+       if (cpt < 0 || cpt >= cptab->ctb_nparts) {
+               mask = cptab->ctb_nodemask;
+               rotor = cptab->ctb_spread_rotor++;
+       } else {
+               mask = cptab->ctb_parts[cpt].cpt_nodemask;
+               rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
+       }
+
+       weight = nodes_weight(*mask);
+       LASSERT(weight > 0);
+
+       /* weight is positive here, so the cast cannot change its value */
+       rotor %= (unsigned int)weight;
+
+       for_each_node_mask(node, *mask) {
+               if (rotor-- == 0)
+                       return node;
+       }
+
+       /* unreachable: rotor < weight and the mask holds weight nodes */
+       LBUG();
+       return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_spread_node);
+
+/**
+ * Return the partition of \a cptab that the executing CPU belongs to.
+ *
+ * \param remap  when the CPU is not assigned to any partition: non-zero
+ *               maps it onto a valid partition ID (cpu modulo the number
+ *               of partitions), zero returns the negative map entry as is
+ */
+int
+cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
+{
+       int     cpu = smp_processor_id();
+       int     cpt = cptab->ctb_cpu2cpt[cpu];
+
+       if (cpt >= 0 || !remap)
+               return cpt;
+
+       /* don't return negative value for safety of upper layer,
+        * instead we shadow the unknown cpu to a valid partition ID
+        */
+       return cpu % cptab->ctb_nparts;
+}
+EXPORT_SYMBOL(cfs_cpt_current);
+
+/**
+ * Look up which partition of \a cptab CPU \a cpu is assigned to.
+ *
+ * \retval partition ID, or the negative "unassigned" marker stored in the
+ *         CPU -> partition map
+ */
+int
+cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
+{
+       int     cpt;
+
+       LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
+
+       cpt = cptab->ctb_cpu2cpt[cpu];
+       return cpt;
+}
+EXPORT_SYMBOL(cfs_cpt_of_cpu);
+
+/**
+ * Bind the current task to the CPUs (and memory nodes) of partition
+ * \a cpt of \a cptab, or of the whole table for CFS_CPT_ANY.
+ *
+ * Nothing is changed when the selected mask already covers every online
+ * CPU.
+ *
+ * \retval 0        success, or no binding was necessary
+ * \retval -EINVAL  the selected mask contains no online CPU
+ * \retval other    error returned by set_cpus_allowed_ptr()
+ */
+int
+cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
+{
+       cpumask_t       *cpumask;
+       nodemask_t      *nodemask;
+       bool            need_bind = false;
+       int             rc;
+       int             cpu;
+
+       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+       if (cpt == CFS_CPT_ANY) {
+               cpumask = cptab->ctb_cpumask;
+               nodemask = cptab->ctb_nodemask;
+       } else {
+               cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
+               nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
+       }
+
+       if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) {
+               CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
+                      cpt);
+               return -EINVAL;
+       }
+
+       /* is there any online CPU the partition does not cover? */
+       for_each_online_cpu(cpu) {
+               if (!cpumask_test_cpu(cpu, cpumask)) {
+                       need_bind = true;
+                       break;
+               }
+       }
+
+       /* don't need to set affinity because all online CPUs are covered */
+       if (!need_bind)
+               return 0;
+
+       rc = set_cpus_allowed_ptr(current, cpumask);
+       set_mems_allowed(*nodemask);
+       if (rc == 0)
+               schedule(); /* switch to allowed CPU */
+
+       return rc;
+}
+EXPORT_SYMBOL(cfs_cpt_bind);
+
+/**
+ * Choose max to \a number CPUs from \a node and set them in \a cpt.
+ * We always prefer to choose CPU in the same core/socket.
+ *
+ * Every CPU that gets assigned is also cleared from \a node, so on return
+ * \a node holds only the CPUs that were not consumed.
+ *
+ * \retval 0        success (on both the whole-node and the partial path)
+ * \retval -EINVAL  cfs_cpt_set_cpu() refused a CPU
+ * \retval -ENOMEM  the scratch cpumasks could not be allocated
+ */
+static int
+cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
+                    cpumask_t *node, int number)
+{
+       cpumask_t       *socket = NULL;
+       cpumask_t       *core = NULL;
+       int             rc = 0;
+       int             cpu;
+
+       LASSERT(number > 0);
+
+       if (number >= cpumask_weight(node)) {
+               /* everything left in @node fits: take it all */
+               while (!cpumask_empty(node)) {
+                       cpu = cpumask_first(node);
+
+                       if (!cfs_cpt_set_cpu(cptab, cpt, cpu))
+                               return -EINVAL;
+                       cpumask_clear_cpu(cpu, node);
+               }
+               return 0;
+       }
+
+       /* allocate scratch buffer */
+       LIBCFS_ALLOC(socket, cpumask_size());
+       LIBCFS_ALLOC(core, cpumask_size());
+       if (!socket || !core) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       while (!cpumask_empty(node)) {
+               cpu = cpumask_first(node);
+
+               /* get cpumask for cores in the same socket */
+               cpumask_copy(socket, topology_core_cpumask(cpu));
+               cpumask_and(socket, socket, node);
+
+               LASSERT(!cpumask_empty(socket));
+
+               while (!cpumask_empty(socket)) {
+                       int     i;
+
+                       /* get cpumask for hts in the same core */
+                       cpumask_copy(core, topology_sibling_cpumask(cpu));
+                       cpumask_and(core, core, node);
+
+                       LASSERT(!cpumask_empty(core));
+
+                       for_each_cpu(i, core) {
+                               cpumask_clear_cpu(i, socket);
+                               cpumask_clear_cpu(i, node);
+
+                               /* rc stays 0 unless an error is recorded,
+                                * so success is reported as 0 here just
+                                * like on the whole-node path above
+                                */
+                               if (!cfs_cpt_set_cpu(cptab, cpt, i)) {
+                                       rc = -EINVAL;
+                                       goto out;
+                               }
+
+                               if (--number == 0)
+                                       goto out;
+                       }
+                       cpu = cpumask_first(socket);
+               }
+       }
+
+ out:
+       if (socket)
+               LIBCFS_FREE(socket, cpumask_size());
+       if (core)
+               LIBCFS_FREE(core, cpumask_size());
+       return rc;
+}
+
+#define CPT_WEIGHT_MIN  4u
+
+/**
+ * Suggest a number of CPU partitions for the CPUs and NUMA nodes that are
+ * online right now.
+ *
+ * Systems with at most CPT_WEIGHT_MIN CPUs get a single partition.
+ * Otherwise a power-of-two candidate is derived from the CPU count and
+ * then reconciled with the node count. The result always divides the
+ * number of online CPUs evenly.
+ */
+static unsigned int
+cfs_cpt_num_estimate(void)
+{
+       unsigned nnode = num_online_nodes();
+       unsigned ncpu  = num_online_cpus();
+       unsigned ncpt  = 1;
+
+       if (ncpu > CPT_WEIGHT_MIN) {
+               /* generate reasonable number of CPU partitions based on
+                * total number of CPUs: the smallest power of two N (at
+                * least 2) with NCPUS <= 2 * N^2
+                */
+               ncpt = 2;
+               while (ncpu > 2 * ncpt * ncpt)
+                       ncpt <<= 1;
+
+               if (ncpt <= nnode) {
+                       /* fat numa system: halve nnode down to ncpt */
+                       while (nnode > ncpt)
+                               nnode >>= 1;
+               } else {
+                       /* ncpt > nnode: double nnode while it still fits */
+                       while ((nnode << 1) <= ncpt)
+                               nnode <<= 1;
+               }
+
+               ncpt = nnode;
+       }
+
+#if (BITS_PER_LONG == 32)
+       /* config many CPU partitions on 32-bit system could consume
+        * too much memory
+        */
+       ncpt = min(2U, ncpt);
+#endif
+       /* shrink until it divides the CPU count; worst case is 1 */
+       while (ncpu % ncpt != 0)
+               ncpt--;
+
+       return ncpt;
+}
+
+/**
+ * Build a CPU partition table with \a ncpt equally sized partitions.
+ *
+ * A value of \a ncpt <= 0 selects the estimate from
+ * cfs_cpt_num_estimate(). Online nodes are walked in order and their CPUs
+ * are handed to cfs_cpt_choose_ncpus() until each partition holds
+ * (online CPUs / ncpt) CPUs.
+ *
+ * \retval pointer to the new table
+ * \retval NULL on allocation failure, when the online CPU count is not a
+ *         multiple of \a ncpt, or when the CPUs found do not fill the
+ *         partitions exactly
+ */
+static struct cfs_cpt_table *
+cfs_cpt_table_create(int ncpt)
+{
+       struct cfs_cpt_table *cptab = NULL;
+       cpumask_t       *mask = NULL;
+       int             cpt = 0;
+       int             num;
+       int             rc;
+       int             i;
+
+       /* rc doubles as the suggested partition count until the loop below */
+       rc = cfs_cpt_num_estimate();
+       if (ncpt <= 0)
+               ncpt = rc;
+
+       /* warn only; the request is still honoured if it passes the checks */
+       if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
+               CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
+                     ncpt, rc);
+       }
+
+       if (num_online_cpus() % ncpt != 0) {
+               CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
+                      (int)num_online_cpus(), ncpt);
+               goto failed;
+       }
+
+       cptab = cfs_cpt_table_alloc(ncpt);
+       if (!cptab) {
+               CERROR("Failed to allocate CPU map(%d)\n", ncpt);
+               goto failed;
+       }
+
+       /* number of CPUs every partition has to end up with */
+       num = num_online_cpus() / ncpt;
+       if (num == 0) {
+               CERROR("CPU changed while setting CPU partition\n");
+               goto failed;
+       }
+
+       /* scratch copy of one node's CPUs; consumed by cfs_cpt_choose_ncpus */
+       LIBCFS_ALLOC(mask, cpumask_size());
+       if (!mask) {
+               CERROR("Failed to allocate scratch cpumask\n");
+               goto failed;
+       }
+
+       for_each_online_node(i) {
+               cpumask_copy(mask, cpumask_of_node(i));
+
+               while (!cpumask_empty(mask)) {
+                       struct cfs_cpu_partition *part;
+                       int    n;
+
+                       /* more CPUs than the partitions can take */
+                       if (cpt >= ncpt)
+                               goto failed;
+
+                       part = &cptab->ctb_parts[cpt];
+
+                       /* how many CPUs the current partition still needs */
+                       n = num - cpumask_weight(part->cpt_cpumask);
+                       LASSERT(n > 0);
+
+                       rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
+                       if (rc < 0)
+                               goto failed;
+
+                       /* move on once the current partition is full */
+                       LASSERT(num >= cpumask_weight(part->cpt_cpumask));
+                       if (num == cpumask_weight(part->cpt_cpumask))
+                               cpt++;
+               }
+       }
+
+       /* every partition must have been filled completely */
+       if (cpt != ncpt ||
+           num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
+               CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
+                      cptab->ctb_nparts, num, cpt,
+                      cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
+               goto failed;
+       }
+
+       LIBCFS_FREE(mask, cpumask_size());
+
+       return cptab;
+
+ failed:
+       CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
+              ncpt, num_online_nodes(), num_online_cpus());
+
+       if (mask)
+               LIBCFS_FREE(mask, cpumask_size());
+
+       if (cptab)
+               cfs_cpt_table_free(cptab);
+
+       return NULL;
+}
+
+/**
+ * Build a CPU partition table from a user supplied \a pattern such as
+ * "0[0,1,2,3] 1[4,5,6,7]" or "N 0[0,1] 1[2,3]".
+ *
+ * The number of partitions is the number of '[' characters in the
+ * pattern. A leading 'n'/'N' switches the bracket contents from CPU IDs
+ * to NUMA node IDs. Every partition ID in [0, number of partitions) has
+ * to appear exactly once and must end up with at least one online CPU.
+ *
+ * \retval pointer to the new table
+ * \retval NULL on any parse, range or allocation error
+ */
+static struct cfs_cpt_table *
+cfs_cpt_table_create_pattern(char *pattern)
+{
+       struct cfs_cpt_table    *cptab;
+       char                    *str    = pattern;
+       int                     node    = 0;
+       int                     high;
+       int                     ncpt;
+       int                     c;
+
+       /* one '[' per partition: count them to size the table */
+       for (ncpt = 0;; ncpt++) { /* quick scan bracket */
+               str = strchr(str, '[');
+               if (!str)
+                       break;
+               str++;
+       }
+
+       /* optional leading 'N': bracket contents are NUMA node IDs */
+       str = cfs_trimwhite(pattern);
+       if (*str == 'n' || *str == 'N') {
+               pattern = str + 1;
+               node = 1;
+       }
+
+       if (ncpt == 0 ||
+           (node && ncpt > num_online_nodes()) ||
+           (!node && ncpt > num_online_cpus())) {
+               CERROR("Invalid pattern %s, or too many partitions %d\n",
+                      pattern, ncpt);
+               return NULL;
+       }
+
+       /* largest ID accepted inside the brackets */
+       high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
+
+       cptab = cfs_cpt_table_alloc(ncpt);
+       if (!cptab) {
+               CERROR("Failed to allocate cpu partition table\n");
+               return NULL;
+       }
+
+       /* one iteration per "ID[list]" group; c counts the groups parsed */
+       for (str = cfs_trimwhite(pattern), c = 0;; c++) {
+               struct cfs_range_expr   *range;
+               struct cfs_expr_list    *el;
+               char                    *bracket = strchr(str, '[');
+               int                     cpt;
+               int                     rc;
+               int                     i;
+               int                     n;
+
+               if (!bracket) {
+                       /* no group left: only end of string is acceptable */
+                       if (*str != 0) {
+                               CERROR("Invalid pattern %s\n", str);
+                               goto failed;
+                       }
+                       if (c != ncpt) {
+                               CERROR("expect %d partitions but found %d\n",
+                                      ncpt, c);
+                               goto failed;
+                       }
+                       break;
+               }
+
+               /* partition ID; n receives the number of characters read */
+               if (sscanf(str, "%d%n", &cpt, &n) < 1) {
+                       CERROR("Invalid cpu pattern %s\n", str);
+                       goto failed;
+               }
+
+               if (cpt < 0 || cpt >= ncpt) {
+                       CERROR("Invalid partition id %d, total partitions %d\n",
+                              cpt, ncpt);
+                       goto failed;
+               }
+
+               if (cfs_cpt_weight(cptab, cpt) != 0) {
+                       CERROR("Partition %d has already been set.\n", cpt);
+                       goto failed;
+               }
+
+               /* only whitespace may sit between the ID and its '[' */
+               str = cfs_trimwhite(str + n);
+               if (str != bracket) {
+                       CERROR("Invalid pattern %s\n", str);
+                       goto failed;
+               }
+
+               bracket = strchr(str, ']');
+               if (!bracket) {
+                       CERROR("missing right bracket for cpt %d, %s\n",
+                              cpt, str);
+                       goto failed;
+               }
+
+               /* parse "[...]" including both brackets */
+               if (cfs_expr_list_parse(str, (bracket - str) + 1,
+                                       0, high, &el) != 0) {
+                       CERROR("Can't parse number range: %s\n", str);
+                       goto failed;
+               }
+
+               list_for_each_entry(range, &el->el_exprs, re_link) {
+                       for (i = range->re_lo; i <= range->re_hi; i++) {
+                               /* skip values that are off the stride */
+                               if ((i - range->re_lo) % range->re_stride != 0)
+                                       continue;
+
+                               rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
+                                           cfs_cpt_set_cpu(cptab, cpt, i);
+                               if (!rc) {
+                                       cfs_expr_list_free(el);
+                                       goto failed;
+                               }
+                       }
+               }
+
+               cfs_expr_list_free(el);
+
+               if (!cfs_cpt_online(cptab, cpt)) {
+                       CERROR("No online CPU is found on partition %d\n", cpt);
+                       goto failed;
+               }
+
+               /* continue behind the closing bracket */
+               str = cfs_trimwhite(bracket + 1);
+       }
+
+       return cptab;
+
+ failed:
+       cfs_cpt_table_free(cptab);
+       return NULL;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * CPU hotplug notifier callback.
+ *
+ * CPU online/dead events bump cpt_data.cpt_version so that
+ * cfs_cpu_init() can detect a hotplug that raced with table setup.
+ * For CPU_DEAD events it additionally checks whether any hardware thread
+ * sharing a core with the dead CPU is still online and logs at D_WARNING
+ * when none is left.
+ *
+ * Always returns NOTIFY_OK.
+ */
+static int
+cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+       unsigned int  cpu = (unsigned long)hcpu;
+       bool         warn;
+
+       switch (action) {
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+       case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
+               spin_lock(&cpt_data.cpt_lock);
+               cpt_data.cpt_version++;
+               spin_unlock(&cpt_data.cpt_lock);
+               /* Fall through */
+       default:
+               /* everything except CPU_DEAD(_FROZEN) is only logged */
+               if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) {
+                       CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n",
+                              cpu, action);
+                       break;
+               }
+
+               /* cpt_mutex guards the shared scratch mask */
+               mutex_lock(&cpt_data.cpt_mutex);
+               /* if all HTs in a core are offline, it may break affinity */
+               cpumask_copy(cpt_data.cpt_cpumask,
+                            topology_sibling_cpumask(cpu));
+               warn = cpumask_any_and(cpt_data.cpt_cpumask,
+                                      cpu_online_mask) >= nr_cpu_ids;
+               mutex_unlock(&cpt_data.cpt_mutex);
+               CDEBUG(warn ? D_WARNING : D_INFO,
+                      "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n",
+                      cpu, action);
+       }
+
+       return NOTIFY_OK;
+}
+
+/*
+ * Hotplug notifier block; registered by cfs_cpu_init() and unregistered
+ * by cfs_cpu_fini().
+ */
+static struct notifier_block cfs_cpu_notifier = {
+       .notifier_call  = cfs_cpu_notify,
+       .priority       = 0
+};
+
+#endif
+
+/**
+ * Tear down the global CPU partition state: free the global partition
+ * table, unregister the hotplug notifier and free the scratch cpumask.
+ *
+ * Both pointers are reset to NULL after they are freed. cfs_cpu_init()
+ * calls this function on its failure path, so leaving stale pointers
+ * behind would make a later cfs_cpu_fini() free them a second time and a
+ * later cfs_cpu_init() trip over its LASSERT(!cfs_cpt_table).
+ */
+void
+cfs_cpu_fini(void)
+{
+       if (cfs_cpt_table) {
+               cfs_cpt_table_free(cfs_cpt_table);
+               cfs_cpt_table = NULL;
+       }
+
+#ifdef CONFIG_HOTPLUG_CPU
+       /* must happen before the scratch mask used by the notifier goes */
+       unregister_hotcpu_notifier(&cfs_cpu_notifier);
+#endif
+       if (cpt_data.cpt_cpumask) {
+               LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
+               cpt_data.cpt_cpumask = NULL;
+       }
+}
+
+/**
+ * Set up the global CPU partition state.
+ *
+ * Resets cpt_data, allocates the scratch cpumask, registers the hotplug
+ * notifier and builds the global table cfs_cpt_table - from cpu_pattern
+ * when that is non-empty, otherwise from cpu_npartitions.
+ *
+ * \retval 0  success
+ * \retval -1 allocation or table creation failed, or a CPU hotplug event
+ *            was seen while the table was being built
+ */
+int
+cfs_cpu_init(void)
+{
+       LASSERT(!cfs_cpt_table);
+
+       memset(&cpt_data, 0, sizeof(cpt_data));
+
+       LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
+       if (!cpt_data.cpt_cpumask) {
+               CERROR("Failed to allocate scratch buffer\n");
+               /* nothing else is set up yet, so no cfs_cpu_fini() here */
+               return -1;
+       }
+
+       spin_lock_init(&cpt_data.cpt_lock);
+       mutex_init(&cpt_data.cpt_mutex);
+
+       /* registered before the table is built so that a hotplug during
+        * setup shows up as a version mismatch below
+        */
+#ifdef CONFIG_HOTPLUG_CPU
+       register_hotcpu_notifier(&cfs_cpu_notifier);
+#endif
+
+       if (*cpu_pattern != 0) {
+               cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
+               if (!cfs_cpt_table) {
+                       CERROR("Failed to create cptab from pattern %s\n",
+                              cpu_pattern);
+                       goto failed;
+               }
+
+       } else {
+               cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
+               if (!cfs_cpt_table) {
+                       CERROR("Failed to create ptable with npartitions %d\n",
+                              cpu_npartitions);
+                       goto failed;
+               }
+       }
+
+       /* the table recorded the version it was allocated at; a different
+        * value now means the notifier ran in between
+        */
+       spin_lock(&cpt_data.cpt_lock);
+       if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
+               spin_unlock(&cpt_data.cpt_lock);
+               CERROR("CPU hotplug/unplug during setup\n");
+               goto failed;
+       }
+       spin_unlock(&cpt_data.cpt_lock);
+
+       LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n",
+                num_online_cpus(), cfs_cpt_number(cfs_cpt_table));
+       return 0;
+
+ failed:
+       /* undoes the notifier registration and frees what was allocated */
+       cfs_cpu_fini();
+       return -1;
+}
+
+#endif
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c

new file mode 100644 (file)

index 0000000..db05727
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c
@@ -0,0 +1,137 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ */
+
+/*
+ * These are crypto API shash wrappers around zlib_adler32().
+ */
+
+#include <linux/module.h>
+#include <linux/zutil.h>
+#include <crypto/internal/hash.h>
+#include "linux-crypto.h"
+
+#define CHKSUM_BLOCK_SIZE      1
+#define CHKSUM_DIGEST_SIZE     4
+
+/* Transform constructor: set the default key held in the tfm context to 1. */
+static int adler32_cra_init(struct crypto_tfm *tfm)
+{
+       u32 *default_key;
+
+       default_key = crypto_tfm_ctx(tfm);
+       *default_key = 1;
+       return 0;
+}
+
+/*
+ * Store a caller-supplied 32-bit key in the transform context.
+ *
+ * Returns 0 on success, or -EINVAL (with CRYPTO_TFM_RES_BAD_KEY_LEN set on
+ * the transform) when keylen is not sizeof(u32).
+ */
+static int adler32_setkey(struct crypto_shash *hash, const u8 *key,
+                         unsigned int keylen)
+{
+       u32 *stored_key = crypto_shash_ctx(hash);
+
+       if (keylen == sizeof(u32)) {
+               *stored_key = *(u32 *)key;
+               return 0;
+       }
+
+       crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+       return -EINVAL;
+}
+
+/* Start a new hash: copy the transform's key into the descriptor state. */
+static int adler32_init(struct shash_desc *desc)
+{
+       u32 *state = shash_desc_ctx(desc);
+       u32 *tfm_key = crypto_shash_ctx(desc->tfm);
+
+       *state = *tfm_key;
+       return 0;
+}
+
+/* Fold len bytes of data into the running checksum kept in the descriptor. */
+static int adler32_update(struct shash_desc *desc, const u8 *data,
+                         unsigned int len)
+{
+       u32 *state = shash_desc_ctx(desc);
+       u32 updated = zlib_adler32(*state, data, len);
+
+       *state = updated;
+       return 0;
+}
+
+/*
+ * Continue the checksum in *cksump over data and write the 32-bit result to
+ * out.  *cksump itself is not modified.
+ */
+static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len,
+                          u8 *out)
+{
+       u32 *digest = (u32 *)out;
+
+       *digest = zlib_adler32(*cksump, data, len);
+       return 0;
+}
+
+/* Final update plus output, starting from the descriptor's running state. */
+static int adler32_finup(struct shash_desc *desc, const u8 *data,
+                        unsigned int len, u8 *out)
+{
+       u32 *state = shash_desc_ctx(desc);
+
+       return __adler32_finup(state, data, len, out);
+}
+
+/* Write the descriptor's current checksum to out without further input. */
+static int adler32_final(struct shash_desc *desc, u8 *out)
+{
+       u32 *state = shash_desc_ctx(desc);
+       u32 *digest = (u32 *)out;
+
+       *digest = *state;
+       return 0;
+}
+
+/*
+ * One-shot digest: the checksum starts from the transform's key rather than
+ * from the descriptor state.
+ */
+static int adler32_digest(struct shash_desc *desc, const u8 *data,
+                         unsigned int len, u8 *out)
+{
+       u32 *tfm_key = crypto_shash_ctx(desc->tfm);
+
+       return __adler32_finup(tfm_key, data, len, out);
+}
+
+/*
+ * shash algorithm descriptor for the "adler32" algorithm, driver name
+ * "adler32-zlib".  Both the per-descriptor state (descsize) and the
+ * per-transform context (cra_ctxsize) are a single u32.
+ */
+static struct shash_alg alg = {
+       .setkey         = adler32_setkey,
+       .init           = adler32_init,
+       .update         = adler32_update,
+       .final          = adler32_final,
+       .finup          = adler32_finup,
+       .digest         = adler32_digest,
+       .descsize       = sizeof(u32),
+       .digestsize     = CHKSUM_DIGEST_SIZE,
+       .base           = {
+               .cra_name               = "adler32",
+               .cra_driver_name        = "adler32-zlib",
+               .cra_priority           = 100,
+               .cra_blocksize          = CHKSUM_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(u32),
+               .cra_module             = THIS_MODULE,
+               .cra_init               = adler32_cra_init,
+       }
+};
+
+/* Register the adler32 shash; returns crypto_register_shash()'s result. */
+int cfs_crypto_adler32_register(void)
+{
+       int rc = crypto_register_shash(&alg);
+
+       return rc;
+}
+
+/* Unregister the adler32 shash registered by cfs_crypto_adler32_register(). */
+void cfs_crypto_adler32_unregister(void)
+{
+       crypto_unregister_shash(&alg);
+}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c

new file mode 100644 (file)

index 0000000..1d2f70f
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
@@ -0,0 +1,284 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include "../../../include/linux/libcfs/libcfs.h"
+#include "linux-crypto.h"
+/**
+ * Array of hash algorithm speeds, in MB per second, indexed by algorithm id
+ */
+static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX];
+
+/**
+ * Allocate and initialise a hash transform for the given algorithm.
+ *
+ * \param[in]  alg_id   hash algorithm id, resolved by cfs_crypto_hash_type()
+ * \param[out] type     set to the algorithm's type descriptor
+ * \param[out] desc     descriptor to fill in; on success the caller owns
+ *                      desc->tfm and must release it with crypto_free_hash()
+ * \param[in]  key      optional key; when NULL the type's cht_key is used
+ *                      if it is non-zero
+ * \param[in]  key_len  length of \a key in bytes
+ *
+ * \retval 0            success
+ * \retval negative     errno on failure; no transform is left allocated
+ */
+static int cfs_crypto_hash_alloc(unsigned char alg_id,
+                                const struct cfs_crypto_hash_type **type,
+                                struct hash_desc *desc, unsigned char *key,
+                                unsigned int key_len)
+{
+       int     err = 0;
+
+       *type = cfs_crypto_hash_type(alg_id);
+
+       if (!*type) {
+               CWARN("Unsupported hash algorithm id = %d, max id is %d\n",
+                     alg_id, CFS_HASH_ALG_MAX);
+               return -EINVAL;
+       }
+       desc->tfm = crypto_alloc_hash((*type)->cht_name, 0, 0);
+
+       if (!desc->tfm)
+               return -EINVAL;
+
+       if (IS_ERR(desc->tfm)) {
+               CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n",
+                      (*type)->cht_name);
+               return PTR_ERR(desc->tfm);
+       }
+
+       desc->flags = 0;
+
+       /** shash and digest initialise in a different order:
+        * shash: crypto_hash_setkey, crypto_hash_init
+        * digest: crypto_digest_init, crypto_digest_setkey
+        * The shash order is the one used here.
+        */
+       if (key)
+               err = crypto_hash_setkey(desc->tfm, key, key_len);
+       else if ((*type)->cht_key != 0)
+               err = crypto_hash_setkey(desc->tfm,
+                                        (unsigned char *)&((*type)->cht_key),
+                                        (*type)->cht_size);
+
+       if (err != 0)
+               goto out_free;
+
+       CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n",
+              (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_name,
+              (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_driver_name,
+              cfs_crypto_hash_speeds[alg_id]);
+
+       err = crypto_hash_init(desc);
+       if (err != 0)
+               goto out_free;
+
+       return 0;
+
+out_free:
+       /* callers release desc->tfm only on success, so drop it here */
+       crypto_free_hash(desc->tfm);
+       return err;
+}
+
+/**
+ * Compute the digest of a flat buffer in one call.
+ *
+ * \param[in]     alg_id    hash algorithm id
+ * \param[in]     buf       data to hash; must be non-NULL
+ * \param[in]     buf_len   length of \a buf; must be non-zero
+ * \param[in]     key       optional key, passed to cfs_crypto_hash_alloc()
+ * \param[in]     key_len   length of \a key
+ * \param[out]    hash      buffer receiving the digest
+ * \param[in,out] hash_len  size of \a hash; set to the required size when
+ *                          -ENOSPC is returned
+ *
+ * \retval -EINVAL  \a buf is NULL, \a buf_len is 0 or \a hash_len is NULL
+ * \retval -ENOSPC  \a hash is NULL or smaller than the digest
+ * \retval other    result of cfs_crypto_hash_alloc() / crypto_hash_digest()
+ */
+int cfs_crypto_hash_digest(unsigned char alg_id,
+                          const void *buf, unsigned int buf_len,
+                          unsigned char *key, unsigned int key_len,
+                          unsigned char *hash, unsigned int *hash_len)
+{
+       const struct cfs_crypto_hash_type       *hash_type;
+       struct hash_desc        desc;
+       struct scatterlist      sg;
+       int                     rc;
+
+       if (!buf || !buf_len || !hash_len)
+               return -EINVAL;
+
+       rc = cfs_crypto_hash_alloc(alg_id, &hash_type, &desc, key, key_len);
+       if (rc)
+               return rc;
+
+       if (hash && *hash_len >= hash_type->cht_size) {
+               sg_init_one(&sg, buf, buf_len);
+
+               desc.flags = 0;
+               rc = crypto_hash_digest(&desc, &sg, sg.length, hash);
+       } else {
+               /* tell the caller how much room the digest needs */
+               *hash_len = hash_type->cht_size;
+               rc = -ENOSPC;
+       }
+
+       crypto_free_hash(desc.tfm);
+       return rc;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_digest);
+
+/**
+ * Allocate and initialise a descriptor for incremental hashing.
+ *
+ * The result is fed with cfs_crypto_hash_update() or
+ * cfs_crypto_hash_update_page() and released by cfs_crypto_hash_final().
+ *
+ * \param[in] alg_id    hash algorithm id
+ * \param[in] key       optional key, passed to cfs_crypto_hash_alloc()
+ * \param[in] key_len   length of \a key
+ *
+ * \retval descriptor pointer on success
+ * \retval ERR_PTR(-ENOMEM) or ERR_PTR(err) on failure
+ */
+struct cfs_crypto_hash_desc *
+       cfs_crypto_hash_init(unsigned char alg_id,
+                            unsigned char *key, unsigned int key_len)
+{
+       struct  hash_desc       *hdesc;
+       int                  err;
+       const struct cfs_crypto_hash_type       *type;
+
+       /*
+        * A gfp mask of 0 carries no reclaim flags, so the allocation could
+        * fail needlessly under memory pressure.  GFP_KERNEL assumes the
+        * caller may sleep - NOTE(review): confirm no atomic-context callers.
+        */
+       hdesc = kmalloc(sizeof(*hdesc), GFP_KERNEL);
+       if (!hdesc)
+               return ERR_PTR(-ENOMEM);
+
+       err = cfs_crypto_hash_alloc(alg_id, &type, hdesc, key, key_len);
+
+       if (err) {
+               kfree(hdesc);
+               return ERR_PTR(err);
+       }
+       return (struct cfs_crypto_hash_desc *)hdesc;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_init);
+
+/**
+ * Feed part of a page into an incremental hash.
+ *
+ * \a offset is masked with ~CFS_PAGE_MASK before use.  Returns the result
+ * of crypto_hash_update().
+ */
+int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc,
+                               struct page *page, unsigned int offset,
+                               unsigned int len)
+{
+       struct hash_desc *desc = (struct hash_desc *)hdesc;
+       struct scatterlist sg;
+
+       sg_init_table(&sg, 1);
+       sg_set_page(&sg, page, len, offset & ~CFS_PAGE_MASK);
+
+       return crypto_hash_update(desc, &sg, sg.length);
+}
+EXPORT_SYMBOL(cfs_crypto_hash_update_page);
+
+/**
+ * Feed a flat buffer into an incremental hash.
+ *
+ * Returns the result of crypto_hash_update().
+ */
+int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc,
+                          const void *buf, unsigned int buf_len)
+{
+       struct hash_desc *desc = (struct hash_desc *)hdesc;
+       struct scatterlist sg;
+
+       sg_init_one(&sg, buf, buf_len);
+
+       return crypto_hash_update(desc, &sg, sg.length);
+}
+EXPORT_SYMBOL(cfs_crypto_hash_update);
+
+/**
+ * Finish an incremental hash and release its descriptor.
+ *
+ * If \a hash_len is NULL the descriptor is destroyed without producing a
+ * digest.  When an error is returned the descriptor is NOT freed, so the
+ * caller may correct the problem and call again.
+ *
+ * \retval 0        digest written (or descriptor destroyed), hdesc freed
+ * \retval -ENOSPC  \a hash is NULL or too small; *hash_len is set to the
+ *                  required size
+ * \retval negative error from crypto_hash_final()
+ */
+int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc,
+                         unsigned char *hash, unsigned int *hash_len)
+{
+       struct hash_desc *desc = (struct hash_desc *)hdesc;
+       int     digest_size = crypto_hash_digestsize(desc->tfm);
+       int     rc = 0;
+
+       if (hash_len) {
+               if (!hash || *hash_len < digest_size) {
+                       *hash_len = digest_size;
+                       return -ENOSPC;
+               }
+
+               rc = crypto_hash_final(desc, hash);
+               if (rc < 0) {
+                       /* May be caller can fix error */
+                       return rc;
+               }
+       }
+
+       crypto_free_hash(desc->tfm);
+       kfree(hdesc);
+       return rc;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_final);
+
+/**
+ * Benchmark one hash algorithm for about a second and record its speed.
+ *
+ * Repeatedly digests \a buf until one second's worth of jiffies has passed,
+ * then stores the throughput in MB/s in cfs_crypto_hash_speeds[alg_id], or
+ * -1 if the digest call failed.
+ *
+ * \param[in] alg_id    hash algorithm id (index into the speed table)
+ * \param[in] buf       data block to hash
+ * \param[in] buf_len   length of \a buf in bytes
+ */
+static void cfs_crypto_performance_test(unsigned char alg_id,
+                                       const unsigned char *buf,
+                                       unsigned int buf_len)
+{
+       unsigned long              start, end;
+       int                          bcount, err = 0;
+       int                          sec = 1; /* do test only 1 sec */
+       unsigned char              hash[64];
+       unsigned int                hash_len = 64;
+
+       for (start = jiffies, end = start + sec * HZ, bcount = 0;
+            time_before(jiffies, end); bcount++) {
+               err = cfs_crypto_hash_digest(alg_id, buf, buf_len, NULL, 0,
+                                            hash, &hash_len);
+               if (err)
+                       break;
+       }
+       end = jiffies;
+
+       if (err) {
+               cfs_crypto_hash_speeds[alg_id] =  -1;
+               CDEBUG(D_INFO, "Crypto hash algorithm %s, err = %d\n",
+                      cfs_crypto_hash_name(alg_id), err);
+       } else {
+               u64     bytes = (u64)bcount * buf_len;
+
+               /*
+                * Total bytes hashed must be computed in 64 bits: as a
+                * 32-bit product, bcount * buf_len wraps once more than
+                * 4 GiB has been hashed.  The loop above only ends without
+                * error after at least HZ jiffies, so the divisor is
+                * non-zero.
+                */
+               bytes = div_u64(bytes, jiffies_to_msecs(end - start)) * 1000;
+               cfs_crypto_hash_speeds[alg_id] = (int)(bytes >> 20);
+       }
+       CDEBUG(D_INFO, "Crypto hash algorithm %s speed = %d MB/s\n",
+              cfs_crypto_hash_name(alg_id), cfs_crypto_hash_speeds[alg_id]);
+}
+
+/**
+ * Return the recorded speed in MB/s for \a hash_alg, or -1 when the id is
+ * not below CFS_HASH_ALG_MAX.
+ */
+int cfs_crypto_hash_speed(unsigned char hash_alg)
+{
+       if (hash_alg >= CFS_HASH_ALG_MAX)
+               return -1;
+
+       return cfs_crypto_hash_speeds[hash_alg];
+}
+EXPORT_SYMBOL(cfs_crypto_hash_speed);
+
+/**
+ * Do performance test for all hash algorithms.
+ *
+ * Fills a 128 KiB buffer with a repeating 0..255 byte pattern and runs
+ * cfs_crypto_performance_test() on it for every algorithm id.
+ *
+ * \retval 0        tests were run
+ * \retval -ENOMEM  the test buffer could not be allocated
+ */
+static int cfs_crypto_test_hashes(void)
+{
+       unsigned char      i;
+       unsigned char      *data;
+       unsigned int        j;
+       /* Data block size for testing hash. Maximum
+        * kmalloc size for 2.6.18 kernel is 128K
+        */
+       unsigned int        data_len = 1 * 128 * 1024;
+
+       /*
+        * A gfp mask of 0 forbids reclaim, which makes a 128K contiguous
+        * allocation likely to fail.  cfs_crypto_register() calls this
+        * right after request_module(), so GFP_KERNEL is used here.
+        */
+       data = kmalloc(data_len, GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       for (j = 0; j < data_len; j++)
+               data[j] = j & 0xff;
+
+       for (i = 0; i < CFS_HASH_ALG_MAX; i++)
+               cfs_crypto_performance_test(i, data, data_len);
+
+       kfree(data);
+       return 0;
+}
+
+static int adler32;
+
+/*
+ * Register the adler32 shash and benchmark every hash algorithm.
+ *
+ * Always returns 0; the results of the adler32 registration and of the
+ * benchmark run do not affect the return value.
+ */
+int cfs_crypto_register(void)
+{
+       request_module("crc32c");
+
+       /* kept so cfs_crypto_unregister() only undoes a successful register */
+       adler32 = cfs_crypto_adler32_register();
+
+       /* check all algorithms and do performance test */
+       cfs_crypto_test_hashes();
+       return 0;
+}
+
+/* Unregister adler32, but only if its registration returned 0. */
+void cfs_crypto_unregister(void)
+{
+       if (adler32 != 0)
+               return;
+
+       cfs_crypto_adler32_unregister();
+}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h

new file mode 100644 (file)

index 0000000..18e8cd4
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h
@@ -0,0 +1,29 @@
+ /*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/**
+ * Functions for start/stop shash adler32 algorithm.
+ */
+int cfs_crypto_adler32_register(void);
+void cfs_crypto_adler32_unregister(void);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c

new file mode 100644 (file)

index 0000000..13d31e8
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c
@@ -0,0 +1,111 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/linux/linux-curproc.c
+ *
+ * Lustre curproc API implementation for Linux kernel
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/fs_struct.h>
+
+#include <linux/compat.h>
+#include <linux/thread_info.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../../include/linux/libcfs/libcfs.h"
+
+/*
+ * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
+ * for Linux kernel.
+ */
+
+/*
+ * Raise \a cap in the current task's effective capability set.
+ * Does nothing if prepare_creds() fails.
+ */
+void cfs_cap_raise(cfs_cap_t cap)
+{
+       struct cred *new_cred = prepare_creds();
+
+       if (!new_cred)
+               return;
+
+       cap_raise(new_cred->cap_effective, cap);
+       commit_creds(new_cred);
+}
+EXPORT_SYMBOL(cfs_cap_raise);
+
+/*
+ * Lower \a cap in the current task's effective capability set.
+ * Does nothing if prepare_creds() fails.
+ */
+void cfs_cap_lower(cfs_cap_t cap)
+{
+       struct cred *new_cred = prepare_creds();
+
+       if (!new_cred)
+               return;
+
+       cap_lower(new_cred->cap_effective, cap);
+       commit_creds(new_cred);
+}
+EXPORT_SYMBOL(cfs_cap_lower);
+
+/* Return non-zero if \a cap is raised in current_cap(). */
+int cfs_cap_raised(cfs_cap_t cap)
+{
+       return cap_raised(current_cap(), cap);
+}
+EXPORT_SYMBOL(cfs_cap_raised);
+
+/* Pack a kernel capability set into a cfs_cap_t; only kcap.cap[0] is kept. */
+static void cfs_kernel_cap_pack(kernel_cap_t kcap, cfs_cap_t *cap)
+{
+       /* XXX lost high byte */
+       *cap = kcap.cap[0];
+}
+
+/* Return current_cap() packed into a cfs_cap_t by cfs_kernel_cap_pack(). */
+cfs_cap_t cfs_curproc_cap_pack(void)
+{
+       cfs_cap_t packed;
+
+       cfs_kernel_cap_pack(current_cap(), &packed);
+
+       return packed;
+}
+EXPORT_SYMBOL(cfs_curproc_cap_pack);
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c

new file mode 100644 (file)

index 0000000..638e4b3
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
@@ -0,0 +1,200 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/linux/linux-debug.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../../include/linux/libcfs/libcfs.h"
+
+#include "../tracefile.h"
+
+#include <linux/kallsyms.h>
+
+char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall";
+char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
+
+/**
+ * Run the debug-log upcall program after a Lustre log has been dumped.
+ *
+ * The program named by lnet_debug_log_upcall is started with the dump path
+ * as its only argument.  A result of -ENOENT is not reported as an error.
+ *
+ * \param file  path of the dumped log; must not be NULL
+ */
+void libcfs_run_debug_log_upcall(char *file)
+{
+       char *envp[] = {
+               "HOME=/",
+               "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+               NULL};
+       char *argv[3];
+       int   rc;
+
+       LASSERTF(file, "called on a null filename\n");
+
+       argv[0] = lnet_debug_log_upcall;
+       argv[1] = file; /* only need to pass the path of the file */
+       argv[2] = NULL;
+
+       rc = call_usermodehelper(argv[0], argv, envp, 1);
+       if (rc >= 0 || rc == -ENOENT) {
+               CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
+                      argv[0], argv[1]);
+               return;
+       }
+
+       CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
+              rc, argv[0], argv[1]);
+}
+
+/**
+ * Run the program named by lnet_upcall with the given argument vector.
+ *
+ * argv[0] is overwritten with lnet_upcall, so the caller supplies a
+ * NULL-terminated vector whose real arguments start at argv[1]; at least
+ * one such argument is required.  At most the first four arguments are
+ * logged.
+ *
+ * \param argv  NULL-terminated argument vector; slot 0 is filled in here
+ */
+void libcfs_run_upcall(char **argv)
+{
+       char *envp[] = {
+               "HOME=/",
+               "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+               NULL};
+       int   argc;
+       int   rc;
+
+       argv[0] = lnet_upcall;
+
+       /* count the entries, including argv[0] */
+       for (argc = 1; argv[argc]; argc++)
+               ;
+
+       LASSERT(argc >= 2);
+
+       rc = call_usermodehelper(argv[0], argv, envp, 1);
+       if (rc >= 0 || rc == -ENOENT) {
+               CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n",
+                      argv[0], argv[1],
+                      argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
+                      argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
+                      argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
+                      argc < 6 ? "" : ",...");
+               return;
+       }
+
+       CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; check /sys/kernel/debug/lnet/upcall\n",
+              rc, argv[0], argv[1],
+              argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
+              argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
+              argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
+              argc < 6 ? "" : ",...");
+}
+
+/**
+ * Run the LNET upcall for an LBUG, passing "LBUG", the source file, the
+ * function name and the line number taken from \a msgdata.
+ */
+void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *msgdata)
+{
+       char line_str[32];
+       char *argv[6];
+
+       snprintf(line_str, sizeof(line_str), "%d", msgdata->msg_line);
+
+       /* argv[0] is filled in by libcfs_run_upcall() */
+       argv[1] = "LBUG";
+       argv[2] = (char *)msgdata->msg_file;
+       argv[3] = (char *)msgdata->msg_fn;
+       argv[4] = line_str;
+       argv[5] = NULL;
+
+       libcfs_run_upcall(argv);
+}
+EXPORT_SYMBOL(libcfs_run_lbug_upcall);
+
+/*
+ * Handle an LBUG at the location described by msgdata.  Never returns:
+ * it either panics or parks the calling task forever.
+ */
+/* coverity[+kill] */
+void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
+{
+       libcfs_catastrophe = 1;
+       libcfs_debug_msg(msgdata, "LBUG\n");
+
+       if (in_interrupt()) {
+               panic("LBUG in interrupt.\n");
+               /* not reached */
+       }
+
+       dump_stack();
+       /* the debug log is only dumped here when we are not going to panic */
+       if (!libcfs_panic_on_lbug)
+               libcfs_debug_dumplog();
+       libcfs_run_lbug_upcall(msgdata);
+       if (libcfs_panic_on_lbug)
+               panic("LBUG");
+       /* no panic requested: put this task to sleep for good */
+       set_task_state(current, TASK_UNINTERRUPTIBLE);
+       while (1)
+               schedule();
+}
+EXPORT_SYMBOL(lbug_with_loc);
+
+/*
+ * Panic notifier callback: set libcfs_panic_in_progress once, followed by
+ * a memory barrier.  Always returns 0.
+ */
+static int panic_notifier(struct notifier_block *self, unsigned long unused1,
+                         void *unused2)
+{
+       if (!libcfs_panic_in_progress) {
+               libcfs_panic_in_progress = 1;
+               mb();
+       }
+
+       return 0;
+}
+
+/* Notifier block hooked into panic_notifier_list by the functions below. */
+static struct notifier_block libcfs_panic_notifier = {
+       .notifier_call  = panic_notifier,
+       .next           = NULL,
+       .priority       = 10000,
+};
+
+/* Add libcfs_panic_notifier to the kernel's panic_notifier_list. */
+void libcfs_register_panic_notifier(void)
+{
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &libcfs_panic_notifier);
+}
+
+/* Remove libcfs_panic_notifier from the kernel's panic_notifier_list. */
+void libcfs_unregister_panic_notifier(void)
+{
+       atomic_notifier_chain_unregister(&panic_notifier_list,
+                                        &libcfs_panic_notifier);
+}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c

new file mode 100644 (file)

index 0000000..86f32ff
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c
@@ -0,0 +1,59 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ */
+/*
+ * This file creates a memory allocation primitive for Lustre that falls
+ * back to vmalloc allocations when regular kernel allocations fail due to
+ * size or system memory fragmentation.
+ *
+ * Author: Oleg Drokin <green@linuxhacker.ru>
+ *
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Seagate Technology.
+ */
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "../../../include/linux/libcfs/libcfs.h"
+
+/*
+ * Allocate zeroed memory: try kzalloc() first (with __GFP_NOWARN) and fall
+ * back to __vmalloc() with __GFP_ZERO if that fails.  Returns NULL when
+ * both attempts fail.
+ */
+void *libcfs_kvzalloc(size_t size, gfp_t flags)
+{
+       void *buf = kzalloc(size, flags | __GFP_NOWARN);
+
+       if (buf)
+               return buf;
+
+       return __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
+}
+EXPORT_SYMBOL(libcfs_kvzalloc);
+
+/*
+ * Allocate zeroed memory on the NUMA node chosen by cfs_cpt_spread_node()
+ * for partition cpt.
+ *
+ * kzalloc_node() is tried first (with __GFP_NOWARN).  If it fails the
+ * allocation falls back to the vmalloc area; a warning is raised when
+ * flags contains neither __GFP_FS nor __GFP_HIGH.
+ *
+ * Returns the zero-filled buffer, or NULL if both allocations fail.
+ */
+void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size,
+                         gfp_t flags)
+{
+       void *ret;
+
+       ret = kzalloc_node(size, flags | __GFP_NOWARN,
+                          cfs_cpt_spread_node(cptab, cpt));
+       if (!ret) {
+               WARN_ON(!(flags & (__GFP_FS | __GFP_HIGH)));
+               /*
+                * The fallback must zero the memory too: callers of a
+                * "zalloc" function rely on it, and vmalloc_node() would
+                * hand back uninitialised pages.
+                */
+               ret = vzalloc_node(size, cfs_cpt_spread_node(cptab, cpt));
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(libcfs_kvzalloc_cpt);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c

new file mode 100644 (file)

index 0000000..ebc60ac
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
@@ -0,0 +1,159 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../../include/linux/libcfs/libcfs.h"
+
+#define LNET_MINOR 240
+
+/*
+ * Validate an ioctl data block and point its inline buffer pointers into
+ * ioc_bulk: ioc_inlbuf1 at the start, ioc_inlbuf2 after the rounded-up
+ * length of the first buffer.  A pointer is only set when its length is
+ * non-zero.
+ *
+ * Returns 0 on success or -EINVAL if libcfs_ioctl_is_invalid() rejects it.
+ */
+int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
+{
+       if (!libcfs_ioctl_is_invalid(data)) {
+               if (data->ioc_inllen1)
+                       data->ioc_inlbuf1 = &data->ioc_bulk[0];
+
+               if (data->ioc_inllen2)
+                       data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+                               cfs_size_round(data->ioc_inllen1);
+
+               return 0;
+       }
+
+       CERROR("LNET: ioctl not correctly formatted\n");
+       return -EINVAL;
+}
+
+/*
+ * Copy the ioctl header from user space, check its version and return the
+ * total ioctl length it declares through *len.
+ *
+ * Returns 0 on success, -EFAULT if the header cannot be copied, or -EINVAL
+ * if the version is neither LIBCFS_IOCTL_VERSION nor LIBCFS_IOCTL_VERSION2.
+ */
+int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg,
+                            __u32 *len)
+{
+       struct libcfs_ioctl_hdr ioc_hdr;
+
+       if (copy_from_user(&ioc_hdr, arg, sizeof(ioc_hdr)))
+               return -EFAULT;
+
+       if (ioc_hdr.ioc_version == LIBCFS_IOCTL_VERSION ||
+           ioc_hdr.ioc_version == LIBCFS_IOCTL_VERSION2) {
+               *len = ioc_hdr.ioc_len;
+               return 0;
+       }
+
+       CERROR("LNET: version mismatch expected %#x, got %#x\n",
+              LIBCFS_IOCTL_VERSION, ioc_hdr.ioc_version);
+       return -EINVAL;
+}
+
+/*
+ * Copy size bytes of ioctl result data back to user space.
+ * Returns 0 on success or -EFAULT if the copy fails.
+ */
+int libcfs_ioctl_popdata(void __user *arg, void *data, int size)
+{
+       return copy_to_user(arg, data, size) ? -EFAULT : 0;
+}
+
+/*
+ * open() handler for the device: forwards to libcfs_psdev_ops.p_open.
+ * Returns -EINVAL without an inode and -EPERM when no p_open is set.
+ */
+static int
+libcfs_psdev_open(struct inode *inode, struct file *file)
+{
+       if (!inode)
+               return -EINVAL;
+
+       if (!libcfs_psdev_ops.p_open)
+               return -EPERM;
+
+       return libcfs_psdev_ops.p_open(0, NULL);
+}
+
+/*
+ * called when closing /dev/device: forwards to libcfs_psdev_ops.p_close.
+ * Returns -EINVAL without an inode and -EPERM when no p_close is set.
+ */
+static int
+libcfs_psdev_release(struct inode *inode, struct file *file)
+{
+       if (!inode)
+               return -EINVAL;
+
+       if (!libcfs_psdev_ops.p_close)
+               return -EPERM;
+
+       return libcfs_psdev_ops.p_close(0, NULL);
+}
+
+/*
+ * unlocked_ioctl handler for the device.
+ *
+ * Requires CAP_SYS_ADMIN (-EACCES otherwise) and rejects commands whose
+ * type or number is outside the libcfs range with -EINVAL.
+ * IOC_LIBCFS_PANIC is handled here; every other command is forwarded to
+ * libcfs_psdev_ops.p_ioctl, or fails with -EPERM when that hook is unset.
+ */
+static long libcfs_ioctl(struct file *file,
+                        unsigned int cmd, unsigned long arg)
+{
+       /*
+        * NOTE(review): pfile is passed to p_ioctl without being filled
+        * in - confirm the handler never reads it.
+        */
+       struct cfs_psdev_file    pfile;
+       int    rc = -EPERM;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
+           _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
+           _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
+               CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+                      _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+               return -EINVAL;
+       }
+
+       /* Handle platform-dependent IOC requests */
+       if (cmd == IOC_LIBCFS_PANIC) {
+               if (!capable(CFS_CAP_SYS_BOOT))
+                       return -EPERM;
+               panic("debugctl-invoked panic");
+               return 0;
+       }
+
+       if (libcfs_psdev_ops.p_ioctl)
+               rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void __user *)arg);
+
+       return rc;
+}
+
+/* File operations for the "lnet" misc device: ioctl, open and release only. */
+static const struct file_operations libcfs_fops = {
+       .unlocked_ioctl = libcfs_ioctl,
+       .open           = libcfs_psdev_open,
+       .release        = libcfs_psdev_release,
+};
+
+/* Misc device named "lnet" with fixed minor LNET_MINOR, served by libcfs_fops. */
+struct miscdevice libcfs_dev = {
+       .minor = LNET_MINOR,
+       .name = "lnet",
+       .fops = &libcfs_fops,
+};
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c

new file mode 100644 (file)

index 0000000..8908446
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c
@@ -0,0 +1,147 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs_struct.h>
+#include <linux/sched.h>
+
+#include "../../../include/linux/libcfs/libcfs.h"
+
+#if defined(CONFIG_KGDB)
+#include <linux/kgdb.h>
+#endif
+
+/**
+ * Add @link to @waitq as an exclusive waiter, holding the queue lock with
+ * interrupts disabled for the duration of the insertion.
+ *
+ * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * waiting threads, which is not always desirable because all threads will
+ * be woken up again and again, even if the user only needs a few of them
+ * to be active most of the time. This is bad for performance because the
+ * cache can be polluted by different threads.
+ *
+ * A LIFO list resolves this problem because we always wake up the most
+ * recently active thread by default.
+ *
+ * NB: please don't call non-exclusive & exclusive wait on the same
+ * waitq if add_wait_queue_exclusive_head is used.
+ */
+void
+add_wait_queue_exclusive_head(wait_queue_head_t *waitq, wait_queue_t *link)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&waitq->lock, flags);
+       __add_wait_queue_exclusive(waitq, link);
+       spin_unlock_irqrestore(&waitq->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_head);
+
+/*
+ * Block every signal for the current task.
+ *
+ * Returns the blocked mask that was in effect before the call so that it
+ * can later be handed back to cfs_restore_sigs().
+ */
+sigset_t
+cfs_block_allsigs(void)
+{
+       unsigned long     flags;
+       sigset_t        old;
+
+       /* current->blocked is only touched with siglock held */
+       spin_lock_irqsave(&current->sighand->siglock, flags);
+       old = current->blocked;
+       sigfillset(&current->blocked);
+       recalc_sigpending();
+       spin_unlock_irqrestore(&current->sighand->siglock, flags);
+
+       return old;
+}
+EXPORT_SYMBOL(cfs_block_allsigs);
+
+/*
+ * Add the signals in the bit mask @sigs to the current task's blocked set.
+ *
+ * Returns the blocked mask that was in effect before the call.
+ */
+sigset_t cfs_block_sigs(unsigned long sigs)
+{
+       unsigned long  flags;
+       sigset_t        old;
+
+       spin_lock_irqsave(&current->sighand->siglock, flags);
+       old = current->blocked;
+       sigaddsetmask(&current->blocked, sigs);
+       recalc_sigpending();
+       spin_unlock_irqrestore(&current->sighand->siglock, flags);
+       return old;
+}
+EXPORT_SYMBOL(cfs_block_sigs);
+
+/*
+ * Block all signals except for the @sigs.
+ *
+ * The complement of @sigs is added to the current blocked set, so a signal
+ * in @sigs that was already blocked before the call stays blocked.
+ * Returns the blocked mask that was in effect before the call.
+ */
+sigset_t cfs_block_sigsinv(unsigned long sigs)
+{
+       unsigned long flags;
+       sigset_t old;
+
+       spin_lock_irqsave(&current->sighand->siglock, flags);
+       old = current->blocked;
+       sigaddsetmask(&current->blocked, ~sigs);
+       recalc_sigpending();
+       spin_unlock_irqrestore(&current->sighand->siglock, flags);
+
+       return old;
+}
+EXPORT_SYMBOL(cfs_block_sigsinv);
+
+/*
+ * Replace the current task's blocked signal set with @old (typically the
+ * value returned by one of the cfs_block_*() helpers) and re-evaluate
+ * whether a signal is pending.
+ */
+void
+cfs_restore_sigs(sigset_t old)
+{
+       unsigned long  flags;
+
+       spin_lock_irqsave(&current->sighand->siglock, flags);
+       current->blocked = old;
+       recalc_sigpending();
+       spin_unlock_irqrestore(&current->sighand->siglock, flags);
+}
+EXPORT_SYMBOL(cfs_restore_sigs);
+
+/* Return the result of signal_pending() for the current task. */
+int
+cfs_signal_pending(void)
+{
+       return signal_pending(current);
+}
+EXPORT_SYMBOL(cfs_signal_pending);
+
+/*
+ * Clear the TIF_SIGPENDING thread flag of the current task while holding
+ * its siglock.
+ */
+void
+cfs_clear_sigpending(void)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&current->sighand->siglock, flags);
+       clear_tsk_thread_flag(current, TIF_SIGPENDING);
+       spin_unlock_irqrestore(&current->sighand->siglock, flags);
+}
+EXPORT_SYMBOL(cfs_clear_sigpending);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c

new file mode 100644 (file)

index 0000000..91c2ae8
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c
@@ -0,0 +1,259 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+
+#include "../../../include/linux/libcfs/libcfs.h"
+#include "../tracefile.h"
+
+/*
+ * Percentage of the total debug memory given to each trace buffer type,
+ * indexed by buffer type; copied into tcd_pages_factor at init time.
+ */
+static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
+       80,  /* 80% pages for CFS_TCD_TYPE_PROC */
+       10,  /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
+       10   /* 10% pages for CFS_TCD_TYPE_IRQ */
+};
+
+/* One console buffer per CPU and buffer type, allocated at init time. */
+char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
+
+/* Reader/writer semaphore behind the cfs_tracefile_{read,write}_{lock,unlock}() helpers. */
+static DECLARE_RWSEM(cfs_tracefile_sem);
+
+/*
+ * Allocate and initialise the per-type trace data arrays and the per-CPU
+ * console buffers.
+ *
+ * Returns 0 on success.  On allocation failure everything allocated so far
+ * is released through cfs_tracefile_fini_arch() and -ENOMEM is returned.
+ */
+int cfs_tracefile_init_arch(void)
+{
+       int    i;
+       int    j;
+       struct cfs_trace_cpu_data *tcd;
+
+       /* initialize trace_data */
+       memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
+       for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
+               /* kmalloc_array() fails cleanly if count * size overflows */
+               cfs_trace_data[i] =
+                       kmalloc_array(num_possible_cpus(),
+                                     sizeof(union cfs_trace_data_union),
+                                     GFP_KERNEL);
+               if (!cfs_trace_data[i])
+                       goto out;
+       }
+
+       /* arch related info initialized */
+       cfs_tcd_for_each(tcd, i, j) {
+               spin_lock_init(&tcd->tcd_lock);
+               tcd->tcd_pages_factor = pages_factor[i];
+               tcd->tcd_type = i;
+               tcd->tcd_cpu = j;
+       }
+
+       /* the second dimension of the buffer table is CFS_TCD_TYPE_MAX */
+       for (i = 0; i < num_possible_cpus(); i++)
+               for (j = 0; j < CFS_TCD_TYPE_MAX; j++) {
+                       cfs_trace_console_buffers[i][j] =
+                               kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
+                                       GFP_KERNEL);
+
+                       if (!cfs_trace_console_buffers[i][j])
+                               goto out;
+               }
+
+       return 0;
+
+out:
+       cfs_tracefile_fini_arch();
+       printk(KERN_ERR "lnet: Not enough memory\n");
+       return -ENOMEM;
+}
+
+/*
+ * Free the console buffers and trace data arrays set up by
+ * cfs_tracefile_init_arch().  Safe to call after a partially failed init:
+ * slots that were never allocated are NULL and kfree(NULL) is a no-op.
+ */
+void cfs_tracefile_fini_arch(void)
+{
+       int    i;
+       int    j;
+
+       /* the second dimension of the buffer table is CFS_TCD_TYPE_MAX */
+       for (i = 0; i < num_possible_cpus(); i++)
+               for (j = 0; j < CFS_TCD_TYPE_MAX; j++) {
+                       kfree(cfs_trace_console_buffers[i][j]);
+                       cfs_trace_console_buffers[i][j] = NULL;
+               }
+
+       /* cfs_trace_data is walked up to its first NULL entry */
+       for (i = 0; cfs_trace_data[i]; i++) {
+               kfree(cfs_trace_data[i]);
+               cfs_trace_data[i] = NULL;
+       }
+}
+
+/* Take cfs_tracefile_sem for reading. */
+void cfs_tracefile_read_lock(void)
+{
+       down_read(&cfs_tracefile_sem);
+}
+
+/* Drop a read hold on cfs_tracefile_sem. */
+void cfs_tracefile_read_unlock(void)
+{
+       up_read(&cfs_tracefile_sem);
+}
+
+/* Take cfs_tracefile_sem for writing. */
+void cfs_tracefile_write_lock(void)
+{
+       down_write(&cfs_tracefile_sem);
+}
+
+/* Drop the write hold on cfs_tracefile_sem. */
+void cfs_tracefile_write_unlock(void)
+{
+       up_write(&cfs_tracefile_sem);
+}
+
+/*
+ * Pick the trace buffer type for the current execution context: hard
+ * interrupt, soft interrupt, or (otherwise) process context.
+ */
+enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
+{
+       enum cfs_trace_buf_type type = CFS_TCD_TYPE_PROC;
+
+       if (in_irq())
+               type = CFS_TCD_TYPE_IRQ;
+       else if (in_softirq())
+               type = CFS_TCD_TYPE_SOFTIRQ;
+
+       return type;
+}
+
+/*
+ * Lock @tcd with the locking primitive that matches its buffer type:
+ * irqsave for IRQ buffers, _bh for softirq buffers, plain spin_lock for
+ * process buffers.
+ *
+ * The walking argument indicates the locking comes from all tcd types
+ * iterator and we must lock it and disable local irqs to avoid deadlocks
+ * with other interrupt locks that might be happening. See LU-1311
+ * for details.
+ *
+ * Always returns 1.
+ */
+int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+       __acquires(&tcd->tcd_lock)
+{
+       __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
+       if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+               spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
+       else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+               spin_lock_bh(&tcd->tcd_lock);
+       else if (unlikely(walking))
+               spin_lock_irq(&tcd->tcd_lock);
+       else
+               spin_lock(&tcd->tcd_lock);
+       return 1;
+}
+
+/*
+ * Unlock @tcd using the counterpart of the primitive chosen by
+ * cfs_trace_lock_tcd(); @walking must match the value passed when the
+ * lock was taken so that the same branch is selected.
+ */
+void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+       __releases(&tcd->tcd_lock)
+{
+       __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
+       if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+               spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
+       else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+               spin_unlock_bh(&tcd->tcd_lock);
+       else if (unlikely(walking))
+               spin_unlock_irq(&tcd->tcd_lock);
+       else
+               spin_unlock(&tcd->tcd_lock);
+}
+
+/*
+ * Fill @header for one debug message: origin taken from @msgdata, the
+ * current CPU, buffer type and pid, the caller-supplied @stack value and
+ * the current wall-clock time.
+ */
+void
+cfs_set_ptldebug_header(struct ptldebug_header *header,
+                       struct libcfs_debug_msg_data *msgdata,
+                       unsigned long stack)
+{
+       struct timespec64 now;
+
+       ktime_get_real_ts64(&now);
+
+       /* where the message comes from */
+       header->ph_subsys = msgdata->msg_subsys;
+       header->ph_mask = msgdata->msg_mask;
+       header->ph_line_num = msgdata->msg_line;
+
+       /* execution context */
+       header->ph_cpu_id = smp_processor_id();
+       header->ph_type = cfs_trace_buf_idx_get();
+       header->ph_pid = current->pid;
+       header->ph_extern_pid = 0;
+       header->ph_stack = stack;
+
+       /* y2038 safe since all user space treats this as unsigned, but
+        * will overflow in 2106
+        */
+       header->ph_sec = (u32)now.tv_sec;
+       header->ph_usec = now.tv_nsec / NSEC_PER_USEC;
+}
+
+/* Console prefix for error-level messages, chosen by originating subsystem. */
+static char *
+dbghdr_to_err_string(struct ptldebug_header *hdr)
+{
+       if (hdr->ph_subsys == S_LND || hdr->ph_subsys == S_LNET)
+               return "LNetError";
+
+       return "LustreError";
+}
+
+/* Console prefix for non-error messages, chosen by originating subsystem. */
+static char *
+dbghdr_to_info_string(struct ptldebug_header *hdr)
+{
+       if (hdr->ph_subsys == S_LND || hdr->ph_subsys == S_LNET)
+               return "LNet";
+
+       return "Lustre";
+}
+
+/*
+ * Print one debug message to the kernel console.
+ *
+ * The log level and the "Lustre"/"LNet" style prefix are derived from
+ * @mask and the subsystem recorded in @hdr.  D_CONSOLE messages are
+ * printed bare; everything else is decorated with pid, file, line and
+ * function.  @buf does not need to be NUL-terminated, only @len bytes of
+ * it are printed.
+ */
+void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
+                         const char *buf, int len, const char *file,
+                         const char *fn)
+{
+       /*
+        * Fall back to an informational "Lustre" line when none of the
+        * mask bits below match, so that "%s" is never handed a NULL
+        * log level.
+        */
+       char *prefix = "Lustre", *ptype = KERN_INFO;
+
+       if ((mask & D_EMERG) != 0) {
+               prefix = dbghdr_to_err_string(hdr);
+               ptype = KERN_EMERG;
+       } else if ((mask & D_ERROR) != 0) {
+               prefix = dbghdr_to_err_string(hdr);
+               ptype = KERN_ERR;
+       } else if ((mask & D_WARNING) != 0) {
+               prefix = dbghdr_to_info_string(hdr);
+               ptype = KERN_WARNING;
+       } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) {
+               prefix = dbghdr_to_info_string(hdr);
+               ptype = KERN_INFO;
+       }
+
+       if ((mask & D_CONSOLE) != 0) {
+               printk("%s%s: %.*s", ptype, prefix, len, buf);
+       } else {
+               printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
+                      hdr->ph_pid, hdr->ph_extern_pid, file, hdr->ph_line_num,
+                      fn, len, buf);
+       }
+}
+
+/*
+ * Upper limit for the debug buffer size in megabytes: 80% of total RAM,
+ * but never less than 512.
+ */
+int cfs_trace_max_debug_mb(void)
+{
+       int ram_mb = totalram_pages >> (20 - PAGE_SHIFT);
+       int limit = (ram_mb * 80) / 100;
+
+       return limit > 512 ? limit : 512;
+}
diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c

new file mode 100644 (file)

index 0000000..cdc640b
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/module.c
@@ -0,0 +1,674 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2015 Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <linux/uio.h>
+
+#include <linux/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/list.h>
+
+#include <linux/sysctl.h>
+#include <linux/debugfs.h>
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#define LNET_MAX_IOCTL_BUF_LEN (sizeof(struct lnet_ioctl_net_config) + \
+                               sizeof(struct lnet_ioctl_config_data))
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <asm/div64.h>
+
+#include "../../include/linux/libcfs/libcfs_crypto.h"
+#include "../../include/linux/lnet/lib-lnet.h"
+#include "../../include/linux/lnet/lib-dlc.h"
+#include "../../include/linux/lnet/lnet.h"
+#include "tracefile.h"
+
+/* The "lnet" debugfs directory; created on first use, NULL until then. */
+static struct dentry *lnet_debugfs_root;
+
+/*
+ * called when opening /dev/device
+ *
+ * Pins this module while the device is open; the reference is dropped
+ * again in libcfs_psdev_release().  Returns 0 on success or -ENODEV when
+ * the module reference cannot be taken, so that a failed get is never
+ * paired with a later module_put().
+ */
+static int libcfs_psdev_open(unsigned long flags, void *args)
+{
+       if (!try_module_get(THIS_MODULE))
+               return -ENODEV;
+       return 0;
+}
+
+/*
+ * called when closing /dev/device
+ *
+ * Drops one reference on this module; always returns 0.
+ */
+static int libcfs_psdev_release(unsigned long flags, void *args)
+{
+       module_put(THIS_MODULE);
+       return 0;
+}
+
+/*
+ * Registered ioctl handlers.  ioctl_list_sem is taken for writing when
+ * the list is modified and for reading when it is walked.
+ */
+static DECLARE_RWSEM(ioctl_list_sem);
+static LIST_HEAD(ioctl_list);
+
+/*
+ * Append @hand to the list of ioctl handlers.
+ *
+ * Returns 0 on success, or -EBUSY when @hand->item is already linked
+ * into a list.
+ */
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
+{
+       int ret = -EBUSY;
+
+       down_write(&ioctl_list_sem);
+       if (list_empty(&hand->item)) {
+               list_add_tail(&hand->item, &ioctl_list);
+               ret = 0;
+       }
+       up_write(&ioctl_list_sem);
+
+       return ret;
+}
+EXPORT_SYMBOL(libcfs_register_ioctl);
+
+/*
+ * Unlink @hand from the list of ioctl handlers.
+ *
+ * Returns 0 on success, or -ENOENT when @hand->item is not linked into
+ * any list.
+ */
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
+{
+       int ret = -ENOENT;
+
+       down_write(&ioctl_list_sem);
+       if (!list_empty(&hand->item)) {
+               list_del_init(&hand->item);
+               ret = 0;
+       }
+       up_write(&ioctl_list_sem);
+
+       return ret;
+}
+EXPORT_SYMBOL(libcfs_deregister_ioctl);
+
+/*
+ * Dispatch one ioctl whose argument has already been copied into @hdr.
+ *
+ * IOC_LIBCFS_CLEAR_DEBUG and IOC_LIBCFS_MARK_DEBUG are handled here.  Any
+ * other command is offered to the registered handlers in list order; the
+ * first one that returns something other than -EINVAL ends the search,
+ * and if it returned 0 the (possibly updated) buffer is copied back to
+ * @arg.
+ *
+ * Returns 0 on success or a negative errno; -EINVAL when no handler
+ * accepts @cmd.
+ */
+static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd,
+                              void __user *arg, struct libcfs_ioctl_hdr *hdr)
+{
+       struct libcfs_ioctl_data *data = NULL;
+       int err = -EINVAL;
+
+       /*
+        * The libcfs_ioctl_data_adjust() function performs adjustment
+        * operations on the libcfs_ioctl_data structure to make
+        * it usable by the code.  This doesn't need to be called
+        * for new data structures added.
+        */
+       if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) {
+               data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
+               err = libcfs_ioctl_data_adjust(data);
+               if (err)
+                       return err;
+       }
+
+       switch (cmd) {
+       case IOC_LIBCFS_CLEAR_DEBUG:
+               libcfs_debug_clear_buffer();
+               return 0;
+       /*
+        * case IOC_LIBCFS_PANIC:
+        * Handled in arch/cfs_module.c
+        */
+       case IOC_LIBCFS_MARK_DEBUG:
+               /*
+                * data is only set for LIBCFS_IOCTL_VERSION requests, and
+                * a zero-length inline buffer has no terminator to check.
+                */
+               if (!data || !data->ioc_inlbuf1 || !data->ioc_inllen1 ||
+                   data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
+                       return -EINVAL;
+               libcfs_debug_mark_buffer(data->ioc_inlbuf1);
+               return 0;
+
+       default: {
+               struct libcfs_ioctl_handler *hand;
+
+               err = -EINVAL;
+               down_read(&ioctl_list_sem);
+               list_for_each_entry(hand, &ioctl_list, item) {
+                       err = hand->handle_ioctl(cmd, hdr);
+                       if (err != -EINVAL) {
+                               if (err == 0)
+                                       err = libcfs_ioctl_popdata(arg,
+                                                       hdr, hdr->ioc_len);
+                               break;
+                       }
+               }
+               up_read(&ioctl_list_sem);
+               break;
+       }
+       }
+
+       return err;
+}
+
+/*
+ * p_ioctl hook: copy the user's ioctl buffer into kernel memory and hand
+ * it to libcfs_ioctl_handle().
+ *
+ * Returns 0 on success or a negative errno: the error reported by
+ * libcfs_ioctl_getdata_len(), -EINVAL for a buffer that is too small,
+ * too large or inconsistent, -ENOMEM when the kernel copy cannot be
+ * allocated, -EFAULT when the user buffer cannot be read, or the
+ * handler's result.
+ */
+static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd,
+                       void __user *arg)
+{
+       struct libcfs_ioctl_hdr *hdr;
+       int err = 0;
+       __u32 buf_len;
+
+       err = libcfs_ioctl_getdata_len(arg, &buf_len);
+       if (err)
+               return err;
+
+       /* the header fields are read from the copy below, so it must fit */
+       if (buf_len < sizeof(*hdr)) {
+               CERROR("LNET: user buffer too small for ioctl\n");
+               return -EINVAL;
+       }
+
+       /*
+        * do a check here to restrict the size of the memory
+        * to allocate to guard against DoS attacks.
+        */
+       if (buf_len > LNET_MAX_IOCTL_BUF_LEN) {
+               CERROR("LNET: user buffer exceeds kernel buffer\n");
+               return -EINVAL;
+       }
+
+       LIBCFS_ALLOC_GFP(hdr, buf_len, GFP_KERNEL);
+       if (!hdr)
+               return -ENOMEM;
+
+       /* 'cmd' and permissions get checked in our arch-specific caller */
+       if (copy_from_user(hdr, arg, buf_len)) {
+               CERROR("LNET ioctl: data error\n");
+               err = -EFAULT;
+               goto out;
+       }
+
+       /*
+        * The user buffer was read a second time above and may have
+        * changed in between.  hdr->ioc_len is later used as the number
+        * of bytes to copy back out of this allocation, so it must not
+        * exceed what was allocated.
+        */
+       if (hdr->ioc_len > buf_len) {
+               CERROR("LNET ioctl: inconsistent buffer length\n");
+               err = -EINVAL;
+               goto out;
+       }
+
+       err = libcfs_ioctl_handle(pfile, cmd, arg, hdr);
+
+out:
+       LIBCFS_FREE(hdr, buf_len);
+       return err;
+}
+
+/*
+ * Hooks used by the device file operations.  The initialiser is
+ * positional: open, release, two unused (NULL) slots, then ioctl.
+ * NOTE(review): the names of the two NULL members are not visible here;
+ * check struct cfs_psdev_ops before relying on the slot order.
+ */
+struct cfs_psdev_ops libcfs_psdev_ops = {
+       libcfs_psdev_open,
+       libcfs_psdev_release,
+       NULL,
+       NULL,
+       libcfs_ioctl
+};
+
+/*
+ * Adapter between a proc_handler style call (length and position passed
+ * by pointer) and a simpler @handler that takes them by value and returns
+ * a byte count or a negative errno.
+ *
+ * On a read, *lenp is set to the number of bytes the handler produced.
+ * In both directions *ppos is advanced by the resulting *lenp.
+ *
+ * Returns 0 on success or the handler's negative errno.
+ */
+int lprocfs_call_handler(void *data, int write, loff_t *ppos,
+                        void __user *buffer, size_t *lenp,
+                        int (*handler)(void *data, int write, loff_t pos,
+                                       void __user *buffer, int len))
+{
+       int ret;
+
+       ret = handler(data, write, *ppos, buffer, *lenp);
+       if (ret < 0)
+               return ret;
+
+       /* a write consumes the whole request; a read reports its size */
+       if (!write)
+               *lenp = ret;
+       *ppos += *lenp;
+
+       return 0;
+}
+EXPORT_SYMBOL(lprocfs_call_handler);
+
+/*
+ * Read or write one of the debug bit masks as text.
+ *
+ * @data points at the mask.  A read renders the mask through
+ * libcfs_debug_mask2str() and copies out the part starting at @pos; a
+ * write parses the user string through libcfs_debug_str2mask().  When the
+ * mask is libcfs_printk, D_EMERG is forced on after every write.
+ *
+ * Returns the value of the last helper called (0 for a read at or past
+ * the end of the text), or a negative errno.
+ */
+static int __proc_dobitmasks(void *data, int write,
+                            loff_t pos, void __user *buffer, int nob)
+{
+       const int buflen = 512;
+       unsigned int *mask = data;
+       int is_subsys = mask == &libcfs_subsystem_debug;
+       int is_printk = mask == &libcfs_printk;
+       char *str;
+       int ret;
+
+       ret = cfs_trace_allocate_string_buffer(&str, buflen);
+       if (ret < 0)
+               return ret;
+
+       if (write) {
+               ret = cfs_trace_copyin_string(str, buflen, buffer, nob);
+               if (ret < 0)
+                       goto out;
+
+               ret = libcfs_debug_str2mask(mask, str, is_subsys);
+               /* Always print LBUG/LASSERT to console, so keep this mask */
+               if (is_printk)
+                       *mask |= D_EMERG;
+               goto out;
+       }
+
+       libcfs_debug_mask2str(str, buflen, *mask, is_subsys);
+       ret = strlen(str);
+       if (pos < ret)
+               ret = cfs_trace_copyout_string(buffer, nob,
+                                              str + pos, "\n");
+       else
+               ret = 0;
+
+out:
+       kfree(str);
+       return ret;
+}
+
+/* proc_handler wrapper: runs __proc_dobitmasks() on table->data. */
+static int proc_dobitmasks(struct ctl_table *table, int write,
+                          void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
+                                   __proc_dobitmasks);
+}
+
+/*
+ * Write-only entry: a write passes the user string to
+ * cfs_trace_dump_debug_buffer_usrstr(); a read yields nothing (0).
+ */
+static int __proc_dump_kernel(void *data, int write,
+                             loff_t pos, void __user *buffer, int nob)
+{
+       return write ? cfs_trace_dump_debug_buffer_usrstr(buffer, nob) : 0;
+}
+
+/* proc_handler wrapper around __proc_dump_kernel(). */
+static int proc_dump_kernel(struct ctl_table *table, int write,
+                           void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
+                                   __proc_dump_kernel);
+}
+
+/*
+ * A read copies out cfs_tracefile starting at @pos (0 once @pos is at or
+ * past its end); a write passes the user string to
+ * cfs_trace_daemon_command_usrstr().
+ */
+static int __proc_daemon_file(void *data, int write,
+                             loff_t pos, void __user *buffer, int nob)
+{
+       int len;
+
+       if (write)
+               return cfs_trace_daemon_command_usrstr(buffer, nob);
+
+       len = strlen(cfs_tracefile);
+       if (pos >= len)
+               return 0;
+
+       return cfs_trace_copyout_string(buffer, nob,
+                                       cfs_tracefile + pos, "\n");
+}
+
+/* proc_handler wrapper around __proc_daemon_file(). */
+static int proc_daemon_file(struct ctl_table *table, int write,
+                           void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
+                                   __proc_daemon_file);
+}
+
+/*
+ * Any write to this entry triggers LBUG(); the written data is ignored.
+ * A read does nothing and returns 0.
+ */
+static int libcfs_force_lbug(struct ctl_table *table, int write,
+                            void __user *buffer,
+                            size_t *lenp, loff_t *ppos)
+{
+       if (write)
+               LBUG();
+       return 0;
+}
+
+/*
+ * Read/write cfs_fail_loc through proc_doulongvec_minmax() and wake up
+ * the waiters on cfs_race_waitq whenever the value changed.
+ */
+static int proc_fail_loc(struct ctl_table *table, int write,
+                        void __user *buffer,
+                        size_t *lenp, loff_t *ppos)
+{
+       int rc;
+       long old_fail_loc = cfs_fail_loc;
+
+       rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+       if (old_fail_loc != cfs_fail_loc)
+               wake_up(&cfs_race_waitq);
+       return rc;
+}
+
+/*
+ * Read-only entry that prints cfs_cpt_table as text.
+ *
+ * The table is printed into a temporary buffer that starts at 4096 bytes
+ * and is doubled for as long as cfs_cpt_table_print() reports -EFBIG.
+ * The part of the text starting at @pos is then copied out.
+ *
+ * Returns the result of cfs_trace_copyout_string(), 0 when @pos is at or
+ * past the end of the text, -EPERM on a write, -ENOMEM when the buffer
+ * cannot be allocated, or another negative errno from
+ * cfs_cpt_table_print().
+ */
+static int __proc_cpt_table(void *data, int write,
+                           loff_t pos, void __user *buffer, int nob)
+{
+       char *buf = NULL;
+       int   len = 4096;
+       int   rc  = 0;
+
+       if (write)
+               return -EPERM;
+
+       LASSERT(cfs_cpt_table);
+
+       while (1) {
+               LIBCFS_ALLOC(buf, len);
+               if (!buf)
+                       return -ENOMEM;
+
+               /* rc >= 0 is the length of the printed text */
+               rc = cfs_cpt_table_print(cfs_cpt_table, buf, len);
+               if (rc >= 0)
+                       break;
+
+               if (rc == -EFBIG) {
+                       /* free with the old length before doubling it */
+                       LIBCFS_FREE(buf, len);
+                       len <<= 1;
+                       continue;
+               }
+               goto out;
+       }
+
+       if (pos >= rc) {
+               rc = 0;
+               goto out;
+       }
+
+       rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
+ out:
+       if (buf)
+               LIBCFS_FREE(buf, len);
+       return rc;
+}
+
+/* proc_handler wrapper around __proc_cpt_table(). */
+static int proc_cpt_table(struct ctl_table *table, int write,
+                         void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
+                                   __proc_cpt_table);
+}
+
+/*
+ * Tunables exposed as files in the "lnet" debugfs directory by
+ * lustre_insert_debugfs().  .mode selects the file permissions and which
+ * of the read/write file operations are offered; the empty entry
+ * terminates the table.
+ */
+static struct ctl_table lnet_table[] = {
+       {
+               .procname = "debug",
+               .data     = &libcfs_debug,
+               .maxlen   = sizeof(int),
+               .mode     = 0644,
+               .proc_handler = &proc_dobitmasks,
+       },
+       {
+               .procname = "subsystem_debug",
+               .data     = &libcfs_subsystem_debug,
+               .maxlen   = sizeof(int),
+               .mode     = 0644,
+               .proc_handler = &proc_dobitmasks,
+       },
+       {
+               .procname = "printk",
+               .data     = &libcfs_printk,
+               .maxlen   = sizeof(int),
+               .mode     = 0644,
+               .proc_handler = &proc_dobitmasks,
+       },
+       {
+               .procname = "cpu_partition_table",
+               .maxlen   = 128,
+               .mode     = 0444,
+               .proc_handler = &proc_cpt_table,
+       },
+
+       {
+               .procname = "upcall",
+               .data     = lnet_upcall,
+               .maxlen   = sizeof(lnet_upcall),
+               .mode     = 0644,
+               .proc_handler = &proc_dostring,
+       },
+       {
+               .procname = "debug_log_upcall",
+               .data     = lnet_debug_log_upcall,
+               .maxlen   = sizeof(lnet_debug_log_upcall),
+               .mode     = 0644,
+               .proc_handler = &proc_dostring,
+       },
+       {
+               .procname = "catastrophe",
+               .data     = &libcfs_catastrophe,
+               .maxlen   = sizeof(int),
+               .mode     = 0444,
+               .proc_handler = &proc_dointvec,
+       },
+       {
+               .procname = "dump_kernel",
+               .maxlen   = 256,
+               .mode     = 0200,
+               .proc_handler = &proc_dump_kernel,
+       },
+       {
+               .procname = "daemon_file",
+               .mode     = 0644,
+               .maxlen   = 256,
+               .proc_handler = &proc_daemon_file,
+       },
+       {
+               .procname = "force_lbug",
+               .data     = NULL,
+               .maxlen   = 0,
+               .mode     = 0200,
+               .proc_handler = &libcfs_force_lbug
+       },
+       {
+               .procname = "fail_loc",
+               .data     = &cfs_fail_loc,
+               .maxlen   = sizeof(cfs_fail_loc),
+               .mode     = 0644,
+               .proc_handler = &proc_fail_loc
+       },
+       {
+               .procname = "fail_val",
+               .data     = &cfs_fail_val,
+               .maxlen   = sizeof(int),
+               .mode     = 0644,
+               .proc_handler = &proc_dointvec
+       },
+       {
+       }
+};
+
+/*
+ * Symlinks created in the "lnet" debugfs directory; each { name, target }
+ * pair points at a libcfs module parameter under /sys/module.  The empty
+ * entry terminates the table.
+ */
+static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = {
+       { "console_ratelimit",
+         "/sys/module/libcfs/parameters/libcfs_console_ratelimit"},
+       { "debug_path",
+         "/sys/module/libcfs/parameters/libcfs_debug_file_path"},
+       { "panic_on_lbug",
+         "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"},
+       { "libcfs_console_backoff",
+         "/sys/module/libcfs/parameters/libcfs_console_backoff"},
+       { "debug_mb",
+         "/sys/module/libcfs/parameters/libcfs_debug_mb"},
+       { "console_min_delay_centisecs",
+         "/sys/module/libcfs/parameters/libcfs_console_min_delay"},
+       { "console_max_delay_centisecs",
+         "/sys/module/libcfs/parameters/libcfs_console_max_delay"},
+       {},
+};
+
+/*
+ * debugfs read: run the ctl_table entry stored in filp->private_data
+ * through its proc_handler in read mode.
+ *
+ * Returns the handler's non-zero result as is; otherwise the byte count
+ * left in count by the handler.
+ */
+static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf,
+                                size_t count, loff_t *ppos)
+{
+       struct ctl_table *entry = filp->private_data;
+       int rc;
+
+       rc = entry->proc_handler(entry, 0, (void __user *)buf, &count, ppos);
+       if (rc)
+               return rc;
+
+       /* the count is narrowed to int before being returned */
+       return (int)count;
+}
+
+/*
+ * debugfs write: run the ctl_table entry stored in filp->private_data
+ * through its proc_handler in write mode.
+ *
+ * Returns the handler's non-zero result as is; otherwise the byte count
+ * left in count by the handler.
+ */
+static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf,
+                                 size_t count, loff_t *ppos)
+{
+       struct ctl_table *entry = filp->private_data;
+       int rc;
+
+       rc = entry->proc_handler(entry, 1, (void __user *)buf, &count, ppos);
+       if (rc)
+               return rc;
+
+       /* the count is narrowed to int before being returned */
+       return (int)count;
+}
+
+/* File operations for entries that are both readable and writable. */
+static const struct file_operations lnet_debugfs_file_operations_rw = {
+       .open           = simple_open,
+       .read           = lnet_debugfs_read,
+       .write          = lnet_debugfs_write,
+       .llseek         = default_llseek,
+};
+
+/* File operations for read-only entries (no .write). */
+static const struct file_operations lnet_debugfs_file_operations_ro = {
+       .open           = simple_open,
+       .read           = lnet_debugfs_read,
+       .llseek         = default_llseek,
+};
+
+/* File operations for write-only entries (no .read). */
+static const struct file_operations lnet_debugfs_file_operations_wo = {
+       .open           = simple_open,
+       .write          = lnet_debugfs_write,
+       .llseek         = default_llseek,
+};
+
+/*
+ * Choose the file operations that match the permission bits in @mode:
+ * read-only when no write bit is set, write-only when a write bit but no
+ * read bit is set, read-write otherwise.
+ */
+static const struct file_operations *lnet_debugfs_fops_select(umode_t mode)
+{
+       if (mode & S_IWUGO) {
+               if (mode & S_IRUGO)
+                       return &lnet_debugfs_file_operations_rw;
+
+               return &lnet_debugfs_file_operations_wo;
+       }
+
+       return &lnet_debugfs_file_operations_ro;
+}
+
+/*
+ * Create one debugfs file per entry of @table and one symlink per entry
+ * of @symlinks (which may be NULL) inside the "lnet" debugfs directory,
+ * creating that directory first if it does not exist yet.  Both tables
+ * end at the first entry without a name.
+ */
+void lustre_insert_debugfs(struct ctl_table *table,
+                          const struct lnet_debugfs_symlink_def *symlinks)
+{
+       const struct lnet_debugfs_symlink_def *link;
+       struct ctl_table *entry;
+
+       if (!lnet_debugfs_root)
+               lnet_debugfs_root = debugfs_create_dir("lnet", NULL);
+
+       /* If the directory cannot be created, silently do nothing */
+       if (IS_ERR_OR_NULL(lnet_debugfs_root))
+               return;
+
+       /* The dentries returned below are not saved: removal is done
+        * with debugfs_remove_recursive() on the directory instead.
+        */
+       for (entry = table; entry->procname; entry++)
+               debugfs_create_file(entry->procname, entry->mode,
+                                   lnet_debugfs_root, entry,
+                                   lnet_debugfs_fops_select(entry->mode));
+
+       if (!symlinks)
+               return;
+
+       for (link = symlinks; link->name; link++)
+               debugfs_create_symlink(link->name, lnet_debugfs_root,
+                                      link->target);
+}
+EXPORT_SYMBOL_GPL(lustre_insert_debugfs);
+
+/*
+ * Remove the "lnet" debugfs directory with everything in it and forget
+ * the cached dentry.
+ */
+static void lustre_remove_debugfs(void)
+{
+       debugfs_remove_recursive(lnet_debugfs_root);
+
+       lnet_debugfs_root = NULL;
+}
+
+/*
+ * Module init: bring up debugging, the CPU partition code, the misc
+ * device, the workitem schedulers and the crypto helpers, then publish
+ * the debugfs entries.
+ *
+ * Returns 0 on success.  On failure every step that already succeeded is
+ * undone in reverse order and the failing step's error is returned.
+ */
+static int libcfs_init(void)
+{
+       int rc;
+
+       rc = libcfs_debug_init(5 * 1024 * 1024);
+       if (rc < 0) {
+               pr_err("LustreError: libcfs_debug_init: %d\n", rc);
+               return rc;
+       }
+
+       rc = cfs_cpu_init();
+       if (rc != 0)
+               goto cleanup_debug;
+
+       rc = misc_register(&libcfs_dev);
+       if (rc) {
+               CERROR("misc_register: error %d\n", rc);
+               goto cleanup_cpu;
+       }
+
+       rc = cfs_wi_startup();
+       if (rc) {
+               CERROR("initialize workitem: error %d\n", rc);
+               goto cleanup_deregister;
+       }
+
+       /* max to 4 threads, should be enough for rehash */
+       rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4);
+       rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY,
+                                rc, &cfs_sched_rehash);
+       if (rc != 0) {
+               CERROR("Startup workitem scheduler: error: %d\n", rc);
+               /* cfs_wi_startup() succeeded, so it must be shut down */
+               goto cleanup_wi;
+       }
+
+       rc = cfs_crypto_register();
+       if (rc) {
+               CERROR("cfs_crypto_register: error %d\n", rc);
+               goto cleanup_sched;
+       }
+
+       lustre_insert_debugfs(lnet_table, lnet_debugfs_symlinks);
+
+       CDEBUG(D_OTHER, "portals setup OK\n");
+       return 0;
+ cleanup_sched:
+       /* same teardown order as libcfs_exit() */
+       if (cfs_sched_rehash) {
+               cfs_wi_sched_destroy(cfs_sched_rehash);
+               cfs_sched_rehash = NULL;
+       }
+ cleanup_wi:
+       cfs_wi_shutdown();
+ cleanup_deregister:
+       misc_deregister(&libcfs_dev);
+cleanup_cpu:
+       cfs_cpu_fini();
+ cleanup_debug:
+       libcfs_debug_cleanup();
+       return rc;
+}
+
+/*
+ * Module exit: tear down what libcfs_init() set up.  The debugfs entries
+ * go first and the debug infrastructure last; a failure of
+ * libcfs_debug_cleanup() is only logged.
+ */
+static void libcfs_exit(void)
+{
+       int rc;
+
+       lustre_remove_debugfs();
+
+       /* destroy the rehash scheduler before shutting workitems down */
+       if (cfs_sched_rehash) {
+               cfs_wi_sched_destroy(cfs_sched_rehash);
+               cfs_sched_rehash = NULL;
+       }
+
+       cfs_crypto_unregister();
+       cfs_wi_shutdown();
+
+       misc_deregister(&libcfs_dev);
+
+       cfs_cpu_fini();
+
+       rc = libcfs_debug_cleanup();
+       if (rc)
+               pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc);
+}
+
+/* Module metadata and entry points. */
+MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre helper library");
+MODULE_VERSION(LIBCFS_VERSION);
+MODULE_LICENSE("GPL");
+
+module_init(libcfs_init);
+module_exit(libcfs_exit);
diff --git a/drivers/staging/lustre/lnet/libcfs/prng.c b/drivers/staging/lustre/lnet/libcfs/prng.c

new file mode 100644 (file)

index 0000000..c75ae9a
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/prng.c
@@ -0,0 +1,140 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/prng.c
+ *
+ * concatenation of following two 16-bit multiply with carry generators
+ * x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16,
+ * number and carry packed within the same 32 bit integer.
+ * algorithm recommended by Marsaglia
+*/
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/*
+ * From: George Marsaglia <geo@stat.fsu.edu>
+ * Newsgroups: sci.math
+ * Subject: Re: A RANDOM NUMBER GENERATOR FOR C
+ * Date: Tue, 30 Sep 1997 05:29:35 -0700
+ *
+ * You may replace the two constants 36969 and 18000 by any
+ * pair of distinct constants from this list:
+ * 18000 18030 18273 18513 18879 19074 19098 19164 19215 19584
+ * 19599 19950 20088 20508 20544 20664 20814 20970 21153 21243
+ * 21423 21723 21954 22125 22188 22293 22860 22938 22965 22974
+ * 23109 23124 23163 23208 23508 23520 23553 23658 23865 24114
+ * 24219 24660 24699 24864 24948 25023 25308 25443 26004 26088
+ * 26154 26550 26679 26838 27183 27258 27753 27795 27810 27834
+ * 27960 28320 28380 28689 28710 28794 28854 28959 28980 29013
+ * 29379 29889 30135 30345 30459 30714 30903 30963 31059 31083
+ * (or any other 16-bit constants k for which both k*2^16-1
+ * and k*2^15-1 are prime)
+ */
+
+#define RANDOM_CONST_A 18030
+#define RANDOM_CONST_B 29013
+
+static unsigned int seed_x = 521288629;
+static unsigned int seed_y = 362436069;
+
+/**
+ * cfs_rand - return the next pseudo-random number
+ *
+ * Advances both 16-bit multiply-with-carry generators by one step, stores
+ * the new state back into seed_x and seed_y, and combines the two results.
+ *
+ * Returns a pseudo-random 32-bit integer
+ */
+unsigned int cfs_rand(void)
+{
+       unsigned int x = seed_x;
+       unsigned int y = seed_y;
+
+       x = RANDOM_CONST_A * (x & 0xffff) + (x >> 16);
+       y = RANDOM_CONST_B * (y & 0xffff) + (y >> 16);
+
+       seed_x = x;
+       seed_y = y;
+
+       /* low half of x goes to the upper 16 bits, low half of y below it */
+       return (x << 16) + (y & 0xffff);
+}
+EXPORT_SYMBOL(cfs_rand);
+
+/**
+ * cfs_srand - replace the generator seeds
+ * @seed1 : new value for seed_x; should have the most entropy in the low
+ *         bits of the word
+ * @seed2 : new value for seed_y; should have the most entropy in the high
+ *         bits of the word
+ *
+ * A zero argument leaves the corresponding seed at its current value.
+ */
+void cfs_srand(unsigned int seed1, unsigned int seed2)
+{
+       if (seed1 != 0)
+               seed_x = seed1;
+
+       if (seed2 != 0)
+               seed_y = seed2;
+}
+EXPORT_SYMBOL(cfs_srand);
+
+/**
+ * cfs_get_random_bytes - generate a bunch of random numbers
+ * @buf : buffer to fill with random numbers
+ * @size: size of passed in buffer, must not be negative
+ *
+ * Fills a buffer with random bytes.  Every int-sized chunk is the XOR of
+ * get_random_bytes() output and cfs_rand().  The unaligned head and the
+ * short tail of the buffer are filled with memcpy() so that the main loop
+ * only performs int-aligned stores.
+ */
+void cfs_get_random_bytes(void *buf, int size)
+{
+       int *p = buf;
+       int rem, tmp;
+
+       LASSERT(size >= 0);
+
+       /*
+        * Number of bytes up to the next int boundary (0 if buf is already
+        * aligned).  Using the misalignment offset itself here would leave
+        * p unaligned whenever the offset is not sizeof(int) / 2.
+        */
+       rem = min((int)(-(unsigned long)buf & (sizeof(int) - 1)), size);
+       if (rem) {
+               get_random_bytes(&tmp, sizeof(tmp));
+               tmp ^= cfs_rand();
+               memcpy(buf, &tmp, rem);
+               p = buf + rem;
+               size -= rem;
+       }
+
+       /* aligned middle part, one int at a time */
+       while (size >= (int)sizeof(int)) {
+               get_random_bytes(&tmp, sizeof(tmp));
+               *p = cfs_rand() ^ tmp;
+               size -= sizeof(int);
+               p++;
+       }
+
+       /* fewer than sizeof(int) bytes left: copy only what still fits */
+       buf = p;
+       if (size) {
+               get_random_bytes(&tmp, sizeof(tmp));
+               tmp ^= cfs_rand();
+               memcpy(buf, &tmp, size);
+       }
+}
+EXPORT_SYMBOL(cfs_get_random_bytes);
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c

new file mode 100644 (file)

index 0000000..ec3bc04
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c
@@ -0,0 +1,1208 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/tracefile.c
+ *
+ * Author: Zach Brown <zab@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+#include "tracefile.h"
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/* XXX move things up to the top, comment */
+union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
+
+char cfs_tracefile[TRACEFILE_NAME_SIZE];
+long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
+static struct tracefiled_ctl trace_tctl;
+static DEFINE_MUTEX(cfs_trace_thread_mutex);
+static int thread_running;
+
+static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
+
+/*
+ * Temporary list of trace pages gathered from the trace data (tcd)
+ * structures by collect_pages().
+ */
+struct page_collection {
+       /* list of struct cfs_trace_page, linked through ->linkage */
+       struct list_head        pc_pages;
+       /*
+        * if this flag is set, collect_pages() will spill both
+        * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
+        * only ->tcd_pages are spilled.
+        */
+       int             pc_want_daemon_pages;
+};
+
+/*
+ * Control block for the tracefiled thread; this file defines a single
+ * instance, trace_tctl.
+ */
+struct tracefiled_ctl {
+       struct completion       tctl_start;
+       struct completion       tctl_stop;
+       /*
+        * woken by cfs_trace_get_tage_try() once a tcd holds more than 8
+        * pages while the thread is running
+        */
+       wait_queue_head_t               tctl_waitq;
+       pid_t                   tctl_pid;
+       atomic_t                tctl_shutdown;
+};
+
+/*
+ * small data-structure for each page owned by tracefiled.
+ * Allocated by cfs_tage_alloc() and released by cfs_tage_free().
+ */
+struct cfs_trace_page {
+       /*
+        * page itself
+        */
+       struct page       *page;
+       /*
+        * linkage into one of the lists in trace_data_union or
+        * page_collection
+        */
+       struct list_head           linkage;
+       /*
+        * number of bytes used within this page
+        */
+       unsigned int     used;
+       /*
+        * cpu that owns this page; set from smp_processor_id() when the
+        * page is added to a tcd's page list
+        */
+       unsigned short       cpu;
+       /*
+        * type(context) of this page; copied from the tcd's tcd_type
+        */
+       unsigned short       type;
+};
+
+static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
+                                        struct cfs_trace_cpu_data *tcd);
+
+/* Map a list node back to the trace page that embeds it as ->linkage. */
+static inline struct cfs_trace_page *
+cfs_tage_from_list(struct list_head *list)
+{
+       struct cfs_trace_page *tage;
+
+       tage = list_entry(list, struct cfs_trace_page, linkage);
+       return tage;
+}
+
+/*
+ * Allocate a trace page descriptor together with its backing page.
+ *
+ * Returns the new descriptor with ->page set, or NULL if either allocation
+ * fails or if the caller is under memory pressure outside interrupt
+ * context.  Successful allocations are counted in cfs_tage_allocated.
+ */
+static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
+{
+       struct cfs_trace_page *tage;
+       struct page *pg;
+
+       /* My caller is trying to free memory */
+       if (!in_interrupt() && memory_pressure_get())
+               return NULL;
+
+       /*
+        * Allocation failures are reported by the upper layer anyway, so
+        * keep the allocator from spamming the console.
+        */
+       gfp |= __GFP_NOWARN;
+
+       pg = alloc_page(gfp);
+       if (!pg)
+               return NULL;
+
+       tage = kmalloc(sizeof(*tage), gfp);
+       if (tage) {
+               tage->page = pg;
+               atomic_inc(&cfs_tage_allocated);
+       } else {
+               /* no descriptor: give the page back */
+               __free_page(pg);
+       }
+
+       return tage;
+}
+
+/*
+ * Release a trace page descriptor and its backing page, and drop it from
+ * the cfs_tage_allocated count.
+ */
+static void cfs_tage_free(struct cfs_trace_page *tage)
+{
+       struct page *pg = tage->page;
+
+       __free_page(pg);
+       kfree(tage);
+       atomic_dec(&cfs_tage_allocated);
+}
+
+/*
+ * Unlink @tage from whatever list it is on and re-insert it just before
+ * @queue, i.e. at the tail when @queue is a list head.
+ */
+static void cfs_tage_to_tail(struct cfs_trace_page *tage,
+                            struct list_head *queue)
+{
+       list_move_tail(&tage->linkage, queue);
+}
+
+/*
+ * Allocate trace pages onto @stock until, counted together with
+ * tcd->tcd_cur_stock_pages, TCD_STOCK_PAGES is reached or an allocation
+ * fails.
+ *
+ * Returns the number of pages added to @stock.
+ */
+int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
+                          struct list_head *stock)
+{
+       int added = 0;
+
+       /*
+        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+       while (added + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES) {
+               struct cfs_trace_page *tage = cfs_tage_alloc(gfp);
+
+               if (!tage)
+                       break;
+
+               list_add_tail(&tage->linkage, stock);
+               added++;
+       }
+
+       return added;
+}
+
+/*
+ * Try to find a page with 'len' bytes left at the end without discarding
+ * anything: reuse the newest page of the tcd if the record fits, otherwise
+ * take a stock page or allocate a fresh one while the tcd is below
+ * tcd_max_pages.  Returns NULL if no such page can be provided.
+ */
+static struct cfs_trace_page *
+cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
+{
+       struct cfs_trace_page *tage;
+
+       /* the newest page sits at the tail of tcd_pages */
+       if (tcd->tcd_cur_pages > 0) {
+               __LASSERT(!list_empty(&tcd->tcd_pages));
+               tage = cfs_tage_from_list(tcd->tcd_pages.prev);
+               if (tage->used + len <= PAGE_CACHE_SIZE)
+                       return tage;
+       }
+
+       /* page budget of this tcd is used up */
+       if (tcd->tcd_cur_pages >= tcd->tcd_max_pages)
+               return NULL;
+
+       if (tcd->tcd_cur_stock_pages > 0) {
+               /* prefer a pre-allocated stock page */
+               tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
+               --tcd->tcd_cur_stock_pages;
+               list_del_init(&tage->linkage);
+       } else {
+               tage = cfs_tage_alloc(GFP_ATOMIC);
+               if (unlikely(!tage)) {
+                       if ((!memory_pressure_get() || in_interrupt()) &&
+                           printk_ratelimit())
+                               printk(KERN_WARNING
+                                      "cannot allocate a tage (%ld)\n",
+                                      tcd->tcd_cur_pages);
+                       return NULL;
+               }
+       }
+
+       /* initialise the page and append it as the newest one */
+       tage->used = 0;
+       tage->cpu = smp_processor_id();
+       tage->type = tcd->tcd_type;
+       list_add_tail(&tage->linkage, &tcd->tcd_pages);
+       tcd->tcd_cur_pages++;
+
+       /* wake up tracefiled to process some pages. */
+       if (tcd->tcd_cur_pages > 8 && thread_running) {
+               struct tracefiled_ctl *tctl = &trace_tctl;
+
+               wake_up(&tctl->tctl_waitq);
+       }
+
+       return tage;
+}
+
+/*
+ * Move the oldest tenth of the pages on tcd->tcd_pages over to the daemon
+ * list of the same tcd, after a rate-limited warning.
+ */
+static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
+{
+       struct page_collection pc;
+       struct cfs_trace_page *tage;
+       struct cfs_trace_page *next;
+       int pgcount = tcd->tcd_cur_pages / 10;
+       int moved = 0;
+
+       /*
+        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+       if (printk_ratelimit())
+               printk(KERN_WARNING "debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
+                      pgcount + 1, tcd->tcd_cur_pages);
+
+       INIT_LIST_HEAD(&pc.pc_pages);
+
+       /* walk from the head of the list and stop after pgcount pages */
+       list_for_each_entry_safe(tage, next, &tcd->tcd_pages, linkage) {
+               if (moved == pgcount)
+                       break;
+
+               list_move_tail(&tage->linkage, &pc.pc_pages);
+               tcd->tcd_cur_pages--;
+               moved++;
+       }
+
+       put_pages_on_tcd_daemon_list(&pc, tcd);
+}
+
+/*
+ * Return a page that has 'len' bytes left at the end.  If none can be
+ * obtained without discarding data, the tcd is shrunk (only while the
+ * tracefiled thread runs) and its oldest page is emptied and reused as the
+ * newest one.  Returns NULL for oversized requests or when the tcd holds no
+ * pages at all.
+ */
+static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
+                                                unsigned long len)
+{
+       struct cfs_trace_page *tage;
+
+       /*
+        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+       if (len > PAGE_CACHE_SIZE) {
+               pr_err("cowardly refusing to write %lu bytes in a page\n", len);
+               return NULL;
+       }
+
+       tage = cfs_trace_get_tage_try(tcd, len);
+       if (!tage) {
+               if (thread_running)
+                       cfs_tcd_shrink(tcd);
+
+               /* recycle the oldest page: wipe it and make it the newest */
+               if (tcd->tcd_cur_pages > 0) {
+                       tage = cfs_tage_from_list(tcd->tcd_pages.next);
+                       tage->used = 0;
+                       cfs_tage_to_tail(tage, &tcd->tcd_pages);
+               }
+       }
+
+       return tage;
+}
+
+/*
+ * Variadic front end of libcfs_debug_vmsg2(): the caller's arguments are
+ * handed over as format1/args and no second format is used.
+ * Returns whatever libcfs_debug_vmsg2() returns.
+ */
+int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
+                    const char *format, ...)
+{
+       va_list ap;
+       int ret;
+
+       va_start(ap, format);
+       ret = libcfs_debug_vmsg2(msgdata, format, ap, NULL);
+       va_end(ap);
+
+       return ret;
+}
+EXPORT_SYMBOL(libcfs_debug_msg);
+
+/*
+ * Format a debug message into a trace page of the current tcd and, when the
+ * message mask is enabled in libcfs_printk, also print it on the console.
+ *
+ * The message text is produced from format1/args followed by format2 with
+ * this function's own variadic arguments; either format may be NULL.
+ *
+ * Returns 1 when nothing was printed on the console (not requested or
+ * rate-limited through msgdata->msg_cdls), 0 otherwise.
+ */
+int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
+                      const char *format1, va_list args,
+                      const char *format2, ...)
+{
+       struct cfs_trace_cpu_data *tcd = NULL;
+       struct ptldebug_header     header = {0};
+       struct cfs_trace_page     *tage;
+       /* string_buf is used only if tcd != NULL, and is always set then */
+       char                  *string_buf = NULL;
+       char                  *debug_buf;
+       int                     known_size;
+       int                     needed = 85; /* average message length */
+       int                     max_nob;
+       va_list             ap;
+       int                     depth;
+       int                     i;
+       int                     remain;
+       int                     mask = msgdata->msg_mask;
+       const char              *file = kbasename(msgdata->msg_file);
+       struct cfs_debug_limit_state   *cdls = msgdata->msg_cdls;
+
+       tcd = cfs_trace_get_tcd();
+
+       /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
+        * pins us to a particular CPU.  This avoids an smp_processor_id()
+        * warning on Linux when debugging is enabled.
+        */
+       cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
+
+       if (!tcd)               /* arch may not log in IRQ context */
+               goto console;
+
+       if (tcd->tcd_cur_pages == 0)
+               header.ph_flags |= PH_FLAG_FIRST_RECORD;
+
+       if (tcd->tcd_shutting_down) {
+               cfs_trace_put_tcd(tcd);
+               tcd = NULL;
+               goto console;
+       }
+
+       /*
+        * known_size covers everything stored in front of the message text:
+        * the indentation dots, the file name and (optionally) the function
+        * name with their terminating NULs, and the binary header if enabled.
+        */
+       depth = __current_nesting_level();
+       known_size = strlen(file) + 1 + depth;
+       if (msgdata->msg_fn)
+               known_size += strlen(msgdata->msg_fn) + 1;
+
+       if (libcfs_debug_binary)
+               known_size += sizeof(header);
+
+       /*
+        * Up to two passes: vsnprintf() returns the size required for the
+        * output _without_ the terminating NUL, so if 'needed' turns out to
+        * be too small for this format, the second pass asks for a page with
+        * enough room.
+        */
+       for (i = 0; i < 2; i++) {
+               tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
+               if (!tage) {
+                       if (needed + known_size > PAGE_CACHE_SIZE)
+                               mask |= D_ERROR;
+
+                       cfs_trace_put_tcd(tcd);
+                       tcd = NULL;
+                       goto console;
+               }
+
+               /* the text is formatted in place, behind the reserved prefix */
+               string_buf = (char *)page_address(tage->page) +
+                                       tage->used + known_size;
+
+               max_nob = PAGE_CACHE_SIZE - tage->used - known_size;
+               if (max_nob <= 0) {
+                       printk(KERN_EMERG "negative max_nob: %d\n",
+                              max_nob);
+                       mask |= D_ERROR;
+                       cfs_trace_put_tcd(tcd);
+                       tcd = NULL;
+                       goto console;
+               }
+
+               needed = 0;
+               if (format1) {
+                       va_copy(ap, args);
+                       needed = vsnprintf(string_buf, max_nob, format1, ap);
+                       va_end(ap);
+               }
+
+               if (format2) {
+                       remain = max_nob - needed;
+                       if (remain < 0)
+                               remain = 0;
+
+                       va_start(ap, format2);
+                       needed += vsnprintf(string_buf + needed, remain,
+                                           format2, ap);
+                       va_end(ap);
+               }
+
+               if (needed < max_nob) /* well. printing ok.. */
+                       break;
+       }
+
+       if (*(string_buf + needed - 1) != '\n')
+               printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
+                      file, msgdata->msg_line, msgdata->msg_fn);
+
+       /* now fill in the prefix in front of the already formatted text */
+       header.ph_len = known_size + needed;
+       debug_buf = (char *)page_address(tage->page) + tage->used;
+
+       if (libcfs_debug_binary) {
+               memcpy(debug_buf, &header, sizeof(header));
+               tage->used += sizeof(header);
+               debug_buf += sizeof(header);
+       }
+
+       /* indent message according to the nesting level */
+       while (depth-- > 0) {
+               *(debug_buf++) = '.';
+               ++tage->used;
+       }
+
+       strcpy(debug_buf, file);
+       tage->used += strlen(file) + 1;
+       debug_buf += strlen(file) + 1;
+
+       if (msgdata->msg_fn) {
+               strcpy(debug_buf, msgdata->msg_fn);
+               tage->used += strlen(msgdata->msg_fn) + 1;
+               debug_buf += strlen(msgdata->msg_fn) + 1;
+       }
+
+       /* the prefix must end exactly where the text was formatted */
+       __LASSERT(debug_buf == string_buf);
+
+       tage->used += needed;
+       __LASSERT(tage->used <= PAGE_CACHE_SIZE);
+
+console:
+       if ((mask & libcfs_printk) == 0) {
+               /* no console output requested */
+               if (tcd)
+                       cfs_trace_put_tcd(tcd);
+               return 1;
+       }
+
+       if (cdls) {
+               if (libcfs_console_ratelimit &&
+                   cdls->cdls_next != 0 &&     /* not first time ever */
+                   !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
+                       /* skipping a console message */
+                       cdls->cdls_count++;
+                       if (tcd)
+                               cfs_trace_put_tcd(tcd);
+                       return 1;
+               }
+
+               if (cfs_time_after(cfs_time_current(),
+                                  cdls->cdls_next + libcfs_console_max_delay +
+                                  cfs_time_seconds(10))) {
+                       /* last timeout was a long time ago */
+                       cdls->cdls_delay /= libcfs_console_backoff * 4;
+               } else {
+                       cdls->cdls_delay *= libcfs_console_backoff;
+               }
+
+               /* clamp the delay to [min_delay, max_delay] */
+               if (cdls->cdls_delay < libcfs_console_min_delay)
+                       cdls->cdls_delay = libcfs_console_min_delay;
+               else if (cdls->cdls_delay > libcfs_console_max_delay)
+                       cdls->cdls_delay = libcfs_console_max_delay;
+
+               /* ensure cdls_next is never zero after it's been seen */
+               cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
+       }
+
+       if (tcd) {
+               /* the text is still in the trace page: print it from there */
+               cfs_print_to_console(&header, mask, string_buf, needed, file,
+                                    msgdata->msg_fn);
+               cfs_trace_put_tcd(tcd);
+       } else {
+               /* nothing was traced: format into the console buffer instead */
+               string_buf = cfs_trace_get_console_buffer();
+
+               needed = 0;
+               if (format1) {
+                       va_copy(ap, args);
+                       needed = vsnprintf(string_buf,
+                                          CFS_TRACE_CONSOLE_BUFFER_SIZE,
+                                          format1, ap);
+                       va_end(ap);
+               }
+               if (format2) {
+                       remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
+                       if (remain > 0) {
+                               va_start(ap, format2);
+                               needed += vsnprintf(string_buf + needed, remain,
+                                                   format2, ap);
+                               va_end(ap);
+                       }
+               }
+               cfs_print_to_console(&header, mask,
+                                    string_buf, needed, file, msgdata->msg_fn);
+
+               /*
+                * NOTE(review): presumably pairs with a get_cpu() inside
+                * cfs_trace_get_console_buffer() - confirm
+                */
+               put_cpu();
+       }
+
+       /* report the messages suppressed by rate limiting since last print */
+       if (cdls && cdls->cdls_count != 0) {
+               string_buf = cfs_trace_get_console_buffer();
+
+               needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
+                                 "Skipped %d previous similar message%s\n",
+                                 cdls->cdls_count,
+                                 (cdls->cdls_count > 1) ? "s" : "");
+
+               cfs_print_to_console(&header, mask,
+                                    string_buf, needed, file, msgdata->msg_fn);
+
+               put_cpu();
+               cdls->cdls_count = 0;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(libcfs_debug_vmsg2);
+
+/*
+ * Report a failed assertion: flag the panic state, print @str on the
+ * console at D_EMERG level and panic.  Does not return.
+ */
+void
+cfs_trace_assertion_failed(const char *str,
+                          struct libcfs_debug_msg_data *msgdata)
+{
+       struct ptldebug_header hdr;
+
+       /* set both flags and issue a barrier before anything is printed */
+       libcfs_panic_in_progress = 1;
+       libcfs_catastrophe = 1;
+       mb();
+
+       cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
+
+       cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
+                            msgdata->msg_file, msgdata->msg_fn);
+
+       panic("Lustre debug assertion failure\n");
+
+       /* not reached */
+}
+
+/*
+ * Panic-time page collection: splices the pages of every tcd visited by
+ * cfs_tcd_for_each() onto pc->pc_pages and resets the tcd page counters.
+ * Uses cfs_tcd_for_each() rather than the cfs_tcd_for_each_type_lock()
+ * iterator used by collect_pages_on_all_cpus().
+ */
+static void
+panic_collect_pages(struct page_collection *pc)
+{
+       /* Do the collect_pages job on a single CPU: assumes that all other
+        * CPUs have been stopped during a panic.  If this isn't true for some
+        * arch, this will have to be implemented separately in each arch.
+        */
+       int                     i;
+       int                     j;
+       struct cfs_trace_cpu_data *tcd;
+
+       INIT_LIST_HEAD(&pc->pc_pages);
+
+       cfs_tcd_for_each(tcd, i, j) {
+               list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
+               tcd->tcd_cur_pages = 0;
+
+               /* daemon pages are taken only on request */
+               if (pc->pc_want_daemon_pages) {
+                       list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
+                       tcd->tcd_cur_daemon_pages = 0;
+               }
+       }
+}
+
+/*
+ * For every possible CPU, splice the pages of each tcd visited by
+ * cfs_tcd_for_each_type_lock() onto pc->pc_pages and reset the tcd page
+ * counters.  pc->pc_pages must already be initialised by the caller.
+ */
+static void collect_pages_on_all_cpus(struct page_collection *pc)
+{
+       struct cfs_trace_cpu_data *tcd;
+       int i, cpu;
+
+       for_each_possible_cpu(cpu) {
+               cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+                       list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
+                       tcd->tcd_cur_pages = 0;
+                       /* daemon pages are taken only on request */
+                       if (pc->pc_want_daemon_pages) {
+                               list_splice_init(&tcd->tcd_daemon_pages,
+                                                &pc->pc_pages);
+                               tcd->tcd_cur_daemon_pages = 0;
+                       }
+               }
+       }
+}
+
+/*
+ * Gather trace pages into @pc, using the panic variant when a panic is in
+ * progress and the per-CPU variant otherwise.
+ */
+static void collect_pages(struct page_collection *pc)
+{
+       INIT_LIST_HEAD(&pc->pc_pages);
+
+       if (!libcfs_panic_in_progress) {
+               collect_pages_on_all_cpus(pc);
+               return;
+       }
+
+       panic_collect_pages(pc);
+}
+
+/*
+ * Return the pages left on pc->pc_pages to the tcd they came from, matched
+ * by the cpu and type recorded in each page.
+ */
+static void put_pages_back_on_all_cpus(struct page_collection *pc)
+{
+       struct cfs_trace_cpu_data *tcd;
+       struct list_head *cur_head;
+       struct cfs_trace_page *tage;
+       struct cfs_trace_page *tmp;
+       int i, cpu;
+
+       for_each_possible_cpu(cpu) {
+               cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+                       /*
+                        * first entry currently on the list: returned pages
+                        * are inserted in front of it
+                        */
+                       cur_head = tcd->tcd_pages.next;
+
+                       list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
+                                                linkage) {
+                               __LASSERT_TAGE_INVARIANT(tage);
+
+                               /* page belongs to a different tcd */
+                               if (tage->cpu != cpu || tage->type != i)
+                                       continue;
+
+                               cfs_tage_to_tail(tage, cur_head);
+                               tcd->tcd_cur_pages++;
+                       }
+               }
+       }
+}
+
+/* Hand collected pages back to their tcds; does nothing during a panic. */
+static void put_pages_back(struct page_collection *pc)
+{
+       if (libcfs_panic_in_progress)
+               return;
+
+       put_pages_back_on_all_cpus(pc);
+}
+
+/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
+ * we have a good amount of data at all times for dumping during an LBUG, even
+ * if we have been steadily writing (and otherwise discarding) pages via the
+ * debug daemon.
+ *
+ * Only pages of @pc whose cpu and type match @tcd are taken.  Whenever the
+ * daemon list grows beyond tcd_max_pages, its oldest page is freed.
+ */
+static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
+                                        struct cfs_trace_cpu_data *tcd)
+{
+       struct cfs_trace_page *tage;
+       struct cfs_trace_page *next;
+
+       list_for_each_entry_safe(tage, next, &pc->pc_pages, linkage) {
+               struct cfs_trace_page *victim;
+
+               __LASSERT_TAGE_INVARIANT(tage);
+
+               if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
+                       continue;
+
+               cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
+               tcd->tcd_cur_daemon_pages++;
+
+               /* still within the limit: nothing to evict */
+               if (tcd->tcd_cur_daemon_pages <= tcd->tcd_max_pages)
+                       continue;
+
+               /* over the limit: drop the page at the head of the list */
+               __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
+               victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
+
+               __LASSERT_TAGE_INVARIANT(victim);
+
+               list_del(&victim->linkage);
+               cfs_tage_free(victim);
+               tcd->tcd_cur_daemon_pages--;
+       }
+}
+
+/*
+ * Distribute the pages of @pc to the daemon lists of the tcds they belong
+ * to, visiting every possible CPU.
+ */
+static void put_pages_on_daemon_list(struct page_collection *pc)
+{
+       struct cfs_trace_cpu_data *tcd;
+       int i, cpu;
+
+       for_each_possible_cpu(cpu) {
+               cfs_tcd_for_each_type_lock(tcd, i, cpu)
+                       put_pages_on_tcd_daemon_list(pc, tcd);
+       }
+}
+
+/*
+ * Collect all trace pages, including the daemon pages, print every record
+ * found in them on the console at D_EMERG level and free the pages.
+ *
+ * NOTE(review): the parser expects each record to start with a
+ * struct ptldebug_header followed by NUL-terminated file and function
+ * names, i.e. the layout libcfs_debug_vmsg2() writes when
+ * libcfs_debug_binary is set and msg_fn is non-NULL - confirm that callers
+ * guarantee this.
+ */
+void cfs_trace_debug_print(void)
+{
+       struct page_collection pc;
+       struct cfs_trace_page *tage;
+       struct cfs_trace_page *tmp;
+
+       pc.pc_want_daemon_pages = 1;
+       collect_pages(&pc);
+       list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+               char *p, *file, *fn;
+               struct page *page;
+
+               __LASSERT_TAGE_INVARIANT(tage);
+
+               page = tage->page;
+               p = page_address(page);
+               /* walk the records stored in the used part of the page */
+               while (p < ((char *)page_address(page) + tage->used)) {
+                       struct ptldebug_header *hdr;
+                       int len;
+
+                       hdr = (void *)p;
+                       p += sizeof(*hdr);
+                       file = p;
+                       p += strlen(file) + 1;
+                       fn = p;
+                       p += strlen(fn) + 1;
+                       /* ph_len spans the whole record; the rest is text */
+                       len = hdr->ph_len - (int)(p - (char *)hdr);
+
+                       cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
+
+                       p += len;
+               }
+
+               list_del(&tage->linkage);
+               cfs_tage_free(tage);
+       }
+}
+
+/*
+ * Write all collected trace pages, including the daemon pages, to a newly
+ * created file and free the pages that were written.
+ *
+ * The file is opened with O_CREAT | O_EXCL, so an existing @filename makes
+ * the open fail.  Runs under the tracefile write lock.
+ *
+ * Returns the filp_open() error if the file cannot be opened, 0 if there
+ * was nothing to dump, and otherwise the result of vfs_fsync().
+ */
+int cfs_tracefile_dump_all_pages(char *filename)
+{
+       struct page_collection  pc;
+       struct file             *filp;
+       struct cfs_trace_page   *tage;
+       struct cfs_trace_page   *tmp;
+       char                    *buf;
+       int rc;
+
+       DECL_MMSPACE;
+
+       cfs_tracefile_write_lock();
+
+       filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
+                        0600);
+       if (IS_ERR(filp)) {
+               rc = PTR_ERR(filp);
+               filp = NULL;
+               pr_err("LustreError: can't open %s for dump: rc %d\n",
+                      filename, rc);
+               goto out;
+       }
+
+       pc.pc_want_daemon_pages = 1;
+       collect_pages(&pc);
+       if (list_empty(&pc.pc_pages)) {
+               rc = 0;
+               goto close;
+       }
+
+       /* ok, for now, just write the pages.  in the future we'll be building
+        * iobufs with the pages and calling generic_direct_IO
+        */
+       /*
+        * NOTE(review): MMSPACE_OPEN presumably lets vfs_write() accept the
+        * kernel buffer that is cast to __user below - confirm
+        */
+       MMSPACE_OPEN;
+       list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+               __LASSERT_TAGE_INVARIANT(tage);
+
+               buf = kmap(tage->page);
+               rc = vfs_write(filp, (__force const char __user *)buf,
+                              tage->used, &filp->f_pos);
+               kunmap(tage->page);
+
+               /* short or failed write: give the remaining pages back */
+               if (rc != (int)tage->used) {
+                       printk(KERN_WARNING "wanted to write %u but wrote %d\n",
+                              tage->used, rc);
+                       put_pages_back(&pc);
+                       __LASSERT(list_empty(&pc.pc_pages));
+                       break;
+               }
+               list_del(&tage->linkage);
+               cfs_tage_free(tage);
+       }
+       MMSPACE_CLOSE;
+       /*
+        * NOTE(review): rc from a failed vfs_write() above is overwritten
+        * here, so a write error is not reported to the caller
+        */
+       rc = vfs_fsync(filp, 1);
+       if (rc)
+               pr_err("sync returns %d\n", rc);
+close:
+       filp_close(filp, NULL);
+out:
+       cfs_tracefile_write_unlock();
+       return rc;
+}
+
+/*
+ * Discard all buffered trace data: collect every page, including the
+ * daemon pages, and free them.
+ */
+void cfs_trace_flush_pages(void)
+{
+       struct page_collection pc;
+       struct cfs_trace_page *tage;
+
+       pc.pc_want_daemon_pages = 1;
+       collect_pages(&pc);
+
+       /* keep taking the first page until the collection is empty */
+       while (!list_empty(&pc.pc_pages)) {
+               tage = cfs_tage_from_list(pc.pc_pages.next);
+
+               __LASSERT_TAGE_INVARIANT(tage);
+
+               list_del(&tage->linkage);
+               cfs_tage_free(tage);
+       }
+}
+
+/**
+ * cfs_trace_copyin_string - copy a string from user space and trim it
+ * @knl_buffer    : kernel buffer receiving the string
+ * @knl_buffer_nob: size of @knl_buffer in bytes
+ * @usr_buffer    : user-space source
+ * @usr_buffer_nob: number of bytes to copy from @usr_buffer
+ *
+ * Copies @usr_buffer_nob bytes, strips trailing whitespace and
+ * NUL-terminates the result in @knl_buffer.
+ *
+ * Returns 0 on success, -EOVERFLOW if the input or the terminating NUL
+ * does not fit into @knl_buffer, -EFAULT if the user copy fails, or
+ * -EINVAL if the string is empty or consists only of whitespace.
+ */
+int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+                           const char __user *usr_buffer, int usr_buffer_nob)
+{
+       int    nob;
+
+       if (usr_buffer_nob > knl_buffer_nob)
+               return -EOVERFLOW;
+
+       if (copy_from_user((void *)knl_buffer,
+                          usr_buffer, usr_buffer_nob))
+               return -EFAULT;
+
+       nob = strnlen(knl_buffer, usr_buffer_nob);
+       /*
+        * strip trailing whitespace; the index is decremented and tested
+        * before it is used, so knl_buffer[-1] is never read for an empty
+        * or all-whitespace string
+        */
+       while (--nob >= 0)
+               if (!isspace(knl_buffer[nob]))
+                       break;
+
+       if (nob < 0)                        /* empty string */
+               return -EINVAL;
+
+       /* nob is now the index of the last character that is kept */
+       if (nob + 1 >= knl_buffer_nob)      /* no space to terminate */
+               return -EOVERFLOW;
+
+       knl_buffer[nob + 1] = 0;                /* terminate */
+       return 0;
+}
+EXPORT_SYMBOL(cfs_trace_copyin_string);
+
+/*
+ * Copy a kernel string out to user space, truncated to 'usr_buffer_nob'.
+ *
+ * NB if 'append' != NULL, it's a single character to append to the copied
+ * out string - usually "\n" or "" (i.e. a terminating zero byte). It is only
+ * appended when the user buffer still has room for it.
+ *
+ * Returns the number of bytes written to 'usr_buffer', or -EFAULT if a user
+ * copy fails.
+ */
+int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
+                            const char *knl_buffer, char *append)
+{
+       int copied = strlen(knl_buffer);
+
+       /* never write more than the caller's buffer can hold */
+       if (copied > usr_buffer_nob)
+               copied = usr_buffer_nob;
+
+       if (copy_to_user(usr_buffer, knl_buffer, copied))
+               return -EFAULT;
+
+       /* nothing to append, or no room left for it */
+       if (!append || copied >= usr_buffer_nob)
+               return copied;
+
+       if (copy_to_user(usr_buffer + copied, append, 1))
+               return -EFAULT;
+
+       return copied + 1;
+}
+EXPORT_SYMBOL(cfs_trace_copyout_string);
+
+/*
+ * Allocate a zero-filled kernel buffer of 'nob' bytes for a string.
+ *
+ * On success '*str' points to the new buffer; the callers in this file
+ * release it with kfree().
+ *
+ * Returns 0 on success, -EINVAL if 'nob' is not positive or is larger than
+ * two pages, and -ENOMEM if the allocation fails.
+ */
+int cfs_trace_allocate_string_buffer(char **str, int nob)
+{
+       /*
+        * string must be "sensible": a zero-byte request is refused as well,
+        * because a zero-size allocation does not come back as NULL and the
+        * caller would treat it as a usable buffer.
+        */
+       if (nob <= 0 || nob > 2 * PAGE_CACHE_SIZE)
+               return -EINVAL;
+
+       *str = kzalloc(nob, GFP_KERNEL);
+       if (!*str)
+               return -ENOMEM;
+
+       return 0;
+}
+
+/*
+ * Dump all trace pages to the file named by a user-space string.
+ *
+ * The name is copied in with cfs_trace_copyin_string() and must start with
+ * '/'; otherwise -EINVAL is returned. Any error from allocating or copying
+ * the name, or from cfs_tracefile_dump_all_pages(), is passed through.
+ */
+int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
+{
+       char *path;
+       int err;
+
+       /* one extra byte for the terminating NUL */
+       err = cfs_trace_allocate_string_buffer(&path, usr_str_nob + 1);
+       if (err)
+               return err;
+
+       err = cfs_trace_copyin_string(path, usr_str_nob + 1,
+                                     usr_str, usr_str_nob);
+       if (!err) {
+               if (path[0] == '/')
+                       err = cfs_tracefile_dump_all_pages(path);
+               else
+                       err = -EINVAL;
+       }
+
+       kfree(path);
+       return err;
+}
+
+/*
+ * Execute one debug-daemon control command.
+ *
+ * Forms of 'str', tested in this order:
+ *   "stop"      - stop the daemon thread and clear cfs_tracefile
+ *   "size=<n>"  - set cfs_tracefile_size to n MB (n parsed as decimal);
+ *                 values outside [10, 20480] select CFS_TRACEFILE_SIZE
+ *   "/<path>"   - record the path in cfs_tracefile and start the daemon
+ *
+ * Returns 0 on success, the kstrtoul() error for a malformed size,
+ * -ENAMETOOLONG if the string does not fit in cfs_tracefile, or -EINVAL if
+ * it does not start with '/'.
+ */
+int cfs_trace_daemon_command(char *str)
+{
+       int       rc = 0;
+
+       cfs_tracefile_write_lock();
+
+       if (strcmp(str, "stop") == 0) {
+               /*
+                * The lock is dropped while waiting for the thread to exit;
+                * presumably because tracefiled() takes the read side of it
+                * on each pass - confirm.
+                */
+               cfs_tracefile_write_unlock();
+               cfs_trace_stop_thread();
+               cfs_tracefile_write_lock();
+               memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
+
+       } else if (strncmp(str, "size=", 5) == 0) {
+               unsigned long tmp;
+
+               rc = kstrtoul(str + 5, 10, &tmp);
+               if (!rc) {
+                       /* out-of-range requests fall back to the default */
+                       if (tmp < 10 || tmp > 20480)
+                               cfs_tracefile_size = CFS_TRACEFILE_SIZE;
+                       else
+                               cfs_tracefile_size = tmp << 20;
+               }
+       } else if (strlen(str) >= sizeof(cfs_tracefile)) {
+               rc = -ENAMETOOLONG;
+       } else if (str[0] != '/') {
+               rc = -EINVAL;
+       } else {
+               /* the length was checked above, so the copy fits */
+               strcpy(cfs_tracefile, str);
+
+               printk(KERN_INFO
+                      "Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n",
+                      cfs_tracefile,
+                      (long)(cfs_tracefile_size >> 10));
+
+               /* NOTE(review): the result of starting the thread is ignored */
+               cfs_trace_start_thread();
+       }
+
+       cfs_tracefile_write_unlock();
+       return rc;
+}
+
+/*
+ * Run a debug-daemon command supplied as a user-space string.
+ *
+ * The string is copied into a temporary kernel buffer, handed to
+ * cfs_trace_daemon_command() and the buffer is freed again. Returns the
+ * first error from allocating, copying or executing, or 0.
+ */
+int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
+{
+       char *cmd;
+       int err;
+
+       /* one extra byte for the terminating NUL */
+       err = cfs_trace_allocate_string_buffer(&cmd, usr_str_nob + 1);
+       if (err)
+               return err;
+
+       err = cfs_trace_copyin_string(cmd, usr_str_nob + 1,
+                                     usr_str, usr_str_nob);
+       if (err)
+               goto free_cmd;
+
+       err = cfs_trace_daemon_command(cmd);
+free_cmd:
+       kfree(cmd);
+       return err;
+}
+
+/*
+ * Set the total size of the debug buffer to 'mb' megabytes.
+ *
+ * The request is clamped to the range [number of possible CPUs,
+ * cfs_trace_max_debug_mb()], divided evenly between the CPUs and converted
+ * to pages; each tcd then gets tcd_pages_factor percent of that per-CPU
+ * page count as its tcd_max_pages. Always returns 0.
+ */
+int cfs_trace_set_debug_mb(int mb)
+{
+       int i;
+       int j;
+       int pages;
+       int limit = cfs_trace_max_debug_mb();
+       /*
+        * Keep the CPU count in a signed int: comparing 'mb' directly with
+        * an unsigned count (which num_possible_cpus() presumably yields)
+        * would convert a negative 'mb' to a huge value and skip the lower
+        * clamp.
+        */
+       int ncpus = num_possible_cpus();
+       struct cfs_trace_cpu_data *tcd;
+
+       if (mb < ncpus) {
+               printk(KERN_WARNING
+                      "Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n",
+                      mb, ncpus);
+               mb = ncpus;
+       }
+
+       if (mb > limit) {
+               printk(KERN_WARNING
+                      "Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n",
+                      mb, limit);
+               mb = limit;
+       }
+
+       /* per-CPU share, in pages */
+       mb /= ncpus;
+       pages = mb << (20 - PAGE_CACHE_SHIFT);
+
+       cfs_tracefile_write_lock();
+
+       cfs_tcd_for_each(tcd, i, j)
+               tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
+
+       cfs_tracefile_write_unlock();
+
+       return 0;
+}
+
+/*
+ * Report the debug buffer size in megabytes.
+ *
+ * Sums tcd_max_pages over every tcd under the tracefile read lock, converts
+ * the page count to MB with a truncating shift and adds one.
+ */
+int cfs_trace_get_debug_mb(void)
+{
+       struct cfs_trace_cpu_data *tcd;
+       int npages = 0;
+       int type;
+       int cpu;
+
+       cfs_tracefile_read_lock();
+       cfs_tcd_for_each(tcd, type, cpu)
+               npages += tcd->tcd_max_pages;
+       cfs_tracefile_read_unlock();
+
+       npages >>= 20 - PAGE_CACHE_SHIFT;
+       return npages + 1;
+}
+
+/*
+ * Body of the "ktracefiled" kernel thread started by
+ * cfs_trace_start_thread(); 'arg' is the struct tracefiled_ctl.
+ *
+ * Each pass collects the pending trace pages, writes them to the file named
+ * by cfs_tracefile and hands them to put_pages_on_daemon_list(). Between
+ * passes the thread sleeps on tctl_waitq for up to one second. Once
+ * tctl_shutdown is set it makes one more pass, completes tctl_stop and
+ * returns 0.
+ */
+static int tracefiled(void *arg)
+{
+       struct page_collection pc;
+       struct tracefiled_ctl *tctl = arg;
+       struct cfs_trace_page *tage;
+       struct cfs_trace_page *tmp;
+       struct file *filp;
+       char *buf;
+       int last_loop = 0;
+       int rc;
+
+       DECL_MMSPACE;
+
+       /* we're started late enough that we pick up init's fs context */
+       /* this is so broken in uml?  what on earth is going on? */
+
+       /* let cfs_trace_start_thread() return */
+       complete(&tctl->tctl_start);
+
+       while (1) {
+               wait_queue_t __wait;
+
+               pc.pc_want_daemon_pages = 0;
+               collect_pages(&pc);
+               if (list_empty(&pc.pc_pages))
+                       goto end_loop;
+
+               /* the file is re-opened on every pass that has pages */
+               filp = NULL;
+               cfs_tracefile_read_lock();
+               if (cfs_tracefile[0] != 0) {
+                       filp = filp_open(cfs_tracefile,
+                                        O_CREAT | O_RDWR | O_LARGEFILE,
+                                        0600);
+                       if (IS_ERR(filp)) {
+                               rc = PTR_ERR(filp);
+                               filp = NULL;
+                               printk(KERN_WARNING "couldn't open %s: %d\n",
+                                      cfs_tracefile, rc);
+                       }
+               }
+               cfs_tracefile_read_unlock();
+               if (!filp) {
+                       /* no file to write to: keep the pages in memory */
+                       put_pages_on_daemon_list(&pc);
+                       __LASSERT(list_empty(&pc.pc_pages));
+                       goto end_loop;
+               }
+
+               MMSPACE_OPEN;
+
+               list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+                       /* static: the write offset survives across passes */
+                       static loff_t f_pos;
+
+                       __LASSERT_TAGE_INVARIANT(tage);
+
+                       /* wrap to the start once the size limit is reached */
+                       if (f_pos >= (off_t)cfs_tracefile_size)
+                               f_pos = 0;
+                       else if (f_pos > i_size_read(file_inode(filp)))
+                               f_pos = i_size_read(file_inode(filp));
+
+                       buf = kmap(tage->page);
+                       rc = vfs_write(filp, (__force const char __user *)buf,
+                                      tage->used, &f_pos);
+                       kunmap(tage->page);
+
+                       if (rc != (int)tage->used) {
+                               printk(KERN_WARNING "wanted to write %u but wrote %d\n",
+                                      tage->used, rc);
+                               put_pages_back(&pc);
+                               __LASSERT(list_empty(&pc.pc_pages));
+                               break;
+                       }
+               }
+               MMSPACE_CLOSE;
+
+               filp_close(filp, NULL);
+               put_pages_on_daemon_list(&pc);
+               if (!list_empty(&pc.pc_pages)) {
+                       int i;
+
+                       /* leftover pages: report them before asserting */
+                       printk(KERN_ALERT "Lustre: trace pages aren't empty\n");
+                       pr_err("total cpus(%d): ", num_possible_cpus());
+                       /*
+                        * NOTE(review): indexing 0..num_possible_cpus()-1
+                        * assumes possible CPU ids are contiguous - confirm.
+                        */
+                       for (i = 0; i < num_possible_cpus(); i++)
+                               if (cpu_online(i))
+                                       pr_cont("%d(on) ", i);
+                               else
+                                       pr_cont("%d(off) ", i);
+                       pr_cont("\n");
+
+                       i = 0;
+                       list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
+                                                linkage)
+                               pr_err("page %d belongs to cpu %d\n",
+                                      ++i, tage->cpu);
+                       pr_err("There are %d pages unwritten\n", i);
+               }
+               __LASSERT(list_empty(&pc.pc_pages));
+end_loop:
+               if (atomic_read(&tctl->tctl_shutdown)) {
+                       /* run one final pass after shutdown was requested */
+                       if (last_loop == 0) {
+                               last_loop = 1;
+                               continue;
+                       } else {
+                               break;
+                       }
+               }
+               /* sleep until woken on tctl_waitq or one second has passed */
+               init_waitqueue_entry(&__wait, current);
+               add_wait_queue(&tctl->tctl_waitq, &__wait);
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(cfs_time_seconds(1));
+               remove_wait_queue(&tctl->tctl_waitq, &__wait);
+       }
+       complete(&tctl->tctl_stop);
+       return 0;
+}
+
+/*
+ * Start the "ktracefiled" thread unless it is already running.
+ *
+ * Serialised by cfs_trace_thread_mutex. Waits until the new thread has
+ * completed tctl_start before marking it as running. Returns 0 on success
+ * (including when the thread was already running) or the kthread_run()
+ * error.
+ */
+int cfs_trace_start_thread(void)
+{
+       struct tracefiled_ctl *tctl = &trace_tctl;
+       struct task_struct *task;
+       int rc = 0;
+
+       mutex_lock(&cfs_trace_thread_mutex);
+       if (!thread_running) {
+               /* reset the control block for a fresh thread */
+               init_completion(&tctl->tctl_start);
+               init_completion(&tctl->tctl_stop);
+               init_waitqueue_head(&tctl->tctl_waitq);
+               atomic_set(&tctl->tctl_shutdown, 0);
+
+               task = kthread_run(tracefiled, tctl, "ktracefiled");
+               if (IS_ERR(task)) {
+                       rc = PTR_ERR(task);
+               } else {
+                       wait_for_completion(&tctl->tctl_start);
+                       thread_running = 1;
+               }
+       }
+       mutex_unlock(&cfs_trace_thread_mutex);
+
+       return rc;
+}
+
+/*
+ * Stop the "ktracefiled" thread if it is running.
+ *
+ * Sets tctl_shutdown and blocks until the thread completes tctl_stop. The
+ * thread is not woken explicitly here; tracefiled() notices the flag the
+ * next time it reaches the end of a pass.
+ */
+void cfs_trace_stop_thread(void)
+{
+       struct tracefiled_ctl *tctl = &trace_tctl;
+
+       mutex_lock(&cfs_trace_thread_mutex);
+       if (!thread_running)
+               goto unlock;
+
+       printk(KERN_INFO
+              "Lustre: shutting down debug daemon thread...\n");
+       atomic_set(&tctl->tctl_shutdown, 1);
+       wait_for_completion(&tctl->tctl_stop);
+       thread_running = 0;
+unlock:
+       mutex_unlock(&cfs_trace_thread_mutex);
+}
+
+/*
+ * Initialise the tracefile subsystem.
+ *
+ * Runs cfs_tracefile_init_arch() and then resets every tcd: empty page
+ * lists, zeroed counters and a page limit of tcd_pages_factor percent of
+ * 'max_pages'. Returns 0, or the error from cfs_tracefile_init_arch().
+ */
+int cfs_tracefile_init(int max_pages)
+{
+       struct cfs_trace_cpu_data *tcd;
+       int type;
+       int cpu;
+       int err = cfs_tracefile_init_arch();
+
+       if (err)
+               return err;
+
+       cfs_tcd_for_each(tcd, type, cpu) {
+               INIT_LIST_HEAD(&tcd->tcd_pages);
+               tcd->tcd_cur_pages = 0;
+               INIT_LIST_HEAD(&tcd->tcd_stock_pages);
+               tcd->tcd_cur_stock_pages = 0;
+               INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+               tcd->tcd_cur_daemon_pages = 0;
+
+               /* tcd_pages_factor is initialized in tracefile_init_arch. */
+               tcd->tcd_max_pages = (max_pages * tcd->tcd_pages_factor) / 100;
+               LASSERT(tcd->tcd_max_pages > 0);
+               tcd->tcd_shutting_down = 0;
+       }
+
+       return 0;
+}
+
+/*
+ * For every possible CPU and every trace buffer type, mark the tcd as
+ * shutting down and free all pages queued on its tcd_pages list.
+ *
+ * cfs_tcd_for_each_type_lock() takes and releases the tcd lock around each
+ * iteration of the inner body.
+ */
+static void trace_cleanup_on_all_cpus(void)
+{
+       struct cfs_trace_cpu_data *tcd;
+       struct cfs_trace_page *tage;
+       struct cfs_trace_page *tmp;
+       int i, cpu;
+
+       for_each_possible_cpu(cpu) {
+               cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+                       tcd->tcd_shutting_down = 1;
+
+                       /* _safe variant: entries are freed while walking */
+                       list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
+                                                linkage) {
+                               __LASSERT_TAGE_INVARIANT(tage);
+
+                               list_del(&tage->linkage);
+                               cfs_tage_free(tage);
+                       }
+
+                       tcd->tcd_cur_pages = 0;
+               }
+       }
+}
+
+/*
+ * Free the per-CPU trace pages (trace_cleanup_on_all_cpus()) and then run
+ * the arch-specific teardown (cfs_tracefile_fini_arch()).
+ */
+static void cfs_trace_cleanup(void)
+{
+       trace_cleanup_on_all_cpus();
+
+       cfs_tracefile_fini_arch();
+}
+
+/*
+ * Shut the tracefile subsystem down: stop the daemon thread first, then
+ * free the trace pages and arch-specific state.
+ */
+void cfs_tracefile_exit(void)
+{
+       cfs_trace_stop_thread();
+       cfs_trace_cleanup();
+}
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h

new file mode 100644 (file)

index 0000000..4c77f90
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.h
@@ -0,0 +1,266 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_TRACEFILE_H__
+#define __LIBCFS_TRACEFILE_H__
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+/*
+ * Trace buffer types. A value of this type is returned by
+ * cfs_trace_buf_idx_get() and indexes cfs_trace_data[] and the second
+ * dimension of cfs_trace_console_buffers[][].
+ * Going by the names there is presumably one type per execution context
+ * (process, softirq, hard irq) - confirm against cfs_trace_buf_idx_get().
+ */
+enum cfs_trace_buf_type {
+       CFS_TCD_TYPE_PROC = 0,
+       CFS_TCD_TYPE_SOFTIRQ,
+       CFS_TCD_TYPE_IRQ,
+       CFS_TCD_TYPE_MAX        /* number of types, not a type itself */
+};
+
+/* trace file lock routines */
+
+#define TRACEFILE_NAME_SIZE 1024
+extern char      cfs_tracefile[TRACEFILE_NAME_SIZE];
+extern long long cfs_tracefile_size;
+
+void libcfs_run_debug_log_upcall(char *file);
+
+int  cfs_tracefile_init_arch(void);
+void cfs_tracefile_fini_arch(void);
+
+void cfs_tracefile_read_lock(void);
+void cfs_tracefile_read_unlock(void);
+void cfs_tracefile_write_lock(void);
+void cfs_tracefile_write_unlock(void);
+
+int cfs_tracefile_dump_all_pages(char *filename);
+void cfs_trace_debug_print(void);
+void cfs_trace_flush_pages(void);
+int cfs_trace_start_thread(void);
+void cfs_trace_stop_thread(void);
+int cfs_tracefile_init(int max_pages);
+void cfs_tracefile_exit(void);
+
+int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+                           const char __user *usr_buffer, int usr_buffer_nob);
+int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
+                            const char *knl_str, char *append);
+int cfs_trace_allocate_string_buffer(char **str, int nob);
+int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob);
+int cfs_trace_daemon_command(char *str);
+int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob);
+int cfs_trace_set_debug_mb(int mb);
+int cfs_trace_get_debug_mb(void);
+
+void libcfs_debug_dumplog_internal(void *arg);
+void libcfs_register_panic_notifier(void);
+void libcfs_unregister_panic_notifier(void);
+extern int  libcfs_panic_in_progress;
+int cfs_trace_max_debug_mb(void);
+
+#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT))
+#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
+#define CFS_TRACEFILE_SIZE (500 << 20)
+
+#ifdef LUSTRE_TRACEFILE_PRIVATE
+
+/*
+ * Private declarations for tracefile.
+ *
+ * TCD_MAX_PAGES, TCD_STOCK_PAGES and CFS_TRACEFILE_SIZE are already defined
+ * unconditionally earlier in this header, so they are not repeated here.
+ */
+
+/*
+ * Size of a buffer for sprinting console messages if we can't get a page
+ * from system
+ */
+#define CFS_TRACE_CONSOLE_BUFFER_SIZE   1024
+
+/*
+ * Per-CPU, per-type trace state. The union with __pad rounds the size of
+ * each entry up with L1_CACHE_ALIGN().
+ */
+union cfs_trace_data_union {
+       struct cfs_trace_cpu_data {
+               /*
+                * Even though this structure is meant to be per-CPU, locking
+                * is needed because in some places the data may be accessed
+                * from other CPUs. This lock is directly used in trace_get_tcd
+                * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and
+                * tcd_for_each_type_lock
+                */
+               spinlock_t              tcd_lock;
+               unsigned long      tcd_lock_flags;
+
+               /*
+                * pages with trace records not yet processed by tracefiled.
+                */
+               struct list_head              tcd_pages;
+               /* number of pages on ->tcd_pages */
+               unsigned long      tcd_cur_pages;
+
+               /*
+                * pages with trace records already processed by
+                * tracefiled. These pages are kept in memory, so that some
+                * portion of log can be written in the event of LBUG. This
+                * list is maintained in LRU order.
+                *
+                * Pages are moved to ->tcd_daemon_pages by tracefiled()
+                * (put_pages_on_daemon_list()). LRU pages from this list are
+                * discarded when list grows too large.
+                */
+               struct list_head              tcd_daemon_pages;
+               /* number of pages on ->tcd_daemon_pages */
+               unsigned long      tcd_cur_daemon_pages;
+
+               /*
+                * Maximal number of pages allowed on ->tcd_pages and
+                * ->tcd_daemon_pages each.
+                * Set to max_pages * tcd_pages_factor / 100 by
+                * cfs_tracefile_init() and recomputed by
+                * cfs_trace_set_debug_mb().
+                */
+               unsigned long      tcd_max_pages;
+
+               /*
+                * preallocated pages to write trace records into. Pages from
+                * ->tcd_stock_pages are moved to ->tcd_pages by
+                * portals_debug_msg().
+                *
+                * This list is necessary, because on some platforms it's
+                * impossible to perform efficient atomic page allocation in a
+                * non-blockable context.
+                *
+                * Such platforms fill ->tcd_stock_pages "on occasion", when
+                * tracing code is entered in blockable context.
+                *
+                * trace_get_tage_try() tries to get a page from
+                * ->tcd_stock_pages first and resorts to atomic page
+                * allocation only if this queue is empty. ->tcd_stock_pages
+                * is replenished when tracing code is entered in blocking
+                * context (darwin-tracefile.c:trace_get_tcd()). We try to
+                * maintain TCD_STOCK_PAGES (40 by default) pages in this
+                * queue. Atomic allocation is only required if more than
+                * TCD_STOCK_PAGES pagesful are consumed by trace records all
+                * emitted in non-blocking contexts. Which is quite unlikely.
+                *
+                * NOTE(review): the function and file names above
+                * (portals_debug_msg(), darwin-tracefile.c) and the "40 by
+                * default" figure look inherited from an older tree; in
+                * this header TCD_STOCK_PAGES is TCD_MAX_PAGES, i.e.
+                * 5 << (20 - PAGE_CACHE_SHIFT) pages - confirm and update.
+                */
+               struct list_head              tcd_stock_pages;
+               /* number of pages on ->tcd_stock_pages */
+               unsigned long      tcd_cur_stock_pages;
+
+               /* cleared at init, set by trace_cleanup_on_all_cpus() */
+               unsigned short    tcd_shutting_down;
+               unsigned short    tcd_cpu;
+               unsigned short    tcd_type;
+               /* The factors to share debug memory. */
+               unsigned short    tcd_pages_factor;
+       } tcd;
+       char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))];
+};
+
+/*
+ * cfs_trace_data[type] points to an array of NR_CPUS per-CPU entries. Both
+ * iterators below walk the types until they hit a NULL slot.
+ * NOTE(review): neither iterator bounds the type index by TCD_MAX_TYPES;
+ * they rely on a NULL entry following the last populated one - confirm.
+ */
+#define TCD_MAX_TYPES      8
+extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS];
+
+/*
+ * Visit every tcd: for each type 'i', for each 'j' below
+ * num_possible_cpus(). No locking is done by the macro itself.
+ */
+#define cfs_tcd_for_each(tcd, i, j)                                   \
+       for (i = 0; cfs_trace_data[i]; i++)                             \
+               for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd);     \
+                    j < num_possible_cpus();                            \
+                    j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
+
+/*
+ * Visit the tcd of every type for one 'cpu'. Each tcd is locked with
+ * cfs_trace_lock_tcd(tcd, 1) before the body runs and unlocked in the loop
+ * step; the loop also ends if cfs_trace_lock_tcd() returns 0.
+ */
+#define cfs_tcd_for_each_type_lock(tcd, i, cpu)                           \
+       for (i = 0; cfs_trace_data[i] &&                                \
+            (tcd = &(*cfs_trace_data[i])[cpu].tcd) &&                  \
+            cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
+
+void cfs_set_ptldebug_header(struct ptldebug_header *header,
+                            struct libcfs_debug_msg_data *m,
+                            unsigned long stack);
+void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
+                         const char *buf, int len, const char *file,
+                         const char *fn);
+
+int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
+void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
+
+extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
+enum cfs_trace_buf_type cfs_trace_buf_idx_get(void);
+
+/*
+ * Look up the console buffer for the current CPU and trace buffer type.
+ *
+ * get_cpu() is called here without a matching put_cpu().
+ * NOTE(review): the matching put_cpu() is presumably issued by a companion
+ * "put" helper outside this header - confirm.
+ */
+static inline char *
+cfs_trace_get_console_buffer(void)
+{
+       unsigned int cpu = get_cpu();
+       unsigned int type = cfs_trace_buf_idx_get();
+
+       return cfs_trace_console_buffers[cpu][type];
+}
+
+/*
+ * Return the current CPU's trace data for the buffer type reported by
+ * cfs_trace_buf_idx_get(), after calling cfs_trace_lock_tcd(tcd, 0) on it.
+ *
+ * get_cpu() is called here and put_cpu() only in cfs_trace_put_tcd(), so
+ * every call must be balanced by cfs_trace_put_tcd().
+ * NOTE(review): the return value of cfs_trace_lock_tcd() is not checked.
+ */
+static inline struct cfs_trace_cpu_data *
+cfs_trace_get_tcd(void)
+{
+       struct cfs_trace_cpu_data *tcd =
+               &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
+
+       cfs_trace_lock_tcd(tcd, 0);
+
+       return tcd;
+}
+
+/*
+ * Counterpart of cfs_trace_get_tcd(): unlock 'tcd' and issue the put_cpu()
+ * that balances the get_cpu() done there.
+ */
+static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
+{
+       cfs_trace_unlock_tcd(tcd, 0);
+
+       put_cpu();
+}
+
+int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
+                          struct list_head *stock);
+
+void cfs_trace_assertion_failed(const char *str,
+                               struct libcfs_debug_msg_data *m);
+
+/*
+ * ASSERTION that is safe to use within the debug system.
+ *
+ * On failure it calls cfs_trace_assertion_failed() with the stringified
+ * condition, so the text of 'cond' ends up in the emitted message.
+ */
+#define __LASSERT(cond)                                                 \
+do {                                                               \
+       if (unlikely(!(cond))) {                                        \
+               LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL);     \
+               cfs_trace_assertion_failed("ASSERTION("#cond") failed", \
+                                          &msgdata);              \
+       }                                                              \
+} while (0)
+
+/*
+ * Sanity checks for a trace page: non-NULL, has a page attached, 'used'
+ * does not exceed PAGE_CACHE_SIZE, and the page still has a reference.
+ * 'tage' is evaluated several times, so pass a plain variable.
+ */
+#define __LASSERT_TAGE_INVARIANT(tage)                           \
+do {                                                               \
+       __LASSERT(tage);                                        \
+       __LASSERT(tage->page);                            \
+       __LASSERT(tage->used <= PAGE_CACHE_SIZE);                        \
+       __LASSERT(page_count(tage->page) > 0);                \
+} while (0)
+
+#endif /* LUSTRE_TRACEFILE_PRIVATE */
+
+#endif /* __LIBCFS_TRACEFILE_H__ */
diff --git a/drivers/staging/lustre/lnet/libcfs/workitem.c b/drivers/staging/lustre/lnet/libcfs/workitem.c

new file mode 100644 (file)

index 0000000..f2ebed8
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/workitem.c
@@ -0,0 +1,470 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/workitem.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ *      Liang Zhen  <zhen.liang@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#define CFS_WS_NAME_LEN         16
+
+/* One workitem scheduler: its queues, its threads and their state. */
+struct cfs_wi_sched {
+       /* chain on global list */
+       struct list_head                ws_list;
+       /** serialised workitems */
+       spinlock_t              ws_lock;
+       /** where schedulers sleep */
+       wait_queue_head_t               ws_waitq;
+       /** concurrent workitems */
+       struct list_head                ws_runq;
+       /**
+        * rescheduled running-workitems, a workitem can be rescheduled
+        * while running in wi_action(), but we don't want to execute it
+        * again unless it returns from wi_action(), so we put it on
+        * ws_rerunq while rescheduling, and move it to runq after it
+        * returns from wi_action()
+        */
+       struct list_head                ws_rerunq;
+       /** CPT-table for this scheduler */
+       struct cfs_cpt_table    *ws_cptab;
+       /** CPT id for affinity */
+       int                     ws_cpt;
+       /** number of scheduled workitems */
+       int                     ws_nscheduled;
+       /** started scheduler thread, protected by cfs_wi_data::wi_glock */
+       unsigned int            ws_nthreads:30;
+       /** shutting down, protected by cfs_wi_data::wi_glock */
+       unsigned int            ws_stopping:1;
+       /** serialize starting thread, protected by cfs_wi_data::wi_glock */
+       unsigned int            ws_starting:1;
+       /** scheduler name */
+       char                    ws_name[CFS_WS_NAME_LEN];
+};
+
+/* Global state of the workitem module; there is a single instance. */
+static struct cfs_workitem_data {
+       /** serialize */
+       spinlock_t              wi_glock;
+       /** list of all schedulers */
+       struct list_head                wi_scheds;
+       /** WI module is initialized */
+       int                     wi_init;
+       /** shutting down the whole WI module */
+       int                     wi_stopping;
+} cfs_wi_data;
+
+/*
+ * Tell whether a scheduler thread may go to sleep.
+ *
+ * Returns 1 only if \a sched is not stopping and its run queue is empty,
+ * 0 otherwise. Both are sampled under ws_lock.
+ */
+static inline int
+cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
+{
+       int idle;
+
+       spin_lock(&sched->ws_lock);
+       idle = !sched->ws_stopping && list_empty(&sched->ws_runq);
+       spin_unlock(&sched->ws_lock);
+
+       return idle;
+}
+
+/*
+ * Retire workitem \a wi from scheduler \a sched.
+ *
+ * XXX:
+ * 0. it only works when called from wi->wi_action.
+ * 1. when it returns no one shall try to schedule the workitem.
+ *
+ * Any pending schedule request is cancelled, and wi_scheduled is then left
+ * set with the workitem on no list, so a later cfs_wi_schedule() trips its
+ * final LASSERT.
+ */
+void
+cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+       LASSERT(!in_interrupt()); /* because we use plain spinlock */
+       LASSERT(!sched->ws_stopping);
+
+       spin_lock(&sched->ws_lock);
+
+       LASSERT(wi->wi_running);
+       if (wi->wi_scheduled) { /* cancel pending schedules */
+               LASSERT(!list_empty(&wi->wi_list));
+               list_del_init(&wi->wi_list);
+
+               LASSERT(sched->ws_nscheduled > 0);
+               sched->ws_nscheduled--;
+       }
+
+       LASSERT(list_empty(&wi->wi_list));
+
+       wi->wi_scheduled = 1; /* LBUG future schedule attempts */
+       spin_unlock(&sched->ws_lock);
+}
+EXPORT_SYMBOL(cfs_wi_exit);
+
+/**
+ * cancel schedule request of workitem \a wi
+ *
+ * A pending request is removed from whichever queue it is on and
+ * wi_scheduled is cleared.
+ *
+ * \retval 0 \a wi is running right now
+ * \retval 1 \a wi is not running
+ */
+int
+cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+       int     rc;
+
+       LASSERT(!in_interrupt()); /* because we use plain spinlock */
+       LASSERT(!sched->ws_stopping);
+
+       /*
+        * return 0 if it's running already, otherwise return 1, which
+        * means the workitem will not be scheduled and will not have
+        * any race with wi_action.
+        */
+       spin_lock(&sched->ws_lock);
+
+       rc = !(wi->wi_running);
+
+       if (wi->wi_scheduled) { /* cancel pending schedules */
+               LASSERT(!list_empty(&wi->wi_list));
+               list_del_init(&wi->wi_list);
+
+               LASSERT(sched->ws_nscheduled > 0);
+               sched->ws_nscheduled--;
+
+               wi->wi_scheduled = 0;
+       }
+
+       LASSERT(list_empty(&wi->wi_list));
+
+       spin_unlock(&sched->ws_lock);
+       return rc;
+}
+EXPORT_SYMBOL(cfs_wi_deschedule);
+
+/*
+ * Queue workitem \a wi on scheduler \a sched.
+ *
+ * A workitem that is already scheduled is left where it is. Otherwise it is
+ * appended to ws_runq and a scheduler thread is woken, unless its action is
+ * running right now, in which case it is parked on ws_rerunq.
+ *
+ * NOTE(review): the comment previously here described workitems "scheduled
+ * with (serial == 1)" and a single static serialised queue; this function
+ * takes no such argument, so that text looks stale - confirm.
+ */
+void
+cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+       LASSERT(!in_interrupt()); /* because we use plain spinlock */
+       LASSERT(!sched->ws_stopping);
+
+       spin_lock(&sched->ws_lock);
+
+       if (wi->wi_scheduled)
+               goto queued;
+
+       LASSERT(list_empty(&wi->wi_list));
+
+       wi->wi_scheduled = 1;
+       sched->ws_nscheduled++;
+
+       if (wi->wi_running) {
+               /* action in progress: hold it back until that returns */
+               list_add(&wi->wi_list, &sched->ws_rerunq);
+       } else {
+               list_add_tail(&wi->wi_list, &sched->ws_runq);
+               wake_up(&sched->ws_waitq);
+       }
+
+queued:
+       LASSERT(!list_empty(&wi->wi_list));
+       spin_unlock(&sched->ws_lock);
+}
+EXPORT_SYMBOL(cfs_wi_schedule);
+
+/*
+ * Main loop of one scheduler thread; 'arg' is its struct cfs_wi_sched.
+ *
+ * Runs workitems from ws_runq in batches of at most CFS_WI_RESCHED, with
+ * ws_lock dropped around each wi_action() call. If work remains after a
+ * batch it yields with cond_resched(); otherwise it sleeps on ws_waitq
+ * until cfs_wi_sched_cansleep() turns false. Leaves the loop once
+ * ws_stopping is set and returns 0.
+ */
+static int cfs_wi_scheduler(void *arg)
+{
+       struct cfs_wi_sched     *sched = (struct cfs_wi_sched *)arg;
+
+       cfs_block_allsigs();
+
+       /* CPT affinity scheduler? */
+       if (sched->ws_cptab)
+               if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
+                       CWARN("Failed to bind %s on CPT %d\n",
+                             sched->ws_name, sched->ws_cpt);
+
+       spin_lock(&cfs_wi_data.wi_glock);
+
+       /* this thread has started: move it from "starting" to "running" */
+       LASSERT(sched->ws_starting == 1);
+       sched->ws_starting--;
+       sched->ws_nthreads++;
+
+       spin_unlock(&cfs_wi_data.wi_glock);
+
+       spin_lock(&sched->ws_lock);
+
+       while (!sched->ws_stopping) {
+               int          nloops = 0;
+               int          rc;
+               cfs_workitem_t *wi;
+
+               while (!list_empty(&sched->ws_runq) &&
+                      nloops < CFS_WI_RESCHED) {
+                       wi = list_entry(sched->ws_runq.next, cfs_workitem_t,
+                                       wi_list);
+                       LASSERT(wi->wi_scheduled && !wi->wi_running);
+
+                       list_del_init(&wi->wi_list);
+
+                       LASSERT(sched->ws_nscheduled > 0);
+                       sched->ws_nscheduled--;
+
+                       wi->wi_running   = 1;
+                       wi->wi_scheduled = 0;
+
+                       /* the action runs without ws_lock held */
+                       spin_unlock(&sched->ws_lock);
+                       nloops++;
+
+                       rc = (*wi->wi_action) (wi);
+
+                       spin_lock(&sched->ws_lock);
+                       if (rc != 0) /* WI should be dead, even be freed! */
+                               continue;
+
+                       wi->wi_running = 0;
+                       /* not rescheduled while it ran: nothing more to do */
+                       if (list_empty(&wi->wi_list))
+                               continue;
+
+                       LASSERT(wi->wi_scheduled);
+                       /* wi is rescheduled, should be on rerunq now, we
+                        * move it to runq so it can run action now
+                        */
+                       list_move_tail(&wi->wi_list, &sched->ws_runq);
+               }
+
+               if (!list_empty(&sched->ws_runq)) {
+                       spin_unlock(&sched->ws_lock);
+                       /* don't sleep because some workitems still
+                        * expect me to come back soon
+                        */
+                       cond_resched();
+                       spin_lock(&sched->ws_lock);
+                       continue;
+               }
+
+               spin_unlock(&sched->ws_lock);
+               /* NOTE(review): rc is assigned here but never read */
+               rc = wait_event_interruptible_exclusive(sched->ws_waitq,
+                                               !cfs_wi_sched_cansleep(sched));
+               spin_lock(&sched->ws_lock);
+       }
+
+       spin_unlock(&sched->ws_lock);
+
+       spin_lock(&cfs_wi_data.wi_glock);
+       sched->ws_nthreads--;
+       spin_unlock(&cfs_wi_data.wi_glock);
+
+       return 0;
+}
+
+/*
+ * Stop all threads of workitem scheduler \a sched, unlink it from the
+ * global scheduler list and free it.
+ *
+ * If another caller has already set ws_stopping the function only logs
+ * that and returns, leaving the teardown to that caller.  Otherwise it
+ * marks the scheduler as stopping, wakes every waiter on ws_waitq and
+ * polls ws_nthreads (which each worker decrements on exit) about every
+ * 1/20 of a second until it reaches zero.
+ *
+ * Must not be called before cfs_wi_startup() or while cfs_wi_shutdown()
+ * is in progress (asserted), and \a sched must be on the global list.
+ */
+void
+cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
+{
+       int     i;
+
+       LASSERT(cfs_wi_data.wi_init);
+       LASSERT(!cfs_wi_data.wi_stopping);
+
+       spin_lock(&cfs_wi_data.wi_glock);
+       if (sched->ws_stopping) {
+               CDEBUG(D_INFO, "%s is in progress of stopping\n",
+                      sched->ws_name);
+               spin_unlock(&cfs_wi_data.wi_glock);
+               return;
+       }
+
+       LASSERT(!list_empty(&sched->ws_list));
+       sched->ws_stopping = 1;
+
+       spin_unlock(&cfs_wi_data.wi_glock);
+
+       /* poll counter; starting at 2 makes the second poll (i == 4) the
+        * first one that is logged at D_WARNING
+        */
+       i = 2;
+       wake_up_all(&sched->ws_waitq);
+
+       spin_lock(&cfs_wi_data.wi_glock);
+       while (sched->ws_nthreads > 0) {
+               /* Count the poll in its own statement: CDEBUG() is a macro,
+                * so an increment inside its argument would run as many
+                * times as the macro happens to expand that argument.
+                */
+               i++;
+               CDEBUG(is_power_of_2(i) ? D_WARNING : D_NET,
+                      "waiting for %d threads of WI sched[%s] to terminate\n",
+                      sched->ws_nthreads, sched->ws_name);
+
+               /* sleep without the global lock so exiting workers can
+                * take it to decrement ws_nthreads
+                */
+               spin_unlock(&cfs_wi_data.wi_glock);
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               schedule_timeout(cfs_time_seconds(1) / 20);
+               spin_lock(&cfs_wi_data.wi_glock);
+       }
+
+       list_del(&sched->ws_list);
+
+       spin_unlock(&cfs_wi_data.wi_glock);
+       /* all workers are gone; nothing may be left queued */
+       LASSERT(sched->ws_nscheduled == 0);
+
+       LIBCFS_FREE(sched, sizeof(*sched));
+}
+EXPORT_SYMBOL(cfs_wi_sched_destroy);
+
+/*
+ * Allocate a workitem scheduler called \a name and start \a nthrs worker
+ * threads for it.
+ *
+ * \a cptab may be NULL, in which case the workers are not bound to a CPU
+ * partition; otherwise \a cpt must be CFS_CPT_ANY or a valid partition
+ * index of \a cptab (asserted).  Threads are started one at a time: the
+ * next one is only launched after the previous one has cleared
+ * ws_starting in cfs_wi_scheduler().
+ *
+ * Returns 0 and stores the scheduler in \a *sched_pp on success.  On
+ * failure \a *sched_pp is left untouched and the result is -ENOMEM
+ * (allocation failed), -E2BIG (\a name does not fit in ws_name) or the
+ * error from kthread_run(); in the last case the threads that were
+ * already started are stopped and the scheduler is freed.
+ */
+int
+cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
+                   int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
+{
+       struct cfs_wi_sched     *sched;
+       int                     rc;
+
+       LASSERT(cfs_wi_data.wi_init);
+       LASSERT(!cfs_wi_data.wi_stopping);
+       LASSERT(!cptab || cpt == CFS_CPT_ANY ||
+               (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
+
+       LIBCFS_ALLOC(sched, sizeof(*sched));
+       if (!sched)
+               return -ENOMEM;
+
+       /* the length check guarantees strncpy() below leaves room for,
+        * and writes, the terminating NUL
+        */
+       if (strlen(name) > sizeof(sched->ws_name) - 1) {
+               LIBCFS_FREE(sched, sizeof(*sched));
+               return -E2BIG;
+       }
+       strncpy(sched->ws_name, name, sizeof(sched->ws_name));
+
+       sched->ws_cptab = cptab;
+       sched->ws_cpt = cpt;
+
+       spin_lock_init(&sched->ws_lock);
+       init_waitqueue_head(&sched->ws_waitq);
+       INIT_LIST_HEAD(&sched->ws_runq);
+       INIT_LIST_HEAD(&sched->ws_rerunq);
+       INIT_LIST_HEAD(&sched->ws_list);
+
+       rc = 0;
+       while (nthrs > 0)  {
+               /* per-thread name; kept distinct from the 'name' parameter
+                * so the scheduler name is not shadowed inside the loop.
+                * snprintf() truncates it to the 16-byte buffer.
+                */
+               char    thr_name[16];
+               struct task_struct *task;
+
+               /* wait until the previously started thread has checked in */
+               spin_lock(&cfs_wi_data.wi_glock);
+               while (sched->ws_starting > 0) {
+                       spin_unlock(&cfs_wi_data.wi_glock);
+                       schedule();
+                       spin_lock(&cfs_wi_data.wi_glock);
+               }
+
+               sched->ws_starting++;
+               spin_unlock(&cfs_wi_data.wi_glock);
+
+               if (sched->ws_cptab && sched->ws_cpt >= 0) {
+                       snprintf(thr_name, sizeof(thr_name), "%s_%02d_%02u",
+                                sched->ws_name, sched->ws_cpt,
+                                sched->ws_nthreads);
+               } else {
+                       snprintf(thr_name, sizeof(thr_name), "%s_%02u",
+                                sched->ws_name, sched->ws_nthreads);
+               }
+
+               task = kthread_run(cfs_wi_scheduler, sched, "%s", thr_name);
+               if (!IS_ERR(task)) {
+                       nthrs--;
+                       continue;
+               }
+               rc = PTR_ERR(task);
+
+               CERROR("Failed to create thread for WI scheduler %s: %d\n",
+                      thr_name, rc);
+
+               spin_lock(&cfs_wi_data.wi_glock);
+
+               /* make up for cfs_wi_sched_destroy */
+               /* (it asserts the scheduler is listed and that no thread
+                * is still accounted as starting)
+                */
+               list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
+               sched->ws_starting--;
+
+               spin_unlock(&cfs_wi_data.wi_glock);
+
+               cfs_wi_sched_destroy(sched);
+               return rc;
+       }
+       /* all threads started: publish the scheduler on the global list */
+       spin_lock(&cfs_wi_data.wi_glock);
+       list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
+       spin_unlock(&cfs_wi_data.wi_glock);
+
+       *sched_pp = sched;
+       return 0;
+}
+EXPORT_SYMBOL(cfs_wi_sched_create);
+
+/*
+ * Initialise the global workitem state: wipe cfs_wi_data, set up its
+ * scheduler list and lock, and flag the subsystem as initialised.
+ * Always returns 0.
+ */
+int
+cfs_wi_startup(void)
+{
+       /* start from an all-zero state (wi_stopping and wi_init cleared) */
+       memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
+
+       INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
+       spin_lock_init(&cfs_wi_data.wi_glock);
+
+       /* from here on cfs_wi_sched_create()/destroy() pass their asserts */
+       cfs_wi_data.wi_init = 1;
+       return 0;
+}
+
+/*
+ * Shut the workitem subsystem down: flag it as stopping, tell every
+ * scheduler still on the global list to stop, wait for all of their
+ * threads to exit, free the schedulers and finally clear the
+ * stopping/initialised flags.
+ */
+void
+cfs_wi_shutdown(void)
+{
+       struct list_head        *all = &cfs_wi_data.wi_scheds;
+       struct cfs_wi_sched     *sched;
+
+       spin_lock(&cfs_wi_data.wi_glock);
+       cfs_wi_data.wi_stopping = 1;
+       spin_unlock(&cfs_wi_data.wi_glock);
+
+       /* nobody should contend on this list */
+       /* first pass: ask every scheduler to stop before waiting on any */
+       list_for_each_entry(sched, all, ws_list) {
+               sched->ws_stopping = 1;
+               wake_up_all(&sched->ws_waitq);
+       }
+
+       /* second pass: poll each scheduler until its threads are gone */
+       list_for_each_entry(sched, all, ws_list) {
+               for (;;) {
+                       int     idle;
+
+                       spin_lock(&cfs_wi_data.wi_glock);
+                       idle = (sched->ws_nthreads == 0);
+                       spin_unlock(&cfs_wi_data.wi_glock);
+                       if (idle)
+                               break;
+
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_timeout(cfs_time_seconds(1) / 20);
+               }
+       }
+
+       /* finally unlink and free whatever is left on the list */
+       while (!list_empty(all)) {
+               sched = list_entry(all->next, struct cfs_wi_sched, ws_list);
+               list_del(&sched->ws_list);
+               LIBCFS_FREE(sched, sizeof(*sched));
+       }
+
+       cfs_wi_data.wi_init = 0;
+       cfs_wi_data.wi_stopping = 0;
+}
diff --git a/drivers/staging/lustre/lustre/Makefile b/drivers/staging/lustre/lustre/Makefile

index 35d8b0b..331e4fc 100644 (file)
--- a/drivers/staging/lustre/lustre/Makefile
+++ b/drivers/staging/lustre/lustre/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_LUSTRE_FS) += libcfs/ obdclass/ ptlrpc/ fld/ osc/ mgc/ \
+obj-$(CONFIG_LUSTRE_FS) += obdclass/ ptlrpc/ fld/ osc/ mgc/ \
                            fid/ lov/ mdc/ lmv/ llite/ obdecho/
diff --git a/drivers/staging/lustre/lustre/libcfs/Makefile b/drivers/staging/lustre/lustre/libcfs/Makefile

deleted file mode 100644 (file)

index 277c123..0000000
--- a/drivers/staging/lustre/lustre/libcfs/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-obj-$(CONFIG_LUSTRE_FS) += libcfs.o
-
-libcfs-linux-objs := linux-tracefile.o linux-debug.o
-libcfs-linux-objs += linux-prim.o linux-cpu.o
-libcfs-linux-objs += linux-curproc.o
-libcfs-linux-objs += linux-module.o
-libcfs-linux-objs += linux-crypto.o
-libcfs-linux-objs += linux-crypto-adler.o
-libcfs-linux-objs += linux-mem.o
-
-libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
-
-libcfs-all-objs := debug.o fail.o module.o tracefile.o \
-                  libcfs_string.o hash.o prng.o workitem.o \
-                  libcfs_cpu.o libcfs_mem.o libcfs_lock.o
-
-libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
diff --git a/drivers/staging/lustre/lustre/libcfs/debug.c b/drivers/staging/lustre/lustre/libcfs/debug.c

deleted file mode 100644 (file)

index c90e510..0000000
--- a/drivers/staging/lustre/lustre/libcfs/debug.c
+++ /dev/null
@@ -1,560 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- */
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include "../../include/linux/libcfs/libcfs.h"
-#include "tracefile.h"
-
-static char debug_file_name[1024];
-
-unsigned int libcfs_subsystem_debug = ~0;
-EXPORT_SYMBOL(libcfs_subsystem_debug);
-module_param(libcfs_subsystem_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask");
-
-unsigned int libcfs_debug = (D_CANTMASK |
-                            D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
-EXPORT_SYMBOL(libcfs_debug);
-module_param(libcfs_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask");
-
-static int libcfs_param_debug_mb_set(const char *val,
-                                    const struct kernel_param *kp)
-{
-       int rc;
-       unsigned num;
-
-       rc = kstrtouint(val, 0, &num);
-       if (rc < 0)
-               return rc;
-
-       if (!*((unsigned int *)kp->arg)) {
-               *((unsigned int *)kp->arg) = num;
-               return 0;
-       }
-
-       rc = cfs_trace_set_debug_mb(num);
-
-       if (!rc)
-               *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb();
-
-       return rc;
-}
-
-/* While debug_mb setting look like unsigned int, in fact
- * it needs quite a bunch of extra processing, so we define special
- * debugmb parameter type with corresponding methods to handle this case
- */
-static struct kernel_param_ops param_ops_debugmb = {
-       .set = libcfs_param_debug_mb_set,
-       .get = param_get_uint,
-};
-
-#define param_check_debugmb(name, p) \
-               __param_check(name, p, unsigned int)
-
-static unsigned int libcfs_debug_mb;
-module_param(libcfs_debug_mb, debugmb, 0644);
-MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size.");
-
-unsigned int libcfs_printk = D_CANTMASK;
-module_param(libcfs_printk, uint, 0644);
-MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask");
-
-unsigned int libcfs_console_ratelimit = 1;
-module_param(libcfs_console_ratelimit, uint, 0644);
-MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)");
-
-static int param_set_delay_minmax(const char *val,
-                                 const struct kernel_param *kp,
-                                 long min, long max)
-{
-       long d;
-       int sec;
-       int rc;
-
-       rc = kstrtoint(val, 0, &sec);
-       if (rc)
-               return -EINVAL;
-
-       d = cfs_time_seconds(sec) / 100;
-       if (d < min || d > max)
-               return -EINVAL;
-
-       *((unsigned int *)kp->arg) = d;
-
-       return 0;
-}
-
-static int param_get_delay(char *buffer, const struct kernel_param *kp)
-{
-       unsigned int d = *(unsigned int *)kp->arg;
-
-       return sprintf(buffer, "%u", (unsigned int)cfs_duration_sec(d * 100));
-}
-
-unsigned int libcfs_console_max_delay;
-unsigned int libcfs_console_min_delay;
-
-static int param_set_console_max_delay(const char *val,
-                                      const struct kernel_param *kp)
-{
-       return param_set_delay_minmax(val, kp,
-                                     libcfs_console_min_delay, INT_MAX);
-}
-
-static struct kernel_param_ops param_ops_console_max_delay = {
-       .set = param_set_console_max_delay,
-       .get = param_get_delay,
-};
-
-#define param_check_console_max_delay(name, p) \
-               __param_check(name, p, unsigned int)
-
-module_param(libcfs_console_max_delay, console_max_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)");
-
-static int param_set_console_min_delay(const char *val,
-                                      const struct kernel_param *kp)
-{
-       return param_set_delay_minmax(val, kp,
-                                     1, libcfs_console_max_delay);
-}
-
-static struct kernel_param_ops param_ops_console_min_delay = {
-       .set = param_set_console_min_delay,
-       .get = param_get_delay,
-};
-
-#define param_check_console_min_delay(name, p) \
-               __param_check(name, p, unsigned int)
-
-module_param(libcfs_console_min_delay, console_min_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)");
-
-static int param_set_uint_minmax(const char *val,
-                                const struct kernel_param *kp,
-                                unsigned int min, unsigned int max)
-{
-       unsigned int num;
-       int ret;
-
-       if (!val)
-               return -EINVAL;
-       ret = kstrtouint(val, 0, &num);
-       if (ret < 0 || num < min || num > max)
-               return -EINVAL;
-       *((unsigned int *)kp->arg) = num;
-       return 0;
-}
-
-static int param_set_uintpos(const char *val, const struct kernel_param *kp)
-{
-       return param_set_uint_minmax(val, kp, 1, -1);
-}
-
-static struct kernel_param_ops param_ops_uintpos = {
-       .set = param_set_uintpos,
-       .get = param_get_uint,
-};
-
-#define param_check_uintpos(name, p) \
-               __param_check(name, p, unsigned int)
-
-unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
-module_param(libcfs_console_backoff, uintpos, 0644);
-MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor");
-
-unsigned int libcfs_debug_binary = 1;
-
-unsigned int libcfs_stack = 3 * THREAD_SIZE / 4;
-EXPORT_SYMBOL(libcfs_stack);
-
-unsigned int libcfs_catastrophe;
-EXPORT_SYMBOL(libcfs_catastrophe);
-
-unsigned int libcfs_panic_on_lbug = 1;
-module_param(libcfs_panic_on_lbug, uint, 0644);
-MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
-
-static wait_queue_head_t debug_ctlwq;
-
-char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
-
-/* We need to pass a pointer here, but elsewhere this must be a const */
-static char *libcfs_debug_file_path;
-module_param(libcfs_debug_file_path, charp, 0644);
-MODULE_PARM_DESC(libcfs_debug_file_path,
-                "Path for dumping debug logs, set 'NONE' to prevent log dumping");
-
-int libcfs_panic_in_progress;
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_subsys2str(int subsys)
-{
-       switch (1 << subsys) {
-       default:
-               return NULL;
-       case S_UNDEFINED:
-               return "undefined";
-       case S_MDC:
-               return "mdc";
-       case S_MDS:
-               return "mds";
-       case S_OSC:
-               return "osc";
-       case S_OST:
-               return "ost";
-       case S_CLASS:
-               return "class";
-       case S_LOG:
-               return "log";
-       case S_LLITE:
-               return "llite";
-       case S_RPC:
-               return "rpc";
-       case S_LNET:
-               return "lnet";
-       case S_LND:
-               return "lnd";
-       case S_PINGER:
-               return "pinger";
-       case S_FILTER:
-               return "filter";
-       case S_ECHO:
-               return "echo";
-       case S_LDLM:
-               return "ldlm";
-       case S_LOV:
-               return "lov";
-       case S_LQUOTA:
-               return "lquota";
-       case S_OSD:
-               return "osd";
-       case S_LFSCK:
-               return "lfsck";
-       case S_LMV:
-               return "lmv";
-       case S_SEC:
-               return "sec";
-       case S_GSS:
-               return "gss";
-       case S_MGC:
-               return "mgc";
-       case S_MGS:
-               return "mgs";
-       case S_FID:
-               return "fid";
-       case S_FLD:
-               return "fld";
-       }
-}
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_dbg2str(int debug)
-{
-       switch (1 << debug) {
-       default:
-               return NULL;
-       case D_TRACE:
-               return "trace";
-       case D_INODE:
-               return "inode";
-       case D_SUPER:
-               return "super";
-       case D_EXT2:
-               return "ext2";
-       case D_MALLOC:
-               return "malloc";
-       case D_CACHE:
-               return "cache";
-       case D_INFO:
-               return "info";
-       case D_IOCTL:
-               return "ioctl";
-       case D_NETERROR:
-               return "neterror";
-       case D_NET:
-               return "net";
-       case D_WARNING:
-               return "warning";
-       case D_BUFFS:
-               return "buffs";
-       case D_OTHER:
-               return "other";
-       case D_DENTRY:
-               return "dentry";
-       case D_NETTRACE:
-               return "nettrace";
-       case D_PAGE:
-               return "page";
-       case D_DLMTRACE:
-               return "dlmtrace";
-       case D_ERROR:
-               return "error";
-       case D_EMERG:
-               return "emerg";
-       case D_HA:
-               return "ha";
-       case D_RPCTRACE:
-               return "rpctrace";
-       case D_VFSTRACE:
-               return "vfstrace";
-       case D_READA:
-               return "reada";
-       case D_MMAP:
-               return "mmap";
-       case D_CONFIG:
-               return "config";
-       case D_CONSOLE:
-               return "console";
-       case D_QUOTA:
-               return "quota";
-       case D_SEC:
-               return "sec";
-       case D_LFSCK:
-               return "lfsck";
-       }
-}
-
-int
-libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
-{
-       const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
-                                                libcfs_debug_dbg2str;
-       int        len = 0;
-       const char   *token;
-       int        i;
-
-       if (mask == 0) {                        /* "0" */
-               if (size > 0)
-                       str[0] = '0';
-               len = 1;
-       } else {                                /* space-separated tokens */
-               for (i = 0; i < 32; i++) {
-                       if ((mask & (1 << i)) == 0)
-                               continue;
-
-                       token = fn(i);
-                       if (!token)           /* unused bit */
-                               continue;
-
-                       if (len > 0) {            /* separator? */
-                               if (len < size)
-                                       str[len] = ' ';
-                               len++;
-                       }
-
-                       while (*token != 0) {
-                               if (len < size)
-                                       str[len] = *token;
-                               token++;
-                               len++;
-                       }
-               }
-       }
-
-       /* terminate 'str' */
-       if (len < size)
-               str[len] = 0;
-       else
-               str[size - 1] = 0;
-
-       return len;
-}
-
-int
-libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
-{
-       const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
-                                                libcfs_debug_dbg2str;
-       int      m = 0;
-       int      matched;
-       int      n;
-       int      t;
-
-       /* Allow a number for backwards compatibility */
-
-       for (n = strlen(str); n > 0; n--)
-               if (!isspace(str[n - 1]))
-                       break;
-       matched = n;
-       t = sscanf(str, "%i%n", &m, &matched);
-       if (t >= 1 && matched == n) {
-               /* don't print warning for lctl set_param debug=0 or -1 */
-               if (m != 0 && m != -1)
-                       CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
-               *mask = m;
-               return 0;
-       }
-
-       return cfs_str2mask(str, fn, mask, is_subsys ? 0 : D_CANTMASK,
-                           0xffffffff);
-}
-
-/**
- * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
- */
-void libcfs_debug_dumplog_internal(void *arg)
-{
-       void *journal_info;
-
-       journal_info = current->journal_info;
-       current->journal_info = NULL;
-
-       if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) {
-               snprintf(debug_file_name, sizeof(debug_file_name) - 1,
-                        "%s.%lld.%ld", libcfs_debug_file_path_arr,
-                        (s64)ktime_get_real_seconds(), (long_ptr_t)arg);
-               pr_alert("LustreError: dumping log to %s\n", debug_file_name);
-               cfs_tracefile_dump_all_pages(debug_file_name);
-               libcfs_run_debug_log_upcall(debug_file_name);
-       }
-
-       current->journal_info = journal_info;
-}
-
-static int libcfs_debug_dumplog_thread(void *arg)
-{
-       libcfs_debug_dumplog_internal(arg);
-       wake_up(&debug_ctlwq);
-       return 0;
-}
-
-void libcfs_debug_dumplog(void)
-{
-       wait_queue_t wait;
-       struct task_struct *dumper;
-
-       /* we're being careful to ensure that the kernel thread is
-        * able to set our state to running as it exits before we
-        * get to schedule()
-        */
-       init_waitqueue_entry(&wait, current);
-       set_current_state(TASK_INTERRUPTIBLE);
-       add_wait_queue(&debug_ctlwq, &wait);
-
-       dumper = kthread_run(libcfs_debug_dumplog_thread,
-                            (void *)(long)current_pid(),
-                            "libcfs_debug_dumper");
-       if (IS_ERR(dumper))
-               pr_err("LustreError: cannot start log dump thread: %ld\n",
-                      PTR_ERR(dumper));
-       else
-               schedule();
-
-       /* be sure to teardown if cfs_create_thread() failed */
-       remove_wait_queue(&debug_ctlwq, &wait);
-       set_current_state(TASK_RUNNING);
-}
-EXPORT_SYMBOL(libcfs_debug_dumplog);
-
-int libcfs_debug_init(unsigned long bufsize)
-{
-       int    rc = 0;
-       unsigned int max = libcfs_debug_mb;
-
-       init_waitqueue_head(&debug_ctlwq);
-
-       if (libcfs_console_max_delay <= 0 || /* not set by user or */
-           libcfs_console_min_delay <= 0 || /* set to invalid values */
-           libcfs_console_min_delay >= libcfs_console_max_delay) {
-               libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
-               libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
-       }
-
-       if (libcfs_debug_file_path) {
-               strlcpy(libcfs_debug_file_path_arr,
-                       libcfs_debug_file_path,
-                       sizeof(libcfs_debug_file_path_arr));
-       }
-
-       /* If libcfs_debug_mb is set to an invalid value or uninitialized
-        * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES
-        */
-       if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) {
-               max = TCD_MAX_PAGES;
-       } else {
-               max = max / num_possible_cpus();
-               max <<= (20 - PAGE_CACHE_SHIFT);
-       }
-       rc = cfs_tracefile_init(max);
-
-       if (rc == 0) {
-               libcfs_register_panic_notifier();
-               libcfs_debug_mb = cfs_trace_get_debug_mb();
-       }
-
-       return rc;
-}
-
-int libcfs_debug_cleanup(void)
-{
-       libcfs_unregister_panic_notifier();
-       cfs_tracefile_exit();
-       return 0;
-}
-
-int libcfs_debug_clear_buffer(void)
-{
-       cfs_trace_flush_pages();
-       return 0;
-}
-
-/* Debug markers, although printed by S_LNET should not be be marked as such. */
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_UNDEFINED
-int libcfs_debug_mark_buffer(const char *text)
-{
-       CDEBUG(D_TRACE,
-              "***************************************************\n");
-       LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
-       CDEBUG(D_TRACE,
-              "***************************************************\n");
-
-       return 0;
-}
-
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_LNET
diff --git a/drivers/staging/lustre/lustre/libcfs/fail.c b/drivers/staging/lustre/lustre/libcfs/fail.c

deleted file mode 100644 (file)

index dadaf76..0000000
--- a/drivers/staging/lustre/lustre/libcfs/fail.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores,
- * CA 94065 USA or visit www.oracle.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Oracle Corporation, Inc.
- */
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-unsigned long cfs_fail_loc;
-EXPORT_SYMBOL(cfs_fail_loc);
-
-unsigned int cfs_fail_val;
-EXPORT_SYMBOL(cfs_fail_val);
-
-DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq);
-EXPORT_SYMBOL(cfs_race_waitq);
-
-int cfs_race_state;
-EXPORT_SYMBOL(cfs_race_state);
-
-int __cfs_fail_check_set(__u32 id, __u32 value, int set)
-{
-       static atomic_t cfs_fail_count = ATOMIC_INIT(0);
-
-       LASSERT(!(id & CFS_FAIL_ONCE));
-
-       if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) ==
-           (CFS_FAILED | CFS_FAIL_ONCE)) {
-               atomic_set(&cfs_fail_count, 0); /* paranoia */
-               return 0;
-       }
-
-       /* Fail 1/cfs_fail_val times */
-       if (cfs_fail_loc & CFS_FAIL_RAND) {
-               if (cfs_fail_val < 2 || cfs_rand() % cfs_fail_val > 0)
-                       return 0;
-       }
-
-       /* Skip the first cfs_fail_val, then fail */
-       if (cfs_fail_loc & CFS_FAIL_SKIP) {
-               if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val)
-                       return 0;
-       }
-
-       /* check cfs_fail_val... */
-       if (set == CFS_FAIL_LOC_VALUE) {
-               if (cfs_fail_val != -1 && cfs_fail_val != value)
-                       return 0;
-       }
-
-       /* Fail cfs_fail_val times, overridden by FAIL_ONCE */
-       if (cfs_fail_loc & CFS_FAIL_SOME &&
-           (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) {
-               int count = atomic_inc_return(&cfs_fail_count);
-
-               if (count >= cfs_fail_val) {
-                       set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
-                       atomic_set(&cfs_fail_count, 0);
-                       /* we are lost race to increase  */
-                       if (count > cfs_fail_val)
-                               return 0;
-               }
-       }
-
-       if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) &&
-           (value & CFS_FAIL_ONCE))
-               set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
-       /* Lost race to set CFS_FAILED_BIT. */
-       if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
-               /* If CFS_FAIL_ONCE is valid, only one process can fail,
-                * otherwise multi-process can fail at the same time.
-                */
-               if (cfs_fail_loc & CFS_FAIL_ONCE)
-                       return 0;
-       }
-
-       switch (set) {
-       case CFS_FAIL_LOC_NOSET:
-       case CFS_FAIL_LOC_VALUE:
-               break;
-       case CFS_FAIL_LOC_ORSET:
-               cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE);
-               break;
-       case CFS_FAIL_LOC_RESET:
-               cfs_fail_loc = value;
-               break;
-       default:
-               LASSERTF(0, "called with bad set %u\n", set);
-               break;
-       }
-
-       return 1;
-}
-EXPORT_SYMBOL(__cfs_fail_check_set);
-
-int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
-{
-       int ret;
-
-       ret = __cfs_fail_check_set(id, value, set);
-       if (ret && likely(ms > 0)) {
-               CERROR("cfs_fail_timeout id %x sleeping for %dms\n",
-                      id, ms);
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(ms) / 1000);
-               CERROR("cfs_fail_timeout id %x awake\n", id);
-       }
-       return ret;
-}
-EXPORT_SYMBOL(__cfs_fail_timeout_set);
diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c

deleted file mode 100644 (file)

index f60feb3..0000000
--- a/drivers/staging/lustre/lustre/libcfs/hash.c
+++ /dev/null
@@ -1,2085 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/hash.c
- *
- * Implement a hash class for hash process in lustre system.
- *
- * Author: YuZhangyong <yzy@clusterfs.com>
- *
- * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
- * - Simplified API and improved documentation
- * - Added per-hash feature flags:
- *   * CFS_HASH_DEBUG additional validation
- *   * CFS_HASH_REHASH dynamic rehashing
- * - Added per-hash statistics
- * - General performance enhancements
- *
- * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
- * - move all stuff to libcfs
- * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
- * - ignore hs_rwlock if without CFS_HASH_REHASH setting
- * - buckets are allocated one by one(instead of contiguous memory),
- *   to avoid unnecessary cacheline conflict
- *
- * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
- * - "bucket" is a group of hlist_head now, user can specify bucket size
- *   by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share
- *   one lock for reducing memory overhead.
- *
- * - support lockless hash, caller will take care of locks:
- *   avoid lock overhead for hash tables that are already protected
- *   by locking in the caller for another reason
- *
- * - support both spin_lock/rwlock for bucket:
- *   overhead of spinlock contention is lower than read/write
- *   contention of rwlock, so using spinlock to serialize operations on
- *   bucket is more reasonable for those frequently changed hash tables
- *
- * - support one-single lock mode:
- *   one lock to protect all hash operations to avoid overhead of
- *   multiple locks if hash table is always small
- *
- * - removed a lot of unnecessary addref & decref on hash element:
- *   addref & decref are atomic operations in many use-cases which
- *   are expensive.
- *
- * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
- *   some lustre use-cases require these functions to be strictly
- *   non-blocking, we need to schedule required rehash on a different
- *   thread on those cases.
- *
- * - safer rehash on large hash table
- *   In old implementation, rehash function will exclusively lock the
- *   hash table and finish rehash in one batch, it's dangerous on SMP
- *   system because rehash millions of elements could take long time.
- *   New implemented rehash can release lock and relax CPU in middle
- *   of rehash, it's safe for another thread to search/change on the
- *   hash table even it's in rehasing.
- *
- * - support two different refcount modes
- *   . hash table has refcount on element
- *   . hash table doesn't change refcount on adding/removing element
- *
- * - support long name hash table (for param-tree)
- *
- * - fix a bug for cfs_hash_rehash_key:
- *   in old implementation, cfs_hash_rehash_key could screw up the
- *   hash-table because @key is overwritten without any protection.
- *   Now we need user to define hs_keycpy for those rehash enabled
- *   hash tables, cfs_hash_rehash_key will overwrite hash-key
- *   inside lock by calling hs_keycpy.
- *
- * - better hash iteration:
- *   Now we support both locked iteration & lockless iteration of hash
- *   table. Also, user can break the iteration by return 1 in callback.
- */
-#include <linux/seq_file.h>
-#include <linux/log2.h>
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static unsigned int warn_on_depth = 8;
-module_param(warn_on_depth, uint, 0644);
-MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high.");
-#endif
-
-struct cfs_wi_sched *cfs_sched_rehash;
-
-static inline void
-cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive)
-       __acquires(&lock->spin)
-{
-       spin_lock(&lock->spin);
-}
-
-static inline void
-cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive)
-       __releases(&lock->spin)
-{
-       spin_unlock(&lock->spin);
-}
-
-static inline void
-cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive)
-       __acquires(&lock->rw)
-{
-       if (!exclusive)
-               read_lock(&lock->rw);
-       else
-               write_lock(&lock->rw);
-}
-
-static inline void
-cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive)
-       __releases(&lock->rw)
-{
-       if (!exclusive)
-               read_unlock(&lock->rw);
-       else
-               write_unlock(&lock->rw);
-}
-
-/** No lock hash */
-static struct cfs_hash_lock_ops cfs_hash_nl_lops = {
-       .hs_lock        = cfs_hash_nl_lock,
-       .hs_unlock      = cfs_hash_nl_unlock,
-       .hs_bkt_lock    = cfs_hash_nl_lock,
-       .hs_bkt_unlock  = cfs_hash_nl_unlock,
-};
-
-/** no bucket lock, one spinlock to protect everything */
-static struct cfs_hash_lock_ops cfs_hash_nbl_lops = {
-       .hs_lock        = cfs_hash_spin_lock,
-       .hs_unlock      = cfs_hash_spin_unlock,
-       .hs_bkt_lock    = cfs_hash_nl_lock,
-       .hs_bkt_unlock  = cfs_hash_nl_unlock,
-};
-
-/** spin bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = {
-       .hs_lock        = cfs_hash_rw_lock,
-       .hs_unlock      = cfs_hash_rw_unlock,
-       .hs_bkt_lock    = cfs_hash_spin_lock,
-       .hs_bkt_unlock  = cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops = {
-       .hs_lock        = cfs_hash_rw_lock,
-       .hs_unlock      = cfs_hash_rw_unlock,
-       .hs_bkt_lock    = cfs_hash_rw_lock,
-       .hs_bkt_unlock  = cfs_hash_rw_unlock,
-};
-
-/** spin bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = {
-       .hs_lock        = cfs_hash_nl_lock,
-       .hs_unlock      = cfs_hash_nl_unlock,
-       .hs_bkt_lock    = cfs_hash_spin_lock,
-       .hs_bkt_unlock  = cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = {
-       .hs_lock        = cfs_hash_nl_lock,
-       .hs_unlock      = cfs_hash_nl_unlock,
-       .hs_bkt_lock    = cfs_hash_rw_lock,
-       .hs_bkt_unlock  = cfs_hash_rw_unlock,
-};
-
-static void
-cfs_hash_lock_setup(struct cfs_hash *hs)
-{
-       if (cfs_hash_with_no_lock(hs)) {
-               hs->hs_lops = &cfs_hash_nl_lops;
-
-       } else if (cfs_hash_with_no_bktlock(hs)) {
-               hs->hs_lops = &cfs_hash_nbl_lops;
-               spin_lock_init(&hs->hs_lock.spin);
-
-       } else if (cfs_hash_with_rehash(hs)) {
-               rwlock_init(&hs->hs_lock.rw);
-
-               if (cfs_hash_with_rw_bktlock(hs))
-                       hs->hs_lops = &cfs_hash_bkt_rw_lops;
-               else if (cfs_hash_with_spin_bktlock(hs))
-                       hs->hs_lops = &cfs_hash_bkt_spin_lops;
-               else
-                       LBUG();
-       } else {
-               if (cfs_hash_with_rw_bktlock(hs))
-                       hs->hs_lops = &cfs_hash_nr_bkt_rw_lops;
-               else if (cfs_hash_with_spin_bktlock(hs))
-                       hs->hs_lops = &cfs_hash_nr_bkt_spin_lops;
-               else
-                       LBUG();
-       }
-}
-
-/**
- * Simple hash head without depth tracking
- * new element is always added to head of hlist
- */
-struct cfs_hash_head {
-       struct hlist_head       hh_head;        /**< entries list */
-};
-
-static int
-cfs_hash_hh_hhead_size(struct cfs_hash *hs)
-{
-       return sizeof(struct cfs_hash_head);
-}
-
-static struct hlist_head *
-cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-       struct cfs_hash_head *head;
-
-       head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0];
-       return &head[bd->bd_offset].hh_head;
-}
-
-static int
-cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                     struct hlist_node *hnode)
-{
-       hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd));
-       return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                     struct hlist_node *hnode)
-{
-       hlist_del_init(hnode);
-       return -1; /* unknown depth */
-}
-
-/**
- * Simple hash head with depth tracking
- * new element is always added to head of hlist
- */
-struct cfs_hash_head_dep {
-       struct hlist_head       hd_head;        /**< entries list */
-       unsigned int            hd_depth;       /**< list length */
-};
-
-static int
-cfs_hash_hd_hhead_size(struct cfs_hash *hs)
-{
-       return sizeof(struct cfs_hash_head_dep);
-}
-
-static struct hlist_head *
-cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-       struct cfs_hash_head_dep   *head;
-
-       head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0];
-       return &head[bd->bd_offset].hd_head;
-}
-
-static int
-cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                     struct hlist_node *hnode)
-{
-       struct cfs_hash_head_dep *hh;
-
-       hh = container_of(cfs_hash_hd_hhead(hs, bd),
-                         struct cfs_hash_head_dep, hd_head);
-       hlist_add_head(hnode, &hh->hd_head);
-       return ++hh->hd_depth;
-}
-
-static int
-cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                     struct hlist_node *hnode)
-{
-       struct cfs_hash_head_dep *hh;
-
-       hh = container_of(cfs_hash_hd_hhead(hs, bd),
-                         struct cfs_hash_head_dep, hd_head);
-       hlist_del_init(hnode);
-       return --hh->hd_depth;
-}
-
-/**
- * double links hash head without depth tracking
- * new element is always added to tail of hlist
- */
-struct cfs_hash_dhead {
-       struct hlist_head       dh_head;        /**< entries list */
-       struct hlist_node       *dh_tail;       /**< the last entry */
-};
-
-static int
-cfs_hash_dh_hhead_size(struct cfs_hash *hs)
-{
-       return sizeof(struct cfs_hash_dhead);
-}
-
-static struct hlist_head *
-cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-       struct cfs_hash_dhead *head;
-
-       head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0];
-       return &head[bd->bd_offset].dh_head;
-}
-
-static int
-cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                     struct hlist_node *hnode)
-{
-       struct cfs_hash_dhead *dh;
-
-       dh = container_of(cfs_hash_dh_hhead(hs, bd),
-                         struct cfs_hash_dhead, dh_head);
-       if (dh->dh_tail) /* not empty */
-               hlist_add_behind(hnode, dh->dh_tail);
-       else /* empty list */
-               hlist_add_head(hnode, &dh->dh_head);
-       dh->dh_tail = hnode;
-       return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                     struct hlist_node *hnd)
-{
-       struct cfs_hash_dhead *dh;
-
-       dh = container_of(cfs_hash_dh_hhead(hs, bd),
-                         struct cfs_hash_dhead, dh_head);
-       if (!hnd->next) { /* it's the tail */
-               dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL :
-                             container_of(hnd->pprev, struct hlist_node, next);
-       }
-       hlist_del_init(hnd);
-       return -1; /* unknown depth */
-}
-
-/**
- * double links hash head with depth tracking
- * new element is always added to tail of hlist
- */
-struct cfs_hash_dhead_dep {
-       struct hlist_head       dd_head;        /**< entries list */
-       struct hlist_node       *dd_tail;       /**< the last entry */
-       unsigned int            dd_depth;       /**< list length */
-};
-
-static int
-cfs_hash_dd_hhead_size(struct cfs_hash *hs)
-{
-       return sizeof(struct cfs_hash_dhead_dep);
-}
-
-static struct hlist_head *
-cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
-       struct cfs_hash_dhead_dep *head;
-
-       head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0];
-       return &head[bd->bd_offset].dd_head;
-}
-
-static int
-cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                     struct hlist_node *hnode)
-{
-       struct cfs_hash_dhead_dep *dh;
-
-       dh = container_of(cfs_hash_dd_hhead(hs, bd),
-                         struct cfs_hash_dhead_dep, dd_head);
-       if (dh->dd_tail) /* not empty */
-               hlist_add_behind(hnode, dh->dd_tail);
-       else /* empty list */
-               hlist_add_head(hnode, &dh->dd_head);
-       dh->dd_tail = hnode;
-       return ++dh->dd_depth;
-}
-
-static int
-cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                     struct hlist_node *hnd)
-{
-       struct cfs_hash_dhead_dep *dh;
-
-       dh = container_of(cfs_hash_dd_hhead(hs, bd),
-                         struct cfs_hash_dhead_dep, dd_head);
-       if (!hnd->next) { /* it's the tail */
-               dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL :
-                             container_of(hnd->pprev, struct hlist_node, next);
-       }
-       hlist_del_init(hnd);
-       return --dh->dd_depth;
-}
-
-static struct cfs_hash_hlist_ops cfs_hash_hh_hops = {
-       .hop_hhead      = cfs_hash_hh_hhead,
-       .hop_hhead_size = cfs_hash_hh_hhead_size,
-       .hop_hnode_add  = cfs_hash_hh_hnode_add,
-       .hop_hnode_del  = cfs_hash_hh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_hd_hops = {
-       .hop_hhead      = cfs_hash_hd_hhead,
-       .hop_hhead_size = cfs_hash_hd_hhead_size,
-       .hop_hnode_add  = cfs_hash_hd_hnode_add,
-       .hop_hnode_del  = cfs_hash_hd_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dh_hops = {
-       .hop_hhead      = cfs_hash_dh_hhead,
-       .hop_hhead_size = cfs_hash_dh_hhead_size,
-       .hop_hnode_add  = cfs_hash_dh_hnode_add,
-       .hop_hnode_del  = cfs_hash_dh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dd_hops = {
-       .hop_hhead      = cfs_hash_dd_hhead,
-       .hop_hhead_size = cfs_hash_dd_hhead_size,
-       .hop_hnode_add  = cfs_hash_dd_hnode_add,
-       .hop_hnode_del  = cfs_hash_dd_hnode_del,
-};
-
-static void
-cfs_hash_hlist_setup(struct cfs_hash *hs)
-{
-       if (cfs_hash_with_add_tail(hs)) {
-               hs->hs_hops = cfs_hash_with_depth(hs) ?
-                             &cfs_hash_dd_hops : &cfs_hash_dh_hops;
-       } else {
-               hs->hs_hops = cfs_hash_with_depth(hs) ?
-                             &cfs_hash_hd_hops : &cfs_hash_hh_hops;
-       }
-}
-
-static void
-cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts,
-                    unsigned int bits, const void *key, struct cfs_hash_bd *bd)
-{
-       unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1);
-
-       LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits);
-
-       bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)];
-       bd->bd_offset = index >> (bits - hs->hs_bkt_bits);
-}
-
-void
-cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd)
-{
-       /* NB: caller should hold hs->hs_rwlock if REHASH is set */
-       if (likely(!hs->hs_rehash_buckets)) {
-               cfs_hash_bd_from_key(hs, hs->hs_buckets,
-                                    hs->hs_cur_bits, key, bd);
-       } else {
-               LASSERT(hs->hs_rehash_bits != 0);
-               cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-                                    hs->hs_rehash_bits, key, bd);
-       }
-}
-EXPORT_SYMBOL(cfs_hash_bd_get);
-
-static inline void
-cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur)
-{
-       if (likely(dep_cur <= bd->bd_bucket->hsb_depmax))
-               return;
-
-       bd->bd_bucket->hsb_depmax = dep_cur;
-# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-       if (likely(warn_on_depth == 0 ||
-                  max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
-               return;
-
-       spin_lock(&hs->hs_dep_lock);
-       hs->hs_dep_max  = dep_cur;
-       hs->hs_dep_bkt  = bd->bd_bucket->hsb_index;
-       hs->hs_dep_off  = bd->bd_offset;
-       hs->hs_dep_bits = hs->hs_cur_bits;
-       spin_unlock(&hs->hs_dep_lock);
-
-       cfs_wi_schedule(cfs_sched_rehash, &hs->hs_dep_wi);
-# endif
-}
-
-void
-cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                      struct hlist_node *hnode)
-{
-       int rc;
-
-       rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
-       cfs_hash_bd_dep_record(hs, bd, rc);
-       bd->bd_bucket->hsb_version++;
-       if (unlikely(bd->bd_bucket->hsb_version == 0))
-               bd->bd_bucket->hsb_version++;
-       bd->bd_bucket->hsb_count++;
-
-       if (cfs_hash_with_counter(hs))
-               atomic_inc(&hs->hs_count);
-       if (!cfs_hash_with_no_itemref(hs))
-               cfs_hash_get(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_add_locked);
-
-void
-cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                      struct hlist_node *hnode)
-{
-       hs->hs_hops->hop_hnode_del(hs, bd, hnode);
-
-       LASSERT(bd->bd_bucket->hsb_count > 0);
-       bd->bd_bucket->hsb_count--;
-       bd->bd_bucket->hsb_version++;
-       if (unlikely(bd->bd_bucket->hsb_version == 0))
-               bd->bd_bucket->hsb_version++;
-
-       if (cfs_hash_with_counter(hs)) {
-               LASSERT(atomic_read(&hs->hs_count) > 0);
-               atomic_dec(&hs->hs_count);
-       }
-       if (!cfs_hash_with_no_itemref(hs))
-               cfs_hash_put_locked(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_del_locked);
-
-void
-cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
-                       struct cfs_hash_bd *bd_new, struct hlist_node *hnode)
-{
-       struct cfs_hash_bucket *obkt = bd_old->bd_bucket;
-       struct cfs_hash_bucket *nbkt = bd_new->bd_bucket;
-       int rc;
-
-       if (cfs_hash_bd_compare(bd_old, bd_new) == 0)
-               return;
-
-       /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops
-        * in cfs_hash_bd_del/add_locked
-        */
-       hs->hs_hops->hop_hnode_del(hs, bd_old, hnode);
-       rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode);
-       cfs_hash_bd_dep_record(hs, bd_new, rc);
-
-       LASSERT(obkt->hsb_count > 0);
-       obkt->hsb_count--;
-       obkt->hsb_version++;
-       if (unlikely(obkt->hsb_version == 0))
-               obkt->hsb_version++;
-       nbkt->hsb_count++;
-       nbkt->hsb_version++;
-       if (unlikely(nbkt->hsb_version == 0))
-               nbkt->hsb_version++;
-}
-
-enum {
-       /** always set, for sanity (avoid ZERO intent) */
-       CFS_HS_LOOKUP_MASK_FIND = BIT(0),
-       /** return entry with a ref */
-       CFS_HS_LOOKUP_MASK_REF  = BIT(1),
-       /** add entry if not existing */
-       CFS_HS_LOOKUP_MASK_ADD  = BIT(2),
-       /** delete entry, ignore other masks */
-       CFS_HS_LOOKUP_MASK_DEL  = BIT(3),
-};
-
-enum cfs_hash_lookup_intent {
-       /** return item w/o refcount */
-       CFS_HS_LOOKUP_IT_PEEK    = CFS_HS_LOOKUP_MASK_FIND,
-       /** return item with refcount */
-       CFS_HS_LOOKUP_IT_FIND    = (CFS_HS_LOOKUP_MASK_FIND |
-                                   CFS_HS_LOOKUP_MASK_REF),
-       /** return item w/o refcount if existed, otherwise add */
-       CFS_HS_LOOKUP_IT_ADD     = (CFS_HS_LOOKUP_MASK_FIND |
-                                   CFS_HS_LOOKUP_MASK_ADD),
-       /** return item with refcount if existed, otherwise add */
-       CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND |
-                                   CFS_HS_LOOKUP_MASK_ADD),
-       /** delete if existed */
-       CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND |
-                                   CFS_HS_LOOKUP_MASK_DEL)
-};
-
-static struct hlist_node *
-cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                         const void *key, struct hlist_node *hnode,
-                         enum cfs_hash_lookup_intent intent)
-
-{
-       struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
-       struct hlist_node *ehnode;
-       struct hlist_node *match;
-       int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0;
-
-       /* with this function, we can avoid a lot of useless refcount ops,
-        * which are expensive atomic operations most time.
-        */
-       match = intent_add ? NULL : hnode;
-       hlist_for_each(ehnode, hhead) {
-               if (!cfs_hash_keycmp(hs, key, ehnode))
-                       continue;
-
-               if (match && match != ehnode) /* can't match */
-                       continue;
-
-               /* match and ... */
-               if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) {
-                       cfs_hash_bd_del_locked(hs, bd, ehnode);
-                       return ehnode;
-               }
-
-               /* caller wants refcount? */
-               if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0)
-                       cfs_hash_get(hs, ehnode);
-               return ehnode;
-       }
-       /* no match item */
-       if (!intent_add)
-               return NULL;
-
-       LASSERT(hnode);
-       cfs_hash_bd_add_locked(hs, bd, hnode);
-       return hnode;
-}
-
-struct hlist_node *
-cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                         const void *key)
-{
-       return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
-                                        CFS_HS_LOOKUP_IT_FIND);
-}
-EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
-
-struct hlist_node *
-cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                       const void *key)
-{
-       return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
-                                        CFS_HS_LOOKUP_IT_PEEK);
-}
-EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
-
-static void
-cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-                      unsigned n, int excl)
-{
-       struct cfs_hash_bucket *prev = NULL;
-       int i;
-
-       /**
-        * bds must be ascendantly ordered by bd->bd_bucket->hsb_index.
-        * NB: it's possible that several bds point to the same bucket but
-        * have different bd::bd_offset, so need take care of deadlock.
-        */
-       cfs_hash_for_each_bd(bds, n, i) {
-               if (prev == bds[i].bd_bucket)
-                       continue;
-
-               LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index);
-               cfs_hash_bd_lock(hs, &bds[i], excl);
-               prev = bds[i].bd_bucket;
-       }
-}
-
-static void
-cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-                        unsigned n, int excl)
-{
-       struct cfs_hash_bucket *prev = NULL;
-       int i;
-
-       cfs_hash_for_each_bd(bds, n, i) {
-               if (prev != bds[i].bd_bucket) {
-                       cfs_hash_bd_unlock(hs, &bds[i], excl);
-                       prev = bds[i].bd_bucket;
-               }
-       }
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-                               unsigned n, const void *key)
-{
-       struct hlist_node *ehnode;
-       unsigned i;
-
-       cfs_hash_for_each_bd(bds, n, i) {
-               ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL,
-                                                  CFS_HS_LOOKUP_IT_FIND);
-               if (ehnode)
-                       return ehnode;
-       }
-       return NULL;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-                                unsigned n, const void *key,
-                                struct hlist_node *hnode, int noref)
-{
-       struct hlist_node *ehnode;
-       int intent;
-       unsigned i;
-
-       LASSERT(hnode);
-       intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK;
-
-       cfs_hash_for_each_bd(bds, n, i) {
-               ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key,
-                                                  NULL, intent);
-               if (ehnode)
-                       return ehnode;
-       }
-
-       if (i == 1) { /* only one bucket */
-               cfs_hash_bd_add_locked(hs, &bds[0], hnode);
-       } else {
-               struct cfs_hash_bd mybd;
-
-               cfs_hash_bd_get(hs, key, &mybd);
-               cfs_hash_bd_add_locked(hs, &mybd, hnode);
-       }
-
-       return hnode;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-                                unsigned n, const void *key,
-                                struct hlist_node *hnode)
-{
-       struct hlist_node *ehnode;
-       unsigned int i;
-
-       cfs_hash_for_each_bd(bds, n, i) {
-               ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode,
-                                                  CFS_HS_LOOKUP_IT_FINDDEL);
-               if (ehnode)
-                       return ehnode;
-       }
-       return NULL;
-}
-
-static void
-cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
-{
-       int rc;
-
-       if (!bd2->bd_bucket)
-               return;
-
-       if (!bd1->bd_bucket) {
-               *bd1 = *bd2;
-               bd2->bd_bucket = NULL;
-               return;
-       }
-
-       rc = cfs_hash_bd_compare(bd1, bd2);
-       if (!rc)
-               bd2->bd_bucket = NULL;
-       else if (rc > 0)
-               swap(*bd1, *bd2); /* swap bd1 and bd2 */
-}
-
-void
-cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
-                    struct cfs_hash_bd *bds)
-{
-       /* NB: caller should hold hs_lock.rw if REHASH is set */
-       cfs_hash_bd_from_key(hs, hs->hs_buckets,
-                            hs->hs_cur_bits, key, &bds[0]);
-       if (likely(!hs->hs_rehash_buckets)) {
-               /* no rehash or not rehashing */
-               bds[1].bd_bucket = NULL;
-               return;
-       }
-
-       LASSERT(hs->hs_rehash_bits != 0);
-       cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-                            hs->hs_rehash_bits, key, &bds[1]);
-
-       cfs_hash_bd_order(&bds[0], &bds[1]);
-}
-
-void
-cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
-       cfs_hash_multi_bd_lock(hs, bds, 2, excl);
-}
-
-void
-cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
-       cfs_hash_multi_bd_unlock(hs, bds, 2, excl);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-                              const void *key)
-{
-       return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-                               const void *key, struct hlist_node *hnode,
-                               int noref)
-{
-       return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key,
-                                               hnode, noref);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
-                               const void *key, struct hlist_node *hnode)
-{
-       return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode);
-}
-
-static void
-cfs_hash_buckets_free(struct cfs_hash_bucket **buckets,
-                     int bkt_size, int prev_size, int size)
-{
-       int i;
-
-       for (i = prev_size; i < size; i++) {
-               if (buckets[i])
-                       LIBCFS_FREE(buckets[i], bkt_size);
-       }
-
-       LIBCFS_FREE(buckets, sizeof(buckets[0]) * size);
-}
-
-/*
- * Create or grow bucket memory. Return old_buckets if no allocation was
- * needed, the newly allocated buckets if allocation was needed and
- * successful, and NULL on error.
- */
-static struct cfs_hash_bucket **
-cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
-                        unsigned int old_size, unsigned int new_size)
-{
-       struct cfs_hash_bucket **new_bkts;
-       int i;
-
-       LASSERT(old_size == 0 || old_bkts);
-
-       if (old_bkts && old_size == new_size)
-               return old_bkts;
-
-       LIBCFS_ALLOC(new_bkts, sizeof(new_bkts[0]) * new_size);
-       if (!new_bkts)
-               return NULL;
-
-       if (old_bkts) {
-               memcpy(new_bkts, old_bkts,
-                      min(old_size, new_size) * sizeof(*old_bkts));
-       }
-
-       for (i = old_size; i < new_size; i++) {
-               struct hlist_head *hhead;
-               struct cfs_hash_bd bd;
-
-               LIBCFS_ALLOC(new_bkts[i], cfs_hash_bkt_size(hs));
-               if (!new_bkts[i]) {
-                       cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs),
-                                             old_size, new_size);
-                       return NULL;
-               }
-
-               new_bkts[i]->hsb_index   = i;
-               new_bkts[i]->hsb_version = 1;  /* shouldn't be zero */
-               new_bkts[i]->hsb_depmax  = -1; /* unknown */
-               bd.bd_bucket = new_bkts[i];
-               cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
-                       INIT_HLIST_HEAD(hhead);
-
-               if (cfs_hash_with_no_lock(hs) ||
-                   cfs_hash_with_no_bktlock(hs))
-                       continue;
-
-               if (cfs_hash_with_rw_bktlock(hs))
-                       rwlock_init(&new_bkts[i]->hsb_lock.rw);
-               else if (cfs_hash_with_spin_bktlock(hs))
-                       spin_lock_init(&new_bkts[i]->hsb_lock.spin);
-               else
-                       LBUG(); /* invalid use-case */
-       }
-       return new_bkts;
-}
-
-/**
- * Initialize new libcfs hash, where:
- * @name     - Descriptive hash name
- * @cur_bits - Initial hash table size, in bits
- * @max_bits - Maximum allowed hash table resize, in bits
- * @ops      - Registered hash table operations
- * @flags    - CFS_HASH_REHASH enable synamic hash resizing
- *          - CFS_HASH_SORT enable chained hash sort
- */
-static int cfs_hash_rehash_worker(cfs_workitem_t *wi);
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-/*
- * Workitem callback: print the depth record stored in @hs (hs_dep_max,
- * hs_dep_bkt, hs_dep_off, hs_dep_bits) on the console, then zero
- * hs_dep_bits to mark the report as finished.  Always returns 0.
- */
-static int cfs_hash_dep_print(cfs_workitem_t *wi)
-{
-       struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_dep_wi);
-       int max_depth, bucket, offset, cur_bits;
-
-       /* take a consistent snapshot of the depth record */
-       spin_lock(&hs->hs_dep_lock);
-       max_depth = hs->hs_dep_max;
-       bucket = hs->hs_dep_bkt;
-       offset = hs->hs_dep_off;
-       cur_bits = hs->hs_dep_bits;
-       spin_unlock(&hs->hs_dep_lock);
-
-       /* the console message is emitted without holding the spinlock */
-       LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n",
-                     hs->hs_name, cur_bits, max_depth, bucket, offset);
-
-       spin_lock(&hs->hs_dep_lock);
-       hs->hs_dep_bits = 0; /* mark as workitem done */
-       spin_unlock(&hs->hs_dep_lock);
-
-       return 0;
-}
-
-/*
- * Initialize the depth-report state of @hs: its spinlock and the workitem
- * hs_dep_wi, which is bound to cfs_hash_dep_print().
- */
-static void cfs_hash_depth_wi_init(struct cfs_hash *hs)
-{
-       spin_lock_init(&hs->hs_dep_lock);
-       cfs_wi_init(&hs->hs_dep_wi, hs, cfs_hash_dep_print);
-}
-
-/*
- * Cancel the depth-report workitem of @hs.  Returns at once when
- * cfs_wi_deschedule() reports non-zero; otherwise polls, yielding the CPU
- * between checks, until hs_dep_bits has been reset to zero.
- */
-static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs)
-{
-       if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_dep_wi))
-               return;
-
-       spin_lock(&hs->hs_dep_lock);
-       for (;;) {
-               if (hs->hs_dep_bits == 0)
-                       break;
-               /* release the lock while yielding, then look again */
-               spin_unlock(&hs->hs_dep_lock);
-               cond_resched();
-               spin_lock(&hs->hs_dep_lock);
-       }
-       spin_unlock(&hs->hs_dep_lock);
-}
-
-#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */
-
-/* Below CFS_HASH_DEBUG_1 both depth-report hooks are empty stubs. */
-static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {}
-static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {}
-
-#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */
-
-/**
- * Allocate and initialize a new libcfs hash.
- *
- * @name        - descriptive hash name, copied with strlcpy() into hs_name
- * @cur_bits    - initial table size in bits (> 0); also stored as the
- *                minimum size
- * @max_bits    - maximum table size in bits; >= @cur_bits and < 31
- * @bkt_bits    - stored in hs_bkt_bits; must not exceed @cur_bits
- * @extra_bytes - stored in hs_extra_bytes
- * @min_theta, @max_theta - handed to __cfs_hash_set_theta() when
- *                cfs_hash_with_rehash() is true for the new hash
- * @ops         - hash operations; hs_key, hs_hash, hs_object, hs_keycmp,
- *                hs_get and hs_put_locked are asserted to be set
- * @flags       - CFS_HASH_* flags; CFS_HASH_REHASH forces CFS_HASH_COUNTER
- *
- * Returns the new hash with hs_refcount set to 1, or NULL when either the
- * descriptor or the bucket table cannot be allocated.
- */
-struct cfs_hash *
-cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
-               unsigned bkt_bits, unsigned extra_bytes,
-               unsigned min_theta, unsigned max_theta,
-               struct cfs_hash_ops *ops, unsigned flags)
-{
-       struct cfs_hash *hs;
-       int len;
-
-       CLASSERT(CFS_HASH_THETA_BITS < 15);
-
-       LASSERT(name);
-       LASSERT(ops->hs_key);
-       LASSERT(ops->hs_hash);
-       LASSERT(ops->hs_object);
-       LASSERT(ops->hs_keycmp);
-       LASSERT(ops->hs_get);
-       LASSERT(ops->hs_put_locked);
-
-       if ((flags & CFS_HASH_REHASH) != 0)
-               flags |= CFS_HASH_COUNTER; /* must have counter */
-
-       LASSERT(cur_bits > 0);
-       LASSERT(cur_bits >= bkt_bits);
-       LASSERT(max_bits >= cur_bits && max_bits < 31);
-       /* without rehash the table can never change size */
-       LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits));
-       /* rehash and the no-lock mode are mutually exclusive */
-       LASSERT(ergo((flags & CFS_HASH_REHASH) != 0,
-                    (flags & CFS_HASH_NO_LOCK) == 0));
-       LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0, ops->hs_keycpy));
-
-       /* hs_name is the trailing member; its size depends on BIGNAME */
-       len = (flags & CFS_HASH_BIGNAME) == 0 ?
-             CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
-       LIBCFS_ALLOC(hs, offsetof(struct cfs_hash, hs_name[len]));
-       if (!hs)
-               return NULL;
-
-       strlcpy(hs->hs_name, name, len);
-       hs->hs_flags = flags;
-
-       atomic_set(&hs->hs_refcount, 1);
-       atomic_set(&hs->hs_count, 0);
-
-       cfs_hash_lock_setup(hs);
-       cfs_hash_hlist_setup(hs);
-
-       hs->hs_cur_bits = (__u8)cur_bits;
-       hs->hs_min_bits = (__u8)cur_bits;
-       hs->hs_max_bits = (__u8)max_bits;
-       hs->hs_bkt_bits = (__u8)bkt_bits;
-
-       hs->hs_ops         = ops;
-       hs->hs_extra_bytes = extra_bytes;
-       hs->hs_rehash_bits = 0;
-       cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker);
-       cfs_hash_depth_wi_init(hs);
-
-       if (cfs_hash_with_rehash(hs))
-               __cfs_hash_set_theta(hs, min_theta, max_theta);
-
-       /* build the initial bucket table from scratch (old table is NULL) */
-       hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0,
-                                                 CFS_HASH_NBKT(hs));
-       if (hs->hs_buckets)
-               return hs;
-
-       /* bucket allocation failed: release the descriptor */
-       LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[len]));
-       return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_create);
-
-/**
- * Cleanup libcfs hash @hs.
- *
- * Marks the hash as exiting, cancels any rehash and depth-report work,
- * removes every remaining item (calling cfs_hash_exit() on each), then
- * frees the bucket table and the hash descriptor itself.  Asserts if the
- * hash was created with the assert-empty flag and still holds items.
- */
-static void
-cfs_hash_destroy(struct cfs_hash *hs)
-{
-       struct hlist_node *hnode;
-       struct hlist_node *pos;
-       struct cfs_hash_bd bd;
-       int i;
-
-       LASSERT(hs);
-       LASSERT(!cfs_hash_is_exiting(hs) &&
-               !cfs_hash_is_iterating(hs));
-
-       /**
-        * prohibit further rehashes, don't need any lock because
-        * I'm the only (last) one can change it.
-        */
-       hs->hs_exiting = 1;
-       if (cfs_hash_with_rehash(hs))
-               cfs_hash_rehash_cancel(hs);
-
-       cfs_hash_depth_wi_cancel(hs);
-       /* rehash should be done/canceled */
-       LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets);
-
-       cfs_hash_for_each_bucket(hs, &bd, i) {
-               struct hlist_head *hhead;
-
-               LASSERT(bd.bd_bucket);
-               /* no need to take this lock, just for consistent code */
-               cfs_hash_bd_lock(hs, &bd, 1);
-
-               cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-                       hlist_for_each_safe(hnode, pos, hhead) {
-                               LASSERTF(!cfs_hash_with_assert_empty(hs),
-                                        "hash %s bucket %u(%u) is not empty: %u items left\n",
-                                        hs->hs_name, bd.bd_bucket->hsb_index,
-                                        bd.bd_offset, bd.bd_bucket->hsb_count);
-                               /* can't assert key validate, because we
-                                * can interrupt rehash
-                                */
-                               cfs_hash_bd_del_locked(hs, &bd, hnode);
-                               cfs_hash_exit(hs, hnode);
-                       }
-               }
-               LASSERT(bd.bd_bucket->hsb_count == 0);
-               cfs_hash_bd_unlock(hs, &bd, 1);
-               /* yield between buckets; a large table may take a while */
-               cond_resched();
-       }
-
-       LASSERT(atomic_read(&hs->hs_count) == 0);
-
-       cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
-                             0, CFS_HASH_NBKT(hs));
-       /* the free size must match the name length used at allocation */
-       i = cfs_hash_with_bigname(hs) ?
-           CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN;
-       LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[i]));
-}
-
-/*
- * Take a reference on @hs unless its refcount is already zero.
- * Returns @hs when the reference was taken, NULL otherwise.
- */
-struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs)
-{
-       return atomic_inc_not_zero(&hs->hs_refcount) ? hs : NULL;
-}
-EXPORT_SYMBOL(cfs_hash_getref);
-
-/* Drop a reference on @hs; the put that reaches zero destroys the hash. */
-void cfs_hash_putref(struct cfs_hash *hs)
-{
-       if (!atomic_dec_and_test(&hs->hs_refcount))
-               return;
-
-       cfs_hash_destroy(hs);
-}
-EXPORT_SYMBOL(cfs_hash_putref);
-
-/*
- * Decide whether @hs should be resized.
- *
- * Returns the new table size in bits (> 0) when a grow or shrink is due,
- * 0 when no resize is needed, or a negative errno when a rehash cannot be
- * started right now:
- *   -EOPNOTSUPP  the hash has no lock or was created without rehash
- *   -ESRCH       the hash is exiting
- *   -EALREADY    a rehash is already in progress
- *   -EAGAIN      the hash is being iterated
- */
-static inline int
-cfs_hash_rehash_bits(struct cfs_hash *hs)
-{
-       if (cfs_hash_with_no_lock(hs))
-               return -EOPNOTSUPP;
-       if (!cfs_hash_with_rehash(hs))
-               return -EOPNOTSUPP;
-
-       if (unlikely(cfs_hash_is_exiting(hs)))
-               return -ESRCH;
-       if (unlikely(cfs_hash_is_rehashing(hs)))
-               return -EALREADY;
-       if (unlikely(cfs_hash_is_iterating(hs)))
-               return -EAGAIN;
-
-       /* XXX: need to handle case with max_theta != 2.0
-        *      and the case with min_theta != 0.5
-        */
-
-       /* grow: room left and load above the upper threshold */
-       if (hs->hs_cur_bits < hs->hs_max_bits &&
-           __cfs_hash_theta(hs) > hs->hs_max_theta)
-               return hs->hs_cur_bits + 1;
-
-       /* shrink: only if permitted, above the floor, and load below min */
-       if (cfs_hash_with_shrink(hs) &&
-           hs->hs_cur_bits > hs->hs_min_bits &&
-           __cfs_hash_theta(hs) < hs->hs_min_theta)
-               return hs->hs_cur_bits - 1;
-
-       return 0;
-}
-
-/**
- * Tell whether a rehash may run inline (synchronously).  It may not if:
- * - the user wants non-blocking change (add/del) on the hash table
- * - the table holds CFS_HASH_LOOP_HOG elements or more
- */
-static inline int
-cfs_hash_rehash_inline(struct cfs_hash *hs)
-{
-       if (cfs_hash_with_nblk_change(hs))
-               return 0;
-
-       return atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key.  The registered
- * ops->hs_get function will be called when the item is added.
- *
- * @hnode must not already be hashed (asserted).  After the insert the
- * resize decision is taken, and a rehash is started when one is due.
- */
-void
-cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
-       struct cfs_hash_bd bd;
-       int bits;
-
-       LASSERT(hlist_unhashed(hnode));
-
-       cfs_hash_lock(hs, 0);
-       cfs_hash_bd_get_and_lock(hs, key, &bd, 1);
-
-       cfs_hash_key_validate(hs, key, hnode);
-       cfs_hash_bd_add_locked(hs, &bd, hnode);
-
-       cfs_hash_bd_unlock(hs, &bd, 1);
-
-       /* take the resize decision before releasing the table lock */
-       bits = cfs_hash_rehash_bits(hs);
-       cfs_hash_unlock(hs, 0);
-       /* cfs_hash_rehash_inline() picks inline vs. scheduled rehash */
-       if (bits > 0)
-               cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-}
-EXPORT_SYMBOL(cfs_hash_add);
-
-/*
- * Look up @key in @hs and insert @hnode if cfs_hash_dual_bd_findadd_locked()
- * does not hand back an existing node.  @noref is passed through to that
- * helper unchanged.
- *
- * Returns the node reported by the helper: @hnode itself when the new item
- * was added, otherwise a different node.  A rehash is only considered
- * when a new item was actually added.
- */
-static struct hlist_node *
-cfs_hash_find_or_add(struct cfs_hash *hs, const void *key,
-                    struct hlist_node *hnode, int noref)
-{
-       struct hlist_node *ehnode;
-       struct cfs_hash_bd bds[2];
-       int bits = 0;
-
-       LASSERT(hlist_unhashed(hnode));
-
-       cfs_hash_lock(hs, 0);
-       cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
-       cfs_hash_key_validate(hs, key, hnode);
-       ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key,
-                                                hnode, noref);
-       cfs_hash_dual_bd_unlock(hs, bds, 1);
-
-       if (ehnode == hnode)    /* new item added */
-               bits = cfs_hash_rehash_bits(hs);
-       cfs_hash_unlock(hs, 0);
-       if (bits > 0)
-               cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
-       return ehnode;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key, refusing duplicates.
- * The registered ops->hs_get function will be called if the item was
- * added.
- * Returns 0 on success or -EALREADY on key collisions.
- */
-int
-cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
-                   struct hlist_node *hnode)
-{
-       if (cfs_hash_find_or_add(hs, key, hnode, 1) != hnode)
-               return -EALREADY;
-
-       return 0;
-}
-EXPORT_SYMBOL(cfs_hash_add_unique);
-
-/**
- * Add item @hnode to libcfs hash @hs using @key.  If this @key
- * already exists in the hash then ops->hs_get will be called on the
- * conflicting entry and that entry will be returned to the caller.
- * Otherwise ops->hs_get is called on the item which was added.
- *
- * Returns the object (via cfs_hash_object()) of whichever node ended up
- * representing @key.
- */
-void *
-cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
-                       struct hlist_node *hnode)
-{
-       struct hlist_node *found = cfs_hash_find_or_add(hs, key, hnode, 0);
-
-       return cfs_hash_object(hs, found);
-}
-EXPORT_SYMBOL(cfs_hash_findadd_unique);
-
-/**
- * Delete item @hnode from the libcfs hash @hs using @key.  The @key
- * is required to ensure the correct hash bucket is locked since there
- * is no direct linkage from the item to the bucket.  The object
- * removed from the hash will be returned and ops->hs_put is called
- * on the removed object.
- *
- * @hnode may be NULL, in which case the node is located by @key.
- * Returns NULL when no node was found for @key.
- *
- * NOTE(review): when a non-NULL @hnode is already unhashed the removal is
- * skipped, yet its object is still returned and a rehash check is still
- * made -- confirm callers expect that.
- */
-void *
-cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
-       void *obj = NULL;
-       int bits = 0;
-       struct cfs_hash_bd bds[2];
-
-       cfs_hash_lock(hs, 0);
-       cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
-       /* NB: do nothing if @hnode is not in hash table */
-       if (!hnode || !hlist_unhashed(hnode)) {
-               if (!bds[1].bd_bucket && hnode) {
-                       /* only one candidate bucket and the node is known */
-                       cfs_hash_bd_del_locked(hs, &bds[0], hnode);
-               } else {
-                       /* search both buckets; result may be NULL */
-                       hnode = cfs_hash_dual_bd_finddel_locked(hs, bds,
-                                                               key, hnode);
-               }
-       }
-
-       if (hnode) {
-               obj  = cfs_hash_object(hs, hnode);
-               bits = cfs_hash_rehash_bits(hs);
-       }
-
-       cfs_hash_dual_bd_unlock(hs, bds, 1);
-       cfs_hash_unlock(hs, 0);
-       if (bits > 0)
-               cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
-       return obj;
-}
-EXPORT_SYMBOL(cfs_hash_del);
-
-/**
- * Delete item given @key in libcfs hash @hs.  The first @key found in
- * the hash will be removed; if the key exists multiple times in the hash
- * @hs this function must be called once per key.  The removed object
- * will be returned and ops->hs_put is called on the removed object.
- *
- * Implemented as cfs_hash_del() with a NULL node.
- */
-void *
-cfs_hash_del_key(struct cfs_hash *hs, const void *key)
-{
-       return cfs_hash_del(hs, key, NULL);
-}
-EXPORT_SYMBOL(cfs_hash_del_key);
-
-/**
- * Lookup an item using @key in the libcfs hash @hs and return it.
- * If the @key is found in the hash hs->hs_get() is called and the
- * matching object is returned.  It is the caller's responsibility
- * to call the counterpart ops->hs_put using the cfs_hash_put() macro
- * when finished with the object.  If the @key was not found
- * in the hash @hs NULL is returned.
- */
-void *
-cfs_hash_lookup(struct cfs_hash *hs, const void *key)
-{
-       struct cfs_hash_bd bds[2];
-       struct hlist_node *found;
-       void *obj;
-
-       cfs_hash_lock(hs, 0);
-       cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
-       found = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
-       obj = found ? cfs_hash_object(hs, found) : NULL;
-
-       cfs_hash_dual_bd_unlock(hs, bds, 0);
-       cfs_hash_unlock(hs, 0);
-
-       return obj;
-}
-EXPORT_SYMBOL(cfs_hash_lookup);
-
-/*
- * Mark the start of an iteration over @hs.  A no-op for hashes without
- * rehash.  Otherwise it raises hs_iterating, bumps hs_iterators, and
- * cancels a rehash that is currently in progress.
- * Must be paired with cfs_hash_for_each_exit().
- */
-static void
-cfs_hash_for_each_enter(struct cfs_hash *hs)
-{
-       LASSERT(!cfs_hash_is_exiting(hs));
-
-       if (!cfs_hash_with_rehash(hs))
-               return;
-       /*
-        * NB: it's race on cfs_hash::hs_iterating, but doesn't matter
-        * because it's just an unreliable signal to rehash-thread,
-        * rehash-thread will try to finish rehash ASAP when seeing this.
-        */
-       hs->hs_iterating = 1;
-
-       cfs_hash_lock(hs, 1);
-       hs->hs_iterators++;
-
-       /* NB: iteration is mostly called by service thread,
-        * we tend to cancel pending rehash-request, instead of
-        * blocking service thread, we will relaunch rehash request
-        * after iteration
-        */
-       if (cfs_hash_is_rehashing(hs))
-               cfs_hash_rehash_cancel_locked(hs);
-       cfs_hash_unlock(hs, 1);
-}
-
-/*
- * Mark the end of an iteration over @hs.  A no-op for hashes without
- * rehash.  Otherwise it drops hs_iterators, clears hs_iterating when the
- * last iterator leaves, and relaunches a rehash if one is now due.
- */
-static void
-cfs_hash_for_each_exit(struct cfs_hash *hs)
-{
-       int remained;
-       int bits;
-
-       if (!cfs_hash_with_rehash(hs))
-               return;
-       cfs_hash_lock(hs, 1);
-       remained = --hs->hs_iterators;
-       bits = cfs_hash_rehash_bits(hs);
-       cfs_hash_unlock(hs, 1);
-       /* NB: it's race on cfs_hash::hs_iterating, see above */
-       if (remained == 0)
-               hs->hs_iterating = 0;
-       if (bits > 0) {
-               /* rehash inline only while the table is below LOOP_HOG items */
-               cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
-                                   CFS_HASH_LOOP_HOG);
-       }
-}
-
-/**
- * For each item in the libcfs hash @hs call the passed callback @func
- * and pass to it as an argument each hash item and the private @data.
- *
- * a) the function may sleep!
- * b) during the callback:
- *    . the bucket lock is held so the callback must never sleep.
- *    . if @remove_safe is true, user can remove current item by
- *      cfs_hash_bd_del_locked
- *
- * When @func is NULL no callback is made and the per-bucket hsb_count
- * values are summed instead.  A non-zero return from @func stops the walk.
- * Returns the number of items visited (or the summed bucket counts).
- */
-static __u64
-cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-                       void *data, int remove_safe)
-{
-       struct hlist_node *hnode;
-       struct hlist_node *pos;
-       struct cfs_hash_bd bd;
-       __u64 count = 0;
-       int excl = !!remove_safe;
-       int loop = 0;
-       int i;
-
-       cfs_hash_for_each_enter(hs);
-
-       cfs_hash_lock(hs, 0);
-       LASSERT(!cfs_hash_is_rehashing(hs));
-
-       cfs_hash_for_each_bucket(hs, &bd, i) {
-               struct hlist_head *hhead;
-
-               cfs_hash_bd_lock(hs, &bd, excl);
-               if (!func) { /* only glimpse size */
-                       count += bd.bd_bucket->hsb_count;
-                       cfs_hash_bd_unlock(hs, &bd, excl);
-                       continue;
-               }
-
-               cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-                       hlist_for_each_safe(hnode, pos, hhead) {
-                               cfs_hash_bucket_validate(hs, &bd, hnode);
-                               count++;
-                               loop++;
-                               if (func(hs, &bd, hnode, data)) {
-                                       /* callback asked to stop */
-                                       cfs_hash_bd_unlock(hs, &bd, excl);
-                                       goto out;
-                               }
-                       }
-               }
-               cfs_hash_bd_unlock(hs, &bd, excl);
-               if (loop < CFS_HASH_LOOP_HOG)
-                       continue;
-               /* visited many items: drop the table lock and yield */
-               loop = 0;
-               cfs_hash_unlock(hs, 0);
-               cond_resched();
-               cfs_hash_lock(hs, 0);
-       }
- out:
-       cfs_hash_unlock(hs, 0);
-
-       cfs_hash_for_each_exit(hs);
-       return count;
-}
-
-/* Argument bundle handed to cfs_hash_cond_del_locked() via its @data. */
-struct cfs_hash_cond_arg {
-       cfs_hash_cond_opt_cb_t  func;   /* predicate run on each object */
-       void                    *arg;   /* opaque argument passed to func */
-};
-
-/*
- * Iteration callback for cfs_hash_cond_del(): remove @hnode from @bd when
- * the predicate carried in @data (a struct cfs_hash_cond_arg) returns
- * non-zero for its object.  Always returns 0 so the walk continues.
- */
-static int
-cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-                        struct hlist_node *hnode, void *data)
-{
-       struct cfs_hash_cond_arg *cond = data;
-       void *obj = cfs_hash_object(hs, hnode);
-
-       if (cond->func(obj, cond->arg))
-               cfs_hash_bd_del_locked(hs, bd, hnode);
-
-       return 0;
-}
-
-/**
- * Delete every item from the libcfs hash @hs for which @func returns true.
- * The bucket lock is taken exclusively while each bucket is walked, so no
- * object can be referenced in the meantime.
- */
-void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data)
-{
-       struct cfs_hash_cond_arg cond = { .func = func, .arg = data };
-
-       cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &cond, 1);
-}
-EXPORT_SYMBOL(cfs_hash_cond_del);
-
-/*
- * Call @func with @data on every item of @hs through
- * cfs_hash_for_each_tight() with remove_safe == 0.
- */
-void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-                 void *data)
-{
-       cfs_hash_for_each_tight(hs, func, data, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each);
-
-/*
- * Same as cfs_hash_for_each(), but walks with remove_safe == 1 so that
- * @func may remove the current item.
- */
-void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-                      void *data)
-{
-       cfs_hash_for_each_tight(hs, func, data, 1);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_safe);
-
-/*
- * Iteration callback for cfs_hash_is_empty(): being called at all means
- * the hash holds an item, so clear the int flag behind @data and stop.
- */
-static int
-cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-             struct hlist_node *hnode, void *data)
-{
-       int *empty = data;
-
-       *empty = 0;
-       return 1; /* return 1 to break the loop */
-}
-
-/*
- * Returns 1 when @hs holds no items, 0 otherwise.  The walk stops at the
- * first item found because cfs_hash_peek() returns non-zero.
- */
-int
-cfs_hash_is_empty(struct cfs_hash *hs)
-{
-       int empty = 1;
-
-       cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0);
-       return empty;
-}
-EXPORT_SYMBOL(cfs_hash_is_empty);
-
-/*
- * Return the number of items in @hs: the hs_count counter when the hash
- * maintains one, otherwise the sum of the per-bucket counts.
- */
-__u64
-cfs_hash_size_get(struct cfs_hash *hs)
-{
-       if (cfs_hash_with_counter(hs))
-               return atomic_read(&hs->hs_count);
-
-       return cfs_hash_for_each_tight(hs, NULL, NULL, 0);
-}
-EXPORT_SYMBOL(cfs_hash_size_get);
-
-/*
- * cfs_hash_for_each_relax:
- * Iterate the hash table and call @func on each item without
- * any lock. This function can't guarantee to finish iteration
- * if these features are enabled:
- *
- *  a. if rehash_key is enabled, an item can be moved from
- *     one bucket to another bucket
- *  b. user can remove non-zero-ref item from hash-table,
- *     so the item can be removed from hash-table, even worse,
- *     it's possible that user changed key and insert to another
- *     hash bucket.
- * there's no way for us to finish iteration correctly on previous
- * two cases, so iteration has to be stopped on change.
- *
- * Each item is pinned with cfs_hash_get() before the locks are dropped
- * for the callback.  A non-zero return from @func ends the whole walk.
- * Returns the number of items @func was called on.
- */
-static int
-cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-                       void *data)
-{
-       struct hlist_node *hnode;
-       struct hlist_node *tmp;
-       struct cfs_hash_bd bd;
-       __u32 version;
-       int count = 0;
-       int stop_on_change;
-       /*
-        * Must start at 0: the checks after the inner loop read rc even
-        * when the first hlist visited is empty and @func never ran.
-        */
-       int rc = 0;
-       int i;
-
-       stop_on_change = cfs_hash_with_rehash_key(hs) ||
-                        !cfs_hash_with_no_itemref(hs) ||
-                        !hs->hs_ops->hs_put_locked;
-       cfs_hash_lock(hs, 0);
-       LASSERT(!cfs_hash_is_rehashing(hs));
-
-       cfs_hash_for_each_bucket(hs, &bd, i) {
-               struct hlist_head *hhead;
-
-               cfs_hash_bd_lock(hs, &bd, 0);
-               /* remember the bucket version to detect concurrent change */
-               version = cfs_hash_bd_version_get(&bd);
-
-               cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
-                       for (hnode = hhead->first; hnode;) {
-                               cfs_hash_bucket_validate(hs, &bd, hnode);
-                               /* pin the item, then call back unlocked */
-                               cfs_hash_get(hs, hnode);
-                               cfs_hash_bd_unlock(hs, &bd, 0);
-                               cfs_hash_unlock(hs, 0);
-
-                               rc = func(hs, &bd, hnode, data);
-                               if (stop_on_change)
-                                       cfs_hash_put(hs, hnode);
-                               cond_resched();
-                               count++;
-
-                               cfs_hash_lock(hs, 0);
-                               cfs_hash_bd_lock(hs, &bd, 0);
-                               if (!stop_on_change) {
-                                       /* fetch next before dropping our ref */
-                                       tmp = hnode->next;
-                                       cfs_hash_put_locked(hs, hnode);
-                                       hnode = tmp;
-                               } else { /* bucket changed? */
-                                       if (version !=
-                                           cfs_hash_bd_version_get(&bd))
-                                               break;
-                                       /* safe to continue because no change */
-                                       hnode = hnode->next;
-                               }
-                               if (rc) /* callback wants to break iteration */
-                                       break;
-                       }
-                       if (rc) /* callback wants to break iteration */
-                               break;
-               }
-               cfs_hash_bd_unlock(hs, &bd, 0);
-               if (rc) /* callback wants to break iteration */
-                       break;
-       }
-       cfs_hash_unlock(hs, 0);
-
-       return count;
-}
-
-/*
- * Walk @hs with cfs_hash_for_each_relax(), calling @func with @data on
- * each item.
- *
- * Returns -EOPNOTSUPP when the hash has no lock, has rehash_key enabled,
- * lacks the no_itemref property, has no hs_get, or has neither hs_put nor
- * hs_put_locked.  Otherwise performs the walk and returns 0.
- */
-int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-                        void *data)
-{
-       if (cfs_hash_with_no_lock(hs) ||
-           cfs_hash_with_rehash_key(hs) ||
-           !cfs_hash_with_no_itemref(hs) ||
-           !hs->hs_ops->hs_get ||
-           (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
-               return -EOPNOTSUPP;
-
-       cfs_hash_for_each_enter(hs);
-       cfs_hash_for_each_relax(hs, func, data);
-       cfs_hash_for_each_exit(hs);
-
-       return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_nolock);
-
-/**
- * For each hash bucket in the libcfs hash @hs call the passed callback
- * @func until all the hash buckets are empty.  The passed callback @func
- * or the previously registered callback hs->hs_put must remove the item
- * from the hash.  You may either use the cfs_hash_del() or hlist_del()
- * functions.  No rwlocks will be held during the callback @func; it is
- * safe to sleep if needed.  This function will not terminate until the
- * hash is empty.  Note it is still possible to concurrently add new
- * items in to the hash.  It is the caller's responsibility to ensure
- * the required locking is in place to prevent concurrent insertions.
- *
- * Returns -EOPNOTSUPP when the hash has no lock, no hs_get, or neither
- * hs_put nor hs_put_locked; 0 once a full pass visits no item.
- */
-int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
-                       void *data)
-{
-       unsigned i = 0;
-
-       if (cfs_hash_with_no_lock(hs))
-               return -EOPNOTSUPP;
-
-       if (!hs->hs_ops->hs_get ||
-           (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
-               return -EOPNOTSUPP;
-
-       cfs_hash_for_each_enter(hs);
-       /* repeat whole passes until one pass finds nothing to visit */
-       while (cfs_hash_for_each_relax(hs, func, data)) {
-               CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
-                      hs->hs_name, i++);
-       }
-       cfs_hash_for_each_exit(hs);
-       return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_empty);
-
-/*
- * Call @func with @data on every item of the single hash list selected by
- * @hindex.  Does nothing when @hindex is >= CFS_HASH_NHLIST(hs).  The
- * bucket lock is held across the callbacks; a non-zero return from @func
- * stops the walk.
- */
-void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex,
-                       cfs_hash_for_each_cb_t func, void *data)
-{
-       struct hlist_head *hhead;
-       struct hlist_node *hnode;
-       struct cfs_hash_bd bd;
-
-       cfs_hash_for_each_enter(hs);
-       cfs_hash_lock(hs, 0);
-       if (hindex >= CFS_HASH_NHLIST(hs))
-               goto out;
-
-       cfs_hash_bd_index_set(hs, hindex, &bd);
-
-       cfs_hash_bd_lock(hs, &bd, 0);
-       hhead = cfs_hash_bd_hhead(hs, &bd);
-       hlist_for_each(hnode, hhead) {
-               if (func(hs, &bd, hnode, data))
-                       break;
-       }
-       cfs_hash_bd_unlock(hs, &bd, 0);
-out:
-       cfs_hash_unlock(hs, 0);
-       cfs_hash_for_each_exit(hs);
-}
-EXPORT_SYMBOL(cfs_hash_hlist_for_each);
-
-/*
- * For each item in the libcfs hash @hs which matches the @key call
- * the passed callback @func and pass to it as an argument each hash
- * item and the private @data. During the callback the bucket lock
- * is held so the callback must never sleep.
- *
- * NOTE(review): a non-zero return from @func only leaves the inner list
- * walk; if a second bucket descriptor is present its list is still
- * visited -- confirm this is intended.
- */
-void
-cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
-                     cfs_hash_for_each_cb_t func, void *data)
-{
-       struct hlist_node *hnode;
-       struct cfs_hash_bd bds[2];
-       unsigned int i;
-
-       cfs_hash_lock(hs, 0);
-
-       cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
-       cfs_hash_for_each_bd(bds, 2, i) {
-               struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);
-
-               hlist_for_each(hnode, hlist) {
-                       cfs_hash_bucket_validate(hs, &bds[i], hnode);
-
-                       if (cfs_hash_keycmp(hs, key, hnode)) {
-                               if (func(hs, &bds[i], hnode, data))
-                                       break;
-                       }
-               }
-       }
-
-       cfs_hash_dual_bd_unlock(hs, bds, 0);
-       cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_key);
-
-/**
- * Rehash the libcfs hash @hs to the given @bits.  This can be used
- * to grow the hash size when excessive chaining is detected, or to
- * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
- * flag is set in @hs the libcfs hash may be dynamically rehashed
- * during addition or removal if the hash's theta value exceeds
- * either the hs->hs_min_theta or hs->hs_max_theta values.  By default
- * these values are tuned to keep the chained hash depth small, and
- * this approach assumes a reasonably uniform hashing function.  The
- * theta thresholds for @hs are tunable via cfs_hash_set_theta().
- *
- * The paragraph above describes rehashing in general.  This function
- * cancels a rehash in progress: if the workitem can be descheduled the
- * request is simply cleared, otherwise it waits, repeatedly dropping and
- * retaking the lock, until the hash is no longer rehashing.
- * The caller must hold cfs_hash_lock(hs, 1).
- */
-void
-cfs_hash_rehash_cancel_locked(struct cfs_hash *hs)
-{
-       int i;
-
-       /* need hold cfs_hash_lock(hs, 1) */
-       LASSERT(cfs_hash_with_rehash(hs) &&
-               !cfs_hash_with_no_lock(hs));
-
-       if (!cfs_hash_is_rehashing(hs))
-               return;
-
-       if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_rehash_wi)) {
-               hs->hs_rehash_bits = 0;
-               return;
-       }
-
-       for (i = 2; cfs_hash_is_rehashing(hs); i++) {
-               cfs_hash_unlock(hs, 1);
-               /* raise console warning while waiting too long */
-               CDEBUG(is_power_of_2(i >> 3) ? D_WARNING : D_INFO,
-                      "hash %s is still rehashing, rescheded %d\n",
-                      hs->hs_name, i - 1);
-               cond_resched();
-               cfs_hash_lock(hs, 1);
-       }
-}
-
-/*
- * Cancel a rehash of @hs that is in progress: takes cfs_hash_lock(hs, 1)
- * around cfs_hash_rehash_cancel_locked().
- */
-void
-cfs_hash_rehash_cancel(struct cfs_hash *hs)
-{
-       cfs_hash_lock(hs, 1);
-       cfs_hash_rehash_cancel_locked(hs);
-       cfs_hash_unlock(hs, 1);
-}
-
-/*
- * Start a rehash of @hs if cfs_hash_rehash_bits() says one is due.
- *
- * @do_rehash - non-zero: run the rehash worker in the calling context;
- *              zero: schedule the rehash workitem and return
- *
- * Returns the (<= 0) result of cfs_hash_rehash_bits() when no rehash is
- * started, 0 after scheduling the workitem, or the return value of
- * cfs_hash_rehash_worker() when run inline.
- */
-int
-cfs_hash_rehash(struct cfs_hash *hs, int do_rehash)
-{
-       int rc;
-
-       LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs));
-
-       cfs_hash_lock(hs, 1);
-
-       /* re-evaluate under the lock: the caller's decision may be stale */
-       rc = cfs_hash_rehash_bits(hs);
-       if (rc <= 0) {
-               cfs_hash_unlock(hs, 1);
-               return rc;
-       }
-
-       /* record the target size for the rehash worker */
-       hs->hs_rehash_bits = rc;
-       if (!do_rehash) {
-               /* launch and return */
-               cfs_wi_schedule(cfs_sched_rehash, &hs->hs_rehash_wi);
-               cfs_hash_unlock(hs, 1);
-               return 0;
-       }
-
-       /* rehash right now */
-       cfs_hash_unlock(hs, 1);
-
-       return cfs_hash_rehash_worker(&hs->hs_rehash_wi);
-}
-
-/*
- * Move every item of bucket @old into the bucket of the rehash table
- * (hs_rehash_buckets / hs_rehash_bits) that its key maps to.
- * Returns the number of items moved.
- */
-static int
-cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old)
-{
-       struct cfs_hash_bd dst;
-       struct hlist_head *head;
-       struct hlist_node *cur;
-       struct hlist_node *next;
-       int moved = 0;
-
-       /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */
-       cfs_hash_bd_for_each_hlist(hs, old, head) {
-               hlist_for_each_safe(cur, next, head) {
-                       void *key = cfs_hash_key(hs, cur);
-
-                       LASSERT(key);
-                       /* Validate the node is in the correct bucket. */
-                       cfs_hash_bucket_validate(hs, old, cur);
-                       /*
-                        * Delete from old hash bucket; move to new bucket.
-                        * ops->hs_key must be defined.
-                        */
-                       cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
-                                            hs->hs_rehash_bits, key, &dst);
-                       cfs_hash_bd_move_locked(hs, old, &dst, cur);
-                       moved++;
-               }
-       }
-
-       return moved;
-}
-
-/*
- * Rehash workitem: resize the bucket table of @hs to hs_rehash_bits.
- *
- * Reallocates the bucket table, moves every item into the new table,
- * then swaps the tables and frees the buckets that are no longer needed.
- * The rehash is abandoned when the allocation fails (-ENOMEM), when the
- * load is already back within the theta thresholds (-EALREADY), or when
- * the hash is exiting (-ESRCH).
- *
- * Returns 1 only when cfs_wi_exit() was called (hash exiting), else 0.
- */
-static int
-cfs_hash_rehash_worker(cfs_workitem_t *wi)
-{
-       struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_rehash_wi);
-       struct cfs_hash_bucket **bkts;
-       struct cfs_hash_bd bd;
-       unsigned int old_size;
-       unsigned int new_size;
-       int bsize;
-       int count = 0;
-       int rc = 0;
-       int i;
-
-       LASSERT(hs && cfs_hash_with_rehash(hs));
-
-       cfs_hash_lock(hs, 0);
-       LASSERT(cfs_hash_is_rehashing(hs));
-
-       old_size = CFS_HASH_NBKT(hs);
-       new_size = CFS_HASH_RH_NBKT(hs);
-
-       cfs_hash_unlock(hs, 0);
-
-       /*
-        * don't need hs::hs_rwlock for hs::hs_buckets,
-        * because nobody can change bkt-table except me.
-        */
-       bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets,
-                                       old_size, new_size);
-       cfs_hash_lock(hs, 1);
-       if (!bkts) {
-               rc = -ENOMEM;
-               goto out;
-       }
-
-       if (bkts == hs->hs_buckets) {
-               bkts = NULL; /* do nothing */
-               goto out;
-       }
-
-       rc = __cfs_hash_theta(hs);
-       if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) {
-               /* free the new allocated bkt-table */
-               /* sizes are swapped so the free at out: hits the new table */
-               old_size = new_size;
-               new_size = CFS_HASH_NBKT(hs);
-               rc = -EALREADY;
-               goto out;
-       }
-
-       LASSERT(!hs->hs_rehash_buckets);
-       hs->hs_rehash_buckets = bkts;
-
-       rc = 0;
-       cfs_hash_for_each_bucket(hs, &bd, i) {
-               if (cfs_hash_is_exiting(hs)) {
-                       rc = -ESRCH;
-                       /* someone wants to destroy the hash, abort now */
-                       if (old_size < new_size) /* OK to free old bkt-table */
-                               break;
-                       /* it's shrinking, need free new bkt-table */
-                       hs->hs_rehash_buckets = NULL;
-                       old_size = new_size;
-                       new_size = CFS_HASH_NBKT(hs);
-                       goto out;
-               }
-
-               count += cfs_hash_rehash_bd(hs, &bd);
-               if (count < CFS_HASH_LOOP_HOG ||
-                   cfs_hash_is_iterating(hs)) { /* need to finish ASAP */
-                       continue;
-               }
-
-               /* moved many items: drop the lock and yield */
-               count = 0;
-               cfs_hash_unlock(hs, 1);
-               cond_resched();
-               cfs_hash_lock(hs, 1);
-       }
-
-       hs->hs_rehash_count++;
-
-       /* swap tables; bkts now names the old table, freed below */
-       bkts = hs->hs_buckets;
-       hs->hs_buckets = hs->hs_rehash_buckets;
-       hs->hs_rehash_buckets = NULL;
-
-       hs->hs_cur_bits = hs->hs_rehash_bits;
-out:
-       hs->hs_rehash_bits = 0;
-       if (rc == -ESRCH) /* never be scheduled again */
-               cfs_wi_exit(cfs_sched_rehash, wi);
-       /* read the bucket size now; @hs is off limits after the unlock */
-       bsize = cfs_hash_bkt_size(hs);
-       cfs_hash_unlock(hs, 1);
-       /* can't refer to @hs anymore because it could be destroyed */
-       if (bkts)
-               cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
-       if (rc != 0)
-               CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc);
-       /* return 1 only if cfs_wi_exit is called */
-       return rc == -ESRCH;
-}
-
-/**
- * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
- * @old_key must be provided to locate the objects previous location
- * in the hash, and the @new_key will be used to reinsert the object.
- * Use this function instead of a cfs_hash_add() + cfs_hash_del()
- * combo when it is critical that there is no window in time where the
- * object is missing from the hash.  When an object is being rehashed
- * the registered cfs_hash_get() and cfs_hash_put() functions will
- * not be called.
- */
-void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
-                        void *new_key, struct hlist_node *hnode)
-{
-       struct cfs_hash_bd bds[3];
-       struct cfs_hash_bd old_bds[2];
-       struct cfs_hash_bd new_bd;
-
-       LASSERT(!hlist_unhashed(hnode));
-
-       cfs_hash_lock(hs, 0);
-
-       cfs_hash_dual_bd_get(hs, old_key, old_bds);
-       cfs_hash_bd_get(hs, new_key, &new_bd);
-
-       bds[0] = old_bds[0];
-       bds[1] = old_bds[1];
-       bds[2] = new_bd;
-
-       /* NB: bds[0] and bds[1] are ordered already */
-       cfs_hash_bd_order(&bds[1], &bds[2]);
-       cfs_hash_bd_order(&bds[0], &bds[1]);
-
-       cfs_hash_multi_bd_lock(hs, bds, 3, 1);
-       if (likely(!old_bds[1].bd_bucket)) {
-               cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
-       } else {
-               cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
-               cfs_hash_bd_add_locked(hs, &new_bd, hnode);
-       }
-       /* overwrite key inside locks, otherwise may screw up with
-        * other operations, i.e: rehash
-        */
-       cfs_hash_keycpy(hs, hnode, new_key);
-
-       cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
-       cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_rehash_key);
-
-void cfs_hash_debug_header(struct seq_file *m)
-{
-       seq_printf(m, "%-*s   cur   min   max theta t-min t-max flags rehash   count  maxdep maxdepb distribution\n",
-                  CFS_HASH_BIGNAME_LEN, "name");
-}
-EXPORT_SYMBOL(cfs_hash_debug_header);
-
-static struct cfs_hash_bucket **
-cfs_hash_full_bkts(struct cfs_hash *hs)
-{
-       /* NB: caller should hold hs->hs_rwlock if REHASH is set */
-       if (!hs->hs_rehash_buckets)
-               return hs->hs_buckets;
-
-       LASSERT(hs->hs_rehash_bits != 0);
-       return hs->hs_rehash_bits > hs->hs_cur_bits ?
-              hs->hs_rehash_buckets : hs->hs_buckets;
-}
-
-static unsigned int
-cfs_hash_full_nbkt(struct cfs_hash *hs)
-{
-       /* NB: caller should hold hs->hs_rwlock if REHASH is set */
-       if (!hs->hs_rehash_buckets)
-               return CFS_HASH_NBKT(hs);
-
-       LASSERT(hs->hs_rehash_bits != 0);
-       return hs->hs_rehash_bits > hs->hs_cur_bits ?
-              CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
-}
-
-void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m)
-{
-       int dist[8] = { 0, };
-       int maxdep = -1;
-       int maxdepb = -1;
-       int total = 0;
-       int theta;
-       int i;
-
-       cfs_hash_lock(hs, 0);
-       theta = __cfs_hash_theta(hs);
-
-       seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d  0x%02x %6d ",
-                  CFS_HASH_BIGNAME_LEN, hs->hs_name,
-                  1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
-                  1 << hs->hs_max_bits,
-                  __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
-                  __cfs_hash_theta_int(hs->hs_min_theta),
-                  __cfs_hash_theta_frac(hs->hs_min_theta),
-                  __cfs_hash_theta_int(hs->hs_max_theta),
-                  __cfs_hash_theta_frac(hs->hs_max_theta),
-                  hs->hs_flags, hs->hs_rehash_count);
-
-       /*
-        * The distribution is a summary of the chained hash depth in
-        * each of the libcfs hash buckets.  Each buckets hsb_count is
-        * divided by the hash theta value and used to generate a
-        * histogram of the hash distribution.  A uniform hash will
-        * result in all hash buckets being close to the average thus
-        * only the first few entries in the histogram will be non-zero.
-        * If you hash function results in a non-uniform hash the will
-        * be observable by outlier bucks in the distribution histogram.
-        *
-        * Uniform hash distribution:           128/128/0/0/0/0/0/0
-        * Non-Uniform hash distribution:       128/125/0/0/0/0/2/1
-        */
-       for (i = 0; i < cfs_hash_full_nbkt(hs); i++) {
-               struct cfs_hash_bd bd;
-
-               bd.bd_bucket = cfs_hash_full_bkts(hs)[i];
-               cfs_hash_bd_lock(hs, &bd, 0);
-               if (maxdep < bd.bd_bucket->hsb_depmax) {
-                       maxdep  = bd.bd_bucket->hsb_depmax;
-                       maxdepb = ffz(~maxdep);
-               }
-               total += bd.bd_bucket->hsb_count;
-               dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++;
-               cfs_hash_bd_unlock(hs, &bd, 0);
-       }
-
-       seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb);
-       for (i = 0; i < 8; i++)
-               seq_printf(m, "%d%c",  dist[i], (i == 7) ? '\n' : '/');
-
-       cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_debug_str);
diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c

deleted file mode 100644 (file)

index 33352af..0000000
--- a/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-/** Global CPU partition table */
-struct cfs_cpt_table   *cfs_cpt_table __read_mostly;
-EXPORT_SYMBOL(cfs_cpt_table);
-
-#ifndef HAVE_LIBCFS_CPT
-
-#define CFS_CPU_VERSION_MAGIC     0xbabecafe
-
-struct cfs_cpt_table *
-cfs_cpt_table_alloc(unsigned int ncpt)
-{
-       struct cfs_cpt_table *cptab;
-
-       if (ncpt != 1) {
-               CERROR("Can't support cpu partition number %d\n", ncpt);
-               return NULL;
-       }
-
-       LIBCFS_ALLOC(cptab, sizeof(*cptab));
-       if (cptab) {
-               cptab->ctb_version = CFS_CPU_VERSION_MAGIC;
-               node_set(0, cptab->ctb_nodemask);
-               cptab->ctb_nparts  = ncpt;
-       }
-
-       return cptab;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-void
-cfs_cpt_table_free(struct cfs_cpt_table *cptab)
-{
-       LASSERT(cptab->ctb_version == CFS_CPU_VERSION_MAGIC);
-
-       LIBCFS_FREE(cptab, sizeof(*cptab));
-}
-EXPORT_SYMBOL(cfs_cpt_table_free);
-
-#ifdef CONFIG_SMP
-int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
-       int     rc;
-
-       rc = snprintf(buf, len, "%d\t: %d\n", 0, 0);
-       len -= rc;
-       if (len <= 0)
-               return -EFBIG;
-
-       return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-#endif /* CONFIG_SMP */
-
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_number);
-
-int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_weight);
-
-int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_online);
-
-nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
-       return &cptab->ctb_nodemask;
-}
-EXPORT_SYMBOL(cfs_cpt_cpumask);
-
-int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpu);
-
-void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpu);
-
-int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpumask);
-
-void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
-
-int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_node);
-
-void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_unset_node);
-
-int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_nodemask);
-
-void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
-
-void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-}
-EXPORT_SYMBOL(cfs_cpt_clear);
-
-int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
-       return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_spread_node);
-
-int
-cfs_cpu_ht_nsiblings(int cpu)
-{
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpu_ht_nsiblings);
-
-int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
-       return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_current);
-
-int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
-       return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_of_cpu);
-
-int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
-       return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_bind);
-
-void
-cfs_cpu_fini(void)
-{
-       if (cfs_cpt_table) {
-               cfs_cpt_table_free(cfs_cpt_table);
-               cfs_cpt_table = NULL;
-       }
-}
-
-int
-cfs_cpu_init(void)
-{
-       cfs_cpt_table = cfs_cpt_table_alloc(1);
-
-       return cfs_cpt_table ? 0 : -1;
-}
-
-#endif /* HAVE_LIBCFS_CPT */
diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c b/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c

deleted file mode 100644 (file)

index 2de9eea..0000000
--- a/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-/** destroy cpu-partition lock, see libcfs_private.h for more detail */
-void
-cfs_percpt_lock_free(struct cfs_percpt_lock *pcl)
-{
-       LASSERT(pcl->pcl_locks);
-       LASSERT(!pcl->pcl_locked);
-
-       cfs_percpt_free(pcl->pcl_locks);
-       LIBCFS_FREE(pcl, sizeof(*pcl));
-}
-EXPORT_SYMBOL(cfs_percpt_lock_free);
-
-/**
- * create cpu-partition lock, see libcfs_private.h for more detail.
- *
- * cpu-partition lock is designed for large-scale SMP system, so we need to
- * reduce cacheline conflict as possible as we can, that's the
- * reason we always allocate cacheline-aligned memory block.
- */
-struct cfs_percpt_lock *
-cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab)
-{
-       struct cfs_percpt_lock  *pcl;
-       spinlock_t              *lock;
-       int                     i;
-
-       /* NB: cptab can be NULL, pcl will be for HW CPUs on that case */
-       LIBCFS_ALLOC(pcl, sizeof(*pcl));
-       if (!pcl)
-               return NULL;
-
-       pcl->pcl_cptab = cptab;
-       pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock));
-       if (!pcl->pcl_locks) {
-               LIBCFS_FREE(pcl, sizeof(*pcl));
-               return NULL;
-       }
-
-       cfs_percpt_for_each(lock, i, pcl->pcl_locks)
-               spin_lock_init(lock);
-
-       return pcl;
-}
-EXPORT_SYMBOL(cfs_percpt_lock_alloc);
-
-/**
- * lock a CPU partition
- *
- * \a index != CFS_PERCPT_LOCK_EX
- *     hold private lock indexed by \a index
- *
- * \a index == CFS_PERCPT_LOCK_EX
- *     exclusively lock @pcl and nobody can take private lock
- */
-void
-cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
-       __acquires(pcl->pcl_locks)
-{
-       int     ncpt = cfs_cpt_number(pcl->pcl_cptab);
-       int     i;
-
-       LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt);
-
-       if (ncpt == 1) {
-               index = 0;
-       } else { /* serialize with exclusive lock */
-               while (pcl->pcl_locked)
-                       cpu_relax();
-       }
-
-       if (likely(index != CFS_PERCPT_LOCK_EX)) {
-               spin_lock(pcl->pcl_locks[index]);
-               return;
-       }
-
-       /* exclusive lock request */
-       for (i = 0; i < ncpt; i++) {
-               spin_lock(pcl->pcl_locks[i]);
-               if (i == 0) {
-                       LASSERT(!pcl->pcl_locked);
-                       /* nobody should take private lock after this
-                        * so I wouldn't starve for too long time
-                        */
-                       pcl->pcl_locked = 1;
-               }
-       }
-}
-EXPORT_SYMBOL(cfs_percpt_lock);
-
-/** unlock a CPU partition */
-void
-cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
-       __releases(pcl->pcl_locks)
-{
-       int     ncpt = cfs_cpt_number(pcl->pcl_cptab);
-       int     i;
-
-       index = ncpt == 1 ? 0 : index;
-
-       if (likely(index != CFS_PERCPT_LOCK_EX)) {
-               spin_unlock(pcl->pcl_locks[index]);
-               return;
-       }
-
-       for (i = ncpt - 1; i >= 0; i--) {
-               if (i == 0) {
-                       LASSERT(pcl->pcl_locked);
-                       pcl->pcl_locked = 0;
-               }
-               spin_unlock(pcl->pcl_locks[i]);
-       }
-}
-EXPORT_SYMBOL(cfs_percpt_unlock);
-
-/** free cpu-partition refcount */
-void
-cfs_percpt_atomic_free(atomic_t **refs)
-{
-       cfs_percpt_free(refs);
-}
-EXPORT_SYMBOL(cfs_percpt_atomic_free);
-
-/** allocate cpu-partition refcount with initial value @init_val */
-atomic_t **
-cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int init_val)
-{
-       atomic_t        **refs;
-       atomic_t        *ref;
-       int             i;
-
-       refs = cfs_percpt_alloc(cptab, sizeof(*ref));
-       if (!refs)
-               return NULL;
-
-       cfs_percpt_for_each(ref, i, refs)
-               atomic_set(ref, init_val);
-       return refs;
-}
-EXPORT_SYMBOL(cfs_percpt_atomic_alloc);
-
-/** return sum of cpu-partition refs */
-int
-cfs_percpt_atomic_summary(atomic_t **refs)
-{
-       atomic_t        *ref;
-       int             i;
-       int             val = 0;
-
-       cfs_percpt_for_each(ref, i, refs)
-               val += atomic_read(ref);
-
-       return val;
-}
-EXPORT_SYMBOL(cfs_percpt_atomic_summary);
diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c b/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c

deleted file mode 100644 (file)

index c5a6951..0000000
--- a/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-struct cfs_var_array {
-       unsigned int            va_count;       /* # of buffers */
-       unsigned int            va_size;        /* size of each var */
-       struct cfs_cpt_table    *va_cptab;      /* cpu partition table */
-       void                    *va_ptrs[0];    /* buffer addresses */
-};
-
-/*
- * free per-cpu data, see more detail in cfs_percpt_free
- */
-void
-cfs_percpt_free(void *vars)
-{
-       struct  cfs_var_array *arr;
-       int     i;
-
-       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-       for (i = 0; i < arr->va_count; i++) {
-               if (arr->va_ptrs[i])
-                       LIBCFS_FREE(arr->va_ptrs[i], arr->va_size);
-       }
-
-       LIBCFS_FREE(arr, offsetof(struct cfs_var_array,
-                                 va_ptrs[arr->va_count]));
-}
-EXPORT_SYMBOL(cfs_percpt_free);
-
-/*
- * allocate per cpu-partition variables, returned value is an array of pointers,
- * variable can be indexed by CPU partition ID, i.e:
- *
- *     arr = cfs_percpt_alloc(cfs_cpu_pt, size);
- *     then caller can access memory block for CPU 0 by arr[0],
- *     memory block for CPU 1 by arr[1]...
- *     memory block for CPU N by arr[N]...
- *
- * cacheline aligned.
- */
-void *
-cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
-{
-       struct cfs_var_array    *arr;
-       int                     count;
-       int                     i;
-
-       count = cfs_cpt_number(cptab);
-
-       LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count]));
-       if (!arr)
-               return NULL;
-
-       size = L1_CACHE_ALIGN(size);
-       arr->va_size = size;
-       arr->va_count = count;
-       arr->va_cptab = cptab;
-
-       for (i = 0; i < count; i++) {
-               LIBCFS_CPT_ALLOC(arr->va_ptrs[i], cptab, i, size);
-               if (!arr->va_ptrs[i]) {
-                       cfs_percpt_free((void *)&arr->va_ptrs[0]);
-                       return NULL;
-               }
-       }
-
-       return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_percpt_alloc);
-
-/*
- * return number of CPUs (or number of elements in per-cpu data)
- * according to cptab of @vars
- */
-int
-cfs_percpt_number(void *vars)
-{
-       struct cfs_var_array *arr;
-
-       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-       return arr->va_count;
-}
-EXPORT_SYMBOL(cfs_percpt_number);
-
-/*
- * return memory block shadowed from current CPU
- */
-void *
-cfs_percpt_current(void *vars)
-{
-       struct cfs_var_array *arr;
-       int    cpt;
-
-       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-       cpt = cfs_cpt_current(arr->va_cptab, 0);
-       if (cpt < 0)
-               return NULL;
-
-       return arr->va_ptrs[cpt];
-}
-
-void *
-cfs_percpt_index(void *vars, int idx)
-{
-       struct cfs_var_array *arr;
-
-       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-       LASSERT(idx >= 0 && idx < arr->va_count);
-       return arr->va_ptrs[idx];
-}
-
-/*
- * free variable array, see more detail in cfs_array_alloc
- */
-void
-cfs_array_free(void *vars)
-{
-       struct cfs_var_array    *arr;
-       int                     i;
-
-       arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-       for (i = 0; i < arr->va_count; i++) {
-               if (!arr->va_ptrs[i])
-                       continue;
-
-               LIBCFS_FREE(arr->va_ptrs[i], arr->va_size);
-       }
-       LIBCFS_FREE(arr, offsetof(struct cfs_var_array,
-                                 va_ptrs[arr->va_count]));
-}
-EXPORT_SYMBOL(cfs_array_free);
-
-/*
- * allocate a variable array, returned value is an array of pointers.
- * Caller can specify length of array by @count, @size is size of each
- * memory block in array.
- */
-void *
-cfs_array_alloc(int count, unsigned int size)
-{
-       struct cfs_var_array    *arr;
-       int                     i;
-
-       LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count]));
-       if (!arr)
-               return NULL;
-
-       arr->va_count   = count;
-       arr->va_size    = size;
-
-       for (i = 0; i < count; i++) {
-               LIBCFS_ALLOC(arr->va_ptrs[i], size);
-
-               if (!arr->va_ptrs[i]) {
-                       cfs_array_free((void *)&arr->va_ptrs[0]);
-                       return NULL;
-               }
-       }
-
-       return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_array_alloc);
diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_string.c b/drivers/staging/lustre/lustre/libcfs/libcfs_string.c

deleted file mode 100644 (file)

index 50ac153..0000000
--- a/drivers/staging/lustre/lustre/libcfs/libcfs_string.c
+++ /dev/null
@@ -1,581 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * String manipulation functions.
- *
- * libcfs/libcfs/libcfs_string.c
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- */
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-/* Convert a text string to a bitmask */
-int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
-                int *oldmask, int minmask, int allmask)
-{
-       const char *debugstr;
-       char op = '\0';
-       int newmask = minmask, i, len, found = 0;
-
-       /* <str> must be a list of tokens separated by whitespace
-        * and optionally an operator ('+' or '-').  If an operator
-        * appears first in <str>, '*oldmask' is used as the starting point
-        * (relative), otherwise minmask is used (absolute).  An operator
-        * applies to all following tokens up to the next operator.
-        */
-       while (*str != '\0') {
-               while (isspace(*str))
-                       str++;
-               if (*str == '\0')
-                       break;
-               if (*str == '+' || *str == '-') {
-                       op = *str++;
-                       if (!found)
-                               /* only if first token is relative */
-                               newmask = *oldmask;
-                       while (isspace(*str))
-                               str++;
-                       if (*str == '\0')  /* trailing op */
-                               return -EINVAL;
-               }
-
-               /* find token length */
-               len = 0;
-               while (str[len] != '\0' && !isspace(str[len]) &&
-                      str[len] != '+' && str[len] != '-')
-                       len++;
-
-               /* match token */
-               found = 0;
-               for (i = 0; i < 32; i++) {
-                       debugstr = bit2str(i);
-                       if (debugstr && strlen(debugstr) == len &&
-                           strncasecmp(str, debugstr, len) == 0) {
-                               if (op == '-')
-                                       newmask &= ~(1 << i);
-                               else
-                                       newmask |= (1 << i);
-                               found = 1;
-                               break;
-                       }
-               }
-               if (!found && len == 3 &&
-                   (strncasecmp(str, "ALL", len) == 0)) {
-                       if (op == '-')
-                               newmask = minmask;
-                       else
-                               newmask = allmask;
-                       found = 1;
-               }
-               if (!found) {
-                       CWARN("unknown mask '%.*s'.\n"
-                             "mask usage: [+|-]<all|type> ...\n", len, str);
-                       return -EINVAL;
-               }
-               str += len;
-       }
-
-       *oldmask = newmask;
-       return 0;
-}
-
-/* get the first string out of @str */
-char *cfs_firststr(char *str, size_t size)
-{
-       size_t i = 0;
-       char  *end;
-
-       /* trim leading spaces */
-       while (i < size && *str && isspace(*str)) {
-               ++i;
-               ++str;
-       }
-
-       /* string with all spaces */
-       if (*str == '\0')
-               goto out;
-
-       end = str;
-       while (i < size && *end != '\0' && !isspace(*end)) {
-               ++i;
-               ++end;
-       }
-
-       *end = '\0';
-out:
-       return str;
-}
-EXPORT_SYMBOL(cfs_firststr);
-
-char *
-cfs_trimwhite(char *str)
-{
-       char *end;
-
-       while (isspace(*str))
-               str++;
-
-       end = str + strlen(str);
-       while (end > str) {
-               if (!isspace(end[-1]))
-                       break;
-               end--;
-       }
-
-       *end = 0;
-       return str;
-}
-EXPORT_SYMBOL(cfs_trimwhite);
-
-/**
- * Extracts tokens from strings.
- *
- * Looks for \a delim in string \a next, sets \a res to point to
- * substring before the delimiter, sets \a next right after the found
- * delimiter.
- *
- * \retval 1 if \a res points to a string of non-whitespace characters
- * \retval 0 otherwise
- */
-int
-cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
-{
-       char *end;
-
-       if (!next->ls_str)
-               return 0;
-
-       /* skip leading white spaces */
-       while (next->ls_len) {
-               if (!isspace(*next->ls_str))
-                       break;
-               next->ls_str++;
-               next->ls_len--;
-       }
-
-       if (next->ls_len == 0) /* whitespaces only */
-               return 0;
-
-       if (*next->ls_str == delim) {
-               /* first non-writespace is the delimiter */
-               return 0;
-       }
-
-       res->ls_str = next->ls_str;
-       end = memchr(next->ls_str, delim, next->ls_len);
-       if (!end) {
-               /* there is no the delimeter in the string */
-               end = next->ls_str + next->ls_len;
-               next->ls_str = NULL;
-       } else {
-               next->ls_str = end + 1;
-               next->ls_len -= (end - res->ls_str + 1);
-       }
-
-       /* skip ending whitespaces */
-       while (--end != res->ls_str) {
-               if (!isspace(*end))
-                       break;
-       }
-
-       res->ls_len = end - res->ls_str + 1;
-       return 1;
-}
-EXPORT_SYMBOL(cfs_gettok);
-
-/**
- * Converts string to integer.
- *
- * Accepts decimal and hexadecimal number recordings.
- *
- * \retval 1 if first \a nob chars of \a str convert to decimal or
- * hexadecimal integer in the range [\a min, \a max]
- * \retval 0 otherwise
- */
-int
-cfs_str2num_check(char *str, int nob, unsigned *num,
-                 unsigned min, unsigned max)
-{
-       bool all_numbers = true;
-       char *endp, cache;
-       int rc;
-
-       str = cfs_trimwhite(str);
-
-       /**
-        * kstrouint can only handle strings composed
-        * of only numbers. We need to scan the string
-        * passed in for the first non-digit character
-        * and end the string at that location. If we
-        * don't find any non-digit character we still
-        * need to place a '\0' at position nob since
-        * we are not interested in the rest of the
-        * string which is longer than nob in size.
-        * After we are done the character at the
-        * position we placed '\0' must be restored.
-        */
-       for (endp = str; endp < str + nob; endp++) {
-               if (!isdigit(*endp)) {
-                       all_numbers = false;
-                       break;
-               }
-       }
-       cache = *endp;
-       *endp = '\0';
-
-       rc = kstrtouint(str, 10, num);
-       *endp = cache;
-       if (rc || !all_numbers)
-               return 0;
-
-       return (*num >= min && *num <= max);
-}
-EXPORT_SYMBOL(cfs_str2num_check);
-
-/**
- * Parses \<range_expr\> token of the syntax. If \a bracketed is false,
- * \a src should only have a single token which can be \<number\> or  \*
- *
- * \retval pointer to allocated range_expr and initialized
- * range_expr::re_lo, range_expr::re_hi and range_expr:re_stride if \a
- `* src parses to
- * \<number\> |
- * \<number\> '-' \<number\> |
- * \<number\> '-' \<number\> '/' \<number\>
- * \retval 0 will be returned if it can be parsed, otherwise -EINVAL or
- * -ENOMEM will be returned.
- */
-static int
-cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
-                    int bracketed, struct cfs_range_expr **expr)
-{
-       struct cfs_range_expr   *re;
-       struct cfs_lstr         tok;
-
-       LIBCFS_ALLOC(re, sizeof(*re));
-       if (!re)
-               return -ENOMEM;
-
-       if (src->ls_len == 1 && src->ls_str[0] == '*') {
-               re->re_lo = min;
-               re->re_hi = max;
-               re->re_stride = 1;
-               goto out;
-       }
-
-       if (cfs_str2num_check(src->ls_str, src->ls_len,
-                             &re->re_lo, min, max)) {
-               /* <number> is parsed */
-               re->re_hi = re->re_lo;
-               re->re_stride = 1;
-               goto out;
-       }
-
-       if (!bracketed || !cfs_gettok(src, '-', &tok))
-               goto failed;
-
-       if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
-                              &re->re_lo, min, max))
-               goto failed;
-
-       /* <number> - */
-       if (cfs_str2num_check(src->ls_str, src->ls_len,
-                             &re->re_hi, min, max)) {
-               /* <number> - <number> is parsed */
-               re->re_stride = 1;
-               goto out;
-       }
-
-       /* go to check <number> '-' <number> '/' <number> */
-       if (cfs_gettok(src, '/', &tok)) {
-               if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
-                                      &re->re_hi, min, max))
-                       goto failed;
-
-               /* <number> - <number> / ... */
-               if (cfs_str2num_check(src->ls_str, src->ls_len,
-                                     &re->re_stride, min, max)) {
-                       /* <number> - <number> / <number> is parsed */
-                       goto out;
-               }
-       }
-
- out:
-       *expr = re;
-       return 0;
-
- failed:
-       LIBCFS_FREE(re, sizeof(*re));
-       return -EINVAL;
-}
-
-/**
- * Print the range expression \a re into specified \a buffer.
- * If \a bracketed is true, expression does not need additional
- * brackets.
- *
- * \retval number of characters written
- */
-static int
-cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr,
-                    bool bracketed)
-{
-       int i;
-       char s[] = "[";
-       char e[] = "]";
-
-       if (bracketed) {
-               s[0] = '\0';
-               e[0] = '\0';
-       }
-
-       if (expr->re_lo == expr->re_hi)
-               i = scnprintf(buffer, count, "%u", expr->re_lo);
-       else if (expr->re_stride == 1)
-               i = scnprintf(buffer, count, "%s%u-%u%s",
-                             s, expr->re_lo, expr->re_hi, e);
-       else
-               i = scnprintf(buffer, count, "%s%u-%u/%u%s",
-                             s, expr->re_lo, expr->re_hi, expr->re_stride, e);
-       return i;
-}
-
-/**
- * Print a list of range expressions (\a expr_list) into specified \a buffer.
- * If the list contains several expressions, separate them with comma
- * and surround the list with brackets.
- *
- * \retval number of characters written
- */
-int
-cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
-{
-       struct cfs_range_expr *expr;
-       int i = 0, j = 0;
-       int numexprs = 0;
-
-       if (count <= 0)
-               return 0;
-
-       list_for_each_entry(expr, &expr_list->el_exprs, re_link)
-               numexprs++;
-
-       if (numexprs > 1)
-               i += scnprintf(buffer + i, count - i, "[");
-
-       list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-               if (j++ != 0)
-                       i += scnprintf(buffer + i, count - i, ",");
-               i += cfs_range_expr_print(buffer + i, count - i, expr,
-                                         numexprs > 1);
-       }
-
-       if (numexprs > 1)
-               i += scnprintf(buffer + i, count - i, "]");
-
-       return i;
-}
-EXPORT_SYMBOL(cfs_expr_list_print);
-
-/**
- * Matches value (\a value) against ranges expression list \a expr_list.
- *
- * \retval 1 if \a value matches
- * \retval 0 otherwise
- */
-int
-cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list)
-{
-       struct cfs_range_expr   *expr;
-
-       list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-               if (value >= expr->re_lo && value <= expr->re_hi &&
-                   ((value - expr->re_lo) % expr->re_stride) == 0)
-                       return 1;
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL(cfs_expr_list_match);
-
-/**
- * Convert express list (\a expr_list) to an array of all matched values
- *
- * \retval N N is total number of all matched values
- * \retval 0 if expression list is empty
- * \retval < 0 for failure
- */
-int
-cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp)
-{
-       struct cfs_range_expr   *expr;
-       __u32                   *val;
-       int                     count = 0;
-       int                     i;
-
-       list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-               for (i = expr->re_lo; i <= expr->re_hi; i++) {
-                       if (((i - expr->re_lo) % expr->re_stride) == 0)
-                               count++;
-               }
-       }
-
-       if (count == 0) /* empty expression list */
-               return 0;
-
-       if (count > max) {
-               CERROR("Number of values %d exceeds max allowed %d\n",
-                      max, count);
-               return -EINVAL;
-       }
-
-       LIBCFS_ALLOC(val, sizeof(val[0]) * count);
-       if (!val)
-               return -ENOMEM;
-
-       count = 0;
-       list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
-               for (i = expr->re_lo; i <= expr->re_hi; i++) {
-                       if (((i - expr->re_lo) % expr->re_stride) == 0)
-                               val[count++] = i;
-               }
-       }
-
-       *valpp = val;
-       return count;
-}
-EXPORT_SYMBOL(cfs_expr_list_values);
-
-/**
- * Frees cfs_range_expr structures of \a expr_list.
- *
- * \retval none
- */
-void
-cfs_expr_list_free(struct cfs_expr_list *expr_list)
-{
-       while (!list_empty(&expr_list->el_exprs)) {
-               struct cfs_range_expr *expr;
-
-               expr = list_entry(expr_list->el_exprs.next,
-                                 struct cfs_range_expr, re_link);
-               list_del(&expr->re_link);
-               LIBCFS_FREE(expr, sizeof(*expr));
-       }
-
-       LIBCFS_FREE(expr_list, sizeof(*expr_list));
-}
-EXPORT_SYMBOL(cfs_expr_list_free);
-
-/**
- * Parses \<cfs_expr_list\> token of the syntax.
- *
- * \retval 0 if \a str parses to \<number\> | \<expr_list\>
- * \retval -errno otherwise
- */
-int
-cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
-                   struct cfs_expr_list **elpp)
-{
-       struct cfs_expr_list    *expr_list;
-       struct cfs_range_expr   *expr;
-       struct cfs_lstr         src;
-       int                     rc;
-
-       LIBCFS_ALLOC(expr_list, sizeof(*expr_list));
-       if (!expr_list)
-               return -ENOMEM;
-
-       src.ls_str = str;
-       src.ls_len = len;
-
-       INIT_LIST_HEAD(&expr_list->el_exprs);
-
-       if (src.ls_str[0] == '[' &&
-           src.ls_str[src.ls_len - 1] == ']') {
-               src.ls_str++;
-               src.ls_len -= 2;
-
-               rc = -EINVAL;
-               while (src.ls_str) {
-                       struct cfs_lstr tok;
-
-                       if (!cfs_gettok(&src, ',', &tok)) {
-                               rc = -EINVAL;
-                               break;
-                       }
-
-                       rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
-                       if (rc != 0)
-                               break;
-
-                       list_add_tail(&expr->re_link, &expr_list->el_exprs);
-               }
-       } else {
-               rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
-               if (rc == 0)
-                       list_add_tail(&expr->re_link, &expr_list->el_exprs);
-       }
-
-       if (rc != 0)
-               cfs_expr_list_free(expr_list);
-       else
-               *elpp = expr_list;
-
-       return rc;
-}
-EXPORT_SYMBOL(cfs_expr_list_parse);
-
-/**
- * Frees cfs_expr_list structures of \a list.
- *
- * For each struct cfs_expr_list structure found on \a list it frees
- * range_expr list attached to it and frees the cfs_expr_list itself.
- *
- * \retval none
- */
-void
-cfs_expr_list_free_list(struct list_head *list)
-{
-       struct cfs_expr_list *el;
-
-       while (!list_empty(list)) {
-               el = list_entry(list->next, struct cfs_expr_list, el_link);
-               list_del(&el->el_link);
-               cfs_expr_list_free(el);
-       }
-}
-EXPORT_SYMBOL(cfs_expr_list_free_list);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c

deleted file mode 100644 (file)

index 389fb9e..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c
+++ /dev/null
@@ -1,1040 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include "../../../include/linux/libcfs/libcfs.h"
-
-#ifdef CONFIG_SMP
-
-/**
- * modparam for setting number of partitions
- *
- *  0 : estimate best value based on cores or NUMA nodes
- *  1 : disable multiple partitions
- * >1 : specify number of partitions
- */
-static int     cpu_npartitions;
-module_param(cpu_npartitions, int, 0444);
-MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
-
-/**
- * modparam for setting CPU partitions patterns:
- *
- * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID,
- *      number in bracket is processor ID (core or HT)
- *
- * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
- *       are NUMA node ID, number before bracket is CPU partition ID.
- *
- * NB: If user specified cpu_pattern, cpu_npartitions will be ignored
- */
-static char    *cpu_pattern = "";
-module_param(cpu_pattern, charp, 0444);
-MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
-
-struct cfs_cpt_data {
-       /* serialize hotplug etc */
-       spinlock_t              cpt_lock;
-       /* reserved for hotplug */
-       unsigned long           cpt_version;
-       /* mutex to protect cpt_cpumask */
-       struct mutex            cpt_mutex;
-       /* scratch buffer for set/unset_node */
-       cpumask_t               *cpt_cpumask;
-};
-
-static struct cfs_cpt_data     cpt_data;
-
-void
-cfs_cpt_table_free(struct cfs_cpt_table *cptab)
-{
-       int     i;
-
-       if (cptab->ctb_cpu2cpt) {
-               LIBCFS_FREE(cptab->ctb_cpu2cpt,
-                           num_possible_cpus() *
-                           sizeof(cptab->ctb_cpu2cpt[0]));
-       }
-
-       for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
-               struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
-               if (part->cpt_nodemask) {
-                       LIBCFS_FREE(part->cpt_nodemask,
-                                   sizeof(*part->cpt_nodemask));
-               }
-
-               if (part->cpt_cpumask)
-                       LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
-       }
-
-       if (cptab->ctb_parts) {
-               LIBCFS_FREE(cptab->ctb_parts,
-                           cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
-       }
-
-       if (cptab->ctb_nodemask)
-               LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
-       if (cptab->ctb_cpumask)
-               LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
-
-       LIBCFS_FREE(cptab, sizeof(*cptab));
-}
-EXPORT_SYMBOL(cfs_cpt_table_free);
-
-struct cfs_cpt_table *
-cfs_cpt_table_alloc(unsigned int ncpt)
-{
-       struct cfs_cpt_table *cptab;
-       int     i;
-
-       LIBCFS_ALLOC(cptab, sizeof(*cptab));
-       if (!cptab)
-               return NULL;
-
-       cptab->ctb_nparts = ncpt;
-
-       LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
-       LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
-
-       if (!cptab->ctb_cpumask || !cptab->ctb_nodemask)
-               goto failed;
-
-       LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
-                    num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
-       if (!cptab->ctb_cpu2cpt)
-               goto failed;
-
-       memset(cptab->ctb_cpu2cpt, -1,
-              num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
-
-       LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
-       if (!cptab->ctb_parts)
-               goto failed;
-
-       for (i = 0; i < ncpt; i++) {
-               struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
-               LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
-               LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
-               if (!part->cpt_cpumask || !part->cpt_nodemask)
-                       goto failed;
-       }
-
-       spin_lock(&cpt_data.cpt_lock);
-       /* Reserved for hotplug */
-       cptab->ctb_version = cpt_data.cpt_version;
-       spin_unlock(&cpt_data.cpt_lock);
-
-       return cptab;
-
- failed:
-       cfs_cpt_table_free(cptab);
-       return NULL;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
-       char    *tmp = buf;
-       int     rc = 0;
-       int     i;
-       int     j;
-
-       for (i = 0; i < cptab->ctb_nparts; i++) {
-               if (len > 0) {
-                       rc = snprintf(tmp, len, "%d\t: ", i);
-                       len -= rc;
-               }
-
-               if (len <= 0) {
-                       rc = -EFBIG;
-                       goto out;
-               }
-
-               tmp += rc;
-               for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
-                       rc = snprintf(tmp, len, "%d ", j);
-                       len -= rc;
-                       if (len <= 0) {
-                               rc = -EFBIG;
-                               goto out;
-                       }
-                       tmp += rc;
-               }
-
-               *tmp = '\n';
-               tmp++;
-               len--;
-       }
-
- out:
-       if (rc < 0)
-               return rc;
-
-       return tmp - buf;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
-       return cptab->ctb_nparts;
-}
-EXPORT_SYMBOL(cfs_cpt_number);
-
-int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
-       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-       return cpt == CFS_CPT_ANY ?
-              cpumask_weight(cptab->ctb_cpumask) :
-              cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
-}
-EXPORT_SYMBOL(cfs_cpt_weight);
-
-int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
-       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-       return cpt == CFS_CPT_ANY ?
-              cpumask_any_and(cptab->ctb_cpumask,
-                              cpu_online_mask) < nr_cpu_ids :
-              cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
-                              cpu_online_mask) < nr_cpu_ids;
-}
-EXPORT_SYMBOL(cfs_cpt_online);
-
-cpumask_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
-       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-       return cpt == CFS_CPT_ANY ?
-              cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
-}
-EXPORT_SYMBOL(cfs_cpt_cpumask);
-
-nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
-       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-       return cpt == CFS_CPT_ANY ?
-              cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
-}
-EXPORT_SYMBOL(cfs_cpt_nodemask);
-
-int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-       int     node;
-
-       LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
-
-       if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
-               CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
-               return 0;
-       }
-
-       if (cptab->ctb_cpu2cpt[cpu] != -1) {
-               CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
-                      cpu, cptab->ctb_cpu2cpt[cpu]);
-               return 0;
-       }
-
-       cptab->ctb_cpu2cpt[cpu] = cpt;
-
-       LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-       LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
-
-       cpumask_set_cpu(cpu, cptab->ctb_cpumask);
-       cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-
-       node = cpu_to_node(cpu);
-
-       /* first CPU of @node in this CPT table */
-       if (!node_isset(node, *cptab->ctb_nodemask))
-               node_set(node, *cptab->ctb_nodemask);
-
-       /* first CPU of @node in this partition */
-       if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
-               node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpu);
-
-void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-       int     node;
-       int     i;
-
-       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-       if (cpu < 0 || cpu >= nr_cpu_ids) {
-               CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
-               return;
-       }
-
-       if (cpt == CFS_CPT_ANY) {
-               /* caller doesn't know the partition ID */
-               cpt = cptab->ctb_cpu2cpt[cpu];
-               if (cpt < 0) { /* not set in this CPT-table */
-                       CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
-                              cpt, cptab);
-                       return;
-               }
-
-       } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
-               CDEBUG(D_INFO,
-                      "CPU %d is not in cpu-partition %d\n", cpu, cpt);
-               return;
-       }
-
-       LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
-       LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-
-       cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-       cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
-       cptab->ctb_cpu2cpt[cpu] = -1;
-
-       node = cpu_to_node(cpu);
-
-       LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
-       LASSERT(node_isset(node, *cptab->ctb_nodemask));
-
-       for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
-               /* this CPT has other CPU belonging to this node? */
-               if (cpu_to_node(i) == node)
-                       break;
-       }
-
-       if (i >= nr_cpu_ids)
-               node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
-       for_each_cpu(i, cptab->ctb_cpumask) {
-               /* this CPT-table has other CPU belonging to this node? */
-               if (cpu_to_node(i) == node)
-                       break;
-       }
-
-       if (i >= nr_cpu_ids)
-               node_clear(node, *cptab->ctb_nodemask);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpu);
-
-int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-       int     i;
-
-       if (cpumask_weight(mask) == 0 ||
-           cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
-               CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
-                      cpt);
-               return 0;
-       }
-
-       for_each_cpu(i, mask) {
-               if (!cfs_cpt_set_cpu(cptab, cpt, i))
-                       return 0;
-       }
-
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpumask);
-
-void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-       int     i;
-
-       for_each_cpu(i, mask)
-               cfs_cpt_unset_cpu(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
-
-int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-       cpumask_t       *mask;
-       int             rc;
-
-       if (node < 0 || node >= MAX_NUMNODES) {
-               CDEBUG(D_INFO,
-                      "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
-               return 0;
-       }
-
-       mutex_lock(&cpt_data.cpt_mutex);
-
-       mask = cpt_data.cpt_cpumask;
-       cpumask_copy(mask, cpumask_of_node(node));
-
-       rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
-
-       mutex_unlock(&cpt_data.cpt_mutex);
-
-       return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_set_node);
-
-void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-       cpumask_t *mask;
-
-       if (node < 0 || node >= MAX_NUMNODES) {
-               CDEBUG(D_INFO,
-                      "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
-               return;
-       }
-
-       mutex_lock(&cpt_data.cpt_mutex);
-
-       mask = cpt_data.cpt_cpumask;
-       cpumask_copy(mask, cpumask_of_node(node));
-
-       cfs_cpt_unset_cpumask(cptab, cpt, mask);
-
-       mutex_unlock(&cpt_data.cpt_mutex);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_node);
-
-int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-       int     i;
-
-       for_each_node_mask(i, *mask) {
-               if (!cfs_cpt_set_node(cptab, cpt, i))
-                       return 0;
-       }
-
-       return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_nodemask);
-
-void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-       int     i;
-
-       for_each_node_mask(i, *mask)
-               cfs_cpt_unset_node(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
-
-void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-       int     last;
-       int     i;
-
-       if (cpt == CFS_CPT_ANY) {
-               last = cptab->ctb_nparts - 1;
-               cpt = 0;
-       } else {
-               last = cpt;
-       }
-
-       for (; cpt <= last; cpt++) {
-               for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
-                       cfs_cpt_unset_cpu(cptab, cpt, i);
-       }
-}
-EXPORT_SYMBOL(cfs_cpt_clear);
-
-int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
-       nodemask_t      *mask;
-       int             weight;
-       int             rotor;
-       int             node;
-
-       /* convert CPU partition ID to HW node id */
-
-       if (cpt < 0 || cpt >= cptab->ctb_nparts) {
-               mask = cptab->ctb_nodemask;
-               rotor = cptab->ctb_spread_rotor++;
-       } else {
-               mask = cptab->ctb_parts[cpt].cpt_nodemask;
-               rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
-       }
-
-       weight = nodes_weight(*mask);
-       LASSERT(weight > 0);
-
-       rotor %= weight;
-
-       for_each_node_mask(node, *mask) {
-               if (rotor-- == 0)
-                       return node;
-       }
-
-       LBUG();
-       return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_spread_node);
-
-int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
-       int     cpu = smp_processor_id();
-       int     cpt = cptab->ctb_cpu2cpt[cpu];
-
-       if (cpt < 0) {
-               if (!remap)
-                       return cpt;
-
-               /* don't return negative value for safety of upper layer,
-                * instead we shadow the unknown cpu to a valid partition ID
-                */
-               cpt = cpu % cptab->ctb_nparts;
-       }
-
-       return cpt;
-}
-EXPORT_SYMBOL(cfs_cpt_current);
-
-int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
-       LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
-
-       return cptab->ctb_cpu2cpt[cpu];
-}
-EXPORT_SYMBOL(cfs_cpt_of_cpu);
-
-int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
-       cpumask_t       *cpumask;
-       nodemask_t      *nodemask;
-       int             rc;
-       int             i;
-
-       LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
-       if (cpt == CFS_CPT_ANY) {
-               cpumask = cptab->ctb_cpumask;
-               nodemask = cptab->ctb_nodemask;
-       } else {
-               cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
-               nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
-       }
-
-       if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) {
-               CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
-                      cpt);
-               return -EINVAL;
-       }
-
-       for_each_online_cpu(i) {
-               if (cpumask_test_cpu(i, cpumask))
-                       continue;
-
-               rc = set_cpus_allowed_ptr(current, cpumask);
-               set_mems_allowed(*nodemask);
-               if (rc == 0)
-                       schedule(); /* switch to allowed CPU */
-
-               return rc;
-       }
-
-       /* don't need to set affinity because all online CPUs are covered */
-       return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_bind);
-
-/**
- * Choose max to \a number CPUs from \a node and set them in \a cpt.
- * We always prefer to choose CPU in the same core/socket.
- */
-static int
-cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
-                    cpumask_t *node, int number)
-{
-       cpumask_t       *socket = NULL;
-       cpumask_t       *core = NULL;
-       int             rc = 0;
-       int             cpu;
-
-       LASSERT(number > 0);
-
-       if (number >= cpumask_weight(node)) {
-               while (!cpumask_empty(node)) {
-                       cpu = cpumask_first(node);
-
-                       rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
-                       if (!rc)
-                               return -EINVAL;
-                       cpumask_clear_cpu(cpu, node);
-               }
-               return 0;
-       }
-
-       /* allocate scratch buffer */
-       LIBCFS_ALLOC(socket, cpumask_size());
-       LIBCFS_ALLOC(core, cpumask_size());
-       if (!socket || !core) {
-               rc = -ENOMEM;
-               goto out;
-       }
-
-       while (!cpumask_empty(node)) {
-               cpu = cpumask_first(node);
-
-               /* get cpumask for cores in the same socket */
-               cpumask_copy(socket, topology_core_cpumask(cpu));
-               cpumask_and(socket, socket, node);
-
-               LASSERT(!cpumask_empty(socket));
-
-               while (!cpumask_empty(socket)) {
-                       int     i;
-
-                       /* get cpumask for hts in the same core */
-                       cpumask_copy(core, topology_sibling_cpumask(cpu));
-                       cpumask_and(core, core, node);
-
-                       LASSERT(!cpumask_empty(core));
-
-                       for_each_cpu(i, core) {
-                               cpumask_clear_cpu(i, socket);
-                               cpumask_clear_cpu(i, node);
-
-                               rc = cfs_cpt_set_cpu(cptab, cpt, i);
-                               if (!rc) {
-                                       rc = -EINVAL;
-                                       goto out;
-                               }
-
-                               if (--number == 0)
-                                       goto out;
-                       }
-                       cpu = cpumask_first(socket);
-               }
-       }
-
- out:
-       if (socket)
-               LIBCFS_FREE(socket, cpumask_size());
-       if (core)
-               LIBCFS_FREE(core, cpumask_size());
-       return rc;
-}
-
-#define CPT_WEIGHT_MIN  4u
-
-static unsigned int
-cfs_cpt_num_estimate(void)
-{
-       unsigned nnode = num_online_nodes();
-       unsigned ncpu  = num_online_cpus();
-       unsigned ncpt;
-
-       if (ncpu <= CPT_WEIGHT_MIN) {
-               ncpt = 1;
-               goto out;
-       }
-
-       /* generate reasonable number of CPU partitions based on total number
-        * of CPUs, Preferred N should be power2 and match this condition:
-        * 2 * (N - 1)^2 < NCPUS <= 2 * N^2
-        */
-       for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
-               ;
-
-       if (ncpt <= nnode) { /* fat numa system */
-               while (nnode > ncpt)
-                       nnode >>= 1;
-
-       } else { /* ncpt > nnode */
-               while ((nnode << 1) <= ncpt)
-                       nnode <<= 1;
-       }
-
-       ncpt = nnode;
-
- out:
-#if (BITS_PER_LONG == 32)
-       /* config many CPU partitions on 32-bit system could consume
-        * too much memory
-        */
-       ncpt = min(2U, ncpt);
-#endif
-       while (ncpu % ncpt != 0)
-               ncpt--; /* worst case is 1 */
-
-       return ncpt;
-}
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create(int ncpt)
-{
-       struct cfs_cpt_table *cptab = NULL;
-       cpumask_t       *mask = NULL;
-       int             cpt = 0;
-       int             num;
-       int             rc;
-       int             i;
-
-       rc = cfs_cpt_num_estimate();
-       if (ncpt <= 0)
-               ncpt = rc;
-
-       if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
-               CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
-                     ncpt, rc);
-       }
-
-       if (num_online_cpus() % ncpt != 0) {
-               CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
-                      (int)num_online_cpus(), ncpt);
-               goto failed;
-       }
-
-       cptab = cfs_cpt_table_alloc(ncpt);
-       if (!cptab) {
-               CERROR("Failed to allocate CPU map(%d)\n", ncpt);
-               goto failed;
-       }
-
-       num = num_online_cpus() / ncpt;
-       if (num == 0) {
-               CERROR("CPU changed while setting CPU partition\n");
-               goto failed;
-       }
-
-       LIBCFS_ALLOC(mask, cpumask_size());
-       if (!mask) {
-               CERROR("Failed to allocate scratch cpumask\n");
-               goto failed;
-       }
-
-       for_each_online_node(i) {
-               cpumask_copy(mask, cpumask_of_node(i));
-
-               while (!cpumask_empty(mask)) {
-                       struct cfs_cpu_partition *part;
-                       int    n;
-
-                       if (cpt >= ncpt)
-                               goto failed;
-
-                       part = &cptab->ctb_parts[cpt];
-
-                       n = num - cpumask_weight(part->cpt_cpumask);
-                       LASSERT(n > 0);
-
-                       rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
-                       if (rc < 0)
-                               goto failed;
-
-                       LASSERT(num >= cpumask_weight(part->cpt_cpumask));
-                       if (num == cpumask_weight(part->cpt_cpumask))
-                               cpt++;
-               }
-       }
-
-       if (cpt != ncpt ||
-           num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
-               CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
-                      cptab->ctb_nparts, num, cpt,
-                      cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
-               goto failed;
-       }
-
-       LIBCFS_FREE(mask, cpumask_size());
-
-       return cptab;
-
- failed:
-       CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
-              ncpt, num_online_nodes(), num_online_cpus());
-
-       if (mask)
-               LIBCFS_FREE(mask, cpumask_size());
-
-       if (cptab)
-               cfs_cpt_table_free(cptab);
-
-       return NULL;
-}
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create_pattern(char *pattern)
-{
-       struct cfs_cpt_table    *cptab;
-       char                    *str    = pattern;
-       int                     node    = 0;
-       int                     high;
-       int                     ncpt;
-       int                     c;
-
-       for (ncpt = 0;; ncpt++) { /* quick scan bracket */
-               str = strchr(str, '[');
-               if (!str)
-                       break;
-               str++;
-       }
-
-       str = cfs_trimwhite(pattern);
-       if (*str == 'n' || *str == 'N') {
-               pattern = str + 1;
-               node = 1;
-       }
-
-       if (ncpt == 0 ||
-           (node && ncpt > num_online_nodes()) ||
-           (!node && ncpt > num_online_cpus())) {
-               CERROR("Invalid pattern %s, or too many partitions %d\n",
-                      pattern, ncpt);
-               return NULL;
-       }
-
-       high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
-
-       cptab = cfs_cpt_table_alloc(ncpt);
-       if (!cptab) {
-               CERROR("Failed to allocate cpu partition table\n");
-               return NULL;
-       }
-
-       for (str = cfs_trimwhite(pattern), c = 0;; c++) {
-               struct cfs_range_expr   *range;
-               struct cfs_expr_list    *el;
-               char                    *bracket = strchr(str, '[');
-               int                     cpt;
-               int                     rc;
-               int                     i;
-               int                     n;
-
-               if (!bracket) {
-                       if (*str != 0) {
-                               CERROR("Invalid pattern %s\n", str);
-                               goto failed;
-                       }
-                       if (c != ncpt) {
-                               CERROR("expect %d partitions but found %d\n",
-                                      ncpt, c);
-                               goto failed;
-                       }
-                       break;
-               }
-
-               if (sscanf(str, "%d%n", &cpt, &n) < 1) {
-                       CERROR("Invalid cpu pattern %s\n", str);
-                       goto failed;
-               }
-
-               if (cpt < 0 || cpt >= ncpt) {
-                       CERROR("Invalid partition id %d, total partitions %d\n",
-                              cpt, ncpt);
-                       goto failed;
-               }
-
-               if (cfs_cpt_weight(cptab, cpt) != 0) {
-                       CERROR("Partition %d has already been set.\n", cpt);
-                       goto failed;
-               }
-
-               str = cfs_trimwhite(str + n);
-               if (str != bracket) {
-                       CERROR("Invalid pattern %s\n", str);
-                       goto failed;
-               }
-
-               bracket = strchr(str, ']');
-               if (!bracket) {
-                       CERROR("missing right bracket for cpt %d, %s\n",
-                              cpt, str);
-                       goto failed;
-               }
-
-               if (cfs_expr_list_parse(str, (bracket - str) + 1,
-                                       0, high, &el) != 0) {
-                       CERROR("Can't parse number range: %s\n", str);
-                       goto failed;
-               }
-
-               list_for_each_entry(range, &el->el_exprs, re_link) {
-                       for (i = range->re_lo; i <= range->re_hi; i++) {
-                               if ((i - range->re_lo) % range->re_stride != 0)
-                                       continue;
-
-                               rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
-                                           cfs_cpt_set_cpu(cptab, cpt, i);
-                               if (!rc) {
-                                       cfs_expr_list_free(el);
-                                       goto failed;
-                               }
-                       }
-               }
-
-               cfs_expr_list_free(el);
-
-               if (!cfs_cpt_online(cptab, cpt)) {
-                       CERROR("No online CPU is found on partition %d\n", cpt);
-                       goto failed;
-               }
-
-               str = cfs_trimwhite(bracket + 1);
-       }
-
-       return cptab;
-
- failed:
-       cfs_cpt_table_free(cptab);
-       return NULL;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int
-cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-       unsigned int  cpu = (unsigned long)hcpu;
-       bool         warn;
-
-       switch (action) {
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               spin_lock(&cpt_data.cpt_lock);
-               cpt_data.cpt_version++;
-               spin_unlock(&cpt_data.cpt_lock);
-               /* Fall through */
-       default:
-               if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) {
-                       CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n",
-                              cpu, action);
-                       break;
-               }
-
-               mutex_lock(&cpt_data.cpt_mutex);
-               /* if all HTs in a core are offline, it may break affinity */
-               cpumask_copy(cpt_data.cpt_cpumask,
-                            topology_sibling_cpumask(cpu));
-               warn = cpumask_any_and(cpt_data.cpt_cpumask,
-                                      cpu_online_mask) >= nr_cpu_ids;
-               mutex_unlock(&cpt_data.cpt_mutex);
-               CDEBUG(warn ? D_WARNING : D_INFO,
-                      "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n",
-                      cpu, action);
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block cfs_cpu_notifier = {
-       .notifier_call  = cfs_cpu_notify,
-       .priority       = 0
-};
-
-#endif
-
-void
-cfs_cpu_fini(void)
-{
-       if (cfs_cpt_table)
-               cfs_cpt_table_free(cfs_cpt_table);
-
-#ifdef CONFIG_HOTPLUG_CPU
-       unregister_hotcpu_notifier(&cfs_cpu_notifier);
-#endif
-       if (cpt_data.cpt_cpumask)
-               LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
-}
-
-int
-cfs_cpu_init(void)
-{
-       LASSERT(!cfs_cpt_table);
-
-       memset(&cpt_data, 0, sizeof(cpt_data));
-
-       LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
-       if (!cpt_data.cpt_cpumask) {
-               CERROR("Failed to allocate scratch buffer\n");
-               return -1;
-       }
-
-       spin_lock_init(&cpt_data.cpt_lock);
-       mutex_init(&cpt_data.cpt_mutex);
-
-#ifdef CONFIG_HOTPLUG_CPU
-       register_hotcpu_notifier(&cfs_cpu_notifier);
-#endif
-
-       if (*cpu_pattern != 0) {
-               cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
-               if (!cfs_cpt_table) {
-                       CERROR("Failed to create cptab from pattern %s\n",
-                              cpu_pattern);
-                       goto failed;
-               }
-
-       } else {
-               cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
-               if (!cfs_cpt_table) {
-                       CERROR("Failed to create ptable with npartitions %d\n",
-                              cpu_npartitions);
-                       goto failed;
-               }
-       }
-
-       spin_lock(&cpt_data.cpt_lock);
-       if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
-               spin_unlock(&cpt_data.cpt_lock);
-               CERROR("CPU hotplug/unplug during setup\n");
-               goto failed;
-       }
-       spin_unlock(&cpt_data.cpt_lock);
-
-       LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n",
-                num_online_cpus(), cfs_cpt_number(cfs_cpt_table));
-       return 0;
-
- failed:
-       cfs_cpu_fini();
-       return -1;
-}
-
-#endif
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c

deleted file mode 100644 (file)

index db05727..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- */
-
-/*
- * This is crypto api shash wrappers to zlib_adler32.
- */
-
-#include <linux/module.h>
-#include <linux/zutil.h>
-#include <crypto/internal/hash.h>
-#include "linux-crypto.h"
-
-#define CHKSUM_BLOCK_SIZE      1
-#define CHKSUM_DIGEST_SIZE     4
-
-static int adler32_cra_init(struct crypto_tfm *tfm)
-{
-       u32 *key = crypto_tfm_ctx(tfm);
-
-       *key = 1;
-
-       return 0;
-}
-
-static int adler32_setkey(struct crypto_shash *hash, const u8 *key,
-                         unsigned int keylen)
-{
-       u32 *mctx = crypto_shash_ctx(hash);
-
-       if (keylen != sizeof(u32)) {
-               crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
-               return -EINVAL;
-       }
-       *mctx = *(u32 *)key;
-       return 0;
-}
-
-static int adler32_init(struct shash_desc *desc)
-{
-       u32 *mctx = crypto_shash_ctx(desc->tfm);
-       u32 *cksump = shash_desc_ctx(desc);
-
-       *cksump = *mctx;
-
-       return 0;
-}
-
-static int adler32_update(struct shash_desc *desc, const u8 *data,
-                         unsigned int len)
-{
-       u32 *cksump = shash_desc_ctx(desc);
-
-       *cksump = zlib_adler32(*cksump, data, len);
-       return 0;
-}
-
-static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len,
-                          u8 *out)
-{
-       *(u32 *)out = zlib_adler32(*cksump, data, len);
-       return 0;
-}
-
-static int adler32_finup(struct shash_desc *desc, const u8 *data,
-                        unsigned int len, u8 *out)
-{
-       return __adler32_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int adler32_final(struct shash_desc *desc, u8 *out)
-{
-       u32 *cksump = shash_desc_ctx(desc);
-
-       *(u32 *)out = *cksump;
-       return 0;
-}
-
-static int adler32_digest(struct shash_desc *desc, const u8 *data,
-                         unsigned int len, u8 *out)
-{
-       return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len,
-                                   out);
-}
-
-static struct shash_alg alg = {
-       .setkey         = adler32_setkey,
-       .init           = adler32_init,
-       .update         = adler32_update,
-       .final          = adler32_final,
-       .finup          = adler32_finup,
-       .digest         = adler32_digest,
-       .descsize       = sizeof(u32),
-       .digestsize     = CHKSUM_DIGEST_SIZE,
-       .base           = {
-               .cra_name               = "adler32",
-               .cra_driver_name        = "adler32-zlib",
-               .cra_priority           = 100,
-               .cra_blocksize          = CHKSUM_BLOCK_SIZE,
-               .cra_ctxsize            = sizeof(u32),
-               .cra_module             = THIS_MODULE,
-               .cra_init               = adler32_cra_init,
-       }
-};
-
-int cfs_crypto_adler32_register(void)
-{
-       return crypto_register_shash(&alg);
-}
-
-void cfs_crypto_adler32_unregister(void)
-{
-       crypto_unregister_shash(&alg);
-}
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c

deleted file mode 100644 (file)

index 1d2f70f..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-
-#include <linux/crypto.h>
-#include <linux/scatterlist.h>
-#include "../../../include/linux/libcfs/libcfs.h"
-#include "linux-crypto.h"
-/**
- *  Array of  hash algorithm speed in MByte per second
- */
-static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX];
-
-static int cfs_crypto_hash_alloc(unsigned char alg_id,
-                                const struct cfs_crypto_hash_type **type,
-                                struct hash_desc *desc, unsigned char *key,
-                                unsigned int key_len)
-{
-       int     err = 0;
-
-       *type = cfs_crypto_hash_type(alg_id);
-
-       if (!*type) {
-               CWARN("Unsupported hash algorithm id = %d, max id is %d\n",
-                     alg_id, CFS_HASH_ALG_MAX);
-               return -EINVAL;
-       }
-       desc->tfm = crypto_alloc_hash((*type)->cht_name, 0, 0);
-
-       if (!desc->tfm)
-               return -EINVAL;
-
-       if (IS_ERR(desc->tfm)) {
-               CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n",
-                      (*type)->cht_name);
-               return PTR_ERR(desc->tfm);
-       }
-
-       desc->flags = 0;
-
-       /** Shash have different logic for initialization then digest
-        * shash: crypto_hash_setkey, crypto_hash_init
-        * digest: crypto_digest_init, crypto_digest_setkey
-        * Skip this function for digest, because we use shash logic at
-        * cfs_crypto_hash_alloc.
-        */
-       if (key)
-               err = crypto_hash_setkey(desc->tfm, key, key_len);
-       else if ((*type)->cht_key != 0)
-               err = crypto_hash_setkey(desc->tfm,
-                                        (unsigned char *)&((*type)->cht_key),
-                                        (*type)->cht_size);
-
-       if (err != 0) {
-               crypto_free_hash(desc->tfm);
-               return err;
-       }
-
-       CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n",
-              (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_name,
-              (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_driver_name,
-              cfs_crypto_hash_speeds[alg_id]);
-
-       return crypto_hash_init(desc);
-}
-
-int cfs_crypto_hash_digest(unsigned char alg_id,
-                          const void *buf, unsigned int buf_len,
-                          unsigned char *key, unsigned int key_len,
-                          unsigned char *hash, unsigned int *hash_len)
-{
-       struct scatterlist      sl;
-       struct hash_desc        hdesc;
-       int                     err;
-       const struct cfs_crypto_hash_type       *type;
-
-       if (!buf || buf_len == 0 || !hash_len)
-               return -EINVAL;
-
-       err = cfs_crypto_hash_alloc(alg_id, &type, &hdesc, key, key_len);
-       if (err != 0)
-               return err;
-
-       if (!hash || *hash_len < type->cht_size) {
-               *hash_len = type->cht_size;
-               crypto_free_hash(hdesc.tfm);
-               return -ENOSPC;
-       }
-       sg_init_one(&sl, buf, buf_len);
-
-       hdesc.flags = 0;
-       err = crypto_hash_digest(&hdesc, &sl, sl.length, hash);
-       crypto_free_hash(hdesc.tfm);
-
-       return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_digest);
-
-struct cfs_crypto_hash_desc *
-       cfs_crypto_hash_init(unsigned char alg_id,
-                            unsigned char *key, unsigned int key_len)
-{
-       struct  hash_desc       *hdesc;
-       int                  err;
-       const struct cfs_crypto_hash_type       *type;
-
-       hdesc = kmalloc(sizeof(*hdesc), 0);
-       if (!hdesc)
-               return ERR_PTR(-ENOMEM);
-
-       err = cfs_crypto_hash_alloc(alg_id, &type, hdesc, key, key_len);
-
-       if (err) {
-               kfree(hdesc);
-               return ERR_PTR(err);
-       }
-       return (struct cfs_crypto_hash_desc *)hdesc;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_init);
-
-int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc,
-                               struct page *page, unsigned int offset,
-                               unsigned int len)
-{
-       struct scatterlist sl;
-
-       sg_init_table(&sl, 1);
-       sg_set_page(&sl, page, len, offset & ~CFS_PAGE_MASK);
-
-       return crypto_hash_update((struct hash_desc *)hdesc, &sl, sl.length);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update_page);
-
-int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc,
-                          const void *buf, unsigned int buf_len)
-{
-       struct scatterlist sl;
-
-       sg_init_one(&sl, buf, buf_len);
-
-       return crypto_hash_update((struct hash_desc *)hdesc, &sl, sl.length);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update);
-
-/*      If hash_len pointer is NULL - destroy descriptor. */
-int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc,
-                         unsigned char *hash, unsigned int *hash_len)
-{
-       int     err;
-       int     size = crypto_hash_digestsize(((struct hash_desc *)hdesc)->tfm);
-
-       if (!hash_len) {
-               crypto_free_hash(((struct hash_desc *)hdesc)->tfm);
-               kfree(hdesc);
-               return 0;
-       }
-       if (!hash || *hash_len < size) {
-               *hash_len = size;
-               return -ENOSPC;
-       }
-       err = crypto_hash_final((struct hash_desc *)hdesc, hash);
-
-       if (err < 0) {
-               /* May be caller can fix error */
-               return err;
-       }
-       crypto_free_hash(((struct hash_desc *)hdesc)->tfm);
-       kfree(hdesc);
-       return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_final);
-
-static void cfs_crypto_performance_test(unsigned char alg_id,
-                                       const unsigned char *buf,
-                                       unsigned int buf_len)
-{
-       unsigned long              start, end;
-       int                          bcount, err = 0;
-       int                          sec = 1; /* do test only 1 sec */
-       unsigned char              hash[64];
-       unsigned int                hash_len = 64;
-
-       for (start = jiffies, end = start + sec * HZ, bcount = 0;
-            time_before(jiffies, end); bcount++) {
-               err = cfs_crypto_hash_digest(alg_id, buf, buf_len, NULL, 0,
-                                            hash, &hash_len);
-               if (err)
-                       break;
-       }
-       end = jiffies;
-
-       if (err) {
-               cfs_crypto_hash_speeds[alg_id] =  -1;
-               CDEBUG(D_INFO, "Crypto hash algorithm %s, err = %d\n",
-                      cfs_crypto_hash_name(alg_id), err);
-       } else {
-               unsigned long   tmp;
-
-               tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
-                      1000) / (1024 * 1024);
-               cfs_crypto_hash_speeds[alg_id] = (int)tmp;
-       }
-       CDEBUG(D_INFO, "Crypto hash algorithm %s speed = %d MB/s\n",
-              cfs_crypto_hash_name(alg_id), cfs_crypto_hash_speeds[alg_id]);
-}
-
-int cfs_crypto_hash_speed(unsigned char hash_alg)
-{
-       if (hash_alg < CFS_HASH_ALG_MAX)
-               return cfs_crypto_hash_speeds[hash_alg];
-       return -1;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_speed);
-
-/**
- * Do performance test for all hash algorithms.
- */
-static int cfs_crypto_test_hashes(void)
-{
-       unsigned char      i;
-       unsigned char      *data;
-       unsigned int        j;
-       /* Data block size for testing hash. Maximum
-        * kmalloc size for 2.6.18 kernel is 128K
-        */
-       unsigned int        data_len = 1 * 128 * 1024;
-
-       data = kmalloc(data_len, 0);
-       if (!data)
-               return -ENOMEM;
-
-       for (j = 0; j < data_len; j++)
-               data[j] = j & 0xff;
-
-       for (i = 0; i < CFS_HASH_ALG_MAX; i++)
-               cfs_crypto_performance_test(i, data, data_len);
-
-       kfree(data);
-       return 0;
-}
-
-static int adler32;
-
-int cfs_crypto_register(void)
-{
-       request_module("crc32c");
-
-       adler32 = cfs_crypto_adler32_register();
-
-       /* check all algorithms and do performance test */
-       cfs_crypto_test_hashes();
-       return 0;
-}
-
-void cfs_crypto_unregister(void)
-{
-       if (adler32 == 0)
-               cfs_crypto_adler32_unregister();
-}
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.h

deleted file mode 100644 (file)

index 18e8cd4..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.h
+++ /dev/null
@@ -1,29 +0,0 @@
- /*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/**
- * Functions for start/stop shash adler32 algorithm.
- */
-int cfs_crypto_adler32_register(void);
-void cfs_crypto_adler32_unregister(void);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c

deleted file mode 100644 (file)

index 13d31e8..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/linux/linux-curproc.c
- *
- * Lustre curproc API implementation for Linux kernel
- *
- * Author: Nikita Danilov <nikita@clusterfs.com>
- */
-
-#include <linux/sched.h>
-#include <linux/fs_struct.h>
-
-#include <linux/compat.h>
-#include <linux/thread_info.h>
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "../../../include/linux/libcfs/libcfs.h"
-
-/*
- * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
- * for Linux kernel.
- */
-
-void cfs_cap_raise(cfs_cap_t cap)
-{
-       struct cred *cred;
-
-       cred = prepare_creds();
-       if (cred) {
-               cap_raise(cred->cap_effective, cap);
-               commit_creds(cred);
-       }
-}
-EXPORT_SYMBOL(cfs_cap_raise);
-
-void cfs_cap_lower(cfs_cap_t cap)
-{
-       struct cred *cred;
-
-       cred = prepare_creds();
-       if (cred) {
-               cap_lower(cred->cap_effective, cap);
-               commit_creds(cred);
-       }
-}
-EXPORT_SYMBOL(cfs_cap_lower);
-
-int cfs_cap_raised(cfs_cap_t cap)
-{
-       return cap_raised(current_cap(), cap);
-}
-EXPORT_SYMBOL(cfs_cap_raised);
-
-static void cfs_kernel_cap_pack(kernel_cap_t kcap, cfs_cap_t *cap)
-{
-       /* XXX lost high byte */
-       *cap = kcap.cap[0];
-}
-
-cfs_cap_t cfs_curproc_cap_pack(void)
-{
-       cfs_cap_t cap;
-
-       cfs_kernel_cap_pack(current_cap(), &cap);
-       return cap;
-}
-EXPORT_SYMBOL(cfs_curproc_cap_pack);
-
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c

deleted file mode 100644 (file)

index 638e4b3..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/linux/linux-debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/notifier.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/miscdevice.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include "../../../include/linux/libcfs/libcfs.h"
-
-#include "../tracefile.h"
-
-#include <linux/kallsyms.h>
-
-char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall";
-char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
-
-/**
- * Upcall function once a Lustre log has been dumped.
- *
- * \param file  path of the dumped log
- */
-void libcfs_run_debug_log_upcall(char *file)
-{
-       char *argv[3];
-       int   rc;
-       char *envp[] = {
-               "HOME=/",
-               "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-               NULL};
-
-       argv[0] = lnet_debug_log_upcall;
-
-       LASSERTF(file, "called on a null filename\n");
-       argv[1] = file; /* only need to pass the path of the file */
-
-       argv[2] = NULL;
-
-       rc = call_usermodehelper(argv[0], argv, envp, 1);
-       if (rc < 0 && rc != -ENOENT) {
-               CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
-                      rc, argv[0], argv[1]);
-       } else {
-               CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
-                      argv[0], argv[1]);
-       }
-}
-
-void libcfs_run_upcall(char **argv)
-{
-       int   rc;
-       int   argc;
-       char *envp[] = {
-               "HOME=/",
-               "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-               NULL};
-
-       argv[0] = lnet_upcall;
-       argc = 1;
-       while (argv[argc])
-               argc++;
-
-       LASSERT(argc >= 2);
-
-       rc = call_usermodehelper(argv[0], argv, envp, 1);
-       if (rc < 0 && rc != -ENOENT) {
-               CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; check /sys/kernel/debug/lnet/upcall\n",
-                      rc, argv[0], argv[1],
-                      argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
-                      argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
-                      argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
-                      argc < 6 ? "" : ",...");
-       } else {
-               CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n",
-                      argv[0], argv[1],
-                      argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
-                      argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
-                      argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
-                      argc < 6 ? "" : ",...");
-       }
-}
-
-void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *msgdata)
-{
-       char *argv[6];
-       char buf[32];
-
-       snprintf(buf, sizeof(buf), "%d", msgdata->msg_line);
-
-       argv[1] = "LBUG";
-       argv[2] = (char *)msgdata->msg_file;
-       argv[3] = (char *)msgdata->msg_fn;
-       argv[4] = buf;
-       argv[5] = NULL;
-
-       libcfs_run_upcall(argv);
-}
-EXPORT_SYMBOL(libcfs_run_lbug_upcall);
-
-/* coverity[+kill] */
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
-{
-       libcfs_catastrophe = 1;
-       libcfs_debug_msg(msgdata, "LBUG\n");
-
-       if (in_interrupt()) {
-               panic("LBUG in interrupt.\n");
-               /* not reached */
-       }
-
-       dump_stack();
-       if (!libcfs_panic_on_lbug)
-               libcfs_debug_dumplog();
-       libcfs_run_lbug_upcall(msgdata);
-       if (libcfs_panic_on_lbug)
-               panic("LBUG");
-       set_task_state(current, TASK_UNINTERRUPTIBLE);
-       while (1)
-               schedule();
-}
-EXPORT_SYMBOL(lbug_with_loc);
-
-static int panic_notifier(struct notifier_block *self, unsigned long unused1,
-                         void *unused2)
-{
-       if (libcfs_panic_in_progress)
-               return 0;
-
-       libcfs_panic_in_progress = 1;
-       mb();
-
-       return 0;
-}
-
-static struct notifier_block libcfs_panic_notifier = {
-       .notifier_call  = panic_notifier,
-       .next           = NULL,
-       .priority       = 10000,
-};
-
-void libcfs_register_panic_notifier(void)
-{
-       atomic_notifier_chain_register(&panic_notifier_list,
-                                      &libcfs_panic_notifier);
-}
-
-void libcfs_unregister_panic_notifier(void)
-{
-       atomic_notifier_chain_unregister(&panic_notifier_list,
-                                        &libcfs_panic_notifier);
-}
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-mem.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-mem.c

deleted file mode 100644 (file)

index 86f32ff..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-mem.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- */
-/*
- * This file creates a memory allocation primitive for Lustre, that
- * allows to fallback to vmalloc allocations should regular kernel allocations
- * fail due to size or system memory fragmentation.
- *
- * Author: Oleg Drokin <green@linuxhacker.ru>
- *
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate Technology.
- */
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "../../../include/linux/libcfs/libcfs.h"
-
-void *libcfs_kvzalloc(size_t size, gfp_t flags)
-{
-       void *ret;
-
-       ret = kzalloc(size, flags | __GFP_NOWARN);
-       if (!ret)
-               ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
-       return ret;
-}
-EXPORT_SYMBOL(libcfs_kvzalloc);
-
-void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size,
-                         gfp_t flags)
-{
-       void *ret;
-
-       ret = kzalloc_node(size, flags | __GFP_NOWARN,
-                          cfs_cpt_spread_node(cptab, cpt));
-       if (!ret) {
-               WARN_ON(!(flags & (__GFP_FS | __GFP_HIGH)));
-               ret = vmalloc_node(size, cfs_cpt_spread_node(cptab, cpt));
-       }
-
-       return ret;
-}
-EXPORT_SYMBOL(libcfs_kvzalloc_cpt);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c

deleted file mode 100644 (file)

index ebc60ac..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "../../../include/linux/libcfs/libcfs.h"
-
-#define LNET_MINOR 240
-
-int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
-{
-       if (libcfs_ioctl_is_invalid(data)) {
-               CERROR("LNET: ioctl not correctly formatted\n");
-               return -EINVAL;
-       }
-
-       if (data->ioc_inllen1)
-               data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
-       if (data->ioc_inllen2)
-               data->ioc_inlbuf2 = &data->ioc_bulk[0] +
-                       cfs_size_round(data->ioc_inllen1);
-
-       return 0;
-}
-
-int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg,
-                            __u32 *len)
-{
-       struct libcfs_ioctl_hdr hdr;
-
-       if (copy_from_user(&hdr, arg, sizeof(hdr)))
-               return -EFAULT;
-
-       if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
-           hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
-               CERROR("LNET: version mismatch expected %#x, got %#x\n",
-                      LIBCFS_IOCTL_VERSION, hdr.ioc_version);
-               return -EINVAL;
-       }
-
-       *len = hdr.ioc_len;
-
-       return 0;
-}
-
-int libcfs_ioctl_popdata(void __user *arg, void *data, int size)
-{
-       if (copy_to_user(arg, data, size))
-               return -EFAULT;
-       return 0;
-}
-
-static int
-libcfs_psdev_open(struct inode *inode, struct file *file)
-{
-       int    rc = 0;
-
-       if (!inode)
-               return -EINVAL;
-       if (libcfs_psdev_ops.p_open)
-               rc = libcfs_psdev_ops.p_open(0, NULL);
-       else
-               return -EPERM;
-       return rc;
-}
-
-/* called when closing /dev/device */
-static int
-libcfs_psdev_release(struct inode *inode, struct file *file)
-{
-       int    rc = 0;
-
-       if (!inode)
-               return -EINVAL;
-       if (libcfs_psdev_ops.p_close)
-               rc = libcfs_psdev_ops.p_close(0, NULL);
-       else
-               rc = -EPERM;
-       return rc;
-}
-
-static long libcfs_ioctl(struct file *file,
-                        unsigned int cmd, unsigned long arg)
-{
-       struct cfs_psdev_file    pfile;
-       int    rc = 0;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
-           _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
-           _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
-               CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
-                      _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
-               return -EINVAL;
-       }
-
-       /* Handle platform-dependent IOC requests */
-       switch (cmd) {
-       case IOC_LIBCFS_PANIC:
-               if (!capable(CFS_CAP_SYS_BOOT))
-                       return -EPERM;
-               panic("debugctl-invoked panic");
-               return 0;
-       }
-
-       if (libcfs_psdev_ops.p_ioctl)
-               rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void __user *)arg);
-       else
-               rc = -EPERM;
-       return rc;
-}
-
-static const struct file_operations libcfs_fops = {
-       .unlocked_ioctl = libcfs_ioctl,
-       .open           = libcfs_psdev_open,
-       .release        = libcfs_psdev_release,
-};
-
-struct miscdevice libcfs_dev = {
-       .minor = LNET_MINOR,
-       .name = "lnet",
-       .fops = &libcfs_fops,
-};
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c

deleted file mode 100644 (file)

index 8908446..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/fs_struct.h>
-#include <linux/sched.h>
-
-#include "../../../include/linux/libcfs/libcfs.h"
-
-#if defined(CONFIG_KGDB)
-#include <linux/kgdb.h>
-#endif
-
-/**
- * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
- * waiting threads, which is not always desirable because all threads will
- * be waken up again and again, even user only needs a few of them to be
- * active most time. This is not good for performance because cache can
- * be polluted by different threads.
- *
- * LIFO list can resolve this problem because we always wakeup the most
- * recent active thread by default.
- *
- * NB: please don't call non-exclusive & exclusive wait on the same
- * waitq if add_wait_queue_exclusive_head is used.
- */
-void
-add_wait_queue_exclusive_head(wait_queue_head_t *waitq, wait_queue_t *link)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&waitq->lock, flags);
-       __add_wait_queue_exclusive(waitq, link);
-       spin_unlock_irqrestore(&waitq->lock, flags);
-}
-EXPORT_SYMBOL(add_wait_queue_exclusive_head);
-
-sigset_t
-cfs_block_allsigs(void)
-{
-       unsigned long     flags;
-       sigset_t        old;
-
-       spin_lock_irqsave(&current->sighand->siglock, flags);
-       old = current->blocked;
-       sigfillset(&current->blocked);
-       recalc_sigpending();
-       spin_unlock_irqrestore(&current->sighand->siglock, flags);
-
-       return old;
-}
-EXPORT_SYMBOL(cfs_block_allsigs);
-
-sigset_t cfs_block_sigs(unsigned long sigs)
-{
-       unsigned long  flags;
-       sigset_t        old;
-
-       spin_lock_irqsave(&current->sighand->siglock, flags);
-       old = current->blocked;
-       sigaddsetmask(&current->blocked, sigs);
-       recalc_sigpending();
-       spin_unlock_irqrestore(&current->sighand->siglock, flags);
-       return old;
-}
-EXPORT_SYMBOL(cfs_block_sigs);
-
-/* Block all signals except for the @sigs */
-sigset_t cfs_block_sigsinv(unsigned long sigs)
-{
-       unsigned long flags;
-       sigset_t old;
-
-       spin_lock_irqsave(&current->sighand->siglock, flags);
-       old = current->blocked;
-       sigaddsetmask(&current->blocked, ~sigs);
-       recalc_sigpending();
-       spin_unlock_irqrestore(&current->sighand->siglock, flags);
-
-       return old;
-}
-EXPORT_SYMBOL(cfs_block_sigsinv);
-
-void
-cfs_restore_sigs(sigset_t old)
-{
-       unsigned long  flags;
-
-       spin_lock_irqsave(&current->sighand->siglock, flags);
-       current->blocked = old;
-       recalc_sigpending();
-       spin_unlock_irqrestore(&current->sighand->siglock, flags);
-}
-EXPORT_SYMBOL(cfs_restore_sigs);
-
-int
-cfs_signal_pending(void)
-{
-       return signal_pending(current);
-}
-EXPORT_SYMBOL(cfs_signal_pending);
-
-void
-cfs_clear_sigpending(void)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&current->sighand->siglock, flags);
-       clear_tsk_thread_flag(current, TIF_SIGPENDING);
-       spin_unlock_irqrestore(&current->sighand->siglock, flags);
-}
-EXPORT_SYMBOL(cfs_clear_sigpending);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c

deleted file mode 100644 (file)

index 91c2ae8..0000000
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-
-#include "../../../include/linux/libcfs/libcfs.h"
-#include "../tracefile.h"
-
-/* percents to share the total debug memory for each type */
-static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
-       80,  /* 80% pages for CFS_TCD_TYPE_PROC */
-       10,  /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
-       10   /* 10% pages for CFS_TCD_TYPE_IRQ */
-};
-
-char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-
-static DECLARE_RWSEM(cfs_tracefile_sem);
-
-int cfs_tracefile_init_arch(void)
-{
-       int    i;
-       int    j;
-       struct cfs_trace_cpu_data *tcd;
-
-       /* initialize trace_data */
-       memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
-       for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
-               cfs_trace_data[i] =
-                       kmalloc(sizeof(union cfs_trace_data_union) *
-                               num_possible_cpus(), GFP_KERNEL);
-               if (!cfs_trace_data[i])
-                       goto out;
-       }
-
-       /* arch related info initialized */
-       cfs_tcd_for_each(tcd, i, j) {
-               spin_lock_init(&tcd->tcd_lock);
-               tcd->tcd_pages_factor = pages_factor[i];
-               tcd->tcd_type = i;
-               tcd->tcd_cpu = j;
-       }
-
-       for (i = 0; i < num_possible_cpus(); i++)
-               for (j = 0; j < 3; j++) {
-                       cfs_trace_console_buffers[i][j] =
-                               kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
-                                       GFP_KERNEL);
-
-                       if (!cfs_trace_console_buffers[i][j])
-                               goto out;
-               }
-
-       return 0;
-
-out:
-       cfs_tracefile_fini_arch();
-       printk(KERN_ERR "lnet: Not enough memory\n");
-       return -ENOMEM;
-}
-
-void cfs_tracefile_fini_arch(void)
-{
-       int    i;
-       int    j;
-
-       for (i = 0; i < num_possible_cpus(); i++)
-               for (j = 0; j < 3; j++) {
-                       kfree(cfs_trace_console_buffers[i][j]);
-                       cfs_trace_console_buffers[i][j] = NULL;
-               }
-
-       for (i = 0; cfs_trace_data[i]; i++) {
-               kfree(cfs_trace_data[i]);
-               cfs_trace_data[i] = NULL;
-       }
-}
-
-void cfs_tracefile_read_lock(void)
-{
-       down_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_read_unlock(void)
-{
-       up_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_lock(void)
-{
-       down_write(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_unlock(void)
-{
-       up_write(&cfs_tracefile_sem);
-}
-
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
-{
-       if (in_irq())
-               return CFS_TCD_TYPE_IRQ;
-       if (in_softirq())
-               return CFS_TCD_TYPE_SOFTIRQ;
-       return CFS_TCD_TYPE_PROC;
-}
-
-/*
- * The walking argument indicates the locking comes from all tcd types
- * iterator and we must lock it and dissable local irqs to avoid deadlocks
- * with other interrupt locks that might be happening. See LU-1311
- * for details.
- */
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
-       __acquires(&tcd->tc_lock)
-{
-       __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
-       if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
-               spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
-       else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
-               spin_lock_bh(&tcd->tcd_lock);
-       else if (unlikely(walking))
-               spin_lock_irq(&tcd->tcd_lock);
-       else
-               spin_lock(&tcd->tcd_lock);
-       return 1;
-}
-
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
-       __releases(&tcd->tcd_lock)
-{
-       __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
-       if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
-               spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
-       else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
-               spin_unlock_bh(&tcd->tcd_lock);
-       else if (unlikely(walking))
-               spin_unlock_irq(&tcd->tcd_lock);
-       else
-               spin_unlock(&tcd->tcd_lock);
-}
-
-void
-cfs_set_ptldebug_header(struct ptldebug_header *header,
-                       struct libcfs_debug_msg_data *msgdata,
-                       unsigned long stack)
-{
-       struct timespec64 ts;
-
-       ktime_get_real_ts64(&ts);
-
-       header->ph_subsys = msgdata->msg_subsys;
-       header->ph_mask = msgdata->msg_mask;
-       header->ph_cpu_id = smp_processor_id();
-       header->ph_type = cfs_trace_buf_idx_get();
-       /* y2038 safe since all user space treats this as unsigned, but
-        * will overflow in 2106
-        */
-       header->ph_sec = (u32)ts.tv_sec;
-       header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
-       header->ph_stack = stack;
-       header->ph_pid = current->pid;
-       header->ph_line_num = msgdata->msg_line;
-       header->ph_extern_pid = 0;
-}
-
-static char *
-dbghdr_to_err_string(struct ptldebug_header *hdr)
-{
-       switch (hdr->ph_subsys) {
-       case S_LND:
-       case S_LNET:
-               return "LNetError";
-       default:
-               return "LustreError";
-       }
-}
-
-static char *
-dbghdr_to_info_string(struct ptldebug_header *hdr)
-{
-       switch (hdr->ph_subsys) {
-       case S_LND:
-       case S_LNET:
-               return "LNet";
-       default:
-               return "Lustre";
-       }
-}
-
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
-                         const char *buf, int len, const char *file,
-                         const char *fn)
-{
-       char *prefix = "Lustre", *ptype = NULL;
-
-       if ((mask & D_EMERG) != 0) {
-               prefix = dbghdr_to_err_string(hdr);
-               ptype = KERN_EMERG;
-       } else if ((mask & D_ERROR) != 0) {
-               prefix = dbghdr_to_err_string(hdr);
-               ptype = KERN_ERR;
-       } else if ((mask & D_WARNING) != 0) {
-               prefix = dbghdr_to_info_string(hdr);
-               ptype = KERN_WARNING;
-       } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) {
-               prefix = dbghdr_to_info_string(hdr);
-               ptype = KERN_INFO;
-       }
-
-       if ((mask & D_CONSOLE) != 0) {
-               printk("%s%s: %.*s", ptype, prefix, len, buf);
-       } else {
-               printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
-                      hdr->ph_pid, hdr->ph_extern_pid, file, hdr->ph_line_num,
-                      fn, len, buf);
-       }
-}
-
-int cfs_trace_max_debug_mb(void)
-{
-       int  total_mb = (totalram_pages >> (20 - PAGE_SHIFT));
-
-       return max(512, (total_mb * 80) / 100);
-}
diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c

deleted file mode 100644 (file)

index cdc640b..0000000
--- a/drivers/staging/lustre/lustre/libcfs/module.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-
-#include <linux/sysctl.h>
-#include <linux/debugfs.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#define LNET_MAX_IOCTL_BUF_LEN (sizeof(struct lnet_ioctl_net_config) + \
-                               sizeof(struct lnet_ioctl_config_data))
-
-#include "../../include/linux/libcfs/libcfs.h"
-#include <asm/div64.h>
-
-#include "../../include/linux/libcfs/libcfs_crypto.h"
-#include "../../include/linux/lnet/lib-lnet.h"
-#include "../../include/linux/lnet/lib-dlc.h"
-#include "../../include/linux/lnet/lnet.h"
-#include "tracefile.h"
-
-static struct dentry *lnet_debugfs_root;
-
-/* called when opening /dev/device */
-static int libcfs_psdev_open(unsigned long flags, void *args)
-{
-       try_module_get(THIS_MODULE);
-       return 0;
-}
-
-/* called when closing /dev/device */
-static int libcfs_psdev_release(unsigned long flags, void *args)
-{
-       module_put(THIS_MODULE);
-       return 0;
-}
-
-static DECLARE_RWSEM(ioctl_list_sem);
-static LIST_HEAD(ioctl_list);
-
-int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
-{
-       int rc = 0;
-
-       down_write(&ioctl_list_sem);
-       if (!list_empty(&hand->item))
-               rc = -EBUSY;
-       else
-               list_add_tail(&hand->item, &ioctl_list);
-       up_write(&ioctl_list_sem);
-
-       return rc;
-}
-EXPORT_SYMBOL(libcfs_register_ioctl);
-
-int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
-{
-       int rc = 0;
-
-       down_write(&ioctl_list_sem);
-       if (list_empty(&hand->item))
-               rc = -ENOENT;
-       else
-               list_del_init(&hand->item);
-       up_write(&ioctl_list_sem);
-
-       return rc;
-}
-EXPORT_SYMBOL(libcfs_deregister_ioctl);
-
-static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd,
-                              void __user *arg, struct libcfs_ioctl_hdr *hdr)
-{
-       struct libcfs_ioctl_data *data = NULL;
-       int err = -EINVAL;
-
-       /*
-        * The libcfs_ioctl_data_adjust() function performs adjustment
-        * operations on the libcfs_ioctl_data structure to make
-        * it usable by the code.  This doesn't need to be called
-        * for new data structures added.
-        */
-       if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) {
-               data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
-               err = libcfs_ioctl_data_adjust(data);
-               if (err)
-                       return err;
-       }
-
-       switch (cmd) {
-       case IOC_LIBCFS_CLEAR_DEBUG:
-               libcfs_debug_clear_buffer();
-               return 0;
-       /*
-        * case IOC_LIBCFS_PANIC:
-        * Handled in arch/cfs_module.c
-        */
-       case IOC_LIBCFS_MARK_DEBUG:
-               if (!data->ioc_inlbuf1 ||
-                   data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
-                       return -EINVAL;
-               libcfs_debug_mark_buffer(data->ioc_inlbuf1);
-               return 0;
-
-       default: {
-               struct libcfs_ioctl_handler *hand;
-
-               err = -EINVAL;
-               down_read(&ioctl_list_sem);
-               list_for_each_entry(hand, &ioctl_list, item) {
-                       err = hand->handle_ioctl(cmd, hdr);
-                       if (err != -EINVAL) {
-                               if (err == 0)
-                                       err = libcfs_ioctl_popdata(arg,
-                                                       hdr, hdr->ioc_len);
-                               break;
-                       }
-               }
-               up_read(&ioctl_list_sem);
-               break;
-       }
-       }
-
-       return err;
-}
-
-static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd,
-                       void __user *arg)
-{
-       struct libcfs_ioctl_hdr *hdr;
-       int err = 0;
-       __u32 buf_len;
-
-       err = libcfs_ioctl_getdata_len(arg, &buf_len);
-       if (err)
-               return err;
-
-       /*
-        * do a check here to restrict the size of the memory
-        * to allocate to guard against DoS attacks.
-        */
-       if (buf_len > LNET_MAX_IOCTL_BUF_LEN) {
-               CERROR("LNET: user buffer exceeds kernel buffer\n");
-               return -EINVAL;
-       }
-
-       LIBCFS_ALLOC_GFP(hdr, buf_len, GFP_KERNEL);
-       if (!hdr)
-               return -ENOMEM;
-
-       /* 'cmd' and permissions get checked in our arch-specific caller */
-       if (copy_from_user(hdr, arg, buf_len)) {
-               CERROR("LNET ioctl: data error\n");
-               err = -EFAULT;
-               goto out;
-       }
-
-       err = libcfs_ioctl_handle(pfile, cmd, arg, hdr);
-
-out:
-       LIBCFS_FREE(hdr, buf_len);
-       return err;
-}
-
-struct cfs_psdev_ops libcfs_psdev_ops = {
-       libcfs_psdev_open,
-       libcfs_psdev_release,
-       NULL,
-       NULL,
-       libcfs_ioctl
-};
-
-int lprocfs_call_handler(void *data, int write, loff_t *ppos,
-                        void __user *buffer, size_t *lenp,
-                        int (*handler)(void *data, int write, loff_t pos,
-                                       void __user *buffer, int len))
-{
-       int rc = handler(data, write, *ppos, buffer, *lenp);
-
-       if (rc < 0)
-               return rc;
-
-       if (write) {
-               *ppos += *lenp;
-       } else {
-               *lenp = rc;
-               *ppos += rc;
-       }
-       return 0;
-}
-EXPORT_SYMBOL(lprocfs_call_handler);
-
-static int __proc_dobitmasks(void *data, int write,
-                            loff_t pos, void __user *buffer, int nob)
-{
-       const int     tmpstrlen = 512;
-       char     *tmpstr;
-       int        rc;
-       unsigned int *mask = data;
-       int        is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
-       int        is_printk = (mask == &libcfs_printk) ? 1 : 0;
-
-       rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen);
-       if (rc < 0)
-               return rc;
-
-       if (!write) {
-               libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
-               rc = strlen(tmpstr);
-
-               if (pos >= rc) {
-                       rc = 0;
-               } else {
-                       rc = cfs_trace_copyout_string(buffer, nob,
-                                                     tmpstr + pos, "\n");
-               }
-       } else {
-               rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
-               if (rc < 0) {
-                       kfree(tmpstr);
-                       return rc;
-               }
-
-               rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
-               /* Always print LBUG/LASSERT to console, so keep this mask */
-               if (is_printk)
-                       *mask |= D_EMERG;
-       }
-
-       kfree(tmpstr);
-       return rc;
-}
-
-static int proc_dobitmasks(struct ctl_table *table, int write,
-                          void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-                                   __proc_dobitmasks);
-}
-
-static int __proc_dump_kernel(void *data, int write,
-                             loff_t pos, void __user *buffer, int nob)
-{
-       if (!write)
-               return 0;
-
-       return cfs_trace_dump_debug_buffer_usrstr(buffer, nob);
-}
-
-static int proc_dump_kernel(struct ctl_table *table, int write,
-                           void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-                                   __proc_dump_kernel);
-}
-
-static int __proc_daemon_file(void *data, int write,
-                             loff_t pos, void __user *buffer, int nob)
-{
-       if (!write) {
-               int len = strlen(cfs_tracefile);
-
-               if (pos >= len)
-                       return 0;
-
-               return cfs_trace_copyout_string(buffer, nob,
-                                               cfs_tracefile + pos, "\n");
-       }
-
-       return cfs_trace_daemon_command_usrstr(buffer, nob);
-}
-
-static int proc_daemon_file(struct ctl_table *table, int write,
-                           void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-                                   __proc_daemon_file);
-}
-
-static int libcfs_force_lbug(struct ctl_table *table, int write,
-                            void __user *buffer,
-                            size_t *lenp, loff_t *ppos)
-{
-       if (write)
-               LBUG();
-       return 0;
-}
-
-static int proc_fail_loc(struct ctl_table *table, int write,
-                        void __user *buffer,
-                        size_t *lenp, loff_t *ppos)
-{
-       int rc;
-       long old_fail_loc = cfs_fail_loc;
-
-       rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
-       if (old_fail_loc != cfs_fail_loc)
-               wake_up(&cfs_race_waitq);
-       return rc;
-}
-
-static int __proc_cpt_table(void *data, int write,
-                           loff_t pos, void __user *buffer, int nob)
-{
-       char *buf = NULL;
-       int   len = 4096;
-       int   rc  = 0;
-
-       if (write)
-               return -EPERM;
-
-       LASSERT(cfs_cpt_table);
-
-       while (1) {
-               LIBCFS_ALLOC(buf, len);
-               if (!buf)
-                       return -ENOMEM;
-
-               rc = cfs_cpt_table_print(cfs_cpt_table, buf, len);
-               if (rc >= 0)
-                       break;
-
-               if (rc == -EFBIG) {
-                       LIBCFS_FREE(buf, len);
-                       len <<= 1;
-                       continue;
-               }
-               goto out;
-       }
-
-       if (pos >= rc) {
-               rc = 0;
-               goto out;
-       }
-
-       rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
- out:
-       if (buf)
-               LIBCFS_FREE(buf, len);
-       return rc;
-}
-
-static int proc_cpt_table(struct ctl_table *table, int write,
-                         void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
-                                   __proc_cpt_table);
-}
-
-static struct ctl_table lnet_table[] = {
-       {
-               .procname = "debug",
-               .data     = &libcfs_debug,
-               .maxlen   = sizeof(int),
-               .mode     = 0644,
-               .proc_handler = &proc_dobitmasks,
-       },
-       {
-               .procname = "subsystem_debug",
-               .data     = &libcfs_subsystem_debug,
-               .maxlen   = sizeof(int),
-               .mode     = 0644,
-               .proc_handler = &proc_dobitmasks,
-       },
-       {
-               .procname = "printk",
-               .data     = &libcfs_printk,
-               .maxlen   = sizeof(int),
-               .mode     = 0644,
-               .proc_handler = &proc_dobitmasks,
-       },
-       {
-               .procname = "cpu_partition_table",
-               .maxlen   = 128,
-               .mode     = 0444,
-               .proc_handler = &proc_cpt_table,
-       },
-
-       {
-               .procname = "upcall",
-               .data     = lnet_upcall,
-               .maxlen   = sizeof(lnet_upcall),
-               .mode     = 0644,
-               .proc_handler = &proc_dostring,
-       },
-       {
-               .procname = "debug_log_upcall",
-               .data     = lnet_debug_log_upcall,
-               .maxlen   = sizeof(lnet_debug_log_upcall),
-               .mode     = 0644,
-               .proc_handler = &proc_dostring,
-       },
-       {
-               .procname = "catastrophe",
-               .data     = &libcfs_catastrophe,
-               .maxlen   = sizeof(int),
-               .mode     = 0444,
-               .proc_handler = &proc_dointvec,
-       },
-       {
-               .procname = "dump_kernel",
-               .maxlen   = 256,
-               .mode     = 0200,
-               .proc_handler = &proc_dump_kernel,
-       },
-       {
-               .procname = "daemon_file",
-               .mode     = 0644,
-               .maxlen   = 256,
-               .proc_handler = &proc_daemon_file,
-       },
-       {
-               .procname = "force_lbug",
-               .data     = NULL,
-               .maxlen   = 0,
-               .mode     = 0200,
-               .proc_handler = &libcfs_force_lbug
-       },
-       {
-               .procname = "fail_loc",
-               .data     = &cfs_fail_loc,
-               .maxlen   = sizeof(cfs_fail_loc),
-               .mode     = 0644,
-               .proc_handler = &proc_fail_loc
-       },
-       {
-               .procname = "fail_val",
-               .data     = &cfs_fail_val,
-               .maxlen   = sizeof(int),
-               .mode     = 0644,
-               .proc_handler = &proc_dointvec
-       },
-       {
-       }
-};
-
-static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = {
-       { "console_ratelimit",
-         "/sys/module/libcfs/parameters/libcfs_console_ratelimit"},
-       { "debug_path",
-         "/sys/module/libcfs/parameters/libcfs_debug_file_path"},
-       { "panic_on_lbug",
-         "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"},
-       { "libcfs_console_backoff",
-         "/sys/module/libcfs/parameters/libcfs_console_backoff"},
-       { "debug_mb",
-         "/sys/module/libcfs/parameters/libcfs_debug_mb"},
-       { "console_min_delay_centisecs",
-         "/sys/module/libcfs/parameters/libcfs_console_min_delay"},
-       { "console_max_delay_centisecs",
-         "/sys/module/libcfs/parameters/libcfs_console_max_delay"},
-       {},
-};
-
-static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf,
-                                size_t count, loff_t *ppos)
-{
-       struct ctl_table *table = filp->private_data;
-       int error;
-
-       error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos);
-       if (!error)
-               error = count;
-
-       return error;
-}
-
-static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf,
-                                 size_t count, loff_t *ppos)
-{
-       struct ctl_table *table = filp->private_data;
-       int error;
-
-       error = table->proc_handler(table, 1, (void __user *)buf, &count, ppos);
-       if (!error)
-               error = count;
-
-       return error;
-}
-
-static const struct file_operations lnet_debugfs_file_operations_rw = {
-       .open           = simple_open,
-       .read           = lnet_debugfs_read,
-       .write          = lnet_debugfs_write,
-       .llseek         = default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_ro = {
-       .open           = simple_open,
-       .read           = lnet_debugfs_read,
-       .llseek         = default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_wo = {
-       .open           = simple_open,
-       .write          = lnet_debugfs_write,
-       .llseek         = default_llseek,
-};
-
-static const struct file_operations *lnet_debugfs_fops_select(umode_t mode)
-{
-       if (!(mode & S_IWUGO))
-               return &lnet_debugfs_file_operations_ro;
-
-       if (!(mode & S_IRUGO))
-               return &lnet_debugfs_file_operations_wo;
-
-       return &lnet_debugfs_file_operations_rw;
-}
-
-void lustre_insert_debugfs(struct ctl_table *table,
-                          const struct lnet_debugfs_symlink_def *symlinks)
-{
-       if (!lnet_debugfs_root)
-               lnet_debugfs_root = debugfs_create_dir("lnet", NULL);
-
-       /* Even if we cannot create, just ignore it altogether) */
-       if (IS_ERR_OR_NULL(lnet_debugfs_root))
-               return;
-
-       /* We don't save the dentry returned in next two calls, because
-        * we don't call debugfs_remove() but rather remove_recursive()
-        */
-       for (; table->procname; table++)
-               debugfs_create_file(table->procname, table->mode,
-                                   lnet_debugfs_root, table,
-                                   lnet_debugfs_fops_select(table->mode));
-
-       for (; symlinks && symlinks->name; symlinks++)
-               debugfs_create_symlink(symlinks->name, lnet_debugfs_root,
-                                      symlinks->target);
-}
-EXPORT_SYMBOL_GPL(lustre_insert_debugfs);
-
-static void lustre_remove_debugfs(void)
-{
-       debugfs_remove_recursive(lnet_debugfs_root);
-
-       lnet_debugfs_root = NULL;
-}
-
-static int libcfs_init(void)
-{
-       int rc;
-
-       rc = libcfs_debug_init(5 * 1024 * 1024);
-       if (rc < 0) {
-               pr_err("LustreError: libcfs_debug_init: %d\n", rc);
-               return rc;
-       }
-
-       rc = cfs_cpu_init();
-       if (rc != 0)
-               goto cleanup_debug;
-
-       rc = misc_register(&libcfs_dev);
-       if (rc) {
-               CERROR("misc_register: error %d\n", rc);
-               goto cleanup_cpu;
-       }
-
-       rc = cfs_wi_startup();
-       if (rc) {
-               CERROR("initialize workitem: error %d\n", rc);
-               goto cleanup_deregister;
-       }
-
-       /* max to 4 threads, should be enough for rehash */
-       rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4);
-       rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY,
-                                rc, &cfs_sched_rehash);
-       if (rc != 0) {
-               CERROR("Startup workitem scheduler: error: %d\n", rc);
-               goto cleanup_deregister;
-       }
-
-       rc = cfs_crypto_register();
-       if (rc) {
-               CERROR("cfs_crypto_register: error %d\n", rc);
-               goto cleanup_wi;
-       }
-
-       lustre_insert_debugfs(lnet_table, lnet_debugfs_symlinks);
-
-       CDEBUG(D_OTHER, "portals setup OK\n");
-       return 0;
- cleanup_wi:
-       cfs_wi_shutdown();
- cleanup_deregister:
-       misc_deregister(&libcfs_dev);
-cleanup_cpu:
-       cfs_cpu_fini();
- cleanup_debug:
-       libcfs_debug_cleanup();
-       return rc;
-}
-
-static void libcfs_exit(void)
-{
-       int rc;
-
-       lustre_remove_debugfs();
-
-       if (cfs_sched_rehash) {
-               cfs_wi_sched_destroy(cfs_sched_rehash);
-               cfs_sched_rehash = NULL;
-       }
-
-       cfs_crypto_unregister();
-       cfs_wi_shutdown();
-
-       misc_deregister(&libcfs_dev);
-
-       cfs_cpu_fini();
-
-       rc = libcfs_debug_cleanup();
-       if (rc)
-               pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre helper library");
-MODULE_VERSION(LIBCFS_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(libcfs_init);
-module_exit(libcfs_exit);
diff --git a/drivers/staging/lustre/lustre/libcfs/prng.c b/drivers/staging/lustre/lustre/libcfs/prng.c

deleted file mode 100644 (file)

index c75ae9a..0000000
--- a/drivers/staging/lustre/lustre/libcfs/prng.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/prng.c
- *
- * concatenation of following two 16-bit multiply with carry generators
- * x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16,
- * number and carry packed within the same 32 bit integer.
- * algorithm recommended by Marsaglia
-*/
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-/*
- * From: George Marsaglia <geo@stat.fsu.edu>
- * Newsgroups: sci.math
- * Subject: Re: A RANDOM NUMBER GENERATOR FOR C
- * Date: Tue, 30 Sep 1997 05:29:35 -0700
- *
- * You may replace the two constants 36969 and 18000 by any
- * pair of distinct constants from this list:
- * 18000 18030 18273 18513 18879 19074 19098 19164 19215 19584
- * 19599 19950 20088 20508 20544 20664 20814 20970 21153 21243
- * 21423 21723 21954 22125 22188 22293 22860 22938 22965 22974
- * 23109 23124 23163 23208 23508 23520 23553 23658 23865 24114
- * 24219 24660 24699 24864 24948 25023 25308 25443 26004 26088
- * 26154 26550 26679 26838 27183 27258 27753 27795 27810 27834
- * 27960 28320 28380 28689 28710 28794 28854 28959 28980 29013
- * 29379 29889 30135 30345 30459 30714 30903 30963 31059 31083
- * (or any other 16-bit constants k for which both k*2^16-1
- * and k*2^15-1 are prime)
- */
-
-#define RANDOM_CONST_A 18030
-#define RANDOM_CONST_B 29013
-
-static unsigned int seed_x = 521288629;
-static unsigned int seed_y = 362436069;
-
-/**
- * cfs_rand - creates new seeds
- *
- * First it creates new seeds from the previous seeds. Then it generates a
- * new pseudo random number for use.
- *
- * Returns a pseudo-random 32-bit integer
- */
-unsigned int cfs_rand(void)
-{
-       seed_x = RANDOM_CONST_A * (seed_x & 65535) + (seed_x >> 16);
-       seed_y = RANDOM_CONST_B * (seed_y & 65535) + (seed_y >> 16);
-
-       return ((seed_x << 16) + (seed_y & 65535));
-}
-EXPORT_SYMBOL(cfs_rand);
-
-/**
- * cfs_srand - sets the initial seed
- * @seed1 : (seed_x) should have the most entropy in the low bits of the word
- * @seed2 : (seed_y) should have the most entropy in the high bits of the word
- *
- * Replaces the original seeds with new values. Used to generate a new pseudo
- * random numbers.
- */
-void cfs_srand(unsigned int seed1, unsigned int seed2)
-{
-       if (seed1)
-               seed_x = seed1; /* use default seeds if parameter is 0 */
-       if (seed2)
-               seed_y = seed2;
-}
-EXPORT_SYMBOL(cfs_srand);
-
-/**
- * cfs_get_random_bytes - generate a bunch of random numbers
- * @buf : buffer to fill with random numbers
- * @size: size of passed in buffer
- *
- * Fills a buffer with random bytes
- */
-void cfs_get_random_bytes(void *buf, int size)
-{
-       int *p = buf;
-       int rem, tmp;
-
-       LASSERT(size >= 0);
-
-       rem = min((int)((unsigned long)buf & (sizeof(int) - 1)), size);
-       if (rem) {
-               get_random_bytes(&tmp, sizeof(tmp));
-               tmp ^= cfs_rand();
-               memcpy(buf, &tmp, rem);
-               p = buf + rem;
-               size -= rem;
-       }
-
-       while (size >= sizeof(int)) {
-               get_random_bytes(&tmp, sizeof(tmp));
-               *p = cfs_rand() ^ tmp;
-               size -= sizeof(int);
-               p++;
-       }
-       buf = p;
-       if (size) {
-               get_random_bytes(&tmp, sizeof(tmp));
-               tmp ^= cfs_rand();
-               memcpy(buf, &tmp, size);
-       }
-}
-EXPORT_SYMBOL(cfs_get_random_bytes);
diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.c b/drivers/staging/lustre/lustre/libcfs/tracefile.c

deleted file mode 100644 (file)

index ec3bc04..0000000
--- a/drivers/staging/lustre/lustre/libcfs/tracefile.c
+++ /dev/null
@@ -1,1208 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/tracefile.c
- *
- * Author: Zach Brown <zab@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-#include "tracefile.h"
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-/* XXX move things up to the top, comment */
-union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
-
-char cfs_tracefile[TRACEFILE_NAME_SIZE];
-long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-static struct tracefiled_ctl trace_tctl;
-static DEFINE_MUTEX(cfs_trace_thread_mutex);
-static int thread_running;
-
-static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
-
-struct page_collection {
-       struct list_head        pc_pages;
-       /*
-        * if this flag is set, collect_pages() will spill both
-        * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
-        * only ->tcd_pages are spilled.
-        */
-       int             pc_want_daemon_pages;
-};
-
-struct tracefiled_ctl {
-       struct completion       tctl_start;
-       struct completion       tctl_stop;
-       wait_queue_head_t               tctl_waitq;
-       pid_t                   tctl_pid;
-       atomic_t                tctl_shutdown;
-};
-
-/*
- * small data-structure for each page owned by tracefiled.
- */
-struct cfs_trace_page {
-       /*
-        * page itself
-        */
-       struct page       *page;
-       /*
-        * linkage into one of the lists in trace_data_union or
-        * page_collection
-        */
-       struct list_head           linkage;
-       /*
-        * number of bytes used within this page
-        */
-       unsigned int     used;
-       /*
-        * cpu that owns this page
-        */
-       unsigned short       cpu;
-       /*
-        * type(context) of this page
-        */
-       unsigned short       type;
-};
-
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
-                                        struct cfs_trace_cpu_data *tcd);
-
-static inline struct cfs_trace_page *
-cfs_tage_from_list(struct list_head *list)
-{
-       return list_entry(list, struct cfs_trace_page, linkage);
-}
-
-static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
-{
-       struct page         *page;
-       struct cfs_trace_page *tage;
-
-       /* My caller is trying to free memory */
-       if (!in_interrupt() && memory_pressure_get())
-               return NULL;
-
-       /*
-        * Don't spam console with allocation failures: they will be reported
-        * by upper layer anyway.
-        */
-       gfp |= __GFP_NOWARN;
-       page = alloc_page(gfp);
-       if (!page)
-               return NULL;
-
-       tage = kmalloc(sizeof(*tage), gfp);
-       if (!tage) {
-               __free_page(page);
-               return NULL;
-       }
-
-       tage->page = page;
-       atomic_inc(&cfs_tage_allocated);
-       return tage;
-}
-
-static void cfs_tage_free(struct cfs_trace_page *tage)
-{
-       __free_page(tage->page);
-       kfree(tage);
-       atomic_dec(&cfs_tage_allocated);
-}
-
-static void cfs_tage_to_tail(struct cfs_trace_page *tage,
-                            struct list_head *queue)
-{
-       list_move_tail(&tage->linkage, queue);
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
-                          struct list_head *stock)
-{
-       int i;
-
-       /*
-        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-        * from here: this will lead to infinite recursion.
-        */
-
-       for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) {
-               struct cfs_trace_page *tage;
-
-               tage = cfs_tage_alloc(gfp);
-               if (!tage)
-                       break;
-               list_add_tail(&tage->linkage, stock);
-       }
-       return i;
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *
-cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
-{
-       struct cfs_trace_page *tage;
-
-       if (tcd->tcd_cur_pages > 0) {
-               __LASSERT(!list_empty(&tcd->tcd_pages));
-               tage = cfs_tage_from_list(tcd->tcd_pages.prev);
-               if (tage->used + len <= PAGE_CACHE_SIZE)
-                       return tage;
-       }
-
-       if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
-               if (tcd->tcd_cur_stock_pages > 0) {
-                       tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
-                       --tcd->tcd_cur_stock_pages;
-                       list_del_init(&tage->linkage);
-               } else {
-                       tage = cfs_tage_alloc(GFP_ATOMIC);
-                       if (unlikely(!tage)) {
-                               if ((!memory_pressure_get() ||
-                                    in_interrupt()) && printk_ratelimit())
-                                       printk(KERN_WARNING
-                                              "cannot allocate a tage (%ld)\n",
-                                              tcd->tcd_cur_pages);
-                               return NULL;
-                       }
-               }
-
-               tage->used = 0;
-               tage->cpu = smp_processor_id();
-               tage->type = tcd->tcd_type;
-               list_add_tail(&tage->linkage, &tcd->tcd_pages);
-               tcd->tcd_cur_pages++;
-
-               if (tcd->tcd_cur_pages > 8 && thread_running) {
-                       struct tracefiled_ctl *tctl = &trace_tctl;
-                       /*
-                        * wake up tracefiled to process some pages.
-                        */
-                       wake_up(&tctl->tctl_waitq);
-               }
-               return tage;
-       }
-       return NULL;
-}
-
-static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
-{
-       int pgcount = tcd->tcd_cur_pages / 10;
-       struct page_collection pc;
-       struct cfs_trace_page *tage;
-       struct cfs_trace_page *tmp;
-
-       /*
-        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-        * from here: this will lead to infinite recursion.
-        */
-
-       if (printk_ratelimit())
-               printk(KERN_WARNING "debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
-                      pgcount + 1, tcd->tcd_cur_pages);
-
-       INIT_LIST_HEAD(&pc.pc_pages);
-
-       list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
-               if (pgcount-- == 0)
-                       break;
-
-               list_move_tail(&tage->linkage, &pc.pc_pages);
-               tcd->tcd_cur_pages--;
-       }
-       put_pages_on_tcd_daemon_list(&pc, tcd);
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
-                                                unsigned long len)
-{
-       struct cfs_trace_page *tage;
-
-       /*
-        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-        * from here: this will lead to infinite recursion.
-        */
-
-       if (len > PAGE_CACHE_SIZE) {
-               pr_err("cowardly refusing to write %lu bytes in a page\n", len);
-               return NULL;
-       }
-
-       tage = cfs_trace_get_tage_try(tcd, len);
-       if (tage)
-               return tage;
-       if (thread_running)
-               cfs_tcd_shrink(tcd);
-       if (tcd->tcd_cur_pages > 0) {
-               tage = cfs_tage_from_list(tcd->tcd_pages.next);
-               tage->used = 0;
-               cfs_tage_to_tail(tage, &tcd->tcd_pages);
-       }
-       return tage;
-}
-
-int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
-                    const char *format, ...)
-{
-       va_list args;
-       int     rc;
-
-       va_start(args, format);
-       rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
-       va_end(args);
-
-       return rc;
-}
-EXPORT_SYMBOL(libcfs_debug_msg);
-
-int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
-                      const char *format1, va_list args,
-                      const char *format2, ...)
-{
-       struct cfs_trace_cpu_data *tcd = NULL;
-       struct ptldebug_header     header = {0};
-       struct cfs_trace_page     *tage;
-       /* string_buf is used only if tcd != NULL, and is always set then */
-       char                  *string_buf = NULL;
-       char                  *debug_buf;
-       int                     known_size;
-       int                     needed = 85; /* average message length */
-       int                     max_nob;
-       va_list             ap;
-       int                     depth;
-       int                     i;
-       int                     remain;
-       int                     mask = msgdata->msg_mask;
-       const char              *file = kbasename(msgdata->msg_file);
-       struct cfs_debug_limit_state   *cdls = msgdata->msg_cdls;
-
-       tcd = cfs_trace_get_tcd();
-
-       /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
-        * pins us to a particular CPU.  This avoids an smp_processor_id()
-        * warning on Linux when debugging is enabled.
-        */
-       cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
-
-       if (!tcd)               /* arch may not log in IRQ context */
-               goto console;
-
-       if (tcd->tcd_cur_pages == 0)
-               header.ph_flags |= PH_FLAG_FIRST_RECORD;
-
-       if (tcd->tcd_shutting_down) {
-               cfs_trace_put_tcd(tcd);
-               tcd = NULL;
-               goto console;
-       }
-
-       depth = __current_nesting_level();
-       known_size = strlen(file) + 1 + depth;
-       if (msgdata->msg_fn)
-               known_size += strlen(msgdata->msg_fn) + 1;
-
-       if (libcfs_debug_binary)
-               known_size += sizeof(header);
-
-       /*
-        * '2' used because vsnprintf return real size required for output
-        * _without_ terminating NULL.
-        * if needed is to small for this format.
-        */
-       for (i = 0; i < 2; i++) {
-               tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
-               if (!tage) {
-                       if (needed + known_size > PAGE_CACHE_SIZE)
-                               mask |= D_ERROR;
-
-                       cfs_trace_put_tcd(tcd);
-                       tcd = NULL;
-                       goto console;
-               }
-
-               string_buf = (char *)page_address(tage->page) +
-                                       tage->used + known_size;
-
-               max_nob = PAGE_CACHE_SIZE - tage->used - known_size;
-               if (max_nob <= 0) {
-                       printk(KERN_EMERG "negative max_nob: %d\n",
-                              max_nob);
-                       mask |= D_ERROR;
-                       cfs_trace_put_tcd(tcd);
-                       tcd = NULL;
-                       goto console;
-               }
-
-               needed = 0;
-               if (format1) {
-                       va_copy(ap, args);
-                       needed = vsnprintf(string_buf, max_nob, format1, ap);
-                       va_end(ap);
-               }
-
-               if (format2) {
-                       remain = max_nob - needed;
-                       if (remain < 0)
-                               remain = 0;
-
-                       va_start(ap, format2);
-                       needed += vsnprintf(string_buf + needed, remain,
-                                           format2, ap);
-                       va_end(ap);
-               }
-
-               if (needed < max_nob) /* well. printing ok.. */
-                       break;
-       }
-
-       if (*(string_buf + needed - 1) != '\n')
-               printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
-                      file, msgdata->msg_line, msgdata->msg_fn);
-
-       header.ph_len = known_size + needed;
-       debug_buf = (char *)page_address(tage->page) + tage->used;
-
-       if (libcfs_debug_binary) {
-               memcpy(debug_buf, &header, sizeof(header));
-               tage->used += sizeof(header);
-               debug_buf += sizeof(header);
-       }
-
-       /* indent message according to the nesting level */
-       while (depth-- > 0) {
-               *(debug_buf++) = '.';
-               ++tage->used;
-       }
-
-       strcpy(debug_buf, file);
-       tage->used += strlen(file) + 1;
-       debug_buf += strlen(file) + 1;
-
-       if (msgdata->msg_fn) {
-               strcpy(debug_buf, msgdata->msg_fn);
-               tage->used += strlen(msgdata->msg_fn) + 1;
-               debug_buf += strlen(msgdata->msg_fn) + 1;
-       }
-
-       __LASSERT(debug_buf == string_buf);
-
-       tage->used += needed;
-       __LASSERT(tage->used <= PAGE_CACHE_SIZE);
-
-console:
-       if ((mask & libcfs_printk) == 0) {
-               /* no console output requested */
-               if (tcd)
-                       cfs_trace_put_tcd(tcd);
-               return 1;
-       }
-
-       if (cdls) {
-               if (libcfs_console_ratelimit &&
-                   cdls->cdls_next != 0 &&     /* not first time ever */
-                   !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
-                       /* skipping a console message */
-                       cdls->cdls_count++;
-                       if (tcd)
-                               cfs_trace_put_tcd(tcd);
-                       return 1;
-               }
-
-               if (cfs_time_after(cfs_time_current(),
-                                  cdls->cdls_next + libcfs_console_max_delay +
-                                  cfs_time_seconds(10))) {
-                       /* last timeout was a long time ago */
-                       cdls->cdls_delay /= libcfs_console_backoff * 4;
-               } else {
-                       cdls->cdls_delay *= libcfs_console_backoff;
-               }
-
-               if (cdls->cdls_delay < libcfs_console_min_delay)
-                       cdls->cdls_delay = libcfs_console_min_delay;
-               else if (cdls->cdls_delay > libcfs_console_max_delay)
-                       cdls->cdls_delay = libcfs_console_max_delay;
-
-               /* ensure cdls_next is never zero after it's been seen */
-               cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
-       }
-
-       if (tcd) {
-               cfs_print_to_console(&header, mask, string_buf, needed, file,
-                                    msgdata->msg_fn);
-               cfs_trace_put_tcd(tcd);
-       } else {
-               string_buf = cfs_trace_get_console_buffer();
-
-               needed = 0;
-               if (format1) {
-                       va_copy(ap, args);
-                       needed = vsnprintf(string_buf,
-                                          CFS_TRACE_CONSOLE_BUFFER_SIZE,
-                                          format1, ap);
-                       va_end(ap);
-               }
-               if (format2) {
-                       remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
-                       if (remain > 0) {
-                               va_start(ap, format2);
-                               needed += vsnprintf(string_buf + needed, remain,
-                                                   format2, ap);
-                               va_end(ap);
-                       }
-               }
-               cfs_print_to_console(&header, mask,
-                                    string_buf, needed, file, msgdata->msg_fn);
-
-               put_cpu();
-       }
-
-       if (cdls && cdls->cdls_count != 0) {
-               string_buf = cfs_trace_get_console_buffer();
-
-               needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
-                                 "Skipped %d previous similar message%s\n",
-                                 cdls->cdls_count,
-                                 (cdls->cdls_count > 1) ? "s" : "");
-
-               cfs_print_to_console(&header, mask,
-                                    string_buf, needed, file, msgdata->msg_fn);
-
-               put_cpu();
-               cdls->cdls_count = 0;
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL(libcfs_debug_vmsg2);
-
-void
-cfs_trace_assertion_failed(const char *str,
-                          struct libcfs_debug_msg_data *msgdata)
-{
-       struct ptldebug_header hdr;
-
-       libcfs_panic_in_progress = 1;
-       libcfs_catastrophe = 1;
-       mb();
-
-       cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
-
-       cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
-                            msgdata->msg_file, msgdata->msg_fn);
-
-       panic("Lustre debug assertion failure\n");
-
-       /* not reached */
-}
-
-static void
-panic_collect_pages(struct page_collection *pc)
-{
-       /* Do the collect_pages job on a single CPU: assumes that all other
-        * CPUs have been stopped during a panic.  If this isn't true for some
-        * arch, this will have to be implemented separately in each arch.
-        */
-       int                     i;
-       int                     j;
-       struct cfs_trace_cpu_data *tcd;
-
-       INIT_LIST_HEAD(&pc->pc_pages);
-
-       cfs_tcd_for_each(tcd, i, j) {
-               list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
-               tcd->tcd_cur_pages = 0;
-
-               if (pc->pc_want_daemon_pages) {
-                       list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
-                       tcd->tcd_cur_daemon_pages = 0;
-               }
-       }
-}
-
-static void collect_pages_on_all_cpus(struct page_collection *pc)
-{
-       struct cfs_trace_cpu_data *tcd;
-       int i, cpu;
-
-       for_each_possible_cpu(cpu) {
-               cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-                       list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
-                       tcd->tcd_cur_pages = 0;
-                       if (pc->pc_want_daemon_pages) {
-                               list_splice_init(&tcd->tcd_daemon_pages,
-                                                &pc->pc_pages);
-                               tcd->tcd_cur_daemon_pages = 0;
-                       }
-               }
-       }
-}
-
-static void collect_pages(struct page_collection *pc)
-{
-       INIT_LIST_HEAD(&pc->pc_pages);
-
-       if (libcfs_panic_in_progress)
-               panic_collect_pages(pc);
-       else
-               collect_pages_on_all_cpus(pc);
-}
-
-static void put_pages_back_on_all_cpus(struct page_collection *pc)
-{
-       struct cfs_trace_cpu_data *tcd;
-       struct list_head *cur_head;
-       struct cfs_trace_page *tage;
-       struct cfs_trace_page *tmp;
-       int i, cpu;
-
-       for_each_possible_cpu(cpu) {
-               cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-                       cur_head = tcd->tcd_pages.next;
-
-                       list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
-                                                linkage) {
-                               __LASSERT_TAGE_INVARIANT(tage);
-
-                               if (tage->cpu != cpu || tage->type != i)
-                                       continue;
-
-                               cfs_tage_to_tail(tage, cur_head);
-                               tcd->tcd_cur_pages++;
-                       }
-               }
-       }
-}
-
-static void put_pages_back(struct page_collection *pc)
-{
-       if (!libcfs_panic_in_progress)
-               put_pages_back_on_all_cpus(pc);
-}
-
-/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon.
- */
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
-                                        struct cfs_trace_cpu_data *tcd)
-{
-       struct cfs_trace_page *tage;
-       struct cfs_trace_page *tmp;
-
-       list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
-               __LASSERT_TAGE_INVARIANT(tage);
-
-               if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
-                       continue;
-
-               cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
-               tcd->tcd_cur_daemon_pages++;
-
-               if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
-                       struct cfs_trace_page *victim;
-
-                       __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
-                       victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
-
-                       __LASSERT_TAGE_INVARIANT(victim);
-
-                       list_del(&victim->linkage);
-                       cfs_tage_free(victim);
-                       tcd->tcd_cur_daemon_pages--;
-               }
-       }
-}
-
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
-       struct cfs_trace_cpu_data *tcd;
-       int i, cpu;
-
-       for_each_possible_cpu(cpu) {
-               cfs_tcd_for_each_type_lock(tcd, i, cpu)
-                       put_pages_on_tcd_daemon_list(pc, tcd);
-       }
-}
-
-void cfs_trace_debug_print(void)
-{
-       struct page_collection pc;
-       struct cfs_trace_page *tage;
-       struct cfs_trace_page *tmp;
-
-       pc.pc_want_daemon_pages = 1;
-       collect_pages(&pc);
-       list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-               char *p, *file, *fn;
-               struct page *page;
-
-               __LASSERT_TAGE_INVARIANT(tage);
-
-               page = tage->page;
-               p = page_address(page);
-               while (p < ((char *)page_address(page) + tage->used)) {
-                       struct ptldebug_header *hdr;
-                       int len;
-
-                       hdr = (void *)p;
-                       p += sizeof(*hdr);
-                       file = p;
-                       p += strlen(file) + 1;
-                       fn = p;
-                       p += strlen(fn) + 1;
-                       len = hdr->ph_len - (int)(p - (char *)hdr);
-
-                       cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
-
-                       p += len;
-               }
-
-               list_del(&tage->linkage);
-               cfs_tage_free(tage);
-       }
-}
-
-int cfs_tracefile_dump_all_pages(char *filename)
-{
-       struct page_collection  pc;
-       struct file             *filp;
-       struct cfs_trace_page   *tage;
-       struct cfs_trace_page   *tmp;
-       char                    *buf;
-       int rc;
-
-       DECL_MMSPACE;
-
-       cfs_tracefile_write_lock();
-
-       filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
-                        0600);
-       if (IS_ERR(filp)) {
-               rc = PTR_ERR(filp);
-               filp = NULL;
-               pr_err("LustreError: can't open %s for dump: rc %d\n",
-                      filename, rc);
-               goto out;
-       }
-
-       pc.pc_want_daemon_pages = 1;
-       collect_pages(&pc);
-       if (list_empty(&pc.pc_pages)) {
-               rc = 0;
-               goto close;
-       }
-
-       /* ok, for now, just write the pages.  in the future we'll be building
-        * iobufs with the pages and calling generic_direct_IO
-        */
-       MMSPACE_OPEN;
-       list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-               __LASSERT_TAGE_INVARIANT(tage);
-
-               buf = kmap(tage->page);
-               rc = vfs_write(filp, (__force const char __user *)buf,
-                              tage->used, &filp->f_pos);
-               kunmap(tage->page);
-
-               if (rc != (int)tage->used) {
-                       printk(KERN_WARNING "wanted to write %u but wrote %d\n",
-                              tage->used, rc);
-                       put_pages_back(&pc);
-                       __LASSERT(list_empty(&pc.pc_pages));
-                       break;
-               }
-               list_del(&tage->linkage);
-               cfs_tage_free(tage);
-       }
-       MMSPACE_CLOSE;
-       rc = vfs_fsync(filp, 1);
-       if (rc)
-               pr_err("sync returns %d\n", rc);
-close:
-       filp_close(filp, NULL);
-out:
-       cfs_tracefile_write_unlock();
-       return rc;
-}
-
-void cfs_trace_flush_pages(void)
-{
-       struct page_collection pc;
-       struct cfs_trace_page *tage;
-       struct cfs_trace_page *tmp;
-
-       pc.pc_want_daemon_pages = 1;
-       collect_pages(&pc);
-       list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-               __LASSERT_TAGE_INVARIANT(tage);
-
-               list_del(&tage->linkage);
-               cfs_tage_free(tage);
-       }
-}
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-                           const char __user *usr_buffer, int usr_buffer_nob)
-{
-       int    nob;
-
-       if (usr_buffer_nob > knl_buffer_nob)
-               return -EOVERFLOW;
-
-       if (copy_from_user((void *)knl_buffer,
-                          usr_buffer, usr_buffer_nob))
-               return -EFAULT;
-
-       nob = strnlen(knl_buffer, usr_buffer_nob);
-       while (nob-- >= 0)                    /* strip trailing whitespace */
-               if (!isspace(knl_buffer[nob]))
-                       break;
-
-       if (nob < 0)                        /* empty string */
-               return -EINVAL;
-
-       if (nob == knl_buffer_nob)            /* no space to terminate */
-               return -EOVERFLOW;
-
-       knl_buffer[nob + 1] = 0;                /* terminate */
-       return 0;
-}
-EXPORT_SYMBOL(cfs_trace_copyin_string);
-
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-                            const char *knl_buffer, char *append)
-{
-       /*
-        * NB if 'append' != NULL, it's a single character to append to the
-        * copied out string - usually "\n" or "" (i.e. a terminating zero byte)
-        */
-       int   nob = strlen(knl_buffer);
-
-       if (nob > usr_buffer_nob)
-               nob = usr_buffer_nob;
-
-       if (copy_to_user(usr_buffer, knl_buffer, nob))
-               return -EFAULT;
-
-       if (append && nob < usr_buffer_nob) {
-               if (copy_to_user(usr_buffer + nob, append, 1))
-                       return -EFAULT;
-
-               nob++;
-       }
-
-       return nob;
-}
-EXPORT_SYMBOL(cfs_trace_copyout_string);
-
-int cfs_trace_allocate_string_buffer(char **str, int nob)
-{
-       if (nob > 2 * PAGE_CACHE_SIZE)      /* string must be "sensible" */
-               return -EINVAL;
-
-       *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
-       if (!*str)
-               return -ENOMEM;
-
-       return 0;
-}
-
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
-{
-       char     *str;
-       int        rc;
-
-       rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
-       if (rc != 0)
-               return rc;
-
-       rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
-                                    usr_str, usr_str_nob);
-       if (rc != 0)
-               goto out;
-
-       if (str[0] != '/') {
-               rc = -EINVAL;
-               goto out;
-       }
-       rc = cfs_tracefile_dump_all_pages(str);
-out:
-       kfree(str);
-       return rc;
-}
-
-int cfs_trace_daemon_command(char *str)
-{
-       int       rc = 0;
-
-       cfs_tracefile_write_lock();
-
-       if (strcmp(str, "stop") == 0) {
-               cfs_tracefile_write_unlock();
-               cfs_trace_stop_thread();
-               cfs_tracefile_write_lock();
-               memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
-
-       } else if (strncmp(str, "size=", 5) == 0) {
-               unsigned long tmp;
-
-               rc = kstrtoul(str + 5, 10, &tmp);
-               if (!rc) {
-                       if (tmp < 10 || tmp > 20480)
-                               cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-                       else
-                               cfs_tracefile_size = tmp << 20;
-               }
-       } else if (strlen(str) >= sizeof(cfs_tracefile)) {
-               rc = -ENAMETOOLONG;
-       } else if (str[0] != '/') {
-               rc = -EINVAL;
-       } else {
-               strcpy(cfs_tracefile, str);
-
-               printk(KERN_INFO
-                      "Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n",
-                      cfs_tracefile,
-                      (long)(cfs_tracefile_size >> 10));
-
-               cfs_trace_start_thread();
-       }
-
-       cfs_tracefile_write_unlock();
-       return rc;
-}
-
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
-{
-       char *str;
-       int   rc;
-
-       rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
-       if (rc != 0)
-               return rc;
-
-       rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
-                                    usr_str, usr_str_nob);
-       if (rc == 0)
-               rc = cfs_trace_daemon_command(str);
-
-       kfree(str);
-       return rc;
-}
-
-int cfs_trace_set_debug_mb(int mb)
-{
-       int i;
-       int j;
-       int pages;
-       int limit = cfs_trace_max_debug_mb();
-       struct cfs_trace_cpu_data *tcd;
-
-       if (mb < num_possible_cpus()) {
-               printk(KERN_WARNING
-                      "Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n",
-                      mb, num_possible_cpus());
-               mb = num_possible_cpus();
-       }
-
-       if (mb > limit) {
-               printk(KERN_WARNING
-                      "Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n",
-                      mb, limit);
-               mb = limit;
-       }
-
-       mb /= num_possible_cpus();
-       pages = mb << (20 - PAGE_CACHE_SHIFT);
-
-       cfs_tracefile_write_lock();
-
-       cfs_tcd_for_each(tcd, i, j)
-               tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
-
-       cfs_tracefile_write_unlock();
-
-       return 0;
-}
-
-int cfs_trace_get_debug_mb(void)
-{
-       int i;
-       int j;
-       struct cfs_trace_cpu_data *tcd;
-       int total_pages = 0;
-
-       cfs_tracefile_read_lock();
-
-       cfs_tcd_for_each(tcd, i, j)
-               total_pages += tcd->tcd_max_pages;
-
-       cfs_tracefile_read_unlock();
-
-       return (total_pages >> (20 - PAGE_CACHE_SHIFT)) + 1;
-}
-
-static int tracefiled(void *arg)
-{
-       struct page_collection pc;
-       struct tracefiled_ctl *tctl = arg;
-       struct cfs_trace_page *tage;
-       struct cfs_trace_page *tmp;
-       struct file *filp;
-       char *buf;
-       int last_loop = 0;
-       int rc;
-
-       DECL_MMSPACE;
-
-       /* we're started late enough that we pick up init's fs context */
-       /* this is so broken in uml?  what on earth is going on? */
-
-       complete(&tctl->tctl_start);
-
-       while (1) {
-               wait_queue_t __wait;
-
-               pc.pc_want_daemon_pages = 0;
-               collect_pages(&pc);
-               if (list_empty(&pc.pc_pages))
-                       goto end_loop;
-
-               filp = NULL;
-               cfs_tracefile_read_lock();
-               if (cfs_tracefile[0] != 0) {
-                       filp = filp_open(cfs_tracefile,
-                                        O_CREAT | O_RDWR | O_LARGEFILE,
-                                        0600);
-                       if (IS_ERR(filp)) {
-                               rc = PTR_ERR(filp);
-                               filp = NULL;
-                               printk(KERN_WARNING "couldn't open %s: %d\n",
-                                      cfs_tracefile, rc);
-                       }
-               }
-               cfs_tracefile_read_unlock();
-               if (!filp) {
-                       put_pages_on_daemon_list(&pc);
-                       __LASSERT(list_empty(&pc.pc_pages));
-                       goto end_loop;
-               }
-
-               MMSPACE_OPEN;
-
-               list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-                       static loff_t f_pos;
-
-                       __LASSERT_TAGE_INVARIANT(tage);
-
-                       if (f_pos >= (off_t)cfs_tracefile_size)
-                               f_pos = 0;
-                       else if (f_pos > i_size_read(file_inode(filp)))
-                               f_pos = i_size_read(file_inode(filp));
-
-                       buf = kmap(tage->page);
-                       rc = vfs_write(filp, (__force const char __user *)buf,
-                                      tage->used, &f_pos);
-                       kunmap(tage->page);
-
-                       if (rc != (int)tage->used) {
-                               printk(KERN_WARNING "wanted to write %u but wrote %d\n",
-                                      tage->used, rc);
-                               put_pages_back(&pc);
-                               __LASSERT(list_empty(&pc.pc_pages));
-                               break;
-                       }
-               }
-               MMSPACE_CLOSE;
-
-               filp_close(filp, NULL);
-               put_pages_on_daemon_list(&pc);
-               if (!list_empty(&pc.pc_pages)) {
-                       int i;
-
-                       printk(KERN_ALERT "Lustre: trace pages aren't empty\n");
-                       pr_err("total cpus(%d): ", num_possible_cpus());
-                       for (i = 0; i < num_possible_cpus(); i++)
-                               if (cpu_online(i))
-                                       pr_cont("%d(on) ", i);
-                               else
-                                       pr_cont("%d(off) ", i);
-                       pr_cont("\n");
-
-                       i = 0;
-                       list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
-                                                linkage)
-                               pr_err("page %d belongs to cpu %d\n",
-                                      ++i, tage->cpu);
-                       pr_err("There are %d pages unwritten\n", i);
-               }
-               __LASSERT(list_empty(&pc.pc_pages));
-end_loop:
-               if (atomic_read(&tctl->tctl_shutdown)) {
-                       if (last_loop == 0) {
-                               last_loop = 1;
-                               continue;
-                       } else {
-                               break;
-                       }
-               }
-               init_waitqueue_entry(&__wait, current);
-               add_wait_queue(&tctl->tctl_waitq, &__wait);
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(1));
-               remove_wait_queue(&tctl->tctl_waitq, &__wait);
-       }
-       complete(&tctl->tctl_stop);
-       return 0;
-}
-
-int cfs_trace_start_thread(void)
-{
-       struct tracefiled_ctl *tctl = &trace_tctl;
-       struct task_struct *task;
-       int rc = 0;
-
-       mutex_lock(&cfs_trace_thread_mutex);
-       if (thread_running)
-               goto out;
-
-       init_completion(&tctl->tctl_start);
-       init_completion(&tctl->tctl_stop);
-       init_waitqueue_head(&tctl->tctl_waitq);
-       atomic_set(&tctl->tctl_shutdown, 0);
-
-       task = kthread_run(tracefiled, tctl, "ktracefiled");
-       if (IS_ERR(task)) {
-               rc = PTR_ERR(task);
-               goto out;
-       }
-
-       wait_for_completion(&tctl->tctl_start);
-       thread_running = 1;
-out:
-       mutex_unlock(&cfs_trace_thread_mutex);
-       return rc;
-}
-
-void cfs_trace_stop_thread(void)
-{
-       struct tracefiled_ctl *tctl = &trace_tctl;
-
-       mutex_lock(&cfs_trace_thread_mutex);
-       if (thread_running) {
-               printk(KERN_INFO
-                      "Lustre: shutting down debug daemon thread...\n");
-               atomic_set(&tctl->tctl_shutdown, 1);
-               wait_for_completion(&tctl->tctl_stop);
-               thread_running = 0;
-       }
-       mutex_unlock(&cfs_trace_thread_mutex);
-}
-
-int cfs_tracefile_init(int max_pages)
-{
-       struct cfs_trace_cpu_data *tcd;
-       int                 i;
-       int                 j;
-       int                 rc;
-       int                 factor;
-
-       rc = cfs_tracefile_init_arch();
-       if (rc != 0)
-               return rc;
-
-       cfs_tcd_for_each(tcd, i, j) {
-               /* tcd_pages_factor is initialized int tracefile_init_arch. */
-               factor = tcd->tcd_pages_factor;
-               INIT_LIST_HEAD(&tcd->tcd_pages);
-               INIT_LIST_HEAD(&tcd->tcd_stock_pages);
-               INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
-               tcd->tcd_cur_pages = 0;
-               tcd->tcd_cur_stock_pages = 0;
-               tcd->tcd_cur_daemon_pages = 0;
-               tcd->tcd_max_pages = (max_pages * factor) / 100;
-               LASSERT(tcd->tcd_max_pages > 0);
-               tcd->tcd_shutting_down = 0;
-       }
-
-       return 0;
-}
-
-static void trace_cleanup_on_all_cpus(void)
-{
-       struct cfs_trace_cpu_data *tcd;
-       struct cfs_trace_page *tage;
-       struct cfs_trace_page *tmp;
-       int i, cpu;
-
-       for_each_possible_cpu(cpu) {
-               cfs_tcd_for_each_type_lock(tcd, i, cpu) {
-                       tcd->tcd_shutting_down = 1;
-
-                       list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
-                                                linkage) {
-                               __LASSERT_TAGE_INVARIANT(tage);
-
-                               list_del(&tage->linkage);
-                               cfs_tage_free(tage);
-                       }
-
-                       tcd->tcd_cur_pages = 0;
-               }
-       }
-}
-
-static void cfs_trace_cleanup(void)
-{
-       struct page_collection pc;
-
-       INIT_LIST_HEAD(&pc.pc_pages);
-
-       trace_cleanup_on_all_cpus();
-
-       cfs_tracefile_fini_arch();
-}
-
-void cfs_tracefile_exit(void)
-{
-       cfs_trace_stop_thread();
-       cfs_trace_cleanup();
-}
diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.h b/drivers/staging/lustre/lustre/libcfs/tracefile.h

deleted file mode 100644 (file)

index 4c77f90..0000000
--- a/drivers/staging/lustre/lustre/libcfs/tracefile.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LIBCFS_TRACEFILE_H__
-#define __LIBCFS_TRACEFILE_H__
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-enum cfs_trace_buf_type {
-       CFS_TCD_TYPE_PROC = 0,
-       CFS_TCD_TYPE_SOFTIRQ,
-       CFS_TCD_TYPE_IRQ,
-       CFS_TCD_TYPE_MAX
-};
-
-/* trace file lock routines */
-
-#define TRACEFILE_NAME_SIZE 1024
-extern char      cfs_tracefile[TRACEFILE_NAME_SIZE];
-extern long long cfs_tracefile_size;
-
-void libcfs_run_debug_log_upcall(char *file);
-
-int  cfs_tracefile_init_arch(void);
-void cfs_tracefile_fini_arch(void);
-
-void cfs_tracefile_read_lock(void);
-void cfs_tracefile_read_unlock(void);
-void cfs_tracefile_write_lock(void);
-void cfs_tracefile_write_unlock(void);
-
-int cfs_tracefile_dump_all_pages(char *filename);
-void cfs_trace_debug_print(void);
-void cfs_trace_flush_pages(void);
-int cfs_trace_start_thread(void);
-void cfs_trace_stop_thread(void);
-int cfs_tracefile_init(int max_pages);
-void cfs_tracefile_exit(void);
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-                           const char __user *usr_buffer, int usr_buffer_nob);
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-                            const char *knl_str, char *append);
-int cfs_trace_allocate_string_buffer(char **str, int nob);
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_daemon_command(char *str);
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_set_debug_mb(int mb);
-int cfs_trace_get_debug_mb(void);
-
-void libcfs_debug_dumplog_internal(void *arg);
-void libcfs_register_panic_notifier(void);
-void libcfs_unregister_panic_notifier(void);
-extern int  libcfs_panic_in_progress;
-int cfs_trace_max_debug_mb(void);
-
-#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-#define CFS_TRACEFILE_SIZE (500 << 20)
-
-#ifdef LUSTRE_TRACEFILE_PRIVATE
-
-/*
- * Private declare for tracefile
- */
-#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-
-#define CFS_TRACEFILE_SIZE (500 << 20)
-
-/*
- * Size of a buffer for sprinting console messages if we can't get a page
- * from system
- */
-#define CFS_TRACE_CONSOLE_BUFFER_SIZE   1024
-
-union cfs_trace_data_union {
-       struct cfs_trace_cpu_data {
-               /*
-                * Even though this structure is meant to be per-CPU, locking
-                * is needed because in some places the data may be accessed
-                * from other CPUs. This lock is directly used in trace_get_tcd
-                * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and
-                * tcd_for_each_type_lock
-                */
-               spinlock_t              tcd_lock;
-               unsigned long      tcd_lock_flags;
-
-               /*
-                * pages with trace records not yet processed by tracefiled.
-                */
-               struct list_head              tcd_pages;
-               /* number of pages on ->tcd_pages */
-               unsigned long      tcd_cur_pages;
-
-               /*
-                * pages with trace records already processed by
-                * tracefiled. These pages are kept in memory, so that some
-                * portion of log can be written in the event of LBUG. This
-                * list is maintained in LRU order.
-                *
-                * Pages are moved to ->tcd_daemon_pages by tracefiled()
-                * (put_pages_on_daemon_list()). LRU pages from this list are
-                * discarded when list grows too large.
-                */
-               struct list_head              tcd_daemon_pages;
-               /* number of pages on ->tcd_daemon_pages */
-               unsigned long      tcd_cur_daemon_pages;
-
-               /*
-                * Maximal number of pages allowed on ->tcd_pages and
-                * ->tcd_daemon_pages each.
-                * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
-                * implementation.
-                */
-               unsigned long      tcd_max_pages;
-
-               /*
-                * preallocated pages to write trace records into. Pages from
-                * ->tcd_stock_pages are moved to ->tcd_pages by
-                * portals_debug_msg().
-                *
-                * This list is necessary, because on some platforms it's
-                * impossible to perform efficient atomic page allocation in a
-                * non-blockable context.
-                *
-                * Such platforms fill ->tcd_stock_pages "on occasion", when
-                * tracing code is entered in blockable context.
-                *
-                * trace_get_tage_try() tries to get a page from
-                * ->tcd_stock_pages first and resorts to atomic page
-                * allocation only if this queue is empty. ->tcd_stock_pages
-                * is replenished when tracing code is entered in blocking
-                * context (darwin-tracefile.c:trace_get_tcd()). We try to
-                * maintain TCD_STOCK_PAGES (40 by default) pages in this
-                * queue. Atomic allocation is only required if more than
-                * TCD_STOCK_PAGES pagesful are consumed by trace records all
-                * emitted in non-blocking contexts. Which is quite unlikely.
-                */
-               struct list_head              tcd_stock_pages;
-               /* number of pages on ->tcd_stock_pages */
-               unsigned long      tcd_cur_stock_pages;
-
-               unsigned short    tcd_shutting_down;
-               unsigned short    tcd_cpu;
-               unsigned short    tcd_type;
-               /* The factors to share debug memory. */
-               unsigned short    tcd_pages_factor;
-       } tcd;
-       char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))];
-};
-
-#define TCD_MAX_TYPES      8
-extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS];
-
-#define cfs_tcd_for_each(tcd, i, j)                                   \
-       for (i = 0; cfs_trace_data[i]; i++)                             \
-               for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd);     \
-                    j < num_possible_cpus();                            \
-                    j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
-
-#define cfs_tcd_for_each_type_lock(tcd, i, cpu)                           \
-       for (i = 0; cfs_trace_data[i] &&                                \
-            (tcd = &(*cfs_trace_data[i])[cpu].tcd) &&                  \
-            cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
-
-void cfs_set_ptldebug_header(struct ptldebug_header *header,
-                            struct libcfs_debug_msg_data *m,
-                            unsigned long stack);
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
-                         const char *buf, int len, const char *file,
-                         const char *fn);
-
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-
-extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void);
-
-static inline char *
-cfs_trace_get_console_buffer(void)
-{
-       unsigned int i = get_cpu();
-       unsigned int j = cfs_trace_buf_idx_get();
-
-       return cfs_trace_console_buffers[i][j];
-}
-
-static inline struct cfs_trace_cpu_data *
-cfs_trace_get_tcd(void)
-{
-       struct cfs_trace_cpu_data *tcd =
-               &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
-
-       cfs_trace_lock_tcd(tcd, 0);
-
-       return tcd;
-}
-
-static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
-{
-       cfs_trace_unlock_tcd(tcd, 0);
-
-       put_cpu();
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
-                          struct list_head *stock);
-
-void cfs_trace_assertion_failed(const char *str,
-                               struct libcfs_debug_msg_data *m);
-
-/* ASSERTION that is safe to use within the debug system */
-#define __LASSERT(cond)                                                 \
-do {                                                               \
-       if (unlikely(!(cond))) {                                        \
-               LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL);     \
-               cfs_trace_assertion_failed("ASSERTION("#cond") failed", \
-                                          &msgdata);              \
-       }                                                              \
-} while (0)
-
-#define __LASSERT_TAGE_INVARIANT(tage)                           \
-do {                                                               \
-       __LASSERT(tage);                                        \
-       __LASSERT(tage->page);                            \
-       __LASSERT(tage->used <= PAGE_CACHE_SIZE);                        \
-       __LASSERT(page_count(tage->page) > 0);                \
-} while (0)
-
-#endif /* LUSTRE_TRACEFILE_PRIVATE */
-
-#endif /* __LIBCFS_TRACEFILE_H__ */
diff --git a/drivers/staging/lustre/lustre/libcfs/workitem.c b/drivers/staging/lustre/lustre/libcfs/workitem.c

deleted file mode 100644 (file)

index f2ebed8..0000000
--- a/drivers/staging/lustre/lustre/libcfs/workitem.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/workitem.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- *      Liang Zhen  <zhen.liang@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "../../include/linux/libcfs/libcfs.h"
-
-#define CFS_WS_NAME_LEN         16
-
-struct cfs_wi_sched {
-       /* chain on global list */
-       struct list_head                ws_list;
-       /** serialised workitems */
-       spinlock_t              ws_lock;
-       /** where schedulers sleep */
-       wait_queue_head_t               ws_waitq;
-       /** concurrent workitems */
-       struct list_head                ws_runq;
-       /**
-        * rescheduled running-workitems, a workitem can be rescheduled
-        * while running in wi_action(), but we don't to execute it again
-        * unless it returns from wi_action(), so we put it on ws_rerunq
-        * while rescheduling, and move it to runq after it returns
-        * from wi_action()
-        */
-       struct list_head                ws_rerunq;
-       /** CPT-table for this scheduler */
-       struct cfs_cpt_table    *ws_cptab;
-       /** CPT id for affinity */
-       int                     ws_cpt;
-       /** number of scheduled workitems */
-       int                     ws_nscheduled;
-       /** started scheduler thread, protected by cfs_wi_data::wi_glock */
-       unsigned int            ws_nthreads:30;
-       /** shutting down, protected by cfs_wi_data::wi_glock */
-       unsigned int            ws_stopping:1;
-       /** serialize starting thread, protected by cfs_wi_data::wi_glock */
-       unsigned int            ws_starting:1;
-       /** scheduler name */
-       char                    ws_name[CFS_WS_NAME_LEN];
-};
-
-static struct cfs_workitem_data {
-       /** serialize */
-       spinlock_t              wi_glock;
-       /** list of all schedulers */
-       struct list_head                wi_scheds;
-       /** WI module is initialized */
-       int                     wi_init;
-       /** shutting down the whole WI module */
-       int                     wi_stopping;
-} cfs_wi_data;
-
-static inline int
-cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
-{
-       spin_lock(&sched->ws_lock);
-       if (sched->ws_stopping) {
-               spin_unlock(&sched->ws_lock);
-               return 0;
-       }
-
-       if (!list_empty(&sched->ws_runq)) {
-               spin_unlock(&sched->ws_lock);
-               return 0;
-       }
-       spin_unlock(&sched->ws_lock);
-       return 1;
-}
-
-/* XXX:
- * 0. it only works when called from wi->wi_action.
- * 1. when it returns no one shall try to schedule the workitem.
- */
-void
-cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
-{
-       LASSERT(!in_interrupt()); /* because we use plain spinlock */
-       LASSERT(!sched->ws_stopping);
-
-       spin_lock(&sched->ws_lock);
-
-       LASSERT(wi->wi_running);
-       if (wi->wi_scheduled) { /* cancel pending schedules */
-               LASSERT(!list_empty(&wi->wi_list));
-               list_del_init(&wi->wi_list);
-
-               LASSERT(sched->ws_nscheduled > 0);
-               sched->ws_nscheduled--;
-       }
-
-       LASSERT(list_empty(&wi->wi_list));
-
-       wi->wi_scheduled = 1; /* LBUG future schedule attempts */
-       spin_unlock(&sched->ws_lock);
-}
-EXPORT_SYMBOL(cfs_wi_exit);
-
-/**
- * cancel schedule request of workitem \a wi
- */
-int
-cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
-{
-       int     rc;
-
-       LASSERT(!in_interrupt()); /* because we use plain spinlock */
-       LASSERT(!sched->ws_stopping);
-
-       /*
-        * return 0 if it's running already, otherwise return 1, which
-        * means the workitem will not be scheduled and will not have
-        * any race with wi_action.
-        */
-       spin_lock(&sched->ws_lock);
-
-       rc = !(wi->wi_running);
-
-       if (wi->wi_scheduled) { /* cancel pending schedules */
-               LASSERT(!list_empty(&wi->wi_list));
-               list_del_init(&wi->wi_list);
-
-               LASSERT(sched->ws_nscheduled > 0);
-               sched->ws_nscheduled--;
-
-               wi->wi_scheduled = 0;
-       }
-
-       LASSERT(list_empty(&wi->wi_list));
-
-       spin_unlock(&sched->ws_lock);
-       return rc;
-}
-EXPORT_SYMBOL(cfs_wi_deschedule);
-
-/*
- * Workitem scheduled with (serial == 1) is strictly serialised not only with
- * itself, but also with others scheduled this way.
- *
- * Now there's only one static serialised queue, but in the future more might
- * be added, and even dynamic creation of serialised queues might be supported.
- */
-void
-cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
-{
-       LASSERT(!in_interrupt()); /* because we use plain spinlock */
-       LASSERT(!sched->ws_stopping);
-
-       spin_lock(&sched->ws_lock);
-
-       if (!wi->wi_scheduled) {
-               LASSERT(list_empty(&wi->wi_list));
-
-               wi->wi_scheduled = 1;
-               sched->ws_nscheduled++;
-               if (!wi->wi_running) {
-                       list_add_tail(&wi->wi_list, &sched->ws_runq);
-                       wake_up(&sched->ws_waitq);
-               } else {
-                       list_add(&wi->wi_list, &sched->ws_rerunq);
-               }
-       }
-
-       LASSERT(!list_empty(&wi->wi_list));
-       spin_unlock(&sched->ws_lock);
-}
-EXPORT_SYMBOL(cfs_wi_schedule);
-
-static int cfs_wi_scheduler(void *arg)
-{
-       struct cfs_wi_sched     *sched = (struct cfs_wi_sched *)arg;
-
-       cfs_block_allsigs();
-
-       /* CPT affinity scheduler? */
-       if (sched->ws_cptab)
-               if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
-                       CWARN("Failed to bind %s on CPT %d\n",
-                             sched->ws_name, sched->ws_cpt);
-
-       spin_lock(&cfs_wi_data.wi_glock);
-
-       LASSERT(sched->ws_starting == 1);
-       sched->ws_starting--;
-       sched->ws_nthreads++;
-
-       spin_unlock(&cfs_wi_data.wi_glock);
-
-       spin_lock(&sched->ws_lock);
-
-       while (!sched->ws_stopping) {
-               int          nloops = 0;
-               int          rc;
-               cfs_workitem_t *wi;
-
-               while (!list_empty(&sched->ws_runq) &&
-                      nloops < CFS_WI_RESCHED) {
-                       wi = list_entry(sched->ws_runq.next, cfs_workitem_t,
-                                       wi_list);
-                       LASSERT(wi->wi_scheduled && !wi->wi_running);
-
-                       list_del_init(&wi->wi_list);
-
-                       LASSERT(sched->ws_nscheduled > 0);
-                       sched->ws_nscheduled--;
-
-                       wi->wi_running   = 1;
-                       wi->wi_scheduled = 0;
-
-                       spin_unlock(&sched->ws_lock);
-                       nloops++;
-
-                       rc = (*wi->wi_action) (wi);
-
-                       spin_lock(&sched->ws_lock);
-                       if (rc != 0) /* WI should be dead, even be freed! */
-                               continue;
-
-                       wi->wi_running = 0;
-                       if (list_empty(&wi->wi_list))
-                               continue;
-
-                       LASSERT(wi->wi_scheduled);
-                       /* wi is rescheduled, should be on rerunq now, we
-                        * move it to runq so it can run action now
-                        */
-                       list_move_tail(&wi->wi_list, &sched->ws_runq);
-               }
-
-               if (!list_empty(&sched->ws_runq)) {
-                       spin_unlock(&sched->ws_lock);
-                       /* don't sleep because some workitems still
-                        * expect me to come back soon
-                        */
-                       cond_resched();
-                       spin_lock(&sched->ws_lock);
-                       continue;
-               }
-
-               spin_unlock(&sched->ws_lock);
-               rc = wait_event_interruptible_exclusive(sched->ws_waitq,
-                                               !cfs_wi_sched_cansleep(sched));
-               spin_lock(&sched->ws_lock);
-       }
-
-       spin_unlock(&sched->ws_lock);
-
-       spin_lock(&cfs_wi_data.wi_glock);
-       sched->ws_nthreads--;
-       spin_unlock(&cfs_wi_data.wi_glock);
-
-       return 0;
-}
-
-void
-cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
-{
-       int     i;
-
-       LASSERT(cfs_wi_data.wi_init);
-       LASSERT(!cfs_wi_data.wi_stopping);
-
-       spin_lock(&cfs_wi_data.wi_glock);
-       if (sched->ws_stopping) {
-               CDEBUG(D_INFO, "%s is in progress of stopping\n",
-                      sched->ws_name);
-               spin_unlock(&cfs_wi_data.wi_glock);
-               return;
-       }
-
-       LASSERT(!list_empty(&sched->ws_list));
-       sched->ws_stopping = 1;
-
-       spin_unlock(&cfs_wi_data.wi_glock);
-
-       i = 2;
-       wake_up_all(&sched->ws_waitq);
-
-       spin_lock(&cfs_wi_data.wi_glock);
-       while (sched->ws_nthreads > 0) {
-               CDEBUG(is_power_of_2(++i) ? D_WARNING : D_NET,
-                      "waiting for %d threads of WI sched[%s] to terminate\n",
-                      sched->ws_nthreads, sched->ws_name);
-
-               spin_unlock(&cfs_wi_data.wi_glock);
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(1) / 20);
-               spin_lock(&cfs_wi_data.wi_glock);
-       }
-
-       list_del(&sched->ws_list);
-
-       spin_unlock(&cfs_wi_data.wi_glock);
-       LASSERT(sched->ws_nscheduled == 0);
-
-       LIBCFS_FREE(sched, sizeof(*sched));
-}
-EXPORT_SYMBOL(cfs_wi_sched_destroy);
-
-int
-cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
-                   int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
-{
-       struct cfs_wi_sched     *sched;
-       int                     rc;
-
-       LASSERT(cfs_wi_data.wi_init);
-       LASSERT(!cfs_wi_data.wi_stopping);
-       LASSERT(!cptab || cpt == CFS_CPT_ANY ||
-               (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
-
-       LIBCFS_ALLOC(sched, sizeof(*sched));
-       if (!sched)
-               return -ENOMEM;
-
-       if (strlen(name) > sizeof(sched->ws_name) - 1) {
-               LIBCFS_FREE(sched, sizeof(*sched));
-               return -E2BIG;
-       }
-       strncpy(sched->ws_name, name, sizeof(sched->ws_name));
-
-       sched->ws_cptab = cptab;
-       sched->ws_cpt = cpt;
-
-       spin_lock_init(&sched->ws_lock);
-       init_waitqueue_head(&sched->ws_waitq);
-       INIT_LIST_HEAD(&sched->ws_runq);
-       INIT_LIST_HEAD(&sched->ws_rerunq);
-       INIT_LIST_HEAD(&sched->ws_list);
-
-       rc = 0;
-       while (nthrs > 0)  {
-               char    name[16];
-               struct task_struct *task;
-
-               spin_lock(&cfs_wi_data.wi_glock);
-               while (sched->ws_starting > 0) {
-                       spin_unlock(&cfs_wi_data.wi_glock);
-                       schedule();
-                       spin_lock(&cfs_wi_data.wi_glock);
-               }
-
-               sched->ws_starting++;
-               spin_unlock(&cfs_wi_data.wi_glock);
-
-               if (sched->ws_cptab && sched->ws_cpt >= 0) {
-                       snprintf(name, sizeof(name), "%s_%02d_%02u",
-                                sched->ws_name, sched->ws_cpt,
-                                sched->ws_nthreads);
-               } else {
-                       snprintf(name, sizeof(name), "%s_%02u",
-                                sched->ws_name, sched->ws_nthreads);
-               }
-
-               task = kthread_run(cfs_wi_scheduler, sched, "%s", name);
-               if (!IS_ERR(task)) {
-                       nthrs--;
-                       continue;
-               }
-               rc = PTR_ERR(task);
-
-               CERROR("Failed to create thread for WI scheduler %s: %d\n",
-                      name, rc);
-
-               spin_lock(&cfs_wi_data.wi_glock);
-
-               /* make up for cfs_wi_sched_destroy */
-               list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
-               sched->ws_starting--;
-
-               spin_unlock(&cfs_wi_data.wi_glock);
-
-               cfs_wi_sched_destroy(sched);
-               return rc;
-       }
-       spin_lock(&cfs_wi_data.wi_glock);
-       list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
-       spin_unlock(&cfs_wi_data.wi_glock);
-
-       *sched_pp = sched;
-       return 0;
-}
-EXPORT_SYMBOL(cfs_wi_sched_create);
-
-int
-cfs_wi_startup(void)
-{
-       memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
-
-       spin_lock_init(&cfs_wi_data.wi_glock);
-       INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
-       cfs_wi_data.wi_init = 1;
-
-       return 0;
-}
-
-void
-cfs_wi_shutdown(void)
-{
-       struct cfs_wi_sched     *sched;
-
-       spin_lock(&cfs_wi_data.wi_glock);
-       cfs_wi_data.wi_stopping = 1;
-       spin_unlock(&cfs_wi_data.wi_glock);
-
-       /* nobody should contend on this list */
-       list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
-               sched->ws_stopping = 1;
-               wake_up_all(&sched->ws_waitq);
-       }
-
-       list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
-               spin_lock(&cfs_wi_data.wi_glock);
-
-               while (sched->ws_nthreads != 0) {
-                       spin_unlock(&cfs_wi_data.wi_glock);
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(cfs_time_seconds(1) / 20);
-                       spin_lock(&cfs_wi_data.wi_glock);
-               }
-               spin_unlock(&cfs_wi_data.wi_glock);
-       }
-       while (!list_empty(&cfs_wi_data.wi_scheds)) {
-               sched = list_entry(cfs_wi_data.wi_scheds.next,
-                                  struct cfs_wi_sched, ws_list);
-               list_del(&sched->ws_list);
-               LIBCFS_FREE(sched, sizeof(*sched));
-       }
-
-       cfs_wi_data.wi_stopping = 0;
-       cfs_wi_data.wi_init = 0;
-}
author	James Simmons <jsimmons@infradead.org>
	Tue, 8 Mar 2016 22:35:26 +0000 (17:35 -0500)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 11 Mar 2016 01:48:53 +0000 (17:48 -0800)
drivers/staging/lustre/lnet/Makefile		patch \| blob \| history
drivers/staging/lustre/lnet/libcfs/Makefile	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/debug.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/fail.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/hash.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/libcfs_lock.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/libcfs_mem.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/libcfs_string.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-module.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/module.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/prng.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/tracefile.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/tracefile.h	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lnet/libcfs/workitem.c	[new file with mode: 0644]	patch \| blob
drivers/staging/lustre/lustre/Makefile		patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/Makefile	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/debug.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/fail.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/hash.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/libcfs_lock.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/libcfs_mem.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/libcfs_string.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.h	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-mem.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-module.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/module.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/prng.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/tracefile.c	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/tracefile.h	[deleted file]	patch \| blob \| history
drivers/staging/lustre/lustre/libcfs/workitem.c	[deleted file]	patch \| blob \| history