ext4: optimize case-insensitive lookups
author: Gabriel Krisman Bertazi <krisman@collabora.com>
Thu, 20 Jun 2019 03:45:09 +0000 (23:45 -0400)
committer: Theodore Ts'o <tytso@mit.edu>
Thu, 20 Jun 2019 03:45:09 +0000 (23:45 -0400)
Temporarily cache a casefolded version of the file name under lookup in
ext4_filename, to avoid repeatedly casefolding it.  I got up to 30%
speedup on lookups of large directories (>100k entries), depending on
the length of the string under lookup.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/namei.c
fs/unicode/utf8-core.c
include/linux/unicode.h

index 1f7784b..770a1e6 100644 (file)
@@ -677,7 +677,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
                return memcmp(str, name->name, len);
        }
 
-       return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
+       return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
 }
 
 static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
index ceb7409..7215a2a 100644 (file)
@@ -2078,6 +2078,9 @@ struct ext4_filename {
 #ifdef CONFIG_FS_ENCRYPTION
        struct fscrypt_str crypto_buf;
 #endif
+#ifdef CONFIG_UNICODE
+       struct fscrypt_str cf_name;
+#endif
 };
 
 #define fname_name(p) ((p)->disk_name.name)
@@ -2303,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
                                              struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
+#ifdef CONFIG_UNICODE
+/*
+ * Prepare the cached casefolded form of @iname for lookups in @dir.
+ * After the call, fname->name is either NULL (no cached name is
+ * available) or a kmalloc()ed buffer that ext4_fname_free_filename()
+ * releases with kfree().
+ */
+extern void ext4_fname_setup_ci_filename(struct inode *dir,
+                                        const struct qstr *iname,
+                                        struct fscrypt_str *fname);
+#endif
+
 #ifdef CONFIG_FS_ENCRYPTION
 static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst,
                                                const struct fscrypt_name *src)
@@ -2329,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
                return err;
 
        ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+       ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
        return 0;
 }
 
@@ -2344,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
                return err;
 
        ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+       ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
+#endif
        return 0;
 }
 
@@ -2357,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
        fname->crypto_buf.name = NULL;
        fname->usr_fname = NULL;
        fname->disk_name.name = NULL;
+
+#ifdef CONFIG_UNICODE
+       kfree(fname->cf_name.name);
+       fname->cf_name.name = NULL;
+#endif
 }
 #else /* !CONFIG_FS_ENCRYPTION */
 static inline int ext4_fname_setup_filename(struct inode *dir,
@@ -2367,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
        fname->usr_fname = iname;
        fname->disk_name.name = (unsigned char *) iname->name;
        fname->disk_name.len = iname->len;
+
+#ifdef CONFIG_UNICODE
+       ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
+
        return 0;
 }
 
@@ -2377,7 +2404,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
        return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname);
 }
 
-static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
+/*
+ * Without fscrypt the only resource owned by an ext4_filename is the
+ * cached casefolded name, and only when CONFIG_UNICODE is enabled.
+ * The pointer is cleared after kfree().
+ */
+static inline void ext4_fname_free_filename(struct ext4_filename *fname)
+{
+#ifdef CONFIG_UNICODE
+       struct fscrypt_str *folded = &fname->cf_name;
+
+       kfree(folded->name);
+       folded->name = NULL;
+#endif
+}
 #endif /* !CONFIG_FS_ENCRYPTION */
 
 /* dir.c */
@@ -3120,8 +3153,8 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle,
                                         struct inode *inode,
                                         struct buffer_head *bh);
 extern int ext4_ci_compare(const struct inode *parent,
-                          const struct qstr *name,
-                          const struct qstr *entry);
+                          const struct qstr *fname,
+                          const struct qstr *entry, bool quick);
 
 #define S_SHIFT 12
 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
index cd01c4a..4909ced 100644 (file)
@@ -1259,19 +1259,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
 #ifdef CONFIG_UNICODE
 /*
  * Test whether a case-insensitive directory entry matches the filename
- * being searched for.
+ * being searched for.  If quick is set, assume the name being looked up
+ * is already in the casefolded form.
  *
  * Returns: 0 if the directory entry matches, more than 0 if it
  * doesn't match or less than zero on error.
  */
 int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
-                   const struct qstr *entry)
+                   const struct qstr *entry, bool quick)
 {
        const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
        const struct unicode_map *um = sbi->s_encoding;
        int ret;
 
-       ret = utf8_strncasecmp(um, name, entry);
+       if (quick)
+               ret = utf8_strncasecmp_folded(um, name, entry);
+       else
+               ret = utf8_strncasecmp(um, name, entry);
+
        if (ret < 0) {
                /* Handle invalid character sequence as either an error
                 * or as an opaque byte sequence.
@@ -1287,6 +1292,27 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
 
        return ret;
 }
+
+/*
+ * Cache a casefolded copy of the name under lookup so that ext4_match()
+ * can compare directory entries against it without folding it again.
+ *
+ * @dir:     directory being searched
+ * @iname:   name as supplied by the caller
+ * @cf_name: receives the folded name
+ *
+ * On return cf_name->name is NULL when no folded name is available
+ * (directory not casefolded, no encoding loaded, allocation failure, or
+ * @iname could not be folded); callers then fall back to the slow
+ * comparison.  Otherwise it points to a kmalloc()ed buffer of
+ * cf_name->len bytes that ext4_fname_free_filename() releases.
+ */
+void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+                                 struct fscrypt_str *cf_name)
+{
+       const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+       int len;
+
+       cf_name->name = NULL;
+
+       /* Same precondition ext4_match() uses before a ci comparison. */
+       if (!um || !IS_CASEFOLDED(dir))
+               return;
+
+       cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
+       if (!cf_name->name)
+               return;
+
+       /*
+        * Keep the result in a signed local: cf_name->len is unsigned, so
+        * storing a negative error there first would make the failure
+        * check below impossible to trigger.
+        */
+       len = utf8_casefold(um, iname, cf_name->name, EXT4_NAME_LEN);
+       if (len <= 0) {
+               kfree(cf_name->name);
+               cf_name->name = NULL;
+               return;
+       }
+
+       cf_name->len = len;
+}
 #endif
 
 /*
@@ -1313,8 +1339,15 @@ static inline bool ext4_match(const struct inode *parent,
 #endif
 
 #ifdef CONFIG_UNICODE
-       if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent))
-               return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0);
+       if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+               if (fname->cf_name.name) {
+                       struct qstr cf = {.name = fname->cf_name.name,
+                                         .len = fname->cf_name.len};
+                       return !ext4_ci_compare(parent, &cf, &entry, true);
+               }
+               return !ext4_ci_compare(parent, fname->usr_fname, &entry,
+                                       false);
+       }
 #endif
 
        return fscrypt_match_name(&f, de->name, de->name_len);
index 6afab4f..71ca4d0 100644 (file)
@@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um,
 }
 EXPORT_SYMBOL(utf8_strncasecmp);
 
+/*
+ * Compare @s1 against @cf, where @cf is expected to be a valid UTF-8
+ * string that is already in casefolded form.  @s1 is folded one byte
+ * at a time through a cursor, so only one side has to be processed.
+ *
+ * Returns 0 if the strings match, 1 if they differ, and -EINVAL if
+ * @s1 cannot be decoded.
+ */
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+                           const struct qstr *cf,
+                           const struct qstr *s1)
+{
+       const struct utf8data *data = utf8nfdicf(um->version);
+       struct utf8cursor cur1;
+       unsigned int i = 0;
+       int c1, c2;
+
+       if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+               return -EINVAL;
+
+       do {
+               c1 = utf8byte(&cur1);
+               if (c1 < 0)
+                       return -EINVAL;
+
+               /*
+                * @cf is length-delimited: treat anything at or past
+                * cf->len as the terminator instead of reading beyond
+                * the buffer when no NUL byte follows the name.
+                */
+               c2 = i < cf->len ? cf->name[i] : 0;
+               i++;
+
+               if (c1 != c2)
+                       return 1;
+       } while (c1);
+
+       return 0;
+}
+EXPORT_SYMBOL(utf8_strncasecmp_folded);
+
 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
                  unsigned char *dest, size_t dlen)
 {
index aec2c6d..990aa97 100644 (file)
@@ -17,6 +17,9 @@ int utf8_strncmp(const struct unicode_map *um,
 
 int utf8_strncasecmp(const struct unicode_map *um,
                 const struct qstr *s1, const struct qstr *s2);
+/*
+ * Like utf8_strncasecmp(), but @cf is expected to already be casefolded;
+ * only @s1 is folded during the comparison.  Returns 0 on a match, 1 on
+ * a mismatch and -EINVAL if @s1 cannot be decoded.
+ */
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+                           const struct qstr *cf,
+                           const struct qstr *s1);
 
 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
                   unsigned char *dest, size_t dlen);