fuse: optimize writepages search
author Maxim Patlasov <mpatlasov@virtuozzo.com>
Thu, 19 Sep 2019 14:11:20 +0000 (17:11 +0300)
committer Miklos Szeredi <mszeredi@redhat.com>
Tue, 19 May 2020 12:50:38 +0000 (14:50 +0200)
Re-work fi->writepages, replacing the list with an rb-tree.  This improves
performance because the kernel fuse driver iterates through fi->writepages
for each writeback page, and the typical number of entries is about 800 (for
100MB of fuse writeback).

Before patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s

 2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0

  29.86%  [kernel]               [k] _raw_spin_lock
  26.62%  [fuse]                 [k] fuse_page_is_writeback

After patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s

 2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0

  23.55%  [kernel]             [k] copy_user_enhanced_fast_string
   9.87%  [kernel]             [k] __memcpy
   3.10%  [kernel]             [k] _raw_spin_lock

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
fs/fuse/file.c
fs/fuse/fuse_i.h

index 4aa750d..15812a8 100644 (file)
@@ -357,7 +357,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
 
 struct fuse_writepage_args {
        struct fuse_io_args ia;
-       struct list_head writepages_entry;
+       struct rb_node writepages_entry;
        struct list_head queue_entry;
        struct fuse_writepage_args *next;
        struct inode *inode;
@@ -366,17 +366,23 @@ struct fuse_writepage_args {
 static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
                                             pgoff_t idx_from, pgoff_t idx_to)
 {
-       struct fuse_writepage_args *wpa;
+       struct rb_node *n;
+
+       n = fi->writepages.rb_node;     /* start the descent at the tree root */
 
-       list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
+       while (n) {
+               struct fuse_writepage_args *wpa;
                 pgoff_t curr_index;
 
+               wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
                 WARN_ON(get_fuse_inode(wpa->inode) != fi);
                 curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
-               if (idx_from < curr_index + wpa->ia.ap.num_pages &&
-                   curr_index <= idx_to) {
+               if (idx_from >= curr_index + wpa->ia.ap.num_pages)
+                       n = n->rb_right;        /* range starts past wpa's last page */
+               else if (idx_to < curr_index)
+                       n = n->rb_left;         /* range ends before wpa's first page */
+               else    /* [idx_from, idx_to] overlaps wpa's pages: found */
                         return wpa;
-               }
         }
         return NULL;
 }
@@ -1624,7 +1630,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
        struct backing_dev_info *bdi = inode_to_bdi(inode);
        int i;
 
-       list_del(&wpa->writepages_entry);
+       rb_erase(&wpa->writepages_entry, &fi->writepages);
        for (i = 0; i < ap->num_pages; i++) {
                dec_wb_stat(&bdi->wb, WB_WRITEBACK);
                dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
@@ -1712,6 +1718,36 @@ __acquires(fi->lock)
        }
 }
 
+static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
+{      /* Link wpa into root, ordered by the page-index range it covers. */
+       pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
+       pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
+       struct rb_node **p = &root->rb_node;
+       struct rb_node  *parent = NULL;
+
+       WARN_ON(!wpa->ia.ap.num_pages); /* else idx_to == idx_from - 1 */
+       while (*p) {
+               struct fuse_writepage_args *curr;
+               pgoff_t curr_index;
+
+               parent = *p;
+               curr = rb_entry(parent, struct fuse_writepage_args,
+                               writepages_entry);
+               WARN_ON(curr->inode != wpa->inode);
+               curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
+               /* Go right if wpa starts past curr's pages, left if it ends before. */
+               if (idx_from >= curr_index + curr->ia.ap.num_pages)
+                       p = &(*p)->rb_right;
+               else if (idx_to < curr_index)
+                       p = &(*p)->rb_left;
+               else    /* ranges overlap: warn and leave wpa out of the tree */
+                       return (void) WARN_ON(true);
+       }
+       /* *p is the empty child slot under parent: link wpa there, then rebalance. */
+       rb_link_node(&wpa->writepages_entry, parent, p);
+       rb_insert_color(&wpa->writepages_entry, root);
+}
+
 static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
                               int error)
 {
@@ -1730,7 +1766,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
                wpa->next = next->next;
                next->next = NULL;
                next->ia.ff = fuse_file_get(wpa->ia.ff);
-               list_add(&next->writepages_entry, &fi->writepages);
+               tree_insert(&fi->writepages, next);
 
                /*
                 * Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1865,7 +1901,7 @@ static int fuse_writepage_locked(struct page *page)
        inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
        spin_lock(&fi->lock);
-       list_add(&wpa->writepages_entry, &fi->writepages);
+       tree_insert(&fi->writepages, wpa);
        list_add_tail(&wpa->queue_entry, &fi->queued_writes);
        fuse_flush_writepages(inode);
        spin_unlock(&fi->lock);
@@ -1977,10 +2013,10 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
        WARN_ON(new_ap->num_pages != 0);
 
        spin_lock(&fi->lock);
-       list_del(&new_wpa->writepages_entry);
+       rb_erase(&new_wpa->writepages_entry, &fi->writepages);
        old_wpa = fuse_find_writeback(fi, page->index, page->index);
        if (!old_wpa) {
-               list_add(&new_wpa->writepages_entry, &fi->writepages);
+               tree_insert(&fi->writepages, new_wpa);
                spin_unlock(&fi->lock);
                return false;
        }
@@ -2095,7 +2131,7 @@ static int fuse_writepages_fill(struct page *page,
                wpa->inode = inode;
 
                spin_lock(&fi->lock);
-               list_add(&wpa->writepages_entry, &fi->writepages);
+               tree_insert(&fi->writepages, wpa);
                spin_unlock(&fi->lock);
 
                data->wpa = wpa;
@@ -3405,5 +3441,5 @@ void fuse_init_file_inode(struct inode *inode)
        INIT_LIST_HEAD(&fi->queued_writes);
        fi->writectr = 0;
        init_waitqueue_head(&fi->page_waitq);
-       INIT_LIST_HEAD(&fi->writepages);
+       fi->writepages = RB_ROOT;
 }
index d7cde21..740a8a7 100644 (file)
@@ -111,7 +111,7 @@ struct fuse_inode {
                        wait_queue_head_t page_waitq;
 
                        /* List of writepage requestst (pending or sent) */
-                       struct list_head writepages;
+                       struct rb_root writepages;
                };
 
                /* readdir cache (directory only) */