Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[linux-2.6-microblaze.git] / fs / ceph / addr.c
index 834be09..9cd0c0e 100644 (file)
@@ -315,7 +315,32 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
        struct page **pages;
        pgoff_t next_index;
        int nr_pages = 0;
-       int ret;
+       int got = 0;
+       int ret = 0;
+
+       if (!current->journal_info) {
+               /* caller of readpages does not hold buffer and read caps
+                * (fadvise, madvise and readahead cases) */
+               int want = CEPH_CAP_FILE_CACHE;
+               ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got);
+               if (ret < 0) {
+                       dout("start_read %p, error getting cap\n", inode);
+               } else if (!(got & want)) {
+                       dout("start_read %p, no cache cap\n", inode);
+                       ret = 0;
+               }
+               if (ret <= 0) {
+                       if (got)
+                               ceph_put_cap_refs(ci, got);
+                       while (!list_empty(page_list)) {
+                               page = list_entry(page_list->prev,
+                                                 struct page, lru);
+                               list_del(&page->lru);
+                               put_page(page);
+                       }
+                       return ret;
+               }
+       }
 
        off = (u64) page_offset(page);
 
@@ -338,15 +363,18 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
                                    CEPH_OSD_FLAG_READ, NULL,
                                    ci->i_truncate_seq, ci->i_truncate_size,
                                    false);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       if (IS_ERR(req)) {
+               ret = PTR_ERR(req);
+               goto out;
+       }
 
        /* build page vector */
        nr_pages = calc_pages_for(0, len);
        pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL);
-       ret = -ENOMEM;
-       if (!pages)
-               goto out;
+       if (!pages) {
+               ret = -ENOMEM;
+               goto out_put;
+       }
        for (i = 0; i < nr_pages; ++i) {
                page = list_entry(page_list->prev, struct page, lru);
                BUG_ON(PageLocked(page));
@@ -378,6 +406,12 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
        if (ret < 0)
                goto out_pages;
        ceph_osdc_put_request(req);
+
+       /* After adding locked pages to page cache, the inode holds cache cap.
+        * So we can drop our cap refs. */
+       if (got)
+               ceph_put_cap_refs(ci, got);
+
        return nr_pages;
 
 out_pages:
@@ -386,8 +420,11 @@ out_pages:
                unlock_page(pages[i]);
        }
        ceph_put_page_vector(pages, nr_pages, false);
-out:
+out_put:
        ceph_osdc_put_request(req);
+out:
+       if (got)
+               ceph_put_cap_refs(ci, got);
        return ret;
 }
 
@@ -424,7 +461,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
                rc = start_read(inode, page_list, max);
                if (rc < 0)
                        goto out;
-               BUG_ON(rc == 0);
        }
 out:
        ceph_fscache_readpages_cancel(inode, page_list);
@@ -438,7 +474,9 @@ out:
  * only snap context we are allowed to write back.
  */
 static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-                                                   loff_t *snap_size)
+                                                   loff_t *snap_size,
+                                                   u64 *truncate_size,
+                                                   u32 *truncate_seq)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_snap_context *snapc = NULL;
@@ -452,6 +490,10 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
                        snapc = ceph_get_snap_context(capsnap->context);
                        if (snap_size)
                                *snap_size = capsnap->size;
+                       if (truncate_size)
+                               *truncate_size = capsnap->truncate_size;
+                       if (truncate_seq)
+                               *truncate_seq = capsnap->truncate_seq;
                        break;
                }
        }
@@ -459,6 +501,10 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
                snapc = ceph_get_snap_context(ci->i_head_snapc);
                dout(" head snapc %p has %d dirty pages\n",
                     snapc, ci->i_wrbuffer_ref_head);
+               /* head snapc: take truncate state from the inode */
+               if (truncate_size)
+                       *truncate_size = ci->i_truncate_size;
+               if (truncate_seq)
+                       *truncate_seq = ci->i_truncate_seq;
        }
        spin_unlock(&ci->i_ceph_lock);
        return snapc;
@@ -501,7 +547,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
                dout("writepage %p page %p not dirty?\n", inode, page);
                goto out;
        }
-       oldest = get_oldest_context(inode, &snap_size);
+       oldest = get_oldest_context(inode, &snap_size,
+                                   &truncate_size, &truncate_seq);
        if (snapc->seq > oldest->seq) {
                dout("writepage %p page %p snapc %p not writeable - noop\n",
                     inode, page, snapc);
@@ -512,12 +559,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        }
        ceph_put_snap_context(oldest);
 
-       spin_lock(&ci->i_ceph_lock);
-       truncate_seq = ci->i_truncate_seq;
-       truncate_size = ci->i_truncate_size;
        if (snap_size == -1)
                snap_size = i_size_read(inode);
-       spin_unlock(&ci->i_ceph_lock);
 
        /* is this a partial page at end of file? */
        if (page_off >= snap_size) {
@@ -764,7 +807,8 @@ retry:
        /* find oldest snap context with dirty data */
        ceph_put_snap_context(snapc);
        snap_size = -1;
-       snapc = get_oldest_context(inode, &snap_size);
+       snapc = get_oldest_context(inode, &snap_size,
+                                  &truncate_size, &truncate_seq);
        if (!snapc) {
                /* hmm, why does writepages get called when there
                   is no dirty data? */
@@ -774,11 +818,7 @@ retry:
        dout(" oldest snapc is %p seq %lld (%d snaps)\n",
             snapc, snapc->seq, snapc->num_snaps);
 
-       spin_lock(&ci->i_ceph_lock);
-       truncate_seq = ci->i_truncate_seq;
-       truncate_size = ci->i_truncate_size;
        i_size = i_size_read(inode);
-       spin_unlock(&ci->i_ceph_lock);
 
        if (last_snapc && snapc != last_snapc) {
                /* if we switched to a newer snapc, restart our scan at the
@@ -1124,7 +1164,8 @@ out:
 static int context_is_writeable_or_written(struct inode *inode,
                                           struct ceph_snap_context *snapc)
 {
-       struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
+       struct ceph_snap_context *oldest = get_oldest_context(inode, NULL,
+                                                             NULL, NULL);
        int ret = !oldest || snapc->seq <= oldest->seq;
 
        ceph_put_snap_context(oldest);
@@ -1169,7 +1210,7 @@ retry_locked:
                 * this page is already dirty in another (older) snap
                 * context!  is it writeable now?
                 */
-               oldest = get_oldest_context(inode, NULL);
+               oldest = get_oldest_context(inode, NULL, NULL, NULL);
 
                if (snapc->seq > oldest->seq) {
                        ceph_put_snap_context(oldest);
@@ -1373,9 +1414,11 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
             inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
 
        if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
-           ci->i_inline_version == CEPH_INLINE_NONE)
+           ci->i_inline_version == CEPH_INLINE_NONE) {
+               current->journal_info = vma->vm_file;
                ret = filemap_fault(vma, vmf);
-       else
+               current->journal_info = NULL;
+       } else
                ret = -EAGAIN;
 
        dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
@@ -1907,6 +1950,15 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
        struct ceph_string *pool_ns;
        int ret, flags;
 
+       if (ci->i_vino.snap != CEPH_NOSNAP) {
+               /*
+                * Pool permission check needs to write to the first object.
+                * But for snapshot, head of the first object may have already
+                * been deleted. Skip check to avoid creating orphan object.
+                */
+               return 0;
+       }
+
        if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
                                NOPOOLPERM))
                return 0;