iomap: support partial page discard on writeback block mapping failure
author Brian Foster <bfoster@redhat.com>
Thu, 29 Oct 2020 21:30:48 +0000 (14:30 -0700)
committer Darrick J. Wong <darrick.wong@oracle.com>
Wed, 4 Nov 2020 16:52:46 +0000 (08:52 -0800)
iomap writeback mapping failure only calls into ->discard_page() if
the current page has not been added to the ioend. Accordingly, the
XFS callback assumes a full page discard and invalidation. This is
problematic for sub-page block size filesystems where some portion
of a page might have been mapped successfully before a failure to
map a delalloc block occurs. ->discard_page() is not called in that
error scenario and the bio is explicitly failed by iomap via the
error return from ->prepare_ioend(). As a result, the filesystem
leaks delalloc blocks and corrupts the filesystem block counters.

Since XFS is the only user of ->discard_page(), tweak the semantics
to invoke the callback unconditionally on mapping errors and provide
the file offset that failed to map. Update xfs_discard_page() to
discard the corresponding portion of the file and pass the range
along to iomap_invalidatepage(). The latter already properly handles
both full and sub-page scenarios by not changing any iomap or page
state on sub-page invalidations.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/iomap/buffered-io.c
fs/xfs/xfs_aops.c
include/linux/iomap.h

index 8180061..e4ea1f9 100644 (file)
@@ -1382,14 +1382,15 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
         * appropriately.
         */
        if (unlikely(error)) {
+               /*
+                * Let the filesystem know what portion of the current page
+                * failed to map. If the page hasn't been added to ioend, it
+                * won't be affected by I/O completion and we must unlock it
+                * now.
+                */
+               if (wpc->ops->discard_page)
+                       wpc->ops->discard_page(page, file_offset);
                if (!count) {
-                       /*
-                        * If the current page hasn't been added to ioend, it
-                        * won't be affected by I/O completions and we must
-                        * discard and unlock it right here.
-                        */
-                       if (wpc->ops->discard_page)
-                               wpc->ops->discard_page(page);
                        ClearPageUptodate(page);
                        unlock_page(page);
                        goto done;
index 55d126d..5bf37af 100644 (file)
@@ -527,13 +527,15 @@ xfs_prepare_ioend(
  */
 static void
 xfs_discard_page(
-       struct page             *page)
+       struct page             *page,
+       loff_t                  fileoff)
 {
        struct inode            *inode = page->mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
-       loff_t                  offset = page_offset(page);
-       xfs_fileoff_t           start_fsb = XFS_B_TO_FSBT(mp, offset);
+       unsigned int            pageoff = offset_in_page(fileoff);
+       xfs_fileoff_t           start_fsb = XFS_B_TO_FSBT(mp, fileoff);
+       xfs_fileoff_t           pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff);
        int                     error;
 
        if (XFS_FORCED_SHUTDOWN(mp))
@@ -541,14 +543,14 @@ xfs_discard_page(
 
        xfs_alert_ratelimited(mp,
                "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
-                       page, ip->i_ino, offset);
+                       page, ip->i_ino, fileoff);
 
        error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-                       i_blocks_per_page(inode, page));
+                       i_blocks_per_page(inode, page) - pageoff_fsb);
        if (error && !XFS_FORCED_SHUTDOWN(mp))
                xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 out_invalidate:
-       iomap_invalidatepage(page, 0, PAGE_SIZE);
+       iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff);
 }
 
 static const struct iomap_writeback_ops xfs_writeback_ops = {
index 172b339..5bd3cac 100644 (file)
@@ -221,7 +221,7 @@ struct iomap_writeback_ops {
         * Optional, allows the file system to discard state on a page where
         * we failed to submit any I/O.
         */
-       void (*discard_page)(struct page *page);
+       void (*discard_page)(struct page *page, loff_t fileoff);
 };
 
 struct iomap_writepage_ctx {