netfs: Fix undifferentiation of DIO reads from unbuffered reads
author: David Howells <dhowells@redhat.com>
Fri, 23 May 2025 07:57:52 +0000 (08:57 +0100)
committer: Christian Brauner <brauner@kernel.org>
Fri, 23 May 2025 08:35:03 +0000 (10:35 +0200)
On cifs, "DIO reads" (specified by O_DIRECT) need to be differentiated from
"unbuffered reads" (specified by cache=none in the mount parameters).  The
difference is flagged in the protocol and the server may behave
differently: Windows Server will, for example, mandate that DIO reads are
block aligned.

Fix this by adding a NETFS_UNBUFFERED_READ request origin to differentiate
unbuffered reads from NETFS_DIO_READ, parallelling the write differentiation
that already exists (NETFS_UNBUFFERED_WRITE vs NETFS_DIO_WRITE).  cifs will
then do the right thing.

Fixes: 016dc8516aec ("netfs: Implement unbuffered/DIO read support")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/3444961.1747987072@warthog.procyon.org.uk
Reviewed-by: "Paulo Alcantara (Red Hat)" <pc@manguebit.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
cc: Steve French <sfrench@samba.org>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
12 files changed:
fs/9p/vfs_addr.c
fs/afs/write.c
fs/ceph/addr.c
fs/netfs/direct_read.c
fs/netfs/main.c
fs/netfs/misc.c
fs/netfs/objects.c
fs/netfs/read_collect.c
fs/nfs/fscache.c
fs/smb/client/file.c
include/linux/netfs.h
include/trace/events/netfs.h

index b5a4a28..e442059 100644 (file)
@@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
 
        /* if we just extended the file size, any portion not in
         * cache won't be on server and is zeroes */
-       if (subreq->rreq->origin != NETFS_DIO_READ)
+       if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+           subreq->rreq->origin != NETFS_DIO_READ)
                __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
        if (pos + total >= i_size_read(rreq->inode))
                __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
index 7df7b2f..2e7526e 100644 (file)
@@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st
        case NETFS_READ_GAPS:
        case NETFS_READ_SINGLE:
        case NETFS_READ_FOR_WRITE:
+       case NETFS_UNBUFFERED_READ:
        case NETFS_DIO_READ:
                return;
        default:
index 557c326..b95c4cb 100644 (file)
@@ -238,6 +238,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
                if (sparse && err > 0)
                        err = ceph_sparse_ext_map_end(op);
                if (err < subreq->len &&
+                   subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
                    subreq->rreq->origin != NETFS_DIO_READ)
                        __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
                if (IS_ENCRYPTED(inode) && err > 0) {
@@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
        size_t len;
        int mode;
 
-       if (rreq->origin != NETFS_DIO_READ)
+       if (rreq->origin != NETFS_UNBUFFERED_READ &&
+           rreq->origin != NETFS_DIO_READ)
                __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
        __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
 
index e72c8b6..a05e134 100644 (file)
@@ -185,7 +185,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
 
        rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
                                   iocb->ki_pos, orig_count,
-                                  NETFS_DIO_READ);
+                                  iocb->ki_flags & IOCB_DIRECT ?
+                                  NETFS_DIO_READ : NETFS_UNBUFFERED_READ);
        if (IS_ERR(rreq))
                return PTR_ERR(rreq);
 
index 4e3e620..7ade92a 100644 (file)
@@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
        [NETFS_READ_GAPS]               = "RG",
        [NETFS_READ_SINGLE]             = "R1",
        [NETFS_READ_FOR_WRITE]          = "RW",
+       [NETFS_UNBUFFERED_READ]         = "UR",
        [NETFS_DIO_READ]                = "DR",
        [NETFS_WRITEBACK]               = "WB",
        [NETFS_WRITEBACK_SINGLE]        = "W1",
index 77e7f7c..43b67a2 100644 (file)
@@ -461,6 +461,7 @@ all_collected:
                case NETFS_DIO_READ:
                case NETFS_DIO_WRITE:
                case NETFS_READ_SINGLE:
+               case NETFS_UNBUFFERED_READ:
                case NETFS_UNBUFFERED_WRITE:
                        break;
                default:
index f603f10..e8c9973 100644 (file)
@@ -59,6 +59,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
            origin == NETFS_READ_GAPS ||
            origin == NETFS_READ_SINGLE ||
            origin == NETFS_READ_FOR_WRITE ||
+           origin == NETFS_UNBUFFERED_READ ||
            origin == NETFS_DIO_READ) {
                INIT_WORK(&rreq->work, netfs_read_collection_worker);
                rreq->io_streams[0].avail = true;
index 31d00e3..96ee18a 100644 (file)
@@ -340,7 +340,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
 {
        unsigned int i;
 
-       if (rreq->origin == NETFS_DIO_READ) {
+       if (rreq->origin == NETFS_UNBUFFERED_READ ||
+           rreq->origin == NETFS_DIO_READ) {
                for (i = 0; i < rreq->direct_bv_count; i++) {
                        flush_dcache_page(rreq->direct_bv[i].bv_page);
                        // TODO: cifs marks pages in the destination buffer
@@ -358,7 +359,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
        }
        if (rreq->netfs_ops->done)
                rreq->netfs_ops->done(rreq);
-       if (rreq->origin == NETFS_DIO_READ)
+       if (rreq->origin == NETFS_UNBUFFERED_READ ||
+           rreq->origin == NETFS_DIO_READ)
                inode_dio_end(rreq->inode);
 }
 
@@ -414,6 +416,7 @@ bool netfs_read_collection(struct netfs_io_request *rreq)
        //netfs_rreq_is_still_valid(rreq);
 
        switch (rreq->origin) {
+       case NETFS_UNBUFFERED_READ:
        case NETFS_DIO_READ:
        case NETFS_READ_GAPS:
                netfs_rreq_assess_dio(rreq);
index e278a1a..8b07851 100644 (file)
@@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
 
        sreq = netfs->sreq;
        if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+           sreq->rreq->origin != NETFS_UNBUFFERED_READ &&
            sreq->rreq->origin != NETFS_DIO_READ)
                __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
 
index ba28173..ad917db 100644 (file)
@@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
                        goto failed;
        }
 
-       if (subreq->rreq->origin != NETFS_DIO_READ)
+       if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+           subreq->rreq->origin != NETFS_DIO_READ)
                __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
 
        trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
index 7a649cf..065c173 100644 (file)
@@ -203,6 +203,7 @@ enum netfs_io_origin {
        NETFS_READ_GAPS,                /* This read is a synchronous read to fill gaps */
        NETFS_READ_SINGLE,              /* This read should be treated as a single object */
        NETFS_READ_FOR_WRITE,           /* This read is to prepare a write */
+       NETFS_UNBUFFERED_READ,          /* This is an unbuffered read */
        NETFS_DIO_READ,                 /* This is a direct I/O read */
        NETFS_WRITEBACK,                /* This write was triggered by writepages */
        NETFS_WRITEBACK_SINGLE,         /* This monolithic write was triggered by writepages */
index d7ceae7..333d2e3 100644 (file)
@@ -39,6 +39,7 @@
        EM(NETFS_READ_GAPS,                     "RG")           \
        EM(NETFS_READ_SINGLE,                   "R1")           \
        EM(NETFS_READ_FOR_WRITE,                "RW")           \
+       EM(NETFS_UNBUFFERED_READ,               "UR")           \
        EM(NETFS_DIO_READ,                      "DR")           \
        EM(NETFS_WRITEBACK,                     "WB")           \
        EM(NETFS_WRITEBACK_SINGLE,              "W1")           \