4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
37 #include <asm/div64.h>
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
46 #include "smbdirect.h"
47 #include "fs_context.h"
/*
 * Map the O_ACCMODE portion of open(2) flags to SMB/NT desired-access bits
 * for a non-POSIX (NT-style) open.
 *
 * NOTE(review): this listing embeds the original file's line numbers and
 * elides interior lines — the return values of the O_RDONLY/O_WRONLY arms
 * and the closing braces are not visible here; confirm against full source.
 */
49 static inline int cifs_convert_flags(unsigned int flags)
51 if ((flags & O_ACCMODE) == O_RDONLY)
53 else if ((flags & O_ACCMODE) == O_WRONLY)
55 else if ((flags & O_ACCMODE) == O_RDWR) {
56 /* GENERIC_ALL is too much permission to request
57 can cause unnecessary access denied on create */
58 /* return GENERIC_ALL; */
59 return (GENERIC_READ | GENERIC_WRITE);
/* Fallback: request a conservative explicit set of access rights. */
62 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
63 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate open(2) flags into SMB_O_* flags for a POSIX-extensions open.
 * O_EXCL without O_CREAT is ignored (with a debug message), matching POSIX
 * "undefined" semantics. Conditions guarding a few |= lines (O_TRUNC,
 * O_SYNC/O_DSYNC, O_DIRECT) are elided in this listing — TODO confirm.
 */
67 static u32 cifs_posix_convert_flags(unsigned int flags)
71 if ((flags & O_ACCMODE) == O_RDONLY)
72 posix_flags = SMB_O_RDONLY;
73 else if ((flags & O_ACCMODE) == O_WRONLY)
74 posix_flags = SMB_O_WRONLY;
75 else if ((flags & O_ACCMODE) == O_RDWR)
76 posix_flags = SMB_O_RDWR;
78 if (flags & O_CREAT) {
79 posix_flags |= SMB_O_CREAT;
81 posix_flags |= SMB_O_EXCL;
82 } else if (flags & O_EXCL)
83 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
84 current->comm, current->tgid);
87 posix_flags |= SMB_O_TRUNC;
88 /* be safe and imply O_SYNC for O_DSYNC */
90 posix_flags |= SMB_O_SYNC;
91 if (flags & O_DIRECTORY)
92 posix_flags |= SMB_O_DIRECTORY;
93 if (flags & O_NOFOLLOW)
94 posix_flags |= SMB_O_NOFOLLOW;
96 posix_flags |= SMB_O_DIRECT;
/*
 * Map open(2) creation flags to an SMB create disposition:
 *   O_CREAT|O_EXCL -> (elided; presumably FILE_CREATE — TODO confirm)
 *   O_CREAT|O_TRUNC -> FILE_OVERWRITE_IF
 *   O_CREAT         -> (elided; presumably FILE_OPEN_IF — TODO confirm)
 *   O_TRUNC         -> FILE_OVERWRITE
 * The final default return is not visible in this listing.
 */
101 static inline int cifs_get_disposition(unsigned int flags)
103 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
105 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
106 return FILE_OVERWRITE_IF;
107 else if ((flags & O_CREAT) == O_CREAT)
109 else if ((flags & O_TRUNC) == O_TRUNC)
110 return FILE_OVERWRITE;
/*
 * cifs_posix_open - open a file via the SMB1 Unix/POSIX extensions.
 * @full_path: path of the file relative to the share root
 * @pinode: in/out inode pointer; if *pinode is NULL a new inode is created
 *          from the returned attributes, otherwise the existing inode is
 *          revalidated and refreshed
 * @sb/@cifs_sb: superblock / mount info (local_nls, remap settings)
 * @mode: create mode, masked with current umask before the wire call
 * @f_flags: open(2) flags, converted by cifs_posix_convert_flags()
 * @poplock/@pnetfid: returned oplock level and netfid
 * @xid: transaction id for this operation
 *
 * NOTE(review): rc declaration, several error checks and the
 * posix_open_ret cleanup label are elided in this listing.
 */
115 int cifs_posix_open(const char *full_path, struct inode **pinode,
116 struct super_block *sb, int mode, unsigned int f_flags,
117 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
120 FILE_UNIX_BASIC_INFO *presp_data;
121 __u32 posix_flags = 0;
122 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
123 struct cifs_fattr fattr;
124 struct tcon_link *tlink;
125 struct cifs_tcon *tcon;
127 cifs_dbg(FYI, "posix open %s\n", full_path);
129 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
130 if (presp_data == NULL)
133 tlink = cifs_sb_tlink(cifs_sb);
139 tcon = tlink_tcon(tlink);
140 mode &= ~current_umask();
142 posix_flags = cifs_posix_convert_flags(f_flags);
143 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
144 poplock, full_path, cifs_sb->local_nls,
145 cifs_remap(cifs_sb));
146 cifs_put_tlink(tlink);
/* Type == -1 means the server did not return attributes with the open. */
151 if (presp_data->Type == cpu_to_le32(-1))
152 goto posix_open_ret; /* open ok, caller does qpathinfo */
155 goto posix_open_ret; /* caller does not need info */
157 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
159 /* get new inode and set it up */
160 if (*pinode == NULL) {
161 cifs_fill_uniqueid(sb, &fattr);
162 *pinode = cifs_iget(sb, &fattr);
/* else branch (elided): refresh the caller-supplied inode in place */
168 cifs_revalidate_mapping(*pinode);
169 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * cifs_nt_open - open a file the non-POSIX (NT protocol) way via
 * server->ops->open, then refresh inode info from the returned buffer.
 *
 * Returns -ENOSYS-style failure path if the server has no ->open op
 * (exact rc elided in this listing). On post-open inode-refresh failure
 * the just-opened handle is closed again (L148). Several declarations
 * (rc, desired_access, disposition, buf) and oparms.fid/tcon assignments
 * are elided here — confirm against full source.
 */
178 cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
179 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
180 struct cifs_fid *fid, unsigned int xid)
185 int create_options = CREATE_NOT_DIR;
187 struct TCP_Server_Info *server = tcon->ses->server;
188 struct cifs_open_parms oparms;
190 if (!server->ops->open)
193 desired_access = cifs_convert_flags(f_flags);
195 /*********************************************************************
196 * open flag mapping table:
198 * POSIX Flag CIFS Disposition
199 * ---------- ----------------
200 * O_CREAT FILE_OPEN_IF
201 * O_CREAT | O_EXCL FILE_CREATE
202 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
203 * O_TRUNC FILE_OVERWRITE
204 * none of the above FILE_OPEN
206 * Note that there is not a direct match between disposition
207 * FILE_SUPERSEDE (ie create whether or not file exists although
208 * O_CREAT | O_TRUNC is similar but truncates the existing
209 * file rather than creating a new file as FILE_SUPERSEDE does
210 * (which uses the attributes / metadata passed in on open call)
212 *? O_SYNC is a reasonable match to CIFS writethrough flag
213 *? and the read write flags match reasonably. O_LARGEFILE
214 *? is irrelevant because largefile support is always used
215 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
216 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
217 *********************************************************************/
219 disposition = cifs_get_disposition(f_flags);
221 /* BB pass O_SYNC flag through on file attributes .. BB */
223 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
227 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
228 if (f_flags & O_SYNC)
229 create_options |= CREATE_WRITE_THROUGH;
231 if (f_flags & O_DIRECT)
232 create_options |= CREATE_NO_BUFFER;
235 oparms.cifs_sb = cifs_sb;
236 oparms.desired_access = desired_access;
237 oparms.create_options = cifs_create_options(cifs_sb, create_options);
238 oparms.disposition = disposition;
239 oparms.path = full_path;
241 oparms.reconnect = false;
243 rc = server->ops->open(xid, &oparms, oplock, buf);
248 /* TODO: Add support for calling posix query info but with passing in fid */
/* Refresh inode metadata; unix vs. NT variant chosen by mount caps. */
250 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
253 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/* On refresh failure, close the handle we just opened (cleanup path). */
257 server->ops->close(xid, tcon, fid);
/*
 * Return true if any open fid on this inode holds byte-range (mandatory
 * style) locks. Walks cinode->llist under a read-held lock_sem.
 * The has_locks = true assignment / break inside the if is elided here.
 */
268 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
270 struct cifs_fid_locks *cur;
271 bool has_locks = false;
273 down_read(&cinode->lock_sem);
274 list_for_each_entry(cur, &cinode->llist, llist) {
275 if (!list_empty(&cur->locks)) {
280 up_read(&cinode->lock_sem);
/*
 * Acquire a rw_semaphore for writing by spinning on trylock (the loop
 * body — presumably a short msleep — is elided in this listing).
 * NOTE(review): used instead of plain down_write(); rationale not visible
 * here — confirm against full source before altering.
 */
285 cifs_down_write(struct rw_semaphore *sem)
287 while (!down_write_trylock(sem))
/* forward declaration: deferred-work handler defined later in this file */
291 static void cifsFileInfo_put_work(struct work_struct *work);
/*
 * cifs_new_fileinfo - allocate and initialize a cifsFileInfo for a freshly
 * opened file, link it into the tcon and inode open-file lists, resolve
 * the effective oplock (pending_open may override), and attach it to
 * file->private_data.
 *
 * Locking: takes cinode->lock_sem (write), then tcon->open_file_lock and
 * cinode->open_file_lock nested inside it.
 *
 * NOTE(review): NULL-checks after the two kzalloc()s and the kfree/error
 * returns are elided in this listing, as is the refcount init.
 */
293 struct cifsFileInfo *
294 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
295 struct tcon_link *tlink, __u32 oplock)
297 struct dentry *dentry = file_dentry(file);
298 struct inode *inode = d_inode(dentry);
299 struct cifsInodeInfo *cinode = CIFS_I(inode);
300 struct cifsFileInfo *cfile;
301 struct cifs_fid_locks *fdlocks;
302 struct cifs_tcon *tcon = tlink_tcon(tlink);
303 struct TCP_Server_Info *server = tcon->ses->server;
305 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
309 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
315 INIT_LIST_HEAD(&fdlocks->locks);
316 fdlocks->cfile = cfile;
317 cfile->llist = fdlocks;
320 cfile->pid = current->tgid;
321 cfile->uid = current_fsuid();
322 cfile->dentry = dget(dentry);
323 cfile->f_flags = file->f_flags;
324 cfile->invalidHandle = false;
325 cfile->oplock_break_received = false;
326 cfile->deferred_scheduled = false;
327 cfile->tlink = cifs_get_tlink(tlink);
328 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
329 INIT_WORK(&cfile->put, cifsFileInfo_put_work);
330 INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
331 mutex_init(&cfile->fh_mutex);
332 spin_lock_init(&cfile->file_info_lock);
334 cifs_sb_active(inode->i_sb);
337 * If the server returned a read oplock and we have mandatory brlocks,
338 * set oplock level to None.
340 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
341 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
/* oplock = 0 assignment elided in this listing */
345 cifs_down_write(&cinode->lock_sem);
346 list_add(&fdlocks->llist, &cinode->llist);
347 up_write(&cinode->lock_sem);
349 spin_lock(&tcon->open_file_lock);
/* A lease break may already have arrived via the pending_open entry. */
350 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
351 oplock = fid->pending_open->oplock;
352 list_del(&fid->pending_open->olist);
354 fid->purge_cache = false;
355 server->ops->set_fid(cfile, fid, oplock);
357 list_add(&cfile->tlist, &tcon->openFileList);
358 atomic_inc(&tcon->num_local_opens);
360 /* if readable file instance put first in list*/
361 spin_lock(&cinode->open_file_lock);
362 if (file->f_mode & FMODE_READ)
363 list_add(&cfile->flist, &cinode->openFileList);
365 list_add_tail(&cfile->flist, &cinode->openFileList);
366 spin_unlock(&cinode->open_file_lock);
367 spin_unlock(&tcon->open_file_lock);
369 if (fid->purge_cache)
370 cifs_zap_mapping(inode);
372 file->private_data = cfile;
/*
 * Take a reference on a cifsFileInfo under its file_info_lock.
 * The return statement is elided here; presumably returns cifs_file.
 */
376 struct cifsFileInfo *
377 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
379 spin_lock(&cifs_file->file_info_lock);
380 cifsFileInfo_get_locked(cifs_file);
381 spin_unlock(&cifs_file->file_info_lock);
/*
 * Final teardown once the last reference to a cifsFileInfo is gone:
 * drop outstanding byte-range lock records (waking any waiters), free
 * the per-fid lock list, then release the tlink, dentry, and superblock
 * active reference. kfree(li) inside the loop is elided in this listing.
 */
385 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
387 struct inode *inode = d_inode(cifs_file->dentry);
388 struct cifsInodeInfo *cifsi = CIFS_I(inode);
389 struct cifsLockInfo *li, *tmp;
390 struct super_block *sb = inode->i_sb;
393 * Delete any outstanding lock records. We'll lose them when the file
396 cifs_down_write(&cifsi->lock_sem);
397 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
398 list_del(&li->llist);
399 cifs_del_lock_waiters(li);
402 list_del(&cifs_file->llist->llist);
403 kfree(cifs_file->llist);
404 up_write(&cifsi->lock_sem);
406 cifs_put_tlink(cifs_file->tlink);
407 dput(cifs_file->dentry);
408 cifs_sb_deactive(sb);
/* kfree(cifs_file) elided in this listing — confirm against full source */
/*
 * Workqueue wrapper: runs cifsFileInfo_put_final() from process context
 * when the final put was offloaded (see _cifsFileInfo_put with offload).
 */
412 static void cifsFileInfo_put_work(struct work_struct *work)
414 struct cifsFileInfo *cifs_file = container_of(work,
415 struct cifsFileInfo, put);
417 cifsFileInfo_put_final(cifs_file);
421 * cifsFileInfo_put - release a reference of file priv data
423 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
425 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
/* Thin wrapper: wait_oplock_handler = true, offload = true. */
427 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
429 _cifsFileInfo_put(cifs_file, true, true);
433 * _cifsFileInfo_put - release a reference of file priv data
435 * This may involve closing the filehandle @cifs_file out on the
436 * server. Must be called without holding tcon->open_file_lock,
437 * cinode->open_file_lock and cifs_file->file_info_lock.
439 * If @wait_for_oplock_handler is true and we are releasing the last
440 * reference, wait for any running oplock break handler of the file
441 * and cancel any pending one.
443 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
444 * @wait_oplock_handler: must be false if called from oplock_break_handler
445 * @offload: not offloaded on close and oplock breaks
448 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
449 bool wait_oplock_handler, bool offload)
451 struct inode *inode = d_inode(cifs_file->dentry);
452 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
453 struct TCP_Server_Info *server = tcon->ses->server;
454 struct cifsInodeInfo *cifsi = CIFS_I(inode);
455 struct super_block *sb = inode->i_sb;
456 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
458 struct cifs_pending_open open;
459 bool oplock_break_cancelled;
/* Drop the refcount under all three locks; bail out if refs remain. */
461 spin_lock(&tcon->open_file_lock);
462 spin_lock(&cifsi->open_file_lock);
463 spin_lock(&cifs_file->file_info_lock);
464 if (--cifs_file->count > 0) {
465 spin_unlock(&cifs_file->file_info_lock);
466 spin_unlock(&cifsi->open_file_lock);
467 spin_unlock(&tcon->open_file_lock);
/* return elided in this listing */
470 spin_unlock(&cifs_file->file_info_lock);
472 if (server->ops->get_lease_key)
473 server->ops->get_lease_key(inode, &fid);
475 /* store open in pending opens to make sure we don't miss lease break */
476 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
478 /* remove it from the lists */
479 list_del(&cifs_file->flist);
480 list_del(&cifs_file->tlist);
481 atomic_dec(&tcon->num_local_opens);
483 if (list_empty(&cifsi->openFileList)) {
484 cifs_dbg(FYI, "closing last open instance for inode %p\n",
485 d_inode(cifs_file->dentry));
487 * In strict cache mode we need invalidate mapping on the last
488 * close because it may cause a error when we open this file
489 * again and get at least level II oplock.
491 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
492 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
493 cifs_set_oplock_level(cifsi, 0);
496 spin_unlock(&cifsi->open_file_lock);
497 spin_unlock(&tcon->open_file_lock);
/* Must not hold the spinlocks across cancel_work_sync (may sleep). */
499 oplock_break_cancelled = wait_oplock_handler ?
500 cancel_work_sync(&cifs_file->oplock_break) : false;
502 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
503 struct TCP_Server_Info *server = tcon->ses->server;
/* xid acquisition elided in this listing */
507 if (server->ops->close_getattr)
508 server->ops->close_getattr(xid, tcon, cifs_file);
509 else if (server->ops->close)
510 server->ops->close(xid, tcon, &cifs_file->fid);
/* free_xid elided in this listing */
514 if (oplock_break_cancelled)
515 cifs_done_oplock_break(cifsi);
517 cifs_del_pending_open(&open);
/* Final teardown: either offloaded to the workqueue or done inline. */
520 queue_work(fileinfo_put_wq, &cifs_file->put);
522 cifsFileInfo_put_final(cifs_file);
/*
 * cifs_open - VFS ->open handler for regular files.
 *
 * Order of operations visible here:
 *  1. Resolve tlink/tcon/server and build the full path.
 *  2. In strict-cache + O_DIRECT mounts, swap in the direct-I/O fops.
 *  3. Try to reuse a deferred-close cached handle with identical flags.
 *  4. Try a POSIX-extensions open if the server advertises the capability
 *     (falling back and marking broken_posix_open on EINVAL/EOPNOTSUPP).
 *  5. Otherwise do an NT-style open via cifs_nt_open(), bracketed by a
 *     pending_open record so lease breaks are not missed.
 *  6. Wrap the fid in a cifsFileInfo; on unix_ext CREATE_ACTION, push the
 *     mode to the server (couldn't be set earlier for read-only creates).
 *
 * NOTE(review): rc/xid/oplock declarations, several error-exit labels and
 * the final return are elided in this listing.
 */
525 int cifs_open(struct inode *inode, struct file *file)
531 struct cifs_sb_info *cifs_sb;
532 struct TCP_Server_Info *server;
533 struct cifs_tcon *tcon;
534 struct tcon_link *tlink;
535 struct cifsFileInfo *cfile = NULL;
537 const char *full_path;
538 bool posix_open_ok = false;
540 struct cifs_pending_open open;
544 cifs_sb = CIFS_SB(inode->i_sb);
545 tlink = cifs_sb_tlink(cifs_sb);
548 return PTR_ERR(tlink);
550 tcon = tlink_tcon(tlink);
551 server = tcon->ses->server;
553 page = alloc_dentry_path();
554 full_path = build_path_from_dentry(file_dentry(file), page);
555 if (IS_ERR(full_path)) {
556 rc = PTR_ERR(full_path);
560 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
561 inode, file->f_flags, full_path);
563 if (file->f_flags & O_DIRECT &&
564 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
565 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
566 file->f_op = &cifs_file_direct_nobrl_ops;
568 file->f_op = &cifs_file_direct_ops;
571 spin_lock(&CIFS_I(inode)->deferred_lock);
572 /* Get the cached handle as SMB2 close is deferred */
573 rc = cifs_get_readable_path(tcon, full_path, &cfile);
575 if (file->f_flags == cfile->f_flags) {
576 file->private_data = cfile;
577 cifs_del_deferred_close(cfile);
578 spin_unlock(&CIFS_I(inode)->deferred_lock);
/* reuse successful — jump to common exit (elided) */
581 spin_unlock(&CIFS_I(inode)->deferred_lock);
/* flags differ: drop the cached handle and open fresh */
582 _cifsFileInfo_put(cfile, true, false);
585 spin_unlock(&CIFS_I(inode)->deferred_lock);
593 if (!tcon->broken_posix_open && tcon->unix_ext &&
594 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
595 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
596 /* can not refresh inode info since size could be stale */
597 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
598 cifs_sb->ctx->file_mode /* ignored */,
599 file->f_flags, &oplock, &fid.netfid, xid);
601 cifs_dbg(FYI, "posix open succeeded\n");
602 posix_open_ok = true;
603 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
604 if (tcon->ses->serverNOS)
605 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
607 tcon->ses->serverNOS);
608 tcon->broken_posix_open = true;
609 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
610 (rc != -EOPNOTSUPP)) /* path not found or net err */
613 * Else fallthrough to retry open the old way on network i/o
618 if (server->ops->get_lease_key)
619 server->ops->get_lease_key(inode, &fid);
621 cifs_add_pending_open(&fid, tlink, &open);
623 if (!posix_open_ok) {
624 if (server->ops->get_lease_key)
625 server->ops->get_lease_key(inode, &fid);
627 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
628 file->f_flags, &oplock, &fid, xid);
630 cifs_del_pending_open(&open);
635 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
/* cifs_new_fileinfo failure: close the server handle we just opened */
637 if (server->ops->close)
638 server->ops->close(xid, tcon, &fid);
639 cifs_del_pending_open(&open);
644 cifs_fscache_set_inode_cookie(inode, file);
646 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
648 * Time to set mode which we can not set earlier due to
649 * problems creating new read-only files.
651 struct cifs_unix_set_info_args args = {
652 .mode = inode->i_mode,
653 .uid = INVALID_UID, /* no change */
654 .gid = INVALID_GID, /* no change */
655 .ctime = NO_CHANGE_64,
656 .atime = NO_CHANGE_64,
657 .mtime = NO_CHANGE_64,
660 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
665 free_dentry_path(page);
667 cifs_put_tlink(tlink);
671 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
674 * Try to reacquire byte range locks that were released when session
675 * to server was lost.
/*
 * Runs after a reconnect/reopen. If brlocks are still cacheable nothing
 * was lost; otherwise pushes either POSIX or mandatory locks back to the
 * server depending on the unix FCNTL capability and mount flags.
 * rc declaration and final return elided in this listing.
 */
678 cifs_relock_file(struct cifsFileInfo *cfile)
680 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
681 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
682 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
685 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
686 if (cinode->can_cache_brlcks) {
687 /* can cache locks - no need to relock */
688 up_read(&cinode->lock_sem);
692 if (cap_unix(tcon->ses) &&
693 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
694 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
695 rc = cifs_push_posix_locks(cfile);
697 rc = tcon->ses->server->ops->push_mand_locks(cfile);
699 up_read(&cinode->lock_sem);
/*
 * cifs_reopen_file - re-open a file whose handle was invalidated
 * (e.g. after reconnect), serialized by cfile->fh_mutex.
 *
 * Tries a POSIX-extensions reopen first when the capability is present
 * (masking off O_CREAT/O_EXCL/O_TRUNC, which already took effect on the
 * original open), then falls back to an NT-style server->ops->open with
 * FILE_OPEN disposition. An -ENOENT with reconnect == false indicates an
 * expired durable handle and triggers one retry. On success, if
 * @can_flush, dirty pages are written back and inode info is refreshed;
 * finally the fid/oplock are re-set and locks re-acquired when needed.
 *
 * NOTE(review): rc/xid/oplock/desired_access declarations, some error
 * labels and the final return are elided in this listing.
 */
704 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
709 struct cifs_sb_info *cifs_sb;
710 struct cifs_tcon *tcon;
711 struct TCP_Server_Info *server;
712 struct cifsInodeInfo *cinode;
715 const char *full_path;
717 int disposition = FILE_OPEN;
718 int create_options = CREATE_NOT_DIR;
719 struct cifs_open_parms oparms;
722 mutex_lock(&cfile->fh_mutex);
723 if (!cfile->invalidHandle) {
/* someone else already reopened it while we waited on fh_mutex */
724 mutex_unlock(&cfile->fh_mutex);
729 inode = d_inode(cfile->dentry);
730 cifs_sb = CIFS_SB(inode->i_sb);
731 tcon = tlink_tcon(cfile->tlink);
732 server = tcon->ses->server;
735 * Can not grab rename sem here because various ops, including those
736 * that already have the rename sem can end up causing writepage to get
737 * called and if the server was down that means we end up here, and we
738 * can never tell if the caller already has the rename_sem.
740 page = alloc_dentry_path();
741 full_path = build_path_from_dentry(cfile->dentry, page);
742 if (IS_ERR(full_path)) {
743 mutex_unlock(&cfile->fh_mutex);
744 free_dentry_path(page);
746 return PTR_ERR(full_path);
749 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
750 inode, cfile->f_flags, full_path);
752 if (tcon->ses->server->oplocks)
/* oplock request value assignment elided in this listing */
757 if (tcon->unix_ext && cap_unix(tcon->ses) &&
758 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
759 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
761 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
762 * original open. Must mask them off for a reopen.
764 unsigned int oflags = cfile->f_flags &
765 ~(O_CREAT | O_EXCL | O_TRUNC);
767 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
768 cifs_sb->ctx->file_mode /* ignored */,
769 oflags, &oplock, &cfile->fid.netfid, xid);
771 cifs_dbg(FYI, "posix reopen succeeded\n");
772 oparms.reconnect = true;
/* goto to the post-open success path elided in this listing */
776 * fallthrough to retry open the old way on errors, especially
777 * in the reconnect path it is important to retry hard
781 desired_access = cifs_convert_flags(cfile->f_flags);
783 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
784 if (cfile->f_flags & O_SYNC)
785 create_options |= CREATE_WRITE_THROUGH;
787 if (cfile->f_flags & O_DIRECT)
788 create_options |= CREATE_NO_BUFFER;
790 if (server->ops->get_lease_key)
791 server->ops->get_lease_key(inode, &cfile->fid);
794 oparms.cifs_sb = cifs_sb;
795 oparms.desired_access = desired_access;
796 oparms.create_options = cifs_create_options(cifs_sb, create_options);
797 oparms.disposition = disposition;
798 oparms.path = full_path;
799 oparms.fid = &cfile->fid;
800 oparms.reconnect = true;
803 * Can not refresh inode by passing in file_info buf to be returned by
804 * ops->open and then calling get_inode_info with returned buf since
805 * file might have write behind data that needs to be flushed and server
806 * version of file size can be stale. If we knew for sure that inode was
807 * not dirty locally we could do this.
809 rc = server->ops->open(xid, &oparms, &oplock, NULL);
810 if (rc == -ENOENT && oparms.reconnect == false) {
811 /* durable handle timeout is expired - open the file again */
812 rc = server->ops->open(xid, &oparms, &oplock, NULL);
813 /* indicate that we need to relock the file */
814 oparms.reconnect = true;
/* rc error path: */
818 mutex_unlock(&cfile->fh_mutex);
819 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
820 cifs_dbg(FYI, "oplock: %d\n", oplock);
821 goto reopen_error_exit;
825 cfile->invalidHandle = false;
826 mutex_unlock(&cfile->fh_mutex);
827 cinode = CIFS_I(inode);
/* if (can_flush) — condition elided in this listing */
830 rc = filemap_write_and_wait(inode->i_mapping);
831 if (!is_interrupt_error(rc))
832 mapping_set_error(inode->i_mapping, rc);
834 if (tcon->posix_extensions)
835 rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
836 else if (tcon->unix_ext)
837 rc = cifs_get_inode_info_unix(&inode, full_path,
840 rc = cifs_get_inode_info(&inode, full_path, NULL,
841 inode->i_sb, xid, NULL);
844 * Else we are writing out data to server already and could deadlock if
845 * we tried to flush data, and since we do not know if we have data that
846 * would invalidate the current end of file on the server we can not go
847 * to the server to get the new inode info.
851 * If the server returned a read oplock and we have mandatory brlocks,
852 * set oplock level to None.
854 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
855 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
/* oplock = 0 assignment elided in this listing */
859 server->ops->set_fid(cfile, &cfile->fid, oplock);
860 if (oparms.reconnect)
861 cifs_relock_file(cfile);
864 free_dentry_path(page);
/*
 * Delayed-work handler that performs a deferred SMB2 close: removes the
 * deferred-close record under deferred_lock, clears the scheduled flag,
 * then drops the handle reference (which may close it on the server).
 */
869 void smb2_deferred_work_close(struct work_struct *work)
871 struct cifsFileInfo *cfile = container_of(work,
872 struct cifsFileInfo, deferred.work);
874 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
875 cifs_del_deferred_close(cfile);
876 cfile->deferred_scheduled = false;
877 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
878 _cifsFileInfo_put(cfile, true, false);
/*
 * cifs_close - VFS ->release handler. If the inode holds a RHW (full
 * read/handle/write cache) oplock, defer the actual server close by
 * (re)arming delayed work with an acregmax timeout so a quick re-open can
 * reuse the handle; otherwise drop the reference immediately.
 * NOTE(review): the dclose kmalloc result feeds cifs_add_deferred_close();
 * its NULL-check is part of the elided condition at the second line of the
 * if — confirm against full source.
 */
881 int cifs_close(struct inode *inode, struct file *file)
883 struct cifsFileInfo *cfile;
884 struct cifsInodeInfo *cinode = CIFS_I(inode);
885 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
886 struct cifs_deferred_close *dclose;
888 if (file->private_data != NULL) {
889 cfile = file->private_data;
890 file->private_data = NULL;
891 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
892 if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
894 if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
895 inode->i_ctime = inode->i_mtime = current_time(inode);
896 spin_lock(&cinode->deferred_lock);
897 cifs_add_deferred_close(cfile, dclose);
898 if (cfile->deferred_scheduled) {
/* already queued: push the deadline out instead of double-queuing */
899 mod_delayed_work(deferredclose_wq,
900 &cfile->deferred, cifs_sb->ctx->acregmax);
902 /* Deferred close for files */
903 queue_delayed_work(deferredclose_wq,
904 &cfile->deferred, cifs_sb->ctx->acregmax);
905 cfile->deferred_scheduled = true;
906 spin_unlock(&cinode->deferred_lock);
/* early return path elided in this listing */
909 spin_unlock(&cinode->deferred_lock);
910 _cifsFileInfo_put(cfile, true, false);
912 _cifsFileInfo_put(cfile, true, false);
917 /* return code from the ->release op is always ignored */
/*
 * After reconnect, reopen every invalidated persistent handle on the
 * tree connection. Collects candidates onto a private tmp_list (with a
 * reference each) under open_file_lock, then reopens them outside the
 * lock; any failure re-arms need_reopen_files for a later retry.
 * The `continue` for still-valid handles is elided in this listing.
 */
922 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
924 struct cifsFileInfo *open_file;
925 struct list_head *tmp;
926 struct list_head *tmp1;
927 struct list_head tmp_list;
929 if (!tcon->use_persistent || !tcon->need_reopen_files)
932 tcon->need_reopen_files = false;
934 cifs_dbg(FYI, "Reopen persistent handles\n");
935 INIT_LIST_HEAD(&tmp_list);
937 /* list all files open on tree connection, reopen resilient handles */
938 spin_lock(&tcon->open_file_lock);
939 list_for_each(tmp, &tcon->openFileList) {
940 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
941 if (!open_file->invalidHandle)
943 cifsFileInfo_get(open_file);
944 list_add_tail(&open_file->rlist, &tmp_list);
946 spin_unlock(&tcon->open_file_lock);
948 list_for_each_safe(tmp, tmp1, &tmp_list) {
949 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
950 if (cifs_reopen_file(open_file, false /* do not flush */))
951 tcon->need_reopen_files = true;
952 list_del_init(&open_file->rlist);
953 cifsFileInfo_put(open_file);
/*
 * cifs_closedir - VFS ->release handler for directories. Closes the
 * server-side search handle if still needed, frees the network buffer
 * held by the search state, then releases the tlink and private data.
 * NOTE(review): rc/xid/buf declarations, the NULL-check on private_data
 * and the final return are elided in this listing.
 */
957 int cifs_closedir(struct inode *inode, struct file *file)
961 struct cifsFileInfo *cfile = file->private_data;
962 struct cifs_tcon *tcon;
963 struct TCP_Server_Info *server;
966 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
972 tcon = tlink_tcon(cfile->tlink);
973 server = tcon->ses->server;
975 cifs_dbg(FYI, "Freeing private data in close dir\n");
976 spin_lock(&cfile->file_info_lock);
977 if (server->ops->dir_needs_close(cfile)) {
978 cfile->invalidHandle = true;
/* drop the spinlock before issuing the (sleeping) network close */
979 spin_unlock(&cfile->file_info_lock);
980 if (server->ops->close_dir)
981 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
984 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
985 /* not much we can do if it fails anyway, ignore rc */
988 spin_unlock(&cfile->file_info_lock);
990 buf = cfile->srch_inf.ntwrk_buf_start;
992 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
993 cfile->srch_inf.ntwrk_buf_start = NULL;
994 if (cfile->srch_inf.smallBuf)
995 cifs_small_buf_release(buf);
997 cifs_buf_release(buf);
1000 cifs_put_tlink(cfile->tlink);
1001 kfree(file->private_data);
1002 file->private_data = NULL;
1003 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate and initialize a byte-range lock record. The NULL-check after
 * kmalloc, the lock->type assignment, and the return are elided in this
 * listing. Caller frees; pid is recorded as the opener's tgid.
 */
1008 static struct cifsLockInfo *
1009 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1011 struct cifsLockInfo *lock =
1012 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1015 lock->offset = offset;
1016 lock->length = length;
1018 lock->pid = current->tgid;
1019 lock->flags = flags;
1020 INIT_LIST_HEAD(&lock->blist);
1021 init_waitqueue_head(&lock->block_q);
/*
 * Wake every lock request blocked on @lock: unlink each waiter from the
 * blocked list (list_del_init so the waiter's wait condition sees an
 * empty node) and wake its block_q.
 */
1026 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1028 struct cifsLockInfo *li, *tmp;
1029 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1030 list_del_init(&li->blist);
1031 wake_up(&li->block_q);
/* rw_check argument values for the lock-conflict helpers below */
1035 #define CIFS_LOCK_OP 0
1036 #define CIFS_READ_OP 1
1037 #define CIFS_WRITE_OP 2
1039 /* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's lock list for a lock conflicting with [offset, offset+
 * length) of the given type/flags. Non-overlapping entries are skipped
 * (the `continue` lines are elided in this listing). For read/write
 * checks, a same-thread same-fid lock only conflicts when it is a
 * non-shared lock or the op is a write; OFD locks on the same fid never
 * conflict with each other for CIFS_LOCK_OP. The *conf_lock store,
 * return-true, and final return-false are elided here.
 */
1041 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1042 __u64 length, __u8 type, __u16 flags,
1043 struct cifsFileInfo *cfile,
1044 struct cifsLockInfo **conf_lock, int rw_check)
1046 struct cifsLockInfo *li;
1047 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1048 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1050 list_for_each_entry(li, &fdlocks->locks, llist) {
1051 if (offset + length <= li->offset ||
1052 offset >= li->offset + li->length)
1054 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1055 server->ops->compare_fids(cfile, cur_cfile)) {
1056 /* shared lock prevents write op through the same fid */
1057 if (!(li->type & server->vals->shared_lock_type) ||
1058 rw_check != CIFS_WRITE_OP)
1061 if ((type & server->vals->shared_lock_type) &&
1062 ((server->ops->compare_fids(cfile, cur_cfile) &&
1063 current->tgid == li->pid) || type == li->type))
1065 if (rw_check == CIFS_LOCK_OP &&
1066 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1067 server->ops->compare_fids(cfile, cur_cfile))
/*
 * Check all fids open on the inode for a conflicting lock by delegating
 * to cifs_find_fid_lock_conflict() per fid list. Caller must hold
 * cinode->lock_sem. rc declaration, the early break on match, and the
 * return are elided in this listing.
 */
1077 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1078 __u8 type, __u16 flags,
1079 struct cifsLockInfo **conf_lock, int rw_check)
1082 struct cifs_fid_locks *cur;
1083 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1085 list_for_each_entry(cur, &cinode->llist, llist) {
1086 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1087 flags, cfile, conf_lock,
1097 * Check if there is another lock that prevents us to set the lock (mandatory
1098 * style). If such a lock exists, update the flock structure with its
1099 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1100 * or leave it the same if we can't. Returns 0 if we don't need to request to
1101 * the server or 1 otherwise.
1104 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1105 __u8 type, struct file_lock *flock)
/* rc/exist declarations elided in this listing */
1108 struct cifsLockInfo *conf_lock;
1109 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1110 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1113 down_read(&cinode->lock_sem);
1115 exist = cifs_find_lock_conflict(cfile, offset, length, type,
1116 flock->fl_flags, &conf_lock,
/* conflict found: report its range/pid/type back through flock */
1119 flock->fl_start = conf_lock->offset;
1120 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1121 flock->fl_pid = conf_lock->pid;
1122 if (conf_lock->type & server->vals->shared_lock_type)
1123 flock->fl_type = F_RDLCK;
1125 flock->fl_type = F_WRLCK;
1126 } else if (!cinode->can_cache_brlcks)
/* rc = 1: must ask the server (elided) */
1129 flock->fl_type = F_UNLCK;
1131 up_read(&cinode->lock_sem);
/*
 * Append a lock record to this fid's lock list under a write-held
 * lock_sem (acquired via the trylock-spin helper).
 */
1136 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1138 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1139 cifs_down_write(&cinode->lock_sem);
1140 list_add_tail(&lock->llist, &cfile->llist->locks);
1141 up_write(&cinode->lock_sem);
1145 * Set the byte-range lock (mandatory style). Returns:
1146 * 1) 0, if we set the lock and don't need to request to the server;
1147 * 2) 1, if no locks prevent us but we need to request to the server;
1148 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1151 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
/* bool wait parameter and rc/exist declarations elided in this listing */
1154 struct cifsLockInfo *conf_lock;
1155 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1161 cifs_down_write(&cinode->lock_sem);
1163 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1164 lock->type, lock->flags, &conf_lock,
1166 if (!exist && cinode->can_cache_brlcks) {
1167 list_add_tail(&lock->llist, &cfile->llist->locks);
1168 up_write(&cinode->lock_sem);
/* return 0 elided in this listing */
/*
 * Conflict exists and wait was requested: queue on the conflicting
 * lock's blocked list and sleep until cifs_del_lock_waiters() empties
 * our blist node, then retry (loop structure partially elided).
 */
1177 list_add_tail(&lock->blist, &conf_lock->blist);
1178 up_write(&cinode->lock_sem);
1179 rc = wait_event_interruptible(lock->block_q,
1180 (lock->blist.prev == &lock->blist) &&
1181 (lock->blist.next == &lock->blist));
1184 cifs_down_write(&cinode->lock_sem);
1185 list_del_init(&lock->blist);
1188 up_write(&cinode->lock_sem);
1193 * Check if there is another lock that prevents us to set the lock (posix
1194 * style). If such a lock exists, update the flock structure with its
1195 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1196 * or leave it the same if we can't. Returns 0 if we don't need to request to
1197 * the server or 1 otherwise.
1200 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
/* rc declaration elided in this listing */
1203 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1204 unsigned char saved_type = flock->fl_type;
1206 if ((flock->fl_flags & FL_POSIX) == 0)
/* early return (rc value elided) for non-POSIX locks */
1209 down_read(&cinode->lock_sem);
1210 posix_test_lock(file, flock);
/* can't cache: restore the caller's type and ask the server (rc = 1) */
1212 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1213 flock->fl_type = saved_type;
1217 up_read(&cinode->lock_sem);
1222 * Set the byte-range lock (posix style). Returns:
1223 * 1) <0, if the error occurs while setting the lock;
1224 * 2) 0, if we set the lock and don't need to request to the server;
1225 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1226 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
/* Set a POSIX-style byte-range lock locally; return contract above. */
1229 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1231 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
/* default result: the lock must also be requested from the server */
1232 int rc = FILE_LOCK_DEFERRED + 1;
1234 if ((flock->fl_flags & FL_POSIX) == 0)
1237 cifs_down_write(&cinode->lock_sem);
/* brlocks cannot be cached: leave it entirely to the server request */
1238 if (!cinode->can_cache_brlcks) {
1239 up_write(&cinode->lock_sem);
1243 rc = posix_lock_file(file, flock, NULL);
1244 up_write(&cinode->lock_sem);
/*
 * Push all cached mandatory byte-range locks for @cfile to the server,
 * batching up to max_num LOCKING_ANDX_RANGE entries per cifs_lockv()
 * request.  Two passes over the list: exclusive locks first, then
 * shared locks (see types[]).
 */
1249 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1252 int rc = 0, stored_rc;
1253 struct cifsLockInfo *li, *tmp;
1254 struct cifs_tcon *tcon;
1255 unsigned int num, max_num, max_buf;
1256 LOCKING_ANDX_RANGE *buf, *cur;
1257 static const int types[] = {
1258 LOCKING_ANDX_LARGE_FILES,
1259 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1264 tcon = tlink_tcon(cfile->tlink);
1267 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1268 * and check it before using.
1270 max_buf = tcon->ses->server->maxBuf;
/* server buffer too small to carry even one lock range */
1271 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1276 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1278 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
/* how many ranges fit in one request after the SMB header */
1280 max_num = (max_buf - sizeof(struct smb_hdr)) /
1281 sizeof(LOCKING_ANDX_RANGE);
1282 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1288 for (i = 0; i < 2; i++) {
1291 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1292 if (li->type != types[i])
/* fill the next wire-format range (little-endian, 64-bit split) */
1294 cur->Pid = cpu_to_le16(li->pid);
1295 cur->LengthLow = cpu_to_le32((u32)li->length);
1296 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1297 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1298 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
/* batch is full - flush it to the server now */
1299 if (++num == max_num) {
1300 stored_rc = cifs_lockv(xid, tcon,
1302 (__u8)li->type, 0, num,
/* send any remaining partial batch for this lock type */
1313 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1314 (__u8)types[i], 0, num, buf);
/* Map a lock-owner pointer to a 32-bit id, XOR-perturbed by a secret so
 * kernel pointer values are not exposed on the wire. */
1326 hash_lockowner(fl_owner_t owner)
1328 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
/* One POSIX lock queued locally before being pushed to the server. */
1331 struct lock_to_push {
1332 struct list_head llist; /* entry in the local locks_to_send list */
/*
 * Push all cached POSIX byte-range locks on @cfile's inode to the
 * server.  Counts the locks first, preallocates lock_to_push entries
 * outside flc_lock (kmalloc may sleep), then copies lock data under
 * flc_lock and finally issues one CIFSSMBPosixLock per lock.
 */
1341 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1343 struct inode *inode = d_inode(cfile->dentry);
1344 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1345 struct file_lock *flock;
1346 struct file_lock_context *flctx = inode->i_flctx;
1347 unsigned int count = 0, i;
1348 int rc = 0, xid, type;
1349 struct list_head locks_to_send, *el;
1350 struct lock_to_push *lck, *tmp;
/* first pass: count the FL_POSIX locks under flc_lock */
1358 spin_lock(&flctx->flc_lock);
1359 list_for_each(el, &flctx->flc_posix) {
1362 spin_unlock(&flctx->flc_lock);
1364 INIT_LIST_HEAD(&locks_to_send);
1367 * Allocating count locks is enough because no FL_POSIX locks can be
1368 * added to the list while we are holding cinode->lock_sem that
1369 * protects locking operations of this inode.
1371 for (i = 0; i < count; i++) {
1372 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1377 list_add_tail(&lck->llist, &locks_to_send);
/* second pass: copy lock details into the preallocated entries */
1380 el = locks_to_send.next;
1381 spin_lock(&flctx->flc_lock);
1382 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1383 if (el == &locks_to_send) {
1385 * The list ended. We don't have enough allocated
1386 * structures - something is really wrong.
1388 cifs_dbg(VFS, "Can't push all brlocks!\n");
1391 length = 1 + flock->fl_end - flock->fl_start;
1392 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1396 lck = list_entry(el, struct lock_to_push, llist);
1397 lck->pid = hash_lockowner(flock->fl_owner);
1398 lck->netfid = cfile->fid.netfid;
1399 lck->length = length;
1401 lck->offset = flock->fl_start;
1403 spin_unlock(&flctx->flc_lock);
/* now safe to sleep: send each queued lock to the server */
1405 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1408 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1409 lck->offset, lck->length, NULL,
1413 list_del(&lck->llist);
/* free any entries left over (error path) */
1421 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1422 list_del(&lck->llist);
1429 cifs_push_locks(struct cifsFileInfo *cfile)
1431 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1432 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1433 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1436 /* we are going to update can_cache_brlcks here - need a write access */
1437 cifs_down_write(&cinode->lock_sem);
1438 if (!cinode->can_cache_brlcks) {
1439 up_write(&cinode->lock_sem);
1443 if (cap_unix(tcon->ses) &&
1444 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1445 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1446 rc = cifs_push_posix_locks(cfile);
1448 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1450 cinode->can_cache_brlcks = false;
1451 up_write(&cinode->lock_sem);
/*
 * Decode a VFS file_lock into CIFS terms: log the flag bits, derive the
 * wire lock *type from the server's value table, and set *lock/*unlock
 * and *wait_flag for the caller.
 */
1456 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1457 bool *wait_flag, struct TCP_Server_Info *server)
1459 if (flock->fl_flags & FL_POSIX)
1460 cifs_dbg(FYI, "Posix\n");
1461 if (flock->fl_flags & FL_FLOCK)
1462 cifs_dbg(FYI, "Flock\n");
1463 if (flock->fl_flags & FL_SLEEP) {
1464 cifs_dbg(FYI, "Blocking lock\n");
1467 if (flock->fl_flags & FL_ACCESS)
1468 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1469 if (flock->fl_flags & FL_LEASE)
1470 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
/* warn about any flag bits we do not understand */
1471 if (flock->fl_flags &
1472 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1473 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1474 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
/* translate the VFS lock type to the server's wire lock type */
1476 *type = server->vals->large_lock_type;
1477 if (flock->fl_type == F_WRLCK) {
1478 cifs_dbg(FYI, "F_WRLCK\n");
1479 *type |= server->vals->exclusive_lock_type;
1481 } else if (flock->fl_type == F_UNLCK) {
1482 cifs_dbg(FYI, "F_UNLCK\n");
1483 *type |= server->vals->unlock_lock_type;
1485 /* Check if unlock includes more than one lock range */
1486 } else if (flock->fl_type == F_RDLCK) {
1487 cifs_dbg(FYI, "F_RDLCK\n");
1488 *type |= server->vals->shared_lock_type;
1490 } else if (flock->fl_type == F_EXLCK) {
1491 cifs_dbg(FYI, "F_EXLCK\n");
1492 *type |= server->vals->exclusive_lock_type;
1494 } else if (flock->fl_type == F_SHLCK) {
1495 cifs_dbg(FYI, "F_SHLCK\n");
1496 *type |= server->vals->shared_lock_type;
1499 cifs_dbg(FYI, "Unknown type of lock\n");
/*
 * Handle F_GETLK: test whether a conflicting lock exists, either via the
 * POSIX path (unix extensions) or by probing the server with a
 * lock-then-unlock of the requested range (mandatory style).
 */
1503 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1504 bool wait_flag, bool posix_lck, unsigned int xid)
1507 __u64 length = 1 + flock->fl_end - flock->fl_start;
1508 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1509 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1510 struct TCP_Server_Info *server = tcon->ses->server;
1511 __u16 netfid = cfile->fid.netfid;
1514 int posix_lock_type;
/* POSIX path: test locally first, then ask the server if needed */
1516 rc = cifs_posix_lock_test(file, flock);
1520 if (type & server->vals->shared_lock_type)
1521 posix_lock_type = CIFS_RDLCK;
1523 posix_lock_type = CIFS_WRLCK;
1524 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1525 hash_lockowner(flock->fl_owner),
1526 flock->fl_start, length, flock,
1527 posix_lock_type, wait_flag);
/* mandatory path: check our own cached locks first */
1531 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1535 /* BB we could chain these into one lock request BB */
/* probe: try to take the lock; success means no conflict existed */
1536 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1539 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1541 flock->fl_type = F_UNLCK;
1543 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
/* shared probe failed: an exclusive lock conflicts */
1548 if (type & server->vals->shared_lock_type) {
1549 flock->fl_type = F_WRLCK;
/* retry the probe as a shared lock to distinguish rd/wr conflicts */
1553 type &= ~server->vals->exclusive_lock_type;
1555 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1556 type | server->vals->shared_lock_type,
1559 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1560 type | server->vals->shared_lock_type, 0, 1, false);
1561 flock->fl_type = F_RDLCK;
1563 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1566 flock->fl_type = F_WRLCK;
/* Move every entry from @source onto @dest (safe against removal). */
1572 cifs_move_llist(struct list_head *source, struct list_head *dest)
1574 struct list_head *li, *tmp;
1575 list_for_each_safe(li, tmp, source)
1576 list_move(li, dest);
/* Wake any waiters blocked on each lock, unlink it, and free the list. */
1580 cifs_free_llist(struct list_head *llist)
1582 struct cifsLockInfo *li, *tmp;
1583 list_for_each_entry_safe(li, tmp, llist, llist) {
1584 cifs_del_lock_waiters(li);
1585 list_del(&li->llist);
/*
 * Remove all of this task's cached locks that fall entirely inside the
 * unlock range and, unless brlocks are cached, send batched
 * LOCKING_ANDX unlock requests to the server.  Locks being unlocked are
 * parked on tmp_llist so they can be restored if a request fails.
 */
1591 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1594 int rc = 0, stored_rc;
1595 static const int types[] = {
1596 LOCKING_ANDX_LARGE_FILES,
1597 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1600 unsigned int max_num, num, max_buf;
1601 LOCKING_ANDX_RANGE *buf, *cur;
1602 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1603 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1604 struct cifsLockInfo *li, *tmp;
1605 __u64 length = 1 + flock->fl_end - flock->fl_start;
1606 struct list_head tmp_llist;
1608 INIT_LIST_HEAD(&tmp_llist);
1611 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1612 * and check it before using.
1614 max_buf = tcon->ses->server->maxBuf;
1615 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1618 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1620 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1622 max_num = (max_buf - sizeof(struct smb_hdr)) /
1623 sizeof(LOCKING_ANDX_RANGE);
1624 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1628 cifs_down_write(&cinode->lock_sem);
/* two passes: one per lock type in types[] */
1629 for (i = 0; i < 2; i++) {
1632 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* skip locks not fully contained in the unlock range */
1633 if (flock->fl_start > li->offset ||
1634 (flock->fl_start + length) <
1635 (li->offset + li->length))
/* only unlock this task's own locks of the matching type */
1637 if (current->tgid != li->pid)
1639 if (types[i] != li->type)
1641 if (cinode->can_cache_brlcks) {
1643 * We can cache brlock requests - simply remove
1644 * a lock from the file's list.
1646 list_del(&li->llist);
1647 cifs_del_lock_waiters(li);
1651 cur->Pid = cpu_to_le16(li->pid);
1652 cur->LengthLow = cpu_to_le32((u32)li->length);
1653 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1654 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1655 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1657 * We need to save a lock here to let us add it again to
1658 * the file's list if the unlock range request fails on
1661 list_move(&li->llist, &tmp_llist);
1662 if (++num == max_num) {
1663 stored_rc = cifs_lockv(xid, tcon,
1665 li->type, num, 0, buf);
1668 * We failed on the unlock range
1669 * request - add all locks from the tmp
1670 * list to the head of the file's list.
1672 cifs_move_llist(&tmp_llist,
1673 &cfile->llist->locks);
1677 * The unlock range request succeeded -
1678 * free the tmp list.
1680 cifs_free_llist(&tmp_llist);
/* flush any remaining partial batch for this lock type */
1687 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1688 types[i], num, 0, buf);
1690 cifs_move_llist(&tmp_llist,
1691 &cfile->llist->locks);
1694 cifs_free_llist(&tmp_llist);
1698 up_write(&cinode->lock_sem);
/*
 * Handle F_SETLK/F_SETLKW: set or remove a byte-range lock, via the
 * POSIX path when available, otherwise via the mandatory-style server
 * ops, and finally record the result with the local VFS lock state.
 */
1704 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1705 bool wait_flag, bool posix_lck, int lock, int unlock,
1709 __u64 length = 1 + flock->fl_end - flock->fl_start;
1710 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1711 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1712 struct TCP_Server_Info *server = tcon->ses->server;
1713 struct inode *inode = d_inode(cfile->dentry);
1716 int posix_lock_type;
/* POSIX path: set locally; <= FILE_LOCK_DEFERRED means done or error */
1718 rc = cifs_posix_lock_set(file, flock);
1719 if (rc <= FILE_LOCK_DEFERRED)
1722 if (type & server->vals->shared_lock_type)
1723 posix_lock_type = CIFS_RDLCK;
1725 posix_lock_type = CIFS_WRLCK;
1728 posix_lock_type = CIFS_UNLCK;
1730 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1731 hash_lockowner(flock->fl_owner),
1732 flock->fl_start, length,
1733 NULL, posix_lock_type, wait_flag);
/* mandatory path: check local conflicts first via cifs_lock_add_if */
1738 struct cifsLockInfo *lock;
1740 lock = cifs_lock_init(flock->fl_start, length, type,
1745 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1754 * Windows 7 server can delay breaking lease from read to None
1755 * if we set a byte-range lock on a file - break it explicitly
1756 * before sending the lock to the server to be sure the next
1757 * read won't conflict with non-overlapping locks due to
1760 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1761 CIFS_CACHE_READ(CIFS_I(inode))) {
1762 cifs_zap_mapping(inode);
1763 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1765 CIFS_I(inode)->oplock = 0;
1768 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1769 type, 1, 0, wait_flag);
/* server accepted the lock - remember it locally */
1775 cifs_lock_add(cfile, lock);
1777 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1780 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1782 * If this is a request to remove all locks because we
1783 * are closing the file, it doesn't matter if the
1784 * unlocking failed as both cifs.ko and the SMB server
1785 * remove the lock on file close
1788 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1789 if (!(flock->fl_flags & FL_CLOSE))
/* record the lock in the VFS so local lockers see it */
1792 rc = locks_lock_file_wait(file, flock);
/*
 * flock(2) entry point: decode the request, decide whether the POSIX
 * lock path applies, and delegate to cifs_setlk().
 */
1797 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1800 int lock = 0, unlock = 0;
1801 bool wait_flag = false;
1802 bool posix_lck = false;
1803 struct cifs_sb_info *cifs_sb;
1804 struct cifs_tcon *tcon;
1805 struct cifsFileInfo *cfile;
/* only FL_FLOCK-style requests are handled here */
1811 if (!(fl->fl_flags & FL_FLOCK))
1814 cfile = (struct cifsFileInfo *)file->private_data;
1815 tcon = tlink_tcon(cfile->tlink);
1817 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1819 cifs_sb = CIFS_FILE_SB(file);
/* POSIX path requires unix extensions and brlocks not disabled */
1821 if (cap_unix(tcon->ses) &&
1822 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1823 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1826 if (!lock && !unlock) {
1828 * if no lock or unlock then nothing to do since we do not
1835 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
/*
 * fcntl(2) byte-range lock entry point: decode the request and dispatch
 * to cifs_getlk() for F_GETLK or cifs_setlk() for set/unset.
 */
1843 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1846 int lock = 0, unlock = 0;
1847 bool wait_flag = false;
1848 bool posix_lck = false;
1849 struct cifs_sb_info *cifs_sb;
1850 struct cifs_tcon *tcon;
1851 struct cifsFileInfo *cfile;
1857 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1858 cmd, flock->fl_flags, flock->fl_type,
1859 flock->fl_start, flock->fl_end);
1861 cfile = (struct cifsFileInfo *)file->private_data;
1862 tcon = tlink_tcon(cfile->tlink);
1864 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1866 cifs_sb = CIFS_FILE_SB(file);
/* POSIX path requires unix extensions and brlocks not disabled */
1868 if (cap_unix(tcon->ses) &&
1869 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1870 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1873 * BB add code here to normalize offset and length to account for
1874 * negative length which we can not accept over the wire.
1876 if (IS_GETLK(cmd)) {
1877 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1882 if (!lock && !unlock) {
1884 * if no lock or unlock then nothing to do since we do not
1891 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1898 * update the file size (if needed) after a write. Should be called with
1899 * the inode->i_lock held
1902 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1903 unsigned int bytes_written)
1905 loff_t end_of_write = offset + bytes_written;
/* only ever advance the cached server EOF, never shrink it */
1907 if (end_of_write > cifsi->server_eof)
1908 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data to the server
 * at *offset, retrying -EAGAIN (reopening an invalidated handle if
 * needed) and chunking by the server's retry size.  Advances *offset
 * and updates the cached EOF/i_size.  Returns bytes written or an
 * error.
 */
1912 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1913 size_t write_size, loff_t *offset)
1916 unsigned int bytes_written = 0;
1917 unsigned int total_written;
1918 struct cifs_tcon *tcon;
1919 struct TCP_Server_Info *server;
1921 struct dentry *dentry = open_file->dentry;
1922 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1923 struct cifs_io_parms io_parms = {0};
1925 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1926 write_size, *offset, dentry);
1928 tcon = tlink_tcon(open_file->tlink);
1929 server = tcon->ses->server;
1931 if (!server->ops->sync_write)
1936 for (total_written = 0; write_size > total_written;
1937 total_written += bytes_written) {
/* retry loop: -EAGAIN means reconnect/reopen and try again */
1939 while (rc == -EAGAIN) {
1943 if (open_file->invalidHandle) {
1944 /* we could deadlock if we called
1945 filemap_fdatawait from here so tell
1946 reopen_file not to flush data to
1948 rc = cifs_reopen_file(open_file, false);
/* chunk size limited by what the server will accept per write */
1953 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1954 (unsigned int)write_size - total_written);
1955 /* iov[0] is reserved for smb header */
1956 iov[1].iov_base = (char *)write_data + total_written;
1957 iov[1].iov_len = len;
1959 io_parms.tcon = tcon;
1960 io_parms.offset = *offset;
1961 io_parms.length = len;
1962 rc = server->ops->sync_write(xid, &open_file->fid,
1963 &io_parms, &bytes_written, iov, 1);
1965 if (rc || (bytes_written == 0)) {
/* track the new server EOF under i_lock */
1973 spin_lock(&d_inode(dentry)->i_lock);
1974 cifs_update_eof(cifsi, *offset, bytes_written);
1975 spin_unlock(&d_inode(dentry)->i_lock);
1976 *offset += bytes_written;
1980 cifs_stats_bytes_written(tcon, total_written);
1982 if (total_written > 0) {
1983 spin_lock(&d_inode(dentry)->i_lock);
1984 if (*offset > d_inode(dentry)->i_size)
1985 i_size_write(d_inode(dentry), *offset);
1986 spin_unlock(&d_inode(dentry)->i_lock);
1988 mark_inode_dirty_sync(d_inode(dentry));
1990 return total_written;
/*
 * Find an open handle on @cifs_inode usable for reading, taking a
 * reference on it.  On multiuser mounts the handle must also belong to
 * the current fsuid.  Returns NULL when no suitable handle exists.
 */
1993 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1996 struct cifsFileInfo *open_file = NULL;
1997 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1999 /* only filter by fsuid on multiuser mounts */
2000 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2003 spin_lock(&cifs_inode->open_file_lock);
2004 /* we could simply get the first_list_entry since write-only entries
2005 are always at the end of the list but since the first entry might
2006 have a close pending, we go through the whole list */
2007 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2008 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2010 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2011 if ((!open_file->invalidHandle) &&
2012 (!open_file->oplock_break_received)) {
2013 /* found a good file */
2014 /* lock it so it will not be closed on us */
2015 cifsFileInfo_get(open_file);
2016 spin_unlock(&cifs_inode->open_file_lock);
2018 } /* else might as well continue, and look for
2019 another, or simply have the caller reopen it
2020 again rather than trying to fix this handle */
2021 } else /* write only file */
2022 break; /* write only files are last so must be done */
2024 spin_unlock(&cifs_inode->open_file_lock);
2028 /* Return -EBADF if no handle is found and general rc otherwise */
/*
 * Find (and reference) an open handle on @cifs_inode usable for
 * writing, honoring FIND_WR_FSUID_ONLY / FIND_WR_WITH_DELETE flags.
 * Prefers a valid handle from the current task; falls back to any
 * available handle, then tries to reopen an invalidated one, retrying
 * up to MAX_REOPEN_ATT times.  Returns 0 with *ret_file set, or an
 * error (-EBADF when no handle is found at all).
 */
2030 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2031 struct cifsFileInfo **ret_file)
2033 struct cifsFileInfo *open_file, *inv_file = NULL;
2034 struct cifs_sb_info *cifs_sb;
2035 bool any_available = false;
2037 unsigned int refind = 0;
2038 bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2039 bool with_delete = flags & FIND_WR_WITH_DELETE;
2043 * Having a null inode here (because mapping->host was set to zero by
2044 * the VFS or MM) should not happen but we had reports of an oops (due
2045 * to it being zero) during stress testcases so we need to check for it
2048 if (cifs_inode == NULL) {
2049 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2054 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2056 /* only filter by fsuid on multiuser mounts */
2057 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2060 spin_lock(&cifs_inode->open_file_lock);
/* give up after too many reopen attempts */
2062 if (refind > MAX_REOPEN_ATT) {
2063 spin_unlock(&cifs_inode->open_file_lock);
2066 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2067 if (!any_available && open_file->pid != current->tgid)
2069 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2071 if (with_delete && !(open_file->fid.access & DELETE))
2073 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2074 if (!open_file->invalidHandle) {
2075 /* found a good writable file */
2076 cifsFileInfo_get(open_file);
2077 spin_unlock(&cifs_inode->open_file_lock);
2078 *ret_file = open_file;
/* remember an invalidated candidate to reopen later */
2082 inv_file = open_file;
2086 /* couldn't find usable FH with same pid, try any available */
2087 if (!any_available) {
2088 any_available = true;
2089 goto refind_writable;
2093 any_available = false;
2094 cifsFileInfo_get(inv_file);
2097 spin_unlock(&cifs_inode->open_file_lock);
/* try to revive the invalidated handle outside the spinlock */
2100 rc = cifs_reopen_file(inv_file, false);
2102 *ret_file = inv_file;
/* reopen failed: demote this handle to the list tail and retry */
2106 spin_lock(&cifs_inode->open_file_lock);
2107 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2108 spin_unlock(&cifs_inode->open_file_lock);
2109 cifsFileInfo_put(inv_file);
2112 spin_lock(&cifs_inode->open_file_lock);
2113 goto refind_writable;
/* Convenience wrapper: return a writable handle or NULL, logging the
 * failure reason at FYI level. */
2119 struct cifsFileInfo *
2120 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2122 struct cifsFileInfo *cfile;
2125 rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2127 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
/*
 * Look up an open file on @tcon whose dentry path matches @name and
 * return a writable handle for its inode.  Paths are rebuilt per entry
 * into a scratch page allocated with alloc_dentry_path().
 */
2133 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2135 struct cifsFileInfo **ret_file)
2137 struct cifsFileInfo *cfile;
2138 void *page = alloc_dentry_path();
2142 spin_lock(&tcon->open_file_lock);
2143 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2144 struct cifsInodeInfo *cinode;
2145 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2146 if (IS_ERR(full_path)) {
2147 spin_unlock(&tcon->open_file_lock);
2148 free_dentry_path(page);
2149 return PTR_ERR(full_path);
2151 if (strcmp(full_path, name))
/* match found: drop the lock before the (sleeping) handle lookup */
2154 cinode = CIFS_I(d_inode(cfile->dentry));
2155 spin_unlock(&tcon->open_file_lock);
2156 free_dentry_path(page);
2157 return cifs_get_writable_file(cinode, flags, ret_file);
2160 spin_unlock(&tcon->open_file_lock);
2161 free_dentry_path(page);
/*
 * Same as cifs_get_writable_path() but returns a readable handle;
 * -ENOENT when no matching open readable handle exists.
 */
2166 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2167 struct cifsFileInfo **ret_file)
2169 struct cifsFileInfo *cfile;
2170 void *page = alloc_dentry_path();
2174 spin_lock(&tcon->open_file_lock);
2175 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2176 struct cifsInodeInfo *cinode;
2177 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2178 if (IS_ERR(full_path)) {
2179 spin_unlock(&tcon->open_file_lock);
2180 free_dentry_path(page);
2181 return PTR_ERR(full_path);
2183 if (strcmp(full_path, name))
/* match found: drop the lock before the (sleeping) handle lookup */
2186 cinode = CIFS_I(d_inode(cfile->dentry));
2187 spin_unlock(&tcon->open_file_lock);
2188 free_dentry_path(page);
2189 *ret_file = find_readable_file(cinode, 0);
2190 return *ret_file ? 0 : -ENOENT;
2193 spin_unlock(&tcon->open_file_lock);
2194 free_dentry_path(page);
/*
 * Write the byte range [from, to) of @page back to the server using any
 * writable handle on the inode.  Used by the writepage path; clamps the
 * range at i_size and treats a write past EOF (racing truncate) as a
 * no-op success.
 */
2198 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2200 struct address_space *mapping = page->mapping;
2201 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2204 int bytes_written = 0;
2205 struct inode *inode;
2206 struct cifsFileInfo *open_file;
2208 if (!mapping || !mapping->host)
2211 inode = page->mapping->host;
2213 offset += (loff_t)from;
2214 write_data = kmap(page);
/* sanity-check the requested range */
2217 if ((to > PAGE_SIZE) || (from > to)) {
2222 /* racing with truncate? */
2223 if (offset > mapping->host->i_size) {
2225 return 0; /* don't care */
2228 /* check to make sure that we are not extending the file */
2229 if (mapping->host->i_size - offset < (loff_t)to)
2230 to = (unsigned)(mapping->host->i_size - offset);
2232 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2235 bytes_written = cifs_write(open_file, open_file->pid,
2236 write_data, to - from, &offset);
2237 cifsFileInfo_put(open_file);
2238 /* Does mm or vfs already set times? */
2239 inode->i_atime = inode->i_mtime = current_time(inode);
2240 if ((bytes_written > 0) && (offset))
2242 else if (bytes_written < 0)
2247 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
/* retryable errors are reported back so writeback can redirty */
2248 if (!is_retryable_error(rc))
/*
 * Allocate a cifs_writedata for up to @tofind pages and fill its page
 * array with dirty pages from @mapping starting at *index (which is
 * advanced).  *found_pages receives the number of pages grabbed.
 */
2256 static struct cifs_writedata *
2257 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2258 pgoff_t end, pgoff_t *index,
2259 unsigned int *found_pages)
2261 struct cifs_writedata *wdata;
2263 wdata = cifs_writedata_alloc((unsigned int)tofind,
2264 cifs_writev_complete);
2268 *found_pages = find_get_pages_range_tag(mapping, index, end,
2269 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
/*
 * Lock and validate the pages collected in @wdata, keeping only a run
 * of consecutive dirty pages suitable for one write request.  Marks the
 * kept pages for writeback; pages that are skipped are released.
 * Returns the number of pages ready to send and updates *index, *next
 * and *done for the caller's scan loop.
 */
2274 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2275 struct address_space *mapping,
2276 struct writeback_control *wbc,
2277 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2279 unsigned int nr_pages = 0, i;
2282 for (i = 0; i < found_pages; i++) {
2283 page = wdata->pages[i];
2285 * At this point we hold neither the i_pages lock nor the
2286 * page lock: the page may be truncated or invalidated
2287 * (changing page->mapping to NULL), or even swizzled
2288 * back from swapper_space to tmpfs file mapping
2293 else if (!trylock_page(page))
2296 if (unlikely(page->mapping != mapping)) {
2301 if (!wbc->range_cyclic && page->index > end) {
2307 if (*next && (page->index != *next)) {
2308 /* Not next consecutive page */
2313 if (wbc->sync_mode != WB_SYNC_NONE)
2314 wait_on_page_writeback(page);
2316 if (PageWriteback(page) ||
2317 !clear_page_dirty_for_io(page)) {
2323 * This actually clears the dirty bit in the radix tree.
2324 * See cifs_writepage() for more commentary.
2326 set_page_writeback(page);
/* page lies entirely past EOF - nothing to write for it */
2327 if (page_offset(page) >= i_size_read(mapping->host)) {
2330 end_page_writeback(page);
2334 wdata->pages[i] = page;
2335 *next = page->index + 1;
2339 /* reset index to refind any pages skipped */
2341 *index = wdata->pages[0]->index + 1;
2343 /* put any pages we aren't going to use */
2344 for (i = nr_pages; i < found_pages; i++) {
2345 put_page(wdata->pages[i]);
2346 wdata->pages[i] = NULL;
/*
 * Fill in the remaining cifs_writedata fields (offset, sizes, pid) for
 * @nr_pages prepared pages and submit the async write.  The final page
 * may be short (tailsz) when i_size ends inside it.
 */
2353 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2354 struct address_space *mapping, struct writeback_control *wbc)
2358 wdata->sync_mode = wbc->sync_mode;
2359 wdata->nr_pages = nr_pages;
2360 wdata->offset = page_offset(wdata->pages[0]);
2361 wdata->pagesz = PAGE_SIZE;
2362 wdata->tailsz = min(i_size_read(mapping->host) -
2363 page_offset(wdata->pages[nr_pages - 1]),
2365 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2366 wdata->pid = wdata->cfile->pid;
/* re-check credits against the actual request size before sending */
2368 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2372 if (wdata->cfile->invalidHandle)
2375 rc = wdata->server->ops->async_writev(wdata,
2376 cifs_writedata_release);
/*
 * ->writepages: write back dirty pages in wsize-limited batches.
 * Each iteration obtains write credits, gathers and prepares a run of
 * consecutive dirty pages, and submits them as one async write.
 * Falls back to generic_writepages() when wsize is below one page.
 */
2381 static int cifs_writepages(struct address_space *mapping,
2382 struct writeback_control *wbc)
2384 struct inode *inode = mapping->host;
2385 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2386 struct TCP_Server_Info *server;
2387 bool done = false, scanned = false, range_whole = false;
2389 struct cifs_writedata *wdata;
2390 struct cifsFileInfo *cfile = NULL;
2396 * If wsize is smaller than the page cache size, default to writing
2397 * one page at a time via cifs_writepage
2399 if (cifs_sb->ctx->wsize < PAGE_SIZE)
2400 return generic_writepages(mapping, wbc);
2403 if (wbc->range_cyclic) {
2404 index = mapping->writeback_index; /* Start from prev offset */
2407 index = wbc->range_start >> PAGE_SHIFT;
2408 end = wbc->range_end >> PAGE_SHIFT;
2409 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2413 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2416 while (!done && index <= end) {
2417 unsigned int i, nr_pages, found_pages, wsize;
2418 pgoff_t next = 0, tofind, saved_index = index;
2419 struct cifs_credits credits_on_stack;
2420 struct cifs_credits *credits = &credits_on_stack;
2421 int get_file_rc = 0;
2424 cifsFileInfo_put(cfile);
2426 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2428 /* in case of an error store it to return later */
/* block until the server grants credits for up to wsize bytes */
2432 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2439 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2441 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2446 add_credits_and_wake_if(server, credits, 0);
2450 if (found_pages == 0) {
2451 kref_put(&wdata->refcount, cifs_writedata_release);
2452 add_credits_and_wake_if(server, credits, 0);
2456 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2457 end, &index, &next, &done);
2459 /* nothing to write? */
2460 if (nr_pages == 0) {
2461 kref_put(&wdata->refcount, cifs_writedata_release);
2462 add_credits_and_wake_if(server, credits, 0);
2466 wdata->credits = credits_on_stack;
2467 wdata->cfile = cfile;
2468 wdata->server = server;
2471 if (!wdata->cfile) {
2472 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2474 if (is_retryable_error(get_file_rc))
2479 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2481 for (i = 0; i < nr_pages; ++i)
2482 unlock_page(wdata->pages[i]);
2484 /* send failure -- clean up the mess */
2486 add_credits_and_wake_if(server, &wdata->credits, 0);
2487 for (i = 0; i < nr_pages; ++i) {
2488 if (is_retryable_error(rc))
2489 redirty_page_for_writepage(wbc,
2492 SetPageError(wdata->pages[i]);
2493 end_page_writeback(wdata->pages[i]);
2494 put_page(wdata->pages[i]);
2496 if (!is_retryable_error(rc))
2497 mapping_set_error(mapping, rc);
2499 kref_put(&wdata->refcount, cifs_writedata_release);
/* data integrity sync must retry the same range on -EAGAIN */
2501 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2502 index = saved_index;
2506 /* Return immediately if we received a signal during writing */
2507 if (is_interrupt_error(rc)) {
2512 if (rc != 0 && saved_rc == 0)
2515 wbc->nr_to_write -= nr_pages;
2516 if (wbc->nr_to_write <= 0)
2522 if (!scanned && !done) {
2524 * We hit the last page and there is more work to be done: wrap
2525 * back to the start of the file
2535 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2536 mapping->writeback_index = index;
2539 cifsFileInfo_put(cfile);
2541 /* Indication to update ctime and mtime as close is deferred */
2542 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
/*
 * Write a single locked page back to the server via
 * cifs_partialpagewrite(), redirtying it on retryable errors.
 */
2547 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2553 /* BB add check for wbc flags */
2555 if (!PageUptodate(page))
2556 cifs_dbg(FYI, "ppw - page not up to date\n");
2559 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2561 * A writepage() implementation always needs to do either this,
2562 * or re-dirty the page with "redirty_page_for_writepage()" in
2563 * the case of a failure.
2565 * Just unlocking the page will cause the radix tree tag-bits
2566 * to fail to update with the state of the page correctly.
2568 set_page_writeback(page);
2570 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2571 if (is_retryable_error(rc)) {
2572 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2574 redirty_page_for_writepage(wbc, page);
2575 } else if (rc != 0) {
2577 mapping_set_error(page->mapping, rc);
2579 SetPageUptodate(page);
2581 end_page_writeback(page);
/* address_space ->writepage entry: delegates to cifs_writepage_locked(). */
2587 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2589 int rc = cifs_writepage_locked(page, wbc);
/*
 * ->write_end: complete a buffered write.  An up-to-date page is just
 * marked dirty for later writeback; a partially-filled page is written
 * straight to the server since we cannot mark it up to date.  Updates
 * i_size when the write extended the file.
 */
2594 static int cifs_write_end(struct file *file, struct address_space *mapping,
2595 loff_t pos, unsigned len, unsigned copied,
2596 struct page *page, void *fsdata)
2599 struct inode *inode = mapping->host;
2600 struct cifsFileInfo *cfile = file->private_data;
2601 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* pick the pid credited with the write (forwarded on RWPIDFORWARD) */
2604 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2607 pid = current->tgid;
2609 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2612 if (PageChecked(page)) {
2614 SetPageUptodate(page);
2615 ClearPageChecked(page);
2616 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2617 SetPageUptodate(page);
2619 if (!PageUptodate(page)) {
2621 unsigned offset = pos & (PAGE_SIZE - 1);
2625 /* this is probably better than directly calling
2626 partialpage_write since in this function the file handle is
2627 known which we might as well leverage */
2628 /* BB check if anything else missing out of ppw
2629 such as updating last write time */
2630 page_data = kmap(page);
2631 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2632 /* if (rc < 0) should we set writebehind rc? */
2639 set_page_dirty(page);
2643 spin_lock(&inode->i_lock);
2644 if (pos > inode->i_size)
2645 i_size_write(inode, pos);
2646 spin_unlock(&inode->i_lock);
2651 /* Indication to update ctime and mtime as close is deferred */
2652 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
/*
 * fsync for strict-cache mounts: flush dirty pages in [start, end],
 * zap the page cache when we lack read caching (oplock/lease), then
 * ask the server to flush via the protocol-specific ->flush op,
 * unless the mount disabled server-side sync (CIFS_MOUNT_NOSSYNC).
 * Invalidate failures are logged and ignored — fsync only promises
 * data is on the server, not that the local cache is fresh.
 */
2657 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2662 struct cifs_tcon *tcon;
2663 struct TCP_Server_Info *server;
2664 struct cifsFileInfo *smbfile = file->private_data;
2665 struct inode *inode = file_inode(file);
2666 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2668 rc = file_write_and_wait_range(file, start, end);
2670 trace_cifs_fsync_err(inode->i_ino, rc);
2676 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2679 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2680 rc = cifs_zap_mapping(inode);
2682 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2683 rc = 0; /* don't care about it in fsync */
2687 tcon = tlink_tcon(smbfile->tlink);
2688 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2689 server = tcon->ses->server;
2690 if (server->ops->flush)
2691 rc = server->ops->flush(xid, tcon, &smbfile->fid);
/*
 * Non-strict fsync: flush dirty pages in the range, then request a
 * server-side flush (->flush) unless CIFS_MOUNT_NOSSYNC is set.
 * Unlike cifs_strict_fsync() this never invalidates the page cache.
 */
2700 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2704 struct cifs_tcon *tcon;
2705 struct TCP_Server_Info *server;
2706 struct cifsFileInfo *smbfile = file->private_data;
2707 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2709 rc = file_write_and_wait_range(file, start, end);
2711 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2717 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2720 tcon = tlink_tcon(smbfile->tlink);
2721 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2722 server = tcon->ses->server;
2723 if (server->ops->flush)
2724 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2734 * As file closes, flush all cached write data for this inode checking
2735 * for write behind errors.
2737 int cifs_flush(struct file *file, fl_owner_t id)
2739 struct inode *inode = file_inode(file);
2742 if (file->f_mode & FMODE_WRITE)
2743 rc = filemap_write_and_wait(inode->i_mapping);
2745 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2747 trace_cifs_flush_err(inode->i_ino, rc);
/*
 * Allocate @num_pages highmem-capable pages into @pages. On allocation
 * failure the elided lines apparently record how many were obtained and
 * return -ENOMEM; the trailing loop frees the partial allocation —
 * confirm cleanup details against the full source.
 */
2752 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2757 for (i = 0; i < num_pages; i++) {
2758 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2761 * save number of pages we have already allocated and
2762 * return with ENOMEM error
2771 for (i = 0; i < num_pages; i++)
/*
 * Clamp @len to the write size @wsize, store the clamped length in
 * *cur_len, and return the number of PAGE_SIZE pages needed for it.
 */
2778 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2783 clen = min_t(const size_t, len, wsize);
2784 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * kref release for uncached-write wdata: drop the aio-ctx reference,
 * put every page attached to the wdata, then free the wdata itself
 * via the common cifs_writedata_release().
 */
2793 cifs_uncached_writedata_release(struct kref *refcount)
2796 struct cifs_writedata *wdata = container_of(refcount,
2797 struct cifs_writedata, refcount);
2799 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2800 for (i = 0; i < wdata->nr_pages; i++)
2801 put_page(wdata->pages[i]);
2802 cifs_writedata_release(refcount);
2805 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
/*
 * Work-queue completion for an uncached async write: advance the
 * server EOF tracking (and i_size if the server EOF passed it),
 * signal waiters, let the aio context collect results, and drop
 * the wdata reference (which may free the aio ctx transitively).
 */
2808 cifs_uncached_writev_complete(struct work_struct *work)
2810 struct cifs_writedata *wdata = container_of(work,
2811 struct cifs_writedata, work);
2812 struct inode *inode = d_inode(wdata->cfile->dentry);
2813 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2815 spin_lock(&inode->i_lock);
2816 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2817 if (cifsi->server_eof > inode->i_size)
2818 i_size_write(inode, cifsi->server_eof);
2819 spin_unlock(&inode->i_lock);
2821 complete(&wdata->done);
2822 collect_uncached_write_data(wdata->ctx);
2823 /* the below call can possibly free the last ref to aio ctx */
2824 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Copy up to *len bytes from user iter @from into the wdata's pages.
 * A short copy (unmapped user area) stops the fill early. On return
 * *len holds the bytes actually copied and *num_pages the pages used
 * (per the comments, i + 1 after the loop). Returns -EFAULT when
 * nothing at all could be copied — bogus iovec address.
 */
2828 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2829 size_t *len, unsigned long *num_pages)
2831 size_t save_len, copied, bytes, cur_len = *len;
2832 unsigned long i, nr_pages = *num_pages;
2835 for (i = 0; i < nr_pages; i++) {
2836 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2837 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2840 * If we didn't copy as much as we expected, then that
2841 * may mean we trod into an unmapped area. Stop copying
2842 * at that point. On the next pass through the big
2843 * loop, we'll likely end up getting a zero-length
2844 * write and bailing out of it.
2849 cur_len = save_len - cur_len;
2853 * If we have no data to send, then that probably means that
2854 * the copy above failed altogether. That's most likely because
2855 * the address in the iovec was bogus. Return -EFAULT and let
2856 * the caller free anything we allocated and bail out.
2862 * i + 1 now represents the number of pages we actually used in
2863 * the copy phase above.
/*
 * Resend a whole wdata after a retryable failure: reopen an invalid
 * handle, wait until the server grants enough MTU credits to cover
 * wdata->bytes in one shot, re-register the SMB-Direct MR if present,
 * and reissue ->async_writev. Loops while the send itself returns
 * -EAGAIN, rolling credits back between attempts. On success the
 * wdata is queued on @wdata_list; on final failure its ref is dropped.
 */
2870 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2871 struct cifs_aio_ctx *ctx)
2874 struct cifs_credits credits;
2876 struct TCP_Server_Info *server = wdata->server;
2879 if (wdata->cfile->invalidHandle) {
2880 rc = cifs_reopen_file(wdata->cfile, false);
2889 * Wait for credits to resend this wdata.
2890 * Note: we are attempting to resend the whole wdata not in
2894 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2899 if (wsize < wdata->bytes) {
2900 add_credits_and_wake_if(server, &credits, 0);
2903 } while (wsize < wdata->bytes);
2904 wdata->credits = credits;
2906 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2909 if (wdata->cfile->invalidHandle)
2912 #ifdef CONFIG_CIFS_SMB_DIRECT
2914 wdata->mr->need_invalidate = true;
2915 smbd_deregister_mr(wdata->mr);
2919 rc = server->ops->async_writev(wdata,
2920 cifs_uncached_writedata_release);
2924 /* If the write was successfully sent, we are done */
2926 list_add_tail(&wdata->list, wdata_list);
2930 /* Roll back credits and retry if needed */
2931 add_credits_and_wake_if(server, &wdata->credits, 0);
2932 } while (rc == -EAGAIN);
2935 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Split an uncached write of @len bytes at @offset into wsize-bounded
 * wdata segments and launch each via ->async_writev, queueing them on
 * @wdata_list for collect_uncached_write_data(). Two data paths:
 *  - direct I/O: pin the user pages in place (iov_iter_get_pages_alloc)
 *    and send from them directly;
 *  - buffered: allocate kernel pages and copy the iovec data in
 *    (wdata_fill_from_iovec), trimming unused pages afterwards.
 * Each segment waits for MTU credits first and rolls them back on any
 * failure; on -EAGAIN the iter is rewound to the saved start and the
 * segment is retried. The per-channel server is chosen once up front.
 */
2940 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2941 struct cifsFileInfo *open_file,
2942 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2943 struct cifs_aio_ctx *ctx)
2947 unsigned long nr_pages, num_pages, i;
2948 struct cifs_writedata *wdata;
2949 struct iov_iter saved_from = *from;
2950 loff_t saved_offset = offset;
2952 struct TCP_Server_Info *server;
2953 struct page **pagevec;
2957 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2958 pid = open_file->pid;
2960 pid = current->tgid;
2962 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2967 struct cifs_credits credits_on_stack;
2968 struct cifs_credits *credits = &credits_on_stack;
2970 if (open_file->invalidHandle) {
2971 rc = cifs_reopen_file(open_file, false);
2978 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2983 cur_len = min_t(const size_t, len, wsize);
2985 if (ctx->direct_io) {
2988 result = iov_iter_get_pages_alloc(
2989 from, &pagevec, cur_len, &start);
2992 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2993 result, iov_iter_type(from),
2994 from->iov_offset, from->count);
2998 add_credits_and_wake_if(server, credits, 0);
3001 cur_len = (size_t)result;
3002 iov_iter_advance(from, cur_len);
3005 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3007 wdata = cifs_writedata_direct_alloc(pagevec,
3008 cifs_uncached_writev_complete);
3011 add_credits_and_wake_if(server, credits, 0);
3016 wdata->page_offset = start;
3019 cur_len - (PAGE_SIZE - start) -
3020 (nr_pages - 2) * PAGE_SIZE :
3023 nr_pages = get_numpages(wsize, len, &cur_len);
3024 wdata = cifs_writedata_alloc(nr_pages,
3025 cifs_uncached_writev_complete);
3028 add_credits_and_wake_if(server, credits, 0);
3032 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3034 kvfree(wdata->pages);
3036 add_credits_and_wake_if(server, credits, 0);
3040 num_pages = nr_pages;
3041 rc = wdata_fill_from_iovec(
3042 wdata, from, &cur_len, &num_pages);
3044 for (i = 0; i < nr_pages; i++)
3045 put_page(wdata->pages[i]);
3046 kvfree(wdata->pages);
3048 add_credits_and_wake_if(server, credits, 0);
3053 * Bring nr_pages down to the number of pages we
3054 * actually used, and free any pages that we didn't use.
3056 for ( ; nr_pages > num_pages; nr_pages--)
3057 put_page(wdata->pages[nr_pages - 1]);
3059 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3062 wdata->sync_mode = WB_SYNC_ALL;
3063 wdata->nr_pages = nr_pages;
3064 wdata->offset = (__u64)offset;
3065 wdata->cfile = cifsFileInfo_get(open_file);
3066 wdata->server = server;
3068 wdata->bytes = cur_len;
3069 wdata->pagesz = PAGE_SIZE;
3070 wdata->credits = credits_on_stack;
3072 kref_get(&ctx->refcount);
3074 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3077 if (wdata->cfile->invalidHandle)
3080 rc = server->ops->async_writev(wdata,
3081 cifs_uncached_writedata_release);
3085 add_credits_and_wake_if(server, &wdata->credits, 0);
3086 kref_put(&wdata->refcount,
3087 cifs_uncached_writedata_release);
3088 if (rc == -EAGAIN) {
3090 iov_iter_advance(from, offset - saved_offset);
3096 list_add_tail(&wdata->list, wdata_list);
/*
 * Gather results of all in-flight uncached writes on the aio ctx, in
 * list (offset) order, under aio_mutex. Non-blocking: bails out as
 * soon as a wdata hasn't completed yet (called again from the next
 * completion). A wdata that failed with -EAGAIN is resent — reusing
 * the wdata for direct I/O (cifs_resend_wdata) or re-copying from the
 * saved iter for buffered (cifs_write_from_iter). When all are done,
 * records bytes-written stats, invalidates the inode mapping, stores
 * the final rc/length in the ctx and completes the iocb or waiter.
 */
3105 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3107 struct cifs_writedata *wdata, *tmp;
3108 struct cifs_tcon *tcon;
3109 struct cifs_sb_info *cifs_sb;
3110 struct dentry *dentry = ctx->cfile->dentry;
3113 tcon = tlink_tcon(ctx->cfile->tlink);
3114 cifs_sb = CIFS_SB(dentry->d_sb);
3116 mutex_lock(&ctx->aio_mutex);
3118 if (list_empty(&ctx->list)) {
3119 mutex_unlock(&ctx->aio_mutex);
3125 * Wait for and collect replies for any successful sends in order of
3126 * increasing offset. Once an error is hit, then return without waiting
3127 * for any more replies.
3130 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3132 if (!try_wait_for_completion(&wdata->done)) {
3133 mutex_unlock(&ctx->aio_mutex);
3140 ctx->total_len += wdata->bytes;
3142 /* resend call if it's a retryable error */
3143 if (rc == -EAGAIN) {
3144 struct list_head tmp_list;
3145 struct iov_iter tmp_from = ctx->iter;
3147 INIT_LIST_HEAD(&tmp_list);
3148 list_del_init(&wdata->list);
3151 rc = cifs_resend_wdata(
3152 wdata, &tmp_list, ctx);
3154 iov_iter_advance(&tmp_from,
3155 wdata->offset - ctx->pos);
3157 rc = cifs_write_from_iter(wdata->offset,
3158 wdata->bytes, &tmp_from,
3159 ctx->cfile, cifs_sb, &tmp_list,
3162 kref_put(&wdata->refcount,
3163 cifs_uncached_writedata_release);
3166 list_splice(&tmp_list, &ctx->list);
3170 list_del_init(&wdata->list);
3171 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3174 cifs_stats_bytes_written(tcon, ctx->total_len);
3175 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3177 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3179 mutex_unlock(&ctx->aio_mutex);
3181 if (ctx->iocb && ctx->iocb->ki_complete)
3182 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3184 complete(&ctx->done);
/*
 * Common entry for uncached/direct writev. Validates the write
 * (generic_write_checks), requires the protocol to support
 * ->async_writev, builds an aio ctx (copying the iter for buffered
 * writes via setup_aio_ctx_iter), kicks off cifs_write_from_iter
 * under aio_mutex, then either returns -EIOCBQUEUED for async iocbs
 * or waits (killably) for completion and advances ki_pos by the
 * total written. ITER_KVEC direct writes fall back to the buffered
 * path since iov_iter_get_pages_alloc can't pin kvec memory.
 */
3187 static ssize_t __cifs_writev(
3188 struct kiocb *iocb, struct iov_iter *from, bool direct)
3190 struct file *file = iocb->ki_filp;
3191 ssize_t total_written = 0;
3192 struct cifsFileInfo *cfile;
3193 struct cifs_tcon *tcon;
3194 struct cifs_sb_info *cifs_sb;
3195 struct cifs_aio_ctx *ctx;
3196 struct iov_iter saved_from = *from;
3197 size_t len = iov_iter_count(from);
3201 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3202 * In this case, fall back to non-direct write function.
3203 * this could be improved by getting pages directly in ITER_KVEC
3205 if (direct && iov_iter_is_kvec(from)) {
3206 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3210 rc = generic_write_checks(iocb, from);
3214 cifs_sb = CIFS_FILE_SB(file);
3215 cfile = file->private_data;
3216 tcon = tlink_tcon(cfile->tlink);
3218 if (!tcon->ses->server->ops->async_writev)
3221 ctx = cifs_aio_ctx_alloc();
3225 ctx->cfile = cifsFileInfo_get(cfile);
3227 if (!is_sync_kiocb(iocb))
3230 ctx->pos = iocb->ki_pos;
3233 ctx->direct_io = true;
3237 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3239 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3244 /* grab a lock here due to read response handlers can access ctx */
3245 mutex_lock(&ctx->aio_mutex);
3247 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3248 cfile, cifs_sb, &ctx->list, ctx);
3251 * If at least one write was successfully sent, then discard any rc
3252 * value from the later writes. If the other write succeeds, then
3253 * we'll end up returning whatever was written. If it fails, then
3254 * we'll get a new rc value from that.
3256 if (!list_empty(&ctx->list))
3259 mutex_unlock(&ctx->aio_mutex);
3262 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3266 if (!is_sync_kiocb(iocb)) {
3267 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3268 return -EIOCBQUEUED;
3271 rc = wait_for_completion_killable(&ctx->done);
3273 mutex_lock(&ctx->aio_mutex);
3274 ctx->rc = rc = -EINTR;
3275 total_written = ctx->total_len;
3276 mutex_unlock(&ctx->aio_mutex);
3279 total_written = ctx->total_len;
3282 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3284 if (unlikely(!total_written))
3287 iocb->ki_pos += total_written;
3288 return total_written;
/* Direct (uncached, page-pinning) writev entry point. */
3291 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3293 return __cifs_writev(iocb, from, true);
/* Buffered-copy (non-direct) uncached writev entry point. */
3296 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3298 return __cifs_writev(iocb, from, false);
/*
 * Cached write used by the strict-cache path when we hold write
 * caching: takes lock_sem shared to keep the brlock list stable,
 * refuses the write if a mandatory byte-range lock conflicts
 * (cifs_find_lock_conflict with exclusive_lock_type), otherwise
 * writes through the page cache (__generic_file_write_iter) and
 * finishes with generic_write_sync. The inode lock is presumably
 * taken in the elided lines before generic_write_checks — confirm.
 */
3302 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3304 struct file *file = iocb->ki_filp;
3305 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3306 struct inode *inode = file->f_mapping->host;
3307 struct cifsInodeInfo *cinode = CIFS_I(inode);
3308 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3313 * We need to hold the sem to be sure nobody modifies lock list
3314 * with a brlock that prevents writing.
3316 down_read(&cinode->lock_sem);
3318 rc = generic_write_checks(iocb, from);
3322 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3323 server->vals->exclusive_lock_type, 0,
3324 NULL, CIFS_WRITE_OP))
3325 rc = __generic_file_write_iter(iocb, from);
3329 up_read(&cinode->lock_sem);
3330 inode_unlock(inode);
3333 rc = generic_write_sync(iocb, rc);
/*
 * Strict-cache writev dispatcher, bracketed by cifs_get_writer /
 * cifs_put_writer. With write caching (oplock/lease) held: use the
 * fully generic cached path when POSIX brlock semantics apply (unix
 * extensions + fcntl cap + !NOPOSIXBRL), else cifs_writev() which
 * checks mandatory locks first. Without write caching: write
 * uncached (cifs_user_writev), then if we still hold read caching,
 * zap the now-stale page cache.
 */
3338 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3340 struct inode *inode = file_inode(iocb->ki_filp);
3341 struct cifsInodeInfo *cinode = CIFS_I(inode);
3342 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3343 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3344 iocb->ki_filp->private_data;
3345 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3348 written = cifs_get_writer(cinode);
3352 if (CIFS_CACHE_WRITE(cinode)) {
3353 if (cap_unix(tcon->ses) &&
3354 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3355 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3356 written = generic_file_write_iter(iocb, from);
3359 written = cifs_writev(iocb, from);
3363 * For non-oplocked files in strict cache mode we need to write the data
3364 * to the server exactly from the pos to pos+len-1 rather than flush all
3365 * affected pages because it may cause a error with mandatory locks on
3366 * these pages but not on the region from pos to ppos+len-1.
3368 written = cifs_user_writev(iocb, from);
3369 if (CIFS_CACHE_READ(cinode)) {
3371 * We have read level caching and we have just sent a write
3372 * request to the server thus making data in the cache stale.
3373 * Zap the cache and set oplock/lease level to NONE to avoid
3374 * reading stale data from the cache. All subsequent read
3375 * operations will read new data from the server.
3377 cifs_zap_mapping(inode);
3378 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3383 cifs_put_writer(cinode);
/*
 * Allocate and initialize a zeroed cifs_readdata that adopts the
 * caller-supplied page array, wiring up its refcount, list head,
 * completion, and work item (@complete runs on I/O completion).
 * Returns NULL on allocation failure.
 */
3387 static struct cifs_readdata *
3388 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3390 struct cifs_readdata *rdata;
3392 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3393 if (rdata != NULL) {
3394 rdata->pages = pages;
3395 kref_init(&rdata->refcount);
3396 INIT_LIST_HEAD(&rdata->list);
3397 init_completion(&rdata->done);
3398 INIT_WORK(&rdata->work, complete);
/*
 * Convenience wrapper: allocate an nr_pages-slot page-pointer array
 * and build a readdata around it via cifs_readdata_direct_alloc().
 */
3404 static struct cifs_readdata *
3405 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3407 struct page **pages =
3408 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3409 struct cifs_readdata *ret = NULL;
3412 ret = cifs_readdata_direct_alloc(pages, complete);
/*
 * kref release for a readdata: deregister its SMB-Direct MR when
 * built with CONFIG_CIFS_SMB_DIRECT, drop the file reference, and
 * free the page-pointer array (the rdata itself is freed in the
 * elided tail).
 */
3421 cifs_readdata_release(struct kref *refcount)
3423 struct cifs_readdata *rdata = container_of(refcount,
3424 struct cifs_readdata, refcount);
3425 #ifdef CONFIG_CIFS_SMB_DIRECT
3427 smbd_deregister_mr(rdata->mr);
3432 cifsFileInfo_put(rdata->cfile);
3434 kvfree(rdata->pages);
/*
 * Populate rdata->pages with @nr_pages freshly allocated pages.
 * On a mid-loop allocation failure, release every page allocated
 * so far and NULL the slots before returning the error.
 */
3439 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3445 for (i = 0; i < nr_pages; i++) {
3446 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3451 rdata->pages[i] = page;
3455 unsigned int nr_page_failed = i;
3457 for (i = 0; i < nr_page_failed; i++) {
3458 put_page(rdata->pages[i]);
3459 rdata->pages[i] = NULL;
/*
 * kref release for uncached-read rdata: drop the aio-ctx reference,
 * put all attached pages, then hand off to cifs_readdata_release().
 */
3466 cifs_uncached_readdata_release(struct kref *refcount)
3468 struct cifs_readdata *rdata = container_of(refcount,
3469 struct cifs_readdata, refcount);
3472 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3473 for (i = 0; i < rdata->nr_pages; i++) {
3474 put_page(rdata->pages[i]);
3476 cifs_readdata_release(refcount);
3480 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3481 * @rdata: the readdata response with list of pages holding data
3482 * @iter: destination for our data
3484 * This function copies data from a list of pages in a readdata response into
3485 * an array of iovecs. It will first calculate where the data should go
3486 * based on the info in the readdata and then copy the data into that spot.
3489 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3491 size_t remaining = rdata->got_bytes;
3494 for (i = 0; i < rdata->nr_pages; i++) {
3495 struct page *page = rdata->pages[i];
3496 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3499 if (unlikely(iov_iter_is_pipe(iter))) {
/* pipe iters can't take copy_page_to_iter of a highmem page here;
 * map and use the flat copy_to_iter path instead */
3500 void *addr = kmap_atomic(page);
3502 written = copy_to_iter(addr, copy, iter);
3503 kunmap_atomic(addr);
3505 written = copy_page_to_iter(page, 0, copy, iter);
3506 remaining -= written;
3507 if (written < copy && iov_iter_count(iter) > 0)
/* short copy with room left in the iter: a fault — stop early;
 * leftover "remaining" turns into -EFAULT below */
3510 return remaining ? -EFAULT : 0;
3513 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
/*
 * Work-queue completion for an uncached async read: wake waiters,
 * let the aio context collect results, then drop the rdata reference
 * (which may transitively free the aio ctx).
 */
3516 cifs_uncached_readv_complete(struct work_struct *work)
3518 struct cifs_readdata *rdata = container_of(work,
3519 struct cifs_readdata, work);
3521 complete(&rdata->done);
3522 collect_uncached_read_data(rdata->ctx);
3523 /* the below call can possibly free the last ref to aio ctx */
3524 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Fill the rdata's pages with up to @len bytes of response data,
 * either from @iter (transform/decrypt path) via copy_page_from_iter,
 * or straight off the socket via cifs_read_page_from_socket.
 * Pages beyond the received data are released early; a short final
 * page sets rdata->tailsz. Returns bytes received, or the error when
 * nothing was received (and always the error for -ECONNABORTED).
 */
3528 uncached_fill_pages(struct TCP_Server_Info *server,
3529 struct cifs_readdata *rdata, struct iov_iter *iter,
3534 unsigned int nr_pages = rdata->nr_pages;
3535 unsigned int page_offset = rdata->page_offset;
3537 rdata->got_bytes = 0;
3538 rdata->tailsz = PAGE_SIZE;
3539 for (i = 0; i < nr_pages; i++) {
3540 struct page *page = rdata->pages[i];
3542 unsigned int segment_size = rdata->pagesz;
3545 segment_size -= page_offset;
3551 /* no need to hold page hostage */
3552 rdata->pages[i] = NULL;
3559 if (len >= segment_size)
3560 /* enough data to fill the page */
3563 rdata->tailsz = len;
3567 result = copy_page_from_iter(
3568 page, page_offset, n, iter);
3569 #ifdef CONFIG_CIFS_SMB_DIRECT
3574 result = cifs_read_page_from_socket(
3575 server, page, page_offset, n);
3579 rdata->got_bytes += result;
3582 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3583 rdata->got_bytes : result;
/* Socket path: fill pages directly from the server connection. */
3587 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3588 struct cifs_readdata *rdata, unsigned int len)
3590 return uncached_fill_pages(server, rdata, NULL, len);
/* Iter path: fill pages from an already-received (e.g. decrypted) iov_iter. */
3594 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3595 struct cifs_readdata *rdata,
3596 struct iov_iter *iter)
3598 return uncached_fill_pages(server, rdata, iter, iter->count);
/*
 * Resend a whole rdata after a retryable failure — mirror of
 * cifs_resend_wdata: reopen an invalid handle, wait for enough MTU
 * credits to cover rdata->bytes in one request, re-register the
 * SMB-Direct MR if present, and reissue ->async_readv, looping on
 * -EAGAIN with credit rollback between attempts. On success the
 * rdata is queued on @rdata_list; on final failure its ref is dropped.
 */
3601 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3602 struct list_head *rdata_list,
3603 struct cifs_aio_ctx *ctx)
3606 struct cifs_credits credits;
3608 struct TCP_Server_Info *server;
3610 /* XXX: should we pick a new channel here? */
3611 server = rdata->server;
3614 if (rdata->cfile->invalidHandle) {
3615 rc = cifs_reopen_file(rdata->cfile, true);
3623 * Wait for credits to resend this rdata.
3624 * Note: we are attempting to resend the whole rdata not in
3628 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3634 if (rsize < rdata->bytes) {
3635 add_credits_and_wake_if(server, &credits, 0);
3638 } while (rsize < rdata->bytes);
3639 rdata->credits = credits;
3641 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3643 if (rdata->cfile->invalidHandle)
3646 #ifdef CONFIG_CIFS_SMB_DIRECT
3648 rdata->mr->need_invalidate = true;
3649 smbd_deregister_mr(rdata->mr);
3653 rc = server->ops->async_readv(rdata);
3657 /* If the read was successfully sent, we are done */
3659 /* Add to aio pending list */
3660 list_add_tail(&rdata->list, rdata_list);
3664 /* Roll back credits and retry if needed */
3665 add_credits_and_wake_if(server, &rdata->credits, 0);
3666 } while (rc == -EAGAIN);
3669 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Split an uncached read of @len bytes at @offset into rsize-bounded
 * rdata segments and launch each via ->async_readv, queueing them on
 * @rdata_list for collect_uncached_read_data(). Read-side twin of
 * cifs_write_from_iter: the direct-I/O path pins user pages from the
 * ctx's iter (advanced to @offset relative to ctx->pos), the buffered
 * path allocates kernel pages to receive into. Each segment waits
 * for MTU credits first and rolls them back on failure; on -EAGAIN
 * the direct iter is reverted and the segment retried.
 */
3674 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3675 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3676 struct cifs_aio_ctx *ctx)
3678 struct cifs_readdata *rdata;
3679 unsigned int npages, rsize;
3680 struct cifs_credits credits_on_stack;
3681 struct cifs_credits *credits = &credits_on_stack;
3685 struct TCP_Server_Info *server;
3686 struct page **pagevec;
3688 struct iov_iter direct_iov = ctx->iter;
3690 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3692 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3693 pid = open_file->pid;
3695 pid = current->tgid;
3698 iov_iter_advance(&direct_iov, offset - ctx->pos);
3701 if (open_file->invalidHandle) {
3702 rc = cifs_reopen_file(open_file, true);
3709 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3714 cur_len = min_t(const size_t, len, rsize);
3716 if (ctx->direct_io) {
3719 result = iov_iter_get_pages_alloc(
3720 &direct_iov, &pagevec,
3724 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3725 result, iov_iter_type(&direct_iov),
3726 direct_iov.iov_offset,
3731 add_credits_and_wake_if(server, credits, 0);
3734 cur_len = (size_t)result;
3735 iov_iter_advance(&direct_iov, cur_len);
3737 rdata = cifs_readdata_direct_alloc(
3738 pagevec, cifs_uncached_readv_complete);
3740 add_credits_and_wake_if(server, credits, 0);
3745 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3746 rdata->page_offset = start;
3747 rdata->tailsz = npages > 1 ?
3748 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3753 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3754 /* allocate a readdata struct */
3755 rdata = cifs_readdata_alloc(npages,
3756 cifs_uncached_readv_complete);
3758 add_credits_and_wake_if(server, credits, 0);
3763 rc = cifs_read_allocate_pages(rdata, npages);
3765 kvfree(rdata->pages);
3767 add_credits_and_wake_if(server, credits, 0);
3771 rdata->tailsz = PAGE_SIZE;
3774 rdata->server = server;
3775 rdata->cfile = cifsFileInfo_get(open_file);
3776 rdata->nr_pages = npages;
3777 rdata->offset = offset;
3778 rdata->bytes = cur_len;
3780 rdata->pagesz = PAGE_SIZE;
3781 rdata->read_into_pages = cifs_uncached_read_into_pages;
3782 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3783 rdata->credits = credits_on_stack;
3785 kref_get(&ctx->refcount);
3787 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3790 if (rdata->cfile->invalidHandle)
3793 rc = server->ops->async_readv(rdata);
3797 add_credits_and_wake_if(server, &rdata->credits, 0);
3798 kref_put(&rdata->refcount,
3799 cifs_uncached_readdata_release);
3800 if (rc == -EAGAIN) {
3801 iov_iter_revert(&direct_iov, cur_len);
3807 list_add_tail(&rdata->list, rdata_list);
/*
 * Gather results of all in-flight uncached reads on the aio ctx, in
 * offset order, under aio_mutex; non-blocking (bails when an rdata
 * hasn't completed yet, to be re-entered from the next completion).
 * A partially-received -EAGAIN rdata first has its received bytes
 * copied to the user iter (buffered path), then the remainder is
 * resent: cifs_resend_rdata reuses the rdata for direct I/O,
 * cifs_send_async_read re-reads the tail for buffered. Short reads
 * discard the rest. When all are done, stores final rc/total_len in
 * the ctx and completes the iocb or waiter.
 */
3816 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3818 struct cifs_readdata *rdata, *tmp;
3819 struct iov_iter *to = &ctx->iter;
3820 struct cifs_sb_info *cifs_sb;
3823 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3825 mutex_lock(&ctx->aio_mutex);
3827 if (list_empty(&ctx->list)) {
3828 mutex_unlock(&ctx->aio_mutex);
3833 /* the loop below should proceed in the order of increasing offsets */
3835 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3837 if (!try_wait_for_completion(&rdata->done)) {
3838 mutex_unlock(&ctx->aio_mutex);
3842 if (rdata->result == -EAGAIN) {
3843 /* resend call if it's a retryable error */
3844 struct list_head tmp_list;
3845 unsigned int got_bytes = rdata->got_bytes;
3847 list_del_init(&rdata->list);
3848 INIT_LIST_HEAD(&tmp_list);
3851 * Got a part of data and then reconnect has
3852 * happened -- fill the buffer and continue
3855 if (got_bytes && got_bytes < rdata->bytes) {
3857 if (!ctx->direct_io)
3858 rc = cifs_readdata_to_iov(rdata, to);
3860 kref_put(&rdata->refcount,
3861 cifs_uncached_readdata_release);
3866 if (ctx->direct_io) {
3868 * Re-use rdata as this is a
3871 rc = cifs_resend_rdata(
3875 rc = cifs_send_async_read(
3876 rdata->offset + got_bytes,
3877 rdata->bytes - got_bytes,
3878 rdata->cfile, cifs_sb,
3881 kref_put(&rdata->refcount,
3882 cifs_uncached_readdata_release);
3885 list_splice(&tmp_list, &ctx->list);
3888 } else if (rdata->result)
3890 else if (!ctx->direct_io)
3891 rc = cifs_readdata_to_iov(rdata, to);
3893 /* if there was a short read -- discard anything left */
3894 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3897 ctx->total_len += rdata->got_bytes;
3899 list_del_init(&rdata->list);
3900 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3903 if (!ctx->direct_io)
3904 ctx->total_len = ctx->len - iov_iter_count(to);
3906 /* mask nodata case */
3910 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3912 mutex_unlock(&ctx->aio_mutex);
3914 if (ctx->iocb && ctx->iocb->ki_complete)
3915 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3917 complete(&ctx->done);
/*
 * Common entry for uncached/direct readv — read-side twin of
 * __cifs_writev. Requires ->async_readv support, builds an aio ctx
 * (marking should_dirty for iovec destinations so received pages get
 * dirtied; copying the iter for buffered reads via setup_aio_ctx_iter),
 * launches cifs_send_async_read under aio_mutex, then either returns
 * -EIOCBQUEUED for async iocbs or waits (killably) for completion and
 * advances ki_pos by the total read. ITER_KVEC direct reads fall back
 * to the buffered path.
 */
3920 static ssize_t __cifs_readv(
3921 struct kiocb *iocb, struct iov_iter *to, bool direct)
3924 struct file *file = iocb->ki_filp;
3925 struct cifs_sb_info *cifs_sb;
3926 struct cifsFileInfo *cfile;
3927 struct cifs_tcon *tcon;
3928 ssize_t rc, total_read = 0;
3929 loff_t offset = iocb->ki_pos;
3930 struct cifs_aio_ctx *ctx;
3933 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3934 * fall back to data copy read path
3935 * this could be improved by getting pages directly in ITER_KVEC
3937 if (direct && iov_iter_is_kvec(to)) {
3938 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3942 len = iov_iter_count(to);
3946 cifs_sb = CIFS_FILE_SB(file);
3947 cfile = file->private_data;
3948 tcon = tlink_tcon(cfile->tlink);
3950 if (!tcon->ses->server->ops->async_readv)
3953 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3954 cifs_dbg(FYI, "attempting read on write only file instance\n");
3956 ctx = cifs_aio_ctx_alloc();
3960 ctx->cfile = cifsFileInfo_get(cfile);
3962 if (!is_sync_kiocb(iocb))
3965 if (iter_is_iovec(to))
3966 ctx->should_dirty = true;
3970 ctx->direct_io = true;
3974 rc = setup_aio_ctx_iter(ctx, to, READ);
3976 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3982 /* grab a lock here due to read response handlers can access ctx */
3983 mutex_lock(&ctx->aio_mutex);
3985 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3987 /* if at least one read request send succeeded, then reset rc */
3988 if (!list_empty(&ctx->list))
3991 mutex_unlock(&ctx->aio_mutex);
3994 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3998 if (!is_sync_kiocb(iocb)) {
3999 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4000 return -EIOCBQUEUED;
4003 rc = wait_for_completion_killable(&ctx->done);
4005 mutex_lock(&ctx->aio_mutex);
4006 ctx->rc = rc = -EINTR;
4007 total_read = ctx->total_len;
4008 mutex_unlock(&ctx->aio_mutex);
4011 total_read = ctx->total_len;
4014 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4017 iocb->ki_pos += total_read;
/* Direct (uncached, page-pinning) readv entry point. */
4023 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4025 return __cifs_readv(iocb, to, true);
/* Buffered-copy (non-direct) uncached readv entry point. */
4028 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4030 return __cifs_readv(iocb, to, false);
/*
 * Strict-cache readv dispatcher: with no read caching (oplock/lease),
 * always read from the server (cifs_user_readv). With read caching
 * and POSIX brlock semantics, use the generic cached path directly.
 * Otherwise take lock_sem shared and only read from the page cache
 * when no mandatory byte-range lock conflicts with the range.
 */
4034 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4036 struct inode *inode = file_inode(iocb->ki_filp);
4037 struct cifsInodeInfo *cinode = CIFS_I(inode);
4038 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4039 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4040 iocb->ki_filp->private_data;
4041 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4045 * In strict cache mode we need to read from the server all the time
4046 * if we don't have level II oplock because the server can delay mtime
4047 * change - so we can't make a decision about inode invalidating.
4048 * And we can also fail with pagereading if there are mandatory locks
4049 * on pages affected by this read but not on the region from pos to
4052 if (!CIFS_CACHE_READ(cinode))
4053 return cifs_user_readv(iocb, to);
4055 if (cap_unix(tcon->ses) &&
4056 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4057 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4058 return generic_file_read_iter(iocb, to);
4061 * We need to hold the sem to be sure nobody modifies lock list
4062 * with a brlock that prevents reading.
4064 down_read(&cinode->lock_sem);
4065 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4066 tcon->ses->server->vals->shared_lock_type,
4067 0, NULL, CIFS_READ_OP))
4068 rc = generic_file_read_iter(iocb, to);
4069 up_read(&cinode->lock_sem);
/*
 * Legacy synchronous read loop: pull @read_size bytes into the
 * kernel buffer @read_data via the protocol's ->sync_read, advancing
 * *offset. Each chunk is bounded by rsize (capped to CIFSMaxBufSize
 * for servers without the large-files capability, per the comment on
 * Win9x/ME), with handle reopen + retry on -EAGAIN. Stops on error
 * or a zero-byte read; records bytes-read stats.
 */
4074 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4077 unsigned int bytes_read = 0;
4078 unsigned int total_read;
4079 unsigned int current_read_size;
4081 struct cifs_sb_info *cifs_sb;
4082 struct cifs_tcon *tcon;
4083 struct TCP_Server_Info *server;
4086 struct cifsFileInfo *open_file;
4087 struct cifs_io_parms io_parms = {0};
4088 int buf_type = CIFS_NO_BUFFER;
4092 cifs_sb = CIFS_FILE_SB(file);
4094 /* FIXME: set up handlers for larger reads and/or convert to async */
4095 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4097 if (file->private_data == NULL) {
4102 open_file = file->private_data;
4103 tcon = tlink_tcon(open_file->tlink);
4104 server = cifs_pick_channel(tcon->ses);
4106 if (!server->ops->sync_read) {
4111 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4112 pid = open_file->pid;
4114 pid = current->tgid;
4116 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4117 cifs_dbg(FYI, "attempting read on write only file instance\n");
4119 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4120 total_read += bytes_read, cur_offset += bytes_read) {
4122 current_read_size = min_t(uint, read_size - total_read,
4125 * For windows me and 9x we do not want to request more
4126 * than it negotiated since it will refuse the read
4129 if (!(tcon->ses->capabilities &
4130 tcon->ses->server->vals->cap_large_files)) {
4131 current_read_size = min_t(uint,
4132 current_read_size, CIFSMaxBufSize);
4134 if (open_file->invalidHandle) {
4135 rc = cifs_reopen_file(open_file, true);
4140 io_parms.tcon = tcon;
4141 io_parms.offset = *offset;
4142 io_parms.length = current_read_size;
4143 io_parms.server = server;
4144 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4145 &bytes_read, &cur_offset,
4147 } while (rc == -EAGAIN);
4149 if (rc || (bytes_read == 0)) {
4157 cifs_stats_bytes_read(tcon, total_read);
4158 *offset += bytes_read;
4166 * If the page is mmap'ed into a process' page tables, then we need to make
4167 * sure that it doesn't change while being written back.
/*
 * cifs_page_mkwrite - vm_ops->page_mkwrite handler.  Returns with the
 * faulting page locked (VM_FAULT_LOCKED) so the page cannot change while
 * the caller makes it writable.
 * NOTE(review): elided view — any locking done between these lines
 * (presumably lock_page(page)) is not visible here.
 */
4170 cifs_page_mkwrite(struct vm_fault *vmf)
4172 struct page *page = vmf->page;
4175 return VM_FAULT_LOCKED;
/*
 * VM operations used for CIFS mmap'ed files: generic page-cache fault
 * handlers plus our page_mkwrite hook above.
 */
4178 static const struct vm_operations_struct cifs_file_vm_ops = {
4179 .fault = filemap_fault,
4180 .map_pages = filemap_map_pages,
4181 .page_mkwrite = cifs_page_mkwrite,
/*
 * cifs_file_strict_mmap - mmap for strict cache mode.  If we do not hold a
 * read (oplock/lease) cache grant, zap the page cache first so the mapping
 * cannot expose stale data, then fall through to generic_file_mmap and
 * install our vm_ops.
 */
4184 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4187 struct inode *inode = file_inode(file);
4191 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4192 rc = cifs_zap_mapping(inode);
4194 rc = generic_file_mmap(file, vma);
4196 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_file_mmap - default (non-strict) mmap.  Revalidates the file's
 * cached attributes/data first; a revalidation failure is logged but the
 * mapping is still set up via generic_file_mmap with our vm_ops.
 * NOTE(review): the error-handling branch between these lines is elided.
 */
4202 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4208 rc = cifs_revalidate_file(file);
4210 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4213 rc = generic_file_mmap(file, vma);
4215 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_readv_complete - work-queue completion for an async readpages
 * request.  For each page in the rdata: put it on the LRU, mark it
 * uptodate (and push it to fscache) if the read succeeded or partially
 * succeeded with -EAGAIN, and account the bytes consumed.  Drops the
 * rdata reference when done.
 */
4222 cifs_readv_complete(struct work_struct *work)
4224 unsigned int i, got_bytes;
4225 struct cifs_readdata *rdata = container_of(work,
4226 struct cifs_readdata, work);
4228 got_bytes = rdata->got_bytes;
4229 for (i = 0; i < rdata->nr_pages; i++) {
4230 struct page *page = rdata->pages[i];
4232 lru_cache_add(page);
/* a partially-successful -EAGAIN read still filled the leading pages */
4234 if (rdata->result == 0 ||
4235 (rdata->result == -EAGAIN && got_bytes)) {
4236 flush_dcache_page(page);
4237 SetPageUptodate(page);
4242 if (rdata->result == 0 ||
4243 (rdata->result == -EAGAIN && got_bytes))
4244 cifs_readpage_to_fscache(rdata->mapping->host, page);
/* consume up to one page of the received byte count per iteration */
4246 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4249 rdata->pages[i] = NULL;
4251 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * readpages_fill_pages - fill the rdata's pages with @len bytes of reply
 * data, either copied from @iter (SMB-Direct / compounding path) or read
 * straight from the socket when @iter is NULL.  Pages wholly beyond the
 * server's EOF are zero-filled and marked uptodate instead; pages beyond
 * the data and EOF are released.  Returns the bytes placed in pages, or
 * the error when nothing was transferred.
 * NOTE(review): elided view — declarations of result/len/eof/eof_index
 * and parts of the per-page branch structure are not visible.
 */
4255 readpages_fill_pages(struct TCP_Server_Info *server,
4256 struct cifs_readdata *rdata, struct iov_iter *iter,
4263 unsigned int nr_pages = rdata->nr_pages;
4264 unsigned int page_offset = rdata->page_offset;
4266 /* determine the eof that the server (probably) has */
4267 eof = CIFS_I(rdata->mapping->host)->server_eof;
4268 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4269 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4271 rdata->got_bytes = 0;
4272 rdata->tailsz = PAGE_SIZE;
4273 for (i = 0; i < nr_pages; i++) {
4274 struct page *page = rdata->pages[i];
4275 unsigned int to_read = rdata->pagesz;
/* first page may start mid-page; shrink its read window accordingly */
4279 to_read -= page_offset;
4285 if (len >= to_read) {
4287 } else if (len > 0) {
4288 /* enough for partial page, fill and zero the rest */
4289 zero_user(page, len + page_offset, to_read - len);
4290 n = rdata->tailsz = len;
4292 } else if (page->index > eof_index) {
4294 * The VFS will not try to do readahead past the
4295 * i_size, but it's possible that we have outstanding
4296 * writes with gaps in the middle and the i_size hasn't
4297 * caught up yet. Populate those with zeroed out pages
4298 * to prevent the VFS from repeatedly attempting to
4299 * fill them until the writes are flushed.
4301 zero_user(page, 0, PAGE_SIZE);
4302 lru_cache_add(page);
4303 flush_dcache_page(page);
4304 SetPageUptodate(page);
4307 rdata->pages[i] = NULL;
4311 /* no need to hold page hostage */
4312 lru_cache_add(page);
4315 rdata->pages[i] = NULL;
/* iter != NULL: data already received (e.g. RDMA); copy it in */
4321 result = copy_page_from_iter(
4322 page, page_offset, n, iter);
4323 #ifdef CONFIG_CIFS_SMB_DIRECT
/* otherwise pull the bytes for this page directly off the socket */
4328 result = cifs_read_page_from_socket(
4329 server, page, page_offset, n);
4333 rdata->got_bytes += result;
/* report partial progress unless the connection aborted outright */
4336 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4337 rdata->got_bytes : result;
/* socket-receive variant: no pre-received iter, read @len off the wire */
4341 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4342 struct cifs_readdata *rdata, unsigned int len)
4344 return readpages_fill_pages(server, rdata, NULL, len);
/* copy variant: reply data already sits in @iter; copy iter->count bytes */
4348 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4349 struct cifs_readdata *rdata,
4350 struct iov_iter *iter)
4352 return readpages_fill_pages(server, rdata, iter, iter->count);
/*
 * readpages_get_pages - peel a run of index-contiguous pages off
 * @page_list (which is in declining-index order), add each to the page
 * cache locked, and move them onto @tmplist until @rsize bytes would be
 * exceeded, an index gap is hit, or the list empties.  Outputs the count,
 * starting file offset, and total bytes of the run.
 * NOTE(review): elided view — rc declaration, the first-page gfp/index
 * arguments, and the nr_pages accounting lines are not visible.
 */
4356 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4357 unsigned int rsize, struct list_head *tmplist,
4358 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4360 struct page *page, *tpage;
4361 unsigned int expected_index;
4363 gfp_t gfp = readahead_gfp_mask(mapping);
4365 INIT_LIST_HEAD(tmplist);
4367 page = lru_to_page(page_list);
4370 * Lock the page and put it in the cache. Since no one else
4371 * should have access to this page, we're safe to simply set
4372 * PG_locked without checking it first.
4374 __SetPageLocked(page);
4375 rc = add_to_page_cache_locked(page, mapping,
4378 /* give up if we can't stick it in the cache */
4380 __ClearPageLocked(page);
4384 /* move first page to the tmplist */
4385 *offset = (loff_t)page->index << PAGE_SHIFT;
4388 list_move_tail(&page->lru, tmplist);
4390 /* now try and add more pages onto the request */
4391 expected_index = page->index + 1;
4392 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4393 /* discontinuity ? */
4394 if (page->index != expected_index)
4397 /* would this page push the read over the rsize? */
4398 if (*bytes + PAGE_SIZE > rsize)
4401 __SetPageLocked(page);
4402 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4404 __ClearPageLocked(page);
4407 list_move_tail(&page->lru, tmplist);
4408 (*bytes) += PAGE_SIZE;
/*
 * cifs_readpages - address_space_operations.readpages implementation.
 * Tries fscache first, then repeatedly carves contiguous rsize-bounded
 * runs off @page_list (via readpages_get_pages), builds a cifs_readdata
 * for each run, and dispatches it with server->ops->async_readv; the
 * completion work is cifs_readv_complete above.  Credits are acquired
 * per request and returned on every failure path.
 * NOTE(review): elided view — declarations of rc/err/offset, the fscache
 * early-return, several break/continue lines, and the unlock of unissued
 * pages are not visible.
 */
4415 static int cifs_readpages(struct file *file, struct address_space *mapping,
4416 struct list_head *page_list, unsigned num_pages)
4420 struct list_head tmplist;
4421 struct cifsFileInfo *open_file = file->private_data;
4422 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4423 struct TCP_Server_Info *server;
4429 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4430 * immediately if the cookie is negative
4432 * After this point, every page in the list might have PG_fscache set,
4433 * so we will need to clean that up off of every page we don't use.
4435 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4442 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4443 pid = open_file->pid;
4445 pid = current->tgid;
4448 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4450 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4451 __func__, file, mapping, num_pages);
4454 * Start with the page at end of list and move it to private
4455 * list. Do the same with any following pages until we hit
4456 * the rsize limit, hit an index discontinuity, or run out of
4457 * pages. Issue the async read and then start the loop again
4458 * until the list is empty.
4460 * Note that list order is important. The page_list is in
4461 * the order of declining indexes. When we put the pages in
4462 * the rdata->pages, then we want them in increasing order.
4464 while (!list_empty(page_list) && !err) {
4465 unsigned int i, nr_pages, bytes, rsize;
4467 struct page *page, *tpage;
4468 struct cifs_readdata *rdata;
4469 struct cifs_credits credits_on_stack;
4470 struct cifs_credits *credits = &credits_on_stack;
/* make sure the handle is live before spending credits on it */
4472 if (open_file->invalidHandle) {
4473 rc = cifs_reopen_file(open_file, true);
/* block until the server grants credits for an rsize-sized read */
4480 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4486 * Give up immediately if rsize is too small to read an entire
4487 * page. The VFS will fall back to readpage. We should never
4488 * reach this point however since we set ra_pages to 0 when the
4489 * rsize is smaller than a cache page.
4491 if (unlikely(rsize < PAGE_SIZE)) {
4492 add_credits_and_wake_if(server, credits, 0);
4498 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4499 &nr_pages, &offset, &bytes);
4501 add_credits_and_wake_if(server, credits, 0);
4505 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4507 /* best to give up if we're out of mem */
4508 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4509 list_del(&page->lru);
4510 lru_cache_add(page);
4515 add_credits_and_wake_if(server, credits, 0);
4519 rdata->cfile = cifsFileInfo_get(open_file);
4520 rdata->server = server;
4521 rdata->mapping = mapping;
4522 rdata->offset = offset;
4523 rdata->bytes = bytes;
4525 rdata->pagesz = PAGE_SIZE;
4526 rdata->tailsz = PAGE_SIZE;
4527 rdata->read_into_pages = cifs_readpages_read_into_pages;
4528 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4529 rdata->credits = credits_on_stack;
/* tmplist is increasing-index order, so pages land sorted in rdata */
4531 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4532 list_del(&page->lru);
4533 rdata->pages[rdata->nr_pages++] = page;
/* trim/return credits now that the actual request size is known */
4536 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4539 if (rdata->cfile->invalidHandle)
4542 rc = server->ops->async_readv(rdata);
/* dispatch failed: give credits back and release the staged pages */
4546 add_credits_and_wake_if(server, &rdata->credits, 0);
4547 for (i = 0; i < rdata->nr_pages; i++) {
4548 page = rdata->pages[i];
4549 lru_cache_add(page);
4553 /* Fallback to the readpage in error/reconnect cases */
4554 kref_put(&rdata->refcount, cifs_readdata_release);
4558 kref_put(&rdata->refcount, cifs_readdata_release);
4561 /* Any pages that have been shown to fscache but didn't get added to
4562 * the pagecache must be uncached before they get returned to the
4565 cifs_fscache_readpages_cancel(mapping->host, page_list);
4571 * cifs_readpage_worker must be called with the page pinned
/*
 * Fills one page: tries fscache, else kmaps the page and does a
 * synchronous cifs_read().  On success the remainder of a short read is
 * zeroed, the page is flushed, marked uptodate, and pushed to fscache.
 * Also keeps atime >= mtime to avoid confusing applications.
 * NOTE(review): elided view — rc declaration, the fscache-hit goto, the
 * error path, kunmap and unlock are not visible.
 */
4573 static int cifs_readpage_worker(struct file *file, struct page *page,
4579 /* Is the page cached? */
4580 rc = cifs_readpage_from_fscache(file_inode(file), page);
4584 read_data = kmap(page);
4585 /* for reads over a certain size could initiate async read ahead */
4587 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4592 cifs_dbg(FYI, "Bytes read %d\n", rc);
4594 /* we do not want atime to be less than mtime, it broke some apps */
4595 file_inode(file)->i_atime = current_time(file_inode(file));
4596 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4597 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4599 file_inode(file)->i_atime = current_time(file_inode(file));
/* zero the tail of the page past a short read before marking uptodate */
4602 memset(read_data + rc, 0, PAGE_SIZE - rc);
4604 flush_dcache_page(page);
4605 SetPageUptodate(page);
4607 /* send this page to the cache */
4608 cifs_readpage_to_fscache(file_inode(file), page);
/*
 * cifs_readpage - address_space_operations.readpage: compute the page's
 * file offset and delegate to cifs_readpage_worker.  Rejects files with
 * no private_data (no open cifs file handle).
 */
4620 static int cifs_readpage(struct file *file, struct page *page)
4622 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4628 if (file->private_data == NULL) {
4634 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4635 page, (int)offset, (int)offset);
4637 rc = cifs_readpage_worker(file, page, &offset);
4643 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4645 struct cifsFileInfo *open_file;
4647 spin_lock(&cifs_inode->open_file_lock);
4648 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4649 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4650 spin_unlock(&cifs_inode->open_file_lock);
4654 spin_unlock(&cifs_inode->open_file_lock);
4658 /* We do not want to update the file size from server for inodes
4659 open for write - to avoid races with writepage extending
4660 the file - in the future we could consider allowing
4661 refreshing the inode only on increases in the file size
4662 but this is tricky to do without racing with writebehind
4663 page caching in the current Linux kernel design */
/*
 * Returns whether the cached i_size may be replaced by the server-reported
 * @end_of_file.  Always safe when no writable handle exists or on
 * direct-IO mounts (no page cache to race with); otherwise only safe when
 * the server size is not behind our local size.
 */
4664 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4669 if (is_inode_writable(cifsInode)) {
4670 /* This inode is open for write at least once */
4671 struct cifs_sb_info *cifs_sb;
4673 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4674 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4675 /* since no page cache to corrupt on directio
4676 we can change size safely */
4680 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - address_space_operations.write_begin.  Grabs (or
 * creates) the target page and decides whether a pre-read from the server
 * is needed before the copy-in: skipped when the page is already uptodate,
 * the write covers a whole page, or (with a read oplock) the page lies at
 * or past EOF — in which case the untouched parts are zeroed and the page
 * marked PageChecked instead.
 * NOTE(review): elided view — rc/oncethru/i_size/page declarations, the
 * goto labels and the final *pagep/return lines are not visible.
 */
4688 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4689 loff_t pos, unsigned len, unsigned flags,
4690 struct page **pagep, void **fsdata)
4693 pgoff_t index = pos >> PAGE_SHIFT;
4694 loff_t offset = pos & (PAGE_SIZE - 1);
4695 loff_t page_start = pos & PAGE_MASK;
4700 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4703 page = grab_cache_page_write_begin(mapping, index, flags);
4709 if (PageUptodate(page))
4713 * If we write a full page it will be up to date, no need to read from
4714 * the server. If the write is short, we'll end up doing a sync write
4717 if (len == PAGE_SIZE)
4721 * optimize away the read when we have an oplock, and we're not
4722 * expecting to use any of the data we'd be reading in. That
4723 * is, when the page lies beyond the EOF, or straddles the EOF
4724 * and the write will cover all of the existing data.
4726 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4727 i_size = i_size_read(mapping->host);
4728 if (page_start >= i_size ||
4729 (offset == 0 && (pos + len) >= i_size)) {
4730 zero_user_segments(page, 0, offset,
4734 * PageChecked means that the parts of the page
4735 * to which we're not writing are considered up
4736 * to date. Once the data is copied to the
4737 * page, it can be set uptodate.
4739 SetPageChecked(page);
/* readable open and first pass: pre-fill the page from the server */
4744 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4746 * might as well read a page, it is fast enough. If we get
4747 * an error, we don't need to return it. cifs_write_end will
4748 * do a sync write instead since PG_uptodate isn't set.
4750 cifs_readpage_worker(file, page, &page_start);
4755 /* we could try using another file handle if there is one -
4756 but how would we lock it to prevent close of that handle
4757 racing with this read? In any case
4758 this will be written out by write_end so is fine */
/* releasepage: refuse pages that carry private data; else defer to fscache */
4765 static int cifs_release_page(struct page *page, gfp_t gfp)
4767 if (PagePrivate(page))
4770 return cifs_fscache_release_page(page, gfp);
/*
 * invalidatepage: notify fscache, but only when the entire page is being
 * invalidated (offset 0, full PAGE_SIZE length).
 */
4773 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4774 unsigned int length)
4776 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4778 if (offset == 0 && length == PAGE_SIZE)
4779 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * launder_page: synchronously write back a single dirty page (WB_SYNC_ALL
 * over just this page's range) before it is invalidated, then drop any
 * fscache copy.
 */
4782 static int cifs_launder_page(struct page *page)
4785 loff_t range_start = page_offset(page);
4786 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4787 struct writeback_control wbc = {
4788 .sync_mode = WB_SYNC_ALL,
4790 .range_start = range_start,
4791 .range_end = range_end,
4794 cifs_dbg(FYI, "Launder page: %p\n", page);
4796 if (clear_page_dirty_for_io(page))
4797 rc = cifs_writepage_locked(page, &wbc);
4799 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * cifs_oplock_break - work item run when the server breaks our oplock or
 * lease.  Waits for in-flight writers, downgrades the cached oplock
 * level, breaks any kernel leases, flushes (and if the read cache was
 * lost, invalidates) the page cache, pushes cached byte-range locks to
 * the server, sends the oplock acknowledgement, and wakes any deferred
 * close scheduled on this handle.
 * NOTE(review): elided view — rc declaration, some set_oplock lines and
 * the oplock_break_ack label are not visible.
 */
4803 void cifs_oplock_break(struct work_struct *work)
4805 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4807 struct inode *inode = d_inode(cfile->dentry);
4808 struct cifsInodeInfo *cinode = CIFS_I(inode);
4809 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4810 struct TCP_Server_Info *server = tcon->ses->server;
4812 bool purge_cache = false;
4813 bool is_deferred = false;
4814 struct cifs_deferred_close *dclose;
/* do not race the downgrade with writers still holding the oplock */
4816 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4817 TASK_UNINTERRUPTIBLE);
4819 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4820 cfile->oplock_epoch, &purge_cache);
/* mandatory byte-range locks need write caching; drop read-only caching */
4822 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4823 cifs_has_mand_locks(cinode)) {
4824 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4829 if (inode && S_ISREG(inode->i_mode)) {
4830 if (CIFS_CACHE_READ(cinode))
4831 break_lease(inode, O_RDONLY);
4833 break_lease(inode, O_WRONLY);
4834 rc = filemap_fdatawrite(inode->i_mapping);
4835 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4836 rc = filemap_fdatawait(inode->i_mapping);
4837 mapping_set_error(inode->i_mapping, rc);
4838 cifs_zap_mapping(inode);
4840 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4841 if (CIFS_CACHE_WRITE(cinode))
4842 goto oplock_break_ack;
4845 rc = cifs_push_locks(cfile);
4847 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4851 * releasing stale oplock after recent reconnect of smb session using
4852 * a now incorrect file handle is not a data integrity issue but do
4853 * not bother sending an oplock release if session to server still is
4854 * disconnected since oplock already released by the server
4856 if (!cfile->oplock_break_cancelled) {
4857 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4859 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4861 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4863 * When oplock break is received and there are no active
4864 * file handles but cached, then set the flag oplock_break_received.
4865 * So, new open will not use cached handle.
4867 spin_lock(&CIFS_I(inode)->deferred_lock);
4868 is_deferred = cifs_is_deferred_close(cfile, &dclose);
4870 cfile->oplock_break_received = true;
/* run the deferred close immediately rather than after its delay */
4871 mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
4873 spin_unlock(&CIFS_I(inode)->deferred_lock);
4874 cifs_done_oplock_break(cinode);
4878 * The presence of cifs_direct_io() in the address space ops vector
4879 * allowes open() O_DIRECT flags which would have failed otherwise.
4881 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4882 * so this method should never be called.
4884 * Direct IO is not yet supported in the cached mode.
/* stub direct_IO; exists only so O_DIRECT opens are permitted (see above) */
4887 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4891 * Eventually need to support direct IO for non forcedirectio mounts
/*
 * cifs_swap_activate - allow (experimental) use of a CIFS file as swap.
 * Rejects files with holes (i_blocks*512 < i_size, since swap cannot
 * fault-in missing blocks) and marks the open file as a swapfile.
 * NOTE(review): elided view — the return statements and isize declaration
 * are not visible here.
 */
4896 static int cifs_swap_activate(struct swap_info_struct *sis,
4897 struct file *swap_file, sector_t *span)
4899 struct cifsFileInfo *cfile = swap_file->private_data;
4900 struct inode *inode = swap_file->f_mapping->host;
4901 unsigned long blocks;
4904 cifs_dbg(FYI, "swap activate\n");
/* snapshot size/blocks consistently under i_lock */
4906 spin_lock(&inode->i_lock);
4907 blocks = inode->i_blocks;
4908 isize = inode->i_size;
4909 spin_unlock(&inode->i_lock);
4910 if (blocks*512 < isize) {
4911 pr_warn("swap activate: swapfile has holes\n");
4916 pr_warn_once("Swap support over SMB3 is experimental\n");
4919 * TODO: consider adding ACL (or documenting how) to prevent other
4920 * users (on this or other systems) from reading it
4924 /* TODO: add sk_set_memalloc(inet) or similar */
4927 cfile->swapfile = true;
4929 * TODO: Since file already open, we can't open with DENY_ALL here
4930 * but we could add call to grab a byte range lock to prevent others
4931 * from reading or writing the file
/* undo cifs_swap_activate: clear the swapfile flag on the open file */
4937 static void cifs_swap_deactivate(struct file *file)
4939 struct cifsFileInfo *cfile = file->private_data;
4941 cifs_dbg(FYI, "swap deactivate\n");
4943 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4946 cfile->swapfile = false;
4948 /* do we need to unpin (or unlock) the file */
/*
 * Default address-space operations, used when the server buffer is large
 * enough for multi-page reads (see cifs_addr_ops_smallbuf below for the
 * variant without .readpages).
 */
4951 const struct address_space_operations cifs_addr_ops = {
4952 .readpage = cifs_readpage,
4953 .readpages = cifs_readpages,
4954 .writepage = cifs_writepage,
4955 .writepages = cifs_writepages,
4956 .write_begin = cifs_write_begin,
4957 .write_end = cifs_write_end,
4958 .set_page_dirty = __set_page_dirty_nobuffers,
4959 .releasepage = cifs_release_page,
4960 .direct_IO = cifs_direct_io,
4961 .invalidatepage = cifs_invalidate_page,
4962 .launder_page = cifs_launder_page,
4964 * TODO: investigate and if useful we could add an cifs_migratePage
4965 * helper (under an CONFIG_MIGRATION) in the future, and also
4966 * investigate and add an is_dirty_writeback helper if needed
4968 .swap_activate = cifs_swap_activate,
4969 .swap_deactivate = cifs_swap_deactivate,
4973 * cifs_readpages requires the server to support a buffer large enough to
4974 * contain the header plus one complete page of data. Otherwise, we need
4975 * to leave cifs_readpages out of the address space operations.
4977 const struct address_space_operations cifs_addr_ops_smallbuf = {
4978 .readpage = cifs_readpage,
4979 .writepage = cifs_writepage,
4980 .writepages = cifs_writepages,
4981 .write_begin = cifs_write_begin,
4982 .write_end = cifs_write_end,
4983 .set_page_dirty = __set_page_dirty_nobuffers,
4984 .releasepage = cifs_release_page,
4985 .invalidatepage = cifs_invalidate_page,
4986 .launder_page = cifs_launder_page,