4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
37 #include <asm/div64.h>
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
46 #include "smbdirect.h"
/*
 * Map the POSIX open-mode bits (O_RDONLY/O_WRONLY/O_RDWR in O_ACCMODE)
 * to the SMB/CIFS access mask requested on open.
 * NOTE(review): interior lines of this function are missing from this
 * fragment (return statements and braces elided); code left byte-identical.
 */
48 static inline int cifs_convert_flags(unsigned int flags)
50 if ((flags & O_ACCMODE) == O_RDONLY)
52 else if ((flags & O_ACCMODE) == O_WRONLY)
54 else if ((flags & O_ACCMODE) == O_RDWR) {
55 /* GENERIC_ALL is too much permission to request
56 can cause unnecessary access denied on create */
57 /* return GENERIC_ALL; */
58 return (GENERIC_READ | GENERIC_WRITE);
/* Fallback: fine-grained rights rather than GENERIC_* bits. */
61 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate VFS open flags into the SMB_O_* flag set used by the CIFS
 * POSIX-extensions open call (see cifs_posix_open()).
 * NOTE(review): some lines are elided in this fragment (e.g. the O_TRUNC
 * and O_SYNC condition lines); code left byte-identical.
 */
66 static u32 cifs_posix_convert_flags(unsigned int flags)
70 if ((flags & O_ACCMODE) == O_RDONLY)
71 posix_flags = SMB_O_RDONLY;
72 else if ((flags & O_ACCMODE) == O_WRONLY)
73 posix_flags = SMB_O_WRONLY;
74 else if ((flags & O_ACCMODE) == O_RDWR)
75 posix_flags = SMB_O_RDWR;
77 if (flags & O_CREAT) {
78 posix_flags |= SMB_O_CREAT;
/* O_EXCL is only honoured together with O_CREAT, per POSIX open(2). */
80 posix_flags |= SMB_O_EXCL;
81 } else if (flags & O_EXCL)
82 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83 current->comm, current->tgid);
86 posix_flags |= SMB_O_TRUNC;
87 /* be safe and imply O_SYNC for O_DSYNC */
89 posix_flags |= SMB_O_SYNC;
90 if (flags & O_DIRECTORY)
91 posix_flags |= SMB_O_DIRECTORY;
92 if (flags & O_NOFOLLOW)
93 posix_flags |= SMB_O_NOFOLLOW;
95 posix_flags |= SMB_O_DIRECT;
/*
 * Map POSIX create/truncate flag combinations to the CIFS create
 * disposition (FILE_CREATE / FILE_OVERWRITE_IF / FILE_OPEN_IF /
 * FILE_OVERWRITE / FILE_OPEN); the mapping table appears in a comment
 * elsewhere in this file.
 * NOTE(review): two return lines and the final FILE_OPEN fallback are
 * elided in this fragment; code left byte-identical.
 */
100 static inline int cifs_get_disposition(unsigned int flags)
102 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
104 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105 return FILE_OVERWRITE_IF;
106 else if ((flags & O_CREAT) == O_CREAT)
108 else if ((flags & O_TRUNC) == O_TRUNC)
109 return FILE_OVERWRITE;
/*
 * Open @full_path using the SMB POSIX-extensions create call
 * (CIFSPOSIXCreate) and, on success, populate/refresh the inode from the
 * returned FILE_UNIX_BASIC_INFO.  Returns the netfid via @pnetfid and the
 * granted oplock via @poplock.
 * NOTE(review): error-handling lines, the exit label and several branches
 * are elided in this fragment; code left byte-identical.
 */
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115 struct super_block *sb, int mode, unsigned int f_flags,
116 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
119 FILE_UNIX_BASIC_INFO *presp_data;
120 __u32 posix_flags = 0;
121 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122 struct cifs_fattr fattr;
123 struct tcon_link *tlink;
124 struct cifs_tcon *tcon;
126 cifs_dbg(FYI, "posix open %s\n", full_path);
128 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129 if (presp_data == NULL)
132 tlink = cifs_sb_tlink(cifs_sb);
138 tcon = tlink_tcon(tlink);
/* Apply the process umask before sending the mode to the server. */
139 mode &= ~current_umask();
141 posix_flags = cifs_posix_convert_flags(f_flags);
142 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143 poplock, full_path, cifs_sb->local_nls,
144 cifs_remap(cifs_sb));
145 cifs_put_tlink(tlink);
/* Type == -1 means the server returned no usable metadata. */
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
154 goto posix_open_ret; /* caller does not need info */
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
167 cifs_fattr_to_inode(*pinode, &fattr);
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
183 int create_options = CREATE_NOT_DIR;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
188 if (!server->ops->open)
191 desired_access = cifs_convert_flags(f_flags);
193 /*********************************************************************
194 * open flag mapping table:
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
204 * Note that there is not a direct match between disposition
205 * FILE_SUPERSEDE (ie create whether or not file exists although
206 * O_CREAT | O_TRUNC is similar but truncates the existing
207 * file rather than creating a new file as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on open call)
210 *? O_SYNC is a reasonable match to CIFS writethrough flag
211 *? and the read write flags match reasonably. O_LARGEFILE
212 *? is irrelevant because largefile support is always used
213 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
217 disposition = cifs_get_disposition(f_flags);
219 /* BB pass O_SYNC flag through on file attributes .. BB */
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
225 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
226 if (f_flags & O_SYNC)
227 create_options |= CREATE_WRITE_THROUGH;
229 if (f_flags & O_DIRECT)
230 create_options |= CREATE_NO_BUFFER;
233 oparms.cifs_sb = cifs_sb;
234 oparms.desired_access = desired_access;
235 oparms.create_options = cifs_create_options(cifs_sb, create_options);
236 oparms.disposition = disposition;
237 oparms.path = full_path;
239 oparms.reconnect = false;
241 rc = server->ops->open(xid, &oparms, oplock, buf);
246 /* TODO: Add support for calling posix query info but with passing in fid */
248 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
251 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
255 server->ops->close(xid, tcon, fid);
/*
 * Return true if any open fid on this inode holds byte-range (brlock)
 * entries; walks cinode->llist under lock_sem read lock.
 * NOTE(review): return type line, loop brace and final return are elided
 * in this fragment; code left byte-identical.
 */
266 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
268 struct cifs_fid_locks *cur;
269 bool has_locks = false;
271 down_read(&cinode->lock_sem);
272 list_for_each_entry(cur, &cinode->llist, llist) {
273 if (!list_empty(&cur->locks)) {
278 up_read(&cinode->lock_sem);
/*
 * Acquire @sem for write by polling down_write_trylock().
 * NOTE(review): the loop body (presumably a short sleep/backoff between
 * attempts — TODO confirm) is elided in this fragment.
 */
283 cifs_down_write(struct rw_semaphore *sem)
285 while (!down_write_trylock(sem))
/* Forward declaration: worker used by cifs_new_fileinfo() below. */
289 static void cifsFileInfo_put_work(struct work_struct *work);
291 struct cifsFileInfo *
292 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
293 struct tcon_link *tlink, __u32 oplock)
295 struct dentry *dentry = file_dentry(file);
296 struct inode *inode = d_inode(dentry);
297 struct cifsInodeInfo *cinode = CIFS_I(inode);
298 struct cifsFileInfo *cfile;
299 struct cifs_fid_locks *fdlocks;
300 struct cifs_tcon *tcon = tlink_tcon(tlink);
301 struct TCP_Server_Info *server = tcon->ses->server;
303 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
307 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
313 INIT_LIST_HEAD(&fdlocks->locks);
314 fdlocks->cfile = cfile;
315 cfile->llist = fdlocks;
318 cfile->pid = current->tgid;
319 cfile->uid = current_fsuid();
320 cfile->dentry = dget(dentry);
321 cfile->f_flags = file->f_flags;
322 cfile->invalidHandle = false;
323 cfile->tlink = cifs_get_tlink(tlink);
324 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
325 INIT_WORK(&cfile->put, cifsFileInfo_put_work);
326 mutex_init(&cfile->fh_mutex);
327 spin_lock_init(&cfile->file_info_lock);
329 cifs_sb_active(inode->i_sb);
332 * If the server returned a read oplock and we have mandatory brlocks,
333 * set oplock level to None.
335 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
336 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
340 cifs_down_write(&cinode->lock_sem);
341 list_add(&fdlocks->llist, &cinode->llist);
342 up_write(&cinode->lock_sem);
344 spin_lock(&tcon->open_file_lock);
345 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
346 oplock = fid->pending_open->oplock;
347 list_del(&fid->pending_open->olist);
349 fid->purge_cache = false;
350 server->ops->set_fid(cfile, fid, oplock);
352 list_add(&cfile->tlist, &tcon->openFileList);
353 atomic_inc(&tcon->num_local_opens);
355 /* if readable file instance put first in list*/
356 spin_lock(&cinode->open_file_lock);
357 if (file->f_mode & FMODE_READ)
358 list_add(&cfile->flist, &cinode->openFileList);
360 list_add_tail(&cfile->flist, &cinode->openFileList);
361 spin_unlock(&cinode->open_file_lock);
362 spin_unlock(&tcon->open_file_lock);
364 if (fid->purge_cache)
365 cifs_zap_mapping(inode);
367 file->private_data = cfile;
/*
 * Take a reference on @cifs_file under its file_info_lock and return it.
 * NOTE(review): braces and the return statement are elided in this
 * fragment; code left byte-identical.
 */
371 struct cifsFileInfo *
372 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
374 spin_lock(&cifs_file->file_info_lock);
375 cifsFileInfo_get_locked(cifs_file);
376 spin_unlock(&cifs_file->file_info_lock);
/*
 * Final teardown for a cifsFileInfo once its refcount has dropped:
 * discard its outstanding byte-range lock records (waking any waiters),
 * unlink and free its per-fid lock list, then drop the tlink, dentry and
 * superblock references taken at open time.
 * NOTE(review): the kfree of cifs_file itself and some brace lines are
 * elided in this fragment; code left byte-identical.
 */
380 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
382 struct inode *inode = d_inode(cifs_file->dentry);
383 struct cifsInodeInfo *cifsi = CIFS_I(inode);
384 struct cifsLockInfo *li, *tmp;
385 struct super_block *sb = inode->i_sb;
388 * Delete any outstanding lock records. We'll lose them when the file
391 cifs_down_write(&cifsi->lock_sem);
392 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
393 list_del(&li->llist);
394 cifs_del_lock_waiters(li);
397 list_del(&cifs_file->llist->llist);
398 kfree(cifs_file->llist);
399 up_write(&cifsi->lock_sem);
401 cifs_put_tlink(cifs_file->tlink);
402 dput(cifs_file->dentry);
403 cifs_sb_deactive(sb);
/*
 * Workqueue callback: runs the final put for a cifsFileInfo whose release
 * was offloaded (queued via the ->put work in _cifsFileInfo_put()).
 */
407 static void cifsFileInfo_put_work(struct work_struct *work)
409 struct cifsFileInfo *cifs_file = container_of(work,
410 struct cifsFileInfo, put);
412 cifsFileInfo_put_final(cifs_file);
416 * cifsFileInfo_put - release a reference of file priv data
418 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
420 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
/* Thin wrapper: wait for the oplock handler, do not offload the close. */
422 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
424 _cifsFileInfo_put(cifs_file, true, true);
428 * _cifsFileInfo_put - release a reference of file priv data
430 * This may involve closing the filehandle @cifs_file out on the
431 * server. Must be called without holding tcon->open_file_lock,
432 * cinode->open_file_lock and cifs_file->file_info_lock.
434 * If @wait_for_oplock_handler is true and we are releasing the last
435 * reference, wait for any running oplock break handler of the file
436 * and cancel any pending one.
438 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
439 * @wait_oplock_handler: must be false if called from oplock_break_handler
440 * @offload: not offloaded on close and oplock breaks
443 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
444 bool wait_oplock_handler, bool offload)
446 struct inode *inode = d_inode(cifs_file->dentry);
447 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
448 struct TCP_Server_Info *server = tcon->ses->server;
449 struct cifsInodeInfo *cifsi = CIFS_I(inode);
450 struct super_block *sb = inode->i_sb;
451 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
453 struct cifs_pending_open open;
454 bool oplock_break_cancelled;
456 spin_lock(&tcon->open_file_lock);
457 spin_lock(&cifsi->open_file_lock);
458 spin_lock(&cifs_file->file_info_lock);
459 if (--cifs_file->count > 0) {
460 spin_unlock(&cifs_file->file_info_lock);
461 spin_unlock(&cifsi->open_file_lock);
462 spin_unlock(&tcon->open_file_lock);
465 spin_unlock(&cifs_file->file_info_lock);
467 if (server->ops->get_lease_key)
468 server->ops->get_lease_key(inode, &fid);
470 /* store open in pending opens to make sure we don't miss lease break */
471 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
473 /* remove it from the lists */
474 list_del(&cifs_file->flist);
475 list_del(&cifs_file->tlist);
476 atomic_dec(&tcon->num_local_opens);
478 if (list_empty(&cifsi->openFileList)) {
479 cifs_dbg(FYI, "closing last open instance for inode %p\n",
480 d_inode(cifs_file->dentry));
482 * In strict cache mode we need invalidate mapping on the last
483 * close because it may cause a error when we open this file
484 * again and get at least level II oplock.
486 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
487 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
488 cifs_set_oplock_level(cifsi, 0);
491 spin_unlock(&cifsi->open_file_lock);
492 spin_unlock(&tcon->open_file_lock);
494 oplock_break_cancelled = wait_oplock_handler ?
495 cancel_work_sync(&cifs_file->oplock_break) : false;
497 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
498 struct TCP_Server_Info *server = tcon->ses->server;
502 if (server->ops->close_getattr)
503 server->ops->close_getattr(xid, tcon, cifs_file);
504 else if (server->ops->close)
505 server->ops->close(xid, tcon, &cifs_file->fid);
509 if (oplock_break_cancelled)
510 cifs_done_oplock_break(cifsi);
512 cifs_del_pending_open(&open);
515 queue_work(fileinfo_put_wq, &cifs_file->put);
517 cifsFileInfo_put_final(cifs_file);
520 int cifs_open(struct inode *inode, struct file *file)
526 struct cifs_sb_info *cifs_sb;
527 struct TCP_Server_Info *server;
528 struct cifs_tcon *tcon;
529 struct tcon_link *tlink;
530 struct cifsFileInfo *cfile = NULL;
531 char *full_path = NULL;
532 bool posix_open_ok = false;
534 struct cifs_pending_open open;
538 cifs_sb = CIFS_SB(inode->i_sb);
539 tlink = cifs_sb_tlink(cifs_sb);
542 return PTR_ERR(tlink);
544 tcon = tlink_tcon(tlink);
545 server = tcon->ses->server;
547 full_path = build_path_from_dentry(file_dentry(file));
548 if (full_path == NULL) {
553 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
554 inode, file->f_flags, full_path);
556 if (file->f_flags & O_DIRECT &&
557 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
558 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
559 file->f_op = &cifs_file_direct_nobrl_ops;
561 file->f_op = &cifs_file_direct_ops;
569 if (!tcon->broken_posix_open && tcon->unix_ext &&
570 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
571 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
572 /* can not refresh inode info since size could be stale */
573 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
574 cifs_sb->mnt_file_mode /* ignored */,
575 file->f_flags, &oplock, &fid.netfid, xid);
577 cifs_dbg(FYI, "posix open succeeded\n");
578 posix_open_ok = true;
579 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
580 if (tcon->ses->serverNOS)
581 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
582 tcon->ses->serverName,
583 tcon->ses->serverNOS);
584 tcon->broken_posix_open = true;
585 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
586 (rc != -EOPNOTSUPP)) /* path not found or net err */
589 * Else fallthrough to retry open the old way on network i/o
594 if (server->ops->get_lease_key)
595 server->ops->get_lease_key(inode, &fid);
597 cifs_add_pending_open(&fid, tlink, &open);
599 if (!posix_open_ok) {
600 if (server->ops->get_lease_key)
601 server->ops->get_lease_key(inode, &fid);
603 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
604 file->f_flags, &oplock, &fid, xid);
606 cifs_del_pending_open(&open);
611 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
613 if (server->ops->close)
614 server->ops->close(xid, tcon, &fid);
615 cifs_del_pending_open(&open);
620 cifs_fscache_set_inode_cookie(inode, file);
622 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
624 * Time to set mode which we can not set earlier due to
625 * problems creating new read-only files.
627 struct cifs_unix_set_info_args args = {
628 .mode = inode->i_mode,
629 .uid = INVALID_UID, /* no change */
630 .gid = INVALID_GID, /* no change */
631 .ctime = NO_CHANGE_64,
632 .atime = NO_CHANGE_64,
633 .mtime = NO_CHANGE_64,
636 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
643 cifs_put_tlink(tlink);
647 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
650 * Try to reacquire byte range locks that were released when session
651 * to server was lost.
/*
 * Re-push cached byte-range locks after a reconnect: POSIX-style via
 * cifs_push_posix_locks() when the server supports the Unix FCNTL
 * capability (and the mount allows it), otherwise the server's
 * mandatory-lock push op.
 * NOTE(review): return-type line, early-return value and final return are
 * elided in this fragment; code left byte-identical.
 */
654 cifs_relock_file(struct cifsFileInfo *cfile)
656 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
657 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
658 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
661 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
662 if (cinode->can_cache_brlcks) {
663 /* can cache locks - no need to relock */
664 up_read(&cinode->lock_sem);
668 if (cap_unix(tcon->ses) &&
669 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
670 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
671 rc = cifs_push_posix_locks(cfile);
673 rc = tcon->ses->server->ops->push_mand_locks(cfile);
675 up_read(&cinode->lock_sem);
680 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
685 struct cifs_sb_info *cifs_sb;
686 struct cifs_tcon *tcon;
687 struct TCP_Server_Info *server;
688 struct cifsInodeInfo *cinode;
690 char *full_path = NULL;
692 int disposition = FILE_OPEN;
693 int create_options = CREATE_NOT_DIR;
694 struct cifs_open_parms oparms;
697 mutex_lock(&cfile->fh_mutex);
698 if (!cfile->invalidHandle) {
699 mutex_unlock(&cfile->fh_mutex);
705 inode = d_inode(cfile->dentry);
706 cifs_sb = CIFS_SB(inode->i_sb);
707 tcon = tlink_tcon(cfile->tlink);
708 server = tcon->ses->server;
711 * Can not grab rename sem here because various ops, including those
712 * that already have the rename sem can end up causing writepage to get
713 * called and if the server was down that means we end up here, and we
714 * can never tell if the caller already has the rename_sem.
716 full_path = build_path_from_dentry(cfile->dentry);
717 if (full_path == NULL) {
719 mutex_unlock(&cfile->fh_mutex);
724 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
725 inode, cfile->f_flags, full_path);
727 if (tcon->ses->server->oplocks)
732 if (tcon->unix_ext && cap_unix(tcon->ses) &&
733 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
734 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
736 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
737 * original open. Must mask them off for a reopen.
739 unsigned int oflags = cfile->f_flags &
740 ~(O_CREAT | O_EXCL | O_TRUNC);
742 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
743 cifs_sb->mnt_file_mode /* ignored */,
744 oflags, &oplock, &cfile->fid.netfid, xid);
746 cifs_dbg(FYI, "posix reopen succeeded\n");
747 oparms.reconnect = true;
751 * fallthrough to retry open the old way on errors, especially
752 * in the reconnect path it is important to retry hard
756 desired_access = cifs_convert_flags(cfile->f_flags);
758 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
759 if (cfile->f_flags & O_SYNC)
760 create_options |= CREATE_WRITE_THROUGH;
762 if (cfile->f_flags & O_DIRECT)
763 create_options |= CREATE_NO_BUFFER;
765 if (server->ops->get_lease_key)
766 server->ops->get_lease_key(inode, &cfile->fid);
769 oparms.cifs_sb = cifs_sb;
770 oparms.desired_access = desired_access;
771 oparms.create_options = cifs_create_options(cifs_sb, create_options);
772 oparms.disposition = disposition;
773 oparms.path = full_path;
774 oparms.fid = &cfile->fid;
775 oparms.reconnect = true;
778 * Can not refresh inode by passing in file_info buf to be returned by
779 * ops->open and then calling get_inode_info with returned buf since
780 * file might have write behind data that needs to be flushed and server
781 * version of file size can be stale. If we knew for sure that inode was
782 * not dirty locally we could do this.
784 rc = server->ops->open(xid, &oparms, &oplock, NULL);
785 if (rc == -ENOENT && oparms.reconnect == false) {
786 /* durable handle timeout is expired - open the file again */
787 rc = server->ops->open(xid, &oparms, &oplock, NULL);
788 /* indicate that we need to relock the file */
789 oparms.reconnect = true;
793 mutex_unlock(&cfile->fh_mutex);
794 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
795 cifs_dbg(FYI, "oplock: %d\n", oplock);
796 goto reopen_error_exit;
800 cfile->invalidHandle = false;
801 mutex_unlock(&cfile->fh_mutex);
802 cinode = CIFS_I(inode);
805 rc = filemap_write_and_wait(inode->i_mapping);
806 if (!is_interrupt_error(rc))
807 mapping_set_error(inode->i_mapping, rc);
809 if (tcon->posix_extensions)
810 rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
811 else if (tcon->unix_ext)
812 rc = cifs_get_inode_info_unix(&inode, full_path,
815 rc = cifs_get_inode_info(&inode, full_path, NULL,
816 inode->i_sb, xid, NULL);
819 * Else we are writing out data to server already and could deadlock if
820 * we tried to flush data, and since we do not know if we have data that
821 * would invalidate the current end of file on the server we can not go
822 * to the server to get the new inode info.
826 * If the server returned a read oplock and we have mandatory brlocks,
827 * set oplock level to None.
829 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
830 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
834 server->ops->set_fid(cfile, &cfile->fid, oplock);
835 if (oparms.reconnect)
836 cifs_relock_file(cfile);
/*
 * ->release handler: drop this file's reference on its cifsFileInfo
 * (waiting for any oplock handler, close not offloaded) and clear
 * private_data.
 * NOTE(review): braces and the return statement are elided in this
 * fragment; code left byte-identical.
 */
844 int cifs_close(struct inode *inode, struct file *file)
846 if (file->private_data != NULL) {
847 _cifsFileInfo_put(file->private_data, true, false);
848 file->private_data = NULL;
851 /* return code from the ->release op is always ignored */
/*
 * After reconnect, walk all files open on @tcon and reopen those with
 * invalid handles.  Candidates are collected onto a private list (with a
 * reference each) under open_file_lock, then reopened outside the lock;
 * any failure re-arms tcon->need_reopen_files for a later retry.
 * NOTE(review): return-type line, early return and some braces are elided
 * in this fragment; code left byte-identical.
 */
856 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
858 struct cifsFileInfo *open_file;
859 struct list_head *tmp;
860 struct list_head *tmp1;
861 struct list_head tmp_list;
863 if (!tcon->use_persistent || !tcon->need_reopen_files)
866 tcon->need_reopen_files = false;
868 cifs_dbg(FYI, "Reopen persistent handles\n");
869 INIT_LIST_HEAD(&tmp_list);
871 /* list all files open on tree connection, reopen resilient handles */
872 spin_lock(&tcon->open_file_lock);
873 list_for_each(tmp, &tcon->openFileList) {
874 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
875 if (!open_file->invalidHandle)
877 cifsFileInfo_get(open_file);
878 list_add_tail(&open_file->rlist, &tmp_list);
880 spin_unlock(&tcon->open_file_lock);
882 list_for_each_safe(tmp, tmp1, &tmp_list) {
883 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
884 if (cifs_reopen_file(open_file, false /* do not flush */))
885 tcon->need_reopen_files = true;
886 list_del_init(&open_file->rlist);
887 cifsFileInfo_put(open_file);
/*
 * ->release handler for directories: close the directory handle on the
 * server if still open, free any buffered search (readdir) response, and
 * release the tlink and private data.
 * NOTE(review): several lines (xid acquisition, null checks, return) are
 * elided in this fragment; code left byte-identical.
 */
891 int cifs_closedir(struct inode *inode, struct file *file)
895 struct cifsFileInfo *cfile = file->private_data;
896 struct cifs_tcon *tcon;
897 struct TCP_Server_Info *server;
900 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
906 tcon = tlink_tcon(cfile->tlink);
907 server = tcon->ses->server;
909 cifs_dbg(FYI, "Freeing private data in close dir\n");
910 spin_lock(&cfile->file_info_lock);
911 if (server->ops->dir_needs_close(cfile)) {
912 cfile->invalidHandle = true;
913 spin_unlock(&cfile->file_info_lock);
914 if (server->ops->close_dir)
915 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
918 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
919 /* not much we can do if it fails anyway, ignore rc */
922 spin_unlock(&cfile->file_info_lock);
/* Free any leftover network buffer from an in-progress search. */
924 buf = cfile->srch_inf.ntwrk_buf_start;
926 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
927 cfile->srch_inf.ntwrk_buf_start = NULL;
928 if (cfile->srch_inf.smallBuf)
929 cifs_small_buf_release(buf);
931 cifs_buf_release(buf);
934 cifs_put_tlink(cfile->tlink);
935 kfree(file->private_data);
936 file->private_data = NULL;
937 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate and initialize a cifsLockInfo describing one byte-range lock
 * (offset/length/type/flags, owned by the current tgid).
 * NOTE(review): the NULL-alloc check, type/flags assignments and return
 * are elided in this fragment; code left byte-identical.
 */
942 static struct cifsLockInfo *
943 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
945 struct cifsLockInfo *lock =
946 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
949 lock->offset = offset;
950 lock->length = length;
952 lock->pid = current->tgid;
954 INIT_LIST_HEAD(&lock->blist);
955 init_waitqueue_head(&lock->block_q);
/*
 * Detach every waiter blocked on @lock and wake it so it can retry
 * acquiring its own lock (see the wait in cifs_lock_add_if()).
 * NOTE(review): return-type line and braces are elided in this fragment;
 * code left byte-identical.
 */
960 cifs_del_lock_waiters(struct cifsLockInfo *lock)
962 struct cifsLockInfo *li, *tmp;
963 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
964 list_del_init(&li->blist);
965 wake_up(&li->block_q);
969 #define CIFS_LOCK_OP 0
970 #define CIFS_READ_OP 1
971 #define CIFS_WRITE_OP 2
973 /* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's lock list for a lock overlapping [offset, offset+length)
 * that conflicts with the requested @type/@flags given the operation kind
 * in @rw_check (CIFS_LOCK_OP/CIFS_READ_OP/CIFS_WRITE_OP); on conflict the
 * offending lock is reported via @conf_lock.
 * NOTE(review): return-type line, continue/return statements and the
 * "store conflict and return true" tail are elided in this fragment;
 * code left byte-identical.
 */
975 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
976 __u64 length, __u8 type, __u16 flags,
977 struct cifsFileInfo *cfile,
978 struct cifsLockInfo **conf_lock, int rw_check)
980 struct cifsLockInfo *li;
981 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
982 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
984 list_for_each_entry(li, &fdlocks->locks, llist) {
/* Skip locks that do not overlap the requested range. */
985 if (offset + length <= li->offset ||
986 offset >= li->offset + li->length)
988 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
989 server->ops->compare_fids(cfile, cur_cfile)) {
990 /* shared lock prevents write op through the same fid */
991 if (!(li->type & server->vals->shared_lock_type) ||
992 rw_check != CIFS_WRITE_OP)
995 if ((type & server->vals->shared_lock_type) &&
996 ((server->ops->compare_fids(cfile, cur_cfile) &&
997 current->tgid == li->pid) || type == li->type))
999 if (rw_check == CIFS_LOCK_OP &&
1000 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1001 server->ops->compare_fids(cfile, cur_cfile))
/*
 * Check all fids open on this inode (cinode->llist) for a lock conflicting
 * with the requested range, delegating per-fid work to
 * cifs_find_fid_lock_conflict().
 * NOTE(review): return-type line, the call's final argument, early-break
 * and return are elided in this fragment; code left byte-identical.
 */
1011 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1012 __u8 type, __u16 flags,
1013 struct cifsLockInfo **conf_lock, int rw_check)
1016 struct cifs_fid_locks *cur;
1017 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1019 list_for_each_entry(cur, &cinode->llist, llist) {
1020 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1021 flags, cfile, conf_lock,
1031 * Check if there is another lock that prevents us to set the lock (mandatory
1032 * style). If such a lock exists, update the flock structure with its
1033 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1034 * or leave it the same if we can't. Returns 0 if we don't need to request to
1035 * the server or 1 otherwise.
/*
 * NOTE(review): return-type line, several braces and the return statement
 * are elided in this fragment; code left byte-identical.
 */
1038 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1039 __u8 type, struct file_lock *flock)
1042 struct cifsLockInfo *conf_lock;
1043 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1044 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1047 down_read(&cinode->lock_sem);
1049 exist = cifs_find_lock_conflict(cfile, offset, length, type,
1050 flock->fl_flags, &conf_lock,
/* Conflict found: report the conflicting lock's properties to the caller. */
1053 flock->fl_start = conf_lock->offset;
1054 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1055 flock->fl_pid = conf_lock->pid;
1056 if (conf_lock->type & server->vals->shared_lock_type)
1057 flock->fl_type = F_RDLCK;
1059 flock->fl_type = F_WRLCK;
1060 } else if (!cinode->can_cache_brlcks)
1063 flock->fl_type = F_UNLCK;
1065 up_read(&cinode->lock_sem);
/*
 * Append @lock to this fid's lock list under the inode's lock_sem write
 * lock (unconditional add; conflict checking is done by cifs_lock_add_if()).
 * NOTE(review): return-type line and braces are elided in this fragment;
 * code left byte-identical.
 */
1070 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1072 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1073 cifs_down_write(&cinode->lock_sem);
1074 list_add_tail(&lock->llist, &cfile->llist->locks);
1075 up_write(&cinode->lock_sem);
1079 * Set the byte-range lock (mandatory style). Returns:
1080 * 1) 0, if we set the lock and don't need to request to the server;
1081 * 2) 1, if no locks prevent us but we need to request to the server;
1082 * 3) -EACCES, if there is a lock that prevents us and wait is false.
/*
 * NOTE(review): return-type line, the @wait parameter, several branches
 * (the retry loop, the -EACCES path) and the return are elided in this
 * fragment; code left byte-identical.
 */
1085 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1088 struct cifsLockInfo *conf_lock;
1089 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1095 cifs_down_write(&cinode->lock_sem);
1097 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1098 lock->type, lock->flags, &conf_lock,
1100 if (!exist && cinode->can_cache_brlcks) {
1101 list_add_tail(&lock->llist, &cfile->llist->locks);
1102 up_write(&cinode->lock_sem);
/* Conflict: queue on the blocker's wait list and sleep until unlinked. */
1111 list_add_tail(&lock->blist, &conf_lock->blist);
1112 up_write(&cinode->lock_sem);
1113 rc = wait_event_interruptible(lock->block_q,
1114 (lock->blist.prev == &lock->blist) &&
1115 (lock->blist.next == &lock->blist));
1118 cifs_down_write(&cinode->lock_sem);
1119 list_del_init(&lock->blist);
1122 up_write(&cinode->lock_sem);
1127 * Check if there is another lock that prevents us to set the lock (posix
1128 * style). If such a lock exists, update the flock structure with its
1129 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1130 * or leave it the same if we can't. Returns 0 if we don't need to request to
1131 * the server or 1 otherwise.
/*
 * NOTE(review): return-type line, the rc variable, early return and final
 * return are elided in this fragment; code left byte-identical.
 */
1134 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1137 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1138 unsigned char saved_type = flock->fl_type;
1140 if ((flock->fl_flags & FL_POSIX) == 0)
1143 down_read(&cinode->lock_sem);
1144 posix_test_lock(file, flock);
/* No local conflict but brlocks aren't cached: ask the server instead. */
1146 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1147 flock->fl_type = saved_type;
1151 up_read(&cinode->lock_sem);
1156 * Set the byte-range lock (posix style). Returns:
1157 * 1) <0, if the error occurs while setting the lock;
1158 * 2) 0, if we set the lock and don't need to request to the server;
1159 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1160 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
/*
 * NOTE(review): return-type line, early returns and the final return are
 * elided in this fragment; code left byte-identical.
 */
1163 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1165 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1166 int rc = FILE_LOCK_DEFERRED + 1;
1168 if ((flock->fl_flags & FL_POSIX) == 0)
1171 cifs_down_write(&cinode->lock_sem);
/* Can't cache brlocks: the caller must send the lock to the server. */
1172 if (!cinode->can_cache_brlcks) {
1173 up_write(&cinode->lock_sem);
1177 rc = posix_lock_file(file, flock, NULL);
1178 up_write(&cinode->lock_sem);
1183 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1186 int rc = 0, stored_rc;
1187 struct cifsLockInfo *li, *tmp;
1188 struct cifs_tcon *tcon;
1189 unsigned int num, max_num, max_buf;
1190 LOCKING_ANDX_RANGE *buf, *cur;
1191 static const int types[] = {
1192 LOCKING_ANDX_LARGE_FILES,
1193 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1198 tcon = tlink_tcon(cfile->tlink);
1201 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1202 * and check it before using.
1204 max_buf = tcon->ses->server->maxBuf;
1205 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1210 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1212 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1214 max_num = (max_buf - sizeof(struct smb_hdr)) /
1215 sizeof(LOCKING_ANDX_RANGE);
1216 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1222 for (i = 0; i < 2; i++) {
1225 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1226 if (li->type != types[i])
1228 cur->Pid = cpu_to_le16(li->pid);
1229 cur->LengthLow = cpu_to_le32((u32)li->length);
1230 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1231 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1232 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1233 if (++num == max_num) {
1234 stored_rc = cifs_lockv(xid, tcon,
1236 (__u8)li->type, 0, num,
1247 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1248 (__u8)types[i], 0, num, buf);
/*
 * Derive a 32-bit lock-owner id from the fl_owner_t pointer, XORed with
 * cifs_lock_secret — presumably so the raw kernel pointer value is not
 * sent to the server as-is (TODO confirm intent at cifs_lock_secret's
 * definition).
 */
1260 hash_lockowner(fl_owner_t owner)
1262 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
/*
 * Scratch record used by cifs_push_posix_locks() to stage one POSIX
 * lock (offset/length/pid/netfid/type — remaining fields are outside
 * this view) before sending it to the server, linked via llist.
 */
1265 struct lock_to_push {
1266 struct list_head llist;
/*
 * Push all cached POSIX (fcntl) locks on @cfile's inode to the server.
 * Strategy: count the locks under flc_lock, drop the spinlock,
 * preallocate one lock_to_push per lock with GFP_KERNEL (sleeping is
 * not allowed under flc_lock), re-take the spinlock to copy the lock
 * details, then issue CIFSSMBPosixLock() calls outside any spinlock.
 */
1275 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1277 struct inode *inode = d_inode(cfile->dentry);
1278 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1279 struct file_lock *flock;
1280 struct file_lock_context *flctx = inode->i_flctx;
1281 unsigned int count = 0, i;
1282 int rc = 0, xid, type;
1283 struct list_head locks_to_send, *el;
1284 struct lock_to_push *lck, *tmp;
/* first pass: count the POSIX locks (count incremented in the loop
 * body, which is outside this view) */
1292 spin_lock(&flctx->flc_lock);
1293 list_for_each(el, &flctx->flc_posix) {
1296 spin_unlock(&flctx->flc_lock);
1298 INIT_LIST_HEAD(&locks_to_send);
1301 * Allocating count locks is enough because no FL_POSIX locks can be
1302 * added to the list while we are holding cinode->lock_sem that
1303 * protects locking operations of this inode.
1305 for (i = 0; i < count; i++) {
1306 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1311 list_add_tail(&lck->llist, &locks_to_send);
/* second pass: fill the preallocated entries from the live locks */
1314 el = locks_to_send.next;
1315 spin_lock(&flctx->flc_lock);
1316 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1317 if (el == &locks_to_send) {
1319 * The list ended. We don't have enough allocated
1320 * structures - something is really wrong.
1322 cifs_dbg(VFS, "Can't push all brlocks!\n");
/* POSIX fl_end is inclusive, hence the +1 for the wire length */
1325 length = 1 + flock->fl_end - flock->fl_start;
1326 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1330 lck = list_entry(el, struct lock_to_push, llist);
1331 lck->pid = hash_lockowner(flock->fl_owner);
1332 lck->netfid = cfile->fid.netfid;
1333 lck->length = length;
1335 lck->offset = flock->fl_start;
1337 spin_unlock(&flctx->flc_lock);
/* third pass: send each staged lock to the server and free the entry */
1339 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1342 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1343 lck->offset, lck->length, NULL,
1347 list_del(&lck->llist);
/* error-path cleanup: free whatever was staged but not sent */
1355 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1356 list_del(&lck->llist);
/*
 * Flush all locally cached byte-range locks for @cfile to the server,
 * choosing the POSIX path when the server advertises Unix extensions
 * with fcntl capability (and the mount didn't opt out), otherwise the
 * mandatory-lock path.  Clears can_cache_brlcks so future locks go
 * straight to the server.
 */
1363 cifs_push_locks(struct cifsFileInfo *cfile)
1365 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1366 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1367 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1370 /* we are going to update can_cache_brlcks here - need a write access */
1371 cifs_down_write(&cinode->lock_sem);
/* nothing cached: another pusher already ran, or caching was off */
1372 if (!cinode->can_cache_brlcks) {
1373 up_write(&cinode->lock_sem);
1377 if (cap_unix(tcon->ses) &&
1378 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1379 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1380 rc = cifs_push_posix_locks(cfile);
1382 rc = tcon->ses->server->ops->push_mand_locks(cfile);
/* from here on, lock requests must hit the wire immediately */
1384 cinode->can_cache_brlcks = false;
1385 up_write(&cinode->lock_sem);
/*
 * Decode a VFS file_lock into CIFS terms: log the flag bits, then set
 * *type from the server's lock-type values based on fl_type (the
 * *lock/*unlock/*wait_flag outputs are set in branch bodies not
 * visible in this dump).
 */
1390 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1391 bool *wait_flag, struct TCP_Server_Info *server)
1393 if (flock->fl_flags & FL_POSIX)
1394 cifs_dbg(FYI, "Posix\n");
1395 if (flock->fl_flags & FL_FLOCK)
1396 cifs_dbg(FYI, "Flock\n");
1397 if (flock->fl_flags & FL_SLEEP) {
1398 cifs_dbg(FYI, "Blocking lock\n");
1401 if (flock->fl_flags & FL_ACCESS)
1402 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1403 if (flock->fl_flags & FL_LEASE)
1404 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
/* warn about any flag bits this function does not understand */
1405 if (flock->fl_flags &
1406 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1407 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1408 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
/* base type: large-file (64-bit range) locking, per-dialect value */
1410 *type = server->vals->large_lock_type;
1411 if (flock->fl_type == F_WRLCK) {
1412 cifs_dbg(FYI, "F_WRLCK\n");
1413 *type |= server->vals->exclusive_lock_type;
1415 } else if (flock->fl_type == F_UNLCK) {
1416 cifs_dbg(FYI, "F_UNLCK\n");
1417 *type |= server->vals->unlock_lock_type;
1419 /* Check if unlock includes more than one lock range */
1420 } else if (flock->fl_type == F_RDLCK) {
1421 cifs_dbg(FYI, "F_RDLCK\n");
1422 *type |= server->vals->shared_lock_type;
1424 } else if (flock->fl_type == F_EXLCK) {
1425 cifs_dbg(FYI, "F_EXLCK\n");
1426 *type |= server->vals->exclusive_lock_type;
1428 } else if (flock->fl_type == F_SHLCK) {
1429 cifs_dbg(FYI, "F_SHLCK\n");
1430 *type |= server->vals->shared_lock_type;
1433 cifs_dbg(FYI, "Unknown type of lock\n");
/*
 * Handle F_GETLK: test whether @flock could be set.  Uses the POSIX
 * lock test when posix_lck, otherwise probes the server by attempting
 * a mandatory lock of the same (then shared) type and unlocking it on
 * success; flock->fl_type is rewritten to report the result.
 */
1437 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1438 bool wait_flag, bool posix_lck, unsigned int xid)
/* inclusive fl_end -> wire length needs the +1 */
1441 __u64 length = 1 + flock->fl_end - flock->fl_start;
1442 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1443 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1444 struct TCP_Server_Info *server = tcon->ses->server;
1445 __u16 netfid = cfile->fid.netfid;
1448 int posix_lock_type;
/* POSIX path: local conflict test first, then ask the server */
1450 rc = cifs_posix_lock_test(file, flock);
1454 if (type & server->vals->shared_lock_type)
1455 posix_lock_type = CIFS_RDLCK;
1457 posix_lock_type = CIFS_WRLCK;
1458 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1459 hash_lockowner(flock->fl_owner),
1460 flock->fl_start, length, flock,
1461 posix_lock_type, wait_flag);
/* mandatory path: check our own cached locks first */
1465 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1469 /* BB we could chain these into one lock request BB */
/* probe: try to take the lock on the server ... */
1470 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
/* ... and if it succeeded, release it again and report "unlocked" */
1473 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1475 flock->fl_type = F_UNLCK;
1477 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
/* shared-type request that failed: report an exclusive conflict */
1482 if (type & server->vals->shared_lock_type) {
1483 flock->fl_type = F_WRLCK;
/* retry the probe with the shared type to distinguish rd/wr conflict */
1487 type &= ~server->vals->exclusive_lock_type;
1489 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1490 type | server->vals->shared_lock_type,
1493 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1494 type | server->vals->shared_lock_type, 0, 1, false);
1495 flock->fl_type = F_RDLCK;
1497 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1500 flock->fl_type = F_WRLCK;
/*
 * Move every entry from @source onto @dest (list_move places each at
 * the head of @dest), leaving @source empty.
 */
1506 cifs_move_llist(struct list_head *source, struct list_head *dest)
1508 struct list_head *li, *tmp;
1509 list_for_each_safe(li, tmp, source)
1510 list_move(li, dest);
/*
 * Free every cifsLockInfo on @llist: wake any waiters on each lock,
 * unlink it, then free it (the kfree line falls outside this view).
 */
1514 cifs_free_llist(struct list_head *llist)
1516 struct cifsLockInfo *li, *tmp;
1517 list_for_each_entry_safe(li, tmp, llist, llist) {
1518 cifs_del_lock_waiters(li);
1519 list_del(&li->llist);
/*
 * Unlock every cached mandatory lock of this process that lies fully
 * inside @flock's range, batching the server unlock requests the same
 * way cifs_push_mandatory_locks() batches locks.  Locks are parked on
 * tmp_llist while a batch is in flight so they can be restored to the
 * file's list if the server rejects the unlock.
 */
1525 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1528 int rc = 0, stored_rc;
1529 static const int types[] = {
1530 LOCKING_ANDX_LARGE_FILES,
1531 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1534 unsigned int max_num, num, max_buf;
1535 LOCKING_ANDX_RANGE *buf, *cur;
1536 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1537 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1538 struct cifsLockInfo *li, *tmp;
1539 __u64 length = 1 + flock->fl_end - flock->fl_start;
1540 struct list_head tmp_llist;
1542 INIT_LIST_HEAD(&tmp_llist);
1545 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1546 * and check it before using.
1548 max_buf = tcon->ses->server->maxBuf;
1549 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1552 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1554 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
/* ranges per LOCKING_ANDX request */
1556 max_num = (max_buf - sizeof(struct smb_hdr)) /
1557 sizeof(LOCKING_ANDX_RANGE);
1558 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1562 cifs_down_write(&cinode->lock_sem);
/* one pass per lock type: exclusive, then shared */
1563 for (i = 0; i < 2; i++) {
1566 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* skip locks not fully contained in the unlock range */
1567 if (flock->fl_start > li->offset ||
1568 (flock->fl_start + length) <
1569 (li->offset + li->length))
/* only this process's locks, of this pass's type */
1571 if (current->tgid != li->pid)
1573 if (types[i] != li->type)
1575 if (cinode->can_cache_brlcks) {
1577 * We can cache brlock requests - simply remove
1578 * a lock from the file's list.
1580 list_del(&li->llist);
1581 cifs_del_lock_waiters(li);
/* marshal the range for the wire (LE hi/lo 32-bit halves) */
1585 cur->Pid = cpu_to_le16(li->pid);
1586 cur->LengthLow = cpu_to_le32((u32)li->length);
1587 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1588 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1589 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1591 * We need to save a lock here to let us add it again to
1592 * the file's list if the unlock range request fails on
1595 list_move(&li->llist, &tmp_llist);
1596 if (++num == max_num) {
1597 stored_rc = cifs_lockv(xid, tcon,
1599 li->type, num, 0, buf);
1602 * We failed on the unlock range
1603 * request - add all locks from the tmp
1604 * list to the head of the file's list.
1606 cifs_move_llist(&tmp_llist,
1607 &cfile->llist->locks);
1611 * The unlock range request succeed -
1612 * free the tmp list.
1614 cifs_free_llist(&tmp_llist);
/* flush any partial batch left after the list walk */
1621 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1622 types[i], num, 0, buf);
1624 cifs_move_llist(&tmp_llist,
1625 &cfile->llist->locks);
1628 cifs_free_llist(&tmp_llist);
1632 up_write(&cinode->lock_sem);
/*
 * Handle F_SETLK/F_SETLKW (and flock lock/unlock): POSIX path sets the
 * lock locally first and sends it to the server only when needed;
 * mandatory path registers the lock via cifs_lock_add_if(), breaks a
 * cached read oplock before locking, and finally mirrors the result
 * into the VFS with locks_lock_file_wait().
 */
1638 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1639 bool wait_flag, bool posix_lck, int lock, int unlock,
1643 __u64 length = 1 + flock->fl_end - flock->fl_start;
1644 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1645 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1646 struct TCP_Server_Info *server = tcon->ses->server;
1647 struct inode *inode = d_inode(cfile->dentry);
1650 int posix_lock_type;
1652 rc = cifs_posix_lock_set(file, flock);
/* <= FILE_LOCK_DEFERRED means handled (or deferred) locally */
1653 if (rc <= FILE_LOCK_DEFERRED)
1656 if (type & server->vals->shared_lock_type)
1657 posix_lock_type = CIFS_RDLCK;
1659 posix_lock_type = CIFS_WRLCK;
1662 posix_lock_type = CIFS_UNLCK;
1664 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1665 hash_lockowner(flock->fl_owner),
1666 flock->fl_start, length,
1667 NULL, posix_lock_type, wait_flag);
1672 struct cifsLockInfo *lock;
1674 lock = cifs_lock_init(flock->fl_start, length, type,
/* checks for conflicts with our own cached locks, or queues */
1679 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1688 * Windows 7 server can delay breaking lease from read to None
1689 * if we set a byte-range lock on a file - break it explicitly
1690 * before sending the lock to the server to be sure the next
1691 * read won't conflict with non-overlapted locks due to
1694 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1695 CIFS_CACHE_READ(CIFS_I(inode))) {
1696 cifs_zap_mapping(inode);
1697 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1699 CIFS_I(inode)->oplock = 0;
1702 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1703 type, 1, 0, wait_flag);
/* server accepted: remember the lock locally */
1709 cifs_lock_add(cfile, lock);
1711 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1714 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1716 * If this is a request to remove all locks because we
1717 * are closing the file, it doesn't matter if the
1718 * unlocking failed as both cifs.ko and the SMB server
1719 * remove the lock on file close
1722 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1723 if (!(flock->fl_flags & FL_CLOSE))
/* keep the VFS lock state in sync with what the server holds */
1726 rc = locks_lock_file_wait(file, flock);
/*
 * ->flock() handler: decode the request with cifs_read_flock(), decide
 * whether the POSIX path applies (Unix extensions + fcntl capability,
 * not disabled by mount flag), and delegate to cifs_setlk().
 */
1731 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1734 int lock = 0, unlock = 0;
1735 bool wait_flag = false;
1736 bool posix_lck = false;
1737 struct cifs_sb_info *cifs_sb;
1738 struct cifs_tcon *tcon;
1739 struct cifsFileInfo *cfile;
/* only FL_FLOCK requests belong here */
1745 if (!(fl->fl_flags & FL_FLOCK))
1748 cfile = (struct cifsFileInfo *)file->private_data;
1749 tcon = tlink_tcon(cfile->tlink);
1751 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1753 cifs_sb = CIFS_FILE_SB(file);
1755 if (cap_unix(tcon->ses) &&
1756 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1757 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1760 if (!lock && !unlock) {
1762 * if no lock or unlock then nothing to do since we do not
1769 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
/*
 * ->lock() handler for fcntl byte-range locks: decode the request,
 * choose POSIX vs mandatory semantics, then dispatch to cifs_getlk()
 * for F_GETLK or cifs_setlk() for set/unset requests.
 */
1777 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1780 int lock = 0, unlock = 0;
1781 bool wait_flag = false;
1782 bool posix_lck = false;
1783 struct cifs_sb_info *cifs_sb;
1784 struct cifs_tcon *tcon;
1785 struct cifsFileInfo *cfile;
1791 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1792 cmd, flock->fl_flags, flock->fl_type,
1793 flock->fl_start, flock->fl_end);
1795 cfile = (struct cifsFileInfo *)file->private_data;
1796 tcon = tlink_tcon(cfile->tlink);
1798 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1800 cifs_sb = CIFS_FILE_SB(file);
/* POSIX path requires Unix extensions + fcntl cap, unless disabled */
1802 if (cap_unix(tcon->ses) &&
1803 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1804 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1807 * BB add code here to normalize offset and length to account for
1808 * negative length which we can not accept over the wire.
1810 if (IS_GETLK(cmd)) {
1811 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1816 if (!lock && !unlock) {
1818 * if no lock or unlock then nothing to do since we do not
1825 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1832 * update the file size (if needed) after a write. Should be called with
1833 * the inode->i_lock held
/* advance the cached server EOF monotonically; never shrinks it */
1836 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1837 unsigned int bytes_written)
1839 loff_t end_of_write = offset + bytes_written;
1841 if (end_of_write > cifsi->server_eof)
1842 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data to the server
 * at *@offset through @open_file's handle, retrying on -EAGAIN
 * (reopening an invalidated handle if needed).  Advances *@offset,
 * updates the cached server EOF and i_size, and returns the total
 * number of bytes written.
 */
1846 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1847 size_t write_size, loff_t *offset)
1850 unsigned int bytes_written = 0;
1851 unsigned int total_written;
1852 struct cifs_tcon *tcon;
1853 struct TCP_Server_Info *server;
1855 struct dentry *dentry = open_file->dentry;
1856 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1857 struct cifs_io_parms io_parms = {0};
1859 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1860 write_size, *offset, dentry);
1862 tcon = tlink_tcon(open_file->tlink);
1863 server = tcon->ses->server;
/* dialect must provide a synchronous write op */
1865 if (!server->ops->sync_write)
1870 for (total_written = 0; write_size > total_written;
1871 total_written += bytes_written) {
/* retry loop: -EAGAIN means reconnect/handle-reopen then resend */
1873 while (rc == -EAGAIN) {
1877 if (open_file->invalidHandle) {
1878 /* we could deadlock if we called
1879 filemap_fdatawait from here so tell
1880 reopen_file not to flush data to
1882 rc = cifs_reopen_file(open_file, false);
/* cap each request at the server's retry write size */
1887 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1888 (unsigned int)write_size - total_written);
1889 /* iov[0] is reserved for smb header */
1890 iov[1].iov_base = (char *)write_data + total_written;
1891 iov[1].iov_len = len;
1893 io_parms.tcon = tcon;
1894 io_parms.offset = *offset;
1895 io_parms.length = len;
1896 rc = server->ops->sync_write(xid, &open_file->fid,
1897 &io_parms, &bytes_written, iov, 1);
/* zero bytes written (or error) ends the outer loop */
1899 if (rc || (bytes_written == 0)) {
/* i_lock protects both server_eof and i_size updates */
1907 spin_lock(&d_inode(dentry)->i_lock);
1908 cifs_update_eof(cifsi, *offset, bytes_written);
1909 spin_unlock(&d_inode(dentry)->i_lock);
1910 *offset += bytes_written;
1914 cifs_stats_bytes_written(tcon, total_written);
1916 if (total_written > 0) {
1917 spin_lock(&d_inode(dentry)->i_lock);
1918 if (*offset > d_inode(dentry)->i_size)
1919 i_size_write(d_inode(dentry), *offset);
1920 spin_unlock(&d_inode(dentry)->i_lock);
1922 mark_inode_dirty_sync(d_inode(dentry));
1924 return total_written;
/*
 * Find an open handle on @cifs_inode that allows reading, taking a
 * reference on it (caller must cifsFileInfo_put).  On multiuser mounts
 * the handle must belong to the current fsuid.  Returns NULL-ish when
 * none is found (the return statements fall outside this view).
 */
1927 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1930 struct cifsFileInfo *open_file = NULL;
1931 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1933 /* only filter by fsuid on multiuser mounts */
1934 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1937 spin_lock(&cifs_inode->open_file_lock);
1938 /* we could simply get the first_list_entry since write-only entries
1939 are always at the end of the list but since the first entry might
1940 have a close pending, we go through the whole list */
1941 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1942 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1944 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1945 if (!open_file->invalidHandle) {
1946 /* found a good file */
1947 /* lock it so it will not be closed on us */
1948 cifsFileInfo_get(open_file);
1949 spin_unlock(&cifs_inode->open_file_lock);
1951 } /* else might as well continue, and look for
1952 another, or simply have the caller reopen it
1953 again rather than trying to fix this handle */
1954 } else /* write only file */
1955 break; /* write only files are last so must be done */
1957 spin_unlock(&cifs_inode->open_file_lock);
1961 /* Return -EBADF if no handle is found and general rc otherwise */
/*
 * Find a writable open handle on @cifs_inode, preferring a valid one
 * owned by the current tgid, then any valid one, then attempting to
 * reopen an invalidated handle (up to MAX_REOPEN_ATT retries).  On
 * success *ret_file holds a referenced cifsFileInfo.
 */
1963 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
1964 struct cifsFileInfo **ret_file)
1966 struct cifsFileInfo *open_file, *inv_file = NULL;
1967 struct cifs_sb_info *cifs_sb;
1968 bool any_available = false;
1970 unsigned int refind = 0;
1971 bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
1972 bool with_delete = flags & FIND_WR_WITH_DELETE;
1976 * Having a null inode here (because mapping->host was set to zero by
1977 * the VFS or MM) should not happen but we had reports of on oops (due
1978 * to it being zero) during stress testcases so we need to check for it
1981 if (cifs_inode == NULL) {
1982 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1987 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1989 /* only filter by fsuid on multiuser mounts */
1990 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1993 spin_lock(&cifs_inode->open_file_lock);
/* bail out after too many refind/reopen attempts */
1995 if (refind > MAX_REOPEN_ATT) {
1996 spin_unlock(&cifs_inode->open_file_lock);
1999 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* first pass restricts to the current thread group's handles */
2000 if (!any_available && open_file->pid != current->tgid)
2002 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2004 if (with_delete && !(open_file->fid.access & DELETE))
2006 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2007 if (!open_file->invalidHandle) {
2008 /* found a good writable file */
2009 cifsFileInfo_get(open_file);
2010 spin_unlock(&cifs_inode->open_file_lock);
2011 *ret_file = open_file;
/* remember an invalidated candidate to reopen later */
2015 inv_file = open_file;
2019 /* couldn't find useable FH with same pid, try any available */
2020 if (!any_available) {
2021 any_available = true;
2022 goto refind_writable;
2026 any_available = false;
2027 cifsFileInfo_get(inv_file);
2030 spin_unlock(&cifs_inode->open_file_lock);
/* try to revive the invalidated handle outside the spinlock */
2033 rc = cifs_reopen_file(inv_file, false);
2035 *ret_file = inv_file;
/* reopen failed: push it to the list tail and retry the search */
2039 spin_lock(&cifs_inode->open_file_lock);
2040 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2041 spin_unlock(&cifs_inode->open_file_lock);
2042 cifsFileInfo_put(inv_file);
2045 spin_lock(&cifs_inode->open_file_lock);
2046 goto refind_writable;
/*
 * Convenience wrapper: return a referenced writable handle for
 * @cifs_inode via cifs_get_writable_file(), logging on failure.
 */
2052 struct cifsFileInfo *
2053 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2055 struct cifsFileInfo *cfile;
2058 rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2060 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
/*
 * Look up an open file on @tcon whose dentry path equals @name and
 * return a writable handle for its inode via cifs_get_writable_file().
 * Walks tcon->openFileList under open_file_lock, rebuilding each
 * candidate's path for comparison.
 */
2066 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2068 struct cifsFileInfo **ret_file)
2070 struct list_head *tmp;
2071 struct cifsFileInfo *cfile;
2072 struct cifsInodeInfo *cinode;
2077 spin_lock(&tcon->open_file_lock);
2078 list_for_each(tmp, &tcon->openFileList) {
2079 cfile = list_entry(tmp, struct cifsFileInfo,
/* NOTE(review): build_path_from_dentry() allocates; the kfree on the
 * mismatch path presumably sits in the stripped lines — confirm */
2081 full_path = build_path_from_dentry(cfile->dentry);
2082 if (full_path == NULL) {
2083 spin_unlock(&tcon->open_file_lock);
2086 if (strcmp(full_path, name)) {
/* path matches: hand off to the per-inode writable-handle search */
2092 cinode = CIFS_I(d_inode(cfile->dentry));
2093 spin_unlock(&tcon->open_file_lock);
2094 return cifs_get_writable_file(cinode, flags, ret_file);
2097 spin_unlock(&tcon->open_file_lock);
/*
 * Readable counterpart of cifs_get_writable_path(): find an open file
 * on @tcon matching @name and return a readable handle for its inode,
 * or -ENOENT if none exists.
 */
2102 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2103 struct cifsFileInfo **ret_file)
2105 struct list_head *tmp;
2106 struct cifsFileInfo *cfile;
2107 struct cifsInodeInfo *cinode;
2112 spin_lock(&tcon->open_file_lock);
2113 list_for_each(tmp, &tcon->openFileList) {
2114 cfile = list_entry(tmp, struct cifsFileInfo,
2116 full_path = build_path_from_dentry(cfile->dentry);
2117 if (full_path == NULL) {
2118 spin_unlock(&tcon->open_file_lock);
2121 if (strcmp(full_path, name)) {
2127 cinode = CIFS_I(d_inode(cfile->dentry));
2128 spin_unlock(&tcon->open_file_lock);
2129 *ret_file = find_readable_file(cinode, 0);
2130 return *ret_file ? 0 : -ENOENT;
2133 spin_unlock(&tcon->open_file_lock);
/*
 * Write bytes [from, to) of @page back to the server via any writable
 * handle on the inode (cifs_write()), clamping @to so the write never
 * extends the file.  Used by the writepage path when no specific file
 * handle is associated with the page.
 */
2137 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2139 struct address_space *mapping = page->mapping;
2140 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2143 int bytes_written = 0;
2144 struct inode *inode;
2145 struct cifsFileInfo *open_file;
2147 if (!mapping || !mapping->host)
2150 inode = page->mapping->host;
2152 offset += (loff_t)from;
2153 write_data = kmap(page);
/* sanity: the requested sub-range must lie within one page */
2156 if ((to > PAGE_SIZE) || (from > to)) {
2161 /* racing with truncate? */
2162 if (offset > mapping->host->i_size) {
2164 return 0; /* don't care */
2167 /* check to make sure that we are not extending the file */
2168 if (mapping->host->i_size - offset < (loff_t)to)
2169 to = (unsigned)(mapping->host->i_size - offset);
2171 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2174 bytes_written = cifs_write(open_file, open_file->pid,
2175 write_data, to - from, &offset);
2176 cifsFileInfo_put(open_file);
2177 /* Does mm or vfs already set times? */
2178 inode->i_atime = inode->i_mtime = current_time(inode);
2179 if ((bytes_written > 0) && (offset))
2181 else if (bytes_written < 0)
2186 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
/* non-retryable errors are surfaced; retryable ones presumably map
 * to a retry-style rc in the stripped lines — confirm */
2187 if (!is_retryable_error(rc))
/*
 * Allocate a cifs_writedata sized for up to @tofind pages and populate
 * wdata->pages with dirty pages tagged in @mapping between *index and
 * @end, reporting how many were found in *found_pages.
 */
2195 static struct cifs_writedata *
2196 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2197 pgoff_t end, pgoff_t *index,
2198 unsigned int *found_pages)
2200 struct cifs_writedata *wdata;
2202 wdata = cifs_writedata_alloc((unsigned int)tofind,
2203 cifs_writev_complete);
2207 *found_pages = find_get_pages_range_tag(mapping, index, end,
2208 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
/*
 * From the @found_pages candidates in @wdata, lock and claim the run
 * of consecutive dirty pages suitable for one write request: skip
 * pages that were truncated, moved past @end, non-consecutive, or
 * already under writeback; mark the keepers PG_writeback.  Returns the
 * number of pages kept (compacted at the front of wdata->pages) and
 * releases the rest.
 */
2213 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2214 struct address_space *mapping,
2215 struct writeback_control *wbc,
2216 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2218 unsigned int nr_pages = 0, i;
2221 for (i = 0; i < found_pages; i++) {
2222 page = wdata->pages[i];
2224 * At this point we hold neither the i_pages lock nor the
2225 * page lock: the page may be truncated or invalidated
2226 * (changing page->mapping to NULL), or even swizzled
2227 * back from swapper_space to tmpfs file mapping
/* first page: blocking lock; later pages: trylock to avoid stalls */
2232 else if (!trylock_page(page))
2235 if (unlikely(page->mapping != mapping)) {
2240 if (!wbc->range_cyclic && page->index > end) {
2246 if (*next && (page->index != *next)) {
2247 /* Not next consecutive page */
/* data-integrity writeback must wait for in-flight writeback */
2252 if (wbc->sync_mode != WB_SYNC_NONE)
2253 wait_on_page_writeback(page);
2255 if (PageWriteback(page) ||
2256 !clear_page_dirty_for_io(page)) {
2262 * This actually clears the dirty bit in the radix tree.
2263 * See cifs_writepage() for more commentary.
2265 set_page_writeback(page);
/* page starts at/after EOF: nothing to write for it */
2266 if (page_offset(page) >= i_size_read(mapping->host)) {
2269 end_page_writeback(page);
2273 wdata->pages[i] = page;
2274 *next = page->index + 1;
2278 /* reset index to refind any pages skipped */
2280 *index = wdata->pages[0]->index + 1;
2282 /* put any pages we aren't going to use */
2283 for (i = nr_pages; i < found_pages; i++) {
2284 put_page(wdata->pages[i]);
2285 wdata->pages[i] = NULL;
/*
 * Finalize @wdata (offset, sizes, tail length, pid) for the @nr_pages
 * it carries and submit it as an async write, adjusting credits first
 * and restoring an invalidated handle if needed (the reopen branch
 * body sits outside this view).
 */
2292 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2293 struct address_space *mapping, struct writeback_control *wbc)
2297 wdata->sync_mode = wbc->sync_mode;
2298 wdata->nr_pages = nr_pages;
2299 wdata->offset = page_offset(wdata->pages[0]);
2300 wdata->pagesz = PAGE_SIZE;
/* last page may extend past EOF: clamp its length to i_size */
2301 wdata->tailsz = min(i_size_read(mapping->host) -
2302 page_offset(wdata->pages[nr_pages - 1]),
2304 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2305 wdata->pid = wdata->cfile->pid;
2307 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2311 if (wdata->cfile->invalidHandle)
2314 rc = wdata->server->ops->async_writev(wdata,
2315 cifs_writedata_release);
/*
 * ->writepages(): gather runs of consecutive dirty pages and send them
 * as batched async writes.  Falls back to generic_writepages() when
 * wsize < PAGE_SIZE.  Per iteration: obtain credits for up to wsize
 * bytes, collect/prepare pages, submit, and on failure either redirty
 * (retryable errors) or flag the mapping with the error.
 */
2320 static int cifs_writepages(struct address_space *mapping,
2321 struct writeback_control *wbc)
2323 struct inode *inode = mapping->host;
2324 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2325 struct TCP_Server_Info *server;
2326 bool done = false, scanned = false, range_whole = false;
2328 struct cifs_writedata *wdata;
2329 struct cifsFileInfo *cfile = NULL;
2335 * If wsize is smaller than the page cache size, default to writing
2336 * one page at a time via cifs_writepage
2338 if (cifs_sb->wsize < PAGE_SIZE)
2339 return generic_writepages(mapping, wbc);
2342 if (wbc->range_cyclic) {
2343 index = mapping->writeback_index; /* Start from prev offset */
2346 index = wbc->range_start >> PAGE_SHIFT;
2347 end = wbc->range_end >> PAGE_SHIFT;
2348 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2352 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2355 while (!done && index <= end) {
2356 unsigned int i, nr_pages, found_pages, wsize;
2357 pgoff_t next = 0, tofind, saved_index = index;
2358 struct cifs_credits credits_on_stack;
2359 struct cifs_credits *credits = &credits_on_stack;
2360 int get_file_rc = 0;
/* drop the previous iteration's handle reference, if any */
2363 cifsFileInfo_put(cfile);
2365 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2367 /* in case of an error store it to return later */
/* block until the server grants credits for up to wsize bytes */
2371 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2378 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2380 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2385 add_credits_and_wake_if(server, credits, 0);
2389 if (found_pages == 0) {
2390 kref_put(&wdata->refcount, cifs_writedata_release);
2391 add_credits_and_wake_if(server, credits, 0);
2395 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2396 end, &index, &next, &done);
2398 /* nothing to write? */
2399 if (nr_pages == 0) {
2400 kref_put(&wdata->refcount, cifs_writedata_release);
2401 add_credits_and_wake_if(server, credits, 0);
2405 wdata->credits = credits_on_stack;
2406 wdata->cfile = cfile;
2407 wdata->server = server;
2410 if (!wdata->cfile) {
2411 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2413 if (is_retryable_error(get_file_rc))
2418 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2420 for (i = 0; i < nr_pages; ++i)
2421 unlock_page(wdata->pages[i]);
2423 /* send failure -- clean up the mess */
2425 add_credits_and_wake_if(server, &wdata->credits, 0);
2426 for (i = 0; i < nr_pages; ++i) {
2427 if (is_retryable_error(rc))
2428 redirty_page_for_writepage(wbc,
2431 SetPageError(wdata->pages[i]);
2432 end_page_writeback(wdata->pages[i]);
2433 put_page(wdata->pages[i]);
2435 if (!is_retryable_error(rc))
2436 mapping_set_error(mapping, rc);
2438 kref_put(&wdata->refcount, cifs_writedata_release);
/* data-integrity sync must retry the same range on -EAGAIN */
2440 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2441 index = saved_index;
2445 /* Return immediately if we received a signal during writing */
2446 if (is_interrupt_error(rc)) {
2451 if (rc != 0 && saved_rc == 0)
2454 wbc->nr_to_write -= nr_pages;
2455 if (wbc->nr_to_write <= 0)
2461 if (!scanned && !done) {
2463 * We hit the last page and there is more work to be done: wrap
2464 * back to the start of the file
2474 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2475 mapping->writeback_index = index;
2478 cifsFileInfo_put(cfile);
/*
 * Write back one locked page: mark it under writeback, write the full
 * page via cifs_partialpagewrite(), and either redirty it (retryable
 * error), record the error on the mapping, or mark it uptodate.
 */
2484 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2490 /* BB add check for wbc flags */
2492 if (!PageUptodate(page))
2493 cifs_dbg(FYI, "ppw - page not up to date\n");
2496 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2498 * A writepage() implementation always needs to do either this,
2499 * or re-dirty the page with "redirty_page_for_writepage()" in
2500 * the case of a failure.
2502 * Just unlocking the page will cause the radix tree tag-bits
2503 * to fail to update with the state of the page correctly.
2505 set_page_writeback(page);
2507 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2508 if (is_retryable_error(rc)) {
2509 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2511 redirty_page_for_writepage(wbc, page);
2512 } else if (rc != 0) {
2514 mapping_set_error(page->mapping, rc);
2516 SetPageUptodate(page);
2518 end_page_writeback(page);
/* ->writepage(): delegate to cifs_writepage_locked() (the unlock and
 * return sit in lines outside this view) */
2524 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2526 int rc = cifs_writepage_locked(page, wbc);
/*
 * ->write_end(): finish a buffered write of @copied bytes at @pos.
 * A fully-written or PG_checked page is marked uptodate and dirtied;
 * a partially-written non-uptodate page is written through immediately
 * with cifs_write() using this file's handle.  Extends i_size under
 * i_lock when the write passes the current size.
 */
2531 static int cifs_write_end(struct file *file, struct address_space *mapping,
2532 loff_t pos, unsigned len, unsigned copied,
2533 struct page *page, void *fsdata)
2536 struct inode *inode = mapping->host;
2537 struct cifsFileInfo *cfile = file->private_data;
2538 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* choose which pid the server sees for this write */
2541 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2544 pid = current->tgid;
2546 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2549 if (PageChecked(page)) {
2551 SetPageUptodate(page);
2552 ClearPageChecked(page);
2553 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2554 SetPageUptodate(page);
2556 if (!PageUptodate(page)) {
2558 unsigned offset = pos & (PAGE_SIZE - 1);
2562 /* this is probably better than directly calling
2563 partialpage_write since in this function the file handle is
2564 known which we might as well leverage */
2565 /* BB check if anything else missing out of ppw
2566 such as updating last write time */
2567 page_data = kmap(page);
2568 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2569 /* if (rc < 0) should we set writebehind rc? */
2576 set_page_dirty(page);
2580 spin_lock(&inode->i_lock);
2581 if (pos > inode->i_size)
2582 i_size_write(inode, pos);
2583 spin_unlock(&inode->i_lock);
/*
 * Strict-cache ->fsync(): flush dirty pages for the range, invalidate
 * the page cache when we hold no read oplock (so the next read goes to
 * the server), then ask the server to flush unless the mount disables
 * server-side sync (CIFS_MOUNT_NOSSYNC).
 */
2592 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2597 struct cifs_tcon *tcon;
2598 struct TCP_Server_Info *server;
2599 struct cifsFileInfo *smbfile = file->private_data;
2600 struct inode *inode = file_inode(file);
2601 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2603 rc = file_write_and_wait_range(file, start, end);
2605 trace_cifs_fsync_err(inode->i_ino, rc);
2611 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2614 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2615 rc = cifs_zap_mapping(inode);
2617 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2618 rc = 0; /* don't care about it in fsync */
2622 tcon = tlink_tcon(smbfile->tlink);
2623 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2624 server = tcon->ses->server;
2625 if (server->ops->flush)
2626 rc = server->ops->flush(xid, tcon, &smbfile->fid);
/*
 * Regular ->fsync(): flush dirty pages for the range, then issue a
 * server-side flush unless CIFS_MOUNT_NOSSYNC is set.  Unlike
 * cifs_strict_fsync() it never invalidates the page cache.
 */
2635 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2639 struct cifs_tcon *tcon;
2640 struct TCP_Server_Info *server;
2641 struct cifsFileInfo *smbfile = file->private_data;
2642 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2644 rc = file_write_and_wait_range(file, start, end);
2646 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2652 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2655 tcon = tlink_tcon(smbfile->tlink);
2656 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2657 server = tcon->ses->server;
2658 if (server->ops->flush)
2659 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2669 * As file closes, flush all cached write data for this inode checking
2670 * for write behind errors.
2672 int cifs_flush(struct file *file, fl_owner_t id)
2674 struct inode *inode = file_inode(file);
/* only writable opens can have dirty data to flush */
2677 if (file->f_mode & FMODE_WRITE)
2678 rc = filemap_write_and_wait(inode->i_mapping);
2680 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2682 trace_cifs_flush_err(inode->i_ino, rc);
/*
 * Allocate @num_pages highmem-capable pages into @pages.  On partial
 * failure the cleanup loop at the bottom releases what was allocated
 * (the intermediate lines are outside this view).
 */
2687 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2692 for (i = 0; i < num_pages; i++) {
2693 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2696 * save number of pages we have already allocated and
2697 * return with ENOMEM error
2706 for (i = 0; i < num_pages; i++)
/*
 * Clamp the requested length to the negotiated wsize and report via
 * *cur_len; return the number of pages needed to hold that many bytes.
 */
2713 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2718 clen = min_t(const size_t, len, wsize);
2719 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * kref release callback for uncached (O_DIRECT-style) writedata: drop
 * the aio-context reference, release every page still owned by the
 * wdata, then fall through to the generic writedata release.
 */
2728 cifs_uncached_writedata_release(struct kref *refcount)
2731 struct cifs_writedata *wdata = container_of(refcount,
2732 struct cifs_writedata, refcount);
2734 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2735 for (i = 0; i < wdata->nr_pages; i++)
2736 put_page(wdata->pages[i]);
2737 cifs_writedata_release(refcount);
2740 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
/*
 * Work-queue completion for an uncached async write: advance the
 * server EOF bookkeeping (and i_size if the server EOF moved past it)
 * under i_lock, signal waiters, then let the aio context collect this
 * wdata's result.
 */
2743 cifs_uncached_writev_complete(struct work_struct *work)
2745 struct cifs_writedata *wdata = container_of(work,
2746 struct cifs_writedata, work);
2747 struct inode *inode = d_inode(wdata->cfile->dentry);
2748 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2750 spin_lock(&inode->i_lock);
2751 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2752 if (cifsi->server_eof > inode->i_size)
2753 i_size_write(inode, cifsi->server_eof);
2754 spin_unlock(&inode->i_lock);
2756 complete(&wdata->done);
2757 collect_uncached_write_data(wdata->ctx);
2758 /* the below call can possibly free the last ref to aio ctx */
2759 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Copy up to *len bytes from the user iov_iter into the wdata's pages,
 * a page at a time.  On return *len holds the bytes actually copied
 * and *num_pages the pages actually used; returns -EFAULT if nothing
 * at all could be copied (bogus iovec address).
 */
2763 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2764 size_t *len, unsigned long *num_pages)
2766 size_t save_len, copied, bytes, cur_len = *len;
2767 unsigned long i, nr_pages = *num_pages;
2770 for (i = 0; i < nr_pages; i++) {
2771 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2772 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2775 * If we didn't copy as much as we expected, then that
2776 * may mean we trod into an unmapped area. Stop copying
2777 * at that point. On the next pass through the big
2778 * loop, we'll likely end up getting a zero-length
2779 * write and bailing out of it.
/* convert "bytes remaining" back into "bytes copied" */
2784 cur_len = save_len - cur_len;
2788 * If we have no data to send, then that probably means that
2789 * the copy above failed altogether. That's most likely because
2790 * the address in the iovec was bogus. Return -EFAULT and let
2791 * the caller free anything we allocated and bail out.
2797 * i + 1 now represents the number of pages we actually used in
2798 * the copy phase above.
/*
 * Resend a previously-failed uncached write (whole wdata at once, not
 * page by page): reopen an invalidated handle, re-acquire enough MTU
 * credits to cover wdata->bytes, re-register the SMB-Direct MR if one
 * was used, and reissue async_writev.  Loops while the send returns
 * -EAGAIN, rolling credits back between attempts.
 */
2805 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2806 struct cifs_aio_ctx *ctx)
2809 struct cifs_credits credits;
2811 struct TCP_Server_Info *server = wdata->server;
2814 if (wdata->cfile->invalidHandle) {
2815 rc = cifs_reopen_file(wdata->cfile, false);
2824 * Wait for credits to resend this wdata.
2825 * Note: we are attempting to resend the whole wdata not in
/* keep waiting until the granted wsize covers the full wdata */
2829 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2834 if (wsize < wdata->bytes) {
2835 add_credits_and_wake_if(server, &credits, 0);
2838 } while (wsize < wdata->bytes);
2839 wdata->credits = credits;
2841 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2844 if (wdata->cfile->invalidHandle)
2847 #ifdef CONFIG_CIFS_SMB_DIRECT
/* the old memory registration is stale after a reconnect */
2849 wdata->mr->need_invalidate = true;
2850 smbd_deregister_mr(wdata->mr);
2854 rc = server->ops->async_writev(wdata,
2855 cifs_uncached_writedata_release);
2859 /* If the write was successfully sent, we are done */
2861 list_add_tail(&wdata->list, wdata_list);
2865 /* Roll back credits and retry if needed */
2866 add_credits_and_wake_if(server, &wdata->credits, 0);
2867 } while (rc == -EAGAIN);
2870 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Split an uncached write of [offset, offset+len) into wsize-bounded
 * wdata chunks and issue each asynchronously, queueing successful
 * sends on wdata_list.  Two page-acquisition paths: direct I/O pins
 * the user pages in place; the buffered path allocates pages and
 * copies from the iterator.  Credits are acquired per chunk and rolled
 * back on any failure.
 */
2875 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2876 struct cifsFileInfo *open_file,
2877 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2878 struct cifs_aio_ctx *ctx)
2882 unsigned long nr_pages, num_pages, i;
2883 struct cifs_writedata *wdata;
2884 struct iov_iter saved_from = *from;
2885 loff_t saved_offset = offset;
2887 struct TCP_Server_Info *server;
2888 struct page **pagevec;
/* honor forwarded PIDs for lock-owner semantics when mounted so */
2892 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2893 pid = open_file->pid;
2895 pid = current->tgid;
2897 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2902 struct cifs_credits credits_on_stack;
2903 struct cifs_credits *credits = &credits_on_stack;
2905 if (open_file->invalidHandle) {
2906 rc = cifs_reopen_file(open_file, false);
2913 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2918 cur_len = min_t(const size_t, len, wsize);
2920 if (ctx->direct_io) {
/* direct path: pin user pages, no copy */
2923 result = iov_iter_get_pages_alloc(
2924 from, &pagevec, cur_len, &start);
2927 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2928 result, iov_iter_type(from),
2929 from->iov_offset, from->count);
2933 add_credits_and_wake_if(server, credits, 0);
2936 cur_len = (size_t)result;
2937 iov_iter_advance(from, cur_len);
2940 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2942 wdata = cifs_writedata_direct_alloc(pagevec,
2943 cifs_uncached_writev_complete);
2946 add_credits_and_wake_if(server, credits, 0);
2951 wdata->page_offset = start;
/* tail size when data spans multiple partially-filled pages */
2954 cur_len - (PAGE_SIZE - start) -
2955 (nr_pages - 2) * PAGE_SIZE :
/* buffered path: allocate pages and copy from the iterator */
2958 nr_pages = get_numpages(wsize, len, &cur_len);
2959 wdata = cifs_writedata_alloc(nr_pages,
2960 cifs_uncached_writev_complete);
2963 add_credits_and_wake_if(server, credits, 0);
2967 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2969 kvfree(wdata->pages);
2971 add_credits_and_wake_if(server, credits, 0);
2975 num_pages = nr_pages;
2976 rc = wdata_fill_from_iovec(
2977 wdata, from, &cur_len, &num_pages);
2979 for (i = 0; i < nr_pages; i++)
2980 put_page(wdata->pages[i]);
2981 kvfree(wdata->pages);
2983 add_credits_and_wake_if(server, credits, 0);
2988 * Bring nr_pages down to the number of pages we
2989 * actually used, and free any pages that we didn't use.
2991 for ( ; nr_pages > num_pages; nr_pages--)
2992 put_page(wdata->pages[nr_pages - 1]);
2994 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2997 wdata->sync_mode = WB_SYNC_ALL;
2998 wdata->nr_pages = nr_pages;
2999 wdata->offset = (__u64)offset;
3000 wdata->cfile = cifsFileInfo_get(open_file);
3001 wdata->server = server;
3003 wdata->bytes = cur_len;
3004 wdata->pagesz = PAGE_SIZE;
3005 wdata->credits = credits_on_stack;
3007 kref_get(&ctx->refcount);
3009 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3012 if (wdata->cfile->invalidHandle)
3015 rc = server->ops->async_writev(wdata,
3016 cifs_uncached_writedata_release);
/* send failed: roll back credits, drop wdata; rewind iter on -EAGAIN */
3020 add_credits_and_wake_if(server, &wdata->credits, 0);
3021 kref_put(&wdata->refcount,
3022 cifs_uncached_writedata_release);
3023 if (rc == -EAGAIN) {
3025 iov_iter_advance(from, offset - saved_offset);
3031 list_add_tail(&wdata->list, wdata_list);
/*
 * Gather completions for all wdatas queued on an aio context, in order
 * of increasing offset.  Retryable (-EAGAIN) chunks are resent --
 * whole-wdata for direct I/O, re-split via cifs_write_from_iter
 * otherwise.  When every wdata is accounted for, publish ctx->rc,
 * invalidate the inode mapping, and complete the iocb (or ctx->done
 * for sync callers).  Returns early, without waiting, once an
 * unrecoverable error is hit.
 */
3040 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3042 struct cifs_writedata *wdata, *tmp;
3043 struct cifs_tcon *tcon;
3044 struct cifs_sb_info *cifs_sb;
3045 struct dentry *dentry = ctx->cfile->dentry;
3048 tcon = tlink_tcon(ctx->cfile->tlink);
3049 cifs_sb = CIFS_SB(dentry->d_sb);
3051 mutex_lock(&ctx->aio_mutex);
3053 if (list_empty(&ctx->list)) {
3054 mutex_unlock(&ctx->aio_mutex);
3060 * Wait for and collect replies for any successful sends in order of
3061 * increasing offset. Once an error is hit, then return without waiting
3062 * for any more replies.
3065 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
/* not done yet: drop the mutex and let the completion re-enter us */
3067 if (!try_wait_for_completion(&wdata->done)) {
3068 mutex_unlock(&ctx->aio_mutex);
3075 ctx->total_len += wdata->bytes;
3077 /* resend call if it's a retryable error */
3078 if (rc == -EAGAIN) {
3079 struct list_head tmp_list;
3080 struct iov_iter tmp_from = ctx->iter;
3082 INIT_LIST_HEAD(&tmp_list);
3083 list_del_init(&wdata->list);
3086 rc = cifs_resend_wdata(
3087 wdata, &tmp_list, ctx);
/* buffered: re-position a fresh iterator at this chunk and re-split */
3089 iov_iter_advance(&tmp_from,
3090 wdata->offset - ctx->pos);
3092 rc = cifs_write_from_iter(wdata->offset,
3093 wdata->bytes, &tmp_from,
3094 ctx->cfile, cifs_sb, &tmp_list,
3097 kref_put(&wdata->refcount,
3098 cifs_uncached_writedata_release);
3101 list_splice(&tmp_list, &ctx->list);
3105 list_del_init(&wdata->list);
3106 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3109 cifs_stats_bytes_written(tcon, ctx->total_len);
3110 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3112 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3114 mutex_unlock(&ctx->aio_mutex);
3116 if (ctx->iocb && ctx->iocb->ki_complete)
3117 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3119 complete(&ctx->done);
/*
 * Common implementation behind cifs_user_writev / cifs_direct_writev:
 * build an aio context, fan the write out via cifs_write_from_iter,
 * then either return -EIOCBQUEUED (async) or wait for completion and
 * return the bytes written.  @direct selects the pinned-page O_DIRECT
 * path; kvec iterators fall back to the copying path.
 */
3122 static ssize_t __cifs_writev(
3123 struct kiocb *iocb, struct iov_iter *from, bool direct)
3125 struct file *file = iocb->ki_filp;
3126 ssize_t total_written = 0;
3127 struct cifsFileInfo *cfile;
3128 struct cifs_tcon *tcon;
3129 struct cifs_sb_info *cifs_sb;
3130 struct cifs_aio_ctx *ctx;
3131 struct iov_iter saved_from = *from;
3132 size_t len = iov_iter_count(from);
3136 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3137 * In this case, fall back to non-direct write function.
3138 * this could be improved by getting pages directly in ITER_KVEC
3140 if (direct && iov_iter_is_kvec(from)) {
3141 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3145 rc = generic_write_checks(iocb, from);
3149 cifs_sb = CIFS_FILE_SB(file);
3150 cfile = file->private_data;
3151 tcon = tlink_tcon(cfile->tlink);
3153 if (!tcon->ses->server->ops->async_writev)
3156 ctx = cifs_aio_ctx_alloc();
3160 ctx->cfile = cifsFileInfo_get(cfile);
3162 if (!is_sync_kiocb(iocb))
3165 ctx->pos = iocb->ki_pos;
3168 ctx->direct_io = true;
3172 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3174 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3179 /* grab a lock here due to read response handlers can access ctx */
3180 mutex_lock(&ctx->aio_mutex);
3182 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3183 cfile, cifs_sb, &ctx->list, ctx);
3186 * If at least one write was successfully sent, then discard any rc
3187 * value from the later writes. If the other write succeeds, then
3188 * we'll end up returning whatever was written. If it fails, then
3189 * we'll get a new rc value from that.
3191 if (!list_empty(&ctx->list))
3194 mutex_unlock(&ctx->aio_mutex);
3197 kref_put(&ctx->refcount, cifs_aio_ctx_release);
/* async iocb: completion will be delivered via ki_complete */
3201 if (!is_sync_kiocb(iocb)) {
3202 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3203 return -EIOCBQUEUED;
3206 rc = wait_for_completion_killable(&ctx->done);
/* killed while waiting: record -EINTR but keep what was written */
3208 mutex_lock(&ctx->aio_mutex);
3209 ctx->rc = rc = -EINTR;
3210 total_written = ctx->total_len;
3211 mutex_unlock(&ctx->aio_mutex);
3214 total_written = ctx->total_len;
3217 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3219 if (unlikely(!total_written))
3222 iocb->ki_pos += total_written;
3223 return total_written;
/* O_DIRECT write entry point: thin wrapper selecting the direct path. */
3226 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3228 return __cifs_writev(iocb, from, true);
/* Uncached (copying) write entry point: wrapper for the non-direct path. */
3231 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3233 return __cifs_writev(iocb, from, false);
3237 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3239 struct file *file = iocb->ki_filp;
3240 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3241 struct inode *inode = file->f_mapping->host;
3242 struct cifsInodeInfo *cinode = CIFS_I(inode);
3243 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3248 * We need to hold the sem to be sure nobody modifies lock list
3249 * with a brlock that prevents writing.
3251 down_read(&cinode->lock_sem);
3253 rc = generic_write_checks(iocb, from);
3257 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3258 server->vals->exclusive_lock_type, 0,
3259 NULL, CIFS_WRITE_OP))
3260 rc = __generic_file_write_iter(iocb, from);
3264 up_read(&cinode->lock_sem);
3265 inode_unlock(inode);
3268 rc = generic_write_sync(iocb, rc);
/*
 * strictcache write entry point.  With a write cache grant (oplock/
 * lease) we may use the pagecache -- generic_file_write_iter for POSIX
 * (unix extension) mounts, cifs_writev (with brlock checking)
 * otherwise.  Without it the data goes straight to the server via the
 * uncached path, and any read-cached pages are zapped afterwards.
 */
3273 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3275 struct inode *inode = file_inode(iocb->ki_filp);
3276 struct cifsInodeInfo *cinode = CIFS_I(inode);
3277 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3278 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3279 iocb->ki_filp->private_data;
3280 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
/* blocks new oplock-break processing while the write runs */
3283 written = cifs_get_writer(cinode);
3287 if (CIFS_CACHE_WRITE(cinode)) {
3288 if (cap_unix(tcon->ses) &&
3289 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3290 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3291 written = generic_file_write_iter(iocb, from);
3294 written = cifs_writev(iocb, from);
3298 * For non-oplocked files in strict cache mode we need to write the data
3299 * to the server exactly from the pos to pos+len-1 rather than flush all
3300 * affected pages because it may cause an error with mandatory locks on
3301 * these pages but not on the region from pos to pos+len-1.
3303 written = cifs_user_writev(iocb, from);
3304 if (CIFS_CACHE_READ(cinode)) {
3306 * We have read level caching and we have just sent a write
3307 * request to the server thus making data in the cache stale.
3308 * Zap the cache and set oplock/lease level to NONE to avoid
3309 * reading stale data from the cache. All subsequent read
3310 * operations will read new data from the server.
3312 cifs_zap_mapping(inode);
3313 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3318 cifs_put_writer(cinode);
/*
 * Allocate and initialize a cifs_readdata that adopts a caller-provided
 * page array (used by the direct-I/O path).  Returns NULL on allocation
 * failure; visible on success is refcount=1, empty list, completion and
 * work item wired to @complete.
 */
3322 static struct cifs_readdata *
3323 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3325 struct cifs_readdata *rdata;
3327 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3328 if (rdata != NULL) {
3329 rdata->pages = pages;
3330 kref_init(&rdata->refcount);
3331 INIT_LIST_HEAD(&rdata->list);
3332 init_completion(&rdata->done);
3333 INIT_WORK(&rdata->work, complete);
/*
 * Convenience wrapper: allocate a zeroed page-pointer array of
 * nr_pages entries and hand it to cifs_readdata_direct_alloc.
 */
3339 static struct cifs_readdata *
3340 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3342 struct page **pages =
3343 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3344 struct cifs_readdata *ret = NULL;
3347 ret = cifs_readdata_direct_alloc(pages, complete);
/*
 * kref release for a readdata: tear down the SMB-Direct memory
 * registration if present, drop the file reference, and free the page
 * array and the readdata itself.
 */
3356 cifs_readdata_release(struct kref *refcount)
3358 struct cifs_readdata *rdata = container_of(refcount,
3359 struct cifs_readdata, refcount);
3360 #ifdef CONFIG_CIFS_SMB_DIRECT
3362 smbd_deregister_mr(rdata->mr);
3367 cifsFileInfo_put(rdata->cfile);
3369 kvfree(rdata->pages);
/*
 * Populate rdata->pages with nr_pages freshly-allocated highmem-capable
 * pages.  On failure the pages allocated so far are released and their
 * slots cleared so the rdata can be freed safely.
 */
3374 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3380 for (i = 0; i < nr_pages; i++) {
3381 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3386 rdata->pages[i] = page;
3390 unsigned int nr_page_failed = i;
3392 for (i = 0; i < nr_page_failed; i++) {
3393 put_page(rdata->pages[i]);
3394 rdata->pages[i] = NULL;
/*
 * kref release for uncached readdata: drop the aio-context reference,
 * release every page still held, then chain to the generic release.
 */
3401 cifs_uncached_readdata_release(struct kref *refcount)
3403 struct cifs_readdata *rdata = container_of(refcount,
3404 struct cifs_readdata, refcount);
3407 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3408 for (i = 0; i < rdata->nr_pages; i++) {
3409 put_page(rdata->pages[i]);
3411 cifs_readdata_release(refcount);
3415 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3416 * @rdata: the readdata response with list of pages holding data
3417 * @iter: destination for our data
3419 * This function copies data from a list of pages in a readdata response into
3420 * an array of iovecs. It will first calculate where the data should go
3421 * based on the info in the readdata and then copy the data into that spot.
/*
 * Copy rdata->got_bytes of received data from the rdata's pages into
 * the destination iterator.  Pipe destinations get a kmap_atomic copy
 * since copy_page_to_iter cannot be used there directly.  Returns
 * -EFAULT if data remained uncopied while the iterator still had room.
 */
3424 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3426 size_t remaining = rdata->got_bytes;
3429 for (i = 0; i < rdata->nr_pages; i++) {
3430 struct page *page = rdata->pages[i];
3431 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3434 if (unlikely(iov_iter_is_pipe(iter))) {
3435 void *addr = kmap_atomic(page);
3437 written = copy_to_iter(addr, copy, iter);
3438 kunmap_atomic(addr);
3440 written = copy_page_to_iter(page, 0, copy, iter);
3441 remaining -= written;
/* short copy with space left means a fault in the destination */
3442 if (written < copy && iov_iter_count(iter) > 0)
3445 return remaining ? -EFAULT : 0;
3448 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
/*
 * Work-queue completion for an uncached async read: signal waiters,
 * then let the aio context collect this rdata's result.
 */
3451 cifs_uncached_readv_complete(struct work_struct *work)
3453 struct cifs_readdata *rdata = container_of(work,
3454 struct cifs_readdata, work);
3456 complete(&rdata->done);
3457 collect_uncached_read_data(rdata->ctx);
3458 /* the below call can possibly free the last ref to aio ctx */
3459 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Fill the rdata's pages with response data, page by page, either by
 * copying from an in-memory iterator (@iter non-NULL, e.g. decrypted
 * SMB3 payload) or by reading straight off the socket.  Pages beyond
 * the received length are released immediately.  Returns bytes
 * received, or the error unless partial data arrived before a
 * non-fatal error.
 */
3463 uncached_fill_pages(struct TCP_Server_Info *server,
3464 struct cifs_readdata *rdata, struct iov_iter *iter,
3469 unsigned int nr_pages = rdata->nr_pages;
3470 unsigned int page_offset = rdata->page_offset;
3472 rdata->got_bytes = 0;
3473 rdata->tailsz = PAGE_SIZE;
3474 for (i = 0; i < nr_pages; i++) {
3475 struct page *page = rdata->pages[i];
3477 unsigned int segment_size = rdata->pagesz;
/* first page may start at a non-zero offset */
3480 segment_size -= page_offset;
3486 /* no need to hold page hostage */
3487 rdata->pages[i] = NULL;
3494 if (len >= segment_size)
3495 /* enough data to fill the page */
3498 rdata->tailsz = len;
3502 result = copy_page_from_iter(
3503 page, page_offset, n, iter);
3504 #ifdef CONFIG_CIFS_SMB_DIRECT
3509 result = cifs_read_page_from_socket(
3510 server, page, page_offset, n);
3514 rdata->got_bytes += result;
/* report partial progress unless the connection was aborted */
3517 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3518 rdata->got_bytes : result;
/* Socket-read variant of uncached_fill_pages (no source iterator). */
3522 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3523 struct cifs_readdata *rdata, unsigned int len)
3525 return uncached_fill_pages(server, rdata, NULL, len);
/* Iterator-copy variant of uncached_fill_pages (already-received data). */
3529 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3530 struct cifs_readdata *rdata,
3531 struct iov_iter *iter)
3533 return uncached_fill_pages(server, rdata, iter, iter->count);
/*
 * Resend a previously-failed uncached read as a whole rdata: reopen an
 * invalidated handle, re-acquire credits covering rdata->bytes,
 * re-register the SMB-Direct MR if used, and reissue async_readv.
 * Loops while the send returns -EAGAIN, rolling credits back between
 * attempts.  Mirrors cifs_resend_wdata on the write side.
 */
3536 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3537 struct list_head *rdata_list,
3538 struct cifs_aio_ctx *ctx)
3541 struct cifs_credits credits;
3543 struct TCP_Server_Info *server;
3545 /* XXX: should we pick a new channel here? */
3546 server = rdata->server;
3549 if (rdata->cfile->invalidHandle) {
3550 rc = cifs_reopen_file(rdata->cfile, true);
3558 * Wait for credits to resend this rdata.
3559 * Note: we are attempting to resend the whole rdata not in
/* keep waiting until the granted rsize covers the full rdata */
3563 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3569 if (rsize < rdata->bytes) {
3570 add_credits_and_wake_if(server, &credits, 0);
3573 } while (rsize < rdata->bytes);
3574 rdata->credits = credits;
3576 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3578 if (rdata->cfile->invalidHandle)
3581 #ifdef CONFIG_CIFS_SMB_DIRECT
/* the old memory registration is stale after a reconnect */
3583 rdata->mr->need_invalidate = true;
3584 smbd_deregister_mr(rdata->mr);
3588 rc = server->ops->async_readv(rdata);
3592 /* If the read was successfully sent, we are done */
3594 /* Add to aio pending list */
3595 list_add_tail(&rdata->list, rdata_list);
3599 /* Roll back credits and retry if needed */
3600 add_credits_and_wake_if(server, &rdata->credits, 0);
3601 } while (rc == -EAGAIN);
3604 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Split an uncached read of [offset, offset+len) into rsize-bounded
 * rdata chunks and issue each asynchronously, queueing successful
 * sends on rdata_list.  Direct I/O pins the destination user pages;
 * the buffered path allocates pages to be copied out later.  Credits
 * are acquired per chunk and rolled back on any failure.  Read-side
 * counterpart of cifs_write_from_iter.
 */
3609 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3610 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3611 struct cifs_aio_ctx *ctx)
3613 struct cifs_readdata *rdata;
3614 unsigned int npages, rsize;
3615 struct cifs_credits credits_on_stack;
3616 struct cifs_credits *credits = &credits_on_stack;
3620 struct TCP_Server_Info *server;
3621 struct page **pagevec;
3623 struct iov_iter direct_iov = ctx->iter;
3625 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3627 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3628 pid = open_file->pid;
3630 pid = current->tgid;
/* position the private iterator copy at this request's offset */
3633 iov_iter_advance(&direct_iov, offset - ctx->pos);
3636 if (open_file->invalidHandle) {
3637 rc = cifs_reopen_file(open_file, true);
3644 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3649 cur_len = min_t(const size_t, len, rsize);
3651 if (ctx->direct_io) {
/* direct path: pin the user destination pages, no copy */
3654 result = iov_iter_get_pages_alloc(
3655 &direct_iov, &pagevec,
3659 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3660 result, iov_iter_type(&direct_iov),
3661 direct_iov.iov_offset,
3666 add_credits_and_wake_if(server, credits, 0);
3669 cur_len = (size_t)result;
3670 iov_iter_advance(&direct_iov, cur_len);
3672 rdata = cifs_readdata_direct_alloc(
3673 pagevec, cifs_uncached_readv_complete);
3675 add_credits_and_wake_if(server, credits, 0);
3680 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3681 rdata->page_offset = start;
3682 rdata->tailsz = npages > 1 ?
3683 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
/* buffered path: allocate bounce pages for the response */
3688 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3689 /* allocate a readdata struct */
3690 rdata = cifs_readdata_alloc(npages,
3691 cifs_uncached_readv_complete);
3693 add_credits_and_wake_if(server, credits, 0);
3698 rc = cifs_read_allocate_pages(rdata, npages);
3700 kvfree(rdata->pages);
3702 add_credits_and_wake_if(server, credits, 0);
3706 rdata->tailsz = PAGE_SIZE;
3709 rdata->server = server;
3710 rdata->cfile = cifsFileInfo_get(open_file);
3711 rdata->nr_pages = npages;
3712 rdata->offset = offset;
3713 rdata->bytes = cur_len;
3715 rdata->pagesz = PAGE_SIZE;
3716 rdata->read_into_pages = cifs_uncached_read_into_pages;
3717 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3718 rdata->credits = credits_on_stack;
3720 kref_get(&ctx->refcount);
3722 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3725 if (rdata->cfile->invalidHandle)
3728 rc = server->ops->async_readv(rdata);
/* send failed: roll back credits, drop rdata; rewind iter on -EAGAIN */
3732 add_credits_and_wake_if(server, &rdata->credits, 0);
3733 kref_put(&rdata->refcount,
3734 cifs_uncached_readdata_release);
3735 if (rc == -EAGAIN) {
3736 iov_iter_revert(&direct_iov, cur_len);
3742 list_add_tail(&rdata->list, rdata_list);
/*
 * Gather completions for all rdatas queued on an aio context, in order
 * of increasing offset.  -EAGAIN results are resent: whole-rdata for
 * direct I/O, or re-split with any partial data copied out first.
 * When everything is accounted for, publish ctx->rc (bytes read or
 * error; zero-byte success maps to -ENODATA in the masked path) and
 * complete the iocb or ctx->done.
 */
3751 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3753 struct cifs_readdata *rdata, *tmp;
3754 struct iov_iter *to = &ctx->iter;
3755 struct cifs_sb_info *cifs_sb;
3758 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3760 mutex_lock(&ctx->aio_mutex);
3762 if (list_empty(&ctx->list)) {
3763 mutex_unlock(&ctx->aio_mutex);
3768 /* the loop below should proceed in the order of increasing offsets */
3770 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
/* not done yet: drop the mutex and let the completion re-enter us */
3772 if (!try_wait_for_completion(&rdata->done)) {
3773 mutex_unlock(&ctx->aio_mutex);
3777 if (rdata->result == -EAGAIN) {
3778 /* resend call if it's a retryable error */
3779 struct list_head tmp_list;
3780 unsigned int got_bytes = rdata->got_bytes;
3782 list_del_init(&rdata->list);
3783 INIT_LIST_HEAD(&tmp_list);
3786 * Got a part of data and then reconnect has
3787 * happened -- fill the buffer and continue
3790 if (got_bytes && got_bytes < rdata->bytes) {
3792 if (!ctx->direct_io)
3793 rc = cifs_readdata_to_iov(rdata, to);
3795 kref_put(&rdata->refcount,
3796 cifs_uncached_readdata_release);
3801 if (ctx->direct_io) {
3803 * Re-use rdata as this is a
3806 rc = cifs_resend_rdata(
/* buffered: reissue only the portion not yet received */
3810 rc = cifs_send_async_read(
3811 rdata->offset + got_bytes,
3812 rdata->bytes - got_bytes,
3813 rdata->cfile, cifs_sb,
3816 kref_put(&rdata->refcount,
3817 cifs_uncached_readdata_release);
3820 list_splice(&tmp_list, &ctx->list);
3823 } else if (rdata->result)
3825 else if (!ctx->direct_io)
3826 rc = cifs_readdata_to_iov(rdata, to);
3828 /* if there was a short read -- discard anything left */
3829 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3832 ctx->total_len += rdata->got_bytes;
3834 list_del_init(&rdata->list);
3835 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3838 if (!ctx->direct_io)
3839 ctx->total_len = ctx->len - iov_iter_count(to);
3841 /* mask nodata case */
3845 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3847 mutex_unlock(&ctx->aio_mutex);
3849 if (ctx->iocb && ctx->iocb->ki_complete)
3850 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3852 complete(&ctx->done);
/*
 * Common implementation behind cifs_user_readv / cifs_direct_readv:
 * build an aio context, fan the read out via cifs_send_async_read,
 * then either return -EIOCBQUEUED (async) or wait for completion and
 * return the bytes read.  @direct selects the pinned-page O_DIRECT
 * path; kvec iterators fall back to the copying path.  Mirror of
 * __cifs_writev.
 */
3855 static ssize_t __cifs_readv(
3856 struct kiocb *iocb, struct iov_iter *to, bool direct)
3859 struct file *file = iocb->ki_filp;
3860 struct cifs_sb_info *cifs_sb;
3861 struct cifsFileInfo *cfile;
3862 struct cifs_tcon *tcon;
3863 ssize_t rc, total_read = 0;
3864 loff_t offset = iocb->ki_pos;
3865 struct cifs_aio_ctx *ctx;
3868 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3869 * fall back to data copy read path
3870 * this could be improved by getting pages directly in ITER_KVEC
3872 if (direct && iov_iter_is_kvec(to)) {
3873 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3877 len = iov_iter_count(to);
3881 cifs_sb = CIFS_FILE_SB(file);
3882 cfile = file->private_data;
3883 tcon = tlink_tcon(cfile->tlink);
3885 if (!tcon->ses->server->ops->async_readv)
3888 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3889 cifs_dbg(FYI, "attempting read on write only file instance\n");
3891 ctx = cifs_aio_ctx_alloc();
3895 ctx->cfile = cifsFileInfo_get(cfile);
3897 if (!is_sync_kiocb(iocb))
/* user-space iovec destinations must be marked dirty after the copy */
3900 if (iter_is_iovec(to))
3901 ctx->should_dirty = true;
3905 ctx->direct_io = true;
3909 rc = setup_aio_ctx_iter(ctx, to, READ);
3911 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3917 /* grab a lock here due to read response handlers can access ctx */
3918 mutex_lock(&ctx->aio_mutex);
3920 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3922 /* if at least one read request send succeeded, then reset rc */
3923 if (!list_empty(&ctx->list))
3926 mutex_unlock(&ctx->aio_mutex);
3929 kref_put(&ctx->refcount, cifs_aio_ctx_release);
/* async iocb: completion will be delivered via ki_complete */
3933 if (!is_sync_kiocb(iocb)) {
3934 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3935 return -EIOCBQUEUED;
3938 rc = wait_for_completion_killable(&ctx->done);
/* killed while waiting: record -EINTR but keep what was read */
3940 mutex_lock(&ctx->aio_mutex);
3941 ctx->rc = rc = -EINTR;
3942 total_read = ctx->total_len;
3943 mutex_unlock(&ctx->aio_mutex);
3946 total_read = ctx->total_len;
3949 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3952 iocb->ki_pos += total_read;
/* O_DIRECT read entry point: thin wrapper selecting the direct path. */
3958 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3960 return __cifs_readv(iocb, to, true);
/* Uncached (copying) read entry point: wrapper for the non-direct path. */
3963 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3965 return __cifs_readv(iocb, to, false);
/*
 * strictcache read entry point.  Without a read cache grant, bypass
 * the pagecache entirely (cifs_user_readv).  With one: POSIX-extension
 * mounts use the generic cached read; otherwise check the brlock list
 * (under lock_sem) for mandatory-lock conflicts before reading from
 * the cache.
 */
3969 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3971 struct inode *inode = file_inode(iocb->ki_filp);
3972 struct cifsInodeInfo *cinode = CIFS_I(inode);
3973 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3974 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3975 iocb->ki_filp->private_data;
3976 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3980 * In strict cache mode we need to read from the server all the time
3981 * if we don't have level II oplock because the server can delay mtime
3982 * change - so we can't make a decision about inode invalidating.
3983 * And we can also fail with pagereading if there are mandatory locks
3984 * on pages affected by this read but not on the region from pos to
3987 if (!CIFS_CACHE_READ(cinode))
3988 return cifs_user_readv(iocb, to);
3990 if (cap_unix(tcon->ses) &&
3991 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3992 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3993 return generic_file_read_iter(iocb, to);
3996 * We need to hold the sem to be sure nobody modifies lock list
3997 * with a brlock that prevents reading.
3999 down_read(&cinode->lock_sem);
4000 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4001 tcon->ses->server->vals->shared_lock_type,
4002 0, NULL, CIFS_READ_OP))
4003 rc = generic_file_read_iter(iocb, to);
4004 up_read(&cinode->lock_sem);
4009 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4012 unsigned int bytes_read = 0;
4013 unsigned int total_read;
4014 unsigned int current_read_size;
4016 struct cifs_sb_info *cifs_sb;
4017 struct cifs_tcon *tcon;
4018 struct TCP_Server_Info *server;
4021 struct cifsFileInfo *open_file;
4022 struct cifs_io_parms io_parms = {0};
4023 int buf_type = CIFS_NO_BUFFER;
4027 cifs_sb = CIFS_FILE_SB(file);
4029 /* FIXME: set up handlers for larger reads and/or convert to async */
4030 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4032 if (file->private_data == NULL) {
4037 open_file = file->private_data;
4038 tcon = tlink_tcon(open_file->tlink);
4039 server = cifs_pick_channel(tcon->ses);
4041 if (!server->ops->sync_read) {
4046 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4047 pid = open_file->pid;
4049 pid = current->tgid;
4051 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4052 cifs_dbg(FYI, "attempting read on write only file instance\n");
4054 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4055 total_read += bytes_read, cur_offset += bytes_read) {
4057 current_read_size = min_t(uint, read_size - total_read,
4060 * For windows me and 9x we do not want to request more
4061 * than it negotiated since it will refuse the read
4064 if (!(tcon->ses->capabilities &
4065 tcon->ses->server->vals->cap_large_files)) {
4066 current_read_size = min_t(uint,
4067 current_read_size, CIFSMaxBufSize);
4069 if (open_file->invalidHandle) {
4070 rc = cifs_reopen_file(open_file, true);
4075 io_parms.tcon = tcon;
4076 io_parms.offset = *offset;
4077 io_parms.length = current_read_size;
4078 io_parms.server = server;
4079 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4080 &bytes_read, &cur_offset,
4082 } while (rc == -EAGAIN);
4084 if (rc || (bytes_read == 0)) {
4092 cifs_stats_bytes_read(tcon, total_read);
4093 *offset += bytes_read;
4101 * If the page is mmap'ed into a process' page tables, then we need to make
4102 * sure that it doesn't change while being written back.
/*
 * page_mkwrite handler: keep the faulting page locked so it cannot
 * change while being written back (see comment above).
 */
4105 cifs_page_mkwrite(struct vm_fault *vmf)
4107 struct page *page = vmf->page;
4110 return VM_FAULT_LOCKED;
/* VM operations shared by both cifs mmap variants below. */
4113 static const struct vm_operations_struct cifs_file_vm_ops = {
4114 .fault = filemap_fault,
4115 .map_pages = filemap_map_pages,
4116 .page_mkwrite = cifs_page_mkwrite,
/*
 * mmap for strictcache mounts: zap any stale cached pages first when
 * we lack a read cache grant, then delegate to generic_file_mmap and
 * install the cifs VM ops.
 */
4119 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4122 struct inode *inode = file_inode(file);
4126 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4127 rc = cifs_zap_mapping(inode);
4129 rc = generic_file_mmap(file, vma);
4131 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Default mmap: revalidate the file's cached data against the server
 * first, then delegate to generic_file_mmap with the cifs VM ops.
 */
4137 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4143 rc = cifs_revalidate_file(file);
4145 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4148 rc = generic_file_mmap(file, vma);
4150 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Work-queue completion for a pagecache (readpages) read: for each
 * page, add it to the LRU, mark it up-to-date and push it to fscache
 * if the data arrived (including partial data before an -EAGAIN),
 * then release the rdata's reference to it.
 */
4157 cifs_readv_complete(struct work_struct *work)
4159 unsigned int i, got_bytes;
4160 struct cifs_readdata *rdata = container_of(work,
4161 struct cifs_readdata, work);
4163 got_bytes = rdata->got_bytes;
4164 for (i = 0; i < rdata->nr_pages; i++) {
4165 struct page *page = rdata->pages[i];
4167 lru_cache_add(page);
4169 if (rdata->result == 0 ||
4170 (rdata->result == -EAGAIN && got_bytes)) {
4171 flush_dcache_page(page);
4172 SetPageUptodate(page);
4177 if (rdata->result == 0 ||
4178 (rdata->result == -EAGAIN && got_bytes))
4179 cifs_readpage_to_fscache(rdata->mapping->host, page);
/* consume this page's share of the received byte count */
4181 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4184 rdata->pages[i] = NULL;
4186 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * Fill pagecache pages with response data for a readpages request,
 * from an in-memory iterator (@iter non-NULL) or the socket.  Pages
 * only partially covered by the response are zero-filled past the
 * data; pages beyond both the data and the server's (probable) EOF
 * are zeroed and marked up-to-date so the VFS stops re-requesting
 * them.  Returns bytes received, or the error unless partial data
 * arrived before a non-fatal error.
 */
4190 readpages_fill_pages(struct TCP_Server_Info *server,
4191 struct cifs_readdata *rdata, struct iov_iter *iter,
4198 unsigned int nr_pages = rdata->nr_pages;
4199 unsigned int page_offset = rdata->page_offset;
4201 /* determine the eof that the server (probably) has */
4202 eof = CIFS_I(rdata->mapping->host)->server_eof;
4203 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4204 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4206 rdata->got_bytes = 0;
4207 rdata->tailsz = PAGE_SIZE;
4208 for (i = 0; i < nr_pages; i++) {
4209 struct page *page = rdata->pages[i];
4210 unsigned int to_read = rdata->pagesz;
/* first page may start at a non-zero offset */
4214 to_read -= page_offset;
4220 if (len >= to_read) {
4222 } else if (len > 0) {
4223 /* enough for partial page, fill and zero the rest */
4224 zero_user(page, len + page_offset, to_read - len);
4225 n = rdata->tailsz = len;
4227 } else if (page->index > eof_index) {
4229 * The VFS will not try to do readahead past the
4230 * i_size, but it's possible that we have outstanding
4231 * writes with gaps in the middle and the i_size hasn't
4232 * caught up yet. Populate those with zeroed out pages
4233 * to prevent the VFS from repeatedly attempting to
4234 * fill them until the writes are flushed.
4236 zero_user(page, 0, PAGE_SIZE);
4237 lru_cache_add(page);
4238 flush_dcache_page(page);
4239 SetPageUptodate(page);
4242 rdata->pages[i] = NULL;
4246 /* no need to hold page hostage */
4247 lru_cache_add(page);
4250 rdata->pages[i] = NULL;
4256 result = copy_page_from_iter(
4257 page, page_offset, n, iter);
4258 #ifdef CONFIG_CIFS_SMB_DIRECT
4263 result = cifs_read_page_from_socket(
4264 server, page, page_offset, n);
4268 rdata->got_bytes += result;
/* report partial progress unless the connection was aborted */
4271 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4272 rdata->got_bytes : result;
/*
 * cifs_readpages_read_into_pages - socket-receive callback for readpages.
 * Thin wrapper: NULL iter selects the read-from-socket path in
 * readpages_fill_pages().
 */
4276 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4277 struct cifs_readdata *rdata, unsigned int len)
4279 return readpages_fill_pages(server, rdata, NULL, len);
/*
 * cifs_readpages_copy_into_pages - iov_iter-copy callback for readpages.
 * Used when the response payload is already in an iterator (e.g. SMB
 * Direct); delegates to readpages_fill_pages() with the iterator's
 * remaining byte count.
 */
4283 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4284 struct cifs_readdata *rdata,
4285 struct iov_iter *iter)
4287 return readpages_fill_pages(server, rdata, iter, iter->count);
/*
 * readpages_get_pages - carve one contiguous run of pages off @page_list.
 * Starting from the last entry (page_list is in declining-index order),
 * locks each page, inserts it into the page cache, and moves it to
 * @tmplist until an index discontinuity is hit or adding another page
 * would exceed @rsize.  Outputs the run's starting file @offset, page
 * count and byte length for the caller to build one read request.
 * NOTE(review): rc declaration, some error/exit paths and the final
 * return are elided in this excerpt.
 */
4291 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4292 unsigned int rsize, struct list_head *tmplist,
4293 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4295 struct page *page, *tpage;
4296 unsigned int expected_index;
4298 gfp_t gfp = readahead_gfp_mask(mapping);
4300 INIT_LIST_HEAD(tmplist);
4302 page = lru_to_page(page_list);
4305 * Lock the page and put it in the cache. Since no one else
4306 * should have access to this page, we're safe to simply set
4307 * PG_locked without checking it first.
4309 __SetPageLocked(page);
4310 rc = add_to_page_cache_locked(page, mapping,
4313 /* give up if we can't stick it in the cache */
4315 __ClearPageLocked(page);
4319 /* move first page to the tmplist */
4320 *offset = (loff_t)page->index << PAGE_SHIFT;
4323 list_move_tail(&page->lru, tmplist);
4325 /* now try and add more pages onto the request */
4326 expected_index = page->index + 1;
4327 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4328 /* discontinuity ? */
4329 if (page->index != expected_index)
4332 /* would this page push the read over the rsize? */
4333 if (*bytes + PAGE_SIZE > rsize)
4336 __SetPageLocked(page);
4337 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4339 __ClearPageLocked(page);
4342 list_move_tail(&page->lru, tmplist);
4343 (*bytes) += PAGE_SIZE;
/*
 * cifs_readpages - ->readpages address-space op: batched async readahead.
 * First offers the whole list to fscache; whatever remains is split into
 * contiguous, rsize-bounded runs (readpages_get_pages), wrapped in a
 * cifs_readdata, and issued via the server's async_readv.  Credits are
 * obtained per request and returned on every failure path; on error the
 * pages are put back on the LRU so the VFS can fall back to ->readpage.
 * NOTE(review): a number of lines (rc/err declarations, early-return on
 * fscache success, reconnect retry loop, unlock/put_page in the error
 * loop, loop closes) are elided in this excerpt.
 */
4350 static int cifs_readpages(struct file *file, struct address_space *mapping,
4351 struct list_head *page_list, unsigned num_pages)
4355 struct list_head tmplist;
4356 struct cifsFileInfo *open_file = file->private_data;
4357 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4358 struct TCP_Server_Info *server;
4364 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4365 * immediately if the cookie is negative
4367 * After this point, every page in the list might have PG_fscache set,
4368 * so we will need to clean that up off of every page we don't use.
4370 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
/* honor forwarded pids (multiuser mounts) when tagging the request */
4377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4378 pid = open_file->pid;
4380 pid = current->tgid;
4383 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4385 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4386 __func__, file, mapping, num_pages);
4389 * Start with the page at end of list and move it to private
4390 * list. Do the same with any following pages until we hit
4391 * the rsize limit, hit an index discontinuity, or run out of
4392 * pages. Issue the async read and then start the loop again
4393 * until the list is empty.
4395 * Note that list order is important. The page_list is in
4396 * the order of declining indexes. When we put the pages in
4397 * the rdata->pages, then we want them in increasing order.
4399 while (!list_empty(page_list) && !err) {
4400 unsigned int i, nr_pages, bytes, rsize;
4402 struct page *page, *tpage;
4403 struct cifs_readdata *rdata;
4404 struct cifs_credits credits_on_stack;
4405 struct cifs_credits *credits = &credits_on_stack;
/* reopen a handle invalidated by reconnect before issuing I/O */
4407 if (open_file->invalidHandle) {
4408 rc = cifs_reopen_file(open_file, true);
/* block until the server grants credits for an rsize-sized read */
4415 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4421 * Give up immediately if rsize is too small to read an entire
4422 * page. The VFS will fall back to readpage. We should never
4423 * reach this point however since we set ra_pages to 0 when the
4424 * rsize is smaller than a cache page.
4426 if (unlikely(rsize < PAGE_SIZE)) {
4427 add_credits_and_wake_if(server, credits, 0);
4433 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4434 &nr_pages, &offset, &bytes);
4436 add_credits_and_wake_if(server, credits, 0);
4440 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4442 /* best to give up if we're out of mem */
4443 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4444 list_del(&page->lru);
4445 lru_cache_add(page);
4450 add_credits_and_wake_if(server, credits, 0);
/* fill in the request descriptor for the async read */
4454 rdata->cfile = cifsFileInfo_get(open_file);
4455 rdata->server = server;
4456 rdata->mapping = mapping;
4457 rdata->offset = offset;
4458 rdata->bytes = bytes;
4460 rdata->pagesz = PAGE_SIZE;
4461 rdata->tailsz = PAGE_SIZE;
4462 rdata->read_into_pages = cifs_readpages_read_into_pages;
4463 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4464 rdata->credits = credits_on_stack;
/* tmplist is ascending-index; append preserves that order */
4466 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4467 list_del(&page->lru);
4468 rdata->pages[rdata->nr_pages++] = page;
4471 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4474 if (rdata->cfile->invalidHandle)
4477 rc = server->ops->async_readv(rdata);
/* send failed: return credits and hand pages back to the VFS */
4481 add_credits_and_wake_if(server, &rdata->credits, 0);
4482 for (i = 0; i < rdata->nr_pages; i++) {
4483 page = rdata->pages[i];
4484 lru_cache_add(page);
4488 /* Fallback to the readpage in error/reconnect cases */
4489 kref_put(&rdata->refcount, cifs_readdata_release);
/* drop the submission reference; completion holds its own */
4493 kref_put(&rdata->refcount, cifs_readdata_release);
4496 /* Any pages that have been shown to fscache but didn't get added to
4497 * the pagecache must be uncached before they get returned to the
4500 cifs_fscache_readpages_cancel(mapping->host, page_list);
4506 * cifs_readpage_worker must be called with the page pinned
/*
 * Fill one page: try fscache first, else do a synchronous cifs_read()
 * into the kmapped page, zero the tail short of a full page, mark the
 * page uptodate and push it to fscache.  Also clamps atime to be no
 * less than mtime (historical app-compat workaround).
 * NOTE(review): kunmap/unlock and the io_error/return paths are elided
 * in this excerpt.
 */
4508 static int cifs_readpage_worker(struct file *file, struct page *page,
4514 /* Is the page cached? */
4515 rc = cifs_readpage_from_fscache(file_inode(file), page);
4519 read_data = kmap(page);
4520 /* for reads over a certain size could initiate async read ahead */
4522 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4527 cifs_dbg(FYI, "Bytes read %d\n", rc);
4529 /* we do not want atime to be less than mtime, it broke some apps */
4530 file_inode(file)->i_atime = current_time(file_inode(file));
4531 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4532 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4534 file_inode(file)->i_atime = current_time(file_inode(file));
/* short read: zero the remainder so no stale data is exposed */
4537 memset(read_data + rc, 0, PAGE_SIZE - rc);
4539 flush_dcache_page(page);
4540 SetPageUptodate(page);
4542 /* send this page to the cache */
4543 cifs_readpage_to_fscache(file_inode(file), page);
/*
 * cifs_readpage - ->readpage address-space op.
 * Computes the file offset from the page index, rejects calls with no
 * open private_data, and delegates the actual fill to
 * cifs_readpage_worker().  (xid bracketing and return are elided here.)
 */
4555 static int cifs_readpage(struct file *file, struct page *page)
4557 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4563 if (file->private_data == NULL) {
4569 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4570 page, (int)offset, (int)offset);
4572 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - is any open handle on this inode writable?
 * Scans openFileList under open_file_lock; the early unlock inside the
 * loop pairs with a return-true there (elided in this excerpt), while
 * falling off the list returns false after the final unlock.
 */
4578 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4580 struct cifsFileInfo *open_file;
4582 spin_lock(&cifs_inode->open_file_lock);
4583 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4584 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4585 spin_unlock(&cifs_inode->open_file_lock);
4589 spin_unlock(&cifs_inode->open_file_lock);
4593 /* We do not want to update the file size from server for inodes
4594 open for write - to avoid races with writepage extending
4595 the file - in the future we could consider allowing
4596 refreshing the inode only on increases in the file size
4597 but this is tricky to do without racing with writebehind
4598 page caching in the current Linux kernel design */
/*
 * Returns whether it is safe to overwrite i_size with the server-reported
 * @end_of_file.  Always safe when no handle is open for write, or on
 * direct-I/O mounts (no page cache to corrupt); otherwise only the
 * comparison against the current i_size decides (tail elided here).
 */
4599 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4604 if (is_inode_writable(cifsInode)) {
4605 /* This inode is open for write at least once */
4606 struct cifs_sb_info *cifs_sb;
4608 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4609 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4610 /* since no page cache to corrupt on directio
4611 we can change size safely */
4615 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - ->write_begin address-space op.
 * Grabs (or creates) the target page and decides whether a read from the
 * server is needed before the copy-in: skipped when the page is already
 * uptodate, when the write covers a whole page, or — with a read oplock —
 * when the page is wholly beyond EOF or the write will cover all existing
 * data (those parts are zeroed and PG_checked is set instead).  Otherwise
 * a best-effort read-in is done for non-O_WRONLY opens.
 * NOTE(review): locals (page, i_size, oncethru, rc), the out/start labels
 * and the final return are elided in this excerpt.
 */
4623 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4624 loff_t pos, unsigned len, unsigned flags,
4625 struct page **pagep, void **fsdata)
4628 pgoff_t index = pos >> PAGE_SHIFT;
4629 loff_t offset = pos & (PAGE_SIZE - 1);
4630 loff_t page_start = pos & PAGE_MASK;
4635 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4638 page = grab_cache_page_write_begin(mapping, index, flags);
4644 if (PageUptodate(page))
4648 * If we write a full page it will be up to date, no need to read from
4649 * the server. If the write is short, we'll end up doing a sync write
4652 if (len == PAGE_SIZE)
4656 * optimize away the read when we have an oplock, and we're not
4657 * expecting to use any of the data we'd be reading in. That
4658 * is, when the page lies beyond the EOF, or straddles the EOF
4659 * and the write will cover all of the existing data.
4661 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4662 i_size = i_size_read(mapping->host);
4663 if (page_start >= i_size ||
4664 (offset == 0 && (pos + len) >= i_size)) {
4665 zero_user_segments(page, 0, offset,
4669 * PageChecked means that the parts of the page
4670 * to which we're not writing are considered up
4671 * to date. Once the data is copied to the
4672 * page, it can be set uptodate.
4674 SetPageChecked(page);
4679 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4681 * might as well read a page, it is fast enough. If we get
4682 * an error, we don't need to return it. cifs_write_end will
4683 * do a sync write instead since PG_uptodate isn't set.
4685 cifs_readpage_worker(file, page, &page_start);
4690 /* we could try using another file handle if there is one -
4691 but how would we lock it to prevent close of that handle
4692 racing with this read? In any case
4693 this will be written out by write_end so is fine */
/*
 * cifs_release_page - ->releasepage address-space op.
 * Refuses to release pages carrying private data; otherwise defers to
 * fscache, which decides whether the page can be freed.
 */
4700 static int cifs_release_page(struct page *page, gfp_t gfp)
4702 if (PagePrivate(page))
4705 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - ->invalidatepage address-space op.
 * Only when the entire page is being invalidated is the corresponding
 * fscache entry dropped; partial invalidations leave fscache untouched.
 */
4708 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4709 unsigned int length)
4711 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4713 if (offset == 0 && length == PAGE_SIZE)
4714 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_launder_page - ->launder_page address-space op.
 * Synchronously writes back the single page (WB_SYNC_ALL over exactly its
 * byte range) if it is dirty, then invalidates its fscache copy.  Return
 * value is the writeback result (rc declaration elided in this excerpt).
 */
4717 static int cifs_launder_page(struct page *page)
4720 loff_t range_start = page_offset(page);
4721 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4722 struct writeback_control wbc = {
4723 .sync_mode = WB_SYNC_ALL,
4725 .range_start = range_start,
4726 .range_end = range_end,
4729 cifs_dbg(FYI, "Launder page: %p\n", page);
4731 if (clear_page_dirty_for_io(page))
4732 rc = cifs_writepage_locked(page, &wbc);
4734 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * cifs_oplock_break - work item run when the server breaks our oplock.
 * Waits out pending writers, downgrades the cached oplock state, breaks
 * any conflicting leases, flushes dirty pages, zaps the page cache when
 * read caching is lost (or purge was requested), re-pushes byte-range
 * locks, and finally acknowledges the break to the server unless the
 * break was cancelled (e.g. stale handle after reconnect).
 * NOTE(review): rc declaration, oplock_break member name in
 * container_of, and the oplock_break_ack label are elided here.
 */
4738 void cifs_oplock_break(struct work_struct *work)
4740 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4742 struct inode *inode = d_inode(cfile->dentry);
4743 struct cifsInodeInfo *cinode = CIFS_I(inode);
4744 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4745 struct TCP_Server_Info *server = tcon->ses->server;
4747 bool purge_cache = false;
/* let in-flight writers drain before changing oplock state */
4749 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4750 TASK_UNINTERRUPTIBLE);
4752 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4753 cfile->oplock_epoch, &purge_cache);
/* mandatory byte-range locks require dropping cached reads entirely */
4755 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4756 cifs_has_mand_locks(cinode)) {
4757 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4762 if (inode && S_ISREG(inode->i_mode)) {
4763 if (CIFS_CACHE_READ(cinode))
4764 break_lease(inode, O_RDONLY);
4766 break_lease(inode, O_WRONLY);
4767 rc = filemap_fdatawrite(inode->i_mapping);
4768 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4769 rc = filemap_fdatawait(inode->i_mapping);
4770 mapping_set_error(inode->i_mapping, rc);
4771 cifs_zap_mapping(inode);
4773 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4774 if (CIFS_CACHE_WRITE(cinode))
4775 goto oplock_break_ack;
/* re-send cached byte-range locks now that the oplock changed */
4778 rc = cifs_push_locks(cfile);
4780 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4784 * releasing stale oplock after recent reconnect of smb session using
4785 * a now incorrect file handle is not a data integrity issue but do
4786 * not bother sending an oplock release if session to server still is
4787 * disconnected since oplock already released by the server
4789 if (!cfile->oplock_break_cancelled) {
4790 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4792 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4794 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4795 cifs_done_oplock_break(cinode);
4799 * The presence of cifs_direct_io() in the address space ops vector
4800 * allowes open() O_DIRECT flags which would have failed otherwise.
4802 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4803 * so this method should never be called.
4805 * Direct IO is not yet supported in the cached mode.
/*
 * Stub ->direct_IO: exists only so O_DIRECT opens are permitted; the
 * body (elided here) returns an error since cached-mode direct I/O is
 * unimplemented.
 */
4808 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4812 * Eventually need to support direct IO for non forcedirectio mounts
/*
 * cifs_swap_activate - ->swap_activate op: allow swapping to an SMB file.
 * Rejects sparse swapfiles (block count * 512 smaller than i_size),
 * warns that SMB3 swap is experimental, and marks the open file as a
 * swapfile.  Several TODOs note missing hardening (ACLs, memalloc
 * socket flag, byte-range locking).  (Return paths elided here.)
 */
4817 static int cifs_swap_activate(struct swap_info_struct *sis,
4818 struct file *swap_file, sector_t *span)
4820 struct cifsFileInfo *cfile = swap_file->private_data;
4821 struct inode *inode = swap_file->f_mapping->host;
4822 unsigned long blocks;
4825 cifs_dbg(FYI, "swap activate\n");
/* snapshot size/blocks consistently under i_lock */
4827 spin_lock(&inode->i_lock);
4828 blocks = inode->i_blocks;
4829 isize = inode->i_size;
4830 spin_unlock(&inode->i_lock);
4831 if (blocks*512 < isize) {
4832 pr_warn("swap activate: swapfile has holes\n");
4837 pr_warn_once("Swap support over SMB3 is experimental\n");
4840 * TODO: consider adding ACL (or documenting how) to prevent other
4841 * users (on this or other systems) from reading it
4845 /* TODO: add sk_set_memalloc(inet) or similar */
4848 cfile->swapfile = true;
4850 * TODO: Since file already open, we can't open with DENY_ALL here
4851 * but we could add call to grab a byte range lock to prevent others
4852 * from reading or writing the file
/*
 * cifs_swap_deactivate - ->swap_deactivate op: undo cifs_swap_activate.
 * Clears the swapfile flag on the open file; remaining teardown
 * (memalloc undo, unpinning) is still TODO per the comments below.
 */
4858 static void cifs_swap_deactivate(struct file *file)
4860 struct cifsFileInfo *cfile = file->private_data;
4862 cifs_dbg(FYI, "swap deactivate\n");
4864 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4867 cfile->swapfile = false;
4869 /* do we need to unpin (or unlock) the file */
/*
 * Full address-space operations table, used when the server buffer is
 * large enough for whole-page reads (see cifs_addr_ops_smallbuf for the
 * variant without readpages/direct_IO).
 */
4872 const struct address_space_operations cifs_addr_ops = {
4873 .readpage = cifs_readpage,
4874 .readpages = cifs_readpages,
4875 .writepage = cifs_writepage,
4876 .writepages = cifs_writepages,
4877 .write_begin = cifs_write_begin,
4878 .write_end = cifs_write_end,
4879 .set_page_dirty = __set_page_dirty_nobuffers,
4880 .releasepage = cifs_release_page,
4881 .direct_IO = cifs_direct_io,
4882 .invalidatepage = cifs_invalidate_page,
4883 .launder_page = cifs_launder_page,
4885 * TODO: investigate and if useful we could add an cifs_migratePage
4886 * helper (under an CONFIG_MIGRATION) in the future, and also
4887 * investigate and add an is_dirty_writeback helper if needed
4889 .swap_activate = cifs_swap_activate,
4890 .swap_deactivate = cifs_swap_deactivate,
4894 * cifs_readpages requires the server to support a buffer large enough to
4895 * contain the header plus one complete page of data. Otherwise, we need
4896 * to leave cifs_readpages out of the address space operations.
4898 const struct address_space_operations cifs_addr_ops_smallbuf = {
4899 .readpage = cifs_readpage,
4900 .writepage = cifs_writepage,
4901 .writepages = cifs_writepages,
4902 .write_begin = cifs_write_begin,
4903 .write_end = cifs_write_end,
4904 .set_page_dirty = __set_page_dirty_nobuffers,
4905 .releasepage = cifs_release_page,
4906 .invalidatepage = cifs_invalidate_page,
4907 .launder_page = cifs_launder_page,