/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

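/*
 * Map the POSIX access mode bits to the NT access mask requested from the
 * server, e.g. O_RDWR maps to GENERIC_READ | GENERIC_WRITE. The fallback
 * mask at the bottom is only reached when the access mode is none of the
 * three standard values.
 */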
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause unnecessary access-denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

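/*
 * Map the O_CREAT/O_EXCL/O_TRUNC combination to an NT create disposition;
 * see the open flag mapping table in cifs_nt_open() below.
 */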
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

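/*
 * Open a file using the SMB POSIX extensions. On success the server returns
 * FILE_UNIX_BASIC_INFO for the new handle, which is used to instantiate a
 * new inode or refresh an existing one unless the caller passed NULL
 * @pinode.
 */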
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

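/*
 * Open a file the traditional (non-POSIX) way: convert the POSIX open flags
 * to an NT desired access and create disposition, issue the open, then query
 * the server for inode metadata on the new handle.
 */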
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the FILE_SUPERSEDE
 *      disposition (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

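/*
 * Return true if any open handle on @cinode still holds cached mandatory
 * byte-range locks.
 */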
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

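/*
 * Take @sem for write by polling with down_write_trylock() and a short
 * sleep rather than blocking on the semaphore's wait queue.
 */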
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

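/*
 * Allocate and initialize the per-open cifsFileInfo for @file, link it into
 * the tcon and inode open-file lists, and hand the fid and oplock state to
 * the protocol-specific set_fid handler.
 */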
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

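/*
 * Final teardown once the last reference is gone: drop any remaining cached
 * lock records, the tlink and dentry references, and free the structure.
 */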
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 * @cifs_file: cifs/smb3 file private data to be released
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 * @cifs_file: cifs/smb3 file private data to be released
 * @wait_oplock_handler: wait for any running oplock break handler
 * @offload: queue the final part of the release to a worker thread
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already hold the rename sem, can end up causing
         * writepage to get called, and if the server was down we end up
         * here. We can never tell whether the caller already holds the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fallthrough to retry open the old way on errors; especially
                 * in the reconnect path it is important to retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && !oparms.reconnect) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                _cifsFileInfo_put(file->private_data, true, false);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

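/*
 * After a reconnect, walk the tcon's open file list and reopen any handles
 * that were invalidated, so that persistent handles survive the reconnect.
 */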
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

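/*
 * Allocate a cifsLockInfo describing one byte-range lock, owned by the
 * current thread group, with an empty list of blocked waiters.
 */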
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

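/* Append @lock to the cached lock list of the fid, under lock_sem. */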
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait,
                                        list_empty(&flock->fl_blocked_member));
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

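/*
 * Push all cached mandatory byte-range locks for @cfile to the server,
 * batching as many LOCKING_ANDX_RANGE entries per request as fit in the
 * negotiated buffer size (one pass for exclusive locks, one for shared).
 */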
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

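/*
 * Hash the lock owner into the 32-bit pid field used on the wire; mixing in
 * cifs_lock_secret means the raw kernel pointer value is not sent to the
 * server.
 */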
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

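/* One posix byte-range lock queued to be sent to the server. */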
struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need a write access */
        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

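/*
 * Decode the VFS file_lock into the protocol lock type and whether this is
 * a lock, an unlock, and/or a blocking request, logging any flags we do not
 * handle yet.
 */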
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

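/*
 * Handle a lock-test request: try the cached/posix path first; otherwise
 * probe the server by setting and then unsetting the requested range, and
 * report back the kind of lock that would conflict.
 */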
1435 static int
1436 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1437            bool wait_flag, bool posix_lck, unsigned int xid)
1438 {
1439         int rc = 0;
1440         __u64 length = 1 + flock->fl_end - flock->fl_start;
1441         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1442         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1443         struct TCP_Server_Info *server = tcon->ses->server;
1444         __u16 netfid = cfile->fid.netfid;
1445
1446         if (posix_lck) {
1447                 int posix_lock_type;
1448
1449                 rc = cifs_posix_lock_test(file, flock);
1450                 if (!rc)
1451                         return rc;
1452
1453                 if (type & server->vals->shared_lock_type)
1454                         posix_lock_type = CIFS_RDLCK;
1455                 else
1456                         posix_lock_type = CIFS_WRLCK;
1457                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1458                                       hash_lockowner(flock->fl_owner),
1459                                       flock->fl_start, length, flock,
1460                                       posix_lock_type, wait_flag);
1461                 return rc;
1462         }
1463
1464         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1465         if (!rc)
1466                 return rc;
1467
1468         /* BB we could chain these into one lock request BB */
1469         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1470                                     1, 0, false);
1471         if (rc == 0) {
1472                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1473                                             type, 0, 1, false);
1474                 flock->fl_type = F_UNLCK;
1475                 if (rc != 0)
1476                         cifs_dbg(VFS, "error %d unlocking previously locked range during lock test\n",
1477                                  rc);
1478                 return 0;
1479         }
1480
1481         if (type & server->vals->shared_lock_type) {
1482                 flock->fl_type = F_WRLCK;
1483                 return 0;
1484         }
1485
1486         type &= ~server->vals->exclusive_lock_type;
1487
1488         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1489                                     type | server->vals->shared_lock_type,
1490                                     1, 0, false);
1491         if (rc == 0) {
1492                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1493                         type | server->vals->shared_lock_type, 0, 1, false);
1494                 flock->fl_type = F_RDLCK;
1495                 if (rc != 0)
1496                         cifs_dbg(VFS, "error %d unlocking previously locked range during lock test\n",
1497                                  rc);
1498         } else
1499                 flock->fl_type = F_WRLCK;
1500
1501         return 0;
1502 }
1503
1504 void
1505 cifs_move_llist(struct list_head *source, struct list_head *dest)
1506 {
1507         struct list_head *li, *tmp;
1508         list_for_each_safe(li, tmp, source)
1509                 list_move(li, dest);
1510 }
1511
1512 void
1513 cifs_free_llist(struct list_head *llist)
1514 {
1515         struct cifsLockInfo *li, *tmp;
1516         list_for_each_entry_safe(li, tmp, llist, llist) {
1517                 cifs_del_lock_waiters(li);
1518                 list_del(&li->llist);
1519                 kfree(li);
1520         }
1521 }
1522
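     /*
      * Remove every cached lock of the caller's tgid that lies wholly inside
      * the unlock range of @flock, batching the ranges into as few
      * LOCKING_ANDX requests as the server's maxBuf allows (the two-pass loop
      * handles the exclusive and shared lock types separately). Ranges are
      * parked on a temporary list while a request is in flight so they can be
      * restored if the server rejects the unlock. For illustration, assuming
      * a 32-byte smb_hdr and a 20-byte LOCKING_ANDX_RANGE on the wire, a
      * PAGE_SIZE-capped 4096-byte buffer batches (4096 - 32) / 20 = 203
      * ranges per request.
      */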
1523 int
1524 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1525                   unsigned int xid)
1526 {
1527         int rc = 0, stored_rc;
1528         static const int types[] = {
1529                 LOCKING_ANDX_LARGE_FILES,
1530                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1531         };
1532         unsigned int i;
1533         unsigned int max_num, num, max_buf;
1534         LOCKING_ANDX_RANGE *buf, *cur;
1535         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1536         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1537         struct cifsLockInfo *li, *tmp;
1538         __u64 length = 1 + flock->fl_end - flock->fl_start;
1539         struct list_head tmp_llist;
1540
1541         INIT_LIST_HEAD(&tmp_llist);
1542
1543         /*
1544          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1545          * and check it before using.
1546          */
1547         max_buf = tcon->ses->server->maxBuf;
1548         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1549                 return -EINVAL;
1550
1551         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1552                      PAGE_SIZE);
1553         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1554                         PAGE_SIZE);
1555         max_num = (max_buf - sizeof(struct smb_hdr)) /
1556                                                 sizeof(LOCKING_ANDX_RANGE);
1557         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1558         if (!buf)
1559                 return -ENOMEM;
1560
1561         cifs_down_write(&cinode->lock_sem);
1562         for (i = 0; i < 2; i++) {
1563                 cur = buf;
1564                 num = 0;
1565                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1566                         if (flock->fl_start > li->offset ||
1567                             (flock->fl_start + length) <
1568                             (li->offset + li->length))
1569                                 continue;
1570                         if (current->tgid != li->pid)
1571                                 continue;
1572                         if (types[i] != li->type)
1573                                 continue;
1574                         if (cinode->can_cache_brlcks) {
1575                                 /*
1576                                  * We can cache brlock requests - simply remove
1577                                  * a lock from the file's list.
1578                                  */
1579                                 list_del(&li->llist);
1580                                 cifs_del_lock_waiters(li);
1581                                 kfree(li);
1582                                 continue;
1583                         }
1584                         cur->Pid = cpu_to_le16(li->pid);
1585                         cur->LengthLow = cpu_to_le32((u32)li->length);
1586                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1587                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1588                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1589                         /*
1590                          * Save the lock here so that we can add it back to
1591                          * the file's list if the unlock range request fails
1592                          * on the server.
1593                          */
1594                         list_move(&li->llist, &tmp_llist);
1595                         if (++num == max_num) {
1596                                 stored_rc = cifs_lockv(xid, tcon,
1597                                                        cfile->fid.netfid,
1598                                                        li->type, num, 0, buf);
1599                                 if (stored_rc) {
1600                                         /*
1601                                          * We failed on the unlock range
1602                                          * request - add all locks from the tmp
1603                                          * list to the head of the file's list.
1604                                          */
1605                                         cifs_move_llist(&tmp_llist,
1606                                                         &cfile->llist->locks);
1607                                         rc = stored_rc;
1608                                 } else
1609                                         /*
1610                                          * The unlock range request succeeded -
1611                                          * free the tmp list.
1612                                          */
1613                                         cifs_free_llist(&tmp_llist);
1614                                 cur = buf;
1615                                 num = 0;
1616                         } else
1617                                 cur++;
1618                 }
1619                 if (num) {
1620                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1621                                                types[i], num, 0, buf);
1622                         if (stored_rc) {
1623                                 cifs_move_llist(&tmp_llist,
1624                                                 &cfile->llist->locks);
1625                                 rc = stored_rc;
1626                         } else
1627                                 cifs_free_llist(&tmp_llist);
1628                 }
1629         }
1630
1631         up_write(&cinode->lock_sem);
1632         kfree(buf);
1633         return rc;
1634 }
1635
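     /*
      * Handle F_SETLK/F_SETLKW and flock requests. POSIX-capable mounts
      * translate the request into a single CIFSSMBPosixLock call; everything
      * else goes through the mandatory byte-range lock ops. A new lock is
      * first run through cifs_lock_add_if(), which either rejects it on a
      * local conflict, caches it with no server round trip, or signals that
      * the server must be asked; only a server-acknowledged lock is then
      * added to the file's list. For FL_POSIX/FL_FLOCK requests the VFS-level
      * lock state is updated via locks_lock_file_wait() on the way out.
      */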
1636 static int
1637 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1638            bool wait_flag, bool posix_lck, int lock, int unlock,
1639            unsigned int xid)
1640 {
1641         int rc = 0;
1642         __u64 length = 1 + flock->fl_end - flock->fl_start;
1643         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1644         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1645         struct TCP_Server_Info *server = tcon->ses->server;
1646         struct inode *inode = d_inode(cfile->dentry);
1647
1648         if (posix_lck) {
1649                 int posix_lock_type;
1650
1651                 rc = cifs_posix_lock_set(file, flock);
1652                 if (rc <= 0)
1653                         return rc;
1654
1655                 if (type & server->vals->shared_lock_type)
1656                         posix_lock_type = CIFS_RDLCK;
1657                 else
1658                         posix_lock_type = CIFS_WRLCK;
1659
1660                 if (unlock == 1)
1661                         posix_lock_type = CIFS_UNLCK;
1662
1663                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1664                                       hash_lockowner(flock->fl_owner),
1665                                       flock->fl_start, length,
1666                                       NULL, posix_lock_type, wait_flag);
1667                 goto out;
1668         }
1669
1670         if (lock) {
1671                 struct cifsLockInfo *lock;
1672
1673                 lock = cifs_lock_init(flock->fl_start, length, type,
1674                                       flock->fl_flags);
1675                 if (!lock)
1676                         return -ENOMEM;
1677
1678                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1679                 if (rc < 0) {
1680                         kfree(lock);
1681                         return rc;
1682                 }
1683                 if (!rc)
1684                         goto out;
1685
1686                 /*
1687                  * Windows 7 server can delay breaking lease from read to None
1688                  * if we set a byte-range lock on a file - break it explicitly
1689                  * before sending the lock to the server to be sure the next
1690                  * read won't conflict with non-overlapping locks due to
1691                  * page reading.
1692                  */
1693                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1694                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1695                         cifs_zap_mapping(inode);
1696                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1697                                  inode);
1698                         CIFS_I(inode)->oplock = 0;
1699                 }
1700
1701                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1702                                             type, 1, 0, wait_flag);
1703                 if (rc) {
1704                         kfree(lock);
1705                         return rc;
1706                 }
1707
1708                 cifs_lock_add(cfile, lock);
1709         } else if (unlock)
1710                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1711
1712 out:
1713         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1714                 /*
1715                  * If this is a request to remove all locks because we
1716                  * are closing the file, it doesn't matter if the
1717                  * unlocking failed as both cifs.ko and the SMB server
1718                  * remove the lock on file close
1719                  */
1720                 if (rc) {
1721                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1722                         if (!(flock->fl_flags & FL_CLOSE))
1723                                 return rc;
1724                 }
1725                 rc = locks_lock_file_wait(file, flock);
1726         }
1727         return rc;
1728 }
1729
1730 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1731 {
1732         int rc, xid;
1733         int lock = 0, unlock = 0;
1734         bool wait_flag = false;
1735         bool posix_lck = false;
1736         struct cifs_sb_info *cifs_sb;
1737         struct cifs_tcon *tcon;
1738         struct cifsFileInfo *cfile;
1739         __u32 type;
1740
1741         if (!(fl->fl_flags & FL_FLOCK))
1742                 return -ENOLCK;
1743
1744         rc = -EACCES;
1745         xid = get_xid();
1746
1747         cfile = (struct cifsFileInfo *)file->private_data;
1748         tcon = tlink_tcon(cfile->tlink);
1749
1750         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1751                         tcon->ses->server);
1752         cifs_sb = CIFS_FILE_SB(file);
1753
1754         if (cap_unix(tcon->ses) &&
1755             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1756             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1757                 posix_lck = true;
1758
1759         if (!lock && !unlock) {
1760                 /*
1761                  * if this is neither a lock nor an unlock request, there is
1762                  * nothing to do since we do not know what it is
1763                  */
1764                 free_xid(xid);
1765                 return -EOPNOTSUPP;
1766         }
1767
1768         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1769                         xid);
1770         free_xid(xid);
1771         return rc;
1774 }
1775
1776 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1777 {
1778         int rc, xid;
1779         int lock = 0, unlock = 0;
1780         bool wait_flag = false;
1781         bool posix_lck = false;
1782         struct cifs_sb_info *cifs_sb;
1783         struct cifs_tcon *tcon;
1784         struct cifsFileInfo *cfile;
1785         __u32 type;
1786
1787         rc = -EACCES;
1788         xid = get_xid();
1789
1790         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1791                  cmd, flock->fl_flags, flock->fl_type,
1792                  flock->fl_start, flock->fl_end);
1793
1794         cfile = (struct cifsFileInfo *)file->private_data;
1795         tcon = tlink_tcon(cfile->tlink);
1796
1797         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1798                         tcon->ses->server);
1799         cifs_sb = CIFS_FILE_SB(file);
1800
1801         if (cap_unix(tcon->ses) &&
1802             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1803             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1804                 posix_lck = true;
1805         /*
1806          * BB add code here to normalize offset and length to account for
1807          * negative length which we can not accept over the wire.
1808          */
1809         if (IS_GETLK(cmd)) {
1810                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1811                 free_xid(xid);
1812                 return rc;
1813         }
1814
1815         if (!lock && !unlock) {
1816                 /*
1817                  * if this is neither a lock nor an unlock request, there is
1818                  * nothing to do since we do not know what it is
1819                  */
1820                 free_xid(xid);
1821                 return -EOPNOTSUPP;
1822         }
1823
1824         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1825                         xid);
1826         free_xid(xid);
1827         return rc;
1828 }
1829
1830 /*
1831  * update the file size (if needed) after a write. Should be called with
1832  * the inode->i_lock held
1833  */
1834 void
1835 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1836                       unsigned int bytes_written)
1837 {
1838         loff_t end_of_write = offset + bytes_written;
1839
1840         if (end_of_write > cifsi->server_eof)
1841                 cifsi->server_eof = end_of_write;
1842 }
1843
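     /*
      * Synchronous write helper: loop sending at most wp_retry_size() bytes
      * per SMB write, transparently reopening an invalidated handle and
      * retrying on -EAGAIN. On success the cached server EOF and i_size are
      * advanced under i_lock and *offset is moved past the written bytes.
      */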
1844 static ssize_t
1845 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1846            size_t write_size, loff_t *offset)
1847 {
1848         int rc = 0;
1849         unsigned int bytes_written = 0;
1850         unsigned int total_written;
1851         struct cifs_tcon *tcon;
1852         struct TCP_Server_Info *server;
1853         unsigned int xid;
1854         struct dentry *dentry = open_file->dentry;
1855         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1856         struct cifs_io_parms io_parms;
1857
1858         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1859                  write_size, *offset, dentry);
1860
1861         tcon = tlink_tcon(open_file->tlink);
1862         server = tcon->ses->server;
1863
1864         if (!server->ops->sync_write)
1865                 return -ENOSYS;
1866
1867         xid = get_xid();
1868
1869         for (total_written = 0; write_size > total_written;
1870              total_written += bytes_written) {
1871                 rc = -EAGAIN;
1872                 while (rc == -EAGAIN) {
1873                         struct kvec iov[2];
1874                         unsigned int len;
1875
1876                         if (open_file->invalidHandle) {
1877                                 /* we could deadlock if we called
1878                                    filemap_fdatawait from here so tell
1879                                    reopen_file not to flush data to
1880                                    server now */
1881                                 rc = cifs_reopen_file(open_file, false);
1882                                 if (rc != 0)
1883                                         break;
1884                         }
1885
1886                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1887                                   (unsigned int)write_size - total_written);
1888                         /* iov[0] is reserved for smb header */
1889                         iov[1].iov_base = (char *)write_data + total_written;
1890                         iov[1].iov_len = len;
1891                         io_parms.pid = pid;
1892                         io_parms.tcon = tcon;
1893                         io_parms.offset = *offset;
1894                         io_parms.length = len;
1895                         rc = server->ops->sync_write(xid, &open_file->fid,
1896                                         &io_parms, &bytes_written, iov, 1);
1897                 }
1898                 if (rc || (bytes_written == 0)) {
1899                         if (total_written)
1900                                 break;
1901                         else {
1902                                 free_xid(xid);
1903                                 return rc;
1904                         }
1905                 } else {
1906                         spin_lock(&d_inode(dentry)->i_lock);
1907                         cifs_update_eof(cifsi, *offset, bytes_written);
1908                         spin_unlock(&d_inode(dentry)->i_lock);
1909                         *offset += bytes_written;
1910                 }
1911         }
1912
1913         cifs_stats_bytes_written(tcon, total_written);
1914
1915         if (total_written > 0) {
1916                 spin_lock(&d_inode(dentry)->i_lock);
1917                 if (*offset > d_inode(dentry)->i_size)
1918                         i_size_write(d_inode(dentry), *offset);
1919                 spin_unlock(&d_inode(dentry)->i_lock);
1920         }
1921         mark_inode_dirty_sync(d_inode(dentry));
1922         free_xid(xid);
1923         return total_written;
1924 }
1925
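     /*
      * Find an open handle on this inode with read access and take a
      * reference on it so it cannot be closed under the caller. Handles with
      * a reopen pending are skipped, and on multiuser mounts only handles
      * belonging to the current fsuid are considered. Returns NULL if
      * nothing suitable is found.
      */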
1926 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1927                                         bool fsuid_only)
1928 {
1929         struct cifsFileInfo *open_file = NULL;
1930         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1931
1932         /* only filter by fsuid on multiuser mounts */
1933         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1934                 fsuid_only = false;
1935
1936         spin_lock(&cifs_inode->open_file_lock);
1937         /* we could simply get the first list entry since write-only entries
1938            are always at the end of the list, but since the first entry might
1939            have a close pending, we go through the whole list */
1940         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1941                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1942                         continue;
1943                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1944                         if (!open_file->invalidHandle) {
1945                                 /* found a good file */
1946                                 /* lock it so it will not be closed on us */
1947                                 cifsFileInfo_get(open_file);
1948                                 spin_unlock(&cifs_inode->open_file_lock);
1949                                 return open_file;
1950                         } /* else might as well continue, and look for
1951                              another, or simply have the caller reopen it
1952                              again rather than trying to fix this handle */
1953                 } else /* write only file */
1954                         break; /* write only files are last so must be done */
1955         }
1956         spin_unlock(&cifs_inode->open_file_lock);
1957         return NULL;
1958 }
1959
1960 /* Return -EBADF if no handle is found and general rc otherwise */
1961 int
1962 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
1963                        struct cifsFileInfo **ret_file)
1964 {
1965         struct cifsFileInfo *open_file, *inv_file = NULL;
1966         struct cifs_sb_info *cifs_sb;
1967         bool any_available = false;
1968         int rc = -EBADF;
1969         unsigned int refind = 0;
1970         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
1971         bool with_delete = flags & FIND_WR_WITH_DELETE;
1972         *ret_file = NULL;
1973
1974         /*
1975          * Having a null inode here (because mapping->host was set to zero by
1976          * the VFS or MM) should not happen, but we had reports of an oops (due
1977          * to it being zero) during stress test cases, so we need to check for it
1978          */
1979
1980         if (cifs_inode == NULL) {
1981                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
1982                 dump_stack();
1983                 return rc;
1984         }
1985
1986         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1987
1988         /* only filter by fsuid on multiuser mounts */
1989         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1990                 fsuid_only = false;
1991
1992         spin_lock(&cifs_inode->open_file_lock);
1993 refind_writable:
1994         if (refind > MAX_REOPEN_ATT) {
1995                 spin_unlock(&cifs_inode->open_file_lock);
1996                 return rc;
1997         }
1998         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1999                 if (!any_available && open_file->pid != current->tgid)
2000                         continue;
2001                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2002                         continue;
2003                 if (with_delete && !(open_file->fid.access & DELETE))
2004                         continue;
2005                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2006                         if (!open_file->invalidHandle) {
2007                                 /* found a good writable file */
2008                                 cifsFileInfo_get(open_file);
2009                                 spin_unlock(&cifs_inode->open_file_lock);
2010                                 *ret_file = open_file;
2011                                 return 0;
2012                         } else {
2013                                 if (!inv_file)
2014                                         inv_file = open_file;
2015                         }
2016                 }
2017         }
2018         /* couldn't find a usable FH with the same pid, try any available */
2019         if (!any_available) {
2020                 any_available = true;
2021                 goto refind_writable;
2022         }
2023
2024         if (inv_file) {
2025                 any_available = false;
2026                 cifsFileInfo_get(inv_file);
2027         }
2028
2029         spin_unlock(&cifs_inode->open_file_lock);
2030
2031         if (inv_file) {
2032                 rc = cifs_reopen_file(inv_file, false);
2033                 if (!rc) {
2034                         *ret_file = inv_file;
2035                         return 0;
2036                 }
2037
2038                 spin_lock(&cifs_inode->open_file_lock);
2039                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2040                 spin_unlock(&cifs_inode->open_file_lock);
2041                 cifsFileInfo_put(inv_file);
2042                 ++refind;
2043                 inv_file = NULL;
2044                 spin_lock(&cifs_inode->open_file_lock);
2045                 goto refind_writable;
2046         }
2047
2048         return rc;
2049 }
2050
2051 struct cifsFileInfo *
2052 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2053 {
2054         struct cifsFileInfo *cfile;
2055         int rc;
2056
2057         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2058         if (rc)
2059                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2060
2061         return cfile;
2062 }
2063
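     /*
      * Look up an open, writable handle by path name rather than by inode:
      * walk the tcon's open file list, rebuilding each dentry's full path
      * and comparing it against @name, then defer to cifs_get_writable_file()
      * for the matching inode. Note this rebuilds a path per open file, so
      * it is O(n) in allocations over the open file list.
      */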
2064 int
2065 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2066                        int flags,
2067                        struct cifsFileInfo **ret_file)
2068 {
2069         struct list_head *tmp;
2070         struct cifsFileInfo *cfile;
2071         struct cifsInodeInfo *cinode;
2072         char *full_path;
2073
2074         *ret_file = NULL;
2075
2076         spin_lock(&tcon->open_file_lock);
2077         list_for_each(tmp, &tcon->openFileList) {
2078                 cfile = list_entry(tmp, struct cifsFileInfo,
2079                              tlist);
2080                 full_path = build_path_from_dentry(cfile->dentry);
2081                 if (full_path == NULL) {
2082                         spin_unlock(&tcon->open_file_lock);
2083                         return -ENOMEM;
2084                 }
2085                 if (strcmp(full_path, name)) {
2086                         kfree(full_path);
2087                         continue;
2088                 }
2089
2090                 kfree(full_path);
2091                 cinode = CIFS_I(d_inode(cfile->dentry));
2092                 spin_unlock(&tcon->open_file_lock);
2093                 return cifs_get_writable_file(cinode, flags, ret_file);
2094         }
2095
2096         spin_unlock(&tcon->open_file_lock);
2097         return -ENOENT;
2098 }
2099
2100 int
2101 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2102                        struct cifsFileInfo **ret_file)
2103 {
2104         struct list_head *tmp;
2105         struct cifsFileInfo *cfile;
2106         struct cifsInodeInfo *cinode;
2107         char *full_path;
2108
2109         *ret_file = NULL;
2110
2111         spin_lock(&tcon->open_file_lock);
2112         list_for_each(tmp, &tcon->openFileList) {
2113                 cfile = list_entry(tmp, struct cifsFileInfo,
2114                              tlist);
2115                 full_path = build_path_from_dentry(cfile->dentry);
2116                 if (full_path == NULL) {
2117                         spin_unlock(&tcon->open_file_lock);
2118                         return -ENOMEM;
2119                 }
2120                 if (strcmp(full_path, name)) {
2121                         kfree(full_path);
2122                         continue;
2123                 }
2124
2125                 kfree(full_path);
2126                 cinode = CIFS_I(d_inode(cfile->dentry));
2127                 spin_unlock(&tcon->open_file_lock);
2128                 *ret_file = find_readable_file(cinode, 0);
2129                 return *ret_file ? 0 : -ENOENT;
2130         }
2131
2132         spin_unlock(&tcon->open_file_lock);
2133         return -ENOENT;
2134 }
2135
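     /*
      * Write back the [from, to) byte range of one page through any writable
      * handle on the inode. The range is clamped so we never extend the file,
      * and a write wholly past i_size (a racing truncate) is silently
      * dropped. This is the single-page write path used by
      * cifs_writepage_locked().
      */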
2136 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2137 {
2138         struct address_space *mapping = page->mapping;
2139         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2140         char *write_data;
2141         int rc = -EFAULT;
2142         int bytes_written = 0;
2143         struct inode *inode;
2144         struct cifsFileInfo *open_file;
2145
2146         if (!mapping || !mapping->host)
2147                 return -EFAULT;
2148
2149         inode = page->mapping->host;
2150
2151         offset += (loff_t)from;
2152         write_data = kmap(page);
2153         write_data += from;
2154
2155         if ((to > PAGE_SIZE) || (from > to)) {
2156                 kunmap(page);
2157                 return -EIO;
2158         }
2159
2160         /* racing with truncate? */
2161         if (offset > mapping->host->i_size) {
2162                 kunmap(page);
2163                 return 0; /* don't care */
2164         }
2165
2166         /* check to make sure that we are not extending the file */
2167         if (mapping->host->i_size - offset < (loff_t)to)
2168                 to = (unsigned)(mapping->host->i_size - offset);
2169
2170         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2171                                     &open_file);
2172         if (!rc) {
2173                 bytes_written = cifs_write(open_file, open_file->pid,
2174                                            write_data, to - from, &offset);
2175                 cifsFileInfo_put(open_file);
2176                 /* Does mm or vfs already set times? */
2177                 inode->i_atime = inode->i_mtime = current_time(inode);
2178                 if ((bytes_written > 0) && (offset))
2179                         rc = 0;
2180                 else if (bytes_written < 0)
2181                         rc = bytes_written;
2182                 else
2183                         rc = -EFAULT;
2184         } else {
2185                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2186                 if (!is_retryable_error(rc))
2187                         rc = -EIO;
2188         }
2189
2190         kunmap(page);
2191         return rc;
2192 }
2193
2194 static struct cifs_writedata *
2195 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2196                           pgoff_t end, pgoff_t *index,
2197                           unsigned int *found_pages)
2198 {
2199         struct cifs_writedata *wdata;
2200
2201         wdata = cifs_writedata_alloc((unsigned int)tofind,
2202                                      cifs_writev_complete);
2203         if (!wdata)
2204                 return NULL;
2205
2206         *found_pages = find_get_pages_range_tag(mapping, index, end,
2207                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2208         return wdata;
2209 }
2210
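     /*
      * Lock and tag for writeback as many of the @found_pages candidates as
      * still qualify: each page must still belong to @mapping, be the next
      * consecutive index, lie within the writeback range, and not already be
      * under writeback. The first page that fails a check ends the batch, and
      * any unused pages are released before returning the batch size.
      */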
2211 static unsigned int
2212 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2213                     struct address_space *mapping,
2214                     struct writeback_control *wbc,
2215                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2216 {
2217         unsigned int nr_pages = 0, i;
2218         struct page *page;
2219
2220         for (i = 0; i < found_pages; i++) {
2221                 page = wdata->pages[i];
2222                 /*
2223                  * At this point we hold neither the i_pages lock nor the
2224                  * page lock: the page may be truncated or invalidated
2225                  * (changing page->mapping to NULL), or even swizzled
2226                  * back from swapper_space to tmpfs file mapping
2227                  */
2228
2229                 if (nr_pages == 0)
2230                         lock_page(page);
2231                 else if (!trylock_page(page))
2232                         break;
2233
2234                 if (unlikely(page->mapping != mapping)) {
2235                         unlock_page(page);
2236                         break;
2237                 }
2238
2239                 if (!wbc->range_cyclic && page->index > end) {
2240                         *done = true;
2241                         unlock_page(page);
2242                         break;
2243                 }
2244
2245                 if (*next && (page->index != *next)) {
2246                         /* Not next consecutive page */
2247                         unlock_page(page);
2248                         break;
2249                 }
2250
2251                 if (wbc->sync_mode != WB_SYNC_NONE)
2252                         wait_on_page_writeback(page);
2253
2254                 if (PageWriteback(page) ||
2255                                 !clear_page_dirty_for_io(page)) {
2256                         unlock_page(page);
2257                         break;
2258                 }
2259
2260                 /*
2261                  * This actually clears the dirty bit in the radix tree.
2262                  * See cifs_writepage() for more commentary.
2263                  */
2264                 set_page_writeback(page);
2265                 if (page_offset(page) >= i_size_read(mapping->host)) {
2266                         *done = true;
2267                         unlock_page(page);
2268                         end_page_writeback(page);
2269                         break;
2270                 }
2271
2272                 wdata->pages[i] = page;
2273                 *next = page->index + 1;
2274                 ++nr_pages;
2275         }
2276
2277         /* reset index to refind any pages skipped */
2278         if (nr_pages == 0)
2279                 *index = wdata->pages[0]->index + 1;
2280
2281         /* put any pages we aren't going to use */
2282         for (i = nr_pages; i < found_pages; i++) {
2283                 put_page(wdata->pages[i]);
2284                 wdata->pages[i] = NULL;
2285         }
2286
2287         return nr_pages;
2288 }
2289
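     /*
      * Fill in the remaining fields of @wdata for a batch of @nr_pages locked,
      * writeback-tagged pages and hand it to the transport via the
      * async_writev op. The tail page may be short when i_size does not end
      * on a page boundary; e.g. with 4 KB pages, i_size == 10000 and the last
      * page at offset 8192 gives tailsz = 10000 - 8192 = 1808 bytes.
      */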
2290 static int
2291 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2292                  struct address_space *mapping, struct writeback_control *wbc)
2293 {
2294         int rc;
2295         struct TCP_Server_Info *server =
2296                                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2297
2298         wdata->sync_mode = wbc->sync_mode;
2299         wdata->nr_pages = nr_pages;
2300         wdata->offset = page_offset(wdata->pages[0]);
2301         wdata->pagesz = PAGE_SIZE;
2302         wdata->tailsz = min(i_size_read(mapping->host) -
2303                         page_offset(wdata->pages[nr_pages - 1]),
2304                         (loff_t)PAGE_SIZE);
2305         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2306         wdata->pid = wdata->cfile->pid;
2307
2308         rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2309         if (rc)
2310                 return rc;
2311
2312         if (wdata->cfile->invalidHandle)
2313                 rc = -EAGAIN;
2314         else
2315                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2316
2317         return rc;
2318 }
2319
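     /*
      * ->writepages for cifs: repeatedly gather up to wsize worth of dirty,
      * contiguous pages, wrap them in a cifs_writedata and submit them
      * asynchronously, with wsize itself negotiated per batch through the
      * server's credit machinery (wait_mtu_credits). Falls back to
      * generic_writepages() when wsize is smaller than a page.
      */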
2320 static int cifs_writepages(struct address_space *mapping,
2321                            struct writeback_control *wbc)
2322 {
2323         struct inode *inode = mapping->host;
2324         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2325         struct TCP_Server_Info *server;
2326         bool done = false, scanned = false, range_whole = false;
2327         pgoff_t end, index;
2328         struct cifs_writedata *wdata;
2329         struct cifsFileInfo *cfile = NULL;
2330         int rc = 0;
2331         int saved_rc = 0;
2332         unsigned int xid;
2333
2334         /*
2335          * If wsize is smaller than the page cache size, default to writing
2336          * one page at a time via cifs_writepage
2337          */
2338         if (cifs_sb->wsize < PAGE_SIZE)
2339                 return generic_writepages(mapping, wbc);
2340
2341         xid = get_xid();
2342         if (wbc->range_cyclic) {
2343                 index = mapping->writeback_index; /* Start from prev offset */
2344                 end = -1;
2345         } else {
2346                 index = wbc->range_start >> PAGE_SHIFT;
2347                 end = wbc->range_end >> PAGE_SHIFT;
2348                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2349                         range_whole = true;
2350                 scanned = true;
2351         }
2352         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2353 retry:
2354         while (!done && index <= end) {
2355                 unsigned int i, nr_pages, found_pages, wsize;
2356                 pgoff_t next = 0, tofind, saved_index = index;
2357                 struct cifs_credits credits_on_stack;
2358                 struct cifs_credits *credits = &credits_on_stack;
2359                 int get_file_rc = 0;
2360
2361                 if (cfile)
2362                         cifsFileInfo_put(cfile);
2363
2364                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2365
2366                 /* in case of an error store it to return later */
2367                 if (rc)
2368                         get_file_rc = rc;
2369
2370                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2371                                                    &wsize, credits);
2372                 if (rc != 0) {
2373                         done = true;
2374                         break;
2375                 }
2376
2377                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2378
2379                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2380                                                   &found_pages);
2381                 if (!wdata) {
2382                         rc = -ENOMEM;
2383                         done = true;
2384                         add_credits_and_wake_if(server, credits, 0);
2385                         break;
2386                 }
2387
2388                 if (found_pages == 0) {
2389                         kref_put(&wdata->refcount, cifs_writedata_release);
2390                         add_credits_and_wake_if(server, credits, 0);
2391                         break;
2392                 }
2393
2394                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2395                                                end, &index, &next, &done);
2396
2397                 /* nothing to write? */
2398                 if (nr_pages == 0) {
2399                         kref_put(&wdata->refcount, cifs_writedata_release);
2400                         add_credits_and_wake_if(server, credits, 0);
2401                         continue;
2402                 }
2403
2404                 wdata->credits = credits_on_stack;
2405                 wdata->cfile = cfile;
2406                 cfile = NULL;
2407
2408                 if (!wdata->cfile) {
2409                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2410                                  get_file_rc);
2411                         if (is_retryable_error(get_file_rc))
2412                                 rc = get_file_rc;
2413                         else
2414                                 rc = -EBADF;
2415                 } else
2416                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2417
2418                 for (i = 0; i < nr_pages; ++i)
2419                         unlock_page(wdata->pages[i]);
2420
2421                 /* send failure -- clean up the mess */
2422                 if (rc != 0) {
2423                         add_credits_and_wake_if(server, &wdata->credits, 0);
2424                         for (i = 0; i < nr_pages; ++i) {
2425                                 if (is_retryable_error(rc))
2426                                         redirty_page_for_writepage(wbc,
2427                                                            wdata->pages[i]);
2428                                 else
2429                                         SetPageError(wdata->pages[i]);
2430                                 end_page_writeback(wdata->pages[i]);
2431                                 put_page(wdata->pages[i]);
2432                         }
2433                         if (!is_retryable_error(rc))
2434                                 mapping_set_error(mapping, rc);
2435                 }
2436                 kref_put(&wdata->refcount, cifs_writedata_release);
2437
2438                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2439                         index = saved_index;
2440                         continue;
2441                 }
2442
2443                 /* Return immediately if we received a signal during writing */
2444                 if (is_interrupt_error(rc)) {
2445                         done = true;
2446                         break;
2447                 }
2448
2449                 if (rc != 0 && saved_rc == 0)
2450                         saved_rc = rc;
2451
2452                 wbc->nr_to_write -= nr_pages;
2453                 if (wbc->nr_to_write <= 0)
2454                         done = true;
2455
2456                 index = next;
2457         }
2458
2459         if (!scanned && !done) {
2460                 /*
2461                  * We hit the last page and there is more work to be done: wrap
2462                  * back to the start of the file
2463                  */
2464                 scanned = true;
2465                 index = 0;
2466                 goto retry;
2467         }
2468
2469         if (saved_rc != 0)
2470                 rc = saved_rc;
2471
2472         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2473                 mapping->writeback_index = index;
2474
2475         if (cfile)
2476                 cifsFileInfo_put(cfile);
2477         free_xid(xid);
2478         return rc;
2479 }
2480
2481 static int
2482 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2483 {
2484         int rc;
2485         unsigned int xid;
2486
2487         xid = get_xid();
2488 /* BB add check for wbc flags */
2489         get_page(page);
2490         if (!PageUptodate(page))
2491                 cifs_dbg(FYI, "ppw - page not up to date\n");
2492
2493         /*
2494          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2495          *
2496          * A writepage() implementation always needs to do either this,
2497          * or re-dirty the page with "redirty_page_for_writepage()" in
2498          * the case of a failure.
2499          *
2500          * Just unlocking the page will cause the radix tree tag-bits
2501          * to fail to update with the state of the page correctly.
2502          */
2503         set_page_writeback(page);
2504 retry_write:
2505         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2506         if (is_retryable_error(rc)) {
2507                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2508                         goto retry_write;
2509                 redirty_page_for_writepage(wbc, page);
2510         } else if (rc != 0) {
2511                 SetPageError(page);
2512                 mapping_set_error(page->mapping, rc);
2513         } else {
2514                 SetPageUptodate(page);
2515         }
2516         end_page_writeback(page);
2517         put_page(page);
2518         free_xid(xid);
2519         return rc;
2520 }
2521
2522 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2523 {
2524         int rc = cifs_writepage_locked(page, wbc);
2525         unlock_page(page);
2526         return rc;
2527 }
2528
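     /*
      * ->write_end for cifs: if the page never became uptodate (a short copy
      * into a partially cached page), write the copied bytes through
      * synchronously via cifs_write(); otherwise simply dirty the page and
      * leave it for writeback. i_size is advanced under i_lock when the
      * write extends the file.
      */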
2529 static int cifs_write_end(struct file *file, struct address_space *mapping,
2530                         loff_t pos, unsigned len, unsigned copied,
2531                         struct page *page, void *fsdata)
2532 {
2533         int rc;
2534         struct inode *inode = mapping->host;
2535         struct cifsFileInfo *cfile = file->private_data;
2536         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2537         __u32 pid;
2538
2539         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2540                 pid = cfile->pid;
2541         else
2542                 pid = current->tgid;
2543
2544         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2545                  page, pos, copied);
2546
2547         if (PageChecked(page)) {
2548                 if (copied == len)
2549                         SetPageUptodate(page);
2550                 ClearPageChecked(page);
2551         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2552                 SetPageUptodate(page);
2553
2554         if (!PageUptodate(page)) {
2555                 char *page_data;
2556                 unsigned offset = pos & (PAGE_SIZE - 1);
2557                 unsigned int xid;
2558
2559                 xid = get_xid();
2560                 /* this is probably better than directly calling
2561                    cifs_partialpagewrite since here the file handle is
2562                    known, which we might as well leverage */
2563                 /* BB check if anything else missing out of ppw
2564                    such as updating last write time */
2565                 page_data = kmap(page);
2566                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2567                 /* if (rc < 0) should we set writebehind rc? */
2568                 kunmap(page);
2569
2570                 free_xid(xid);
2571         } else {
2572                 rc = copied;
2573                 pos += copied;
2574                 set_page_dirty(page);
2575         }
2576
2577         if (rc > 0) {
2578                 spin_lock(&inode->i_lock);
2579                 if (pos > inode->i_size)
2580                         i_size_write(inode, pos);
2581                 spin_unlock(&inode->i_lock);
2582         }
2583
2584         unlock_page(page);
2585         put_page(page);
2586
2587         return rc;
2588 }
2589
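     /*
      * fsync for handles opened with strict cache semantics: after flushing
      * dirty pages, also zap the page cache if we no longer hold a read
      * oplock (so the next read goes to the server), then ask the server to
      * flush via the flush op unless the nostrictsync mount option is set.
      */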
2590 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2591                       int datasync)
2592 {
2593         unsigned int xid;
2594         int rc = 0;
2595         struct cifs_tcon *tcon;
2596         struct TCP_Server_Info *server;
2597         struct cifsFileInfo *smbfile = file->private_data;
2598         struct inode *inode = file_inode(file);
2599         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2600
2601         rc = file_write_and_wait_range(file, start, end);
2602         if (rc) {
2603                 trace_cifs_fsync_err(inode->i_ino, rc);
2604                 return rc;
2605         }
2606
2607         xid = get_xid();
2608
2609         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2610                  file, datasync);
2611
2612         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2613                 rc = cifs_zap_mapping(inode);
2614                 if (rc) {
2615                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2616                         rc = 0; /* don't care about it in fsync */
2617                 }
2618         }
2619
2620         tcon = tlink_tcon(smbfile->tlink);
2621         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2622                 server = tcon->ses->server;
2623                 if (server->ops->flush)
2624                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2625                 else
2626                         rc = -ENOSYS;
2627         }
2628
2629         free_xid(xid);
2630         return rc;
2631 }
2632
2633 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2634 {
2635         unsigned int xid;
2636         int rc = 0;
2637         struct cifs_tcon *tcon;
2638         struct TCP_Server_Info *server;
2639         struct cifsFileInfo *smbfile = file->private_data;
2640         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2641
2642         rc = file_write_and_wait_range(file, start, end);
2643         if (rc) {
2644                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2645                 return rc;
2646         }
2647
2648         xid = get_xid();
2649
2650         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2651                  file, datasync);
2652
2653         tcon = tlink_tcon(smbfile->tlink);
2654         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2655                 server = tcon->ses->server;
2656                 if (server->ops->flush)
2657                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2658                 else
2659                         rc = -ENOSYS;
2660         }
2661
2662         free_xid(xid);
2663         return rc;
2664 }
2665
2666 /*
2667  * As the file closes, flush all cached write data for this inode, checking
2668  * for write-behind errors.
2669  */
2670 int cifs_flush(struct file *file, fl_owner_t id)
2671 {
2672         struct inode *inode = file_inode(file);
2673         int rc = 0;
2674
2675         if (file->f_mode & FMODE_WRITE)
2676                 rc = filemap_write_and_wait(inode->i_mapping);
2677
2678         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2679         if (rc)
2680                 trace_cifs_flush_err(inode->i_ino, rc);
2681         return rc;
2682 }
2683
2684 static int
2685 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2686 {
2687         int rc = 0;
2688         unsigned long i;
2689
2690         for (i = 0; i < num_pages; i++) {
2691                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2692                 if (!pages[i]) {
2693                         /*
2694                          * save number of pages we have already allocated and
2695                          * return with ENOMEM error
2696                          */
2697                         num_pages = i;
2698                         rc = -ENOMEM;
2699                         break;
2700                 }
2701         }
2702
2703         if (rc) {
2704                 for (i = 0; i < num_pages; i++)
2705                         put_page(pages[i]);
2706         }
2707         return rc;
2708 }
2709
2710 static inline
2711 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2712 {
2713         size_t num_pages;
2714         size_t clen;
2715
2716         clen = min_t(const size_t, len, wsize);
2717         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2718
2719         if (cur_len)
2720                 *cur_len = clen;
2721
2722         return num_pages;
2723 }
2724
2725 static void
2726 cifs_uncached_writedata_release(struct kref *refcount)
2727 {
2728         int i;
2729         struct cifs_writedata *wdata = container_of(refcount,
2730                                         struct cifs_writedata, refcount);
2731
2732         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2733         for (i = 0; i < wdata->nr_pages; i++)
2734                 put_page(wdata->pages[i]);
2735         cifs_writedata_release(refcount);
2736 }
2737
2738 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2739
2740 static void
2741 cifs_uncached_writev_complete(struct work_struct *work)
2742 {
2743         struct cifs_writedata *wdata = container_of(work,
2744                                         struct cifs_writedata, work);
2745         struct inode *inode = d_inode(wdata->cfile->dentry);
2746         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2747
2748         spin_lock(&inode->i_lock);
2749         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2750         if (cifsi->server_eof > inode->i_size)
2751                 i_size_write(inode, cifsi->server_eof);
2752         spin_unlock(&inode->i_lock);
2753
2754         complete(&wdata->done);
2755         collect_uncached_write_data(wdata->ctx);
2756         /* the below call can possibly free the last ref to aio ctx */
2757         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2758 }
2759
2760 static int
2761 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2762                       size_t *len, unsigned long *num_pages)
2763 {
2764         size_t save_len, copied, bytes, cur_len = *len;
2765         unsigned long i, nr_pages = *num_pages;
2766
2767         save_len = cur_len;
2768         for (i = 0; i < nr_pages; i++) {
2769                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2770                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2771                 cur_len -= copied;
2772                 /*
2773                  * If we didn't copy as much as we expected, then that
2774                  * may mean we trod into an unmapped area. Stop copying
2775                  * at that point. On the next pass through the big
2776                  * loop, we'll likely end up getting a zero-length
2777                  * write and bailing out of it.
2778                  */
2779                 if (copied < bytes)
2780                         break;
2781         }
2782         cur_len = save_len - cur_len;
2783         *len = cur_len;
2784
2785         /*
2786          * If we have no data to send, then that probably means that
2787          * the copy above failed altogether. That's most likely because
2788          * the address in the iovec was bogus. Return -EFAULT and let
2789          * the caller free anything we allocated and bail out.
2790          */
2791         if (!cur_len)
2792                 return -EFAULT;
2793
2794         /*
2795          * i + 1 now represents the number of pages we actually used in
2796          * the copy phase above.
2797          */
2798         *num_pages = i + 1;
2799         return 0;
2800 }
2801
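     /*
      * Resend a previously built wdata after a reconnect. The wdata is resent
      * whole rather than re-split, so this sleeps in a loop until the server
      * grants enough credits to cover wdata->bytes, reopening the file handle
      * first if it was invalidated. On SMB Direct the stale memory
      * registration is torn down before the resend.
      */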
2802 static int
2803 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2804         struct cifs_aio_ctx *ctx)
2805 {
2806         unsigned int wsize;
2807         struct cifs_credits credits;
2808         int rc;
2809         struct TCP_Server_Info *server =
2810                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2811
2812         do {
2813                 if (wdata->cfile->invalidHandle) {
2814                         rc = cifs_reopen_file(wdata->cfile, false);
2815                         if (rc == -EAGAIN)
2816                                 continue;
2817                         else if (rc)
2818                                 break;
2819                 }
2820
2822                 /*
2823                  * Wait for credits to resend this wdata.
2824                  * Note: we are attempting to resend the whole wdata,
2825                  * not in segments.
2826                  */
2827                 do {
2828                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2829                                                 &wsize, &credits);
2830                         if (rc)
2831                                 goto fail;
2832
2833                         if (wsize < wdata->bytes) {
2834                                 add_credits_and_wake_if(server, &credits, 0);
2835                                 msleep(1000);
2836                         }
2837                 } while (wsize < wdata->bytes);
2838                 wdata->credits = credits;
2839
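                /*
                 * adjust_credits() re-validates the credits held against the
                 * final request size; a reconnect since they were acquired
                 * surfaces as -EAGAIN, which restarts the resend loop.
                 */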
2840                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2841
2842                 if (!rc) {
2843                         if (wdata->cfile->invalidHandle)
2844                                 rc = -EAGAIN;
2845                         else {
2846 #ifdef CONFIG_CIFS_SMB_DIRECT
2847                                 if (wdata->mr) {
2848                                         wdata->mr->need_invalidate = true;
2849                                         smbd_deregister_mr(wdata->mr);
2850                                         wdata->mr = NULL;
2851                                 }
2852 #endif
2853                                 rc = server->ops->async_writev(wdata,
2854                                         cifs_uncached_writedata_release);
2855                         }
2856                 }
2857
2858                 /* If the write was successfully sent, we are done */
2859                 if (!rc) {
2860                         list_add_tail(&wdata->list, wdata_list);
2861                         return 0;
2862                 }
2863
2864                 /* Roll back credits and retry if needed */
2865                 add_credits_and_wake_if(server, &wdata->credits, 0);
2866         } while (rc == -EAGAIN);
2867
2868 fail:
2869         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2870         return rc;
2871 }
2872
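/*
 * Fan a write of @len bytes at @offset out as async requests: split the
 * range into chunks of at most wsize bytes, pin the user pages directly
 * (direct I/O) or copy into freshly allocated pages, and queue each
 * in-flight wdata on @wdata_list for the collection phase.
 */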
2873 static int
2874 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2875                      struct cifsFileInfo *open_file,
2876                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2877                      struct cifs_aio_ctx *ctx)
2878 {
2879         int rc = 0;
2880         size_t cur_len;
2881         unsigned long nr_pages, num_pages, i;
2882         struct cifs_writedata *wdata;
2883         struct iov_iter saved_from = *from;
2884         loff_t saved_offset = offset;
2885         pid_t pid;
2886         struct TCP_Server_Info *server;
2887         struct page **pagevec;
2888         size_t start;
2889         unsigned int xid;
2890
2891         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2892                 pid = open_file->pid;
2893         else
2894                 pid = current->tgid;
2895
2896         server = tlink_tcon(open_file->tlink)->ses->server;
2897         xid = get_xid();
2898
2899         do {
2900                 unsigned int wsize;
2901                 struct cifs_credits credits_on_stack;
2902                 struct cifs_credits *credits = &credits_on_stack;
2903
2904                 if (open_file->invalidHandle) {
2905                         rc = cifs_reopen_file(open_file, false);
2906                         if (rc == -EAGAIN)
2907                                 continue;
2908                         else if (rc)
2909                                 break;
2910                 }
2911
2912                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2913                                                    &wsize, credits);
2914                 if (rc)
2915                         break;
2916
2917                 cur_len = min_t(const size_t, len, wsize);
2918
2919                 if (ctx->direct_io) {
2920                         ssize_t result;
2921
2922                         result = iov_iter_get_pages_alloc(
2923                                 from, &pagevec, cur_len, &start);
2924                         if (result < 0) {
2925                                 cifs_dbg(VFS,
2926                                         "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2927                                         result, iov_iter_type(from),
2928                                         from->iov_offset, from->count);
2931                                 dump_stack();
2932
2933                                 rc = result;
2934                                 add_credits_and_wake_if(server, credits, 0);
2935                                 break;
2936                         }
2937                         cur_len = (size_t)result;
2938                         iov_iter_advance(from, cur_len);
2939
2940                         nr_pages =
2941                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2942
2943                         wdata = cifs_writedata_direct_alloc(pagevec,
2944                                              cifs_uncached_writev_complete);
2945                         if (!wdata) {
2946                                 rc = -ENOMEM;
2947                                 add_credits_and_wake_if(server, credits, 0);
2948                                 break;
2949                         }
2950
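                        /*
                         * The tail page holds whatever is left after the
                         * partial first page (PAGE_SIZE - start bytes) and
                         * any whole pages in between; with a single page
                         * the tail is the entire chunk.
                         */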
2952                         wdata->page_offset = start;
2953                         wdata->tailsz =
2954                                 nr_pages > 1 ?
2955                                         cur_len - (PAGE_SIZE - start) -
2956                                         (nr_pages - 2) * PAGE_SIZE :
2957                                         cur_len;
2958                 } else {
2959                         nr_pages = get_numpages(wsize, len, &cur_len);
2960                         wdata = cifs_writedata_alloc(nr_pages,
2961                                              cifs_uncached_writev_complete);
2962                         if (!wdata) {
2963                                 rc = -ENOMEM;
2964                                 add_credits_and_wake_if(server, credits, 0);
2965                                 break;
2966                         }
2967
2968                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2969                         if (rc) {
2970                                 kvfree(wdata->pages);
2971                                 kfree(wdata);
2972                                 add_credits_and_wake_if(server, credits, 0);
2973                                 break;
2974                         }
2975
2976                         num_pages = nr_pages;
2977                         rc = wdata_fill_from_iovec(
2978                                 wdata, from, &cur_len, &num_pages);
2979                         if (rc) {
2980                                 for (i = 0; i < nr_pages; i++)
2981                                         put_page(wdata->pages[i]);
2982                                 kvfree(wdata->pages);
2983                                 kfree(wdata);
2984                                 add_credits_and_wake_if(server, credits, 0);
2985                                 break;
2986                         }
2987
2988                         /*
2989                          * Bring nr_pages down to the number of pages we
2990                          * actually used, and free any pages that we didn't use.
2991                          */
2992                         for ( ; nr_pages > num_pages; nr_pages--)
2993                                 put_page(wdata->pages[nr_pages - 1]);
2994
2995                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2996                 }
2997
2998                 wdata->sync_mode = WB_SYNC_ALL;
2999                 wdata->nr_pages = nr_pages;
3000                 wdata->offset = (__u64)offset;
3001                 wdata->cfile = cifsFileInfo_get(open_file);
3002                 wdata->pid = pid;
3003                 wdata->bytes = cur_len;
3004                 wdata->pagesz = PAGE_SIZE;
3005                 wdata->credits = credits_on_stack;
3006                 wdata->ctx = ctx;
3007                 kref_get(&ctx->refcount);
3008
3009                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3010
3011                 if (!rc) {
3012                         if (wdata->cfile->invalidHandle)
3013                                 rc = -EAGAIN;
3014                         else
3015                                 rc = server->ops->async_writev(wdata,
3016                                         cifs_uncached_writedata_release);
3017                 }
3018
3019                 if (rc) {
3020                         add_credits_and_wake_if(server, &wdata->credits, 0);
3021                         kref_put(&wdata->refcount,
3022                                  cifs_uncached_writedata_release);
3023                         if (rc == -EAGAIN) {
3024                                 *from = saved_from;
3025                                 iov_iter_advance(from, offset - saved_offset);
3026                                 continue;
3027                         }
3028                         break;
3029                 }
3030
3031                 list_add_tail(&wdata->list, wdata_list);
3032                 offset += cur_len;
3033                 len -= cur_len;
3034         } while (len > 0);
3035
3036         free_xid(xid);
3037         return rc;
3038 }
3039
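/*
 * Gather the results of all writes in flight for an aio context. This is
 * called both from the issuing task and from every write completion, so it
 * must not block: replies are collected in order of increasing offset,
 * chunks that failed with -EAGAIN are resent, and the context is completed
 * once every wdata has been accounted for.
 */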
3040 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3041 {
3042         struct cifs_writedata *wdata, *tmp;
3043         struct cifs_tcon *tcon;
3044         struct cifs_sb_info *cifs_sb;
3045         struct dentry *dentry = ctx->cfile->dentry;
3046         int rc;
3047
3048         tcon = tlink_tcon(ctx->cfile->tlink);
3049         cifs_sb = CIFS_SB(dentry->d_sb);
3050
3051         mutex_lock(&ctx->aio_mutex);
3052
3053         if (list_empty(&ctx->list)) {
3054                 mutex_unlock(&ctx->aio_mutex);
3055                 return;
3056         }
3057
3058         rc = ctx->rc;
3059         /*
3060          * Wait for and collect replies for any successful sends in order of
3061          * increasing offset. Once an error is hit, then return without waiting
3062          * for any more replies.
3063          */
3064 restart_loop:
3065         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3066                 if (!rc) {
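                        /*
                         * Nonblocking check: if this wdata has not finished
                         * yet, back out and let a later completion restart
                         * the collection.
                         */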
3067                         if (!try_wait_for_completion(&wdata->done)) {
3068                                 mutex_unlock(&ctx->aio_mutex);
3069                                 return;
3070                         }
3071
3072                         if (wdata->result)
3073                                 rc = wdata->result;
3074                         else
3075                                 ctx->total_len += wdata->bytes;
3076
3077                         /* resend call if it's a retryable error */
3078                         if (rc == -EAGAIN) {
3079                                 struct list_head tmp_list;
3080                                 struct iov_iter tmp_from = ctx->iter;
3081
3082                                 INIT_LIST_HEAD(&tmp_list);
3083                                 list_del_init(&wdata->list);
3084
3085                                 if (ctx->direct_io)
3086                                         rc = cifs_resend_wdata(
3087                                                 wdata, &tmp_list, ctx);
3088                                 else {
3089                                         iov_iter_advance(&tmp_from,
3090                                                  wdata->offset - ctx->pos);
3091
3092                                         rc = cifs_write_from_iter(wdata->offset,
3093                                                 wdata->bytes, &tmp_from,
3094                                                 ctx->cfile, cifs_sb, &tmp_list,
3095                                                 ctx);
3096
3097                                         kref_put(&wdata->refcount,
3098                                                 cifs_uncached_writedata_release);
3099                                 }
3100
3101                                 list_splice(&tmp_list, &ctx->list);
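                                /* the list changed under us -- rescan it */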
3102                                 goto restart_loop;
3103                         }
3104                 }
3105                 list_del_init(&wdata->list);
3106                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3107         }
3108
3109         cifs_stats_bytes_written(tcon, ctx->total_len);
3110         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(d_inode(dentry))->flags);
3111
3112         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3113
3114         mutex_unlock(&ctx->aio_mutex);
3115
3116         if (ctx->iocb && ctx->iocb->ki_complete)
3117                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3118         else
3119                 complete(&ctx->done);
3120 }
3121
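/*
 * Common back end for cifs_user_writev() and cifs_direct_writev(): set up
 * an aio context, fan the write out as async requests, then either wait
 * for the result (sync kiocb) or return -EIOCBQUEUED and let the
 * completion path finish the iocb.
 */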
3122 static ssize_t __cifs_writev(
3123         struct kiocb *iocb, struct iov_iter *from, bool direct)
3124 {
3125         struct file *file = iocb->ki_filp;
3126         ssize_t total_written = 0;
3127         struct cifsFileInfo *cfile;
3128         struct cifs_tcon *tcon;
3129         struct cifs_sb_info *cifs_sb;
3130         struct cifs_aio_ctx *ctx;
3131         struct iov_iter saved_from = *from;
3132         size_t len = iov_iter_count(from);
3133         int rc;
3134
3135         /*
3136          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3137          * In this case, fall back to the non-direct write path.
3138          * This could be improved by getting pages directly in ITER_KVEC.
3139          */
3140         if (direct && iov_iter_is_kvec(from)) {
3141                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3142                 direct = false;
3143         }
3144
3145         rc = generic_write_checks(iocb, from);
3146         if (rc <= 0)
3147                 return rc;
3148
3149         cifs_sb = CIFS_FILE_SB(file);
3150         cfile = file->private_data;
3151         tcon = tlink_tcon(cfile->tlink);
3152
3153         if (!tcon->ses->server->ops->async_writev)
3154                 return -ENOSYS;
3155
3156         ctx = cifs_aio_ctx_alloc();
3157         if (!ctx)
3158                 return -ENOMEM;
3159
3160         ctx->cfile = cifsFileInfo_get(cfile);
3161
3162         if (!is_sync_kiocb(iocb))
3163                 ctx->iocb = iocb;
3164
3165         ctx->pos = iocb->ki_pos;
3166
3167         if (direct) {
3168                 ctx->direct_io = true;
3169                 ctx->iter = *from;
3170                 ctx->len = len;
3171         } else {
3172                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3173                 if (rc) {
3174                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3175                         return rc;
3176                 }
3177         }
3178
3179         /* grab a lock here because write response handlers can access ctx */
3180         mutex_lock(&ctx->aio_mutex);
3181
3182         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3183                                   cfile, cifs_sb, &ctx->list, ctx);
3184
3185         /*
3186          * If at least one write was successfully sent, then discard any rc
3187          * value from the later writes. If the remaining writes succeed, we'll
3188          * end up returning whatever was written. If they fail, we'll get a
3189          * new rc value from them.
3190          */
3191         if (!list_empty(&ctx->list))
3192                 rc = 0;
3193
3194         mutex_unlock(&ctx->aio_mutex);
3195
3196         if (rc) {
3197                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3198                 return rc;
3199         }
3200
3201         if (!is_sync_kiocb(iocb)) {
3202                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3203                 return -EIOCBQUEUED;
3204         }
3205
3206         rc = wait_for_completion_killable(&ctx->done);
3207         if (rc) {
3208                 mutex_lock(&ctx->aio_mutex);
3209                 ctx->rc = rc = -EINTR;
3210                 total_written = ctx->total_len;
3211                 mutex_unlock(&ctx->aio_mutex);
3212         } else {
3213                 rc = ctx->rc;
3214                 total_written = ctx->total_len;
3215         }
3216
3217         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3218
3219         if (unlikely(!total_written))
3220                 return rc;
3221
3222         iocb->ki_pos += total_written;
3223         return total_written;
3224 }
3225
3226 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3227 {
3228         return __cifs_writev(iocb, from, true);
3229 }
3230
3231 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3232 {
3233         return __cifs_writev(iocb, from, false);
3234 }
3235
3236 static ssize_t
3237 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3238 {
3239         struct file *file = iocb->ki_filp;
3240         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3241         struct inode *inode = file->f_mapping->host;
3242         struct cifsInodeInfo *cinode = CIFS_I(inode);
3243         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3244         ssize_t rc;
3245
3246         inode_lock(inode);
3247         /*
3248          * We need to hold the sem to be sure nobody modifies lock list
3249          * with a brlock that prevents writing.
3250          */
3251         down_read(&cinode->lock_sem);
3252
3253         rc = generic_write_checks(iocb, from);
3254         if (rc <= 0)
3255                 goto out;
3256
3257         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3258                                      server->vals->exclusive_lock_type, 0,
3259                                      NULL, CIFS_WRITE_OP))
3260                 rc = __generic_file_write_iter(iocb, from);
3261         else
3262                 rc = -EACCES;
3263 out:
3264         up_read(&cinode->lock_sem);
3265         inode_unlock(inode);
3266
3267         if (rc > 0)
3268                 rc = generic_write_sync(iocb, rc);
3269         return rc;
3270 }
3271
3272 ssize_t
3273 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3274 {
3275         struct inode *inode = file_inode(iocb->ki_filp);
3276         struct cifsInodeInfo *cinode = CIFS_I(inode);
3277         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3278         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3279                                                 iocb->ki_filp->private_data;
3280         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3281         ssize_t written;
3282
3283         written = cifs_get_writer(cinode);
3284         if (written)
3285                 return written;
3286
3287         if (CIFS_CACHE_WRITE(cinode)) {
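                /*
                 * POSIX byte-range locks are advisory, so a cached write
                 * cannot collide with a mandatory brlock; the generic
                 * cached write path is safe here.
                 */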
3288                 if (cap_unix(tcon->ses) &&
3289                     (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3290                     ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3291                         written = generic_file_write_iter(iocb, from);
3292                         goto out;
3293                 }
3294                 written = cifs_writev(iocb, from);
3295                 goto out;
3296         }
3297         /*
3298          * For non-oplocked files in strict cache mode we need to write the data
3299          * to the server exactly from pos to pos+len-1 rather than flush all
3300          * affected pages, because that may cause an error with mandatory locks
3301          * on these pages but not on the region from pos to pos+len-1.
3302          */
3303         written = cifs_user_writev(iocb, from);
3304         if (CIFS_CACHE_READ(cinode)) {
3305                 /*
3306                  * We have read level caching and we have just sent a write
3307                  * request to the server thus making data in the cache stale.
3308                  * Zap the cache and set oplock/lease level to NONE to avoid
3309                  * reading stale data from the cache. All subsequent read
3310                  * operations will read new data from the server.
3311                  */
3312                 cifs_zap_mapping(inode);
3313                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3314                          inode);
3315                 cinode->oplock = 0;
3316         }
3317 out:
3318         cifs_put_writer(cinode);
3319         return written;
3320 }
3321
3322 static struct cifs_readdata *
3323 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3324 {
3325         struct cifs_readdata *rdata;
3326
3327         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3328         if (rdata != NULL) {
3329                 rdata->pages = pages;
3330                 kref_init(&rdata->refcount);
3331                 INIT_LIST_HEAD(&rdata->list);
3332                 init_completion(&rdata->done);
3333                 INIT_WORK(&rdata->work, complete);
3334         }
3335
3336         return rdata;
3337 }
3338
3339 static struct cifs_readdata *
3340 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3341 {
3342         struct page **pages =
3343                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3344         struct cifs_readdata *ret = NULL;
3345
3346         if (pages) {
3347                 ret = cifs_readdata_direct_alloc(pages, complete);
3348                 if (!ret)
3349                         kfree(pages);
3350         }
3351
3352         return ret;
3353 }
3354
3355 void
3356 cifs_readdata_release(struct kref *refcount)
3357 {
3358         struct cifs_readdata *rdata = container_of(refcount,
3359                                         struct cifs_readdata, refcount);
3360 #ifdef CONFIG_CIFS_SMB_DIRECT
3361         if (rdata->mr) {
3362                 smbd_deregister_mr(rdata->mr);
3363                 rdata->mr = NULL;
3364         }
3365 #endif
3366         if (rdata->cfile)
3367                 cifsFileInfo_put(rdata->cfile);
3368
3369         kvfree(rdata->pages);
3370         kfree(rdata);
3371 }
3372
3373 static int
3374 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3375 {
3376         int rc = 0;
3377         struct page *page;
3378         unsigned int i;
3379
3380         for (i = 0; i < nr_pages; i++) {
3381                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3382                 if (!page) {
3383                         rc = -ENOMEM;
3384                         break;
3385                 }
3386                 rdata->pages[i] = page;
3387         }
3388
3389         if (rc) {
3390                 unsigned int nr_page_failed = i;
3391
3392                 for (i = 0; i < nr_page_failed; i++) {
3393                         put_page(rdata->pages[i]);
3394                         rdata->pages[i] = NULL;
3395                 }
3396         }
3397         return rc;
3398 }
3399
3400 static void
3401 cifs_uncached_readdata_release(struct kref *refcount)
3402 {
3403         struct cifs_readdata *rdata = container_of(refcount,
3404                                         struct cifs_readdata, refcount);
3405         unsigned int i;
3406
3407         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3408         for (i = 0; i < rdata->nr_pages; i++) {
3409                 put_page(rdata->pages[i]);
3410         }
3411         cifs_readdata_release(refcount);
3412 }
3413
3414 /**
3415  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3416  * @rdata:      the readdata response with list of pages holding data
3417  * @iter:       destination for our data
3418  *
3419  * This function copies data from a list of pages in a readdata response into
3420  * an array of iovecs. It will first calculate where the data should go
3421  * based on the info in the readdata and then copy the data into that spot.
3422  */
3423 static int
3424 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3425 {
3426         size_t remaining = rdata->got_bytes;
3427         unsigned int i;
3428
3429         for (i = 0; i < rdata->nr_pages; i++) {
3430                 struct page *page = rdata->pages[i];
3431                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3432                 size_t written;
3433
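                /*
                 * copy_page_to_iter() on a pipe would take references on
                 * the page itself; bounce through a kmap'ed copy so the
                 * rdata pages can be released normally.
                 */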
3434                 if (unlikely(iov_iter_is_pipe(iter))) {
3435                         void *addr = kmap_atomic(page);
3436
3437                         written = copy_to_iter(addr, copy, iter);
3438                         kunmap_atomic(addr);
3439                 } else
3440                         written = copy_page_to_iter(page, 0, copy, iter);
3441                 remaining -= written;
3442                 if (written < copy && iov_iter_count(iter) > 0)
3443                         break;
3444         }
3445         return remaining ? -EFAULT : 0;
3446 }
3447
3448 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3449
3450 static void
3451 cifs_uncached_readv_complete(struct work_struct *work)
3452 {
3453         struct cifs_readdata *rdata = container_of(work,
3454                                                 struct cifs_readdata, work);
3455
3456         complete(&rdata->done);
3457         collect_uncached_read_data(rdata->ctx);
3458         /* the below call can possibly free the last ref to aio ctx */
3459         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3460 }
3461
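/*
 * Fill the pages of an rdata with up to @len bytes of response data,
 * copied either from @iter or straight from the server socket. Pages
 * beyond the received data are released, and tailsz records how much of
 * the last used page is valid.
 */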
3462 static int
3463 uncached_fill_pages(struct TCP_Server_Info *server,
3464                     struct cifs_readdata *rdata, struct iov_iter *iter,
3465                     unsigned int len)
3466 {
3467         int result = 0;
3468         unsigned int i;
3469         unsigned int nr_pages = rdata->nr_pages;
3470         unsigned int page_offset = rdata->page_offset;
3471
3472         rdata->got_bytes = 0;
3473         rdata->tailsz = PAGE_SIZE;
3474         for (i = 0; i < nr_pages; i++) {
3475                 struct page *page = rdata->pages[i];
3476                 size_t n;
3477                 unsigned int segment_size = rdata->pagesz;
3478
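                /* only the first page of the response can start mid-page */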
3479                 if (i == 0)
3480                         segment_size -= page_offset;
3481                 else
3482                         page_offset = 0;
3483
3485                 if (len == 0) {
3486                         /* no need to hold page hostage */
3487                         rdata->pages[i] = NULL;
3488                         rdata->nr_pages--;
3489                         put_page(page);
3490                         continue;
3491                 }
3492
3493                 n = len;
3494                 if (len >= segment_size)
3495                         /* enough data to fill the page */
3496                         n = segment_size;
3497                 else
3498                         rdata->tailsz = len;
3499                 len -= n;
3500
3501                 if (iter)
3502                         result = copy_page_from_iter(
3503                                         page, page_offset, n, iter);
3504 #ifdef CONFIG_CIFS_SMB_DIRECT
3505                 else if (rdata->mr)
3506                         result = n;
3507 #endif
3508                 else
3509                         result = cifs_read_page_from_socket(
3510                                         server, page, page_offset, n);
3511                 if (result < 0)
3512                         break;
3513
3514                 rdata->got_bytes += result;
3515         }
3516
3517         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3518                                                 rdata->got_bytes : result;
3519 }
3520
3521 static int
3522 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3523                               struct cifs_readdata *rdata, unsigned int len)
3524 {
3525         return uncached_fill_pages(server, rdata, NULL, len);
3526 }
3527
3528 static int
3529 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3530                               struct cifs_readdata *rdata,
3531                               struct iov_iter *iter)
3532 {
3533         return uncached_fill_pages(server, rdata, iter, iter->count);
3534 }
3535
3536 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3537                         struct list_head *rdata_list,
3538                         struct cifs_aio_ctx *ctx)
3539 {
3540         unsigned int rsize;
3541         struct cifs_credits credits;
3542         int rc;
3543         struct TCP_Server_Info *server =
3544                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3545
3546         do {
3547                 if (rdata->cfile->invalidHandle) {
3548                         rc = cifs_reopen_file(rdata->cfile, true);
3549                         if (rc == -EAGAIN)
3550                                 continue;
3551                         else if (rc)
3552                                 break;
3553                 }
3554
3555                 /*
3556                  * Wait for credits to resend this rdata.
3557                  * Note: we are attempting to resend the whole rdata, not in
3558                  * segments.
3559                  */
3560                 do {
3561                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3562                                                 &rsize, &credits);
3563
3564                         if (rc)
3565                                 goto fail;
3566
3567                         if (rsize < rdata->bytes) {
3568                                 add_credits_and_wake_if(server, &credits, 0);
3569                                 msleep(1000);
3570                         }
3571                 } while (rsize < rdata->bytes);
3572                 rdata->credits = credits;
3573
3574                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3575                 if (!rc) {
3576                         if (rdata->cfile->invalidHandle)
3577                                 rc = -EAGAIN;
3578                         else {
3579 #ifdef CONFIG_CIFS_SMB_DIRECT
3580                                 if (rdata->mr) {
3581                                         rdata->mr->need_invalidate = true;
3582                                         smbd_deregister_mr(rdata->mr);
3583                                         rdata->mr = NULL;
3584                                 }
3585 #endif
3586                                 rc = server->ops->async_readv(rdata);
3587                         }
3588                 }
3589
3590                 /* If the read was successfully sent, we are done */
3591                 if (!rc) {
3592                         /* Add to aio pending list */
3593                         list_add_tail(&rdata->list, rdata_list);
3594                         return 0;
3595                 }
3596
3597                 /* Roll back credits and retry if needed */
3598                 add_credits_and_wake_if(server, &rdata->credits, 0);
3599         } while (rc == -EAGAIN);
3600
3601 fail:
3602         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3603         return rc;
3604 }
3605
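/*
 * Read counterpart of cifs_write_from_iter(): split the range into chunks
 * of at most rsize bytes, pin the user pages directly (direct I/O) or
 * allocate bounce pages, and queue each in-flight rdata on @rdata_list
 * for collect_uncached_read_data() to reap.
 */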
3606 static int
3607 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3608                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3609                      struct cifs_aio_ctx *ctx)
3610 {
3611         struct cifs_readdata *rdata;
3612         unsigned int npages, rsize;
3613         struct cifs_credits credits_on_stack;
3614         struct cifs_credits *credits = &credits_on_stack;
3615         size_t cur_len;
3616         int rc;
3617         pid_t pid;
3618         struct TCP_Server_Info *server;
3619         struct page **pagevec;
3620         size_t start;
3621         struct iov_iter direct_iov = ctx->iter;
3622
3623         server = tlink_tcon(open_file->tlink)->ses->server;
3624
3625         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3626                 pid = open_file->pid;
3627         else
3628                 pid = current->tgid;
3629
3630         if (ctx->direct_io)
3631                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3632
3633         do {
3634                 if (open_file->invalidHandle) {
3635                         rc = cifs_reopen_file(open_file, true);
3636                         if (rc == -EAGAIN)
3637                                 continue;
3638                         else if (rc)
3639                                 break;
3640                 }
3641
3642                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3643                                                    &rsize, credits);
3644                 if (rc)
3645                         break;
3646
3647                 cur_len = min_t(const size_t, len, rsize);
3648
3649                 if (ctx->direct_io) {
3650                         ssize_t result;
3651
3652                         result = iov_iter_get_pages_alloc(
3653                                         &direct_iov, &pagevec,
3654                                         cur_len, &start);
3655                         if (result < 0) {
3656                                 cifs_dbg(VFS,
3657                                         "couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3658                                         result, iov_iter_type(&direct_iov),
3659                                         direct_iov.iov_offset,
3660                                         direct_iov.count);
3663                                 dump_stack();
3664
3665                                 rc = result;
3666                                 add_credits_and_wake_if(server, credits, 0);
3667                                 break;
3668                         }
3669                         cur_len = (size_t)result;
3670                         iov_iter_advance(&direct_iov, cur_len);
3671
3672                         rdata = cifs_readdata_direct_alloc(
3673                                         pagevec, cifs_uncached_readv_complete);
3674                         if (!rdata) {
3675                                 add_credits_and_wake_if(server, credits, 0);
3676                                 rc = -ENOMEM;
3677                                 break;
3678                         }
3679
3680                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3681                         rdata->page_offset = start;
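                        /*
                         * Tail length: whatever is left of the chunk after
                         * the partial first page and any whole pages in
                         * between.
                         */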
3682                         rdata->tailsz = npages > 1 ?
3683                                 cur_len - (PAGE_SIZE - start) - (npages - 2) * PAGE_SIZE :
3684                                 cur_len;
3685
3686                 } else {
3688                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3689                         /* allocate a readdata struct */
3690                         rdata = cifs_readdata_alloc(npages,
3691                                             cifs_uncached_readv_complete);
3692                         if (!rdata) {
3693                                 add_credits_and_wake_if(server, credits, 0);
3694                                 rc = -ENOMEM;
3695                                 break;
3696                         }
3697
3698                         rc = cifs_read_allocate_pages(rdata, npages);
3699                         if (rc) {
3700                                 kvfree(rdata->pages);
3701                                 kfree(rdata);
3702                                 add_credits_and_wake_if(server, credits, 0);
3703                                 break;
3704                         }
3705
3706                         rdata->tailsz = PAGE_SIZE;
3707                 }
3708
3709                 rdata->cfile = cifsFileInfo_get(open_file);
3710                 rdata->nr_pages = npages;
3711                 rdata->offset = offset;
3712                 rdata->bytes = cur_len;
3713                 rdata->pid = pid;
3714                 rdata->pagesz = PAGE_SIZE;
3715                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3716                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3717                 rdata->credits = credits_on_stack;
3718                 rdata->ctx = ctx;
3719                 kref_get(&ctx->refcount);
3720
3721                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3722
3723                 if (!rc) {
3724                         if (rdata->cfile->invalidHandle)
3725                                 rc = -EAGAIN;
3726                         else
3727                                 rc = server->ops->async_readv(rdata);
3728                 }
3729
3730                 if (rc) {
3731                         add_credits_and_wake_if(server, &rdata->credits, 0);
3732                         kref_put(&rdata->refcount,
3733                                 cifs_uncached_readdata_release);
3734                         if (rc == -EAGAIN) {
3735                                 iov_iter_revert(&direct_iov, cur_len);
3736                                 continue;
3737                         }
3738                         break;
3739                 }
3740
3741                 list_add_tail(&rdata->list, rdata_list);
3742                 offset += cur_len;
3743                 len -= cur_len;
3744         } while (len > 0);
3745
3746         return rc;
3747 }
3748
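/*
 * Read counterpart of collect_uncached_write_data(): called from the
 * issuing task and from every read completion, it copies finished data to
 * the destination iterator (unless this is direct I/O), resends chunks
 * that failed with -EAGAIN, and completes the context when all rdata are
 * accounted for.
 */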
3749 static void
3750 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3751 {
3752         struct cifs_readdata *rdata, *tmp;
3753         struct iov_iter *to = &ctx->iter;
3754         struct cifs_sb_info *cifs_sb;
3755         int rc;
3756
3757         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3758
3759         mutex_lock(&ctx->aio_mutex);
3760
3761         if (list_empty(&ctx->list)) {
3762                 mutex_unlock(&ctx->aio_mutex);
3763                 return;
3764         }
3765
3766         rc = ctx->rc;
3767         /* the loop below should proceed in the order of increasing offsets */
3768 again:
3769         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3770                 if (!rc) {
3771                         if (!try_wait_for_completion(&rdata->done)) {
3772                                 mutex_unlock(&ctx->aio_mutex);
3773                                 return;
3774                         }
3775
3776                         if (rdata->result == -EAGAIN) {
3777                                 /* resend call if it's a retryable error */
3778                                 struct list_head tmp_list;
3779                                 unsigned int got_bytes = rdata->got_bytes;
3780
3781                                 list_del_init(&rdata->list);
3782                                 INIT_LIST_HEAD(&tmp_list);
3783
3784                                 /*
3785                                  * Got a part of data and then reconnect has
3786                                  * happened -- fill the buffer and continue
3787                                  * reading.
3788                                  */
3789                                 if (got_bytes && got_bytes < rdata->bytes) {
3790                                         rc = 0;
3791                                         if (!ctx->direct_io)
3792                                                 rc = cifs_readdata_to_iov(rdata, to);
3793                                         if (rc) {
3794                                                 kref_put(&rdata->refcount,
3795                                                         cifs_uncached_readdata_release);
3796                                                 continue;
3797                                         }
3798                                 }
3799
3800                                 if (ctx->direct_io) {
3801                                         /*
3802                                          * Re-use rdata as this is a
3803                                          * direct I/O
3804                                          */
3805                                         rc = cifs_resend_rdata(
3806                                                 rdata,
3807                                                 &tmp_list, ctx);
3808                                 } else {
3809                                         rc = cifs_send_async_read(
3810                                                 rdata->offset + got_bytes,
3811                                                 rdata->bytes - got_bytes,
3812                                                 rdata->cfile, cifs_sb,
3813                                                 &tmp_list, ctx);
3814
3815                                         kref_put(&rdata->refcount,
3816                                                 cifs_uncached_readdata_release);
3817                                 }
3818
3819                                 list_splice(&tmp_list, &ctx->list);
3820
3821                                 goto again;
3822                         } else if (rdata->result)
3823                                 rc = rdata->result;
3824                         else if (!ctx->direct_io)
3825                                 rc = cifs_readdata_to_iov(rdata, to);
3826
3827                         /* if there was a short read -- discard anything left */
3828                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3829                                 rc = -ENODATA;
3830
3831                         ctx->total_len += rdata->got_bytes;
3832                 }
3833                 list_del_init(&rdata->list);
3834                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3835         }
3836
3837         if (!ctx->direct_io)
3838                 ctx->total_len = ctx->len - iov_iter_count(to);
3839
3840         /* mask nodata case */
3841         if (rc == -ENODATA)
3842                 rc = 0;
3843
3844         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3845
3846         mutex_unlock(&ctx->aio_mutex);
3847
3848         if (ctx->iocb && ctx->iocb->ki_complete)
3849                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3850         else
3851                 complete(&ctx->done);
3852 }
3853
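/*
 * Common back end for cifs_user_readv() and cifs_direct_readv(),
 * mirroring __cifs_writev(): build an aio context, send the async reads,
 * then wait for the result or return -EIOCBQUEUED for a truly async
 * kiocb.
 */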
3854 static ssize_t __cifs_readv(
3855         struct kiocb *iocb, struct iov_iter *to, bool direct)
3856 {
3857         size_t len;
3858         struct file *file = iocb->ki_filp;
3859         struct cifs_sb_info *cifs_sb;
3860         struct cifsFileInfo *cfile;
3861         struct cifs_tcon *tcon;
3862         ssize_t rc, total_read = 0;
3863         loff_t offset = iocb->ki_pos;
3864         struct cifs_aio_ctx *ctx;
3865
3866         /*
3867          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
3868          * In this case, fall back to the data copy read path.
3869          * This could be improved by getting pages directly in ITER_KVEC.
3870          */
3871         if (direct && iov_iter_is_kvec(to)) {
3872                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3873                 direct = false;
3874         }
3875
3876         len = iov_iter_count(to);
3877         if (!len)
3878                 return 0;
3879
3880         cifs_sb = CIFS_FILE_SB(file);
3881         cfile = file->private_data;
3882         tcon = tlink_tcon(cfile->tlink);
3883
3884         if (!tcon->ses->server->ops->async_readv)
3885                 return -ENOSYS;
3886
3887         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3888                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3889
3890         ctx = cifs_aio_ctx_alloc();
3891         if (!ctx)
3892                 return -ENOMEM;
3893
3894         ctx->cfile = cifsFileInfo_get(cfile);
3895
3896         if (!is_sync_kiocb(iocb))
3897                 ctx->iocb = iocb;
3898
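        /* user pages we read into must be marked dirty when we are done */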
3899         if (iter_is_iovec(to))
3900                 ctx->should_dirty = true;
3901
3902         if (direct) {
3903                 ctx->pos = offset;
3904                 ctx->direct_io = true;
3905                 ctx->iter = *to;
3906                 ctx->len = len;
3907         } else {
3908                 rc = setup_aio_ctx_iter(ctx, to, READ);
3909                 if (rc) {
3910                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3911                         return rc;
3912                 }
3913                 len = ctx->len;
3914         }
3915
3916         /* grab a lock here because read response handlers can access ctx */
3917         mutex_lock(&ctx->aio_mutex);
3918
3919         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3920
3921         /* if at least one read request send succeeded, then reset rc */
3922         if (!list_empty(&ctx->list))
3923                 rc = 0;
3924
3925         mutex_unlock(&ctx->aio_mutex);
3926
3927         if (rc) {
3928                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3929                 return rc;
3930         }
3931
3932         if (!is_sync_kiocb(iocb)) {
3933                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3934                 return -EIOCBQUEUED;
3935         }
3936
3937         rc = wait_for_completion_killable(&ctx->done);
3938         if (rc) {
3939                 mutex_lock(&ctx->aio_mutex);
3940                 ctx->rc = rc = -EINTR;
3941                 total_read = ctx->total_len;
3942                 mutex_unlock(&ctx->aio_mutex);
3943         } else {
3944                 rc = ctx->rc;
3945                 total_read = ctx->total_len;
3946         }
3947
3948         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3949
3950         if (total_read) {
3951                 iocb->ki_pos += total_read;
3952                 return total_read;
3953         }
3954         return rc;
3955 }
3956
3957 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3958 {
3959         return __cifs_readv(iocb, to, true);
3960 }
3961
3962 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3963 {
3964         return __cifs_readv(iocb, to, false);
3965 }
3966
3967 ssize_t
3968 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3969 {
3970         struct inode *inode = file_inode(iocb->ki_filp);
3971         struct cifsInodeInfo *cinode = CIFS_I(inode);
3972         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3973         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3974                                                 iocb->ki_filp->private_data;
3975         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3976         int rc = -EACCES;
3977
3978         /*
3979          * In strict cache mode we need to read from the server all the time
3980          * if we don't have level II oplock because the server can delay mtime
3981          * change - so we can't make a decision about invalidating the inode.
3982          * And we can also fail with page reading if there are mandatory locks
3983          * on pages affected by this read but not on the region from pos to
3984          * pos+len-1.
3985          */
3986         if (!CIFS_CACHE_READ(cinode))
3987                 return cifs_user_readv(iocb, to);
3988
3989         if (cap_unix(tcon->ses) &&
3990             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3991             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3992                 return generic_file_read_iter(iocb, to);
3993
3994         /*
3995          * We need to hold the sem to be sure nobody modifies lock list
3996          * with a brlock that prevents reading.
3997          */
3998         down_read(&cinode->lock_sem);
3999         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4000                                      tcon->ses->server->vals->shared_lock_type,
4001                                      0, NULL, CIFS_READ_OP))
4002                 rc = generic_file_read_iter(iocb, to);
4003         up_read(&cinode->lock_sem);
4004         return rc;
4005 }
4006
4007 static ssize_t
4008 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4009 {
4010         int rc = -EACCES;
4011         unsigned int bytes_read = 0;
4012         unsigned int total_read;
4013         unsigned int current_read_size;
4014         unsigned int rsize;
4015         struct cifs_sb_info *cifs_sb;
4016         struct cifs_tcon *tcon;
4017         struct TCP_Server_Info *server;
4018         unsigned int xid;
4019         char *cur_offset;
4020         struct cifsFileInfo *open_file;
4021         struct cifs_io_parms io_parms;
4022         int buf_type = CIFS_NO_BUFFER;
4023         __u32 pid;
4024
4025         xid = get_xid();
4026         cifs_sb = CIFS_FILE_SB(file);
4027
4028         /* FIXME: set up handlers for larger reads and/or convert to async */
4029         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4030
4031         if (file->private_data == NULL) {
4032                 rc = -EBADF;
4033                 free_xid(xid);
4034                 return rc;
4035         }
4036         open_file = file->private_data;
4037         tcon = tlink_tcon(open_file->tlink);
4038         server = tcon->ses->server;
4039
4040         if (!server->ops->sync_read) {
4041                 free_xid(xid);
4042                 return -ENOSYS;
4043         }
4044
4045         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4046                 pid = open_file->pid;
4047         else
4048                 pid = current->tgid;
4049
4050         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4051                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4052
4053         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4054              total_read += bytes_read, cur_offset += bytes_read) {
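                /* retry the current chunk for as long as reconnects yield -EAGAIN */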
4055                 do {
4056                         current_read_size = min_t(uint, read_size - total_read,
4057                                                   rsize);
4058                         /*
4059                          * For Windows ME and 9x we do not want to request
4060                          * more than it negotiated, since it will then
4061                          * refuse the read.
4062                          */
4063                         if ((tcon->ses) && !(tcon->ses->capabilities &
4064                                 tcon->ses->server->vals->cap_large_files)) {
4065                                 current_read_size = min_t(uint,
4066                                         current_read_size, CIFSMaxBufSize);
4067                         }
4068                         if (open_file->invalidHandle) {
4069                                 rc = cifs_reopen_file(open_file, true);
4070                                 if (rc != 0)
4071                                         break;
4072                         }
4073                         io_parms.pid = pid;
4074                         io_parms.tcon = tcon;
4075                         io_parms.offset = *offset;
4076                         io_parms.length = current_read_size;
4077                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4078                                                     &bytes_read, &cur_offset,
4079                                                     &buf_type);
4080                 } while (rc == -EAGAIN);
4081
4082                 if (rc || (bytes_read == 0)) {
4083                         if (total_read) {
4084                                 break;
4085                         } else {
4086                                 free_xid(xid);
4087                                 return rc;
4088                         }
4089                 } else {
4090                         cifs_stats_bytes_read(tcon, total_read);
4091                         *offset += bytes_read;
4092                 }
4093         }
4094         free_xid(xid);
4095         return total_read;
4096 }
4097
4098 /*
4099  * If the page is mmap'ed into a process' page tables, then we need to make
4100  * sure that it doesn't change while being written back.
4101  */
4102 static vm_fault_t
4103 cifs_page_mkwrite(struct vm_fault *vmf)
4104 {
4105         struct page *page = vmf->page;
4106
4107         lock_page(page);
4108         return VM_FAULT_LOCKED;
4109 }
4110
4111 static const struct vm_operations_struct cifs_file_vm_ops = {
4112         .fault = filemap_fault,
4113         .map_pages = filemap_map_pages,
4114         .page_mkwrite = cifs_page_mkwrite,
4115 };
4116
4117 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4118 {
4119         int xid, rc = 0;
4120         struct inode *inode = file_inode(file);
4121
4122         xid = get_xid();
4123
4124         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4125                 rc = cifs_zap_mapping(inode);
4126         if (!rc)
4127                 rc = generic_file_mmap(file, vma);
4128         if (!rc)
4129                 vma->vm_ops = &cifs_file_vm_ops;
4130
4131         free_xid(xid);
4132         return rc;
4133 }
4134
4135 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4136 {
4137         int rc, xid;
4138
4139         xid = get_xid();
4140
4141         rc = cifs_revalidate_file(file);
4142         if (rc)
4143                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4144                          rc);
4145         if (!rc)
4146                 rc = generic_file_mmap(file, vma);
4147         if (!rc)
4148                 vma->vm_ops = &cifs_file_vm_ops;
4149
4150         free_xid(xid);
4151         return rc;
4152 }
4153
4154 static void
4155 cifs_readv_complete(struct work_struct *work)
4156 {
4157         unsigned int i, got_bytes;
4158         struct cifs_readdata *rdata = container_of(work,
4159                                                 struct cifs_readdata, work);
4160
4161         got_bytes = rdata->got_bytes;
4162         for (i = 0; i < rdata->nr_pages; i++) {
4163                 struct page *page = rdata->pages[i];
4164
4165                 lru_cache_add_file(page);
4166
4167                 if (rdata->result == 0 ||
4168                     (rdata->result == -EAGAIN && got_bytes)) {
4169                         flush_dcache_page(page);
4170                         SetPageUptodate(page);
4171                 }
4172
4173                 unlock_page(page);
4174
4175                 if (rdata->result == 0 ||
4176                     (rdata->result == -EAGAIN && got_bytes))
4177                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4178
4179                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4180
4181                 put_page(page);
4182                 rdata->pages[i] = NULL;
4183         }
4184         kref_put(&rdata->refcount, cifs_readdata_release);
4185 }
4186
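/*
 * Like uncached_fill_pages() but for pagecache pages: a partial last page
 * is zero-filled, pages beyond the server's EOF are zeroed and marked
 * uptodate so the VFS stops re-requesting them, and any other unused
 * pages are simply released.
 */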
4187 static int
4188 readpages_fill_pages(struct TCP_Server_Info *server,
4189                      struct cifs_readdata *rdata, struct iov_iter *iter,
4190                      unsigned int len)
4191 {
4192         int result = 0;
4193         unsigned int i;
4194         u64 eof;
4195         pgoff_t eof_index;
4196         unsigned int nr_pages = rdata->nr_pages;
4197         unsigned int page_offset = rdata->page_offset;
4198
4199         /* determine the eof that the server (probably) has */
4200         eof = CIFS_I(rdata->mapping->host)->server_eof;
4201         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4202         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4203
4204         rdata->got_bytes = 0;
4205         rdata->tailsz = PAGE_SIZE;
4206         for (i = 0; i < nr_pages; i++) {
4207                 struct page *page = rdata->pages[i];
4208                 unsigned int to_read = rdata->pagesz;
4209                 size_t n;
4210
4211                 if (i == 0)
4212                         to_read -= page_offset;
4213                 else
4214                         page_offset = 0;
4215
4216                 n = to_read;
4217
4218                 if (len >= to_read) {
4219                         len -= to_read;
4220                 } else if (len > 0) {
4221                         /* enough for partial page, fill and zero the rest */
4222                         zero_user(page, len + page_offset, to_read - len);
4223                         n = rdata->tailsz = len;
4224                         len = 0;
4225                 } else if (page->index > eof_index) {
4226                         /*
4227                          * The VFS will not try to do readahead past the
4228                          * i_size, but it's possible that we have outstanding
4229                          * writes with gaps in the middle and the i_size hasn't
4230                          * caught up yet. Populate those with zeroed out pages
4231                          * to prevent the VFS from repeatedly attempting to
4232                          * fill them until the writes are flushed.
4233                          */
4234                         zero_user(page, 0, PAGE_SIZE);
4235                         lru_cache_add_file(page);
4236                         flush_dcache_page(page);
4237                         SetPageUptodate(page);
4238                         unlock_page(page);
4239                         put_page(page);
4240                         rdata->pages[i] = NULL;
4241                         rdata->nr_pages--;
4242                         continue;
4243                 } else {
4244                         /* no need to hold page hostage */
4245                         lru_cache_add_file(page);
4246                         unlock_page(page);
4247                         put_page(page);
4248                         rdata->pages[i] = NULL;
4249                         rdata->nr_pages--;
4250                         continue;
4251                 }
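                /*
                 * Illustrative example of the branches above, assuming 4K
                 * pages and page_offset == 0: with to_read == 4096 and
                 * len == 1000, bytes 1000..4095 of the page are zeroed,
                 * n and rdata->tailsz become 1000, and len drops to 0, so
                 * later pages take one of the two release paths above.
                 */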
4252
4253                 if (iter)
4254                         result = copy_page_from_iter(
4255                                         page, page_offset, n, iter);
4256 #ifdef CONFIG_CIFS_SMB_DIRECT
4257                 else if (rdata->mr)
4258                         result = n;
4259 #endif
4260                 else
4261                         result = cifs_read_page_from_socket(
4262                                         server, page, page_offset, n);
4263                 if (result < 0)
4264                         break;
4265
4266                 rdata->got_bytes += result;
4267         }
4268
4269         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4270                                                 rdata->got_bytes : result;
4271 }
4272
4273 static int
4274 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4275                                struct cifs_readdata *rdata, unsigned int len)
4276 {
4277         return readpages_fill_pages(server, rdata, NULL, len);
4278 }
4279
4280 static int
4281 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4282                                struct cifs_readdata *rdata,
4283                                struct iov_iter *iter)
4284 {
4285         return readpages_fill_pages(server, rdata, iter, iter->count);
4286 }
4287
4288 static int
4289 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4290                     unsigned int rsize, struct list_head *tmplist,
4291                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4292 {
4293         struct page *page, *tpage;
4294         unsigned int expected_index;
4295         int rc;
4296         gfp_t gfp = readahead_gfp_mask(mapping);
4297
4298         INIT_LIST_HEAD(tmplist);
4299
4300         page = lru_to_page(page_list);
4301
4302         /*
4303          * Lock the page and put it in the cache. Since no one else
4304          * should have access to this page, we're safe to simply set
4305          * PG_locked without checking it first.
4306          */
4307         __SetPageLocked(page);
4308         rc = add_to_page_cache_locked(page, mapping,
4309                                       page->index, gfp);
4310
4311         /* give up if we can't stick it in the cache */
4312         if (rc) {
4313                 __ClearPageLocked(page);
4314                 return rc;
4315         }
4316
4317         /* move first page to the tmplist */
4318         *offset = (loff_t)page->index << PAGE_SHIFT;
4319         *bytes = PAGE_SIZE;
4320         *nr_pages = 1;
4321         list_move_tail(&page->lru, tmplist);
4322
4323         /* now try and add more pages onto the request */
4324         expected_index = page->index + 1;
4325         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4326                 /* discontinuity? */
4327                 if (page->index != expected_index)
4328                         break;
4329
4330                 /* would this page push the read over the rsize? */
4331                 if (*bytes + PAGE_SIZE > rsize)
4332                         break;
4333
4334                 __SetPageLocked(page);
4335                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4336                         __ClearPageLocked(page);
4337                         break;
4338                 }
4339                 list_move_tail(&page->lru, tmplist);
4340                 (*bytes) += PAGE_SIZE;
4341                 expected_index++;
4342                 (*nr_pages)++;
4343         }
4344         return rc;
4345 }
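/*
 * Illustrative example, assuming 4K pages: with rsize == 16384,
 * readpages_get_pages() batches at most four contiguous pages, since a
 * fifth would push *bytes + PAGE_SIZE past rsize; an index discontinuity
 * or a failed add_to_page_cache_locked() also ends the batch early.
 */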
4346
4347 static int cifs_readpages(struct file *file, struct address_space *mapping,
4348         struct list_head *page_list, unsigned num_pages)
4349 {
4350         int rc;
4351         struct list_head tmplist;
4352         struct cifsFileInfo *open_file = file->private_data;
4353         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4354         struct TCP_Server_Info *server;
4355         pid_t pid;
4356         unsigned int xid;
4357
4358         xid = get_xid();
4359         /*
4360          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4361          * immediately if the cookie is negative
4362          * immediately if the cookie is negative.
4363          * After this point, every page in the list might have PG_fscache set,
4364          * so we will need to clean that up off of every page we don't use.
4365          */
4366         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4367                                          &num_pages);
4368         if (rc == 0) {
4369                 free_xid(xid);
4370                 return rc;
4371         }
4372
4373         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4374                 pid = open_file->pid;
4375         else
4376                 pid = current->tgid;
4377
4378         rc = 0;
4379         server = tlink_tcon(open_file->tlink)->ses->server;
4380
4381         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4382                  __func__, file, mapping, num_pages);
4383
4384         /*
4385          * Start with the page at end of list and move it to private
4386          * list. Do the same with any following pages until we hit
4387          * the rsize limit, hit an index discontinuity, or run out of
4388          * pages. Issue the async read and then start the loop again
4389          * until the list is empty.
4390          *
4391          * Note that list order is important. The page_list is in
4392          * the order of declining indexes. When we put the pages in
4393          * the rdata->pages, then we want them in increasing order.
4394          */
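        /*
         * Illustrative example: if page_list holds indexes 7, 6, 5, 4 from
         * head to tail, readpages_get_pages() pulls page 4 (the tail)
         * first and then 5, 6 and 7, so rdata->pages is filled in
         * increasing index order as required.
         */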
4395         while (!list_empty(page_list)) {
4396                 unsigned int i, nr_pages, bytes, rsize;
4397                 loff_t offset;
4398                 struct page *page, *tpage;
4399                 struct cifs_readdata *rdata;
4400                 struct cifs_credits credits_on_stack;
4401                 struct cifs_credits *credits = &credits_on_stack;
4402
4403                 if (open_file->invalidHandle) {
4404                         rc = cifs_reopen_file(open_file, true);
4405                         if (rc == -EAGAIN)
4406                                 continue;
4407                         else if (rc)
4408                                 break;
4409                 }
4410
4411                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4412                                                    &rsize, credits);
4413                 if (rc)
4414                         break;
4415
4416                 /*
4417                  * Give up immediately if rsize is too small to read an entire
4418                  * page. The VFS will fall back to readpage. We should never
4419                  * reach this point, however, since we set ra_pages to 0 when the
4420                  * rsize is smaller than a cache page.
4421                  */
4422                 if (unlikely(rsize < PAGE_SIZE)) {
4423                         add_credits_and_wake_if(server, credits, 0);
4424                         free_xid(xid);
4425                         return 0;
4426                 }
4427
4428                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4429                                          &nr_pages, &offset, &bytes);
4430                 if (rc) {
4431                         add_credits_and_wake_if(server, credits, 0);
4432                         break;
4433                 }
4434
4435                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4436                 if (!rdata) {
4437                         /* best to give up if we're out of mem */
4438                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4439                                 list_del(&page->lru);
4440                                 lru_cache_add_file(page);
4441                                 unlock_page(page);
4442                                 put_page(page);
4443                         }
4444                         rc = -ENOMEM;
4445                         add_credits_and_wake_if(server, credits, 0);
4446                         break;
4447                 }
4448
4449                 rdata->cfile = cifsFileInfo_get(open_file);
4450                 rdata->mapping = mapping;
4451                 rdata->offset = offset;
4452                 rdata->bytes = bytes;
4453                 rdata->pid = pid;
4454                 rdata->pagesz = PAGE_SIZE;
4455                 rdata->tailsz = PAGE_SIZE;
4456                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4457                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4458                 rdata->credits = credits_on_stack;
4459
4460                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4461                         list_del(&page->lru);
4462                         rdata->pages[rdata->nr_pages++] = page;
4463                 }
4464
4465                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4466
4467                 if (!rc) {
4468                         if (rdata->cfile->invalidHandle)
4469                                 rc = -EAGAIN;
4470                         else
4471                                 rc = server->ops->async_readv(rdata);
4472                 }
4473
4474                 if (rc) {
4475                         add_credits_and_wake_if(server, &rdata->credits, 0);
4476                         for (i = 0; i < rdata->nr_pages; i++) {
4477                                 page = rdata->pages[i];
4478                                 lru_cache_add_file(page);
4479                                 unlock_page(page);
4480                                 put_page(page);
4481                         }
4482                         /* Fall back to readpage in error/reconnect cases */
4483                         kref_put(&rdata->refcount, cifs_readdata_release);
4484                         break;
4485                 }
4486
4487                 kref_put(&rdata->refcount, cifs_readdata_release);
4488         }
4489
4490         /* Any pages that have been shown to fscache but didn't get added to
4491          * the pagecache must be uncached before they get returned to the
4492          * allocator.
4493          */
4494         cifs_fscache_readpages_cancel(mapping->host, page_list);
4495         free_xid(xid);
4496         return rc;
4497 }
4498
4499 /*
4500  * cifs_readpage_worker must be called with the page pinned
4501  */
4502 static int cifs_readpage_worker(struct file *file, struct page *page,
4503         loff_t *poffset)
4504 {
4505         char *read_data;
4506         int rc;
4507
4508         /* Is the page cached? */
4509         rc = cifs_readpage_from_fscache(file_inode(file), page);
4510         if (rc == 0)
4511                 goto read_complete;
4512
4513         read_data = kmap(page);
4514         /* for reads over a certain size we could initiate async read ahead */
4515
4516         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4517
4518         if (rc < 0)
4519                 goto io_error;
4520         else
4521                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4522
4523         /* we do not want atime to be less than mtime, it broke some apps */
4524         file_inode(file)->i_atime = current_time(file_inode(file));
4525         /* if atime is still older than mtime, raise it to mtime */
4526         if (timespec64_compare(&(file_inode(file)->i_atime),
4527                                &(file_inode(file)->i_mtime)) < 0)
4528                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4529
4530         if (PAGE_SIZE > rc)
4531                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4532
4533         flush_dcache_page(page);
4534         SetPageUptodate(page);
4535
4536         /* send this page to the cache */
4537         cifs_readpage_to_fscache(file_inode(file), page);
4538
4539         rc = 0;
4540
4541 io_error:
4542         kunmap(page);
4543         unlock_page(page);
4544
4545 read_complete:
4546         return rc;
4547 }
4548
4549 static int cifs_readpage(struct file *file, struct page *page)
4550 {
4551         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4552         int rc = -EACCES;
4553         unsigned int xid;
4554
4555         xid = get_xid();
4556
4557         if (file->private_data == NULL) {
4558                 rc = -EBADF;
4559                 free_xid(xid);
4560                 return rc;
4561         }
4562
4563         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4564                  page, (int)offset, (int)offset);
4565
4566         rc = cifs_readpage_worker(file, page, &offset);
4567
4568         free_xid(xid);
4569         return rc;
4570 }
4571
4572 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4573 {
4574         struct cifsFileInfo *open_file;
4575
4576         spin_lock(&cifs_inode->open_file_lock);
4577         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4578                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4579                         spin_unlock(&cifs_inode->open_file_lock);
4580                         return 1;
4581                 }
4582         }
4583         spin_unlock(&cifs_inode->open_file_lock);
4584         return 0;
4585 }
4586
4587 /* We do not want to update the file size from the server for inodes
4588    open for write, to avoid races with writepage extending the file.
4589    In the future we could consider allowing refreshing of the inode
4590    only on increases in the file size, but this is tricky to do
4591    without racing with writebehind page caching in the current
4592    Linux kernel design. */
4593 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4594 {
4595         if (!cifsInode)
4596                 return true;
4597
4598         if (is_inode_writable(cifsInode)) {
4599                 /* This inode is open for write at least once */
4600                 struct cifs_sb_info *cifs_sb;
4601
4602                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4603                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4604                         /* since no page cache to corrupt on directio,
4605                            we can change size safely */
4606                         return true;
4607                 }
4608
4609                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4610                         return true;
4611
4612                 return false;
4613         } else
4614                 return true;
4615 }
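/*
 * Illustrative example: for an inode open for write on a mount without
 * CIFS_MOUNT_DIRECT_IO and a cached i_size of 10000, an end_of_file of
 * 12000 from the server is safe to apply (the file grew), while 8000 is
 * not, since it may be a stale size racing with writeback.
 */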
4616
4617 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4618                         loff_t pos, unsigned len, unsigned flags,
4619                         struct page **pagep, void **fsdata)
4620 {
4621         int oncethru = 0;
4622         pgoff_t index = pos >> PAGE_SHIFT;
4623         loff_t offset = pos & (PAGE_SIZE - 1);
4624         loff_t page_start = pos & PAGE_MASK;
4625         loff_t i_size;
4626         struct page *page;
4627         int rc = 0;
4628
4629         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4630
4631 start:
4632         page = grab_cache_page_write_begin(mapping, index, flags);
4633         if (!page) {
4634                 rc = -ENOMEM;
4635                 goto out;
4636         }
4637
4638         if (PageUptodate(page))
4639                 goto out;
4640
4641         /*
4642          * If we write a full page it will be up to date, no need to read from
4643          * the server. If the write is short, we'll end up doing a sync write
4644          * instead.
4645          */
4646         if (len == PAGE_SIZE)
4647                 goto out;
4648
4649         /*
4650          * optimize away the read when we have an oplock, and we're not
4651          * expecting to use any of the data we'd be reading in. That
4652          * is, when the page lies beyond the EOF, or straddles the EOF
4653          * and the write will cover all of the existing data.
4654          */
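        /*
         * Illustrative example of the check below, assuming 4K pages: with
         * i_size == 5000, a write of len == 200 at pos == 8192 gives
         * page_start == 8192 >= i_size, so no data on the page predates
         * the write; the untouched regions are zeroed and the server read
         * is skipped.
         */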
4655         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4656                 i_size = i_size_read(mapping->host);
4657                 if (page_start >= i_size ||
4658                     (offset == 0 && (pos + len) >= i_size)) {
4659                         zero_user_segments(page, 0, offset,
4660                                            offset + len,
4661                                            PAGE_SIZE);
4662                         /*
4663                          * PageChecked means that the parts of the page
4664                          * to which we're not writing are considered up
4665                          * to date. Once the data is copied to the
4666                          * page, it can be set uptodate.
4667                          */
4668                         SetPageChecked(page);
4669                         goto out;
4670                 }
4671         }
4672
4673         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4674                 /*
4675                  * might as well read a page, it is fast enough. If we get
4676                  * an error, we don't need to return it. cifs_write_end will
4677                  * do a sync write instead since PG_uptodate isn't set.
4678                  */
4679                 cifs_readpage_worker(file, page, &page_start);
4680                 put_page(page);
4681                 oncethru = 1;
4682                 goto start;
4683         } else {
4684                 /* we could try using another file handle if there is one -
4685                    but how would we lock it to prevent a close of that
4686                    handle racing with this read? In any case, this will
4687                    be written out by write_end, so it is fine */
4688         }
4689 out:
4690         *pagep = page;
4691         return rc;
4692 }
4693
4694 static int cifs_release_page(struct page *page, gfp_t gfp)
4695 {
4696         if (PagePrivate(page))
4697                 return 0;
4698
4699         return cifs_fscache_release_page(page, gfp);
4700 }
4701
4702 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4703                                  unsigned int length)
4704 {
4705         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4706
4707         if (offset == 0 && length == PAGE_SIZE)
4708                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4709 }
4710
4711 static int cifs_launder_page(struct page *page)
4712 {
4713         int rc = 0;
4714         loff_t range_start = page_offset(page);
4715         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4716         struct writeback_control wbc = {
4717                 .sync_mode = WB_SYNC_ALL,
4718                 .nr_to_write = 0,
4719                 .range_start = range_start,
4720                 .range_end = range_end,
4721         };
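        /*
         * Illustrative example, assuming 4K pages: for the page at index 2,
         * range_start == 8192 and range_end == 12287, so this writeback
         * control targets exactly the one page being laundered.
         */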
4722
4723         cifs_dbg(FYI, "Launder page: %p\n", page);
4724
4725         if (clear_page_dirty_for_io(page))
4726                 rc = cifs_writepage_locked(page, &wbc);
4727
4728         cifs_fscache_invalidate_page(page, page->mapping->host);
4729         return rc;
4730 }
4731
4732 void cifs_oplock_break(struct work_struct *work)
4733 {
4734         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4735                                                   oplock_break);
4736         struct inode *inode = d_inode(cfile->dentry);
4737         struct cifsInodeInfo *cinode = CIFS_I(inode);
4738         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4739         struct TCP_Server_Info *server = tcon->ses->server;
4740         int rc = 0;
4741         bool purge_cache = false;
4742
4743         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4744                         TASK_UNINTERRUPTIBLE);
4745
4746         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4747                                       cfile->oplock_epoch, &purge_cache);
4748
4749         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4750                                                 cifs_has_mand_locks(cinode)) {
4751                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4752                          inode);
4753                 cinode->oplock = 0;
4754         }
4755
4756         if (inode && S_ISREG(inode->i_mode)) {
4757                 if (CIFS_CACHE_READ(cinode))
4758                         break_lease(inode, O_RDONLY);
4759                 else
4760                         break_lease(inode, O_WRONLY);
4761                 rc = filemap_fdatawrite(inode->i_mapping);
4762                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4763                         rc = filemap_fdatawait(inode->i_mapping);
4764                         mapping_set_error(inode->i_mapping, rc);
4765                         cifs_zap_mapping(inode);
4766                 }
4767                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4768                 if (CIFS_CACHE_WRITE(cinode))
4769                         goto oplock_break_ack;
4770         }
4771
4772         rc = cifs_push_locks(cfile);
4773         if (rc)
4774                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4775
4776 oplock_break_ack:
4777         /*
4778          * Releasing a stale oplock after a recent reconnect of the smb
4779          * session, via a now-invalid file handle, is not a data integrity
4780          * issue; but don't bother sending an oplock release if the session
4781          * is still disconnected - the server has already released the oplock.
4782          */
4783         if (!cfile->oplock_break_cancelled) {
4784                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4785                                                              cinode);
4786                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4787         }
4788         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4789         cifs_done_oplock_break(cinode);
4790 }
4791
4792 /*
4793  * The presence of cifs_direct_io() in the address space ops vector
4794  * allows open() O_DIRECT flags which would have failed otherwise.
4795  *
4796  * In the non-cached mode (mount with cache=none), we shunt off direct
4797  * read and write requests, so this method should never be called.
4798  *
4799  * Direct I/O is not yet supported in the cached mode.
4800  */
4801 static ssize_t
4802 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4803 {
4804         /*
4805          * FIXME
4806          * Eventually need to support direct IO for non forcedirectio mounts
4807          */
4808         return -EINVAL;
4809 }
4810
4811
4812 const struct address_space_operations cifs_addr_ops = {
4813         .readpage = cifs_readpage,
4814         .readpages = cifs_readpages,
4815         .writepage = cifs_writepage,
4816         .writepages = cifs_writepages,
4817         .write_begin = cifs_write_begin,
4818         .write_end = cifs_write_end,
4819         .set_page_dirty = __set_page_dirty_nobuffers,
4820         .releasepage = cifs_release_page,
4821         .direct_IO = cifs_direct_io,
4822         .invalidatepage = cifs_invalidate_page,
4823         .launder_page = cifs_launder_page,
4824 };
4825
4826 /*
4827  * cifs_readpages requires the server to support a buffer large enough to
4828  * contain the header plus one complete page of data.  Otherwise, we need
4829  * to leave cifs_readpages out of the address space operations.
4830  */
4831 const struct address_space_operations cifs_addr_ops_smallbuf = {
4832         .readpage = cifs_readpage,
4833         .writepage = cifs_writepage,
4834         .writepages = cifs_writepages,
4835         .write_begin = cifs_write_begin,
4836         .write_end = cifs_write_end,
4837         .set_page_dirty = __set_page_dirty_nobuffers,
4838         .releasepage = cifs_release_page,
4839         .invalidatepage = cifs_invalidate_page,
4840         .launder_page = cifs_launder_page,
4841 };