/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

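/*
 * Map the POSIX open access mode (O_RDONLY/O_WRONLY/O_RDWR) to the SMB
 * desired access bits requested on open.
 */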
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause unnecessary access-denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

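/*
 * Map O_CREAT/O_EXCL/O_TRUNC combinations to an SMB create disposition;
 * see the open flag mapping table in cifs_nt_open() below.
 */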
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

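/*
 * Open a file using the SMB POSIX extensions. On success, if the caller
 * passed @pinode, instantiate or refresh the inode from the returned
 * FILE_UNIX_BASIC_INFO.
 */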
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than creating a new one as FILE_SUPERSEDE does
 *      (FILE_SUPERSEDE uses the attributes / metadata passed in on
 *      the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client.  The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

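/*
 * Return true if any open file instance on this inode holds cached
 * byte-range (mandatory) locks. Used to decide whether a read oplock
 * must be downgraded to None.
 */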
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

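/*
 * Take lock_sem for writing without sleeping in down_write() itself:
 * poll with down_write_trylock() and back off for 10ms between attempts.
 */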
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 * @cifs_file: the file info structure to release
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 * @cifs_file: the file info structure to release
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload: if true, defer the final release to a workqueue
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem, can end up causing writepage to
         * get called, and if the server was down that means we end up here.
         * We can never tell if the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush data; and since we do not know if
         * we have data that would invalidate the current end of file on
         * the server, we can not go to the server to get the new inode
         * info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                _cifsFileInfo_put(file->private_data, true, false);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

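/*
 * Reopen any invalidated persistent handles on this tree connection.
 * References are taken under open_file_lock, then the reopens are done
 * after the lock is dropped so no network calls happen under a spinlock.
 */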
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles  */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

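/*
 * Allocate and initialize a cifsLockInfo for the given byte range; the
 * lock is tagged with the current thread group id as its owner pid.
 */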
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

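/*
 * Walk every open fid's lock list on this inode looking for a conflict
 * with the proposed range. Callers hold cinode->lock_sem.
 */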
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to ask the server, 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to ask the server, 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait,
                                        list_empty(&flock->fl_blocked_member));
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

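/*
 * Push this file's cached byte-range locks to the server as batched
 * LOCKING_ANDX ranges (SMB1), one pass per lock type, with the batch
 * size bounded by the server's maxBuf.
 */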
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

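/*
 * The wire protocol carries a numeric pid with each posix lock request,
 * but local lock ownership is keyed by fl_owner_t (a kernel pointer).
 * XOR the hashed owner with the random cifs_lock_secret so we send a
 * stable identifier without exposing kernel pointer values.
 */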
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

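/*
 * Snapshot of a POSIX lock, taken under flc_lock so the lock can be
 * re-sent to the server after the spinlock is dropped.
 */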
struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem, which
         * protects the locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

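/*
 * Push this file's cached byte-range locks to the server, choosing POSIX
 * or mandatory style based on the unix extension capabilities, and stop
 * caching brlocks for the inode.
 */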
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

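/*
 * Decode a struct file_lock into the server's lock type plus lock/unlock
 * and wait flags, logging any flag combinations we do not handle.
 */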
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

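/*
 * Test for a conflicting lock (F_GETLK semantics): check the local lock
 * cache first; if that is inconclusive, probe the server by temporarily
 * acquiring and then releasing the range.
 */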
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range during test of lock, rc=%d\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range during test of lock, rc=%d\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

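/* Move every lock entry on @source to the tail of @dest. */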
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

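/* Free every lock entry on @llist, waking any lock waiters first. */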
void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

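/*
 * Unlock all cached locks that fall inside the range described by @flock.
 * Matching locks are packed into a LOCKING_ANDX_RANGE array (sized to fit
 * in the server's maxBuf) and sent in batches of up to max_num ranges, one
 * lock type at a time.  Locks are parked on a temporary list while a
 * request is in flight so they can be restored to the file's list if the
 * server rejects the unlock.
 */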
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
                return -EINVAL;

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        cifs_down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeeded -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist,
                                                &cfile->llist->locks);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        up_write(&cinode->lock_sem);
        kfree(buf);
        return rc;
}

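/*
 * Handle F_SETLK/F_SETLKW and flock requests.  POSIX-capable mounts go
 * through CIFSSMBPosixLock(); otherwise the lock is recorded locally (or
 * only cached, when allowed), sent to the server as a mandatory lock, and
 * finally handed to locks_lock_file_wait() for local VFS bookkeeping.
 */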
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct inode *inode = d_inode(cfile->dentry);

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_set(file, flock);
                if (!rc || rc < 0)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;

                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length,
                                      NULL, posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type,
                                      flock->fl_flags);
                if (!lock)
                        return -ENOMEM;

                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0) {
                        kfree(lock);
                        return rc;
                }
                if (!rc)
                        goto out;

                /*
                 * Windows 7 server can delay breaking lease from read to None
                 * if we set a byte-range lock on a file - break it explicitly
                 * before sending the lock to the server to be sure the next
                 * read won't conflict with non-overlapping locks due to
                 * page reading.
                 */
                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
                                        CIFS_CACHE_READ(CIFS_I(inode))) {
                        cifs_zap_mapping(inode);
                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
                                 inode);
                        CIFS_I(inode)->oplock = 0;
                }

                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 1, 0, wait_flag);
                if (rc) {
                        kfree(lock);
                        return rc;
                }

                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
        if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
                /*
                 * If this is a request to remove all locks because we
                 * are closing the file, it doesn't matter if the
                 * unlocking failed as both cifs.ko and the SMB server
                 * remove the lock on file close
                 */
                if (rc) {
                        cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
                        if (!(flock->fl_flags & FL_CLOSE))
                                return rc;
                }
                rc = locks_lock_file_wait(file, flock);
        }
        return rc;
}

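/* flock(2) entry point: whole-file advisory locks expressed as FL_FLOCK. */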
int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
        int rc, xid;
        int lock = 0, unlock = 0;
        bool wait_flag = false;
        bool posix_lck = false;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsFileInfo *cfile;
        __u32 type;

        /* check this before taking an xid, so it can never be leaked */
        if (!(fl->fl_flags & FL_FLOCK))
                return -ENOLCK;

        rc = -EACCES;
        xid = get_xid();

        cfile = (struct cifsFileInfo *)file->private_data;
        tcon = tlink_tcon(cfile->tlink);

        cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
                        tcon->ses->server);
        cifs_sb = CIFS_FILE_SB(file);

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                posix_lck = true;

        if (!lock && !unlock) {
                /*
                 * if this is neither a lock nor an unlock request, there is
                 * nothing to do since we do not know what it is
                 */
                free_xid(xid);
                return -EOPNOTSUPP;
        }

        rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
                        xid);
        free_xid(xid);
        return rc;
}

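/* fcntl(2) byte-range lock entry point (F_GETLK/F_SETLK/F_SETLKW). */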
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
        int rc, xid;
        int lock = 0, unlock = 0;
        bool wait_flag = false;
        bool posix_lck = false;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsFileInfo *cfile;
        __u32 type;

        rc = -EACCES;
        xid = get_xid();

        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
                 cmd, flock->fl_flags, flock->fl_type,
                 flock->fl_start, flock->fl_end);

        cfile = (struct cifsFileInfo *)file->private_data;
        tcon = tlink_tcon(cfile->tlink);

        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
                        tcon->ses->server);
        cifs_sb = CIFS_FILE_SB(file);

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                posix_lck = true;
        /*
         * BB add code here to normalize offset and length to account for
         * negative length which we can not accept over the wire.
         */
        if (IS_GETLK(cmd)) {
                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
                free_xid(xid);
                return rc;
        }

        if (!lock && !unlock) {
                /*
                 * if this is neither a lock nor an unlock request, there is
                 * nothing to do since we do not know what it is
                 */
                free_xid(xid);
                return -EOPNOTSUPP;
        }

        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
                        xid);
        free_xid(xid);
        return rc;
}

/*
 * Update the file size (if needed) after a write.  Must be called with
 * inode->i_lock held.
 */
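/*
 * Example caller pattern (as used by cifs_write() below):
 *
 *	spin_lock(&d_inode(dentry)->i_lock);
 *	cifs_update_eof(cifsi, *offset, bytes_written);
 *	spin_unlock(&d_inode(dentry)->i_lock);
 */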
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
                      unsigned int bytes_written)
{
        loff_t end_of_write = offset + bytes_written;

        if (end_of_write > cifsi->server_eof)
                cifsi->server_eof = end_of_write;
}

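/*
 * Synchronously write @write_size bytes from @write_data at *@offset using
 * the handle in @open_file.  The data is sent in chunks of at most
 * wp_retry_size() bytes, invalid handles are reopened, and the cached EOF
 * and i_size are updated as bytes are acknowledged.  Returns the number of
 * bytes written, or an error if nothing could be written.
 */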
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
           size_t write_size, loff_t *offset)
{
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        struct dentry *dentry = open_file->dentry;
        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
        struct cifs_io_parms io_parms = {0};

        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
                 write_size, *offset, dentry);

        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        if (!server->ops->sync_write)
                return -ENOSYS;

        xid = get_xid();

        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
                while (rc == -EAGAIN) {
                        struct kvec iov[2];
                        unsigned int len;

                        if (open_file->invalidHandle) {
                                /* we could deadlock if we called
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to
                                   server now */
                                rc = cifs_reopen_file(open_file, false);
                                if (rc != 0)
                                        break;
                        }

                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
                                  (unsigned int)write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
                        iov[1].iov_len = len;
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = len;
                        rc = server->ops->sync_write(xid, &open_file->fid,
                                        &io_parms, &bytes_written, iov, 1);
                }
                if (rc || (bytes_written == 0)) {
                        if (total_written)
                                break;
                        else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        spin_lock(&d_inode(dentry)->i_lock);
                        cifs_update_eof(cifsi, *offset, bytes_written);
                        spin_unlock(&d_inode(dentry)->i_lock);
                        *offset += bytes_written;
                }
        }

        cifs_stats_bytes_written(tcon, total_written);

        if (total_written > 0) {
                spin_lock(&d_inode(dentry)->i_lock);
                if (*offset > d_inode(dentry)->i_size)
                        i_size_write(d_inode(dentry), *offset);
                spin_unlock(&d_inode(dentry)->i_lock);
        }
        mark_inode_dirty_sync(d_inode(dentry));
        free_xid(xid);
        return total_written;
}

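/*
 * Find an open handle on @cifs_inode that is usable for reading and take a
 * reference on it; returns NULL if none is available.
 */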
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file = NULL;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_inode->open_file_lock);
        /* We could simply take the first list entry, since write-only
           entries are always at the end of the list, but the first entry
           might have a close pending, so we walk the whole list */
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
                        if (!open_file->invalidHandle) {
                                /* found a good file */
                                /* lock it so it will not be closed on us */
                                cifsFileInfo_get(open_file);
                                spin_unlock(&cifs_inode->open_file_lock);
                                return open_file;
                        } /* else might as well continue, and look for
                             another, or simply have the caller reopen it
                             again rather than trying to fix this handle */
                } else /* write only file */
                        break; /* write only files are last so must be done */
        }
        spin_unlock(&cifs_inode->open_file_lock);
        return NULL;
}

/*
 * Return 0 and a referenced handle in *ret_file on success; -EBADF if no
 * writable handle is found, or another error if reopening an invalidated
 * handle failed.
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
                       struct cifsFileInfo **ret_file)
{
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc = -EBADF;
        unsigned int refind = 0;
        bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
        bool with_delete = flags & FIND_WR_WITH_DELETE;
        *ret_file = NULL;

        /*
         * Having a null inode here (because mapping->host was set to zero by
         * the VFS or MM) should not happen, but we had reports of an oops
         * (due to it being zero) during stress testcases, so we need to
         * check for it
         */

        if (cifs_inode == NULL) {
                cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
                dump_stack();
                return rc;
        }

        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_inode->open_file_lock);
refind_writable:
        if (refind > MAX_REOPEN_ATT) {
                spin_unlock(&cifs_inode->open_file_lock);
                return rc;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (!any_available && open_file->pid != current->tgid)
                        continue;
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (with_delete && !(open_file->fid.access & DELETE))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get(open_file);
                                spin_unlock(&cifs_inode->open_file_lock);
                                *ret_file = open_file;
                                return 0;
                        } else {
                                if (!inv_file)
                                        inv_file = open_file;
                        }
                }
        }
        /* couldn't find usable FH with same pid, try any available */
        if (!any_available) {
                any_available = true;
                goto refind_writable;
        }

        if (inv_file) {
                any_available = false;
                cifsFileInfo_get(inv_file);
        }

        spin_unlock(&cifs_inode->open_file_lock);

        if (inv_file) {
                rc = cifs_reopen_file(inv_file, false);
                if (!rc) {
                        *ret_file = inv_file;
                        return 0;
                }

                spin_lock(&cifs_inode->open_file_lock);
                list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
                spin_unlock(&cifs_inode->open_file_lock);
                cifsFileInfo_put(inv_file);
                ++refind;
                inv_file = NULL;
                spin_lock(&cifs_inode->open_file_lock);
                goto refind_writable;
        }

        return rc;
}

struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
{
        struct cifsFileInfo *cfile;
        int rc;

        rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
        if (rc)
                cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);

        return cfile;
}

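/*
 * Look up an open, writable handle for the file named @name on @tcon by
 * comparing the path built from each open file's dentry.
 */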
int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
                       int flags,
                       struct cifsFileInfo **ret_file)
{
        struct list_head *tmp;
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode;
        char *full_path;

        *ret_file = NULL;

        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                cfile = list_entry(tmp, struct cifsFileInfo,
                             tlist);
                full_path = build_path_from_dentry(cfile->dentry);
                if (full_path == NULL) {
                        spin_unlock(&tcon->open_file_lock);
                        return -ENOMEM;
                }
                if (strcmp(full_path, name)) {
                        kfree(full_path);
                        continue;
                }

                kfree(full_path);
                cinode = CIFS_I(d_inode(cfile->dentry));
                spin_unlock(&tcon->open_file_lock);
                return cifs_get_writable_file(cinode, flags, ret_file);
        }

        spin_unlock(&tcon->open_file_lock);
        return -ENOENT;
}

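/* As above, but find a referenced readable handle for the named file. */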
int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
                       struct cifsFileInfo **ret_file)
{
        struct list_head *tmp;
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode;
        char *full_path;

        *ret_file = NULL;

        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                cfile = list_entry(tmp, struct cifsFileInfo,
                             tlist);
                full_path = build_path_from_dentry(cfile->dentry);
                if (full_path == NULL) {
                        spin_unlock(&tcon->open_file_lock);
                        return -ENOMEM;
                }
                if (strcmp(full_path, name)) {
                        kfree(full_path);
                        continue;
                }

                kfree(full_path);
                cinode = CIFS_I(d_inode(cfile->dentry));
                spin_unlock(&tcon->open_file_lock);
                *ret_file = find_readable_file(cinode, 0);
                return *ret_file ? 0 : -ENOENT;
        }

        spin_unlock(&tcon->open_file_lock);
        return -ENOENT;
}

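/*
 * Write the bytes [from, to) of @page back to the server through any
 * writable handle for the inode.  Used by the writepage path, where no
 * file pointer is available, so a handle has to be looked up first.
 */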
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
        struct address_space *mapping = page->mapping;
        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
        char *write_data;
        int rc = -EFAULT;
        int bytes_written = 0;
        struct inode *inode;
        struct cifsFileInfo *open_file;

        if (!mapping || !mapping->host)
                return -EFAULT;

        inode = page->mapping->host;

        offset += (loff_t)from;
        write_data = kmap(page);
        write_data += from;

        if ((to > PAGE_SIZE) || (from > to)) {
                kunmap(page);
                return -EIO;
        }

        /* racing with truncate? */
        if (offset > mapping->host->i_size) {
                kunmap(page);
                return 0; /* don't care */
        }

        /* check to make sure that we are not extending the file */
        if (mapping->host->i_size - offset < (loff_t)to)
                to = (unsigned)(mapping->host->i_size - offset);

        rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
                                    &open_file);
        if (!rc) {
                bytes_written = cifs_write(open_file, open_file->pid,
                                           write_data, to - from, &offset);
                cifsFileInfo_put(open_file);
                /* Does mm or vfs already set times? */
                inode->i_atime = inode->i_mtime = current_time(inode);
                if ((bytes_written > 0) && (offset))
                        rc = 0;
                else if (bytes_written < 0)
                        rc = bytes_written;
                else
                        rc = -EFAULT;
        } else {
                cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
                if (!is_retryable_error(rc))
                        rc = -EIO;
        }

        kunmap(page);
        return rc;
}

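/*
 * Allocate a cifs_writedata large enough for @tofind pages and fill its
 * page array with up to that many dirty pages from the mapping.
 */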
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
                          pgoff_t end, pgoff_t *index,
                          unsigned int *found_pages)
{
        struct cifs_writedata *wdata;

        wdata = cifs_writedata_alloc((unsigned int)tofind,
                                     cifs_writev_complete);
        if (!wdata)
                return NULL;

        *found_pages = find_get_pages_range_tag(mapping, index, end,
                                PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
        return wdata;
}

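/*
 * Lock and validate the pages found by wdata_alloc_and_fillpages(): stop
 * at the first page that is no longer dirty, no longer in this mapping,
 * past EOF, or not contiguous with the previous one.  Pages that survive
 * are marked for writeback; the rest are released.  Returns the number of
 * pages ready to send.
 */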
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
                    struct address_space *mapping,
                    struct writeback_control *wbc,
                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
        unsigned int nr_pages = 0, i;
        struct page *page;

        for (i = 0; i < found_pages; i++) {
                page = wdata->pages[i];
                /*
                 * At this point we hold neither the i_pages lock nor the
                 * page lock: the page may be truncated or invalidated
                 * (changing page->mapping to NULL), or even swizzled
                 * back from swapper_space to tmpfs file mapping
                 */

                if (nr_pages == 0)
                        lock_page(page);
                else if (!trylock_page(page))
                        break;

                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
                        break;
                }

                if (!wbc->range_cyclic && page->index > end) {
                        *done = true;
                        unlock_page(page);
                        break;
                }

                if (*next && (page->index != *next)) {
                        /* Not the next consecutive page */
                        unlock_page(page);
                        break;
                }

                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                if (PageWriteback(page) ||
                                !clear_page_dirty_for_io(page)) {
                        unlock_page(page);
                        break;
                }

                /*
                 * This actually clears the dirty bit in the radix tree.
                 * See cifs_writepage() for more commentary.
                 */
                set_page_writeback(page);
                if (page_offset(page) >= i_size_read(mapping->host)) {
                        *done = true;
                        unlock_page(page);
                        end_page_writeback(page);
                        break;
                }

                wdata->pages[i] = page;
                *next = page->index + 1;
                ++nr_pages;
        }

        /* reset index to refind any pages skipped */
        if (nr_pages == 0)
                *index = wdata->pages[0]->index + 1;

        /* put any pages we aren't going to use */
        for (i = nr_pages; i < found_pages; i++) {
                put_page(wdata->pages[i]);
                wdata->pages[i] = NULL;
        }

        return nr_pages;
}

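/*
 * Fill in the remaining wdata fields (offset, sizes, pid) for the prepared
 * pages, adjust the reserved credits to the final request size, and hand
 * the request to the server's async write op.
 */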
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
                 struct address_space *mapping, struct writeback_control *wbc)
{
        int rc;
        struct TCP_Server_Info *server =
                                tlink_tcon(wdata->cfile->tlink)->ses->server;

        wdata->sync_mode = wbc->sync_mode;
        wdata->nr_pages = nr_pages;
        wdata->offset = page_offset(wdata->pages[0]);
        wdata->pagesz = PAGE_SIZE;
        wdata->tailsz = min(i_size_read(mapping->host) -
                        page_offset(wdata->pages[nr_pages - 1]),
                        (loff_t)PAGE_SIZE);
        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
        wdata->pid = wdata->cfile->pid;

        rc = adjust_credits(server, &wdata->credits, wdata->bytes);
        if (rc)
                return rc;

        if (wdata->cfile->invalidHandle)
                rc = -EAGAIN;
        else
                rc = server->ops->async_writev(wdata, cifs_writedata_release);

        return rc;
}

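/*
 * Writeback for the whole mapping: repeatedly gather runs of contiguous
 * dirty pages (bounded by the negotiated wsize and available credits),
 * send them as async writes, and retry or redirty pages on retryable
 * errors.  Falls back to generic_writepages() when wsize is smaller than
 * a page.
 */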
static int cifs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
{
        struct inode *inode = mapping->host;
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct TCP_Server_Info *server;
        bool done = false, scanned = false, range_whole = false;
        pgoff_t end, index;
        struct cifs_writedata *wdata;
        struct cifsFileInfo *cfile = NULL;
        int rc = 0;
        int saved_rc = 0;
        unsigned int xid;

        /*
         * If wsize is smaller than the page cache size, default to writing
         * one page at a time via cifs_writepage
         */
        if (cifs_sb->wsize < PAGE_SIZE)
                return generic_writepages(mapping, wbc);

        xid = get_xid();
        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_SHIFT;
                end = wbc->range_end >> PAGE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = true;
                scanned = true;
        }
        server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
        while (!done && index <= end) {
                unsigned int i, nr_pages, found_pages, wsize;
                pgoff_t next = 0, tofind, saved_index = index;
                struct cifs_credits credits_on_stack;
                struct cifs_credits *credits = &credits_on_stack;
                int get_file_rc = 0;

                if (cfile)
                        cifsFileInfo_put(cfile);

                rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);

                /* in case of an error store it to return later */
                if (rc)
                        get_file_rc = rc;

                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
                                                   &wsize, credits);
                if (rc != 0) {
                        done = true;
                        break;
                }

                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
                                                  &found_pages);
                if (!wdata) {
                        rc = -ENOMEM;
                        done = true;
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                if (found_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
                                               end, &index, &next, &done);

                /* nothing to write? */
                if (nr_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        add_credits_and_wake_if(server, credits, 0);
                        continue;
                }

                wdata->credits = credits_on_stack;
                wdata->cfile = cfile;
                cfile = NULL;

                if (!wdata->cfile) {
                        cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
                                 get_file_rc);
                        if (is_retryable_error(get_file_rc))
                                rc = get_file_rc;
                        else
                                rc = -EBADF;
                } else
                        rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

                for (i = 0; i < nr_pages; ++i)
                        unlock_page(wdata->pages[i]);

                /* send failure -- clean up the mess */
                if (rc != 0) {
                        add_credits_and_wake_if(server, &wdata->credits, 0);
                        for (i = 0; i < nr_pages; ++i) {
                                if (is_retryable_error(rc))
                                        redirty_page_for_writepage(wbc,
                                                           wdata->pages[i]);
                                else
                                        SetPageError(wdata->pages[i]);
                                end_page_writeback(wdata->pages[i]);
                                put_page(wdata->pages[i]);
                        }
                        if (!is_retryable_error(rc))
                                mapping_set_error(mapping, rc);
                }
                kref_put(&wdata->refcount, cifs_writedata_release);

                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
                        index = saved_index;
                        continue;
                }

                /* Return immediately if we received a signal during writing */
                if (is_interrupt_error(rc)) {
                        done = true;
                        break;
                }

                if (rc != 0 && saved_rc == 0)
                        saved_rc = rc;

                wbc->nr_to_write -= nr_pages;
                if (wbc->nr_to_write <= 0)
                        done = true;

                index = next;
        }

        if (!scanned && !done) {
                /*
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
                scanned = true;
                index = 0;
                goto retry;
        }

        if (saved_rc != 0)
                rc = saved_rc;

        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;

        if (cfile)
                cifsFileInfo_put(cfile);
        free_xid(xid);
        return rc;
}

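/*
 * Write a single locked page, retrying -EAGAIN indefinitely for data
 * integrity (WB_SYNC_ALL) writeback and redirtying the page on other
 * retryable errors.
 */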
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
        int rc;
        unsigned int xid;

        xid = get_xid();
/* BB add check for wbc flags */
        get_page(page);
        if (!PageUptodate(page))
                cifs_dbg(FYI, "ppw - page not up to date\n");

        /*
         * Set the "writeback" flag, and clear "dirty" in the radix tree.
         *
         * A writepage() implementation always needs to do either this,
         * or re-dirty the page with "redirty_page_for_writepage()" in
         * the case of a failure.
         *
         * Just unlocking the page will cause the radix tree tag-bits
         * to fail to update with the state of the page correctly.
         */
        set_page_writeback(page);
retry_write:
        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
        if (is_retryable_error(rc)) {
                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
                        goto retry_write;
                redirty_page_for_writepage(wbc, page);
        } else if (rc != 0) {
                SetPageError(page);
                mapping_set_error(page->mapping, rc);
        } else {
                SetPageUptodate(page);
        }
        end_page_writeback(page);
        put_page(page);
        free_xid(xid);
        return rc;
}

static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int rc = cifs_writepage_locked(page, wbc);
        unlock_page(page);
        return rc;
}

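/*
 * Complete a buffered write started by write_begin.  Fully copied pages
 * are marked uptodate and dirty for later writeback; partially copied,
 * non-uptodate pages are written out synchronously via cifs_write().
 */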
static int cifs_write_end(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata)
{
        int rc;
        struct inode *inode = mapping->host;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        __u32 pid;

        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = cfile->pid;
        else
                pid = current->tgid;

        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
                 page, pos, copied);

        if (PageChecked(page)) {
                if (copied == len)
                        SetPageUptodate(page);
                ClearPageChecked(page);
        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
                SetPageUptodate(page);

        if (!PageUptodate(page)) {
                char *page_data;
                unsigned offset = pos & (PAGE_SIZE - 1);
                unsigned int xid;

                xid = get_xid();
                /* Writing through the known file handle here is preferable
                   to calling cifs_partialpagewrite(), which would first
                   have to look a writable handle up */
                /* BB check if anything else missing out of ppw
                   such as updating last write time */
                page_data = kmap(page);
                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
                /* if (rc < 0) should we set writebehind rc? */
                kunmap(page);

                free_xid(xid);
        } else {
                rc = copied;
                pos += copied;
                set_page_dirty(page);
        }

        if (rc > 0) {
                spin_lock(&inode->i_lock);
                if (pos > inode->i_size)
                        i_size_write(inode, pos);
                spin_unlock(&inode->i_lock);
        }

        unlock_page(page);
        put_page(page);

        return rc;
}

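/*
 * The "strict" fsync variant below additionally zaps the page cache when
 * read caching is no longer granted for the inode, so stale cached data
 * cannot be read back after the sync; plain cifs_fsync() (further down)
 * skips that step.
 */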
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
                      int datasync)
{
        unsigned int xid;
        int rc = 0;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
        struct inode *inode = file_inode(file);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

        rc = file_write_and_wait_range(file, start, end);
        if (rc) {
                trace_cifs_fsync_err(inode->i_ino, rc);
                return rc;
        }

        xid = get_xid();

        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
                 file, datasync);

        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
                rc = cifs_zap_mapping(inode);
                if (rc) {
                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
                        rc = 0; /* don't care about it in fsync */
                }
        }

        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
                if (server->ops->flush)
                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
                else
                        rc = -ENOSYS;
        }

        free_xid(xid);
        return rc;
}

int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
        unsigned int xid;
        int rc = 0;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);

        rc = file_write_and_wait_range(file, start, end);
        if (rc) {
                trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
                return rc;
        }

        xid = get_xid();

        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
                 file, datasync);

        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
                if (server->ops->flush)
                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
                else
                        rc = -ENOSYS;
        }

        free_xid(xid);
        return rc;
}

/*
 * As the file closes, flush all cached write data for this inode and check
 * for write-behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
        struct inode *inode = file_inode(file);
        int rc = 0;

        if (file->f_mode & FMODE_WRITE)
                rc = filemap_write_and_wait(inode->i_mapping);

        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
        if (rc)
                trace_cifs_flush_err(inode->i_ino, rc);
        return rc;
}

static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
        int rc = 0;
        unsigned long i;

        for (i = 0; i < num_pages; i++) {
                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
                if (!pages[i]) {
                        /*
                         * save number of pages we have already allocated and
                         * return with ENOMEM error
                         */
                        num_pages = i;
                        rc = -ENOMEM;
                        break;
                }
        }

        if (rc) {
                for (i = 0; i < num_pages; i++)
                        put_page(pages[i]);
        }
        return rc;
}

static inline
size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
        size_t num_pages;
        size_t clen;

        clen = min_t(const size_t, len, wsize);
        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);

        if (cur_len)
                *cur_len = clen;

        return num_pages;
}

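/*
 * Release routine for uncached writedata: unlike cifs_writedata_release()
 * it also drops the aio context reference and the page references taken
 * when the request was built.
 */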
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
        int i;
        struct cifs_writedata *wdata = container_of(refcount,
                                        struct cifs_writedata, refcount);

        kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
        for (i = 0; i < wdata->nr_pages; i++)
                put_page(wdata->pages[i]);
        cifs_writedata_release(refcount);
}

static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);

static void
cifs_uncached_writev_complete(struct work_struct *work)
{
        struct cifs_writedata *wdata = container_of(work,
                                        struct cifs_writedata, work);
        struct inode *inode = d_inode(wdata->cfile->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);

        spin_lock(&inode->i_lock);
        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
        if (cifsi->server_eof > inode->i_size)
                i_size_write(inode, cifsi->server_eof);
        spin_unlock(&inode->i_lock);

        complete(&wdata->done);
        collect_uncached_write_data(wdata->ctx);
        /* the below call can possibly free the last ref to aio ctx */
        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}

static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
                      size_t *len, unsigned long *num_pages)
{
        size_t save_len, copied, bytes, cur_len = *len;
        unsigned long i, nr_pages = *num_pages;

        save_len = cur_len;
        for (i = 0; i < nr_pages; i++) {
                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
                cur_len -= copied;
                /*
                 * If we didn't copy as much as we expected, then that
                 * may mean we trod into an unmapped area. Stop copying
                 * at that point. On the next pass through the big
                 * loop, we'll likely end up getting a zero-length
                 * write and bailing out of it.
                 */
                if (copied < bytes)
                        break;
        }
        cur_len = save_len - cur_len;
        *len = cur_len;

        /*
         * If we have no data to send, then that probably means that
         * the copy above failed altogether. That's most likely because
         * the address in the iovec was bogus. Return -EFAULT and let
         * the caller free anything we allocated and bail out.
         */
        if (!cur_len)
                return -EFAULT;

        /*
         * i + 1 now represents the number of pages we actually used in
         * the copy phase above.
         */
        *num_pages = i + 1;
        return 0;
}

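/*
 * Resend a whole wdata, typically after a reconnect.  The request is not
 * split: we wait, sleeping between attempts, until the server grants
 * enough credits to cover wdata->bytes, then reissue the async write,
 * deregistering any stale smbdirect memory registration first.
 */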
2802 static int
2803 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2804         struct cifs_aio_ctx *ctx)
2805 {
2806         unsigned int wsize;
2807         struct cifs_credits credits;
2808         int rc;
2809         struct TCP_Server_Info *server =
2810                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2811
2812         do {
2813                 if (wdata->cfile->invalidHandle) {
2814                         rc = cifs_reopen_file(wdata->cfile, false);
2815                         if (rc == -EAGAIN)
2816                                 continue;
2817                         else if (rc)
2818                                 break;
2819                 }
2820
2821
2822                 /*
2823                  * Wait for credits to resend this wdata.
2824                  * Note: we are attempting to resend the whole wdata not in
2825                  * segments
2826                  */
2827                 do {
2828                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2829                                                 &wsize, &credits);
2830                         if (rc)
2831                                 goto fail;
2832
2833                         if (wsize < wdata->bytes) {
2834                                 add_credits_and_wake_if(server, &credits, 0);
2835                                 msleep(1000);
2836                         }
2837                 } while (wsize < wdata->bytes);
2838                 wdata->credits = credits;
2839
2840                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2841
2842                 if (!rc) {
2843                         if (wdata->cfile->invalidHandle)
2844                                 rc = -EAGAIN;
2845                         else {
2846 #ifdef CONFIG_CIFS_SMB_DIRECT
2847                                 if (wdata->mr) {
2848                                         wdata->mr->need_invalidate = true;
2849                                         smbd_deregister_mr(wdata->mr);
2850                                         wdata->mr = NULL;
2851                                 }
2852 #endif
2853                                 rc = server->ops->async_writev(wdata,
2854                                         cifs_uncached_writedata_release);
2855                         }
2856                 }
2857
2858                 /* If the write was successfully sent, we are done */
2859                 if (!rc) {
2860                         list_add_tail(&wdata->list, wdata_list);
2861                         return 0;
2862                 }
2863
2864                 /* Roll back credits and retry if needed */
2865                 add_credits_and_wake_if(server, &wdata->credits, 0);
2866         } while (rc == -EAGAIN);
2867
2868 fail:
2869         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2870         return rc;
2871 }
2872
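/*
 * cifs_write_from_iter - issue async writes for an uncached write
 *
 * Split [offset, offset + len) into wsize-limited wdata requests. For
 * direct I/O the user pages are pinned with iov_iter_get_pages_alloc();
 * otherwise pages are allocated and the data copied into them. Each
 * successfully issued wdata is added to @wdata_list, and -EAGAIN
 * failures rewind the iterator and retry.
 */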
2873 static int
2874 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2875                      struct cifsFileInfo *open_file,
2876                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2877                      struct cifs_aio_ctx *ctx)
2878 {
2879         int rc = 0;
2880         size_t cur_len;
2881         unsigned long nr_pages, num_pages, i;
2882         struct cifs_writedata *wdata;
2883         struct iov_iter saved_from = *from;
2884         loff_t saved_offset = offset;
2885         pid_t pid;
2886         struct TCP_Server_Info *server;
2887         struct page **pagevec;
2888         size_t start;
2889         unsigned int xid;
2890
2891         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2892                 pid = open_file->pid;
2893         else
2894                 pid = current->tgid;
2895
2896         server = tlink_tcon(open_file->tlink)->ses->server;
2897         xid = get_xid();
2898
2899         do {
2900                 unsigned int wsize;
2901                 struct cifs_credits credits_on_stack;
2902                 struct cifs_credits *credits = &credits_on_stack;
2903
2904                 if (open_file->invalidHandle) {
2905                         rc = cifs_reopen_file(open_file, false);
2906                         if (rc == -EAGAIN)
2907                                 continue;
2908                         else if (rc)
2909                                 break;
2910                 }
2911
2912                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2913                                                    &wsize, credits);
2914                 if (rc)
2915                         break;
2916
2917                 cur_len = min_t(const size_t, len, wsize);
2918
2919                 if (ctx->direct_io) {
2920                         ssize_t result;
2921
2922                         result = iov_iter_get_pages_alloc(
2923                                 from, &pagevec, cur_len, &start);
2924                         if (result < 0) {
2925                                 cifs_dbg(VFS,
2926                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2927                                          result, iov_iter_type(from),
2928                                          from->iov_offset, from->count);
2929                                 dump_stack();
2930
2931                                 rc = result;
2932                                 add_credits_and_wake_if(server, credits, 0);
2933                                 break;
2934                         }
2935                         cur_len = (size_t)result;
2936                         iov_iter_advance(from, cur_len);
2937
2938                         nr_pages =
2939                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2940
2941                         wdata = cifs_writedata_direct_alloc(pagevec,
2942                                              cifs_uncached_writev_complete);
2943                         if (!wdata) {
2944                                 rc = -ENOMEM;
2945                                 add_credits_and_wake_if(server, credits, 0);
2946                                 break;
2947                         }
2948
2950                         wdata->page_offset = start;
2951                         wdata->tailsz =
2952                                 nr_pages > 1 ?
2953                                         cur_len - (PAGE_SIZE - start) -
2954                                         (nr_pages - 2) * PAGE_SIZE :
2955                                         cur_len;
2956                 } else {
2957                         nr_pages = get_numpages(wsize, len, &cur_len);
2958                         wdata = cifs_writedata_alloc(nr_pages,
2959                                              cifs_uncached_writev_complete);
2960                         if (!wdata) {
2961                                 rc = -ENOMEM;
2962                                 add_credits_and_wake_if(server, credits, 0);
2963                                 break;
2964                         }
2965
2966                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2967                         if (rc) {
2968                                 kvfree(wdata->pages);
2969                                 kfree(wdata);
2970                                 add_credits_and_wake_if(server, credits, 0);
2971                                 break;
2972                         }
2973
2974                         num_pages = nr_pages;
2975                         rc = wdata_fill_from_iovec(
2976                                 wdata, from, &cur_len, &num_pages);
2977                         if (rc) {
2978                                 for (i = 0; i < nr_pages; i++)
2979                                         put_page(wdata->pages[i]);
2980                                 kvfree(wdata->pages);
2981                                 kfree(wdata);
2982                                 add_credits_and_wake_if(server, credits, 0);
2983                                 break;
2984                         }
2985
2986                         /*
2987                          * Bring nr_pages down to the number of pages we
2988                          * actually used, and free any pages that we didn't use.
2989                          */
2990                         for ( ; nr_pages > num_pages; nr_pages--)
2991                                 put_page(wdata->pages[nr_pages - 1]);
2992
2993                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2994                 }
2995
2996                 wdata->sync_mode = WB_SYNC_ALL;
2997                 wdata->nr_pages = nr_pages;
2998                 wdata->offset = (__u64)offset;
2999                 wdata->cfile = cifsFileInfo_get(open_file);
3000                 wdata->pid = pid;
3001                 wdata->bytes = cur_len;
3002                 wdata->pagesz = PAGE_SIZE;
3003                 wdata->credits = credits_on_stack;
3004                 wdata->ctx = ctx;
3005                 kref_get(&ctx->refcount);
3006
3007                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3008
3009                 if (!rc) {
3010                         if (wdata->cfile->invalidHandle)
3011                                 rc = -EAGAIN;
3012                         else
3013                                 rc = server->ops->async_writev(wdata,
3014                                         cifs_uncached_writedata_release);
3015                 }
3016
3017                 if (rc) {
3018                         add_credits_and_wake_if(server, &wdata->credits, 0);
3019                         kref_put(&wdata->refcount,
3020                                  cifs_uncached_writedata_release);
3021                         if (rc == -EAGAIN) {
3022                                 *from = saved_from;
3023                                 iov_iter_advance(from, offset - saved_offset);
3024                                 continue;
3025                         }
3026                         break;
3027                 }
3028
3029                 list_add_tail(&wdata->list, wdata_list);
3030                 offset += cur_len;
3031                 len -= cur_len;
3032         } while (len > 0);
3033
3034         free_xid(xid);
3035         return rc;
3036 }
3037
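/*
 * collect_uncached_write_data - gather the results of an uncached write
 *
 * Called from both the issuing thread and the write completion worker.
 * Under ctx->aio_mutex, walk ctx->list collecting each wdata's result,
 * resending on -EAGAIN. Once every request has completed, set ctx->rc
 * and either call ->ki_complete() for an async kiocb or complete
 * ctx->done for a synchronous caller.
 */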
3038 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3039 {
3040         struct cifs_writedata *wdata, *tmp;
3041         struct cifs_tcon *tcon;
3042         struct cifs_sb_info *cifs_sb;
3043         struct dentry *dentry = ctx->cfile->dentry;
3044         int rc;
3045
3046         tcon = tlink_tcon(ctx->cfile->tlink);
3047         cifs_sb = CIFS_SB(dentry->d_sb);
3048
3049         mutex_lock(&ctx->aio_mutex);
3050
3051         if (list_empty(&ctx->list)) {
3052                 mutex_unlock(&ctx->aio_mutex);
3053                 return;
3054         }
3055
3056         rc = ctx->rc;
3057         /*
3058          * Wait for and collect replies for any successful sends in order of
3059          * increasing offset. Once an error is hit, then return without waiting
3060          * for any more replies.
3061          */
3062 restart_loop:
3063         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3064                 if (!rc) {
3065                         if (!try_wait_for_completion(&wdata->done)) {
3066                                 mutex_unlock(&ctx->aio_mutex);
3067                                 return;
3068                         }
3069
3070                         if (wdata->result)
3071                                 rc = wdata->result;
3072                         else
3073                                 ctx->total_len += wdata->bytes;
3074
3075                         /* resend call if it's a retryable error */
3076                         if (rc == -EAGAIN) {
3077                                 struct list_head tmp_list;
3078                                 struct iov_iter tmp_from = ctx->iter;
3079
3080                                 INIT_LIST_HEAD(&tmp_list);
3081                                 list_del_init(&wdata->list);
3082
3083                                 if (ctx->direct_io)
3084                                         rc = cifs_resend_wdata(
3085                                                 wdata, &tmp_list, ctx);
3086                                 else {
3087                                         iov_iter_advance(&tmp_from,
3088                                                  wdata->offset - ctx->pos);
3089
3090                                         rc = cifs_write_from_iter(wdata->offset,
3091                                                 wdata->bytes, &tmp_from,
3092                                                 ctx->cfile, cifs_sb, &tmp_list,
3093                                                 ctx);
3094
3095                                         kref_put(&wdata->refcount,
3096                                                 cifs_uncached_writedata_release);
3097                                 }
3098
3099                                 list_splice(&tmp_list, &ctx->list);
3100                                 goto restart_loop;
3101                         }
3102                 }
3103                 list_del_init(&wdata->list);
3104                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3105         }
3106
3107         cifs_stats_bytes_written(tcon, ctx->total_len);
3108         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(d_inode(dentry))->flags);
3109
3110         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3111
3112         mutex_unlock(&ctx->aio_mutex);
3113
3114         if (ctx->iocb && ctx->iocb->ki_complete)
3115                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3116         else
3117                 complete(&ctx->done);
3118 }
3119
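/*
 * __cifs_writev - common entry point for uncached and direct writes
 *
 * Set up a cifs_aio_ctx, issue the writes via cifs_write_from_iter()
 * and either return -EIOCBQUEUED for an async kiocb or wait for
 * completion and return the number of bytes written. @direct selects
 * the direct I/O path (pinned user pages) over the buffered copy path.
 */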
3120 static ssize_t __cifs_writev(
3121         struct kiocb *iocb, struct iov_iter *from, bool direct)
3122 {
3123         struct file *file = iocb->ki_filp;
3124         ssize_t total_written = 0;
3125         struct cifsFileInfo *cfile;
3126         struct cifs_tcon *tcon;
3127         struct cifs_sb_info *cifs_sb;
3128         struct cifs_aio_ctx *ctx;
3129         struct iov_iter saved_from = *from;
3130         size_t len = iov_iter_count(from);
3131         int rc;
3132
3133         /*
3134          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3135          * In this case, fall back to the non-direct write function.
3136          * This could be improved by getting pages directly in ITER_KVEC.
3137          */
3138         if (direct && iov_iter_is_kvec(from)) {
3139                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3140                 direct = false;
3141         }
3142
3143         rc = generic_write_checks(iocb, from);
3144         if (rc <= 0)
3145                 return rc;
3146
3147         cifs_sb = CIFS_FILE_SB(file);
3148         cfile = file->private_data;
3149         tcon = tlink_tcon(cfile->tlink);
3150
3151         if (!tcon->ses->server->ops->async_writev)
3152                 return -ENOSYS;
3153
3154         ctx = cifs_aio_ctx_alloc();
3155         if (!ctx)
3156                 return -ENOMEM;
3157
3158         ctx->cfile = cifsFileInfo_get(cfile);
3159
3160         if (!is_sync_kiocb(iocb))
3161                 ctx->iocb = iocb;
3162
3163         ctx->pos = iocb->ki_pos;
3164
3165         if (direct) {
3166                 ctx->direct_io = true;
3167                 ctx->iter = *from;
3168                 ctx->len = len;
3169         } else {
3170                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3171                 if (rc) {
3172                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3173                         return rc;
3174                 }
3175         }
3176
3177         /* grab a lock here because the write response handlers can access ctx */
3178         mutex_lock(&ctx->aio_mutex);
3179
3180         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3181                                   cfile, cifs_sb, &ctx->list, ctx);
3182
3183         /*
3184          * If at least one write was successfully sent, then discard any rc
3185          * value from the later writes. If the remaining writes succeed, then
3186          * we'll end up returning whatever was written. If they fail, then
3187          * we'll get a new rc value from that.
3188          */
3189         if (!list_empty(&ctx->list))
3190                 rc = 0;
3191
3192         mutex_unlock(&ctx->aio_mutex);
3193
3194         if (rc) {
3195                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3196                 return rc;
3197         }
3198
3199         if (!is_sync_kiocb(iocb)) {
3200                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3201                 return -EIOCBQUEUED;
3202         }
3203
3204         rc = wait_for_completion_killable(&ctx->done);
3205         if (rc) {
3206                 mutex_lock(&ctx->aio_mutex);
3207                 ctx->rc = rc = -EINTR;
3208                 total_written = ctx->total_len;
3209                 mutex_unlock(&ctx->aio_mutex);
3210         } else {
3211                 rc = ctx->rc;
3212                 total_written = ctx->total_len;
3213         }
3214
3215         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3216
3217         if (unlikely(!total_written))
3218                 return rc;
3219
3220         iocb->ki_pos += total_written;
3221         return total_written;
3222 }
3223
3224 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3225 {
3226         return __cifs_writev(iocb, from, true);
3227 }
3228
3229 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3230 {
3231         return __cifs_writev(iocb, from, false);
3232 }
3233
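/*
 * cifs_writev - write through the page cache, honoring mandatory locks
 *
 * Hold the inode lock and lock_sem so nobody can add a conflicting
 * byte-range lock while we check for one; return -EACCES if the range
 * is locked, otherwise write via __generic_file_write_iter() and sync.
 */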
3234 static ssize_t
3235 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3236 {
3237         struct file *file = iocb->ki_filp;
3238         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3239         struct inode *inode = file->f_mapping->host;
3240         struct cifsInodeInfo *cinode = CIFS_I(inode);
3241         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3242         ssize_t rc;
3243
3244         inode_lock(inode);
3245         /*
3246          * We need to hold the sem to be sure nobody modifies lock list
3247          * with a brlock that prevents writing.
3248          */
3249         down_read(&cinode->lock_sem);
3250
3251         rc = generic_write_checks(iocb, from);
3252         if (rc <= 0)
3253                 goto out;
3254
3255         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3256                                      server->vals->exclusive_lock_type, 0,
3257                                      NULL, CIFS_WRITE_OP))
3258                 rc = __generic_file_write_iter(iocb, from);
3259         else
3260                 rc = -EACCES;
3261 out:
3262         up_read(&cinode->lock_sem);
3263         inode_unlock(inode);
3264
3265         if (rc > 0)
3266                 rc = generic_write_sync(iocb, rc);
3267         return rc;
3268 }
3269
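/*
 * cifs_strict_writev - write entry point for strict cache mode
 *
 * With write caching (oplock/lease) held, write through the page cache,
 * taking the mandatory-lock path unless the server supports POSIX
 * locking. Without it, send the data straight to the server and, if
 * read caching is still held, zap the now-stale page cache and drop
 * the oplock.
 */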
3270 ssize_t
3271 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3272 {
3273         struct inode *inode = file_inode(iocb->ki_filp);
3274         struct cifsInodeInfo *cinode = CIFS_I(inode);
3275         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3276         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3277                                                 iocb->ki_filp->private_data;
3278         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3279         ssize_t written;
3280
3281         written = cifs_get_writer(cinode);
3282         if (written)
3283                 return written;
3284
3285         if (CIFS_CACHE_WRITE(cinode)) {
3286                 if (cap_unix(tcon->ses) &&
3287                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3288                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3289                         written = generic_file_write_iter(iocb, from);
3290                         goto out;
3291                 }
3292                 written = cifs_writev(iocb, from);
3293                 goto out;
3294         }
3295         /*
3296          * For non-oplocked files in strict cache mode we need to write the data
3297          * to the server exactly from pos to pos+len-1 rather than flush all
3298          * affected pages because it may cause an error with mandatory locks on
3299          * these pages but not on the region from pos to pos+len-1.
3300          */
3301         written = cifs_user_writev(iocb, from);
3302         if (CIFS_CACHE_READ(cinode)) {
3303                 /*
3304                  * We have read level caching and we have just sent a write
3305                  * request to the server thus making data in the cache stale.
3306                  * Zap the cache and set oplock/lease level to NONE to avoid
3307                  * reading stale data from the cache. All subsequent read
3308                  * operations will read new data from the server.
3309                  */
3310                 cifs_zap_mapping(inode);
3311                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3312                          inode);
3313                 cinode->oplock = 0;
3314         }
3315 out:
3316         cifs_put_writer(cinode);
3317         return written;
3318 }
3319
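/*
 * Allocate a readdata structure around a caller-provided page array;
 * used directly by the direct I/O path, where the pages are pinned
 * user pages, and via cifs_readdata_alloc() for the buffered path.
 */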
3320 static struct cifs_readdata *
3321 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3322 {
3323         struct cifs_readdata *rdata;
3324
3325         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3326         if (rdata != NULL) {
3327                 rdata->pages = pages;
3328                 kref_init(&rdata->refcount);
3329                 INIT_LIST_HEAD(&rdata->list);
3330                 init_completion(&rdata->done);
3331                 INIT_WORK(&rdata->work, complete);
3332         }
3333
3334         return rdata;
3335 }
3336
3337 static struct cifs_readdata *
3338 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3339 {
3340         struct page **pages =
3341                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3342         struct cifs_readdata *ret = NULL;
3343
3344         if (pages) {
3345                 ret = cifs_readdata_direct_alloc(pages, complete);
3346                 if (!ret)
3347                         kfree(pages);
3348         }
3349
3350         return ret;
3351 }
3352
3353 void
3354 cifs_readdata_release(struct kref *refcount)
3355 {
3356         struct cifs_readdata *rdata = container_of(refcount,
3357                                         struct cifs_readdata, refcount);
3358 #ifdef CONFIG_CIFS_SMB_DIRECT
3359         if (rdata->mr) {
3360                 smbd_deregister_mr(rdata->mr);
3361                 rdata->mr = NULL;
3362         }
3363 #endif
3364         if (rdata->cfile)
3365                 cifsFileInfo_put(rdata->cfile);
3366
3367         kvfree(rdata->pages);
3368         kfree(rdata);
3369 }
3370
3371 static int
3372 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3373 {
3374         int rc = 0;
3375         struct page *page;
3376         unsigned int i;
3377
3378         for (i = 0; i < nr_pages; i++) {
3379                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3380                 if (!page) {
3381                         rc = -ENOMEM;
3382                         break;
3383                 }
3384                 rdata->pages[i] = page;
3385         }
3386
3387         if (rc) {
3388                 unsigned int nr_page_failed = i;
3389
3390                 for (i = 0; i < nr_page_failed; i++) {
3391                         put_page(rdata->pages[i]);
3392                         rdata->pages[i] = NULL;
3393                 }
3394         }
3395         return rc;
3396 }
3397
3398 static void
3399 cifs_uncached_readdata_release(struct kref *refcount)
3400 {
3401         struct cifs_readdata *rdata = container_of(refcount,
3402                                         struct cifs_readdata, refcount);
3403         unsigned int i;
3404
3405         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3406         for (i = 0; i < rdata->nr_pages; i++) {
3407                 put_page(rdata->pages[i]);
3408         }
3409         cifs_readdata_release(refcount);
3410 }
3411
3412 /**
3413  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3414  * @rdata:      the readdata response with list of pages holding data
3415  * @iter:       destination for our data
3416  *
3417  * This function copies data from a list of pages in a readdata response into
3418  * an array of iovecs. It will first calculate where the data should go
3419  * based on the info in the readdata and then copy the data into that spot.
3420  */
3421 static int
3422 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3423 {
3424         size_t remaining = rdata->got_bytes;
3425         unsigned int i;
3426
3427         for (i = 0; i < rdata->nr_pages; i++) {
3428                 struct page *page = rdata->pages[i];
3429                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3430                 size_t written;
3431
3432                 if (unlikely(iov_iter_is_pipe(iter))) {
3433                         void *addr = kmap_atomic(page);
3434
3435                         written = copy_to_iter(addr, copy, iter);
3436                         kunmap_atomic(addr);
3437                 } else
3438                         written = copy_page_to_iter(page, 0, copy, iter);
3439                 remaining -= written;
3440                 if (written < copy && iov_iter_count(iter) > 0)
3441                         break;
3442         }
3443         return remaining ? -EFAULT : 0;
3444 }
3445
3446 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3447
3448 static void
3449 cifs_uncached_readv_complete(struct work_struct *work)
3450 {
3451         struct cifs_readdata *rdata = container_of(work,
3452                                                 struct cifs_readdata, work);
3453
3454         complete(&rdata->done);
3455         collect_uncached_read_data(rdata->ctx);
3456         /* the below call can possibly free the last ref to aio ctx */
3457         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3458 }
3459
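/*
 * uncached_fill_pages - receive read data into the pages of an rdata
 *
 * Fill up to @len bytes into @rdata->pages. If @iter is non-NULL the
 * data is copied from an already-received buffer; otherwise it is read
 * straight from the socket (or, with smbdirect, was already placed by
 * RDMA). Pages beyond the received length are released.
 */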
3460 static int
3461 uncached_fill_pages(struct TCP_Server_Info *server,
3462                     struct cifs_readdata *rdata, struct iov_iter *iter,
3463                     unsigned int len)
3464 {
3465         int result = 0;
3466         unsigned int i;
3467         unsigned int nr_pages = rdata->nr_pages;
3468         unsigned int page_offset = rdata->page_offset;
3469
3470         rdata->got_bytes = 0;
3471         rdata->tailsz = PAGE_SIZE;
3472         for (i = 0; i < nr_pages; i++) {
3473                 struct page *page = rdata->pages[i];
3474                 size_t n;
3475                 unsigned int segment_size = rdata->pagesz;
3476
3477                 if (i == 0)
3478                         segment_size -= page_offset;
3479                 else
3480                         page_offset = 0;
3481
3483                 if (len <= 0) {
3484                         /* no need to hold page hostage */
3485                         rdata->pages[i] = NULL;
3486                         rdata->nr_pages--;
3487                         put_page(page);
3488                         continue;
3489                 }
3490
3491                 n = len;
3492                 if (len >= segment_size)
3493                         /* enough data to fill the page */
3494                         n = segment_size;
3495                 else
3496                         rdata->tailsz = len;
3497                 len -= n;
3498
3499                 if (iter)
3500                         result = copy_page_from_iter(
3501                                         page, page_offset, n, iter);
3502 #ifdef CONFIG_CIFS_SMB_DIRECT
3503                 else if (rdata->mr)
3504                         result = n;
3505 #endif
3506                 else
3507                         result = cifs_read_page_from_socket(
3508                                         server, page, page_offset, n);
3509                 if (result < 0)
3510                         break;
3511
3512                 rdata->got_bytes += result;
3513         }
3514
3515         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3516                                                 rdata->got_bytes : result;
3517 }
3518
3519 static int
3520 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3521                               struct cifs_readdata *rdata, unsigned int len)
3522 {
3523         return uncached_fill_pages(server, rdata, NULL, len);
3524 }
3525
3526 static int
3527 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3528                               struct cifs_readdata *rdata,
3529                               struct iov_iter *iter)
3530 {
3531         return uncached_fill_pages(server, rdata, iter, iter->count);
3532 }
3533
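/*
 * cifs_resend_rdata - retry an uncached read that returned -EAGAIN
 *
 * Like cifs_resend_wdata(): reopen the handle if needed, wait until
 * enough credits are available to resend the whole rdata as one
 * request, then reissue the async read. On success the rdata is added
 * to @rdata_list; otherwise its reference is dropped.
 */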
3534 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3535                         struct list_head *rdata_list,
3536                         struct cifs_aio_ctx *ctx)
3537 {
3538         unsigned int rsize;
3539         struct cifs_credits credits;
3540         int rc;
3541         struct TCP_Server_Info *server =
3542                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3543
3544         do {
3545                 if (rdata->cfile->invalidHandle) {
3546                         rc = cifs_reopen_file(rdata->cfile, true);
3547                         if (rc == -EAGAIN)
3548                                 continue;
3549                         else if (rc)
3550                                 break;
3551                 }
3552
3553                 /*
3554                  * Wait for credits to resend this rdata.
3555                  * Note: we are attempting to resend the whole rdata rather
3556                  * than in segments.
3557                  */
3558                 do {
3559                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3560                                                 &rsize, &credits);
3561
3562                         if (rc)
3563                                 goto fail;
3564
3565                         if (rsize < rdata->bytes) {
3566                                 add_credits_and_wake_if(server, &credits, 0);
3567                                 msleep(1000);
3568                         }
3569                 } while (rsize < rdata->bytes);
3570                 rdata->credits = credits;
3571
3572                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3573                 if (!rc) {
3574                         if (rdata->cfile->invalidHandle)
3575                                 rc = -EAGAIN;
3576                         else {
3577 #ifdef CONFIG_CIFS_SMB_DIRECT
3578                                 if (rdata->mr) {
3579                                         rdata->mr->need_invalidate = true;
3580                                         smbd_deregister_mr(rdata->mr);
3581                                         rdata->mr = NULL;
3582                                 }
3583 #endif
3584                                 rc = server->ops->async_readv(rdata);
3585                         }
3586                 }
3587
3588                 /* If the read was successfully sent, we are done */
3589                 if (!rc) {
3590                         /* Add to aio pending list */
3591                         list_add_tail(&rdata->list, rdata_list);
3592                         return 0;
3593                 }
3594
3595                 /* Roll back credits and retry if needed */
3596                 add_credits_and_wake_if(server, &rdata->credits, 0);
3597         } while (rc == -EAGAIN);
3598
3599 fail:
3600         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3601         return rc;
3602 }
3603
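/*
 * cifs_send_async_read - issue async reads for an uncached read
 *
 * Split [offset, offset + len) into rsize-limited rdata requests. For
 * direct I/O the destination user pages are pinned; otherwise pages
 * are allocated to receive the data. Each successfully issued rdata is
 * added to @rdata_list.
 */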
3604 static int
3605 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3606                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3607                      struct cifs_aio_ctx *ctx)
3608 {
3609         struct cifs_readdata *rdata;
3610         unsigned int npages, rsize;
3611         struct cifs_credits credits_on_stack;
3612         struct cifs_credits *credits = &credits_on_stack;
3613         size_t cur_len;
3614         int rc;
3615         pid_t pid;
3616         struct TCP_Server_Info *server;
3617         struct page **pagevec;
3618         size_t start;
3619         struct iov_iter direct_iov = ctx->iter;
3620
3621         server = tlink_tcon(open_file->tlink)->ses->server;
3622
3623         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3624                 pid = open_file->pid;
3625         else
3626                 pid = current->tgid;
3627
3628         if (ctx->direct_io)
3629                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3630
3631         do {
3632                 if (open_file->invalidHandle) {
3633                         rc = cifs_reopen_file(open_file, true);
3634                         if (rc == -EAGAIN)
3635                                 continue;
3636                         else if (rc)
3637                                 break;
3638                 }
3639
3640                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3641                                                    &rsize, credits);
3642                 if (rc)
3643                         break;
3644
3645                 cur_len = min_t(const size_t, len, rsize);
3646
3647                 if (ctx->direct_io) {
3648                         ssize_t result;
3649
3650                         result = iov_iter_get_pages_alloc(
3651                                         &direct_iov, &pagevec,
3652                                         cur_len, &start);
3653                         if (result < 0) {
3654                                 cifs_dbg(VFS,
3655                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3656                                          result, iov_iter_type(&direct_iov),
3657                                          direct_iov.iov_offset,
3658                                          direct_iov.count);
3659                                 dump_stack();
3660
3661                                 rc = result;
3662                                 add_credits_and_wake_if(server, credits, 0);
3663                                 break;
3664                         }
3665                         cur_len = (size_t)result;
3666                         iov_iter_advance(&direct_iov, cur_len);
3667
3668                         rdata = cifs_readdata_direct_alloc(
3669                                         pagevec, cifs_uncached_readv_complete);
3670                         if (!rdata) {
3671                                 add_credits_and_wake_if(server, credits, 0);
3672                                 rc = -ENOMEM;
3673                                 break;
3674                         }
3675
3676                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3677                         rdata->page_offset = start;
3678                         rdata->tailsz = npages > 1 ?
3679                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3680                                 cur_len;
3681
3682                 } else {
3684                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3685                         /* allocate a readdata struct */
3686                         rdata = cifs_readdata_alloc(npages,
3687                                             cifs_uncached_readv_complete);
3688                         if (!rdata) {
3689                                 add_credits_and_wake_if(server, credits, 0);
3690                                 rc = -ENOMEM;
3691                                 break;
3692                         }
3693
3694                         rc = cifs_read_allocate_pages(rdata, npages);
3695                         if (rc) {
3696                                 kvfree(rdata->pages);
3697                                 kfree(rdata);
3698                                 add_credits_and_wake_if(server, credits, 0);
3699                                 break;
3700                         }
3701
3702                         rdata->tailsz = PAGE_SIZE;
3703                 }
3704
3705                 rdata->cfile = cifsFileInfo_get(open_file);
3706                 rdata->nr_pages = npages;
3707                 rdata->offset = offset;
3708                 rdata->bytes = cur_len;
3709                 rdata->pid = pid;
3710                 rdata->pagesz = PAGE_SIZE;
3711                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3712                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3713                 rdata->credits = credits_on_stack;
3714                 rdata->ctx = ctx;
3715                 kref_get(&ctx->refcount);
3716
3717                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3718
3719                 if (!rc) {
3720                         if (rdata->cfile->invalidHandle)
3721                                 rc = -EAGAIN;
3722                         else
3723                                 rc = server->ops->async_readv(rdata);
3724                 }
3725
3726                 if (rc) {
3727                         add_credits_and_wake_if(server, &rdata->credits, 0);
3728                         kref_put(&rdata->refcount,
3729                                 cifs_uncached_readdata_release);
3730                         if (rc == -EAGAIN) {
3731                                 iov_iter_revert(&direct_iov, cur_len);
3732                                 continue;
3733                         }
3734                         break;
3735                 }
3736
3737                 list_add_tail(&rdata->list, rdata_list);
3738                 offset += cur_len;
3739                 len -= cur_len;
3740         } while (len > 0);
3741
3742         return rc;
3743 }
3744
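/*
 * collect_uncached_read_data - gather the results of an uncached read
 *
 * Counterpart of collect_uncached_write_data(): under ctx->aio_mutex,
 * walk ctx->list in order of increasing offset, copy completed data to
 * the caller's iterator on the buffered path, resend on -EAGAIN, and
 * complete the aio ctx once every rdata has been accounted for.
 */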
3745 static void
3746 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3747 {
3748         struct cifs_readdata *rdata, *tmp;
3749         struct iov_iter *to = &ctx->iter;
3750         struct cifs_sb_info *cifs_sb;
3751         int rc;
3752
3753         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3754
3755         mutex_lock(&ctx->aio_mutex);
3756
3757         if (list_empty(&ctx->list)) {
3758                 mutex_unlock(&ctx->aio_mutex);
3759                 return;
3760         }
3761
3762         rc = ctx->rc;
3763         /* the loop below should proceed in the order of increasing offsets */
3764 again:
3765         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3766                 if (!rc) {
3767                         if (!try_wait_for_completion(&rdata->done)) {
3768                                 mutex_unlock(&ctx->aio_mutex);
3769                                 return;
3770                         }
3771
3772                         if (rdata->result == -EAGAIN) {
3773                                 /* resend call if it's a retryable error */
3774                                 struct list_head tmp_list;
3775                                 unsigned int got_bytes = rdata->got_bytes;
3776
3777                                 list_del_init(&rdata->list);
3778                                 INIT_LIST_HEAD(&tmp_list);
3779
3780                                 /*
3781                                  * Got part of the data and then a reconnect
3782                                  * happened -- fill the buffer and continue
3783                                  * reading.
3784                                  */
3785                                 if (got_bytes && got_bytes < rdata->bytes) {
3786                                         rc = 0;
3787                                         if (!ctx->direct_io)
3788                                                 rc = cifs_readdata_to_iov(rdata, to);
3789                                         if (rc) {
3790                                                 kref_put(&rdata->refcount,
3791                                                         cifs_uncached_readdata_release);
3792                                                 continue;
3793                                         }
3794                                 }
3795
3796                                 if (ctx->direct_io) {
3797                                         /*
3798                                          * Re-use rdata as this is a
3799                                          * direct I/O
3800                                          */
3801                                         rc = cifs_resend_rdata(
3802                                                 rdata,
3803                                                 &tmp_list, ctx);
3804                                 } else {
3805                                         rc = cifs_send_async_read(
3806                                                 rdata->offset + got_bytes,
3807                                                 rdata->bytes - got_bytes,
3808                                                 rdata->cfile, cifs_sb,
3809                                                 &tmp_list, ctx);
3810
3811                                         kref_put(&rdata->refcount,
3812                                                 cifs_uncached_readdata_release);
3813                                 }
3814
3815                                 list_splice(&tmp_list, &ctx->list);
3816
3817                                 goto again;
3818                         } else if (rdata->result)
3819                                 rc = rdata->result;
3820                         else if (!ctx->direct_io)
3821                                 rc = cifs_readdata_to_iov(rdata, to);
3822
3823                         /* if there was a short read -- discard anything left */
3824                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3825                                 rc = -ENODATA;
3826
3827                         ctx->total_len += rdata->got_bytes;
3828                 }
3829                 list_del_init(&rdata->list);
3830                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3831         }
3832
3833         if (!ctx->direct_io)
3834                 ctx->total_len = ctx->len - iov_iter_count(to);
3835
3836         /* mask nodata case */
3837         if (rc == -ENODATA)
3838                 rc = 0;
3839
3840         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3841
3842         mutex_unlock(&ctx->aio_mutex);
3843
3844         if (ctx->iocb && ctx->iocb->ki_complete)
3845                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3846         else
3847                 complete(&ctx->done);
3848 }
3849
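/*
 * __cifs_readv - common entry point for uncached and direct reads
 *
 * Mirror of __cifs_writev(): set up a cifs_aio_ctx, issue the reads
 * via cifs_send_async_read() and either return -EIOCBQUEUED for an
 * async kiocb or wait for completion and return the bytes read.
 */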
3850 static ssize_t __cifs_readv(
3851         struct kiocb *iocb, struct iov_iter *to, bool direct)
3852 {
3853         size_t len;
3854         struct file *file = iocb->ki_filp;
3855         struct cifs_sb_info *cifs_sb;
3856         struct cifsFileInfo *cfile;
3857         struct cifs_tcon *tcon;
3858         ssize_t rc, total_read = 0;
3859         loff_t offset = iocb->ki_pos;
3860         struct cifs_aio_ctx *ctx;
3861
3862         /*
3863          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3864          * so fall back to the data copy read path.
3865          * This could be improved by getting pages directly in ITER_KVEC.
3866          */
3867         if (direct && iov_iter_is_kvec(to)) {
3868                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3869                 direct = false;
3870         }
3871
3872         len = iov_iter_count(to);
3873         if (!len)
3874                 return 0;
3875
3876         cifs_sb = CIFS_FILE_SB(file);
3877         cfile = file->private_data;
3878         tcon = tlink_tcon(cfile->tlink);
3879
3880         if (!tcon->ses->server->ops->async_readv)
3881                 return -ENOSYS;
3882
3883         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3884                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3885
3886         ctx = cifs_aio_ctx_alloc();
3887         if (!ctx)
3888                 return -ENOMEM;
3889
3890         ctx->cfile = cifsFileInfo_get(cfile);
3891
3892         if (!is_sync_kiocb(iocb))
3893                 ctx->iocb = iocb;
3894
3895         if (iter_is_iovec(to))
3896                 ctx->should_dirty = true;
3897
3898         if (direct) {
3899                 ctx->pos = offset;
3900                 ctx->direct_io = true;
3901                 ctx->iter = *to;
3902                 ctx->len = len;
3903         } else {
3904                 rc = setup_aio_ctx_iter(ctx, to, READ);
3905                 if (rc) {
3906                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3907                         return rc;
3908                 }
3909                 len = ctx->len;
3910         }
3911
3912         /* grab a lock here because the read response handlers can access ctx */
3913         mutex_lock(&ctx->aio_mutex);
3914
3915         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3916
3917         /* if at least one read request was successfully sent, then reset rc */
3918         if (!list_empty(&ctx->list))
3919                 rc = 0;
3920
3921         mutex_unlock(&ctx->aio_mutex);
3922
3923         if (rc) {
3924                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3925                 return rc;
3926         }
3927
3928         if (!is_sync_kiocb(iocb)) {
3929                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3930                 return -EIOCBQUEUED;
3931         }
3932
3933         rc = wait_for_completion_killable(&ctx->done);
3934         if (rc) {
3935                 mutex_lock(&ctx->aio_mutex);
3936                 ctx->rc = rc = -EINTR;
3937                 total_read = ctx->total_len;
3938                 mutex_unlock(&ctx->aio_mutex);
3939         } else {
3940                 rc = ctx->rc;
3941                 total_read = ctx->total_len;
3942         }
3943
3944         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3945
3946         if (total_read) {
3947                 iocb->ki_pos += total_read;
3948                 return total_read;
3949         }
3950         return rc;
3951 }
3952
3953 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3954 {
3955         return __cifs_readv(iocb, to, true);
3956 }
3957
3958 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3959 {
3960         return __cifs_readv(iocb, to, false);
3961 }
3962
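/*
 * cifs_strict_readv - read entry point for strict cache mode
 *
 * Without a read oplock/lease, go to the server via cifs_user_readv().
 * With one, read from the page cache, first checking for a conflicting
 * mandatory byte-range lock unless the server does POSIX locking.
 */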
3963 ssize_t
3964 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3965 {
3966         struct inode *inode = file_inode(iocb->ki_filp);
3967         struct cifsInodeInfo *cinode = CIFS_I(inode);
3968         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3969         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3970                                                 iocb->ki_filp->private_data;
3971         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3972         int rc = -EACCES;
3973
3974         /*
3975          * In strict cache mode we need to read from the server all the time
3976          * if we don't have a level II oplock because the server can delay the
3977          * mtime change - so we can't make a decision about invalidating the
3978          * inode. And we can also fail with page reading if there are mandatory
3979          * locks on pages affected by this read but not on the region from pos
3980          * to pos+len-1.
3981          */
3982         if (!CIFS_CACHE_READ(cinode))
3983                 return cifs_user_readv(iocb, to);
3984
3985         if (cap_unix(tcon->ses) &&
3986             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3987             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3988                 return generic_file_read_iter(iocb, to);
3989
3990         /*
3991          * We need to hold the sem to be sure nobody modifies lock list
3992          * with a brlock that prevents reading.
3993          */
3994         down_read(&cinode->lock_sem);
3995         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3996                                      tcon->ses->server->vals->shared_lock_type,
3997                                      0, NULL, CIFS_READ_OP))
3998                 rc = generic_file_read_iter(iocb, to);
3999         up_read(&cinode->lock_sem);
4000         return rc;
4001 }
4002
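/*
 * cifs_read - legacy synchronous read into a caller-supplied buffer
 *
 * Issue rsize-limited sync_read calls in a loop, retrying each one on
 * -EAGAIN (e.g. after a reconnect), until @read_size bytes have been
 * read or the server returns an error or end of file.
 */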
4003 static ssize_t
4004 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4005 {
4006         int rc = -EACCES;
4007         unsigned int bytes_read = 0;
4008         unsigned int total_read;
4009         unsigned int current_read_size;
4010         unsigned int rsize;
4011         struct cifs_sb_info *cifs_sb;
4012         struct cifs_tcon *tcon;
4013         struct TCP_Server_Info *server;
4014         unsigned int xid;
4015         char *cur_offset;
4016         struct cifsFileInfo *open_file;
4017         struct cifs_io_parms io_parms = {0};
4018         int buf_type = CIFS_NO_BUFFER;
4019         __u32 pid;
4020
4021         xid = get_xid();
4022         cifs_sb = CIFS_FILE_SB(file);
4023
4024         /* FIXME: set up handlers for larger reads and/or convert to async */
4025         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4026
4027         if (file->private_data == NULL) {
4028                 rc = -EBADF;
4029                 free_xid(xid);
4030                 return rc;
4031         }
4032         open_file = file->private_data;
4033         tcon = tlink_tcon(open_file->tlink);
4034         server = tcon->ses->server;
4035
4036         if (!server->ops->sync_read) {
4037                 free_xid(xid);
4038                 return -ENOSYS;
4039         }
4040
4041         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4042                 pid = open_file->pid;
4043         else
4044                 pid = current->tgid;
4045
4046         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4047                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4048
4049         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4050              total_read += bytes_read, cur_offset += bytes_read) {
4051                 do {
4052                         current_read_size = min_t(uint, read_size - total_read,
4053                                                   rsize);
4054                         /*
4055                          * For Windows ME and 9x we do not want to request more
4056                          * than it negotiated, since it will refuse the read
4057                          * then.
4058                          */
4059                         if (!(tcon->ses->capabilities &
4060                                 tcon->ses->server->vals->cap_large_files)) {
4061                                 current_read_size = min_t(uint,
4062                                         current_read_size, CIFSMaxBufSize);
4063                         }
4064                         if (open_file->invalidHandle) {
4065                                 rc = cifs_reopen_file(open_file, true);
4066                                 if (rc != 0)
4067                                         break;
4068                         }
4069                         io_parms.pid = pid;
4070                         io_parms.tcon = tcon;
4071                         io_parms.offset = *offset;
4072                         io_parms.length = current_read_size;
4073                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4074                                                     &bytes_read, &cur_offset,
4075                                                     &buf_type);
4076                 } while (rc == -EAGAIN);
4077
4078                 if (rc || (bytes_read == 0)) {
4079                         if (total_read) {
4080                                 break;
4081                         } else {
4082                                 free_xid(xid);
4083                                 return rc;
4084                         }
4085                 } else {
4086                         cifs_stats_bytes_read(tcon, total_read);
4087                         *offset += bytes_read;
4088                 }
4089         }
4090         free_xid(xid);
4091         return total_read;
4092 }
4093
4094 /*
4095  * If the page is mmap'ed into a process' page tables, then we need to make
4096  * sure that it doesn't change while being written back.
4097  */
4098 static vm_fault_t
4099 cifs_page_mkwrite(struct vm_fault *vmf)
4100 {
4101         struct page *page = vmf->page;
4102
4103         lock_page(page);
4104         return VM_FAULT_LOCKED;
4105 }
4106
4107 static const struct vm_operations_struct cifs_file_vm_ops = {
4108         .fault = filemap_fault,
4109         .map_pages = filemap_map_pages,
4110         .page_mkwrite = cifs_page_mkwrite,
4111 };
4112
4113 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4114 {
4115         int xid, rc = 0;
4116         struct inode *inode = file_inode(file);
4117
4118         xid = get_xid();
4119
4120         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4121                 rc = cifs_zap_mapping(inode);
4122         if (!rc)
4123                 rc = generic_file_mmap(file, vma);
4124         if (!rc)
4125                 vma->vm_ops = &cifs_file_vm_ops;
4126
4127         free_xid(xid);
4128         return rc;
4129 }
4130
4131 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4132 {
4133         int rc, xid;
4134
4135         xid = get_xid();
4136
4137         rc = cifs_revalidate_file(file);
4138         if (rc)
4139                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4140                          rc);
4141         if (!rc)
4142                 rc = generic_file_mmap(file, vma);
4143         if (!rc)
4144                 vma->vm_ops = &cifs_file_vm_ops;
4145
4146         free_xid(xid);
4147         return rc;
4148 }
4149
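/*
 * cifs_readv_complete - completion worker for readpages requests
 *
 * Add each page to the LRU, mark it uptodate when its data arrived
 * (including a short -EAGAIN result), hand uptodate pages to fscache,
 * then drop the page and rdata references.
 */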
4150 static void
4151 cifs_readv_complete(struct work_struct *work)
4152 {
4153         unsigned int i, got_bytes;
4154         struct cifs_readdata *rdata = container_of(work,
4155                                                 struct cifs_readdata, work);
4156
4157         got_bytes = rdata->got_bytes;
4158         for (i = 0; i < rdata->nr_pages; i++) {
4159                 struct page *page = rdata->pages[i];
4160
4161                 lru_cache_add_file(page);
4162
4163                 if (rdata->result == 0 ||
4164                     (rdata->result == -EAGAIN && got_bytes)) {
4165                         flush_dcache_page(page);
4166                         SetPageUptodate(page);
4167                 }
4168
4169                 unlock_page(page);
4170
4171                 if (rdata->result == 0 ||
4172                     (rdata->result == -EAGAIN && got_bytes))
4173                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4174
4175                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4176
4177                 put_page(page);
4178                 rdata->pages[i] = NULL;
4179         }
4180         kref_put(&rdata->refcount, cifs_readdata_release);
4181 }
4182
4183 static int
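/*
 * readpages_fill_pages - receive read data into pagecache pages
 *
 * The readpages counterpart of uncached_fill_pages(): fill @len bytes
 * into the locked pagecache pages of @rdata, zeroing the tail of a
 * partial page. Pages past the data are released, and those beyond the
 * server's EOF are zero-filled and marked uptodate so the VFS does not
 * keep retrying them.
 */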
4184 readpages_fill_pages(struct TCP_Server_Info *server,
4185                      struct cifs_readdata *rdata, struct iov_iter *iter,
4186                      unsigned int len)
4187 {
4188         int result = 0;
4189         unsigned int i;
4190         u64 eof;
4191         pgoff_t eof_index;
4192         unsigned int nr_pages = rdata->nr_pages;
4193         unsigned int page_offset = rdata->page_offset;
4194
4195         /* determine the eof that the server (probably) has */
4196         eof = CIFS_I(rdata->mapping->host)->server_eof;
4197         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4198         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4199
4200         rdata->got_bytes = 0;
4201         rdata->tailsz = PAGE_SIZE;
4202         for (i = 0; i < nr_pages; i++) {
4203                 struct page *page = rdata->pages[i];
4204                 unsigned int to_read = rdata->pagesz;
4205                 size_t n;
4206
4207                 if (i == 0)
4208                         to_read -= page_offset;
4209                 else
4210                         page_offset = 0;
4211
4212                 n = to_read;
4213
4214                 if (len >= to_read) {
4215                         len -= to_read;
4216                 } else if (len > 0) {
4217                         /* enough for partial page, fill and zero the rest */
4218                         zero_user(page, len + page_offset, to_read - len);
4219                         n = rdata->tailsz = len;
4220                         len = 0;
4221                 } else if (page->index > eof_index) {
4222                         /*
4223                          * The VFS will not try to do readahead past the
4224                          * i_size, but it's possible that we have outstanding
4225                          * writes with gaps in the middle and the i_size hasn't
4226                          * caught up yet. Populate those with zeroed out pages
4227                          * to prevent the VFS from repeatedly attempting to
4228                          * fill them until the writes are flushed.
4229                          */
4230                         zero_user(page, 0, PAGE_SIZE);
4231                         lru_cache_add_file(page);
4232                         flush_dcache_page(page);
4233                         SetPageUptodate(page);
4234                         unlock_page(page);
4235                         put_page(page);
4236                         rdata->pages[i] = NULL;
4237                         rdata->nr_pages--;
4238                         continue;
4239                 } else {
4240                         /* no need to hold page hostage */
4241                         lru_cache_add_file(page);
4242                         unlock_page(page);
4243                         put_page(page);
4244                         rdata->pages[i] = NULL;
4245                         rdata->nr_pages--;
4246                         continue;
4247                 }
4248
4249                 if (iter)
4250                         result = copy_page_from_iter(
4251                                         page, page_offset, n, iter);
4252 #ifdef CONFIG_CIFS_SMB_DIRECT
4253                 else if (rdata->mr)
4254                         result = n;
4255 #endif
4256                 else
4257                         result = cifs_read_page_from_socket(
4258                                         server, page, page_offset, n);
4259                 if (result < 0)
4260                         break;
4261
4262                 rdata->got_bytes += result;
4263         }
4264
4265         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4266                                                 rdata->got_bytes : result;
4267 }
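
/*
 * Worked example, illustrative only: the eof_index computation above with
 * 4KiB pages (PAGE_SHIFT == 12). A server_eof of 10000 bytes gives
 * eof_index = (10000 - 1) >> 12 == 2, so pages with index 3 and beyond lie
 * past the server's EOF and are zero-filled locally instead of being read.
 * The helper below just restates that arithmetic; its name is hypothetical.
 */
static inline pgoff_t example_last_server_page(u64 eof)
{
        return eof ? (eof - 1) >> PAGE_SHIFT : 0;
}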
4268
4269 static int
4270 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4271                                struct cifs_readdata *rdata, unsigned int len)
4272 {
4273         return readpages_fill_pages(server, rdata, NULL, len);
4274 }
4275
4276 static int
4277 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4278                                struct cifs_readdata *rdata,
4279                                struct iov_iter *iter)
4280 {
4281         return readpages_fill_pages(server, rdata, iter, iter->count);
4282 }
4283
4284 static int
4285 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4286                     unsigned int rsize, struct list_head *tmplist,
4287                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4288 {
4289         struct page *page, *tpage;
4290         unsigned int expected_index;
4291         int rc;
4292         gfp_t gfp = readahead_gfp_mask(mapping);
4293
4294         INIT_LIST_HEAD(tmplist);
4295
4296         page = lru_to_page(page_list);
4297
4298         /*
4299          * Lock the page and put it in the cache. Since no one else
4300          * should have access to this page, we're safe to simply set
4301          * PG_locked without checking it first.
4302          */
4303         __SetPageLocked(page);
4304         rc = add_to_page_cache_locked(page, mapping,
4305                                       page->index, gfp);
4306
4307         /* give up if we can't stick it in the cache */
4308         if (rc) {
4309                 __ClearPageLocked(page);
4310                 return rc;
4311         }
4312
4313         /* move first page to the tmplist */
4314         *offset = (loff_t)page->index << PAGE_SHIFT;
4315         *bytes = PAGE_SIZE;
4316         *nr_pages = 1;
4317         list_move_tail(&page->lru, tmplist);
4318
4319         /* now try and add more pages onto the request */
4320         expected_index = page->index + 1;
4321         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4322                 /* discontinuity? */
4323                 if (page->index != expected_index)
4324                         break;
4325
4326                 /* would this page push the read over the rsize? */
4327                 if (*bytes + PAGE_SIZE > rsize)
4328                         break;
4329
4330                 __SetPageLocked(page);
4331                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4332                         __ClearPageLocked(page);
4333                         break;
4334                 }
4335                 list_move_tail(&page->lru, tmplist);
4336                 (*bytes) += PAGE_SIZE;
4337                 expected_index++;
4338                 (*nr_pages)++;
4339         }
4340         return rc;
4341 }
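
/*
 * Illustrative arithmetic, not part of the original file: the batching
 * above stops once adding another page would push the read over rsize, so
 * the largest batch is rsize / PAGE_SIZE contiguous pages - e.g. a 64KiB
 * rsize with 4KiB pages yields at most 16 pages per async read request.
 * The helper name is hypothetical.
 */
static inline unsigned int example_max_batch_pages(unsigned int rsize)
{
        return rsize >> PAGE_SHIFT;     /* 65536 >> 12 == 16 */
}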
4342
4343 static int cifs_readpages(struct file *file, struct address_space *mapping,
4344         struct list_head *page_list, unsigned num_pages)
4345 {
4346         int rc;
4347         struct list_head tmplist;
4348         struct cifsFileInfo *open_file = file->private_data;
4349         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4350         struct TCP_Server_Info *server;
4351         pid_t pid;
4352         unsigned int xid;
4353
4354         xid = get_xid();
4355         /*
4356          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4357          * immediately if the cookie is negative.
4358          *
4359          * After this point, every page in the list might have PG_fscache set,
4360          * so we will need to clean that up off of every page we don't use.
4361          */
4362         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4363                                          &num_pages);
4364         if (rc == 0) {
4365                 free_xid(xid);
4366                 return rc;
4367         }
4368
4369         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4370                 pid = open_file->pid;
4371         else
4372                 pid = current->tgid;
4373
4374         rc = 0;
4375         server = tlink_tcon(open_file->tlink)->ses->server;
4376
4377         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4378                  __func__, file, mapping, num_pages);
4379
4380         /*
4381          * Start with the page at end of list and move it to private
4382          * list. Do the same with any following pages until we hit
4383          * the rsize limit, hit an index discontinuity, or run out of
4384          * pages. Issue the async read and then start the loop again
4385          * until the list is empty.
4386          *
4387          * Note that list order is important. The page_list is in
4388          * the order of declining indexes. When we put the pages in
4389          * the rdata->pages, then we want them in increasing order.
4390          */
4391         while (!list_empty(page_list)) {
4392                 unsigned int i, nr_pages, bytes, rsize;
4393                 loff_t offset;
4394                 struct page *page, *tpage;
4395                 struct cifs_readdata *rdata;
4396                 struct cifs_credits credits_on_stack;
4397                 struct cifs_credits *credits = &credits_on_stack;
4398
4399                 if (open_file->invalidHandle) {
4400                         rc = cifs_reopen_file(open_file, true);
4401                         if (rc == -EAGAIN)
4402                                 continue;
4403                         else if (rc)
4404                                 break;
4405                 }
4406
4407                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4408                                                    &rsize, credits);
4409                 if (rc)
4410                         break;
4411
4412                 /*
4413                  * Give up immediately if rsize is too small to read an entire
4414                  * page. The VFS will fall back to readpage. We should never
4415                  * reach this point however since we set ra_pages to 0 when the
4416                  * rsize is smaller than a cache page.
4417                  */
4418                 if (unlikely(rsize < PAGE_SIZE)) {
4419                         add_credits_and_wake_if(server, credits, 0);
4420                         free_xid(xid);
4421                         return 0;
4422                 }
4423
4424                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4425                                          &nr_pages, &offset, &bytes);
4426                 if (rc) {
4427                         add_credits_and_wake_if(server, credits, 0);
4428                         break;
4429                 }
4430
4431                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4432                 if (!rdata) {
4433                         /* best to give up if we're out of mem */
4434                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4435                                 list_del(&page->lru);
4436                                 lru_cache_add_file(page);
4437                                 unlock_page(page);
4438                                 put_page(page);
4439                         }
4440                         rc = -ENOMEM;
4441                         add_credits_and_wake_if(server, credits, 0);
4442                         break;
4443                 }
4444
4445                 rdata->cfile = cifsFileInfo_get(open_file);
4446                 rdata->mapping = mapping;
4447                 rdata->offset = offset;
4448                 rdata->bytes = bytes;
4449                 rdata->pid = pid;
4450                 rdata->pagesz = PAGE_SIZE;
4451                 rdata->tailsz = PAGE_SIZE;
4452                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4453                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4454                 rdata->credits = credits_on_stack;
4455
4456                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4457                         list_del(&page->lru);
4458                         rdata->pages[rdata->nr_pages++] = page;
4459                 }
4460
4461                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4462
4463                 if (!rc) {
4464                         if (rdata->cfile->invalidHandle)
4465                                 rc = -EAGAIN;
4466                         else
4467                                 rc = server->ops->async_readv(rdata);
4468                 }
4469
4470                 if (rc) {
4471                         add_credits_and_wake_if(server, &rdata->credits, 0);
4472                         for (i = 0; i < rdata->nr_pages; i++) {
4473                                 page = rdata->pages[i];
4474                                 lru_cache_add_file(page);
4475                                 unlock_page(page);
4476                                 put_page(page);
4477                         }
4478                         /* Fallback to the readpage in error/reconnect cases */
4479                         kref_put(&rdata->refcount, cifs_readdata_release);
4480                         break;
4481                 }
4482
4483                 kref_put(&rdata->refcount, cifs_readdata_release);
4484         }
4485
4486         /* Any pages that have been shown to fscache but didn't get added to
4487          * the pagecache must be uncached before they get returned to the
4488          * allocator.
4489          */
4490         cifs_fscache_readpages_cancel(mapping->host, page_list);
4491         free_xid(xid);
4492         return rc;
4493 }
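
/*
 * Illustrative sketch with hypothetical names, not part of the original
 * file: the ordering argument from the comment in cifs_readpages() in
 * miniature. page_list arrives in declining index order, so walking it in
 * reverse visits the lowest index first, and list_move_tail() then appends
 * entries to tmplist in increasing index order - the order in which
 * rdata->pages is filled above.
 */
struct example_ent {
        struct list_head lru;
        pgoff_t index;
};

static void __maybe_unused example_reorder(struct list_head *page_list,
                                           struct list_head *tmplist)
{
        struct example_ent *e, *tmp;

        INIT_LIST_HEAD(tmplist);
        list_for_each_entry_safe_reverse(e, tmp, page_list, lru)
                list_move_tail(&e->lru, tmplist);
}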
4494
4495 /*
4496  * cifs_readpage_worker must be called with the page pinned
4497  */
4498 static int cifs_readpage_worker(struct file *file, struct page *page,
4499         loff_t *poffset)
4500 {
4501         char *read_data;
4502         int rc;
4503
4504         /* Is the page cached? */
4505         rc = cifs_readpage_from_fscache(file_inode(file), page);
4506         if (rc == 0)
4507                 goto read_complete;
4508
4509         read_data = kmap(page);
4510         /* for reads over a certain size could initiate async read ahead */
4511         /* for reads over a certain size we could initiate async read ahead */
4512         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4513
4514         if (rc < 0)
4515                 goto io_error;
4516         else
4517                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4518
4519         /* we do not want atime to be less than mtime, it broke some apps */
4520         file_inode(file)->i_atime = current_time(file_inode(file));
4521         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)) < 0)
4522                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4523         else
4524                 file_inode(file)->i_atime = current_time(file_inode(file));
4525
4526         if (PAGE_SIZE > rc)
4527                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4528
4529         flush_dcache_page(page);
4530         SetPageUptodate(page);
4531
4532         /* send this page to the cache */
4533         cifs_readpage_to_fscache(file_inode(file), page);
4534
4535         rc = 0;
4536
4537 io_error:
4538         kunmap(page);
4539         unlock_page(page);
4540
4541 read_complete:
4542         return rc;
4543 }
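
/*
 * Worked example, illustrative only: a short read of rc == 100 bytes
 * leaves bytes 100..PAGE_SIZE-1 of the kmap()ed page stale, so the memset
 * above zeroes that tail before the page is marked uptodate.
 */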
4544
4545 static int cifs_readpage(struct file *file, struct page *page)
4546 {
4547         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4548         int rc = -EACCES;
4549         unsigned int xid;
4550
4551         xid = get_xid();
4552
4553         if (file->private_data == NULL) {
4554                 rc = -EBADF;
4555                 free_xid(xid);
4556                 return rc;
4557         }
4558
4559         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4560                  page, (int)offset, (int)offset);
4561
4562         rc = cifs_readpage_worker(file, page, &offset);
4563
4564         free_xid(xid);
4565         return rc;
4566 }
4567
4568 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4569 {
4570         struct cifsFileInfo *open_file;
4571
4572         spin_lock(&cifs_inode->open_file_lock);
4573         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4574                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4575                         spin_unlock(&cifs_inode->open_file_lock);
4576                         return 1;
4577                 }
4578         }
4579         spin_unlock(&cifs_inode->open_file_lock);
4580         return 0;
4581 }
4582
4583 /* We do not want to update the file size from the server for inodes
4584    open for write, to avoid races with writepage extending the file.
4585    In the future we could consider allowing refreshing the inode only
4586    on increases in the file size, but this is tricky to do without
4587    racing with writebehind page caching in the current Linux kernel
4588    design. */
4589 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4590 {
4591         if (!cifsInode)
4592                 return true;
4593
4594         if (is_inode_writable(cifsInode)) {
4595                 /* This inode is open for write at least once */
4596                 struct cifs_sb_info *cifs_sb;
4597
4598                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4599                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4600                         /* since there is no page cache to corrupt on
4601                            direct I/O, we can change the size safely */
4602                         return true;
4603                 }
4604
4605                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4606                         return true;
4607
4608                 return false;
4609         } else
4610                 return true;
4611 }
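
/*
 * Illustrative summary, not part of the original file, of the decision
 * made by is_size_safe_to_change():
 *
 *   inode not open for write                 -> safe (true)
 *   open for write, CIFS_MOUNT_DIRECT_IO     -> safe (no page cache)
 *   open for write, end_of_file > i_size     -> safe (pure extension)
 *   otherwise                                -> unsafe (false)
 */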
4612
4613 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4614                         loff_t pos, unsigned len, unsigned flags,
4615                         struct page **pagep, void **fsdata)
4616 {
4617         int oncethru = 0;
4618         pgoff_t index = pos >> PAGE_SHIFT;
4619         loff_t offset = pos & (PAGE_SIZE - 1);
4620         loff_t page_start = pos & PAGE_MASK;
4621         loff_t i_size;
4622         struct page *page;
4623         int rc = 0;
4624
4625         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4626
4627 start:
4628         page = grab_cache_page_write_begin(mapping, index, flags);
4629         if (!page) {
4630                 rc = -ENOMEM;
4631                 goto out;
4632         }
4633
4634         if (PageUptodate(page))
4635                 goto out;
4636
4637         /*
4638          * If we write a full page it will be up to date, no need to read from
4639          * the server. If the write is short, we'll end up doing a sync write
4640          * instead.
4641          */
4642         if (len == PAGE_SIZE)
4643                 goto out;
4644
4645         /*
4646          * optimize away the read when we have an oplock, and we're not
4647          * expecting to use any of the data we'd be reading in. That
4648          * is, when the page lies beyond the EOF, or straddles the EOF
4649          * and the write will cover all of the existing data.
4650          */
4651         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4652                 i_size = i_size_read(mapping->host);
4653                 if (page_start >= i_size ||
4654                     (offset == 0 && (pos + len) >= i_size)) {
4655                         zero_user_segments(page, 0, offset,
4656                                            offset + len,
4657                                            PAGE_SIZE);
4658                         /*
4659                          * PageChecked means that the parts of the page
4660                          * to which we're not writing are considered up
4661                          * to date. Once the data is copied to the
4662                          * page, it can be set uptodate.
4663                          */
4664                         SetPageChecked(page);
4665                         goto out;
4666                 }
4667         }
4668
4669         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4670                 /*
4671                  * might as well read a page, it is fast enough. If we get
4672                  * an error, we don't need to return it. cifs_write_end will
4673                  * do a sync write instead since PG_uptodate isn't set.
4674                  */
4675                 cifs_readpage_worker(file, page, &page_start);
4676                 put_page(page);
4677                 oncethru = 1;
4678                 goto start;
4679         } else {
4680                 /* we could try using another file handle if there is one -
4681                    but how would we lock it to prevent close of that handle
4682                    racing with this read? In any case
4683                    this will be written out by write_end so is fine */
4684         }
4685 out:
4686         *pagep = page;
4687         return rc;
4688 }
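
/*
 * Worked example, illustrative only, of the read-avoidance test in
 * cifs_write_begin(); the helper name is hypothetical. With PAGE_SIZE ==
 * 4096 and i_size == 6000: a write at pos == 8192 has page_start >= i_size,
 * and a page-aligned write at pos == 4096 with len == 4096 ends at
 * 8192 >= i_size with offset == 0 - both skip the read from the server.
 */
static inline bool example_skip_read(loff_t page_start, loff_t offset,
                                     loff_t pos, unsigned int len,
                                     loff_t i_size)
{
        return page_start >= i_size ||
               (offset == 0 && pos + len >= i_size);
}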
4689
4690 static int cifs_release_page(struct page *page, gfp_t gfp)
4691 {
4692         if (PagePrivate(page))
4693                 return 0;
4694
4695         return cifs_fscache_release_page(page, gfp);
4696 }
4697
4698 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4699                                  unsigned int length)
4700 {
4701         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4702
4703         if (offset == 0 && length == PAGE_SIZE)
4704                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4705 }
4706
4707 static int cifs_launder_page(struct page *page)
4708 {
4709         int rc = 0;
4710         loff_t range_start = page_offset(page);
4711         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4712         struct writeback_control wbc = {
4713                 .sync_mode = WB_SYNC_ALL,
4714                 .nr_to_write = 0,
4715                 .range_start = range_start,
4716                 .range_end = range_end,
4717         };
4718
4719         cifs_dbg(FYI, "Launder page: %p\n", page);
4720
4721         if (clear_page_dirty_for_io(page))
4722                 rc = cifs_writepage_locked(page, &wbc);
4723
4724         cifs_fscache_invalidate_page(page, page->mapping->host);
4725         return rc;
4726 }
4727
4728 void cifs_oplock_break(struct work_struct *work)
4729 {
4730         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4731                                                   oplock_break);
4732         struct inode *inode = d_inode(cfile->dentry);
4733         struct cifsInodeInfo *cinode = CIFS_I(inode);
4734         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4735         struct TCP_Server_Info *server = tcon->ses->server;
4736         int rc = 0;
4737         bool purge_cache = false;
4738
4739         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4740                         TASK_UNINTERRUPTIBLE);
4741
4742         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4743                                       cfile->oplock_epoch, &purge_cache);
4744
4745         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4746                                                 cifs_has_mand_locks(cinode)) {
4747                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4748                          inode);
4749                 cinode->oplock = 0;
4750         }
4751
4752         if (inode && S_ISREG(inode->i_mode)) {
4753                 if (CIFS_CACHE_READ(cinode))
4754                         break_lease(inode, O_RDONLY);
4755                 else
4756                         break_lease(inode, O_WRONLY);
4757                 rc = filemap_fdatawrite(inode->i_mapping);
4758                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4759                         rc = filemap_fdatawait(inode->i_mapping);
4760                         mapping_set_error(inode->i_mapping, rc);
4761                         cifs_zap_mapping(inode);
4762                 }
4763                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4764                 if (CIFS_CACHE_WRITE(cinode))
4765                         goto oplock_break_ack;
4766         }
4767
4768         rc = cifs_push_locks(cfile);
4769         if (rc)
4770                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4771
4772 oplock_break_ack:
4773         /*
4774          * Releasing a stale oplock after a recent reconnect of the smb
4775          * session using a now incorrect file handle is not a data integrity
4776          * issue, but do not bother sending an oplock release if the session
4777          * is still disconnected, since the server already released the oplock
4778          */
4779         if (!cfile->oplock_break_cancelled) {
4780                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4781                                                              cinode);
4782                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4783         }
4784         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
4785         cifs_done_oplock_break(cinode);
4786 }
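
/*
 * Illustrative outline, not part of the original file, of the sequence
 * cifs_oplock_break() performs above:
 *
 *  1. wait for in-flight writers (CIFS_INODE_PENDING_WRITERS)
 *  2. downgrade the cached oplock/lease state (dropped entirely if
 *     mandatory byte-range locks are held without write caching)
 *  3. flush dirty pages; also wait and zap the page cache when read
 *     caching was lost or a purge was requested
 *  4. push cached byte-range locks to the server (skipped while write
 *     caching is retained)
 *  5. acknowledge the break, unless it was cancelled by a reconnect
 */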
4787
4788 /*
4789  * The presence of cifs_direct_io() in the address space ops vector
4790  * allows open() O_DIRECT flags which would have failed otherwise.
4791  *
4792  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4793  * so this method should never be called.
4794  *
4795  * Direct IO is not yet supported in the cached mode. 
4796  */
4797 static ssize_t
4798 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4799 {
4800         /*
4801          * FIXME
4802          * Eventually need to support direct IO for non forcedirectio mounts
4803          */
4804         return -EINVAL;
4805 }
4806
4807 static int cifs_swap_activate(struct swap_info_struct *sis,
4808                               struct file *swap_file, sector_t *span)
4809 {
4810         struct cifsFileInfo *cfile = swap_file->private_data;
4811         struct inode *inode = swap_file->f_mapping->host;
4812         unsigned long blocks;
4813         long long isize;
4814
4815         cifs_dbg(FYI, "swap activate\n");
4816
4817         spin_lock(&inode->i_lock);
4818         blocks = inode->i_blocks;
4819         isize = inode->i_size;
4820         spin_unlock(&inode->i_lock);
4821         if (blocks*512 < isize) {
4822                 pr_warn("swap activate: swapfile has holes\n");
4823                 return -EINVAL;
4824         }
4825         *span = sis->pages;
4826
4827         pr_warn_once("Swap support over SMB3 is experimental\n");
4828
4829         /*
4830          * TODO: consider adding ACL (or documenting how) to prevent other
4831          * users (on this or other systems) from reading it
4832          */
4833
4834
4835         /* TODO: add sk_set_memalloc(inet) or similar */
4836
4837         if (cfile)
4838                 cfile->swapfile = true;
4839         /*
4840          * TODO: Since file already open, we can't open with DENY_ALL here
4841          * but we could add call to grab a byte range lock to prevent others
4842          * from reading or writing the file
4843          */
4844
4845         return 0;
4846 }
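
/*
 * Worked example, illustrative only: i_blocks counts 512-byte sectors, so
 * a fully-allocated 1MiB swapfile must show at least 2048 blocks. Fewer
 * blocks than isize / 512 means part of the file is sparse, and the holes
 * check above rejects it with -EINVAL.
 */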
4847
4848 static void cifs_swap_deactivate(struct file *file)
4849 {
4850         struct cifsFileInfo *cfile = file->private_data;
4851
4852         cifs_dbg(FYI, "swap deactivate\n");
4853
4854         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4855
4856         if (cfile)
4857                 cfile->swapfile = false;
4858
4859         /* do we need to unpin (or unlock) the file? */
4860 }
4861
4862 const struct address_space_operations cifs_addr_ops = {
4863         .readpage = cifs_readpage,
4864         .readpages = cifs_readpages,
4865         .writepage = cifs_writepage,
4866         .writepages = cifs_writepages,
4867         .write_begin = cifs_write_begin,
4868         .write_end = cifs_write_end,
4869         .set_page_dirty = __set_page_dirty_nobuffers,
4870         .releasepage = cifs_release_page,
4871         .direct_IO = cifs_direct_io,
4872         .invalidatepage = cifs_invalidate_page,
4873         .launder_page = cifs_launder_page,
4874         /*
4875          * TODO: investigate and, if useful, we could add a cifs_migratePage
4876          * helper (under CONFIG_MIGRATION) in the future, and also
4877          * investigate and add an is_dirty_writeback helper if needed
4878          */
4879         .swap_activate = cifs_swap_activate,
4880         .swap_deactivate = cifs_swap_deactivate,
4881 };
4882
4883 /*
4884  * cifs_readpages requires the server to support a buffer large enough to
4885  * contain the header plus one complete page of data.  Otherwise, we need
4886  * to leave cifs_readpages out of the address space operations.
4887  */
4888 const struct address_space_operations cifs_addr_ops_smallbuf = {
4889         .readpage = cifs_readpage,
4890         .writepage = cifs_writepage,
4891         .writepages = cifs_writepages,
4892         .write_begin = cifs_write_begin,
4893         .write_end = cifs_write_end,
4894         .set_page_dirty = __set_page_dirty_nobuffers,
4895         .releasepage = cifs_release_page,
4896         .invalidatepage = cifs_invalidate_page,
4897         .launder_page = cifs_launder_page,
4898 };
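
/*
 * Illustrative sketch, not part of the original file: how a caller might
 * choose between the two tables above. The real selection happens at inode
 * setup time elsewhere in cifs; the threshold below (header plus one full
 * page per response) is an assumption restating the comment above
 * cifs_addr_ops_smallbuf, and the helper name is hypothetical.
 */
static inline const struct address_space_operations *
example_select_aops(struct TCP_Server_Info *server)
{
        return server->maxBuf < PAGE_SIZE + MAX_CIFS_HDR_SIZE ?
                &cifs_addr_ops_smallbuf : &cifs_addr_ops;
}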