Merge tag 'printk-for-5.6' of git://git.kernel.org/pub/scm/linux/kernel/git/pmladek...
[linux-2.6-microblaze.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50         if ((flags & O_ACCMODE) == O_RDONLY)
51                 return GENERIC_READ;
52         else if ((flags & O_ACCMODE) == O_WRONLY)
53                 return GENERIC_WRITE;
54         else if ((flags & O_ACCMODE) == O_RDWR) {
55                 /* GENERIC_ALL is too much permission to request
56                    can cause unnecessary access denied on create */
57                 /* return GENERIC_ALL; */
58                 return (GENERIC_READ | GENERIC_WRITE);
59         }
60
61         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63                 FILE_READ_DATA);
64 }
65
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68         u32 posix_flags = 0;
69
70         if ((flags & O_ACCMODE) == O_RDONLY)
71                 posix_flags = SMB_O_RDONLY;
72         else if ((flags & O_ACCMODE) == O_WRONLY)
73                 posix_flags = SMB_O_WRONLY;
74         else if ((flags & O_ACCMODE) == O_RDWR)
75                 posix_flags = SMB_O_RDWR;
76
77         if (flags & O_CREAT) {
78                 posix_flags |= SMB_O_CREAT;
79                 if (flags & O_EXCL)
80                         posix_flags |= SMB_O_EXCL;
81         } else if (flags & O_EXCL)
82                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83                          current->comm, current->tgid);
84
85         if (flags & O_TRUNC)
86                 posix_flags |= SMB_O_TRUNC;
87         /* be safe and imply O_SYNC for O_DSYNC */
88         if (flags & O_DSYNC)
89                 posix_flags |= SMB_O_SYNC;
90         if (flags & O_DIRECTORY)
91                 posix_flags |= SMB_O_DIRECTORY;
92         if (flags & O_NOFOLLOW)
93                 posix_flags |= SMB_O_NOFOLLOW;
94         if (flags & O_DIRECT)
95                 posix_flags |= SMB_O_DIRECT;
96
97         return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
113
/*
 * cifs_posix_open - open a file via the SMB POSIX extensions
 * @full_path: path of the file relative to the share root
 * @pinode: optional in/out inode pointer; if *pinode is NULL a new inode
 *	is created from the returned attributes, otherwise the existing
 *	inode's attributes are refreshed.  Pass NULL if inode info is not
 *	needed.
 * @sb: superblock of the mount
 * @mode: create mode (the caller's umask is applied below)
 * @f_flags: VFS open flags, translated to SMB_O_* bits
 * @poplock: out: oplock granted by the server
 * @pnetfid: out: server file handle
 * @xid: operation id passed through to the SMB call
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	/* response buffer for the returned unix attributes */
	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	/* tlink reference no longer needed once the call completed */
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type == -1 means the server returned no usable attribute data */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* reuse the caller's inode, just refresh its attributes */
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174
/*
 * Open @full_path the traditional (non-POSIX-extensions) way: translate
 * f_flags into NT desired access, disposition and create options, issue
 * the open through server->ops->open(), then refresh @inode from the
 * server.  If the inode refresh fails, the just-opened handle is closed
 * again and -ESTALE is mapped to -EOPENSTALE.  Returns 0 or a negative
 * errno; on success @fid holds the open server handle.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* scratch buffer for the open response, also fed to get_inode_info */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* don't leak the handle if the inode refresh failed */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
266
267 static bool
268 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
269 {
270         struct cifs_fid_locks *cur;
271         bool has_locks = false;
272
273         down_read(&cinode->lock_sem);
274         list_for_each_entry(cur, &cinode->llist, llist) {
275                 if (!list_empty(&cur->locks)) {
276                         has_locks = true;
277                         break;
278                 }
279         }
280         up_read(&cinode->lock_sem);
281         return has_locks;
282 }
283
/*
 * Acquire @sem for writing by polling the trylock with a 10ms sleep
 * between attempts instead of blocking on the semaphore directly.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
290
291 static void cifsFileInfo_put_work(struct work_struct *work);
292
/*
 * Allocate and initialise a cifsFileInfo for a freshly opened server
 * handle and link it onto the tcon and inode open-file lists.  Consumes
 * the pending_open entry referenced by @fid.  Returns the new structure
 * (also stored in file->private_data) or NULL on allocation failure, in
 * which case the caller still owns the server handle.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	/* initial reference; dropped via cifsFileInfo_put() */
	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	/* lock order: tcon->open_file_lock before cinode->open_file_lock */
	spin_lock(&tcon->open_file_lock);
	/* honour any oplock level recorded on the pending open entry */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	/* set_fid() may raise fid->purge_cache; it is checked below */
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
372
/*
 * Take an additional reference on @cifs_file under file_info_lock.
 * Returns @cifs_file for caller convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
381
/*
 * Final teardown of a cifsFileInfo after its last reference is dropped:
 * discard any remaining byte-range lock records, unlink the per-fid
 * lock list, then release the tlink, dentry and superblock references
 * and free the structure itself.
 */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	/* drop the references taken in cifs_new_fileinfo() */
	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
408
409 static void cifsFileInfo_put_work(struct work_struct *work)
410 {
411         struct cifsFileInfo *cifs_file = container_of(work,
412                         struct cifsFileInfo, put);
413
414         cifsFileInfo_put_final(cifs_file);
415 }
416
/**
 * cifsFileInfo_put - release a reference of file priv data
 * @cifs_file: the file info to release
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
426
427 /**
428  * _cifsFileInfo_put - release a reference of file priv data
429  *
430  * This may involve closing the filehandle @cifs_file out on the
431  * server. Must be called without holding tcon->open_file_lock,
432  * cinode->open_file_lock and cifs_file->file_info_lock.
433  *
434  * If @wait_for_oplock_handler is true and we are releasing the last
435  * reference, wait for any running oplock break handler of the file
436  * and cancel any pending one. If calling this function from the
437  * oplock break handler, you need to pass false.
438  *
439  */
440 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
441                        bool wait_oplock_handler, bool offload)
442 {
443         struct inode *inode = d_inode(cifs_file->dentry);
444         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
445         struct TCP_Server_Info *server = tcon->ses->server;
446         struct cifsInodeInfo *cifsi = CIFS_I(inode);
447         struct super_block *sb = inode->i_sb;
448         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
449         struct cifs_fid fid;
450         struct cifs_pending_open open;
451         bool oplock_break_cancelled;
452
453         spin_lock(&tcon->open_file_lock);
454         spin_lock(&cifsi->open_file_lock);
455         spin_lock(&cifs_file->file_info_lock);
456         if (--cifs_file->count > 0) {
457                 spin_unlock(&cifs_file->file_info_lock);
458                 spin_unlock(&cifsi->open_file_lock);
459                 spin_unlock(&tcon->open_file_lock);
460                 return;
461         }
462         spin_unlock(&cifs_file->file_info_lock);
463
464         if (server->ops->get_lease_key)
465                 server->ops->get_lease_key(inode, &fid);
466
467         /* store open in pending opens to make sure we don't miss lease break */
468         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
469
470         /* remove it from the lists */
471         list_del(&cifs_file->flist);
472         list_del(&cifs_file->tlist);
473         atomic_dec(&tcon->num_local_opens);
474
475         if (list_empty(&cifsi->openFileList)) {
476                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
477                          d_inode(cifs_file->dentry));
478                 /*
479                  * In strict cache mode we need invalidate mapping on the last
480                  * close  because it may cause a error when we open this file
481                  * again and get at least level II oplock.
482                  */
483                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
484                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
485                 cifs_set_oplock_level(cifsi, 0);
486         }
487
488         spin_unlock(&cifsi->open_file_lock);
489         spin_unlock(&tcon->open_file_lock);
490
491         oplock_break_cancelled = wait_oplock_handler ?
492                 cancel_work_sync(&cifs_file->oplock_break) : false;
493
494         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
495                 struct TCP_Server_Info *server = tcon->ses->server;
496                 unsigned int xid;
497
498                 xid = get_xid();
499                 if (server->ops->close_getattr)
500                         server->ops->close_getattr(xid, tcon, cifs_file);
501                 else if (server->ops->close)
502                         server->ops->close(xid, tcon, &cifs_file->fid);
503                 _free_xid(xid);
504         }
505
506         if (oplock_break_cancelled)
507                 cifs_done_oplock_break(cifsi);
508
509         cifs_del_pending_open(&open);
510
511         if (offload)
512                 queue_work(fileinfo_put_wq, &cifs_file->put);
513         else
514                 cifsFileInfo_put_final(cifs_file);
515 }
516
/*
 * ->open() for cifs regular files.  Builds the share-relative path,
 * tries an SMB POSIX open when the server advertises the capability,
 * and otherwise falls back to cifs_nt_open().  On success a new
 * cifsFileInfo is created and stored in file->private_data.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* direct I/O on a strict-cache mount switches the file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server misbehaved - don't try posix open again */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* track the open so a concurrent lease break is not lost */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		/*
		 * NOTE(review): get_lease_key was already called just above;
		 * this second call looks redundant - confirm before removing.
		 */
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* close the server handle we can no longer track */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
643
644 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
645
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.  Pushes cached posix locks when the server
 * supports the unix fcntl capability (and the mount allows it),
 * otherwise pushes mandatory locks.  Returns 0 or a negative errno.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* nested annotation: lock_sem may already be held by our caller */
	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
675
/*
 * Reopen @cfile after its server handle became invalid (typically on
 * reconnect).  An SMB POSIX reopen is attempted first when supported,
 * otherwise a plain FILE_OPEN is issued with the original flags minus
 * O_CREAT/O_EXCL/O_TRUNC (those already took effect at first open).
 * When @can_flush is true, dirty pages are written back and the inode
 * refreshed from the server after the reopen succeeds.  Byte-range
 * locks are re-pushed via cifs_relock_file() when needed.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* handle already valid (raced with another reopen) */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	/*
	 * reconnect was set true above, so ops->open apparently clears it
	 * when the durable handle could not be reclaimed - TODO confirm
	 */
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
841
842 int cifs_close(struct inode *inode, struct file *file)
843 {
844         if (file->private_data != NULL) {
845                 _cifsFileInfo_put(file->private_data, true, false);
846                 file->private_data = NULL;
847         }
848
849         /* return code from the ->release op is always ignored */
850         return 0;
851 }
852
853 void
854 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
855 {
856         struct cifsFileInfo *open_file;
857         struct list_head *tmp;
858         struct list_head *tmp1;
859         struct list_head tmp_list;
860
861         if (!tcon->use_persistent || !tcon->need_reopen_files)
862                 return;
863
864         tcon->need_reopen_files = false;
865
866         cifs_dbg(FYI, "Reopen persistent handles");
867         INIT_LIST_HEAD(&tmp_list);
868
869         /* list all files open on tree connection, reopen resilient handles  */
870         spin_lock(&tcon->open_file_lock);
871         list_for_each(tmp, &tcon->openFileList) {
872                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
873                 if (!open_file->invalidHandle)
874                         continue;
875                 cifsFileInfo_get(open_file);
876                 list_add_tail(&open_file->rlist, &tmp_list);
877         }
878         spin_unlock(&tcon->open_file_lock);
879
880         list_for_each_safe(tmp, tmp1, &tmp_list) {
881                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
882                 if (cifs_reopen_file(open_file, false /* do not flush */))
883                         tcon->need_reopen_files = true;
884                 list_del_init(&open_file->rlist);
885                 cifsFileInfo_put(open_file);
886         }
887 }
888
/*
 * Release the private search state attached to an open directory.
 * Any error from closing the handle on the wire is logged and dropped;
 * the VFS ignores the return value of ->release anyway.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	/* nothing was ever opened on this file - nothing to release */
	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		/* mark handle stale before dropping the spinlock, then
		 * close it on the wire (cannot send requests under a lock) */
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	/* free any network response buffer still held by the search state */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
939
940 static struct cifsLockInfo *
941 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
942 {
943         struct cifsLockInfo *lock =
944                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
945         if (!lock)
946                 return lock;
947         lock->offset = offset;
948         lock->length = length;
949         lock->type = type;
950         lock->pid = current->tgid;
951         lock->flags = flags;
952         INIT_LIST_HEAD(&lock->blist);
953         init_waitqueue_head(&lock->block_q);
954         return lock;
955 }
956
957 void
958 cifs_del_lock_waiters(struct cifsLockInfo *lock)
959 {
960         struct cifsLockInfo *li, *tmp;
961         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
962                 list_del_init(&li->blist);
963                 wake_up(&li->block_q);
964         }
965 }
966
967 #define CIFS_LOCK_OP    0
968 #define CIFS_READ_OP    1
969 #define CIFS_WRITE_OP   2
970
/* @rw_check : CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
/*
 * Scan one fd's cached lock list for a lock that overlaps
 * [offset, offset + length) and is incompatible with the requested
 * @type/@flags.  Returns true and stores the conflicting lock through
 * *conf_lock (if non-NULL) on conflict, false otherwise.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* non-overlapping ranges can never conflict */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/* a shared request is compatible with the owner's own lock
		 * on the same fid or with another lock of the same type */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* OFD locks taken through the same fid do not conflict */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
1007
1008 bool
1009 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1010                         __u8 type, __u16 flags,
1011                         struct cifsLockInfo **conf_lock, int rw_check)
1012 {
1013         bool rc = false;
1014         struct cifs_fid_locks *cur;
1015         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1016
1017         list_for_each_entry(cur, &cinode->llist, llist) {
1018                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1019                                                  flags, cfile, conf_lock,
1020                                                  rw_check);
1021                 if (rc)
1022                         break;
1023         }
1024
1025         return rc;
1026 }
1027
1028 /*
1029  * Check if there is another lock that prevents us to set the lock (mandatory
1030  * style). If such a lock exists, update the flock structure with its
1031  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1032  * or leave it the same if we can't. Returns 0 if we don't need to request to
1033  * the server or 1 otherwise.
1034  */
1035 static int
1036 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1037                __u8 type, struct file_lock *flock)
1038 {
1039         int rc = 0;
1040         struct cifsLockInfo *conf_lock;
1041         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1042         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1043         bool exist;
1044
1045         down_read(&cinode->lock_sem);
1046
1047         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1048                                         flock->fl_flags, &conf_lock,
1049                                         CIFS_LOCK_OP);
1050         if (exist) {
1051                 flock->fl_start = conf_lock->offset;
1052                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1053                 flock->fl_pid = conf_lock->pid;
1054                 if (conf_lock->type & server->vals->shared_lock_type)
1055                         flock->fl_type = F_RDLCK;
1056                 else
1057                         flock->fl_type = F_WRLCK;
1058         } else if (!cinode->can_cache_brlcks)
1059                 rc = 1;
1060         else
1061                 flock->fl_type = F_UNLCK;
1062
1063         up_read(&cinode->lock_sem);
1064         return rc;
1065 }
1066
/*
 * Unconditionally append @lock to this fd's cached lock list.
 * lock_sem is taken for write since the per-inode lock lists are modified.
 */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
1075
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		/* no conflict and caching allowed - keep the lock local */
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/* queue behind the conflicting lock and sleep until the
		 * holder wakes us via cifs_del_lock_waiters() */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		/* interrupted - unhook ourselves from the waiter list */
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
1123
1124 /*
1125  * Check if there is another lock that prevents us to set the lock (posix
1126  * style). If such a lock exists, update the flock structure with its
1127  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1128  * or leave it the same if we can't. Returns 0 if we don't need to request to
1129  * the server or 1 otherwise.
1130  */
1131 static int
1132 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1133 {
1134         int rc = 0;
1135         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1136         unsigned char saved_type = flock->fl_type;
1137
1138         if ((flock->fl_flags & FL_POSIX) == 0)
1139                 return 1;
1140
1141         down_read(&cinode->lock_sem);
1142         posix_test_lock(file, flock);
1143
1144         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1145                 flock->fl_type = saved_type;
1146                 rc = 1;
1147         }
1148
1149         up_read(&cinode->lock_sem);
1150         return rc;
1151 }
1152
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	/* only POSIX-style locks are handled here */
	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		/* caching disabled - caller must send the lock to the server */
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		/* another lock blocks us - wait for it to be released and
		 * retry; drop the block entry if we were interrupted */
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
		if (!rc)
			goto try_again;
		locks_delete_block(flock);
	}
	return rc;
}
1185
/*
 * Push all cached byte-range locks of @cfile to the server via SMB1
 * LOCKING_ANDX, batching as many ranges per request as fit in the
 * server's negotiated buffer.  One pass is made per lock type (exclusive,
 * then shared).  Returns 0 or the last error seen from cifs_lockv().
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	/* one pass for exclusive ranges, one for shared ranges */
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	/* how many LOCKING_ANDX_RANGE elements fit in one request */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				/* request buffer full - flush this batch */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			/* send the final partial batch for this lock type */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
1262
1263 static __u32
1264 hash_lockowner(fl_owner_t owner)
1265 {
1266         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1267 }
1268
/* Snapshot of one posix brlock queued for pushing to the server. */
struct lock_to_push {
	struct list_head llist;	/* entry in the caller's locks_to_send list */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* hashed lock owner, sent in place of a pid */
	__u16 netfid;		/* SMB1 file handle the lock applies to */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1277
1278 static int
1279 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1280 {
1281         struct inode *inode = d_inode(cfile->dentry);
1282         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1283         struct file_lock *flock;
1284         struct file_lock_context *flctx = inode->i_flctx;
1285         unsigned int count = 0, i;
1286         int rc = 0, xid, type;
1287         struct list_head locks_to_send, *el;
1288         struct lock_to_push *lck, *tmp;
1289         __u64 length;
1290
1291         xid = get_xid();
1292
1293         if (!flctx)
1294                 goto out;
1295
1296         spin_lock(&flctx->flc_lock);
1297         list_for_each(el, &flctx->flc_posix) {
1298                 count++;
1299         }
1300         spin_unlock(&flctx->flc_lock);
1301
1302         INIT_LIST_HEAD(&locks_to_send);
1303
1304         /*
1305          * Allocating count locks is enough because no FL_POSIX locks can be
1306          * added to the list while we are holding cinode->lock_sem that
1307          * protects locking operations of this inode.
1308          */
1309         for (i = 0; i < count; i++) {
1310                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1311                 if (!lck) {
1312                         rc = -ENOMEM;
1313                         goto err_out;
1314                 }
1315                 list_add_tail(&lck->llist, &locks_to_send);
1316         }
1317
1318         el = locks_to_send.next;
1319         spin_lock(&flctx->flc_lock);
1320         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1321                 if (el == &locks_to_send) {
1322                         /*
1323                          * The list ended. We don't have enough allocated
1324                          * structures - something is really wrong.
1325                          */
1326                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1327                         break;
1328                 }
1329                 length = 1 + flock->fl_end - flock->fl_start;
1330                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1331                         type = CIFS_RDLCK;
1332                 else
1333                         type = CIFS_WRLCK;
1334                 lck = list_entry(el, struct lock_to_push, llist);
1335                 lck->pid = hash_lockowner(flock->fl_owner);
1336                 lck->netfid = cfile->fid.netfid;
1337                 lck->length = length;
1338                 lck->type = type;
1339                 lck->offset = flock->fl_start;
1340         }
1341         spin_unlock(&flctx->flc_lock);
1342
1343         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1344                 int stored_rc;
1345
1346                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1347                                              lck->offset, lck->length, NULL,
1348                                              lck->type, 0);
1349                 if (stored_rc)
1350                         rc = stored_rc;
1351                 list_del(&lck->llist);
1352                 kfree(lck);
1353         }
1354
1355 out:
1356         free_xid(xid);
1357         return rc;
1358 err_out:
1359         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1360                 list_del(&lck->llist);
1361                 kfree(lck);
1362         }
1363         goto out;
1364 }
1365
1366 static int
1367 cifs_push_locks(struct cifsFileInfo *cfile)
1368 {
1369         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1370         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1371         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1372         int rc = 0;
1373
1374         /* we are going to update can_cache_brlcks here - need a write access */
1375         cifs_down_write(&cinode->lock_sem);
1376         if (!cinode->can_cache_brlcks) {
1377                 up_write(&cinode->lock_sem);
1378                 return rc;
1379         }
1380
1381         if (cap_unix(tcon->ses) &&
1382             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1383             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1384                 rc = cifs_push_posix_locks(cfile);
1385         else
1386                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1387
1388         cinode->can_cache_brlcks = false;
1389         up_write(&cinode->lock_sem);
1390         return rc;
1391 }
1392
1393 static void
1394 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1395                 bool *wait_flag, struct TCP_Server_Info *server)
1396 {
1397         if (flock->fl_flags & FL_POSIX)
1398                 cifs_dbg(FYI, "Posix\n");
1399         if (flock->fl_flags & FL_FLOCK)
1400                 cifs_dbg(FYI, "Flock\n");
1401         if (flock->fl_flags & FL_SLEEP) {
1402                 cifs_dbg(FYI, "Blocking lock\n");
1403                 *wait_flag = true;
1404         }
1405         if (flock->fl_flags & FL_ACCESS)
1406                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1407         if (flock->fl_flags & FL_LEASE)
1408                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1409         if (flock->fl_flags &
1410             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1411                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1412                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1413
1414         *type = server->vals->large_lock_type;
1415         if (flock->fl_type == F_WRLCK) {
1416                 cifs_dbg(FYI, "F_WRLCK\n");
1417                 *type |= server->vals->exclusive_lock_type;
1418                 *lock = 1;
1419         } else if (flock->fl_type == F_UNLCK) {
1420                 cifs_dbg(FYI, "F_UNLCK\n");
1421                 *type |= server->vals->unlock_lock_type;
1422                 *unlock = 1;
1423                 /* Check if unlock includes more than one lock range */
1424         } else if (flock->fl_type == F_RDLCK) {
1425                 cifs_dbg(FYI, "F_RDLCK\n");
1426                 *type |= server->vals->shared_lock_type;
1427                 *lock = 1;
1428         } else if (flock->fl_type == F_EXLCK) {
1429                 cifs_dbg(FYI, "F_EXLCK\n");
1430                 *type |= server->vals->exclusive_lock_type;
1431                 *lock = 1;
1432         } else if (flock->fl_type == F_SHLCK) {
1433                 cifs_dbg(FYI, "F_SHLCK\n");
1434                 *type |= server->vals->shared_lock_type;
1435                 *lock = 1;
1436         } else
1437                 cifs_dbg(FYI, "Unknown type of lock\n");
1438 }
1439
/*
 * Handle an F_GETLK-style probe: determine whether @flock would conflict
 * with an existing lock and report the result through @flock.  For
 * mandatory locks the probe is done by taking the lock on the server and
 * releasing it again immediately.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* try to answer from the locally cached posix locks first */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	/* try to answer from the locally cached mandatory locks first */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		/* lock acquired - no conflict; release it again right away */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		/* even a shared probe failed - report an exclusive holder */
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed - retry as shared to classify the holder */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1508
1509 void
1510 cifs_move_llist(struct list_head *source, struct list_head *dest)
1511 {
1512         struct list_head *li, *tmp;
1513         list_for_each_safe(li, tmp, source)
1514                 list_move(li, dest);
1515 }
1516
1517 void
1518 cifs_free_llist(struct list_head *llist)
1519 {
1520         struct cifsLockInfo *li, *tmp;
1521         list_for_each_entry_safe(li, tmp, llist, llist) {
1522                 cifs_del_lock_waiters(li);
1523                 list_del(&li->llist);
1524                 kfree(li);
1525         }
1526 }
1527
1528 int
1529 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1530                   unsigned int xid)
1531 {
1532         int rc = 0, stored_rc;
1533         static const int types[] = {
1534                 LOCKING_ANDX_LARGE_FILES,
1535                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1536         };
1537         unsigned int i;
1538         unsigned int max_num, num, max_buf;
1539         LOCKING_ANDX_RANGE *buf, *cur;
1540         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1541         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1542         struct cifsLockInfo *li, *tmp;
1543         __u64 length = 1 + flock->fl_end - flock->fl_start;
1544         struct list_head tmp_llist;
1545
1546         INIT_LIST_HEAD(&tmp_llist);
1547
1548         /*
1549          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1550          * and check it before using.
1551          */
1552         max_buf = tcon->ses->server->maxBuf;
1553         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1554                 return -EINVAL;
1555
1556         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1557                      PAGE_SIZE);
1558         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1559                         PAGE_SIZE);
1560         max_num = (max_buf - sizeof(struct smb_hdr)) /
1561                                                 sizeof(LOCKING_ANDX_RANGE);
1562         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1563         if (!buf)
1564                 return -ENOMEM;
1565
1566         cifs_down_write(&cinode->lock_sem);
1567         for (i = 0; i < 2; i++) {
1568                 cur = buf;
1569                 num = 0;
1570                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1571                         if (flock->fl_start > li->offset ||
1572                             (flock->fl_start + length) <
1573                             (li->offset + li->length))
1574                                 continue;
1575                         if (current->tgid != li->pid)
1576                                 continue;
1577                         if (types[i] != li->type)
1578                                 continue;
1579                         if (cinode->can_cache_brlcks) {
1580                                 /*
1581                                  * We can cache brlock requests - simply remove
1582                                  * a lock from the file's list.
1583                                  */
1584                                 list_del(&li->llist);
1585                                 cifs_del_lock_waiters(li);
1586                                 kfree(li);
1587                                 continue;
1588                         }
1589                         cur->Pid = cpu_to_le16(li->pid);
1590                         cur->LengthLow = cpu_to_le32((u32)li->length);
1591                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1592                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1593                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1594                         /*
1595                          * We need to save a lock here to let us add it again to
1596                          * the file's list if the unlock range request fails on
1597                          * the server.
1598                          */
1599                         list_move(&li->llist, &tmp_llist);
1600                         if (++num == max_num) {
1601                                 stored_rc = cifs_lockv(xid, tcon,
1602                                                        cfile->fid.netfid,
1603                                                        li->type, num, 0, buf);
1604                                 if (stored_rc) {
1605                                         /*
1606                                          * We failed on the unlock range
1607                                          * request - add all locks from the tmp
1608                                          * list to the head of the file's list.
1609                                          */
1610                                         cifs_move_llist(&tmp_llist,
1611                                                         &cfile->llist->locks);
1612                                         rc = stored_rc;
1613                                 } else
1614                                         /*
1615                                          * The unlock range request succeed -
1616                                          * free the tmp list.
1617                                          */
1618                                         cifs_free_llist(&tmp_llist);
1619                                 cur = buf;
1620                                 num = 0;
1621                         } else
1622                                 cur++;
1623                 }
1624                 if (num) {
1625                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1626                                                types[i], num, 0, buf);
1627                         if (stored_rc) {
1628                                 cifs_move_llist(&tmp_llist,
1629                                                 &cfile->llist->locks);
1630                                 rc = stored_rc;
1631                         } else
1632                                 cifs_free_llist(&tmp_llist);
1633                 }
1634         }
1635
1636         up_write(&cinode->lock_sem);
1637         kfree(buf);
1638         return rc;
1639 }
1640
1641 static int
1642 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1643            bool wait_flag, bool posix_lck, int lock, int unlock,
1644            unsigned int xid)
1645 {
1646         int rc = 0;
1647         __u64 length = 1 + flock->fl_end - flock->fl_start;
1648         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1649         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1650         struct TCP_Server_Info *server = tcon->ses->server;
1651         struct inode *inode = d_inode(cfile->dentry);
1652
1653         if (posix_lck) {
1654                 int posix_lock_type;
1655
1656                 rc = cifs_posix_lock_set(file, flock);
1657                 if (!rc || rc < 0)
1658                         return rc;
1659
1660                 if (type & server->vals->shared_lock_type)
1661                         posix_lock_type = CIFS_RDLCK;
1662                 else
1663                         posix_lock_type = CIFS_WRLCK;
1664
1665                 if (unlock == 1)
1666                         posix_lock_type = CIFS_UNLCK;
1667
1668                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1669                                       hash_lockowner(flock->fl_owner),
1670                                       flock->fl_start, length,
1671                                       NULL, posix_lock_type, wait_flag);
1672                 goto out;
1673         }
1674
1675         if (lock) {
1676                 struct cifsLockInfo *lock;
1677
1678                 lock = cifs_lock_init(flock->fl_start, length, type,
1679                                       flock->fl_flags);
1680                 if (!lock)
1681                         return -ENOMEM;
1682
1683                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1684                 if (rc < 0) {
1685                         kfree(lock);
1686                         return rc;
1687                 }
1688                 if (!rc)
1689                         goto out;
1690
1691                 /*
1692                  * Windows 7 server can delay breaking lease from read to None
1693                  * if we set a byte-range lock on a file - break it explicitly
1694                  * before sending the lock to the server to be sure the next
1695                  * read won't conflict with non-overlapted locks due to
1696                  * pagereading.
1697                  */
1698                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1699                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1700                         cifs_zap_mapping(inode);
1701                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1702                                  inode);
1703                         CIFS_I(inode)->oplock = 0;
1704                 }
1705
1706                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1707                                             type, 1, 0, wait_flag);
1708                 if (rc) {
1709                         kfree(lock);
1710                         return rc;
1711                 }
1712
1713                 cifs_lock_add(cfile, lock);
1714         } else if (unlock)
1715                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1716
1717 out:
1718         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1719                 /*
1720                  * If this is a request to remove all locks because we
1721                  * are closing the file, it doesn't matter if the
1722                  * unlocking failed as both cifs.ko and the SMB server
1723                  * remove the lock on file close
1724                  */
1725                 if (rc) {
1726                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1727                         if (!(flock->fl_flags & FL_CLOSE))
1728                                 return rc;
1729                 }
1730                 rc = locks_lock_file_wait(file, flock);
1731         }
1732         return rc;
1733 }
1734
1735 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1736 {
1737         int rc, xid;
1738         int lock = 0, unlock = 0;
1739         bool wait_flag = false;
1740         bool posix_lck = false;
1741         struct cifs_sb_info *cifs_sb;
1742         struct cifs_tcon *tcon;
1743         struct cifsFileInfo *cfile;
1744         __u32 type;
1745
1746         rc = -EACCES;
1747         xid = get_xid();
1748
1749         if (!(fl->fl_flags & FL_FLOCK))
1750                 return -ENOLCK;
1751
1752         cfile = (struct cifsFileInfo *)file->private_data;
1753         tcon = tlink_tcon(cfile->tlink);
1754
1755         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1756                         tcon->ses->server);
1757         cifs_sb = CIFS_FILE_SB(file);
1758
1759         if (cap_unix(tcon->ses) &&
1760             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1761             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1762                 posix_lck = true;
1763
1764         if (!lock && !unlock) {
1765                 /*
1766                  * if no lock or unlock then nothing to do since we do not
1767                  * know what it is
1768                  */
1769                 free_xid(xid);
1770                 return -EOPNOTSUPP;
1771         }
1772
1773         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1774                         xid);
1775         free_xid(xid);
1776         return rc;
1777
1778
1779 }
1780
1781 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1782 {
1783         int rc, xid;
1784         int lock = 0, unlock = 0;
1785         bool wait_flag = false;
1786         bool posix_lck = false;
1787         struct cifs_sb_info *cifs_sb;
1788         struct cifs_tcon *tcon;
1789         struct cifsFileInfo *cfile;
1790         __u32 type;
1791
1792         rc = -EACCES;
1793         xid = get_xid();
1794
1795         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1796                  cmd, flock->fl_flags, flock->fl_type,
1797                  flock->fl_start, flock->fl_end);
1798
1799         cfile = (struct cifsFileInfo *)file->private_data;
1800         tcon = tlink_tcon(cfile->tlink);
1801
1802         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1803                         tcon->ses->server);
1804         cifs_sb = CIFS_FILE_SB(file);
1805
1806         if (cap_unix(tcon->ses) &&
1807             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1808             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1809                 posix_lck = true;
1810         /*
1811          * BB add code here to normalize offset and length to account for
1812          * negative length which we can not accept over the wire.
1813          */
1814         if (IS_GETLK(cmd)) {
1815                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1816                 free_xid(xid);
1817                 return rc;
1818         }
1819
1820         if (!lock && !unlock) {
1821                 /*
1822                  * if no lock or unlock then nothing to do since we do not
1823                  * know what it is
1824                  */
1825                 free_xid(xid);
1826                 return -EOPNOTSUPP;
1827         }
1828
1829         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1830                         xid);
1831         free_xid(xid);
1832         return rc;
1833 }
1834
1835 /*
1836  * update the file size (if needed) after a write. Should be called with
1837  * the inode->i_lock held
1838  */
1839 void
1840 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1841                       unsigned int bytes_written)
1842 {
1843         loff_t end_of_write = offset + bytes_written;
1844
1845         if (end_of_write > cifsi->server_eof)
1846                 cifsi->server_eof = end_of_write;
1847 }
1848
/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * *@offset through the handle @open_file, retrying on -EAGAIN (reconnects)
 * and reopening an invalidated handle as needed.
 *
 * On success *@offset is advanced past the written data, the inode size and
 * cached server EOF are updated, and the number of bytes written is
 * returned. If nothing was written, the last error code is returned.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
           size_t write_size, loff_t *offset)
{
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        struct dentry *dentry = open_file->dentry;
        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
        struct cifs_io_parms io_parms;

        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
                 write_size, *offset, dentry);

        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        /* this dialect's ops table must provide a synchronous write */
        if (!server->ops->sync_write)
                return -ENOSYS;

        xid = get_xid();

        /* outer loop: one sync_write per iteration until all data is sent */
        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
                /* inner loop: retry the same chunk across reconnects */
                while (rc == -EAGAIN) {
                        struct kvec iov[2];
                        unsigned int len;

                        if (open_file->invalidHandle) {
                                /* we could deadlock if we called
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to
                                   server now */
                                rc = cifs_reopen_file(open_file, false);
                                if (rc != 0)
                                        break;
                        }

                        /* cap each chunk at the server's retry-safe size */
                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
                                  (unsigned int)write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
                        iov[1].iov_len = len;
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = len;
                        rc = server->ops->sync_write(xid, &open_file->fid,
                                        &io_parms, &bytes_written, iov, 1);
                }
                if (rc || (bytes_written == 0)) {
                        /* partial success: report what was written so far;
                           otherwise propagate the error */
                        if (total_written)
                                break;
                        else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        /* i_lock protects server_eof updates */
                        spin_lock(&d_inode(dentry)->i_lock);
                        cifs_update_eof(cifsi, *offset, bytes_written);
                        spin_unlock(&d_inode(dentry)->i_lock);
                        *offset += bytes_written;
                }
        }

        cifs_stats_bytes_written(tcon, total_written);

        if (total_written > 0) {
                /* grow the VFS inode size if the write extended the file */
                spin_lock(&d_inode(dentry)->i_lock);
                if (*offset > d_inode(dentry)->i_size)
                        i_size_write(d_inode(dentry), *offset);
                spin_unlock(&d_inode(dentry)->i_lock);
        }
        mark_inode_dirty_sync(d_inode(dentry));
        free_xid(xid);
        return total_written;
}
1930
1931 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1932                                         bool fsuid_only)
1933 {
1934         struct cifsFileInfo *open_file = NULL;
1935         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1936
1937         /* only filter by fsuid on multiuser mounts */
1938         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1939                 fsuid_only = false;
1940
1941         spin_lock(&cifs_inode->open_file_lock);
1942         /* we could simply get the first_list_entry since write-only entries
1943            are always at the end of the list but since the first entry might
1944            have a close pending, we go through the whole list */
1945         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1946                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1947                         continue;
1948                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1949                         if (!open_file->invalidHandle) {
1950                                 /* found a good file */
1951                                 /* lock it so it will not be closed on us */
1952                                 cifsFileInfo_get(open_file);
1953                                 spin_unlock(&cifs_inode->open_file_lock);
1954                                 return open_file;
1955                         } /* else might as well continue, and look for
1956                              another, or simply have the caller reopen it
1957                              again rather than trying to fix this handle */
1958                 } else /* write only file */
1959                         break; /* write only files are last so must be done */
1960         }
1961         spin_unlock(&cifs_inode->open_file_lock);
1962         return NULL;
1963 }
1964
/*
 * Find a valid writable open handle for this inode and return it referenced
 * through @ret_file. Prefers a handle opened by the current tgid, then any
 * handle; as a last resort tries to reopen an invalidated handle, retrying
 * up to MAX_REOPEN_ATT times.
 *
 * Return -EBADF if no handle is found and general rc otherwise
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
                       struct cifsFileInfo **ret_file)
{
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc = -EBADF;
        unsigned int refind = 0;

        *ret_file = NULL;

        /*
         * Having a null inode here (because mapping->host was set to zero by
         * the VFS or MM) should not happen but we had reports of on oops (due
         * to it being zero) during stress testcases so we need to check for it
         */

        if (cifs_inode == NULL) {
                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
                dump_stack();
                return rc;
        }

        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_inode->open_file_lock);
refind_writable:
        /* label must be entered with open_file_lock held */
        if (refind > MAX_REOPEN_ATT) {
                spin_unlock(&cifs_inode->open_file_lock);
                return rc;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                /* first pass restricts to handles of the current tgid */
                if (!any_available && open_file->pid != current->tgid)
                        continue;
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get(open_file);
                                spin_unlock(&cifs_inode->open_file_lock);
                                *ret_file = open_file;
                                return 0;
                        } else {
                                /* remember the first invalid candidate for
                                   a reopen attempt below */
                                if (!inv_file)
                                        inv_file = open_file;
                        }
                }
        }
        /* couldn't find useable FH with same pid, try any available */
        if (!any_available) {
                any_available = true;
                goto refind_writable;
        }

        if (inv_file) {
                any_available = false;
                cifsFileInfo_get(inv_file);
        }

        spin_unlock(&cifs_inode->open_file_lock);

        if (inv_file) {
                /* try to revive the invalidated handle (no data flush) */
                rc = cifs_reopen_file(inv_file, false);
                if (!rc) {
                        *ret_file = inv_file;
                        return 0;
                }

                /* reopen failed - demote this handle to the tail and retry
                   the whole search */
                spin_lock(&cifs_inode->open_file_lock);
                list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
                spin_unlock(&cifs_inode->open_file_lock);
                cifsFileInfo_put(inv_file);
                ++refind;
                inv_file = NULL;
                spin_lock(&cifs_inode->open_file_lock);
                goto refind_writable;
        }

        return rc;
}
2052
2053 struct cifsFileInfo *
2054 find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
2055 {
2056         struct cifsFileInfo *cfile;
2057         int rc;
2058
2059         rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
2060         if (rc)
2061                 cifs_dbg(FYI, "couldn't find writable handle rc=%d", rc);
2062
2063         return cfile;
2064 }
2065
2066 int
2067 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2068                        struct cifsFileInfo **ret_file)
2069 {
2070         struct list_head *tmp;
2071         struct cifsFileInfo *cfile;
2072         struct cifsInodeInfo *cinode;
2073         char *full_path;
2074
2075         *ret_file = NULL;
2076
2077         spin_lock(&tcon->open_file_lock);
2078         list_for_each(tmp, &tcon->openFileList) {
2079                 cfile = list_entry(tmp, struct cifsFileInfo,
2080                              tlist);
2081                 full_path = build_path_from_dentry(cfile->dentry);
2082                 if (full_path == NULL) {
2083                         spin_unlock(&tcon->open_file_lock);
2084                         return -ENOMEM;
2085                 }
2086                 if (strcmp(full_path, name)) {
2087                         kfree(full_path);
2088                         continue;
2089                 }
2090
2091                 kfree(full_path);
2092                 cinode = CIFS_I(d_inode(cfile->dentry));
2093                 spin_unlock(&tcon->open_file_lock);
2094                 return cifs_get_writable_file(cinode, 0, ret_file);
2095         }
2096
2097         spin_unlock(&tcon->open_file_lock);
2098         return -ENOENT;
2099 }
2100
2101 int
2102 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2103                        struct cifsFileInfo **ret_file)
2104 {
2105         struct list_head *tmp;
2106         struct cifsFileInfo *cfile;
2107         struct cifsInodeInfo *cinode;
2108         char *full_path;
2109
2110         *ret_file = NULL;
2111
2112         spin_lock(&tcon->open_file_lock);
2113         list_for_each(tmp, &tcon->openFileList) {
2114                 cfile = list_entry(tmp, struct cifsFileInfo,
2115                              tlist);
2116                 full_path = build_path_from_dentry(cfile->dentry);
2117                 if (full_path == NULL) {
2118                         spin_unlock(&tcon->open_file_lock);
2119                         return -ENOMEM;
2120                 }
2121                 if (strcmp(full_path, name)) {
2122                         kfree(full_path);
2123                         continue;
2124                 }
2125
2126                 kfree(full_path);
2127                 cinode = CIFS_I(d_inode(cfile->dentry));
2128                 spin_unlock(&tcon->open_file_lock);
2129                 *ret_file = find_readable_file(cinode, 0);
2130                 return *ret_file ? 0 : -ENOENT;
2131         }
2132
2133         spin_unlock(&tcon->open_file_lock);
2134         return -ENOENT;
2135 }
2136
/*
 * Write the byte range [from, to) of @page back to the server using any
 * writable handle for the owning inode.
 *
 * Returns 0 on success (including the benign race with truncate where the
 * page now lies past EOF), or a negative error code.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
        struct address_space *mapping = page->mapping;
        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
        char *write_data;
        int rc = -EFAULT;
        int bytes_written = 0;
        struct inode *inode;
        struct cifsFileInfo *open_file;

        if (!mapping || !mapping->host)
                return -EFAULT;

        inode = page->mapping->host;

        /* file offset of the first byte to write */
        offset += (loff_t)from;
        write_data = kmap(page);
        write_data += from;

        /* reject out-of-page or inverted ranges */
        if ((to > PAGE_SIZE) || (from > to)) {
                kunmap(page);
                return -EIO;
        }

        /* racing with truncate? */
        if (offset > mapping->host->i_size) {
                kunmap(page);
                return 0; /* don't care */
        }

        /* check to make sure that we are not extending the file */
        if (mapping->host->i_size - offset < (loff_t)to)
                to = (unsigned)(mapping->host->i_size - offset);

        rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
        if (!rc) {
                bytes_written = cifs_write(open_file, open_file->pid,
                                           write_data, to - from, &offset);
                cifsFileInfo_put(open_file);
                /* Does mm or vfs already set times? */
                inode->i_atime = inode->i_mtime = current_time(inode);
                if ((bytes_written > 0) && (offset))
                        rc = 0;
                else if (bytes_written < 0)
                        rc = bytes_written;
                else
                        rc = -EFAULT;
        } else {
                cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
                /* non-retryable lookup failures are reported as I/O errors */
                if (!is_retryable_error(rc))
                        rc = -EIO;
        }

        kunmap(page);
        return rc;
}
2193
2194 static struct cifs_writedata *
2195 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2196                           pgoff_t end, pgoff_t *index,
2197                           unsigned int *found_pages)
2198 {
2199         struct cifs_writedata *wdata;
2200
2201         wdata = cifs_writedata_alloc((unsigned int)tofind,
2202                                      cifs_writev_complete);
2203         if (!wdata)
2204                 return NULL;
2205
2206         *found_pages = find_get_pages_range_tag(mapping, index, end,
2207                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2208         return wdata;
2209 }
2210
/*
 * From the @found_pages candidate pages in @wdata, lock and mark for
 * writeback the longest run of consecutive pages starting at pages[0] that
 * is still eligible (dirty, in @mapping, within range, below EOF). Pages
 * not used are released. Sets *done when writeback should stop and *next to
 * the index expected for the next run.
 *
 * Returns the number of pages prepared (these remain locked and in
 * writeback state for the caller to send).
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
                    struct address_space *mapping,
                    struct writeback_control *wbc,
                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
        unsigned int nr_pages = 0, i;
        struct page *page;

        for (i = 0; i < found_pages; i++) {
                page = wdata->pages[i];
                /*
                 * At this point we hold neither the i_pages lock nor the
                 * page lock: the page may be truncated or invalidated
                 * (changing page->mapping to NULL), or even swizzled
                 * back from swapper_space to tmpfs file mapping
                 */

                /* block for the first page, but never mid-run */
                if (nr_pages == 0)
                        lock_page(page);
                else if (!trylock_page(page))
                        break;

                /* page was truncated/invalidated while unlocked */
                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
                        break;
                }

                if (!wbc->range_cyclic && page->index > end) {
                        *done = true;
                        unlock_page(page);
                        break;
                }

                if (*next && (page->index != *next)) {
                        /* Not next consecutive page */
                        unlock_page(page);
                        break;
                }

                /* for data-integrity writeback, wait out in-flight I/O */
                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                if (PageWriteback(page) ||
                                !clear_page_dirty_for_io(page)) {
                        unlock_page(page);
                        break;
                }

                /*
                 * This actually clears the dirty bit in the radix tree.
                 * See cifs_writepage() for more commentary.
                 */
                set_page_writeback(page);
                /* page starts at or past EOF - nothing to write */
                if (page_offset(page) >= i_size_read(mapping->host)) {
                        *done = true;
                        unlock_page(page);
                        end_page_writeback(page);
                        break;
                }

                wdata->pages[i] = page;
                *next = page->index + 1;
                ++nr_pages;
        }

        /* reset index to refind any pages skipped */
        if (nr_pages == 0)
                *index = wdata->pages[0]->index + 1;

        /* put any pages we aren't going to use */
        for (i = nr_pages; i < found_pages; i++) {
                put_page(wdata->pages[i]);
                wdata->pages[i] = NULL;
        }

        return nr_pages;
}
2289
2290 static int
2291 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2292                  struct address_space *mapping, struct writeback_control *wbc)
2293 {
2294         int rc;
2295         struct TCP_Server_Info *server =
2296                                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2297
2298         wdata->sync_mode = wbc->sync_mode;
2299         wdata->nr_pages = nr_pages;
2300         wdata->offset = page_offset(wdata->pages[0]);
2301         wdata->pagesz = PAGE_SIZE;
2302         wdata->tailsz = min(i_size_read(mapping->host) -
2303                         page_offset(wdata->pages[nr_pages - 1]),
2304                         (loff_t)PAGE_SIZE);
2305         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2306         wdata->pid = wdata->cfile->pid;
2307
2308         rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2309         if (rc)
2310                 return rc;
2311
2312         if (wdata->cfile->invalidHandle)
2313                 rc = -EAGAIN;
2314         else
2315                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2316
2317         return rc;
2318 }
2319
2320 static int cifs_writepages(struct address_space *mapping,
2321                            struct writeback_control *wbc)
2322 {
2323         struct inode *inode = mapping->host;
2324         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2325         struct TCP_Server_Info *server;
2326         bool done = false, scanned = false, range_whole = false;
2327         pgoff_t end, index;
2328         struct cifs_writedata *wdata;
2329         struct cifsFileInfo *cfile = NULL;
2330         int rc = 0;
2331         int saved_rc = 0;
2332         unsigned int xid;
2333
2334         /*
2335          * If wsize is smaller than the page cache size, default to writing
2336          * one page at a time via cifs_writepage
2337          */
2338         if (cifs_sb->wsize < PAGE_SIZE)
2339                 return generic_writepages(mapping, wbc);
2340
2341         xid = get_xid();
2342         if (wbc->range_cyclic) {
2343                 index = mapping->writeback_index; /* Start from prev offset */
2344                 end = -1;
2345         } else {
2346                 index = wbc->range_start >> PAGE_SHIFT;
2347                 end = wbc->range_end >> PAGE_SHIFT;
2348                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2349                         range_whole = true;
2350                 scanned = true;
2351         }
2352         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2353 retry:
2354         while (!done && index <= end) {
2355                 unsigned int i, nr_pages, found_pages, wsize;
2356                 pgoff_t next = 0, tofind, saved_index = index;
2357                 struct cifs_credits credits_on_stack;
2358                 struct cifs_credits *credits = &credits_on_stack;
2359                 int get_file_rc = 0;
2360
2361                 if (cfile)
2362                         cifsFileInfo_put(cfile);
2363
2364                 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2365
2366                 /* in case of an error store it to return later */
2367                 if (rc)
2368                         get_file_rc = rc;
2369
2370                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2371                                                    &wsize, credits);
2372                 if (rc != 0) {
2373                         done = true;
2374                         break;
2375                 }
2376
2377                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2378
2379                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2380                                                   &found_pages);
2381                 if (!wdata) {
2382                         rc = -ENOMEM;
2383                         done = true;
2384                         add_credits_and_wake_if(server, credits, 0);
2385                         break;
2386                 }
2387
2388                 if (found_pages == 0) {
2389                         kref_put(&wdata->refcount, cifs_writedata_release);
2390                         add_credits_and_wake_if(server, credits, 0);
2391                         break;
2392                 }
2393
2394                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2395                                                end, &index, &next, &done);
2396
2397                 /* nothing to write? */
2398                 if (nr_pages == 0) {
2399                         kref_put(&wdata->refcount, cifs_writedata_release);
2400                         add_credits_and_wake_if(server, credits, 0);
2401                         continue;
2402                 }
2403
2404                 wdata->credits = credits_on_stack;
2405                 wdata->cfile = cfile;
2406                 cfile = NULL;
2407
2408                 if (!wdata->cfile) {
2409                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2410                                  get_file_rc);
2411                         if (is_retryable_error(get_file_rc))
2412                                 rc = get_file_rc;
2413                         else
2414                                 rc = -EBADF;
2415                 } else
2416                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2417
2418                 for (i = 0; i < nr_pages; ++i)
2419                         unlock_page(wdata->pages[i]);
2420
2421                 /* send failure -- clean up the mess */
2422                 if (rc != 0) {
2423                         add_credits_and_wake_if(server, &wdata->credits, 0);
2424                         for (i = 0; i < nr_pages; ++i) {
2425                                 if (is_retryable_error(rc))
2426                                         redirty_page_for_writepage(wbc,
2427                                                            wdata->pages[i]);
2428                                 else
2429                                         SetPageError(wdata->pages[i]);
2430                                 end_page_writeback(wdata->pages[i]);
2431                                 put_page(wdata->pages[i]);
2432                         }
2433                         if (!is_retryable_error(rc))
2434                                 mapping_set_error(mapping, rc);
2435                 }
2436                 kref_put(&wdata->refcount, cifs_writedata_release);
2437
2438                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2439                         index = saved_index;
2440                         continue;
2441                 }
2442
2443                 /* Return immediately if we received a signal during writing */
2444                 if (is_interrupt_error(rc)) {
2445                         done = true;
2446                         break;
2447                 }
2448
2449                 if (rc != 0 && saved_rc == 0)
2450                         saved_rc = rc;
2451
2452                 wbc->nr_to_write -= nr_pages;
2453                 if (wbc->nr_to_write <= 0)
2454                         done = true;
2455
2456                 index = next;
2457         }
2458
2459         if (!scanned && !done) {
2460                 /*
2461                  * We hit the last page and there is more work to be done: wrap
2462                  * back to the start of the file
2463                  */
2464                 scanned = true;
2465                 index = 0;
2466                 goto retry;
2467         }
2468
2469         if (saved_rc != 0)
2470                 rc = saved_rc;
2471
2472         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2473                 mapping->writeback_index = index;
2474
2475         if (cfile)
2476                 cifsFileInfo_put(cfile);
2477         free_xid(xid);
2478         return rc;
2479 }
2480
2481 static int
2482 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2483 {
2484         int rc;
2485         unsigned int xid;
2486
2487         xid = get_xid();
2488 /* BB add check for wbc flags */
2489         get_page(page);
2490         if (!PageUptodate(page))
2491                 cifs_dbg(FYI, "ppw - page not up to date\n");
2492
2493         /*
2494          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2495          *
2496          * A writepage() implementation always needs to do either this,
2497          * or re-dirty the page with "redirty_page_for_writepage()" in
2498          * the case of a failure.
2499          *
2500          * Just unlocking the page will cause the radix tree tag-bits
2501          * to fail to update with the state of the page correctly.
2502          */
2503         set_page_writeback(page);
2504 retry_write:
2505         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2506         if (is_retryable_error(rc)) {
2507                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2508                         goto retry_write;
2509                 redirty_page_for_writepage(wbc, page);
2510         } else if (rc != 0) {
2511                 SetPageError(page);
2512                 mapping_set_error(page->mapping, rc);
2513         } else {
2514                 SetPageUptodate(page);
2515         }
2516         end_page_writeback(page);
2517         put_page(page);
2518         free_xid(xid);
2519         return rc;
2520 }
2521
/* ->writepage entry point: write the page, then release the page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}
2528
/*
 * ->write_end for the CIFS address space: commit @copied bytes that
 * write_begin prepared into @page at position @pos.
 *
 * If the page is up to date we simply mark it dirty and let writeback
 * push it out later; otherwise we write the partial data synchronously
 * to the server via cifs_write() using this file's handle. Returns the
 * number of bytes accounted, or a negative error from cifs_write().
 * Drops the page lock and the page reference taken by write_begin.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* with RWPIDFORWARD the opener's pid is forwarded on the wire */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/*
	 * PageChecked is set by write_begin when it skipped reading the
	 * page in (a full-page overwrite was expected); only a complete
	 * copy makes the page uptodate in that case.
	 */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		/* partial page, not uptodate: write it through now */
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		/* cached path: just dirty the page for later writeback */
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	/* extend i_size if this write grew the file */
	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);

	return rc;
}
2589
2590 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2591                       int datasync)
2592 {
2593         unsigned int xid;
2594         int rc = 0;
2595         struct cifs_tcon *tcon;
2596         struct TCP_Server_Info *server;
2597         struct cifsFileInfo *smbfile = file->private_data;
2598         struct inode *inode = file_inode(file);
2599         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2600
2601         rc = file_write_and_wait_range(file, start, end);
2602         if (rc)
2603                 return rc;
2604
2605         xid = get_xid();
2606
2607         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2608                  file, datasync);
2609
2610         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2611                 rc = cifs_zap_mapping(inode);
2612                 if (rc) {
2613                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2614                         rc = 0; /* don't care about it in fsync */
2615                 }
2616         }
2617
2618         tcon = tlink_tcon(smbfile->tlink);
2619         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2620                 server = tcon->ses->server;
2621                 if (server->ops->flush)
2622                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2623                 else
2624                         rc = -ENOSYS;
2625         }
2626
2627         free_xid(xid);
2628         return rc;
2629 }
2630
2631 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2632 {
2633         unsigned int xid;
2634         int rc = 0;
2635         struct cifs_tcon *tcon;
2636         struct TCP_Server_Info *server;
2637         struct cifsFileInfo *smbfile = file->private_data;
2638         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2639
2640         rc = file_write_and_wait_range(file, start, end);
2641         if (rc)
2642                 return rc;
2643
2644         xid = get_xid();
2645
2646         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2647                  file, datasync);
2648
2649         tcon = tlink_tcon(smbfile->tlink);
2650         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2651                 server = tcon->ses->server;
2652                 if (server->ops->flush)
2653                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2654                 else
2655                         rc = -ENOSYS;
2656         }
2657
2658         free_xid(xid);
2659         return rc;
2660 }
2661
2662 /*
2663  * As file closes, flush all cached write data for this inode checking
2664  * for write behind errors.
2665  */
2666 int cifs_flush(struct file *file, fl_owner_t id)
2667 {
2668         struct inode *inode = file_inode(file);
2669         int rc = 0;
2670
2671         if (file->f_mode & FMODE_WRITE)
2672                 rc = filemap_write_and_wait(inode->i_mapping);
2673
2674         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2675
2676         return rc;
2677 }
2678
2679 static int
2680 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2681 {
2682         int rc = 0;
2683         unsigned long i;
2684
2685         for (i = 0; i < num_pages; i++) {
2686                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2687                 if (!pages[i]) {
2688                         /*
2689                          * save number of pages we have already allocated and
2690                          * return with ENOMEM error
2691                          */
2692                         num_pages = i;
2693                         rc = -ENOMEM;
2694                         break;
2695                 }
2696         }
2697
2698         if (rc) {
2699                 for (i = 0; i < num_pages; i++)
2700                         put_page(pages[i]);
2701         }
2702         return rc;
2703 }
2704
2705 static inline
2706 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2707 {
2708         size_t num_pages;
2709         size_t clen;
2710
2711         clen = min_t(const size_t, len, wsize);
2712         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2713
2714         if (cur_len)
2715                 *cur_len = clen;
2716
2717         return num_pages;
2718 }
2719
2720 static void
2721 cifs_uncached_writedata_release(struct kref *refcount)
2722 {
2723         int i;
2724         struct cifs_writedata *wdata = container_of(refcount,
2725                                         struct cifs_writedata, refcount);
2726
2727         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2728         for (i = 0; i < wdata->nr_pages; i++)
2729                 put_page(wdata->pages[i]);
2730         cifs_writedata_release(refcount);
2731 }
2732
2733 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2734
/*
 * Work item run when an async uncached write finishes: advance the
 * cached server EOF (and i_size if the file grew), signal waiters on
 * this wdata, then let the aio context collect all completed writes.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/* i_lock protects both server_eof bookkeeping and i_size update */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	/* complete before collecting so waiters see wdata->done set */
	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2754
2755 static int
2756 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2757                       size_t *len, unsigned long *num_pages)
2758 {
2759         size_t save_len, copied, bytes, cur_len = *len;
2760         unsigned long i, nr_pages = *num_pages;
2761
2762         save_len = cur_len;
2763         for (i = 0; i < nr_pages; i++) {
2764                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2765                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2766                 cur_len -= copied;
2767                 /*
2768                  * If we didn't copy as much as we expected, then that
2769                  * may mean we trod into an unmapped area. Stop copying
2770                  * at that point. On the next pass through the big
2771                  * loop, we'll likely end up getting a zero-length
2772                  * write and bailing out of it.
2773                  */
2774                 if (copied < bytes)
2775                         break;
2776         }
2777         cur_len = save_len - cur_len;
2778         *len = cur_len;
2779
2780         /*
2781          * If we have no data to send, then that probably means that
2782          * the copy above failed altogether. That's most likely because
2783          * the address in the iovec was bogus. Return -EFAULT and let
2784          * the caller free anything we allocated and bail out.
2785          */
2786         if (!cur_len)
2787                 return -EFAULT;
2788
2789         /*
2790          * i + 1 now represents the number of pages we actually used in
2791          * the copy phase above.
2792          */
2793         *num_pages = i + 1;
2794         return 0;
2795 }
2796
/*
 * Resend a previously-failed uncached write. Reopens the file handle if
 * it went invalid, waits until enough credits are available to send the
 * entire wdata in one request, and retries the async write for as long
 * as it fails with -EAGAIN. On success the wdata is queued on
 * @wdata_list; on any failure the wdata reference is dropped.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server =
		tlink_tcon(wdata->cfile->tlink)->ses->server;

	do {
		/* a reconnect may have invalidated the handle - reopen it */
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			/*
			 * Not enough credits for the full request yet:
			 * give them back and wait before asking again.
			 */
			if (wsize < wdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			/* handle may have gone bad again while we waited */
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* the old memory registration is stale now */
				if (wdata->mr) {
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}
2867
/*
 * Split an uncached/direct write of @len bytes at @offset into one or
 * more credit-bounded async write requests and submit them. Each
 * submitted wdata is queued on @wdata_list for later collection by
 * collect_uncached_write_data(). For direct I/O the user pages are
 * pinned in place; otherwise data is copied into freshly allocated
 * pages. Returns 0 if everything was submitted, else the last error
 * (already-submitted wdatas remain on the list).
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	/* with RWPIDFORWARD the opener's pid is sent with each request */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = tlink_tcon(open_file->tlink)->ses->server;
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		/* reopen the handle if a reconnect invalidated it */
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* each chunk is bounded by the credits we can get */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			/* pin the user pages directly - no copy */
			ssize_t result;

			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					"direct_writev couldn't get user pages "
					"(rc=%zd) iter type %d iov_offset %zd "
					"count %zd\n",
					result, iov_iter_type(from),
					from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}


			/* data may start mid-page; compute the tail size */
			wdata->page_offset = start;
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			/* buffered path: allocate pages and copy into them */
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		/* the wdata holds a reference on the aio context */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator and retry this chunk */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3034
3035 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3036 {
3037         struct cifs_writedata *wdata, *tmp;
3038         struct cifs_tcon *tcon;
3039         struct cifs_sb_info *cifs_sb;
3040         struct dentry *dentry = ctx->cfile->dentry;
3041         int rc;
3042
3043         tcon = tlink_tcon(ctx->cfile->tlink);
3044         cifs_sb = CIFS_SB(dentry->d_sb);
3045
3046         mutex_lock(&ctx->aio_mutex);
3047
3048         if (list_empty(&ctx->list)) {
3049                 mutex_unlock(&ctx->aio_mutex);
3050                 return;
3051         }
3052
3053         rc = ctx->rc;
3054         /*
3055          * Wait for and collect replies for any successful sends in order of
3056          * increasing offset. Once an error is hit, then return without waiting
3057          * for any more replies.
3058          */
3059 restart_loop:
3060         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3061                 if (!rc) {
3062                         if (!try_wait_for_completion(&wdata->done)) {
3063                                 mutex_unlock(&ctx->aio_mutex);
3064                                 return;
3065                         }
3066
3067                         if (wdata->result)
3068                                 rc = wdata->result;
3069                         else
3070                                 ctx->total_len += wdata->bytes;
3071
3072                         /* resend call if it's a retryable error */
3073                         if (rc == -EAGAIN) {
3074                                 struct list_head tmp_list;
3075                                 struct iov_iter tmp_from = ctx->iter;
3076
3077                                 INIT_LIST_HEAD(&tmp_list);
3078                                 list_del_init(&wdata->list);
3079
3080                                 if (ctx->direct_io)
3081                                         rc = cifs_resend_wdata(
3082                                                 wdata, &tmp_list, ctx);
3083                                 else {
3084                                         iov_iter_advance(&tmp_from,
3085                                                  wdata->offset - ctx->pos);
3086
3087                                         rc = cifs_write_from_iter(wdata->offset,
3088                                                 wdata->bytes, &tmp_from,
3089                                                 ctx->cfile, cifs_sb, &tmp_list,
3090                                                 ctx);
3091
3092                                         kref_put(&wdata->refcount,
3093                                                 cifs_uncached_writedata_release);
3094                                 }
3095
3096                                 list_splice(&tmp_list, &ctx->list);
3097                                 goto restart_loop;
3098                         }
3099                 }
3100                 list_del_init(&wdata->list);
3101                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3102         }
3103
3104         cifs_stats_bytes_written(tcon, ctx->total_len);
3105         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3106
3107         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3108
3109         mutex_unlock(&ctx->aio_mutex);
3110
3111         if (ctx->iocb && ctx->iocb->ki_complete)
3112                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3113         else
3114                 complete(&ctx->done);
3115 }
3116
/*
 * Common implementation behind cifs_user_writev / cifs_direct_writev.
 * Sets up an aio context, submits the write(s) via cifs_write_from_iter,
 * and either returns -EIOCBQUEUED (async iocb) or waits for collection
 * and returns the number of bytes written / an error. @direct selects
 * zero-copy page pinning; it is forced off for ITER_KVEC iterators.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(from)) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx holds a reference on the file until collection finishes */
	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		/* copy the iovec so completion handlers can replay it */
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: ki_complete will report the result later */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report whatever completed so far */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3220
3221 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3222 {
3223         return __cifs_writev(iocb, from, true);
3224 }
3225
3226 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3227 {
3228         return __cifs_writev(iocb, from, false);
3229 }
3230
/*
 * cifs_writev - write through the page cache on a file that may carry
 * mandatory byte-range locks.
 *
 * Serializes against other writers with the inode lock and takes
 * cinode->lock_sem for read so the brlock list cannot change while we
 * check it.  If a conflicting exclusive lock covers the range, the
 * write fails with -EACCES instead of going through.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* flush/sync the written range if the file requires it (O_SYNC etc.) */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
3266
/*
 * cifs_strict_writev - write path for strict cache mode.
 *
 * Picks the write strategy based on the caching state:
 *  - write caching granted (CIFS_CACHE_WRITE): go through the page cache,
 *    either via the generic path (POSIX byte-range locks available) or
 *    via cifs_writev() which honors mandatory brlocks;
 *  - otherwise: send the data uncached to the server, and if we hold a
 *    read cache, zap it since our write just made it stale.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* blocks if an oplock break is being handled; nonzero means error */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/* POSIX brlock-capable unix extensions: generic path is safe */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3316
3317 static struct cifs_readdata *
3318 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3319 {
3320         struct cifs_readdata *rdata;
3321
3322         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3323         if (rdata != NULL) {
3324                 rdata->pages = pages;
3325                 kref_init(&rdata->refcount);
3326                 INIT_LIST_HEAD(&rdata->list);
3327                 init_completion(&rdata->done);
3328                 INIT_WORK(&rdata->work, complete);
3329         }
3330
3331         return rdata;
3332 }
3333
3334 static struct cifs_readdata *
3335 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3336 {
3337         struct page **pages =
3338                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3339         struct cifs_readdata *ret = NULL;
3340
3341         if (pages) {
3342                 ret = cifs_readdata_direct_alloc(pages, complete);
3343                 if (!ret)
3344                         kfree(pages);
3345         }
3346
3347         return ret;
3348 }
3349
/*
 * cifs_readdata_release - kref release callback freeing a cifs_readdata.
 *
 * Deregisters the SMB Direct memory region if one is attached, drops
 * the reference on the open file, then frees the page pointer array and
 * the readdata itself.  Note the pages themselves are NOT put here;
 * paths that hold page references drop them before this runs (see
 * cifs_uncached_readdata_release).
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kvfree(rdata->pages);
	kfree(rdata);
}
3367
3368 static int
3369 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3370 {
3371         int rc = 0;
3372         struct page *page;
3373         unsigned int i;
3374
3375         for (i = 0; i < nr_pages; i++) {
3376                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3377                 if (!page) {
3378                         rc = -ENOMEM;
3379                         break;
3380                 }
3381                 rdata->pages[i] = page;
3382         }
3383
3384         if (rc) {
3385                 unsigned int nr_page_failed = i;
3386
3387                 for (i = 0; i < nr_page_failed; i++) {
3388                         put_page(rdata->pages[i]);
3389                         rdata->pages[i] = NULL;
3390                 }
3391         }
3392         return rc;
3393 }
3394
/*
 * cifs_uncached_readdata_release - kref release for readdata used by the
 * uncached/direct read paths.
 *
 * Drops the aio ctx reference taken when this rdata was queued, puts
 * every page still attached, then hands off to cifs_readdata_release()
 * to free the remaining resources.
 */
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	/* paired with the kref_get(&ctx->refcount) at submit time */
	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
	}
	cifs_readdata_release(refcount);
}
3408
3409 /**
3410  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3411  * @rdata:      the readdata response with list of pages holding data
3412  * @iter:       destination for our data
3413  *
3414  * This function copies data from a list of pages in a readdata response into
3415  * an array of iovecs. It will first calculate where the data should go
3416  * based on the info in the readdata and then copy the data into that spot.
3417  */
3418 static int
3419 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3420 {
3421         size_t remaining = rdata->got_bytes;
3422         unsigned int i;
3423
3424         for (i = 0; i < rdata->nr_pages; i++) {
3425                 struct page *page = rdata->pages[i];
3426                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3427                 size_t written;
3428
3429                 if (unlikely(iov_iter_is_pipe(iter))) {
3430                         void *addr = kmap_atomic(page);
3431
3432                         written = copy_to_iter(addr, copy, iter);
3433                         kunmap_atomic(addr);
3434                 } else
3435                         written = copy_page_to_iter(page, 0, copy, iter);
3436                 remaining -= written;
3437                 if (written < copy && iov_iter_count(iter) > 0)
3438                         break;
3439         }
3440         return remaining ? -EFAULT : 0;
3441 }
3442
3443 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3444
/*
 * Work handler run when an uncached/direct async read request finishes.
 * Marks this rdata done, gives collect_uncached_read_data() a chance to
 * gather all completed requests for the ctx, then drops the reference
 * held since submission.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3456
3457 static int
3458 uncached_fill_pages(struct TCP_Server_Info *server,
3459                     struct cifs_readdata *rdata, struct iov_iter *iter,
3460                     unsigned int len)
3461 {
3462         int result = 0;
3463         unsigned int i;
3464         unsigned int nr_pages = rdata->nr_pages;
3465         unsigned int page_offset = rdata->page_offset;
3466
3467         rdata->got_bytes = 0;
3468         rdata->tailsz = PAGE_SIZE;
3469         for (i = 0; i < nr_pages; i++) {
3470                 struct page *page = rdata->pages[i];
3471                 size_t n;
3472                 unsigned int segment_size = rdata->pagesz;
3473
3474                 if (i == 0)
3475                         segment_size -= page_offset;
3476                 else
3477                         page_offset = 0;
3478
3479
3480                 if (len <= 0) {
3481                         /* no need to hold page hostage */
3482                         rdata->pages[i] = NULL;
3483                         rdata->nr_pages--;
3484                         put_page(page);
3485                         continue;
3486                 }
3487
3488                 n = len;
3489                 if (len >= segment_size)
3490                         /* enough data to fill the page */
3491                         n = segment_size;
3492                 else
3493                         rdata->tailsz = len;
3494                 len -= n;
3495
3496                 if (iter)
3497                         result = copy_page_from_iter(
3498                                         page, page_offset, n, iter);
3499 #ifdef CONFIG_CIFS_SMB_DIRECT
3500                 else if (rdata->mr)
3501                         result = n;
3502 #endif
3503                 else
3504                         result = cifs_read_page_from_socket(
3505                                         server, page, page_offset, n);
3506                 if (result < 0)
3507                         break;
3508
3509                 rdata->got_bytes += result;
3510         }
3511
3512         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3513                                                 rdata->got_bytes : result;
3514 }
3515
3516 static int
3517 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3518                               struct cifs_readdata *rdata, unsigned int len)
3519 {
3520         return uncached_fill_pages(server, rdata, NULL, len);
3521 }
3522
3523 static int
3524 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3525                               struct cifs_readdata *rdata,
3526                               struct iov_iter *iter)
3527 {
3528         return uncached_fill_pages(server, rdata, iter, iter->count);
3529 }
3530
/*
 * cifs_resend_rdata - retry a read request that failed with -EAGAIN.
 *
 * Reopens the file handle if needed, waits until enough credits are
 * available to resend the WHOLE request in one piece, then reissues the
 * async read.  On success the rdata is put back on @rdata_list; on
 * permanent failure the rdata's reference is dropped.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server =
		tlink_tcon(rdata->cfile->tlink)->ses->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			/* not enough yet: give the credits back and wait */
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* re-register the MR on resend */
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}
3600
/*
 * cifs_send_async_read - split [offset, offset+len) into rsize-bounded
 * async read requests and submit them.
 *
 * For direct I/O the user's pages are pinned via
 * iov_iter_get_pages_alloc(); otherwise pages are allocated and the
 * response is copied into them.  Each successfully submitted rdata is
 * appended to @rdata_list and holds a reference on @ctx.
 *
 * Returns 0 when all requests were sent, otherwise the error from the
 * first request that could not be sent.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	/* private copy so we can advance/revert without touching ctx->iter */
	struct iov_iter direct_iov = ctx->iter;

	server = tlink_tcon(open_file->tlink)->ses->server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* skip the part of the iterator before this call's offset */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/* each request covers at most rsize bytes */
		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					"couldn't get user pages (rc=%zd)"
					" iter type %d"
					" iov_offset %zd count %zd\n",
					result, iov_iter_type(&direct_iov),
					direct_iov.iov_offset,
					direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			/* may have pinned fewer bytes than requested */
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			/* data starts at offset "start" within the first page */
			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		/* dropped in cifs_uncached_readdata_release() */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* undo the advance and retry this chunk */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3743
/*
 * collect_uncached_read_data - gather results of the async reads queued
 * on @ctx, resending any that failed with -EAGAIN.
 *
 * Called both from __cifs_readv() (sync waiters) and from each request's
 * completion work.  Bails out without blocking if any request is still
 * in flight; once everything completed, sets ctx->rc and either calls
 * the iocb completion (async) or signals ctx->done (sync).
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* nothing queued (or another caller already collected everything) */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* a request still in flight: collect later instead */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				/* queue the resent requests and rescan */
				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* non-direct: the iterator tracks how much was actually copied out */
	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3848
/*
 * __cifs_readv - common implementation for the uncached read paths
 * @iocb:	source file and starting position
 * @to:		destination iterator
 * @direct:	true to read straight into the caller's pages (zero copy);
 *		false to read into ctx-owned pages and copy out afterwards
 *
 * Allocates a cifs_aio_ctx, submits async read requests covering the
 * whole range via cifs_send_async_read(), then either waits for
 * completion (synchronous kiocb) or returns -EIOCBQUEUED.
 *
 * Returns the number of bytes read, -EIOCBQUEUED for queued async I/O,
 * or a negative error code.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	/* this path requires the server's async read op */
	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* remember the iocb so the completion path can finish it */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed pages must be marked dirty after we write into them */
	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		/* zero copy: read directly into the caller's iterator */
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		/* allocate ctx-owned pages covering the request */
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* async kiocb: completion handler reports the final result */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report whatever completed so far */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3951
3952 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3953 {
3954         return __cifs_readv(iocb, to, true);
3955 }
3956
3957 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3958 {
3959         return __cifs_readv(iocb, to, false);
3960 }
3961
/*
 * cifs_strict_readv - read path for strict cache mode.
 *
 * Reads from the page cache only when it is known to be valid: either
 * we hold a read cache (oplock/lease) and POSIX byte-range locks apply,
 * or no conflicting mandatory lock covers the range.  Otherwise read
 * uncached from the server.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/* POSIX brlock-capable unix extensions: generic cached read is safe */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
4001
/*
 * Synchronous, uncached read of up to @read_size bytes at *@offset into
 * @read_data.  Issues rsize-bounded sync_read calls in a loop until the
 * request is satisfied, the server reports no more data (0 bytes), or an
 * error occurs.  Advances *@offset by the bytes read; returns the total
 * number of bytes read, or a negative errno when nothing was read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* forward the opener's pid when the mount requested it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* inner loop retries after a handle reopen (-EAGAIN) */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* on partial success, report what was already read */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			/*
			 * NOTE(review): stats are bumped with the cumulative
			 * total_read rather than this iteration's bytes_read;
			 * confirm this accounting is intentional.
			 */
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
4092
/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	/* keep the page locked during writeback; VM_FAULT_LOCKED tells the
	   VM we return it still locked */
	lock_page(page);
	return VM_FAULT_LOCKED;
}
4105
/* mmap handlers: generic fault paths plus CIFS-specific mkwrite locking */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
4111
4112 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4113 {
4114         int xid, rc = 0;
4115         struct inode *inode = file_inode(file);
4116
4117         xid = get_xid();
4118
4119         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4120                 rc = cifs_zap_mapping(inode);
4121         if (!rc)
4122                 rc = generic_file_mmap(file, vma);
4123         if (!rc)
4124                 vma->vm_ops = &cifs_file_vm_ops;
4125
4126         free_xid(xid);
4127         return rc;
4128 }
4129
4130 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4131 {
4132         int rc, xid;
4133
4134         xid = get_xid();
4135
4136         rc = cifs_revalidate_file(file);
4137         if (rc)
4138                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4139                          rc);
4140         if (!rc)
4141                 rc = generic_file_mmap(file, vma);
4142         if (!rc)
4143                 vma->vm_ops = &cifs_file_vm_ops;
4144
4145         free_xid(xid);
4146         return rc;
4147 }
4148
/*
 * Work-queue completion for an async pagecache read request: mark pages
 * covered by received data uptodate, push them to fscache, and release the
 * page and request references.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i, got_bytes;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	got_bytes = rdata->got_bytes;
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add_file(page);

		/* full success, or partial data on -EAGAIN, makes the page
		   contents valid */
		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		/* consume up to one page's worth of the received bytes */
		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);

		put_page(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
4181
/*
 * Fill rdata->pages with up to @len bytes: copied from @iter when the data
 * was already received (e.g. decrypted into a buffer), or read from the
 * server socket when @iter is NULL.  Pages past the data, or past the
 * server's apparent EOF, are zeroed or released.  Returns the number of
 * bytes placed into pages, or a negative error.
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page may start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			/* data was placed directly by RDMA; just account */
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
4267
/* ->read_into_pages hook: receive data from the socket into rdata's pages */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
4274
/* ->copy_into_pages hook: copy already-received data from @iter into pages */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
4282
/*
 * Peel pages off the tail of @page_list into @tmplist to form one read
 * request: indexes must be consecutive and the total capped at @rsize.
 * Each page is locked and inserted into the page cache here.  On success,
 * *offset/*bytes/*nr_pages describe the resulting contiguous range.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4341
/*
 * ->readpages address-space op: service readahead by batching contiguous
 * pages into rsize-sized async read requests.  Issued pages complete in
 * cifs_readv_complete(); pages never issued are left on @page_list for the
 * VFS to fall back on ->readpage.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* reserve send credits sized to the negotiated rsize */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop the submission reference; completion holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4493
4494 /*
4495  * cifs_readpage_worker must be called with the page pinned
4496  */
4497 static int cifs_readpage_worker(struct file *file, struct page *page,
4498         loff_t *poffset)
4499 {
4500         char *read_data;
4501         int rc;
4502
4503         /* Is the page cached? */
4504         rc = cifs_readpage_from_fscache(file_inode(file), page);
4505         if (rc == 0)
4506                 goto read_complete;
4507
4508         read_data = kmap(page);
4509         /* for reads over a certain size could initiate async read ahead */
4510
4511         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4512
4513         if (rc < 0)
4514                 goto io_error;
4515         else
4516                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4517
4518         /* we do not want atime to be less than mtime, it broke some apps */
4519         file_inode(file)->i_atime = current_time(file_inode(file));
4520         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4521                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4522         else
4523                 file_inode(file)->i_atime = current_time(file_inode(file));
4524
4525         if (PAGE_SIZE > rc)
4526                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4527
4528         flush_dcache_page(page);
4529         SetPageUptodate(page);
4530
4531         /* send this page to the cache */
4532         cifs_readpage_to_fscache(file_inode(file), page);
4533
4534         rc = 0;
4535
4536 io_error:
4537         kunmap(page);
4538         unlock_page(page);
4539
4540 read_complete:
4541         return rc;
4542 }
4543
4544 static int cifs_readpage(struct file *file, struct page *page)
4545 {
4546         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4547         int rc = -EACCES;
4548         unsigned int xid;
4549
4550         xid = get_xid();
4551
4552         if (file->private_data == NULL) {
4553                 rc = -EBADF;
4554                 free_xid(xid);
4555                 return rc;
4556         }
4557
4558         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4559                  page, (int)offset, (int)offset);
4560
4561         rc = cifs_readpage_worker(file, page, &offset);
4562
4563         free_xid(xid);
4564         return rc;
4565 }
4566
4567 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4568 {
4569         struct cifsFileInfo *open_file;
4570
4571         spin_lock(&cifs_inode->open_file_lock);
4572         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4573                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4574                         spin_unlock(&cifs_inode->open_file_lock);
4575                         return 1;
4576                 }
4577         }
4578         spin_unlock(&cifs_inode->open_file_lock);
4579         return 0;
4580 }
4581
4582 /* We do not want to update the file size from server for inodes
4583    open for write - to avoid races with writepage extending
4584    the file - in the future we could consider allowing
4585    refreshing the inode only on increases in the file size
4586    but this is tricky to do without racing with writebehind
4587    page caching in the current Linux kernel design */
4588 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4589 {
4590         if (!cifsInode)
4591                 return true;
4592
4593         if (is_inode_writable(cifsInode)) {
4594                 /* This inode is open for write at least once */
4595                 struct cifs_sb_info *cifs_sb;
4596
4597                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4598                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4599                         /* since no page cache to corrupt on directio
4600                         we can change size safely */
4601                         return true;
4602                 }
4603
4604                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4605                         return true;
4606
4607                 return false;
4608         } else
4609                 return true;
4610 }
4611
/*
 * ->write_begin address-space op: return (locked, referenced, in *pagep)
 * the page that a write of @len bytes at @pos will modify, reading or
 * zeroing its existing contents as needed so cifs_write_end can finish.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	/* oncethru ensures the read-for-write is attempted only once */
	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4688
4689 static int cifs_release_page(struct page *page, gfp_t gfp)
4690 {
4691         if (PagePrivate(page))
4692                 return 0;
4693
4694         return cifs_fscache_release_page(page, gfp);
4695 }
4696
4697 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4698                                  unsigned int length)
4699 {
4700         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4701
4702         if (offset == 0 && length == PAGE_SIZE)
4703                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4704 }
4705
/*
 * ->launder_page op: synchronously write back a dirty page before it is
 * invalidated, and drop its fscache copy.
 */
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	/* synchronous writeback restricted to exactly this page's range */
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
4726
/*
 * Work item run when the server breaks our oplock/lease: downgrade the
 * cached oplock state, flush (and possibly purge) cached data, push any
 * cached byte-range locks to the server, then acknowledge the break.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* let in-flight writers drain before changing oplock state */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	/* mandatory brlocks are incompatible with pagecache reads */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* losing read caching: wait for writeback, then zap */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
4786
4787 /*
4788  * The presence of cifs_direct_io() in the address space ops vector
4789  * allowes open() O_DIRECT flags which would have failed otherwise.
4790  *
4791  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4792  * so this method should never be called.
4793  *
4794  * Direct IO is not yet supported in the cached mode. 
4795  */
static ssize_t cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
        /*
         * FIXME: direct I/O on cached (non-forcedirectio) mounts is not
         * implemented yet, so reject the request outright for now.
         */
        return -EINVAL;
}
4805
4806
/*
 * Address-space operations for cached I/O.  Unlike cifs_addr_ops_smallbuf
 * below, this table wires up ->readpages and ->direct_IO as well.
 */
const struct address_space_operations cifs_addr_ops = {
        .readpage = cifs_readpage,
        .readpages = cifs_readpages,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        /* Present only so open() with O_DIRECT succeeds; see cifs_direct_io. */
        .direct_IO = cifs_direct_io,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};
4820
4821 /*
4822  * cifs_readpages requires the server to support a buffer large enough to
4823  * contain the header plus one complete page of data.  Otherwise, we need
4824  * to leave cifs_readpages out of the address space operations.
4825  */
/*
 * Reduced address-space operations used when the server buffer is too
 * small for cifs_readpages (header plus a full page): identical to
 * cifs_addr_ops except that ->readpages and ->direct_IO are omitted.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
        .readpage = cifs_readpage,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};