scsi: core: Run queue in case of I/O resource contention failure
[linux-2.6-microblaze.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50         if ((flags & O_ACCMODE) == O_RDONLY)
51                 return GENERIC_READ;
52         else if ((flags & O_ACCMODE) == O_WRONLY)
53                 return GENERIC_WRITE;
54         else if ((flags & O_ACCMODE) == O_RDWR) {
55                 /* GENERIC_ALL is too much permission to request
56                    can cause unnecessary access denied on create */
57                 /* return GENERIC_ALL; */
58                 return (GENERIC_READ | GENERIC_WRITE);
59         }
60
61         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63                 FILE_READ_DATA);
64 }
65
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68         u32 posix_flags = 0;
69
70         if ((flags & O_ACCMODE) == O_RDONLY)
71                 posix_flags = SMB_O_RDONLY;
72         else if ((flags & O_ACCMODE) == O_WRONLY)
73                 posix_flags = SMB_O_WRONLY;
74         else if ((flags & O_ACCMODE) == O_RDWR)
75                 posix_flags = SMB_O_RDWR;
76
77         if (flags & O_CREAT) {
78                 posix_flags |= SMB_O_CREAT;
79                 if (flags & O_EXCL)
80                         posix_flags |= SMB_O_EXCL;
81         } else if (flags & O_EXCL)
82                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83                          current->comm, current->tgid);
84
85         if (flags & O_TRUNC)
86                 posix_flags |= SMB_O_TRUNC;
87         /* be safe and imply O_SYNC for O_DSYNC */
88         if (flags & O_DSYNC)
89                 posix_flags |= SMB_O_SYNC;
90         if (flags & O_DIRECTORY)
91                 posix_flags |= SMB_O_DIRECTORY;
92         if (flags & O_NOFOLLOW)
93                 posix_flags |= SMB_O_NOFOLLOW;
94         if (flags & O_DIRECT)
95                 posix_flags |= SMB_O_DIRECT;
96
97         return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
113
/*
 * Open @full_path on the server via the SMB1 unix-extensions POSIX create
 * call and, when the server returned usable attributes, instantiate or
 * refresh the corresponding inode.
 *
 * @pinode:  if non-NULL, *pinode is created from the returned attributes
 *           (when NULL on entry) or updated in place.
 * @poplock: out: oplock level granted by the server.
 * @pnetfid: out: SMB1 netfid of the opened handle.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the process umask just as a local create would */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type of -1 means the server sent no attributes back */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174
/*
 * Open @full_path via the server's ->open op (the "NT"/non-POSIX path)
 * and refresh the inode metadata for it.  On post-open metadata failure
 * the handle is closed again so the caller never sees a half-open file.
 *
 * Returns 0 on success or a negative errno; -EOPENSTALE is substituted
 * for -ESTALE so the VFS retries the open.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	/* dialect/transport without an open op cannot service this at all */
	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* response buffer for ->open; later fed to cifs_get_inode_info */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* metadata fetch failed: drop the server handle we just got */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
264
265 static bool
266 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
267 {
268         struct cifs_fid_locks *cur;
269         bool has_locks = false;
270
271         down_read(&cinode->lock_sem);
272         list_for_each_entry(cur, &cinode->llist, llist) {
273                 if (!list_empty(&cur->locks)) {
274                         has_locks = true;
275                         break;
276                 }
277         }
278         up_read(&cinode->lock_sem);
279         return has_locks;
280 }
281
/*
 * Acquire @sem for writing by polling with down_write_trylock(),
 * sleeping 10ms between attempts instead of blocking in down_write().
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
288
289 static void cifsFileInfo_put_work(struct work_struct *work);
290
/*
 * Allocate and initialize the per-open-file private data for @file and
 * link it onto the tcon and inode open-file lists.  Takes over @fid
 * (via ->set_fid) and consumes fid->pending_open.
 *
 * Returns the new cifsFileInfo (also stored in file->private_data), or
 * NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	/* initial reference, dropped by cifsFileInfo_put() */
	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	/* pin the superblock while this open file exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	/* lock order: tcon->open_file_lock before cinode->open_file_lock */
	spin_lock(&tcon->open_file_lock);
	/* a lease break may have arrived while the open was in flight */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	/* ->set_fid may flip purge_cache back on; checked below */
	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
370
/*
 * Take an additional reference on @cifs_file under file_info_lock.
 * Paired with cifsFileInfo_put()/_cifsFileInfo_put().
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
379
/*
 * Final teardown of a cifsFileInfo once its refcount hit zero: discard
 * any remaining byte-range lock records, drop the tlink, dentry and
 * superblock references, and free the structure.  Runs either inline
 * or from the fileinfo_put workqueue.
 */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
406
407 static void cifsFileInfo_put_work(struct work_struct *work)
408 {
409         struct cifsFileInfo *cifs_file = container_of(work,
410                         struct cifsFileInfo, put);
411
412         cifsFileInfo_put_final(cifs_file);
413 }
414
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * @cifs_file: the file info structure to release
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
424
425 /**
426  * _cifsFileInfo_put - release a reference of file priv data
427  *
428  * This may involve closing the filehandle @cifs_file out on the
429  * server. Must be called without holding tcon->open_file_lock,
430  * cinode->open_file_lock and cifs_file->file_info_lock.
431  *
432  * If @wait_for_oplock_handler is true and we are releasing the last
433  * reference, wait for any running oplock break handler of the file
434  * and cancel any pending one. If calling this function from the
435  * oplock break handler, you need to pass false.
436  *
437  */
438 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
439                        bool wait_oplock_handler, bool offload)
440 {
441         struct inode *inode = d_inode(cifs_file->dentry);
442         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
443         struct TCP_Server_Info *server = tcon->ses->server;
444         struct cifsInodeInfo *cifsi = CIFS_I(inode);
445         struct super_block *sb = inode->i_sb;
446         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
447         struct cifs_fid fid;
448         struct cifs_pending_open open;
449         bool oplock_break_cancelled;
450
451         spin_lock(&tcon->open_file_lock);
452         spin_lock(&cifsi->open_file_lock);
453         spin_lock(&cifs_file->file_info_lock);
454         if (--cifs_file->count > 0) {
455                 spin_unlock(&cifs_file->file_info_lock);
456                 spin_unlock(&cifsi->open_file_lock);
457                 spin_unlock(&tcon->open_file_lock);
458                 return;
459         }
460         spin_unlock(&cifs_file->file_info_lock);
461
462         if (server->ops->get_lease_key)
463                 server->ops->get_lease_key(inode, &fid);
464
465         /* store open in pending opens to make sure we don't miss lease break */
466         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
467
468         /* remove it from the lists */
469         list_del(&cifs_file->flist);
470         list_del(&cifs_file->tlist);
471         atomic_dec(&tcon->num_local_opens);
472
473         if (list_empty(&cifsi->openFileList)) {
474                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
475                          d_inode(cifs_file->dentry));
476                 /*
477                  * In strict cache mode we need invalidate mapping on the last
478                  * close  because it may cause a error when we open this file
479                  * again and get at least level II oplock.
480                  */
481                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
482                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
483                 cifs_set_oplock_level(cifsi, 0);
484         }
485
486         spin_unlock(&cifsi->open_file_lock);
487         spin_unlock(&tcon->open_file_lock);
488
489         oplock_break_cancelled = wait_oplock_handler ?
490                 cancel_work_sync(&cifs_file->oplock_break) : false;
491
492         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
493                 struct TCP_Server_Info *server = tcon->ses->server;
494                 unsigned int xid;
495
496                 xid = get_xid();
497                 if (server->ops->close_getattr)
498                         server->ops->close_getattr(xid, tcon, cifs_file);
499                 else if (server->ops->close)
500                         server->ops->close(xid, tcon, &cifs_file->fid);
501                 _free_xid(xid);
502         }
503
504         if (oplock_break_cancelled)
505                 cifs_done_oplock_break(cifsi);
506
507         cifs_del_pending_open(&open);
508
509         if (offload)
510                 queue_work(fileinfo_put_wq, &cifs_file->put);
511         else
512                 cifsFileInfo_put_final(cifs_file);
513 }
514
/*
 * ->open for cifs files.  Tries the SMB1 unix-extensions POSIX open when
 * the server advertises it, otherwise falls back to cifs_nt_open(), then
 * builds the per-open private data via cifs_new_fileinfo().
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* O_DIRECT in strict cache mode needs the direct-I/O file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server claims POSIX open support but rejects it */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* register before the open so a racing lease break is not lost */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open and the pending-open registration */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
641
642 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
643
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* nested annotation: reopen path may already hold a lock_sem */
	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	/* POSIX (unix extensions) locks when available, else mandatory ones */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
673
/*
 * Reopen a file whose handle was invalidated (e.g. by reconnect).
 *
 * @can_flush: true when it is safe to flush dirty pages and refresh the
 *	inode from the server; false when we are already in a writeback
 *	path and flushing could deadlock.
 *
 * Returns 0 on success or a negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	/* fh_mutex serializes reopen attempts on this handle */
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened it */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
838
839 int cifs_close(struct inode *inode, struct file *file)
840 {
841         if (file->private_data != NULL) {
842                 _cifsFileInfo_put(file->private_data, true, false);
843                 file->private_data = NULL;
844         }
845
846         /* return code from the ->release op is always ignored */
847         return 0;
848 }
849
850 void
851 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
852 {
853         struct cifsFileInfo *open_file;
854         struct list_head *tmp;
855         struct list_head *tmp1;
856         struct list_head tmp_list;
857
858         if (!tcon->use_persistent || !tcon->need_reopen_files)
859                 return;
860
861         tcon->need_reopen_files = false;
862
863         cifs_dbg(FYI, "Reopen persistent handles\n");
864         INIT_LIST_HEAD(&tmp_list);
865
866         /* list all files open on tree connection, reopen resilient handles  */
867         spin_lock(&tcon->open_file_lock);
868         list_for_each(tmp, &tcon->openFileList) {
869                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
870                 if (!open_file->invalidHandle)
871                         continue;
872                 cifsFileInfo_get(open_file);
873                 list_add_tail(&open_file->rlist, &tmp_list);
874         }
875         spin_unlock(&tcon->open_file_lock);
876
877         list_for_each_safe(tmp, tmp1, &tmp_list) {
878                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
879                 if (cifs_reopen_file(open_file, false /* do not flush */))
880                         tcon->need_reopen_files = true;
881                 list_del_init(&open_file->rlist);
882                 cifsFileInfo_put(open_file);
883         }
884 }
885
/*
 * ->release() handler for directories: close an in-progress server-side
 * FIND if the handle still needs it, free the cached search response
 * buffer, and drop the tree-link reference and private data attached to
 * @file.  Always returns 0 - errors from the server close are only
 * logged.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        /* nothing to tear down if the directory was never fully opened */
        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                /* drop the spinlock before doing a network round trip */
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        /* release the cached SMB response holding the search results */
        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}
936
937 static struct cifsLockInfo *
938 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
939 {
940         struct cifsLockInfo *lock =
941                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
942         if (!lock)
943                 return lock;
944         lock->offset = offset;
945         lock->length = length;
946         lock->type = type;
947         lock->pid = current->tgid;
948         lock->flags = flags;
949         INIT_LIST_HEAD(&lock->blist);
950         init_waitqueue_head(&lock->block_q);
951         return lock;
952 }
953
954 void
955 cifs_del_lock_waiters(struct cifsLockInfo *lock)
956 {
957         struct cifsLockInfo *li, *tmp;
958         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
959                 list_del_init(&li->blist);
960                 wake_up(&li->block_q);
961         }
962 }
963
/* rw_check values for the lock-conflict helpers below */
#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one open handle's lock list (@fdlocks) for a lock conflicting
 * with the range [offset, offset+length) of @type requested through
 * @cfile.  On conflict, store the blocking lock in *@conf_lock (if
 * non-NULL) and return true; return false when nothing conflicts.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* non-overlapping ranges can never conflict */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /*
                 * A shared request coexists with our own lock (same fid and
                 * thread group) or with another lock of the identical type.
                 */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                /* OFD locks taken through the same fid do not conflict */
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
1004
1005 bool
1006 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1007                         __u8 type, __u16 flags,
1008                         struct cifsLockInfo **conf_lock, int rw_check)
1009 {
1010         bool rc = false;
1011         struct cifs_fid_locks *cur;
1012         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1013
1014         list_for_each_entry(cur, &cinode->llist, llist) {
1015                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1016                                                  flags, cfile, conf_lock,
1017                                                  rw_check);
1018                 if (rc)
1019                         break;
1020         }
1021
1022         return rc;
1023 }
1024
/*
 * Check if there is another lock that prevents us to set the lock (mandatory
 * style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 * or leave it the same if we can't. Returns 0 if we don't need to request to
 * the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                /* report the conflicting lock's properties back via @flock */
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                /* no local conflict but the cache is not authoritative */
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}
1063
/*
 * Append an already-initialized lock to the file's cached lock list,
 * taking lock_sem for writing to serialize against conflict scans.
 */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}
1072
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                /* no conflict and brlocks are cacheable - grant locally */
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                /* queue ourselves on the conflicting lock's blocked list */
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                /*
                 * Open-coded "blist is empty" test: cifs_del_lock_waiters()
                 * does list_del_init() on our entry when the conflicting
                 * lock goes away, making both links point back at us.
                 */
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                /* interrupted by a signal - unhook from the waiters list */
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
1120
1121 /*
1122  * Check if there is another lock that prevents us to set the lock (posix
1123  * style). If such a lock exists, update the flock structure with its
1124  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1125  * or leave it the same if we can't. Returns 0 if we don't need to request to
1126  * the server or 1 otherwise.
1127  */
1128 static int
1129 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1130 {
1131         int rc = 0;
1132         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1133         unsigned char saved_type = flock->fl_type;
1134
1135         if ((flock->fl_flags & FL_POSIX) == 0)
1136                 return 1;
1137
1138         down_read(&cinode->lock_sem);
1139         posix_test_lock(file, flock);
1140
1141         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1142                 flock->fl_type = saved_type;
1143                 rc = 1;
1144         }
1145
1146         up_read(&cinode->lock_sem);
1147         return rc;
1148 }
1149
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                /* caching disabled - caller must send the lock to the server */
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                /* blocked on another lock - wait until unblocked, then retry */
                rc = wait_event_interruptible(flock->fl_wait,
                                        list_empty(&flock->fl_blocked_member));
                if (!rc)
                        goto try_again;
                /* interrupted - remove ourselves from the blocked list */
                locks_delete_block(flock);
        }
        return rc;
}
1183
/*
 * Send all locally cached byte-range locks on @cfile to the server via
 * SMB1 LOCKING_ANDX.  Locks are pushed in two passes, one per lock type
 * (exclusive, then shared), batching up to max_num ranges per request.
 * Returns 0 on success or the last non-zero rc from cifs_lockv().
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        /* how many ranges fit in one request buffer */
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        /* only push locks matching this pass's type */
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /* buffer full - flush this batch to the server */
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                /* send any remaining partial batch */
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
1260
/*
 * Derive the 32-bit "pid" sent in posix lock requests from an
 * fl_owner_t, XORed with the module-global cifs_lock_secret so raw
 * kernel pointer values are not exposed on the wire.
 */
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
1266
/* A POSIX byte-range lock snapshotted for transmission to the server. */
struct lock_to_push {
        struct list_head llist; /* entry in the caller's locks_to_send list */
        __u64 offset;           /* start of the locked range */
        __u64 length;           /* number of bytes locked */
        __u32 pid;              /* hashed lock owner, sent as the pid */
        __u16 netfid;           /* SMB file handle the lock applies to */
        __u8 type;              /* CIFS_RDLCK or CIFS_WRLCK */
};
1275
1276 static int
1277 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1278 {
1279         struct inode *inode = d_inode(cfile->dentry);
1280         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1281         struct file_lock *flock;
1282         struct file_lock_context *flctx = inode->i_flctx;
1283         unsigned int count = 0, i;
1284         int rc = 0, xid, type;
1285         struct list_head locks_to_send, *el;
1286         struct lock_to_push *lck, *tmp;
1287         __u64 length;
1288
1289         xid = get_xid();
1290
1291         if (!flctx)
1292                 goto out;
1293
1294         spin_lock(&flctx->flc_lock);
1295         list_for_each(el, &flctx->flc_posix) {
1296                 count++;
1297         }
1298         spin_unlock(&flctx->flc_lock);
1299
1300         INIT_LIST_HEAD(&locks_to_send);
1301
1302         /*
1303          * Allocating count locks is enough because no FL_POSIX locks can be
1304          * added to the list while we are holding cinode->lock_sem that
1305          * protects locking operations of this inode.
1306          */
1307         for (i = 0; i < count; i++) {
1308                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1309                 if (!lck) {
1310                         rc = -ENOMEM;
1311                         goto err_out;
1312                 }
1313                 list_add_tail(&lck->llist, &locks_to_send);
1314         }
1315
1316         el = locks_to_send.next;
1317         spin_lock(&flctx->flc_lock);
1318         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1319                 if (el == &locks_to_send) {
1320                         /*
1321                          * The list ended. We don't have enough allocated
1322                          * structures - something is really wrong.
1323                          */
1324                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1325                         break;
1326                 }
1327                 length = 1 + flock->fl_end - flock->fl_start;
1328                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1329                         type = CIFS_RDLCK;
1330                 else
1331                         type = CIFS_WRLCK;
1332                 lck = list_entry(el, struct lock_to_push, llist);
1333                 lck->pid = hash_lockowner(flock->fl_owner);
1334                 lck->netfid = cfile->fid.netfid;
1335                 lck->length = length;
1336                 lck->type = type;
1337                 lck->offset = flock->fl_start;
1338         }
1339         spin_unlock(&flctx->flc_lock);
1340
1341         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1342                 int stored_rc;
1343
1344                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1345                                              lck->offset, lck->length, NULL,
1346                                              lck->type, 0);
1347                 if (stored_rc)
1348                         rc = stored_rc;
1349                 list_del(&lck->llist);
1350                 kfree(lck);
1351         }
1352
1353 out:
1354         free_xid(xid);
1355         return rc;
1356 err_out:
1357         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1358                 list_del(&lck->llist);
1359                 kfree(lck);
1360         }
1361         goto out;
1362 }
1363
1364 static int
1365 cifs_push_locks(struct cifsFileInfo *cfile)
1366 {
1367         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1368         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1369         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1370         int rc = 0;
1371
1372         /* we are going to update can_cache_brlcks here - need a write access */
1373         cifs_down_write(&cinode->lock_sem);
1374         if (!cinode->can_cache_brlcks) {
1375                 up_write(&cinode->lock_sem);
1376                 return rc;
1377         }
1378
1379         if (cap_unix(tcon->ses) &&
1380             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1381             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1382                 rc = cifs_push_posix_locks(cfile);
1383         else
1384                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1385
1386         cinode->can_cache_brlcks = false;
1387         up_write(&cinode->lock_sem);
1388         return rc;
1389 }
1390
1391 static void
1392 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1393                 bool *wait_flag, struct TCP_Server_Info *server)
1394 {
1395         if (flock->fl_flags & FL_POSIX)
1396                 cifs_dbg(FYI, "Posix\n");
1397         if (flock->fl_flags & FL_FLOCK)
1398                 cifs_dbg(FYI, "Flock\n");
1399         if (flock->fl_flags & FL_SLEEP) {
1400                 cifs_dbg(FYI, "Blocking lock\n");
1401                 *wait_flag = true;
1402         }
1403         if (flock->fl_flags & FL_ACCESS)
1404                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1405         if (flock->fl_flags & FL_LEASE)
1406                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1407         if (flock->fl_flags &
1408             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1409                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1410                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1411
1412         *type = server->vals->large_lock_type;
1413         if (flock->fl_type == F_WRLCK) {
1414                 cifs_dbg(FYI, "F_WRLCK\n");
1415                 *type |= server->vals->exclusive_lock_type;
1416                 *lock = 1;
1417         } else if (flock->fl_type == F_UNLCK) {
1418                 cifs_dbg(FYI, "F_UNLCK\n");
1419                 *type |= server->vals->unlock_lock_type;
1420                 *unlock = 1;
1421                 /* Check if unlock includes more than one lock range */
1422         } else if (flock->fl_type == F_RDLCK) {
1423                 cifs_dbg(FYI, "F_RDLCK\n");
1424                 *type |= server->vals->shared_lock_type;
1425                 *lock = 1;
1426         } else if (flock->fl_type == F_EXLCK) {
1427                 cifs_dbg(FYI, "F_EXLCK\n");
1428                 *type |= server->vals->exclusive_lock_type;
1429                 *lock = 1;
1430         } else if (flock->fl_type == F_SHLCK) {
1431                 cifs_dbg(FYI, "F_SHLCK\n");
1432                 *type |= server->vals->shared_lock_type;
1433                 *lock = 1;
1434         } else
1435                 cifs_dbg(FYI, "Unknown type of lock\n");
1436 }
1437
/*
 * Handle an F_GETLK-style request.  For posix locks, test the local
 * cache first and fall back to a server CIFSSMBPosixLock query.  For
 * mandatory locks, probe the range by temporarily acquiring and
 * releasing server locks to discover whether a write or read lock
 * would be granted, and update @flock with the result.  Returns 0 on
 * a completed probe or a negative error.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                /* rc == 0 means the local cache answered the query */
                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        /* probe: try to take the requested lock on the server ... */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                /* ... it succeeded, so no conflict - release it again */
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        /* a shared request that failed means an exclusive lock blocks us */
        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        /* exclusive failed - probe whether a shared lock would be granted */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
1506
1507 void
1508 cifs_move_llist(struct list_head *source, struct list_head *dest)
1509 {
1510         struct list_head *li, *tmp;
1511         list_for_each_safe(li, tmp, source)
1512                 list_move(li, dest);
1513 }
1514
1515 void
1516 cifs_free_llist(struct list_head *llist)
1517 {
1518         struct cifsLockInfo *li, *tmp;
1519         list_for_each_entry_safe(li, tmp, llist, llist) {
1520                 cifs_del_lock_waiters(li);
1521                 list_del(&li->llist);
1522                 kfree(li);
1523         }
1524 }
1525
1526 int
1527 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1528                   unsigned int xid)
1529 {
1530         int rc = 0, stored_rc;
1531         static const int types[] = {
1532                 LOCKING_ANDX_LARGE_FILES,
1533                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1534         };
1535         unsigned int i;
1536         unsigned int max_num, num, max_buf;
1537         LOCKING_ANDX_RANGE *buf, *cur;
1538         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1539         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1540         struct cifsLockInfo *li, *tmp;
1541         __u64 length = 1 + flock->fl_end - flock->fl_start;
1542         struct list_head tmp_llist;
1543
1544         INIT_LIST_HEAD(&tmp_llist);
1545
1546         /*
1547          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1548          * and check it before using.
1549          */
1550         max_buf = tcon->ses->server->maxBuf;
1551         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1552                 return -EINVAL;
1553
1554         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1555                      PAGE_SIZE);
1556         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1557                         PAGE_SIZE);
1558         max_num = (max_buf - sizeof(struct smb_hdr)) /
1559                                                 sizeof(LOCKING_ANDX_RANGE);
1560         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1561         if (!buf)
1562                 return -ENOMEM;
1563
1564         cifs_down_write(&cinode->lock_sem);
1565         for (i = 0; i < 2; i++) {
1566                 cur = buf;
1567                 num = 0;
1568                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1569                         if (flock->fl_start > li->offset ||
1570                             (flock->fl_start + length) <
1571                             (li->offset + li->length))
1572                                 continue;
1573                         if (current->tgid != li->pid)
1574                                 continue;
1575                         if (types[i] != li->type)
1576                                 continue;
1577                         if (cinode->can_cache_brlcks) {
1578                                 /*
1579                                  * We can cache brlock requests - simply remove
1580                                  * a lock from the file's list.
1581                                  */
1582                                 list_del(&li->llist);
1583                                 cifs_del_lock_waiters(li);
1584                                 kfree(li);
1585                                 continue;
1586                         }
1587                         cur->Pid = cpu_to_le16(li->pid);
1588                         cur->LengthLow = cpu_to_le32((u32)li->length);
1589                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1590                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1591                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1592                         /*
1593                          * We need to save a lock here to let us add it again to
1594                          * the file's list if the unlock range request fails on
1595                          * the server.
1596                          */
1597                         list_move(&li->llist, &tmp_llist);
1598                         if (++num == max_num) {
1599                                 stored_rc = cifs_lockv(xid, tcon,
1600                                                        cfile->fid.netfid,
1601                                                        li->type, num, 0, buf);
1602                                 if (stored_rc) {
1603                                         /*
1604                                          * We failed on the unlock range
1605                                          * request - add all locks from the tmp
1606                                          * list to the head of the file's list.
1607                                          */
1608                                         cifs_move_llist(&tmp_llist,
1609                                                         &cfile->llist->locks);
1610                                         rc = stored_rc;
1611                                 } else
1612                                         /*
1613                                          * The unlock range request succeed -
1614                                          * free the tmp list.
1615                                          */
1616                                         cifs_free_llist(&tmp_llist);
1617                                 cur = buf;
1618                                 num = 0;
1619                         } else
1620                                 cur++;
1621                 }
1622                 if (num) {
1623                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1624                                                types[i], num, 0, buf);
1625                         if (stored_rc) {
1626                                 cifs_move_llist(&tmp_llist,
1627                                                 &cfile->llist->locks);
1628                                 rc = stored_rc;
1629                         } else
1630                                 cifs_free_llist(&tmp_llist);
1631                 }
1632         }
1633
1634         up_write(&cinode->lock_sem);
1635         kfree(buf);
1636         return rc;
1637 }
1638
1639 static int
1640 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1641            bool wait_flag, bool posix_lck, int lock, int unlock,
1642            unsigned int xid)
1643 {
1644         int rc = 0;
1645         __u64 length = 1 + flock->fl_end - flock->fl_start;
1646         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1647         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1648         struct TCP_Server_Info *server = tcon->ses->server;
1649         struct inode *inode = d_inode(cfile->dentry);
1650
1651         if (posix_lck) {
1652                 int posix_lock_type;
1653
1654                 rc = cifs_posix_lock_set(file, flock);
1655                 if (!rc || rc < 0)
1656                         return rc;
1657
1658                 if (type & server->vals->shared_lock_type)
1659                         posix_lock_type = CIFS_RDLCK;
1660                 else
1661                         posix_lock_type = CIFS_WRLCK;
1662
1663                 if (unlock == 1)
1664                         posix_lock_type = CIFS_UNLCK;
1665
1666                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1667                                       hash_lockowner(flock->fl_owner),
1668                                       flock->fl_start, length,
1669                                       NULL, posix_lock_type, wait_flag);
1670                 goto out;
1671         }
1672
1673         if (lock) {
1674                 struct cifsLockInfo *lock;
1675
1676                 lock = cifs_lock_init(flock->fl_start, length, type,
1677                                       flock->fl_flags);
1678                 if (!lock)
1679                         return -ENOMEM;
1680
1681                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1682                 if (rc < 0) {
1683                         kfree(lock);
1684                         return rc;
1685                 }
1686                 if (!rc)
1687                         goto out;
1688
1689                 /*
1690                  * Windows 7 server can delay breaking lease from read to None
1691                  * if we set a byte-range lock on a file - break it explicitly
1692                  * before sending the lock to the server to be sure the next
1693                  * read won't conflict with non-overlapted locks due to
1694                  * pagereading.
1695                  */
1696                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1697                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1698                         cifs_zap_mapping(inode);
1699                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1700                                  inode);
1701                         CIFS_I(inode)->oplock = 0;
1702                 }
1703
1704                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1705                                             type, 1, 0, wait_flag);
1706                 if (rc) {
1707                         kfree(lock);
1708                         return rc;
1709                 }
1710
1711                 cifs_lock_add(cfile, lock);
1712         } else if (unlock)
1713                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1714
1715 out:
1716         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1717                 /*
1718                  * If this is a request to remove all locks because we
1719                  * are closing the file, it doesn't matter if the
1720                  * unlocking failed as both cifs.ko and the SMB server
1721                  * remove the lock on file close
1722                  */
1723                 if (rc) {
1724                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1725                         if (!(flock->fl_flags & FL_CLOSE))
1726                                 return rc;
1727                 }
1728                 rc = locks_lock_file_wait(file, flock);
1729         }
1730         return rc;
1731 }
1732
1733 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1734 {
1735         int rc, xid;
1736         int lock = 0, unlock = 0;
1737         bool wait_flag = false;
1738         bool posix_lck = false;
1739         struct cifs_sb_info *cifs_sb;
1740         struct cifs_tcon *tcon;
1741         struct cifsFileInfo *cfile;
1742         __u32 type;
1743
1744         rc = -EACCES;
1745         xid = get_xid();
1746
1747         if (!(fl->fl_flags & FL_FLOCK))
1748                 return -ENOLCK;
1749
1750         cfile = (struct cifsFileInfo *)file->private_data;
1751         tcon = tlink_tcon(cfile->tlink);
1752
1753         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1754                         tcon->ses->server);
1755         cifs_sb = CIFS_FILE_SB(file);
1756
1757         if (cap_unix(tcon->ses) &&
1758             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1759             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1760                 posix_lck = true;
1761
1762         if (!lock && !unlock) {
1763                 /*
1764                  * if no lock or unlock then nothing to do since we do not
1765                  * know what it is
1766                  */
1767                 free_xid(xid);
1768                 return -EOPNOTSUPP;
1769         }
1770
1771         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1772                         xid);
1773         free_xid(xid);
1774         return rc;
1775
1776
1777 }
1778
1779 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1780 {
1781         int rc, xid;
1782         int lock = 0, unlock = 0;
1783         bool wait_flag = false;
1784         bool posix_lck = false;
1785         struct cifs_sb_info *cifs_sb;
1786         struct cifs_tcon *tcon;
1787         struct cifsFileInfo *cfile;
1788         __u32 type;
1789
1790         rc = -EACCES;
1791         xid = get_xid();
1792
1793         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1794                  cmd, flock->fl_flags, flock->fl_type,
1795                  flock->fl_start, flock->fl_end);
1796
1797         cfile = (struct cifsFileInfo *)file->private_data;
1798         tcon = tlink_tcon(cfile->tlink);
1799
1800         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1801                         tcon->ses->server);
1802         cifs_sb = CIFS_FILE_SB(file);
1803
1804         if (cap_unix(tcon->ses) &&
1805             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1806             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1807                 posix_lck = true;
1808         /*
1809          * BB add code here to normalize offset and length to account for
1810          * negative length which we can not accept over the wire.
1811          */
1812         if (IS_GETLK(cmd)) {
1813                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1814                 free_xid(xid);
1815                 return rc;
1816         }
1817
1818         if (!lock && !unlock) {
1819                 /*
1820                  * if no lock or unlock then nothing to do since we do not
1821                  * know what it is
1822                  */
1823                 free_xid(xid);
1824                 return -EOPNOTSUPP;
1825         }
1826
1827         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1828                         xid);
1829         free_xid(xid);
1830         return rc;
1831 }
1832
1833 /*
1834  * update the file size (if needed) after a write. Should be called with
1835  * the inode->i_lock held
1836  */
1837 void
1838 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1839                       unsigned int bytes_written)
1840 {
1841         loff_t end_of_write = offset + bytes_written;
1842
1843         if (end_of_write > cifsi->server_eof)
1844                 cifsi->server_eof = end_of_write;
1845 }
1846
/*
 * Synchronously write @write_size bytes from @write_data at *@offset
 * through the given open handle, on behalf of @pid.
 *
 * Each chunk is retried while the server returns -EAGAIN, reopening the
 * handle first if it was invalidated (e.g. by a reconnect).  On success
 * *@offset is advanced past the bytes written and both the cached server
 * EOF and the in-core i_size are updated under inode->i_lock.
 *
 * Returns the total number of bytes written (possibly short), or a
 * negative error code if nothing at all could be written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* cap each request at the server's retry window */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* report a partial write; fail only if none went out */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			/* i_lock serializes the server_eof update */
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		/* extend the in-core file size if the write grew the file */
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1928
/*
 * Find an open handle on @cifs_inode that is usable for reading and take
 * a reference on it.  When @fsuid_only is set (honored only on multiuser
 * mounts) the search is restricted to handles opened by the current
 * fsuid.  Returns NULL when no valid readable handle exists; the caller
 * must cifsFileInfo_put() any non-NULL result.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}
1962
/*
 * Find an open handle on @cifs_inode usable for writing, take a
 * reference on it and return it in *@ret_file.
 *
 * Handles opened by the current tgid are preferred; if none match, any
 * process's handle is considered.  @flags may restrict the search to the
 * current fsuid (FIND_WR_FSUID_ONLY, multiuser mounts only) or to
 * handles opened with DELETE access (FIND_WR_WITH_DELETE).  If only an
 * invalidated handle is found it is reopened outside the lock, retrying
 * up to MAX_REOPEN_ATT times.
 *
 * Return -EBADF if no handle is found and general rc otherwise.
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of on oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* give up after MAX_REOPEN_ATT failed reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass only considers handles of the current tgid */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* remember the first invalidated candidate */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		/* reopen the invalidated handle outside the spinlock */
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/* reopen failed: demote this handle and rescan the list */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
2053
2054 struct cifsFileInfo *
2055 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2056 {
2057         struct cifsFileInfo *cfile;
2058         int rc;
2059
2060         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2061         if (rc)
2062                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2063
2064         return cfile;
2065 }
2066
2067 int
2068 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2069                        int flags,
2070                        struct cifsFileInfo **ret_file)
2071 {
2072         struct list_head *tmp;
2073         struct cifsFileInfo *cfile;
2074         struct cifsInodeInfo *cinode;
2075         char *full_path;
2076
2077         *ret_file = NULL;
2078
2079         spin_lock(&tcon->open_file_lock);
2080         list_for_each(tmp, &tcon->openFileList) {
2081                 cfile = list_entry(tmp, struct cifsFileInfo,
2082                              tlist);
2083                 full_path = build_path_from_dentry(cfile->dentry);
2084                 if (full_path == NULL) {
2085                         spin_unlock(&tcon->open_file_lock);
2086                         return -ENOMEM;
2087                 }
2088                 if (strcmp(full_path, name)) {
2089                         kfree(full_path);
2090                         continue;
2091                 }
2092
2093                 kfree(full_path);
2094                 cinode = CIFS_I(d_inode(cfile->dentry));
2095                 spin_unlock(&tcon->open_file_lock);
2096                 return cifs_get_writable_file(cinode, flags, ret_file);
2097         }
2098
2099         spin_unlock(&tcon->open_file_lock);
2100         return -ENOENT;
2101 }
2102
2103 int
2104 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2105                        struct cifsFileInfo **ret_file)
2106 {
2107         struct list_head *tmp;
2108         struct cifsFileInfo *cfile;
2109         struct cifsInodeInfo *cinode;
2110         char *full_path;
2111
2112         *ret_file = NULL;
2113
2114         spin_lock(&tcon->open_file_lock);
2115         list_for_each(tmp, &tcon->openFileList) {
2116                 cfile = list_entry(tmp, struct cifsFileInfo,
2117                              tlist);
2118                 full_path = build_path_from_dentry(cfile->dentry);
2119                 if (full_path == NULL) {
2120                         spin_unlock(&tcon->open_file_lock);
2121                         return -ENOMEM;
2122                 }
2123                 if (strcmp(full_path, name)) {
2124                         kfree(full_path);
2125                         continue;
2126                 }
2127
2128                 kfree(full_path);
2129                 cinode = CIFS_I(d_inode(cfile->dentry));
2130                 spin_unlock(&tcon->open_file_lock);
2131                 *ret_file = find_readable_file(cinode, 0);
2132                 return *ret_file ? 0 : -ENOENT;
2133         }
2134
2135         spin_unlock(&tcon->open_file_lock);
2136         return -ENOENT;
2137 }
2138
/*
 * Write the byte range [@from, @to) of @page back to the server using
 * any available writable handle for the owning inode.
 *
 * Returns 0 on success, 0 if the page lies entirely beyond i_size (a
 * race with truncate - nothing to do), or a negative error code.  The
 * page is kmapped for the duration; every exit path kunmaps it.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	/* sanity check the requested range */
	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		/* only pass transient errors back to the caller */
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}
2196
2197 static struct cifs_writedata *
2198 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2199                           pgoff_t end, pgoff_t *index,
2200                           unsigned int *found_pages)
2201 {
2202         struct cifs_writedata *wdata;
2203
2204         wdata = cifs_writedata_alloc((unsigned int)tofind,
2205                                      cifs_writev_complete);
2206         if (!wdata)
2207                 return NULL;
2208
2209         *found_pages = find_get_pages_range_tag(mapping, index, end,
2210                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2211         return wdata;
2212 }
2213
/*
 * From the @found_pages pages collected in @wdata, lock and claim a run
 * of consecutive-index pages suitable for a single write request.
 *
 * Each claimed page is locked, re-checked against its mapping (it may
 * have been truncated or invalidated while unlocked), waited on for
 * in-flight writeback when syncing, cleaned, and marked under
 * writeback.  The scan stops at the first page that cannot be claimed.
 * *@next is set to the index following the last claimed page, *@done is
 * set when writeback should stop, and references on unclaimed pages are
 * dropped before returning.
 *
 * Returns the number of pages claimed (may be 0).
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		/* block for the first page; never block mid-run */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2292
2293 static int
2294 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2295                  struct address_space *mapping, struct writeback_control *wbc)
2296 {
2297         int rc;
2298
2299         wdata->sync_mode = wbc->sync_mode;
2300         wdata->nr_pages = nr_pages;
2301         wdata->offset = page_offset(wdata->pages[0]);
2302         wdata->pagesz = PAGE_SIZE;
2303         wdata->tailsz = min(i_size_read(mapping->host) -
2304                         page_offset(wdata->pages[nr_pages - 1]),
2305                         (loff_t)PAGE_SIZE);
2306         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2307         wdata->pid = wdata->cfile->pid;
2308
2309         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2310         if (rc)
2311                 return rc;
2312
2313         if (wdata->cfile->invalidHandle)
2314                 rc = -EAGAIN;
2315         else
2316                 rc = wdata->server->ops->async_writev(wdata,
2317                                                       cifs_writedata_release);
2318
2319         return rc;
2320 }
2321
2322 static int cifs_writepages(struct address_space *mapping,
2323                            struct writeback_control *wbc)
2324 {
2325         struct inode *inode = mapping->host;
2326         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2327         struct TCP_Server_Info *server;
2328         bool done = false, scanned = false, range_whole = false;
2329         pgoff_t end, index;
2330         struct cifs_writedata *wdata;
2331         struct cifsFileInfo *cfile = NULL;
2332         int rc = 0;
2333         int saved_rc = 0;
2334         unsigned int xid;
2335
2336         /*
2337          * If wsize is smaller than the page cache size, default to writing
2338          * one page at a time via cifs_writepage
2339          */
2340         if (cifs_sb->wsize < PAGE_SIZE)
2341                 return generic_writepages(mapping, wbc);
2342
2343         xid = get_xid();
2344         if (wbc->range_cyclic) {
2345                 index = mapping->writeback_index; /* Start from prev offset */
2346                 end = -1;
2347         } else {
2348                 index = wbc->range_start >> PAGE_SHIFT;
2349                 end = wbc->range_end >> PAGE_SHIFT;
2350                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2351                         range_whole = true;
2352                 scanned = true;
2353         }
2354         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2355
2356 retry:
2357         while (!done && index <= end) {
2358                 unsigned int i, nr_pages, found_pages, wsize;
2359                 pgoff_t next = 0, tofind, saved_index = index;
2360                 struct cifs_credits credits_on_stack;
2361                 struct cifs_credits *credits = &credits_on_stack;
2362                 int get_file_rc = 0;
2363
2364                 if (cfile)
2365                         cifsFileInfo_put(cfile);
2366
2367                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2368
2369                 /* in case of an error store it to return later */
2370                 if (rc)
2371                         get_file_rc = rc;
2372
2373                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2374                                                    &wsize, credits);
2375                 if (rc != 0) {
2376                         done = true;
2377                         break;
2378                 }
2379
2380                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2381
2382                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2383                                                   &found_pages);
2384                 if (!wdata) {
2385                         rc = -ENOMEM;
2386                         done = true;
2387                         add_credits_and_wake_if(server, credits, 0);
2388                         break;
2389                 }
2390
2391                 if (found_pages == 0) {
2392                         kref_put(&wdata->refcount, cifs_writedata_release);
2393                         add_credits_and_wake_if(server, credits, 0);
2394                         break;
2395                 }
2396
2397                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2398                                                end, &index, &next, &done);
2399
2400                 /* nothing to write? */
2401                 if (nr_pages == 0) {
2402                         kref_put(&wdata->refcount, cifs_writedata_release);
2403                         add_credits_and_wake_if(server, credits, 0);
2404                         continue;
2405                 }
2406
2407                 wdata->credits = credits_on_stack;
2408                 wdata->cfile = cfile;
2409                 wdata->server = server;
2410                 cfile = NULL;
2411
2412                 if (!wdata->cfile) {
2413                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2414                                  get_file_rc);
2415                         if (is_retryable_error(get_file_rc))
2416                                 rc = get_file_rc;
2417                         else
2418                                 rc = -EBADF;
2419                 } else
2420                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2421
2422                 for (i = 0; i < nr_pages; ++i)
2423                         unlock_page(wdata->pages[i]);
2424
2425                 /* send failure -- clean up the mess */
2426                 if (rc != 0) {
2427                         add_credits_and_wake_if(server, &wdata->credits, 0);
2428                         for (i = 0; i < nr_pages; ++i) {
2429                                 if (is_retryable_error(rc))
2430                                         redirty_page_for_writepage(wbc,
2431                                                            wdata->pages[i]);
2432                                 else
2433                                         SetPageError(wdata->pages[i]);
2434                                 end_page_writeback(wdata->pages[i]);
2435                                 put_page(wdata->pages[i]);
2436                         }
2437                         if (!is_retryable_error(rc))
2438                                 mapping_set_error(mapping, rc);
2439                 }
2440                 kref_put(&wdata->refcount, cifs_writedata_release);
2441
2442                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2443                         index = saved_index;
2444                         continue;
2445                 }
2446
2447                 /* Return immediately if we received a signal during writing */
2448                 if (is_interrupt_error(rc)) {
2449                         done = true;
2450                         break;
2451                 }
2452
2453                 if (rc != 0 && saved_rc == 0)
2454                         saved_rc = rc;
2455
2456                 wbc->nr_to_write -= nr_pages;
2457                 if (wbc->nr_to_write <= 0)
2458                         done = true;
2459
2460                 index = next;
2461         }
2462
2463         if (!scanned && !done) {
2464                 /*
2465                  * We hit the last page and there is more work to be done: wrap
2466                  * back to the start of the file
2467                  */
2468                 scanned = true;
2469                 index = 0;
2470                 goto retry;
2471         }
2472
2473         if (saved_rc != 0)
2474                 rc = saved_rc;
2475
2476         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2477                 mapping->writeback_index = index;
2478
2479         if (cfile)
2480                 cifsFileInfo_put(cfile);
2481         free_xid(xid);
2482         return rc;
2483 }
2484
/*
 * Write out a single, already-locked page using a synchronous
 * partial-page write (cifs_partialpagewrite).  The page remains locked
 * on return; the caller (e.g. cifs_writepage) must unlock it.
 * Returns 0 on success or a negative error code.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		/*
		 * Data-integrity writeback (WB_SYNC_ALL) keeps retrying
		 * -EAGAIN here; otherwise re-dirty the page so a later
		 * writeback pass picks it up again.
		 */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		/* hard failure: flag the page and record it on the mapping */
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);	/* drop the reference taken above */
	free_xid(xid);
	return rc;
}
2525
/* Address-space .writepage: write the locked page, then release its lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int retval = cifs_writepage_locked(page, wbc);

	unlock_page(page);
	return retval;
}
2532
/*
 * Address-space .write_end: commit @copied bytes written into @page at
 * @pos.  If the page is not uptodate the data is pushed synchronously to
 * the server via cifs_write(); otherwise the page is just marked dirty
 * for later writeback.  Extends i_size when the write goes past EOF.
 * Returns the number of bytes accepted or a negative error code.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* with RWPIDFORWARD, use the pid of the process that opened the file */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	if (PageChecked(page)) {
		/* page was flagged by write_begin: uptodate only if fully written */
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		/* grow the cached file size if this write extended the file */
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);

	return rc;
}
2593
2594 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2595                       int datasync)
2596 {
2597         unsigned int xid;
2598         int rc = 0;
2599         struct cifs_tcon *tcon;
2600         struct TCP_Server_Info *server;
2601         struct cifsFileInfo *smbfile = file->private_data;
2602         struct inode *inode = file_inode(file);
2603         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2604
2605         rc = file_write_and_wait_range(file, start, end);
2606         if (rc) {
2607                 trace_cifs_fsync_err(inode->i_ino, rc);
2608                 return rc;
2609         }
2610
2611         xid = get_xid();
2612
2613         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2614                  file, datasync);
2615
2616         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2617                 rc = cifs_zap_mapping(inode);
2618                 if (rc) {
2619                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2620                         rc = 0; /* don't care about it in fsync */
2621                 }
2622         }
2623
2624         tcon = tlink_tcon(smbfile->tlink);
2625         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2626                 server = tcon->ses->server;
2627                 if (server->ops->flush)
2628                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2629                 else
2630                         rc = -ENOSYS;
2631         }
2632
2633         free_xid(xid);
2634         return rc;
2635 }
2636
2637 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2638 {
2639         unsigned int xid;
2640         int rc = 0;
2641         struct cifs_tcon *tcon;
2642         struct TCP_Server_Info *server;
2643         struct cifsFileInfo *smbfile = file->private_data;
2644         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2645
2646         rc = file_write_and_wait_range(file, start, end);
2647         if (rc) {
2648                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2649                 return rc;
2650         }
2651
2652         xid = get_xid();
2653
2654         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2655                  file, datasync);
2656
2657         tcon = tlink_tcon(smbfile->tlink);
2658         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2659                 server = tcon->ses->server;
2660                 if (server->ops->flush)
2661                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2662                 else
2663                         rc = -ENOSYS;
2664         }
2665
2666         free_xid(xid);
2667         return rc;
2668 }
2669
2670 /*
2671  * As file closes, flush all cached write data for this inode checking
2672  * for write behind errors.
2673  */
2674 int cifs_flush(struct file *file, fl_owner_t id)
2675 {
2676         struct inode *inode = file_inode(file);
2677         int rc = 0;
2678
2679         if (file->f_mode & FMODE_WRITE)
2680                 rc = filemap_write_and_wait(inode->i_mapping);
2681
2682         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2683         if (rc)
2684                 trace_cifs_flush_err(inode->i_ino, rc);
2685         return rc;
2686 }
2687
2688 static int
2689 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2690 {
2691         int rc = 0;
2692         unsigned long i;
2693
2694         for (i = 0; i < num_pages; i++) {
2695                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2696                 if (!pages[i]) {
2697                         /*
2698                          * save number of pages we have already allocated and
2699                          * return with ENOMEM error
2700                          */
2701                         num_pages = i;
2702                         rc = -ENOMEM;
2703                         break;
2704                 }
2705         }
2706
2707         if (rc) {
2708                 for (i = 0; i < num_pages; i++)
2709                         put_page(pages[i]);
2710         }
2711         return rc;
2712 }
2713
2714 static inline
2715 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2716 {
2717         size_t num_pages;
2718         size_t clen;
2719
2720         clen = min_t(const size_t, len, wsize);
2721         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2722
2723         if (cur_len)
2724                 *cur_len = clen;
2725
2726         return num_pages;
2727 }
2728
2729 static void
2730 cifs_uncached_writedata_release(struct kref *refcount)
2731 {
2732         int i;
2733         struct cifs_writedata *wdata = container_of(refcount,
2734                                         struct cifs_writedata, refcount);
2735
2736         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2737         for (i = 0; i < wdata->nr_pages; i++)
2738                 put_page(wdata->pages[i]);
2739         cifs_writedata_release(refcount);
2740 }
2741
2742 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2743
/*
 * Work-queue completion handler for an uncached (direct/aio) write.
 * Advances the cached server EOF / i_size to cover the written range,
 * signals waiters on wdata->done, triggers collection of finished
 * writes, and drops the in-flight reference on the wdata.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/* i_lock protects both the server_eof update and i_size */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2763
2764 static int
2765 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2766                       size_t *len, unsigned long *num_pages)
2767 {
2768         size_t save_len, copied, bytes, cur_len = *len;
2769         unsigned long i, nr_pages = *num_pages;
2770
2771         save_len = cur_len;
2772         for (i = 0; i < nr_pages; i++) {
2773                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2774                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2775                 cur_len -= copied;
2776                 /*
2777                  * If we didn't copy as much as we expected, then that
2778                  * may mean we trod into an unmapped area. Stop copying
2779                  * at that point. On the next pass through the big
2780                  * loop, we'll likely end up getting a zero-length
2781                  * write and bailing out of it.
2782                  */
2783                 if (copied < bytes)
2784                         break;
2785         }
2786         cur_len = save_len - cur_len;
2787         *len = cur_len;
2788
2789         /*
2790          * If we have no data to send, then that probably means that
2791          * the copy above failed altogether. That's most likely because
2792          * the address in the iovec was bogus. Return -EFAULT and let
2793          * the caller free anything we allocated and bail out.
2794          */
2795         if (!cur_len)
2796                 return -EFAULT;
2797
2798         /*
2799          * i + 1 now represents the number of pages we actually used in
2800          * the copy phase above.
2801          */
2802         *num_pages = i + 1;
2803         return 0;
2804 }
2805
/*
 * Resend a previously failed wdata as a single request (not split into
 * smaller segments).  Re-opens the file handle if it was invalidated,
 * waits until enough credits are available to cover the whole wdata,
 * then reissues the async write, retrying the sequence on -EAGAIN.
 * On success the wdata is queued on @wdata_list and 0 is returned; on
 * failure the wdata's reference is dropped and the error returned.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				/* not enough yet: give credits back and wait */
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				if (wdata->mr) {
					/* old RDMA region can't be reused; drop it */
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}
2875
/*
 * Split an uncached write of @len bytes at @offset (data taken from
 * @from) into wsize-bounded chunks, build a cifs_writedata for each and
 * issue them as async writes.  For direct I/O the user pages are pinned
 * via iov_iter_get_pages_alloc(); otherwise data is copied into freshly
 * allocated pages.  Successfully issued wdatas are queued on
 * @wdata_list for collection by collect_uncached_write_data().
 * Returns 0 or the first non-retried error.
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	/* saved so an -EAGAIN retry can rewind the iterator */
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	/* with RWPIDFORWARD, use the pid of the process that opened the file */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* block until credits for up to wsize bytes are available */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			ssize_t result;

			/* pin the user pages directly; no copy is made */
			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(from),
					 from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}


			wdata->page_offset = start;
			/* bytes in the final page (whole chunk if only one page) */
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			/* buffered path: allocate pages and copy data in */
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->server = server;
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		/* each in-flight wdata holds a reference on the aio ctx */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			/* send failed: return credits and drop the wdata ref */
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator to this chunk and retry */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3041
/*
 * Gather the results of all wdatas queued on @ctx->list, in list order.
 * Called both from the issuing path and from each write's completion
 * work; the ctx->aio_mutex serializes collectors and the function bails
 * out early (without blocking) whenever the next wdata has not yet
 * completed.  Retryable (-EAGAIN) wdatas are resent in-line.  When the
 * list is fully drained, ctx->rc is set to the total bytes written (or
 * the first error) and the iocb is completed / the sync waiter woken.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* empty list means another collector already finished the job */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* not done yet: leave and let its completion re-enter us */
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					/* re-copy this range from the saved iter */
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				/* splice resent wdatas back and rescan the list */
				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	/* cached pages are now stale relative to what we just wrote */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3123
/*
 * Common implementation of the uncached write paths (cifs_direct_writev and
 * cifs_user_writev).  The write is split into asynchronous requests by
 * cifs_write_from_iter(); for synchronous iocbs we then wait for all of
 * them to complete, for async iocbs we return -EIOCBQUEUED and completion
 * is reported through ctx->iocb->ki_complete().
 *
 * @iocb:   kiocb describing the write (position, flags, async completion)
 * @from:   source data iterator
 * @direct: if true, send directly from the caller's pages without copying
 *          into a bounce buffer; forced to false for ITER_KVEC iterators
 *
 * Returns bytes written, -EIOCBQUEUED for a queued async write, or a
 * negative error code.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	/*
	 * NOTE(review): len is sampled before generic_write_checks(), which
	 * may shrink the iov count (rlimits, max file size) — confirm the
	 * direct path's ctx->len should not use the post-check count.
	 */
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(from)) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx holds its own reference on the open file until released */
	ctx->cfile = cifsFileInfo_get(cfile);

	/* only record the iocb when completion must be signalled async */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		/* copy user data into ctx-owned pages for the async sends */
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here because write response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: drop our ref; completion path owns the ctx now */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR but keep partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3227
3228 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3229 {
3230         return __cifs_writev(iocb, from, true);
3231 }
3232
3233 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3234 {
3235         return __cifs_writev(iocb, from, false);
3236 }
3237
/*
 * Page-cache write for oplocked files that also honours mandatory
 * byte-range locks.
 *
 * Lock ordering: inode_lock nests outside cinode->lock_sem.  lock_sem is
 * held shared for the whole write so no conflicting brlock can be added
 * while the data is going into the page cache.
 *
 * Returns bytes written, -EACCES on a mandatory-lock conflict, or another
 * negative error from the generic write path.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* O_SYNC/O_DSYNC flush must run without the locks held */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
3273
/*
 * ->write_iter for strict cache mode.  Chooses between a cached write
 * (when we hold a write oplock/lease) and an uncached write, and zaps the
 * page cache afterwards when only read caching is held.  cifs_get_writer()
 * gates the write against an in-progress oplock break.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/* POSIX byte-range lock semantics: generic path is safe */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		/* mandatory locks possible: use the lock-aware cached write */
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause an error with mandatory locks on
	 * these pages but not on the region from pos to pos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3323
3324 static struct cifs_readdata *
3325 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3326 {
3327         struct cifs_readdata *rdata;
3328
3329         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3330         if (rdata != NULL) {
3331                 rdata->pages = pages;
3332                 kref_init(&rdata->refcount);
3333                 INIT_LIST_HEAD(&rdata->list);
3334                 init_completion(&rdata->done);
3335                 INIT_WORK(&rdata->work, complete);
3336         }
3337
3338         return rdata;
3339 }
3340
3341 static struct cifs_readdata *
3342 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3343 {
3344         struct page **pages =
3345                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3346         struct cifs_readdata *ret = NULL;
3347
3348         if (pages) {
3349                 ret = cifs_readdata_direct_alloc(pages, complete);
3350                 if (!ret)
3351                         kfree(pages);
3352         }
3353
3354         return ret;
3355 }
3356
/*
 * kref release function for a cifs_readdata: final teardown once the last
 * reference is dropped.  Order matters: the SMB Direct memory registration
 * must be deregistered before the backing structures are freed, and the
 * open-file reference is dropped before the readdata itself.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	/* pages array may come from kcalloc or iov_iter_get_pages_alloc */
	kvfree(rdata->pages);
	kfree(rdata);
}
3374
3375 static int
3376 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3377 {
3378         int rc = 0;
3379         struct page *page;
3380         unsigned int i;
3381
3382         for (i = 0; i < nr_pages; i++) {
3383                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3384                 if (!page) {
3385                         rc = -ENOMEM;
3386                         break;
3387                 }
3388                 rdata->pages[i] = page;
3389         }
3390
3391         if (rc) {
3392                 unsigned int nr_page_failed = i;
3393
3394                 for (i = 0; i < nr_page_failed; i++) {
3395                         put_page(rdata->pages[i]);
3396                         rdata->pages[i] = NULL;
3397                 }
3398         }
3399         return rc;
3400 }
3401
3402 static void
3403 cifs_uncached_readdata_release(struct kref *refcount)
3404 {
3405         struct cifs_readdata *rdata = container_of(refcount,
3406                                         struct cifs_readdata, refcount);
3407         unsigned int i;
3408
3409         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3410         for (i = 0; i < rdata->nr_pages; i++) {
3411                 put_page(rdata->pages[i]);
3412         }
3413         cifs_readdata_release(refcount);
3414 }
3415
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iter:       destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 *
 * Returns 0 when all of rdata->got_bytes was consumed, -EFAULT otherwise.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
	size_t remaining = rdata->got_bytes;
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
		/* at most one page's worth per iteration */
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
		size_t written;

		if (unlikely(iov_iter_is_pipe(iter))) {
			/*
			 * Pipe destinations can't take copy_page_to_iter()
			 * directly here; map the page and copy by address.
			 */
			void *addr = kmap_atomic(page);

			written = copy_to_iter(addr, copy, iter);
			kunmap_atomic(addr);
		} else
			written = copy_page_to_iter(page, 0, copy, iter);
		remaining -= written;
		/* short copy with room left in iter means a fault occurred */
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	return remaining ? -EFAULT : 0;
}
3449
3450 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3451
/*
 * Work function run when an uncached read request finishes.  The order of
 * the three calls is significant: signal this rdata's completion first so
 * collect_uncached_read_data() can see it as done, then attempt collection,
 * and only then drop our rdata reference.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3463
3464 static int
3465 uncached_fill_pages(struct TCP_Server_Info *server,
3466                     struct cifs_readdata *rdata, struct iov_iter *iter,
3467                     unsigned int len)
3468 {
3469         int result = 0;
3470         unsigned int i;
3471         unsigned int nr_pages = rdata->nr_pages;
3472         unsigned int page_offset = rdata->page_offset;
3473
3474         rdata->got_bytes = 0;
3475         rdata->tailsz = PAGE_SIZE;
3476         for (i = 0; i < nr_pages; i++) {
3477                 struct page *page = rdata->pages[i];
3478                 size_t n;
3479                 unsigned int segment_size = rdata->pagesz;
3480
3481                 if (i == 0)
3482                         segment_size -= page_offset;
3483                 else
3484                         page_offset = 0;
3485
3486
3487                 if (len <= 0) {
3488                         /* no need to hold page hostage */
3489                         rdata->pages[i] = NULL;
3490                         rdata->nr_pages--;
3491                         put_page(page);
3492                         continue;
3493                 }
3494
3495                 n = len;
3496                 if (len >= segment_size)
3497                         /* enough data to fill the page */
3498                         n = segment_size;
3499                 else
3500                         rdata->tailsz = len;
3501                 len -= n;
3502
3503                 if (iter)
3504                         result = copy_page_from_iter(
3505                                         page, page_offset, n, iter);
3506 #ifdef CONFIG_CIFS_SMB_DIRECT
3507                 else if (rdata->mr)
3508                         result = n;
3509 #endif
3510                 else
3511                         result = cifs_read_page_from_socket(
3512                                         server, page, page_offset, n);
3513                 if (result < 0)
3514                         break;
3515
3516                 rdata->got_bytes += result;
3517         }
3518
3519         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3520                                                 rdata->got_bytes : result;
3521 }
3522
3523 static int
3524 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3525                               struct cifs_readdata *rdata, unsigned int len)
3526 {
3527         return uncached_fill_pages(server, rdata, NULL, len);
3528 }
3529
3530 static int
3531 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3532                               struct cifs_readdata *rdata,
3533                               struct iov_iter *iter)
3534 {
3535         return uncached_fill_pages(server, rdata, iter, iter->count);
3536 }
3537
/*
 * Resend a whole rdata after a retryable failure (e.g. reconnect).
 * Waits until enough credits are available to cover the full rdata->bytes
 * (it is not re-segmented), re-registers SMB Direct memory if needed, and
 * requeues the async read.  On success the rdata is put back on
 * @rdata_list; on permanent failure the caller's reference is dropped.
 *
 * Returns 0 on successful resubmission, negative error otherwise.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			/* not enough yet: give credits back and retry later */
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* old registration is stale; drop it so the
				 * resend sets up a fresh one */
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}
3609
/*
 * Split an uncached read of @len bytes at @offset into credit-sized async
 * read requests and queue them on @rdata_list.  For direct I/O the user's
 * pages are pinned via iov_iter_get_pages_alloc(); otherwise bounce pages
 * are allocated per request.  Each queued rdata takes a reference on @ctx.
 *
 * Returns 0 when the final request was sent, or the error that stopped
 * submission (earlier requests may still be on @rdata_list).
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* this call may be for a resend starting past ctx->pos */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/* request size is bounded by the credits we were granted */
		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(&direct_iov),
					 direct_iov.iov_offset,
					 direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			/* last page holds whatever doesn't fill a full page */
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		/* each in-flight rdata keeps the aio ctx alive */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			/* failed to send: return credits and drop this rdata */
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* rewind the iter so the retry re-pins pages */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3751
/*
 * Gather the results of all outstanding uncached read requests attached to
 * @ctx.  Called from each request's completion work; only the invocation
 * that finds every rdata completed finishes the aio (signals ctx->done or
 * calls ki_complete).  Requests that failed with -EAGAIN are resubmitted.
 * ctx->aio_mutex serializes concurrent completions.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* another completion already finished the collection */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* some request still in flight: bail, a later
			 * completion will resume collection */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				/* restart: the list changed under us */
				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3856
/*
 * Common implementation of the uncached read paths (cifs_direct_readv and
 * cifs_user_readv).  Fans the read out as async requests via
 * cifs_send_async_read(); collection happens in
 * collect_uncached_read_data() driven by request completions.
 *
 * @iocb:   kiocb describing the read
 * @to:     destination iterator
 * @direct: if true, read directly into the caller's pages; forced to
 *          false for ITER_KVEC iterators
 *
 * Returns bytes read, -EIOCBQUEUED for a queued async read, or a negative
 * error code.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx holds its own reference on the open file until released */
	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed pages must be marked dirty after we fill them */
	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		/* bounce-buffer path: allocate ctx-owned pages for the read */
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: completion is reported via ki_complete later */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR but keep partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3959
3960 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3961 {
3962         return __cifs_readv(iocb, to, true);
3963 }
3964
3965 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3966 {
3967         return __cifs_readv(iocb, to, false);
3968 }
3969
/*
 * Read entry point for "strict" cache mode: use the generic (cached) read
 * path only when we hold a read (level II) oplock and no mandatory brlock
 * conflicts with the requested range; otherwise read from the server.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/* POSIX (unix extension) locks don't block cached reads */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
4009
4010 static ssize_t
4011 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4012 {
4013         int rc = -EACCES;
4014         unsigned int bytes_read = 0;
4015         unsigned int total_read;
4016         unsigned int current_read_size;
4017         unsigned int rsize;
4018         struct cifs_sb_info *cifs_sb;
4019         struct cifs_tcon *tcon;
4020         struct TCP_Server_Info *server;
4021         unsigned int xid;
4022         char *cur_offset;
4023         struct cifsFileInfo *open_file;
4024         struct cifs_io_parms io_parms = {0};
4025         int buf_type = CIFS_NO_BUFFER;
4026         __u32 pid;
4027
4028         xid = get_xid();
4029         cifs_sb = CIFS_FILE_SB(file);
4030
4031         /* FIXME: set up handlers for larger reads and/or convert to async */
4032         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4033
4034         if (file->private_data == NULL) {
4035                 rc = -EBADF;
4036                 free_xid(xid);
4037                 return rc;
4038         }
4039         open_file = file->private_data;
4040         tcon = tlink_tcon(open_file->tlink);
4041         server = cifs_pick_channel(tcon->ses);
4042
4043         if (!server->ops->sync_read) {
4044                 free_xid(xid);
4045                 return -ENOSYS;
4046         }
4047
4048         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4049                 pid = open_file->pid;
4050         else
4051                 pid = current->tgid;
4052
4053         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4054                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4055
4056         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4057              total_read += bytes_read, cur_offset += bytes_read) {
4058                 do {
4059                         current_read_size = min_t(uint, read_size - total_read,
4060                                                   rsize);
4061                         /*
4062                          * For windows me and 9x we do not want to request more
4063                          * than it negotiated since it will refuse the read
4064                          * then.
4065                          */
4066                         if (!(tcon->ses->capabilities &
4067                                 tcon->ses->server->vals->cap_large_files)) {
4068                                 current_read_size = min_t(uint,
4069                                         current_read_size, CIFSMaxBufSize);
4070                         }
4071                         if (open_file->invalidHandle) {
4072                                 rc = cifs_reopen_file(open_file, true);
4073                                 if (rc != 0)
4074                                         break;
4075                         }
4076                         io_parms.pid = pid;
4077                         io_parms.tcon = tcon;
4078                         io_parms.offset = *offset;
4079                         io_parms.length = current_read_size;
4080                         io_parms.server = server;
4081                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4082                                                     &bytes_read, &cur_offset,
4083                                                     &buf_type);
4084                 } while (rc == -EAGAIN);
4085
4086                 if (rc || (bytes_read == 0)) {
4087                         if (total_read) {
4088                                 break;
4089                         } else {
4090                                 free_xid(xid);
4091                                 return rc;
4092                         }
4093                 } else {
4094                         cifs_stats_bytes_read(tcon, total_read);
4095                         *offset += bytes_read;
4096                 }
4097         }
4098         free_xid(xid);
4099         return total_read;
4100 }
4101
4102 /*
4103  * If the page is mmap'ed into a process' page tables, then we need to make
4104  * sure that it doesn't change while being written back.
4105  */
4106 static vm_fault_t
4107 cifs_page_mkwrite(struct vm_fault *vmf)
4108 {
4109         struct page *page = vmf->page;
4110
4111         lock_page(page);
4112         return VM_FAULT_LOCKED;
4113 }
4114
/* mmap handlers: generic fault/map paths plus our write-protect hook */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
4120
4121 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4122 {
4123         int xid, rc = 0;
4124         struct inode *inode = file_inode(file);
4125
4126         xid = get_xid();
4127
4128         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4129                 rc = cifs_zap_mapping(inode);
4130         if (!rc)
4131                 rc = generic_file_mmap(file, vma);
4132         if (!rc)
4133                 vma->vm_ops = &cifs_file_vm_ops;
4134
4135         free_xid(xid);
4136         return rc;
4137 }
4138
4139 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4140 {
4141         int rc, xid;
4142
4143         xid = get_xid();
4144
4145         rc = cifs_revalidate_file(file);
4146         if (rc)
4147                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4148                          rc);
4149         if (!rc)
4150                 rc = generic_file_mmap(file, vma);
4151         if (!rc)
4152                 vma->vm_ops = &cifs_file_vm_ops;
4153
4154         free_xid(xid);
4155         return rc;
4156 }
4157
4158 static void
4159 cifs_readv_complete(struct work_struct *work)
4160 {
4161         unsigned int i, got_bytes;
4162         struct cifs_readdata *rdata = container_of(work,
4163                                                 struct cifs_readdata, work);
4164
4165         got_bytes = rdata->got_bytes;
4166         for (i = 0; i < rdata->nr_pages; i++) {
4167                 struct page *page = rdata->pages[i];
4168
4169                 lru_cache_add(page);
4170
4171                 if (rdata->result == 0 ||
4172                     (rdata->result == -EAGAIN && got_bytes)) {
4173                         flush_dcache_page(page);
4174                         SetPageUptodate(page);
4175                 }
4176
4177                 unlock_page(page);
4178
4179                 if (rdata->result == 0 ||
4180                     (rdata->result == -EAGAIN && got_bytes))
4181                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4182
4183                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4184
4185                 put_page(page);
4186                 rdata->pages[i] = NULL;
4187         }
4188         kref_put(&rdata->refcount, cifs_readdata_release);
4189 }
4190
/*
 * Distribute @len bytes of response data across the pages of @rdata:
 * read from @iter if given, from the SMB-direct MR if registered, else
 * straight from the socket.  A partial final page is zero-filled; pages
 * wholly past the server EOF are zeroed and marked uptodate; pages past
 * the data with no EOF excuse are simply released.
 *
 * Returns the number of bytes placed into pages, or a negative error
 * (except -ECONNABORTED after partial progress, which reports the bytes).
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page may start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
4276
/* read_into_pages callback: pull @len bytes from the transport */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
4283
/* copy_into_pages callback: copy already-received data from @iter */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
4291
/*
 * Peel a run of index-contiguous pages off the tail of @page_list, lock
 * them, insert them into the page cache, and move them to @tmplist,
 * stopping at a discontinuity or once @rsize bytes are gathered.
 *
 * On success returns 0 with *offset/*bytes/*nr_pages describing the run;
 * returns an error only if even the first page cannot be added to the
 * page cache.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4350
/*
 * ->readpages() for the cached read path: batch runs of contiguous pages
 * from @page_list into rsize-bounded async read requests; completion is
 * handled by cifs_readv_complete().  Pages that cannot be submitted are
 * unlocked and released so the VFS can fall back to ->readpage().
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* reserve transport credits, capped at cifs_sb->rsize */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our submission reference; completion holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4503
4504 /*
4505  * cifs_readpage_worker must be called with the page pinned
4506  */
4507 static int cifs_readpage_worker(struct file *file, struct page *page,
4508         loff_t *poffset)
4509 {
4510         char *read_data;
4511         int rc;
4512
4513         /* Is the page cached? */
4514         rc = cifs_readpage_from_fscache(file_inode(file), page);
4515         if (rc == 0)
4516                 goto read_complete;
4517
4518         read_data = kmap(page);
4519         /* for reads over a certain size could initiate async read ahead */
4520
4521         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4522
4523         if (rc < 0)
4524                 goto io_error;
4525         else
4526                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4527
4528         /* we do not want atime to be less than mtime, it broke some apps */
4529         file_inode(file)->i_atime = current_time(file_inode(file));
4530         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4531                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4532         else
4533                 file_inode(file)->i_atime = current_time(file_inode(file));
4534
4535         if (PAGE_SIZE > rc)
4536                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4537
4538         flush_dcache_page(page);
4539         SetPageUptodate(page);
4540
4541         /* send this page to the cache */
4542         cifs_readpage_to_fscache(file_inode(file), page);
4543
4544         rc = 0;
4545
4546 io_error:
4547         kunmap(page);
4548         unlock_page(page);
4549
4550 read_complete:
4551         return rc;
4552 }
4553
4554 static int cifs_readpage(struct file *file, struct page *page)
4555 {
4556         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4557         int rc = -EACCES;
4558         unsigned int xid;
4559
4560         xid = get_xid();
4561
4562         if (file->private_data == NULL) {
4563                 rc = -EBADF;
4564                 free_xid(xid);
4565                 return rc;
4566         }
4567
4568         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4569                  page, (int)offset, (int)offset);
4570
4571         rc = cifs_readpage_worker(file, page, &offset);
4572
4573         free_xid(xid);
4574         return rc;
4575 }
4576
4577 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4578 {
4579         struct cifsFileInfo *open_file;
4580
4581         spin_lock(&cifs_inode->open_file_lock);
4582         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4583                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4584                         spin_unlock(&cifs_inode->open_file_lock);
4585                         return 1;
4586                 }
4587         }
4588         spin_unlock(&cifs_inode->open_file_lock);
4589         return 0;
4590 }
4591
4592 /* We do not want to update the file size from server for inodes
4593    open for write - to avoid races with writepage extending
4594    the file - in the future we could consider allowing
4595    refreshing the inode only on increases in the file size
4596    but this is tricky to do without racing with writebehind
4597    page caching in the current Linux kernel design */
4598 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4599 {
4600         if (!cifsInode)
4601                 return true;
4602
4603         if (is_inode_writable(cifsInode)) {
4604                 /* This inode is open for write at least once */
4605                 struct cifs_sb_info *cifs_sb;
4606
4607                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4608                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4609                         /* since no page cache to corrupt on directio
4610                         we can change size safely */
4611                         return true;
4612                 }
4613
4614                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4615                         return true;
4616
4617                 return false;
4618         } else
4619                 return true;
4620 }
4621
/*
 * ->write_begin(): locate/allocate the page for a buffered write and
 * decide whether its existing contents must first be read.  A full-page
 * write, or a write covering all live data while we hold a read oplock,
 * skips the read; otherwise one read attempt is made (retried at most
 * once via the oncethru/goto start loop).
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4698
4699 static int cifs_release_page(struct page *page, gfp_t gfp)
4700 {
4701         if (PagePrivate(page))
4702                 return 0;
4703
4704         return cifs_fscache_release_page(page, gfp);
4705 }
4706
/*
 * ->invalidatepage(): only a whole-page invalidation needs to drop the
 * fscache copy; partial invalidations leave it alone.
 */
static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}
4715
4716 static int cifs_launder_page(struct page *page)
4717 {
4718         int rc = 0;
4719         loff_t range_start = page_offset(page);
4720         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4721         struct writeback_control wbc = {
4722                 .sync_mode = WB_SYNC_ALL,
4723                 .nr_to_write = 0,
4724                 .range_start = range_start,
4725                 .range_end = range_end,
4726         };
4727
4728         cifs_dbg(FYI, "Launder page: %p\n", page);
4729
4730         if (clear_page_dirty_for_io(page))
4731                 rc = cifs_writepage_locked(page, &wbc);
4732
4733         cifs_fscache_invalidate_page(page, page->mapping->host);
4734         return rc;
4735 }
4736
/*
 * Work item run when the server breaks (revokes or downgrades) an oplock
 * or lease on an open file.  Downgrades the local caching level, flushes
 * any cached data the client may no longer cache, pushes byte-range locks
 * to the server, and finally acknowledges the break.
 *
 * NOTE(review): the statement order here is load-bearing — writers must
 * drain before the downgrade, and the flush must precede the ack.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	/* set by ->downgrade_oplock() if cached pages must be discarded */
	bool purge_cache = false;

	/* Wait until all in-flight writers on this inode have finished */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	/*
	 * Mandatory byte-range locks cannot be cached read-only: drop the
	 * oplock entirely so all I/O goes to the server.
	 */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* Break any local (VFS-level) leases to match the new state */
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* Read caching lost: wait for writeback, drop pages */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/* Write caching retained: locks stay cached, skip the push */
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	/* Drop the reference taken when the break was queued */
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
4796
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
4806 static ssize_t
4807 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4808 {
4809         /*
4810          * FIXME
4811          * Eventually need to support direct IO for non forcedirectio mounts
4812          */
4813         return -EINVAL;
4814 }
4815
4816 static int cifs_swap_activate(struct swap_info_struct *sis,
4817                               struct file *swap_file, sector_t *span)
4818 {
4819         struct cifsFileInfo *cfile = swap_file->private_data;
4820         struct inode *inode = swap_file->f_mapping->host;
4821         unsigned long blocks;
4822         long long isize;
4823
4824         cifs_dbg(FYI, "swap activate\n");
4825
4826         spin_lock(&inode->i_lock);
4827         blocks = inode->i_blocks;
4828         isize = inode->i_size;
4829         spin_unlock(&inode->i_lock);
4830         if (blocks*512 < isize) {
4831                 pr_warn("swap activate: swapfile has holes\n");
4832                 return -EINVAL;
4833         }
4834         *span = sis->pages;
4835
4836         pr_warn_once("Swap support over SMB3 is experimental\n");
4837
4838         /*
4839          * TODO: consider adding ACL (or documenting how) to prevent other
4840          * users (on this or other systems) from reading it
4841          */
4842
4843
4844         /* TODO: add sk_set_memalloc(inet) or similar */
4845
4846         if (cfile)
4847                 cfile->swapfile = true;
4848         /*
4849          * TODO: Since file already open, we can't open with DENY_ALL here
4850          * but we could add call to grab a byte range lock to prevent others
4851          * from reading or writing the file
4852          */
4853
4854         return 0;
4855 }
4856
4857 static void cifs_swap_deactivate(struct file *file)
4858 {
4859         struct cifsFileInfo *cfile = file->private_data;
4860
4861         cifs_dbg(FYI, "swap deactivate\n");
4862
4863         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4864
4865         if (cfile)
4866                 cfile->swapfile = false;
4867
4868         /* do we need to unpin (or unlock) the file */
4869 }
4870
/*
 * Address space operations for cached (default) mounts, used when the
 * server's buffer size is large enough for multi-page reads.
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/*
	 * TODO: investigate and if useful we could add an cifs_migratePage
	 * helper (under an CONFIG_MIGRATION) in the future, and also
	 * investigate and add an is_dirty_writeback helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};
4891
4892 /*
4893  * cifs_readpages requires the server to support a buffer large enough to
4894  * contain the header plus one complete page of data.  Otherwise, we need
4895  * to leave cifs_readpages out of the address space operations.
4896  */
/*
 * Reduced op set for small-buffer servers: no .readpages (see the comment
 * above), no .direct_IO, and no swap support.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};