/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

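/*
 * Map the POSIX access mode bits (O_RDONLY/O_WRONLY/O_RDWR) to the NT
 * desired-access bits requested in the SMB open, falling back to a
 * conservative set of read/write/attribute rights if no mode matches.
 */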
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request and can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

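/*
 * Map POSIX open flags to the SMB_O_* flags used by the legacy CIFS POSIX
 * create call (see cifs_posix_open() below).
 */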
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

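/*
 * Derive the NT create disposition from the O_CREAT/O_EXCL/O_TRUNC
 * combination; see the open flag mapping table in cifs_nt_open() below.
 */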
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

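/*
 * Open a file using the legacy CIFS POSIX extensions, then either fetch a
 * new inode for it or revalidate the caller's existing inode from the
 * FILE_UNIX_BASIC_INFO the server returned.
 */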
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

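/*
 * Open a file the traditional (non-POSIX) way: convert the VFS open flags
 * to NT desired access and create disposition, issue the open through the
 * server ops, then query inode info for the new handle.
 */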
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the FILE_SUPERSEDE
 *      disposition (ie create whether or not the file exists);
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new one as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open
 *      call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

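/*
 * Return true if any open fid on this inode still holds byte-range locks;
 * walks cinode->llist under a read-locked lock_sem.
 */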
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

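/*
 * Take @sem for writing by polling down_write_trylock() with short sleeps
 * rather than blocking in down_write().
 */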
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

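/*
 * Allocate and initialize the per-open cifsFileInfo, attach its lock list
 * to the inode, resolve the final oplock level against any pending open,
 * and add the new instance to the tcon and inode open-file lists.
 */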
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

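/* Take an additional reference on an open file's private data. */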
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

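/*
 * Final teardown of a cifsFileInfo: drop any remaining lock records, the
 * tlink and dentry references, and the superblock active count.
 */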
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, defer the final release to the fileinfo_put_wq
 *              work queue; close and oplock-break paths pass false
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

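/*
 * ->open() for regular files: reuse a cached deferred-close handle when the
 * open flags match, otherwise try a POSIX open where the server supports it
 * and fall back to the traditional NT open path.
 */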
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto out;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode which we could not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}


static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

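/*
 * Reopen a file handle after a reconnect (or other handle invalidation),
 * optionally flushing dirty pages and refreshing inode info, then push
 * cached byte-range locks back to the server.
 */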
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * some that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here. We
         * can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * in the reconnect path especially it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush it; and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we can not go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

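/*
 * Work handler for the delayed (deferred) close of an SMB2 handle: remove
 * the deferred-close record and drop the reference that kept the handle
 * open.
 */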
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

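/*
 * ->release() for regular files. With a read-write lease held, the close
 * is deferred for up to acregmax so a quick reopen can reuse the cached
 * handle; otherwise the reference is dropped immediately.
 */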
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    dclose) {
                        if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so take an extra reference
                                 * to avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

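/*
 * After a reconnect, walk the tcon's open file list and reopen any
 * invalidated persistent handles; mark the tcon for another pass if a
 * reopen fails.
 */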
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles  */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

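/*
 * ->release() for directories: close the search handle on the server if
 * needed and free any buffered find data.
 */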
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

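/*
 * Allocate and initialize a byte-range lock record for the current
 * process.
 */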
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

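/* Wake up any lock requests blocked waiting on @lock. */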
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP, CIFS_READ_OP or CIFS_WRITE_OP (see above) */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

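/*
 * Check every fid's lock list on this inode for a conflict with the given
 * range; if one is found, return it through @conf_lock.
 */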
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

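/* Append a lock record to this fid's lock list under lock_sem. */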
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

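/*
 * Send all cached byte-range locks for this fid to the server in batches
 * of LOCKING_ANDX_RANGE entries, one pass per lock type (exclusive first,
 * then shared).
 */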
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

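/* Derive a 32-bit lock owner id from the file_lock owner pointer. */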
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

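/*
 * Snapshot the inode's POSIX locks into preallocated lock_to_push records
 * under flc_lock, then send them to the server outside the spinlock.
 */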
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next; /* advance to the next preallocated entry */
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

1437 static int
1438 cifs_push_locks(struct cifsFileInfo *cfile)
1439 {
1440         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1441         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1442         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1443         int rc = 0;
1444
1445         /* we are going to update can_cache_brlcks here - need write access */
1446         cifs_down_write(&cinode->lock_sem);
1447         if (!cinode->can_cache_brlcks) {
1448                 up_write(&cinode->lock_sem);
1449                 return rc;
1450         }
1451
1452         if (cap_unix(tcon->ses) &&
1453             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1454             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1455                 rc = cifs_push_posix_locks(cfile);
1456         else
1457                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1458
1459         cinode->can_cache_brlcks = false;
1460         up_write(&cinode->lock_sem);
1461         return rc;
1462 }
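
/*
 * Context for the above (summary of the code, not new behaviour): while
 * can_cache_brlcks is set - e.g. while we hold an oplock that lets us
 * cache byte-range locks - locks are only recorded locally. This
 * function is the point where the cached locks get replayed to the
 * server: over the SMB1 POSIX path when the unix extensions and mount
 * options allow it, otherwise via the per-dialect push_mand_locks op.
 */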
1463
1464 static void
1465 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1466                 bool *wait_flag, struct TCP_Server_Info *server)
1467 {
1468         if (flock->fl_flags & FL_POSIX)
1469                 cifs_dbg(FYI, "Posix\n");
1470         if (flock->fl_flags & FL_FLOCK)
1471                 cifs_dbg(FYI, "Flock\n");
1472         if (flock->fl_flags & FL_SLEEP) {
1473                 cifs_dbg(FYI, "Blocking lock\n");
1474                 *wait_flag = true;
1475         }
1476         if (flock->fl_flags & FL_ACCESS)
1477                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1478         if (flock->fl_flags & FL_LEASE)
1479                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1480         if (flock->fl_flags &
1481             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1482                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1483                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1484
1485         *type = server->vals->large_lock_type;
1486         if (flock->fl_type == F_WRLCK) {
1487                 cifs_dbg(FYI, "F_WRLCK\n");
1488                 *type |= server->vals->exclusive_lock_type;
1489                 *lock = 1;
1490         } else if (flock->fl_type == F_UNLCK) {
1491                 cifs_dbg(FYI, "F_UNLCK\n");
1492                 *type |= server->vals->unlock_lock_type;
1493                 *unlock = 1;
1494                 /* Check if unlock includes more than one lock range */
1495         } else if (flock->fl_type == F_RDLCK) {
1496                 cifs_dbg(FYI, "F_RDLCK\n");
1497                 *type |= server->vals->shared_lock_type;
1498                 *lock = 1;
1499         } else if (flock->fl_type == F_EXLCK) {
1500                 cifs_dbg(FYI, "F_EXLCK\n");
1501                 *type |= server->vals->exclusive_lock_type;
1502                 *lock = 1;
1503         } else if (flock->fl_type == F_SHLCK) {
1504                 cifs_dbg(FYI, "F_SHLCK\n");
1505                 *type |= server->vals->shared_lock_type;
1506                 *lock = 1;
1507         } else
1508                 cifs_dbg(FYI, "Unknown type of lock\n");
1509 }
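
/*
 * Worked example of the decoding above: a blocking write lock, i.e.
 * fl_flags containing FL_SLEEP and fl_type F_WRLCK, comes out as
 *
 *	*wait_flag = true;
 *	*type = large_lock_type | exclusive_lock_type;
 *	*lock = 1;	(*unlock stays 0)
 */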
1510
1511 static int
1512 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1513            bool wait_flag, bool posix_lck, unsigned int xid)
1514 {
1515         int rc = 0;
1516         __u64 length = 1 + flock->fl_end - flock->fl_start;
1517         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1518         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1519         struct TCP_Server_Info *server = tcon->ses->server;
1520         __u16 netfid = cfile->fid.netfid;
1521
1522         if (posix_lck) {
1523                 int posix_lock_type;
1524
1525                 rc = cifs_posix_lock_test(file, flock);
1526                 if (!rc)
1527                         return rc;
1528
1529                 if (type & server->vals->shared_lock_type)
1530                         posix_lock_type = CIFS_RDLCK;
1531                 else
1532                         posix_lock_type = CIFS_WRLCK;
1533                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1534                                       hash_lockowner(flock->fl_owner),
1535                                       flock->fl_start, length, flock,
1536                                       posix_lock_type, wait_flag);
1537                 return rc;
1538         }
1539
1540         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1541         if (!rc)
1542                 return rc;
1543
1544         /* BB we could chain these into one lock request BB */
1545         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1546                                     1, 0, false);
1547         if (rc == 0) {
1548                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1549                                             type, 0, 1, false);
1550                 flock->fl_type = F_UNLCK;
1551                 if (rc != 0)
1552                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1553                                  rc);
1554                 return 0;
1555         }
1556
1557         if (type & server->vals->shared_lock_type) {
1558                 flock->fl_type = F_WRLCK;
1559                 return 0;
1560         }
1561
1562         type &= ~server->vals->exclusive_lock_type;
1563
1564         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1565                                     type | server->vals->shared_lock_type,
1566                                     1, 0, false);
1567         if (rc == 0) {
1568                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1569                         type | server->vals->shared_lock_type, 0, 1, false);
1570                 flock->fl_type = F_RDLCK;
1571                 if (rc != 0)
1572                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1573                                  rc);
1574         } else
1575                 flock->fl_type = F_WRLCK;
1576
1577         return 0;
1578 }
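
/*
 * Note on the probe strategy above: SMB mandatory locks offer no "test
 * only" operation, so cifs_getlk() tests by taking and immediately
 * releasing the requested range. When an exclusive probe is refused, a
 * second shared probe classifies the conflict: if the shared probe
 * succeeds, the blocker is a read lock (F_RDLCK is reported); if it
 * also fails, the blocker is a write lock (F_WRLCK is reported).
 */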
1579
1580 void
1581 cifs_move_llist(struct list_head *source, struct list_head *dest)
1582 {
1583         struct list_head *li, *tmp;
1584         list_for_each_safe(li, tmp, source)
1585                 list_move(li, dest);
1586 }
1587
1588 void
1589 cifs_free_llist(struct list_head *llist)
1590 {
1591         struct cifsLockInfo *li, *tmp;
1592         list_for_each_entry_safe(li, tmp, llist, llist) {
1593                 cifs_del_lock_waiters(li);
1594                 list_del(&li->llist);
1595                 kfree(li);
1596         }
1597 }
1598
1599 int
1600 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1601                   unsigned int xid)
1602 {
1603         int rc = 0, stored_rc;
1604         static const int types[] = {
1605                 LOCKING_ANDX_LARGE_FILES,
1606                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1607         };
1608         unsigned int i;
1609         unsigned int max_num, num, max_buf;
1610         LOCKING_ANDX_RANGE *buf, *cur;
1611         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1612         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1613         struct cifsLockInfo *li, *tmp;
1614         __u64 length = 1 + flock->fl_end - flock->fl_start;
1615         struct list_head tmp_llist;
1616
1617         INIT_LIST_HEAD(&tmp_llist);
1618
1619         /*
1620          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1621          * and check it before using.
1622          */
1623         max_buf = tcon->ses->server->maxBuf;
1624         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1625                 return -EINVAL;
1626
1627         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1628                      PAGE_SIZE);
1629         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1630                         PAGE_SIZE);
1631         max_num = (max_buf - sizeof(struct smb_hdr)) /
1632                                                 sizeof(LOCKING_ANDX_RANGE);
1633         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1634         if (!buf)
1635                 return -ENOMEM;
1636
1637         cifs_down_write(&cinode->lock_sem);
1638         for (i = 0; i < 2; i++) {
1639                 cur = buf;
1640                 num = 0;
1641                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1642                         if (flock->fl_start > li->offset ||
1643                             (flock->fl_start + length) <
1644                             (li->offset + li->length))
1645                                 continue;
1646                         if (current->tgid != li->pid)
1647                                 continue;
1648                         if (types[i] != li->type)
1649                                 continue;
1650                         if (cinode->can_cache_brlcks) {
1651                                 /*
1652                                  * We can cache brlock requests - simply remove
1653                                  * a lock from the file's list.
1654                                  */
1655                                 list_del(&li->llist);
1656                                 cifs_del_lock_waiters(li);
1657                                 kfree(li);
1658                                 continue;
1659                         }
1660                         cur->Pid = cpu_to_le16(li->pid);
1661                         cur->LengthLow = cpu_to_le32((u32)li->length);
1662                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1663                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1664                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1665                         /*
1666                          * We need to save a lock here to let us add it again to
1667                          * the file's list if the unlock range request fails on
1668                          * the server.
1669                          */
1670                         list_move(&li->llist, &tmp_llist);
1671                         if (++num == max_num) {
1672                                 stored_rc = cifs_lockv(xid, tcon,
1673                                                        cfile->fid.netfid,
1674                                                        li->type, num, 0, buf);
1675                                 if (stored_rc) {
1676                                         /*
1677                                          * We failed on the unlock range
1678                                          * request - add all locks from the tmp
1679                                          * list to the head of the file's list.
1680                                          */
1681                                         cifs_move_llist(&tmp_llist,
1682                                                         &cfile->llist->locks);
1683                                         rc = stored_rc;
1684                                 } else
1685                                         /*
1686                                          * The unlock range request succeeded -
1687                                          * free the tmp list.
1688                                          */
1689                                         cifs_free_llist(&tmp_llist);
1690                                 cur = buf;
1691                                 num = 0;
1692                         } else
1693                                 cur++;
1694                 }
1695                 if (num) {
1696                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1697                                                types[i], num, 0, buf);
1698                         if (stored_rc) {
1699                                 cifs_move_llist(&tmp_llist,
1700                                                 &cfile->llist->locks);
1701                                 rc = stored_rc;
1702                         } else
1703                                 cifs_free_llist(&tmp_llist);
1704                 }
1705         }
1706
1707         up_write(&cinode->lock_sem);
1708         kfree(buf);
1709         return rc;
1710 }
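
/*
 * Sizing sketch for the batching above (illustrative numbers): a
 * LOCKING_ANDX_RANGE is 20 bytes, so with max_buf capped at a 4KiB
 * PAGE_SIZE roughly (4096 - sizeof(struct smb_hdr)) / 20 - about 200
 * unlock ranges - fit in one LOCKING_ANDX request per lock type.
 */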
1711
1712 static int
1713 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1714            bool wait_flag, bool posix_lck, int lock, int unlock,
1715            unsigned int xid)
1716 {
1717         int rc = 0;
1718         __u64 length = 1 + flock->fl_end - flock->fl_start;
1719         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1720         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1721         struct TCP_Server_Info *server = tcon->ses->server;
1722         struct inode *inode = d_inode(cfile->dentry);
1723
1724         if (posix_lck) {
1725                 int posix_lock_type;
1726
1727                 rc = cifs_posix_lock_set(file, flock);
1728                 if (rc <= FILE_LOCK_DEFERRED)
1729                         return rc;
1730
1731                 if (type & server->vals->shared_lock_type)
1732                         posix_lock_type = CIFS_RDLCK;
1733                 else
1734                         posix_lock_type = CIFS_WRLCK;
1735
1736                 if (unlock == 1)
1737                         posix_lock_type = CIFS_UNLCK;
1738
1739                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1740                                       hash_lockowner(flock->fl_owner),
1741                                       flock->fl_start, length,
1742                                       NULL, posix_lock_type, wait_flag);
1743                 goto out;
1744         }
1745
1746         if (lock) {
1747                 struct cifsLockInfo *lock;
1748
1749                 lock = cifs_lock_init(flock->fl_start, length, type,
1750                                       flock->fl_flags);
1751                 if (!lock)
1752                         return -ENOMEM;
1753
1754                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1755                 if (rc < 0) {
1756                         kfree(lock);
1757                         return rc;
1758                 }
1759                 if (!rc)
1760                         goto out;
1761
1762                 /*
1763                  * A Windows 7 server can delay breaking a lease from read to
1764                  * None if we set a byte-range lock on a file - break it
1765                  * explicitly before sending the lock to the server to be sure
1766                  * the next read won't conflict with non-overlapping locks due
1767                  * to page readahead.
1768                  */
1769                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1770                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1771                         cifs_zap_mapping(inode);
1772                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1773                                  inode);
1774                         CIFS_I(inode)->oplock = 0;
1775                 }
1776
1777                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1778                                             type, 1, 0, wait_flag);
1779                 if (rc) {
1780                         kfree(lock);
1781                         return rc;
1782                 }
1783
1784                 cifs_lock_add(cfile, lock);
1785         } else if (unlock)
1786                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1787
1788 out:
1789         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1790                 /*
1791                  * If this is a request to remove all locks because we
1792                  * are closing the file, it doesn't matter if the
1793                  * unlocking failed as both cifs.ko and the SMB server
1794                  * remove the lock on file close
1795                  */
1796                 if (rc) {
1797                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1798                         if (!(flock->fl_flags & FL_CLOSE))
1799                                 return rc;
1800                 }
1801                 rc = locks_lock_file_wait(file, flock);
1802         }
1803         return rc;
1804 }
1805
1806 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1807 {
1808         int rc, xid;
1809         int lock = 0, unlock = 0;
1810         bool wait_flag = false;
1811         bool posix_lck = false;
1812         struct cifs_sb_info *cifs_sb;
1813         struct cifs_tcon *tcon;
1814         struct cifsFileInfo *cfile;
1815         __u32 type;
1816
1817         rc = -EACCES;
1818         xid = get_xid();
1819
1820         if (!(fl->fl_flags & FL_FLOCK))
1821                 return -ENOLCK;
1822
1823         cfile = (struct cifsFileInfo *)file->private_data;
1824         tcon = tlink_tcon(cfile->tlink);
1825
1826         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1827                         tcon->ses->server);
1828         cifs_sb = CIFS_FILE_SB(file);
1829
1830         if (cap_unix(tcon->ses) &&
1831             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1832             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1833                 posix_lck = true;
1834
1835         if (!lock && !unlock) {
1836                 /*
1837                  * if no lock or unlock then nothing to do since we do not
1838                  * know what it is
1839                  */
1840                 free_xid(xid);
1841                 return -EOPNOTSUPP;
1842         }
1843
1844         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1845                         xid);
1846         free_xid(xid);
1847         return rc;
1850 }
1851
1852 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1853 {
1854         int rc, xid;
1855         int lock = 0, unlock = 0;
1856         bool wait_flag = false;
1857         bool posix_lck = false;
1858         struct cifs_sb_info *cifs_sb;
1859         struct cifs_tcon *tcon;
1860         struct cifsFileInfo *cfile;
1861         __u32 type;
1862
1863         rc = -EACCES;
1864         xid = get_xid();
1865
1866         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1867                  cmd, flock->fl_flags, flock->fl_type,
1868                  flock->fl_start, flock->fl_end);
1869
1870         cfile = (struct cifsFileInfo *)file->private_data;
1871         tcon = tlink_tcon(cfile->tlink);
1872
1873         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1874                         tcon->ses->server);
1875         cifs_sb = CIFS_FILE_SB(file);
1876
1877         if (cap_unix(tcon->ses) &&
1878             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1879             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1880                 posix_lck = true;
1881         /*
1882          * BB add code here to normalize offset and length to account for
1883          * negative length which we can not accept over the wire.
1884          */
1885         if (IS_GETLK(cmd)) {
1886                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1887                 free_xid(xid);
1888                 return rc;
1889         }
1890
1891         if (!lock && !unlock) {
1892                 /*
1893                  * if no lock or unlock then nothing to do since we do not
1894                  * know what it is
1895                  */
1896                 free_xid(xid);
1897                 return -EOPNOTSUPP;
1898         }
1899
1900         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1901                         xid);
1902         free_xid(xid);
1903         return rc;
1904 }
1905
1906 /*
1907  * update the file size (if needed) after a write. Should be called with
1908  * the inode->i_lock held
1909  */
1910 void
1911 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1912                       unsigned int bytes_written)
1913 {
1914         loff_t end_of_write = offset + bytes_written;
1915
1916         if (end_of_write > cifsi->server_eof)
1917                 cifsi->server_eof = end_of_write;
1918 }
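
/*
 * Example: with server_eof at 4096, a 512-byte write at offset 4096
 * advances server_eof to 4608, while a rewrite entirely below the
 * current end of file leaves it untouched.
 */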
1919
1920 static ssize_t
1921 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1922            size_t write_size, loff_t *offset)
1923 {
1924         int rc = 0;
1925         unsigned int bytes_written = 0;
1926         unsigned int total_written;
1927         struct cifs_tcon *tcon;
1928         struct TCP_Server_Info *server;
1929         unsigned int xid;
1930         struct dentry *dentry = open_file->dentry;
1931         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1932         struct cifs_io_parms io_parms = {0};
1933
1934         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1935                  write_size, *offset, dentry);
1936
1937         tcon = tlink_tcon(open_file->tlink);
1938         server = tcon->ses->server;
1939
1940         if (!server->ops->sync_write)
1941                 return -ENOSYS;
1942
1943         xid = get_xid();
1944
1945         for (total_written = 0; write_size > total_written;
1946              total_written += bytes_written) {
1947                 rc = -EAGAIN;
1948                 while (rc == -EAGAIN) {
1949                         struct kvec iov[2];
1950                         unsigned int len;
1951
1952                         if (open_file->invalidHandle) {
1953                                 /* we could deadlock if we called
1954                                    filemap_fdatawait from here so tell
1955                                    reopen_file not to flush data to
1956                                    server now */
1957                                 rc = cifs_reopen_file(open_file, false);
1958                                 if (rc != 0)
1959                                         break;
1960                         }
1961
1962                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1963                                   (unsigned int)write_size - total_written);
1964                         /* iov[0] is reserved for smb header */
1965                         iov[1].iov_base = (char *)write_data + total_written;
1966                         iov[1].iov_len = len;
1967                         io_parms.pid = pid;
1968                         io_parms.tcon = tcon;
1969                         io_parms.offset = *offset;
1970                         io_parms.length = len;
1971                         rc = server->ops->sync_write(xid, &open_file->fid,
1972                                         &io_parms, &bytes_written, iov, 1);
1973                 }
1974                 if (rc || (bytes_written == 0)) {
1975                         if (total_written)
1976                                 break;
1977                         else {
1978                                 free_xid(xid);
1979                                 return rc;
1980                         }
1981                 } else {
1982                         spin_lock(&d_inode(dentry)->i_lock);
1983                         cifs_update_eof(cifsi, *offset, bytes_written);
1984                         spin_unlock(&d_inode(dentry)->i_lock);
1985                         *offset += bytes_written;
1986                 }
1987         }
1988
1989         cifs_stats_bytes_written(tcon, total_written);
1990
1991         if (total_written > 0) {
1992                 spin_lock(&d_inode(dentry)->i_lock);
1993                 if (*offset > d_inode(dentry)->i_size) {
1994                         i_size_write(d_inode(dentry), *offset);
1995                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1996                 }
1997                 spin_unlock(&d_inode(dentry)->i_lock);
1998         }
1999         mark_inode_dirty_sync(d_inode(dentry));
2000         free_xid(xid);
2001         return total_written;
2002 }
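
/*
 * Behaviour sketch for the loop above: each chunk is capped by the
 * server's wp_retry_size(); -EAGAIN from sync_write (e.g. a reconnect)
 * retries the same chunk after reopening an invalidated handle, and an
 * error after partial progress returns the bytes already written
 * rather than the error code.
 */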
2003
2004 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2005                                         bool fsuid_only)
2006 {
2007         struct cifsFileInfo *open_file = NULL;
2008         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2009
2010         /* only filter by fsuid on multiuser mounts */
2011         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2012                 fsuid_only = false;
2013
2014         spin_lock(&cifs_inode->open_file_lock);
2015         /* we could simply take the first list entry, since write-only entries
2016            are always at the end of the list, but the first entry might have a
2017            close pending, so we walk the whole list */
2018         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2019                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2020                         continue;
2021                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2022                         if (!open_file->invalidHandle) {
2023                                 /* found a good file */
2024                                 /* lock it so it will not be closed on us */
2025                                 cifsFileInfo_get(open_file);
2026                                 spin_unlock(&cifs_inode->open_file_lock);
2027                                 return open_file;
2028                         } /* else might as well continue, and look for
2029                              another, or simply have the caller reopen it
2030                              again rather than trying to fix this handle */
2031                 } else /* write only file */
2032                         break; /* write only files are last so must be done */
2033         }
2034         spin_unlock(&cifs_inode->open_file_lock);
2035         return NULL;
2036 }
2037
2038 /* Return -EBADF if no handle is found and general rc otherwise */
2039 int
2040 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2041                        struct cifsFileInfo **ret_file)
2042 {
2043         struct cifsFileInfo *open_file, *inv_file = NULL;
2044         struct cifs_sb_info *cifs_sb;
2045         bool any_available = false;
2046         int rc = -EBADF;
2047         unsigned int refind = 0;
2048         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2049         bool with_delete = flags & FIND_WR_WITH_DELETE;
2050         *ret_file = NULL;
2051
2052         /*
2053          * Having a null inode here (because mapping->host was set to zero by
2054          * the VFS or MM) should not happen, but we had reports of an oops (due
2055          * to it being zero) during stress test cases, so we need to check for it
2056          */
2057
2058         if (cifs_inode == NULL) {
2059                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2060                 dump_stack();
2061                 return rc;
2062         }
2063
2064         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2065
2066         /* only filter by fsuid on multiuser mounts */
2067         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2068                 fsuid_only = false;
2069
2070         spin_lock(&cifs_inode->open_file_lock);
2071 refind_writable:
2072         if (refind > MAX_REOPEN_ATT) {
2073                 spin_unlock(&cifs_inode->open_file_lock);
2074                 return rc;
2075         }
2076         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2077                 if (!any_available && open_file->pid != current->tgid)
2078                         continue;
2079                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2080                         continue;
2081                 if (with_delete && !(open_file->fid.access & DELETE))
2082                         continue;
2083                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2084                         if (!open_file->invalidHandle) {
2085                                 /* found a good writable file */
2086                                 cifsFileInfo_get(open_file);
2087                                 spin_unlock(&cifs_inode->open_file_lock);
2088                                 *ret_file = open_file;
2089                                 return 0;
2090                         } else {
2091                                 if (!inv_file)
2092                                         inv_file = open_file;
2093                         }
2094                 }
2095         }
2096         /* couldn't find a usable FH with the same pid, try any available */
2097         if (!any_available) {
2098                 any_available = true;
2099                 goto refind_writable;
2100         }
2101
2102         if (inv_file) {
2103                 any_available = false;
2104                 cifsFileInfo_get(inv_file);
2105         }
2106
2107         spin_unlock(&cifs_inode->open_file_lock);
2108
2109         if (inv_file) {
2110                 rc = cifs_reopen_file(inv_file, false);
2111                 if (!rc) {
2112                         *ret_file = inv_file;
2113                         return 0;
2114                 }
2115
2116                 spin_lock(&cifs_inode->open_file_lock);
2117                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2118                 spin_unlock(&cifs_inode->open_file_lock);
2119                 cifsFileInfo_put(inv_file);
2120                 ++refind;
2121                 inv_file = NULL;
2122                 spin_lock(&cifs_inode->open_file_lock);
2123                 goto refind_writable;
2124         }
2125
2126         return rc;
2127 }
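
/*
 * Search order used above (summary): first a writable handle opened by
 * the calling process (matching tgid), then any writable handle, and
 * finally an invalidated handle that cifs_reopen_file() may be able to
 * revive - attempted at most MAX_REOPEN_ATT times.
 */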
2128
2129 struct cifsFileInfo *
2130 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2131 {
2132         struct cifsFileInfo *cfile;
2133         int rc;
2134
2135         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2136         if (rc)
2137                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2138
2139         return cfile;
2140 }
2141
2142 int
2143 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2144                        int flags,
2145                        struct cifsFileInfo **ret_file)
2146 {
2147         struct cifsFileInfo *cfile;
2148         void *page = alloc_dentry_path();
2149
2150         *ret_file = NULL;
2151
2152         spin_lock(&tcon->open_file_lock);
2153         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2154                 struct cifsInodeInfo *cinode;
2155                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2156                 if (IS_ERR(full_path)) {
2157                         spin_unlock(&tcon->open_file_lock);
2158                         free_dentry_path(page);
2159                         return PTR_ERR(full_path);
2160                 }
2161                 if (strcmp(full_path, name))
2162                         continue;
2163
2164                 cinode = CIFS_I(d_inode(cfile->dentry));
2165                 spin_unlock(&tcon->open_file_lock);
2166                 free_dentry_path(page);
2167                 return cifs_get_writable_file(cinode, flags, ret_file);
2168         }
2169
2170         spin_unlock(&tcon->open_file_lock);
2171         free_dentry_path(page);
2172         return -ENOENT;
2173 }
2174
2175 int
2176 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2177                        struct cifsFileInfo **ret_file)
2178 {
2179         struct cifsFileInfo *cfile;
2180         void *page = alloc_dentry_path();
2181
2182         *ret_file = NULL;
2183
2184         spin_lock(&tcon->open_file_lock);
2185         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2186                 struct cifsInodeInfo *cinode;
2187                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2188                 if (IS_ERR(full_path)) {
2189                         spin_unlock(&tcon->open_file_lock);
2190                         free_dentry_path(page);
2191                         return PTR_ERR(full_path);
2192                 }
2193                 if (strcmp(full_path, name))
2194                         continue;
2195
2196                 cinode = CIFS_I(d_inode(cfile->dentry));
2197                 spin_unlock(&tcon->open_file_lock);
2198                 free_dentry_path(page);
2199                 *ret_file = find_readable_file(cinode, 0);
2200                 return *ret_file ? 0 : -ENOENT;
2201         }
2202
2203         spin_unlock(&tcon->open_file_lock);
2204         free_dentry_path(page);
2205         return -ENOENT;
2206 }
2207
2208 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2209 {
2210         struct address_space *mapping = page->mapping;
2211         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2212         char *write_data;
2213         int rc = -EFAULT;
2214         int bytes_written = 0;
2215         struct inode *inode;
2216         struct cifsFileInfo *open_file;
2217
2218         if (!mapping || !mapping->host)
2219                 return -EFAULT;
2220
2221         inode = page->mapping->host;
2222
2223         offset += (loff_t)from;
2224         write_data = kmap(page);
2225         write_data += from;
2226
2227         if ((to > PAGE_SIZE) || (from > to)) {
2228                 kunmap(page);
2229                 return -EIO;
2230         }
2231
2232         /* racing with truncate? */
2233         if (offset > mapping->host->i_size) {
2234                 kunmap(page);
2235                 return 0; /* don't care */
2236         }
2237
2238         /* check to make sure that we are not extending the file */
2239         if (mapping->host->i_size - offset < (loff_t)to)
2240                 to = (unsigned)(mapping->host->i_size - offset);
2241
2242         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2243                                     &open_file);
2244         if (!rc) {
2245                 bytes_written = cifs_write(open_file, open_file->pid,
2246                                            write_data, to - from, &offset);
2247                 cifsFileInfo_put(open_file);
2248                 /* Does mm or vfs already set times? */
2249                 inode->i_atime = inode->i_mtime = current_time(inode);
2250                 if ((bytes_written > 0) && (offset))
2251                         rc = 0;
2252                 else if (bytes_written < 0)
2253                         rc = bytes_written;
2254                 else
2255                         rc = -EFAULT;
2256         } else {
2257                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2258                 if (!is_retryable_error(rc))
2259                         rc = -EIO;
2260         }
2261
2262         kunmap(page);
2263         return rc;
2264 }
2265
2266 static struct cifs_writedata *
2267 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2268                           pgoff_t end, pgoff_t *index,
2269                           unsigned int *found_pages)
2270 {
2271         struct cifs_writedata *wdata;
2272
2273         wdata = cifs_writedata_alloc((unsigned int)tofind,
2274                                      cifs_writev_complete);
2275         if (!wdata)
2276                 return NULL;
2277
2278         *found_pages = find_get_pages_range_tag(mapping, index, end,
2279                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2280         return wdata;
2281 }
2282
2283 static unsigned int
2284 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2285                     struct address_space *mapping,
2286                     struct writeback_control *wbc,
2287                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2288 {
2289         unsigned int nr_pages = 0, i;
2290         struct page *page;
2291
2292         for (i = 0; i < found_pages; i++) {
2293                 page = wdata->pages[i];
2294                 /*
2295                  * At this point we hold neither the i_pages lock nor the
2296                  * page lock: the page may be truncated or invalidated
2297                  * (changing page->mapping to NULL), or even swizzled
2298                  * back from swapper_space to tmpfs file mapping
2299                  */
2300
2301                 if (nr_pages == 0)
2302                         lock_page(page);
2303                 else if (!trylock_page(page))
2304                         break;
2305
2306                 if (unlikely(page->mapping != mapping)) {
2307                         unlock_page(page);
2308                         break;
2309                 }
2310
2311                 if (!wbc->range_cyclic && page->index > end) {
2312                         *done = true;
2313                         unlock_page(page);
2314                         break;
2315                 }
2316
2317                 if (*next && (page->index != *next)) {
2318                         /* Not next consecutive page */
2319                         unlock_page(page);
2320                         break;
2321                 }
2322
2323                 if (wbc->sync_mode != WB_SYNC_NONE)
2324                         wait_on_page_writeback(page);
2325
2326                 if (PageWriteback(page) ||
2327                                 !clear_page_dirty_for_io(page)) {
2328                         unlock_page(page);
2329                         break;
2330                 }
2331
2332                 /*
2333                  * This actually clears the dirty bit in the radix tree.
2334                  * See cifs_writepage() for more commentary.
2335                  */
2336                 set_page_writeback(page);
2337                 if (page_offset(page) >= i_size_read(mapping->host)) {
2338                         *done = true;
2339                         unlock_page(page);
2340                         end_page_writeback(page);
2341                         break;
2342                 }
2343
2344                 wdata->pages[i] = page;
2345                 *next = page->index + 1;
2346                 ++nr_pages;
2347         }
2348
2349         /* reset index to refind any pages skipped */
2350         if (nr_pages == 0)
2351                 *index = wdata->pages[0]->index + 1;
2352
2353         /* put any pages we aren't going to use */
2354         for (i = nr_pages; i < found_pages; i++) {
2355                 put_page(wdata->pages[i]);
2356                 wdata->pages[i] = NULL;
2357         }
2358
2359         return nr_pages;
2360 }
2361
2362 static int
2363 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2364                  struct address_space *mapping, struct writeback_control *wbc)
2365 {
2366         int rc;
2367
2368         wdata->sync_mode = wbc->sync_mode;
2369         wdata->nr_pages = nr_pages;
2370         wdata->offset = page_offset(wdata->pages[0]);
2371         wdata->pagesz = PAGE_SIZE;
2372         wdata->tailsz = min(i_size_read(mapping->host) -
2373                         page_offset(wdata->pages[nr_pages - 1]),
2374                         (loff_t)PAGE_SIZE);
2375         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2376         wdata->pid = wdata->cfile->pid;
2377
2378         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2379         if (rc)
2380                 return rc;
2381
2382         if (wdata->cfile->invalidHandle)
2383                 rc = -EAGAIN;
2384         else
2385                 rc = wdata->server->ops->async_writev(wdata,
2386                                                       cifs_writedata_release);
2387
2388         return rc;
2389 }
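
/*
 * Note: wdata->tailsz trims the last page to i_size, so e.g. a single
 * dirty page at offset 8192 in a 10000-byte file (4KiB pages) yields
 * nr_pages = 1 and bytes = 10000 - 8192 = 1808, not a full page.
 */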
2390
2391 static int cifs_writepages(struct address_space *mapping,
2392                            struct writeback_control *wbc)
2393 {
2394         struct inode *inode = mapping->host;
2395         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2396         struct TCP_Server_Info *server;
2397         bool done = false, scanned = false, range_whole = false;
2398         pgoff_t end, index;
2399         struct cifs_writedata *wdata;
2400         struct cifsFileInfo *cfile = NULL;
2401         int rc = 0;
2402         int saved_rc = 0;
2403         unsigned int xid;
2404
2405         /*
2406          * If wsize is smaller than the page cache size, default to writing
2407          * one page at a time via cifs_writepage
2408          */
2409         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2410                 return generic_writepages(mapping, wbc);
2411
2412         xid = get_xid();
2413         if (wbc->range_cyclic) {
2414                 index = mapping->writeback_index; /* Start from prev offset */
2415                 end = -1;
2416         } else {
2417                 index = wbc->range_start >> PAGE_SHIFT;
2418                 end = wbc->range_end >> PAGE_SHIFT;
2419                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2420                         range_whole = true;
2421                 scanned = true;
2422         }
2423         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2424
2425 retry:
2426         while (!done && index <= end) {
2427                 unsigned int i, nr_pages, found_pages, wsize;
2428                 pgoff_t next = 0, tofind, saved_index = index;
2429                 struct cifs_credits credits_on_stack;
2430                 struct cifs_credits *credits = &credits_on_stack;
2431                 int get_file_rc = 0;
2432
2433                 if (cfile)
2434                         cifsFileInfo_put(cfile);
2435
2436                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2437
2438                 /* in case of an error store it to return later */
2439                 if (rc)
2440                         get_file_rc = rc;
2441
2442                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2443                                                    &wsize, credits);
2444                 if (rc != 0) {
2445                         done = true;
2446                         break;
2447                 }
2448
2449                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2450
2451                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2452                                                   &found_pages);
2453                 if (!wdata) {
2454                         rc = -ENOMEM;
2455                         done = true;
2456                         add_credits_and_wake_if(server, credits, 0);
2457                         break;
2458                 }
2459
2460                 if (found_pages == 0) {
2461                         kref_put(&wdata->refcount, cifs_writedata_release);
2462                         add_credits_and_wake_if(server, credits, 0);
2463                         break;
2464                 }
2465
2466                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2467                                                end, &index, &next, &done);
2468
2469                 /* nothing to write? */
2470                 if (nr_pages == 0) {
2471                         kref_put(&wdata->refcount, cifs_writedata_release);
2472                         add_credits_and_wake_if(server, credits, 0);
2473                         continue;
2474                 }
2475
2476                 wdata->credits = credits_on_stack;
2477                 wdata->cfile = cfile;
2478                 wdata->server = server;
2479                 cfile = NULL;
2480
2481                 if (!wdata->cfile) {
2482                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2483                                  get_file_rc);
2484                         if (is_retryable_error(get_file_rc))
2485                                 rc = get_file_rc;
2486                         else
2487                                 rc = -EBADF;
2488                 } else
2489                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2490
2491                 for (i = 0; i < nr_pages; ++i)
2492                         unlock_page(wdata->pages[i]);
2493
2494                 /* send failure -- clean up the mess */
2495                 if (rc != 0) {
2496                         add_credits_and_wake_if(server, &wdata->credits, 0);
2497                         for (i = 0; i < nr_pages; ++i) {
2498                                 if (is_retryable_error(rc))
2499                                         redirty_page_for_writepage(wbc,
2500                                                            wdata->pages[i]);
2501                                 else
2502                                         SetPageError(wdata->pages[i]);
2503                                 end_page_writeback(wdata->pages[i]);
2504                                 put_page(wdata->pages[i]);
2505                         }
2506                         if (!is_retryable_error(rc))
2507                                 mapping_set_error(mapping, rc);
2508                 }
2509                 kref_put(&wdata->refcount, cifs_writedata_release);
2510
2511                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2512                         index = saved_index;
2513                         continue;
2514                 }
2515
2516                 /* Return immediately if we received a signal during writing */
2517                 if (is_interrupt_error(rc)) {
2518                         done = true;
2519                         break;
2520                 }
2521
2522                 if (rc != 0 && saved_rc == 0)
2523                         saved_rc = rc;
2524
2525                 wbc->nr_to_write -= nr_pages;
2526                 if (wbc->nr_to_write <= 0)
2527                         done = true;
2528
2529                 index = next;
2530         }
2531
2532         if (!scanned && !done) {
2533                 /*
2534                  * We hit the last page and there is more work to be done: wrap
2535                  * back to the start of the file
2536                  */
2537                 scanned = true;
2538                 index = 0;
2539                 goto retry;
2540         }
2541
2542         if (saved_rc != 0)
2543                 rc = saved_rc;
2544
2545         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2546                 mapping->writeback_index = index;
2547
2548         if (cfile)
2549                 cifsFileInfo_put(cfile);
2550         free_xid(xid);
2551         /* Indication to update ctime and mtime as close is deferred */
2552         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2553         return rc;
2554 }
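
/*
 * Credit handling in the loop above (sketch): wait_mtu_credits()
 * reserves up to wsize bytes of send credit before pages are gathered,
 * adjust_credits() in wdata_send_pages() trims the reservation to the
 * bytes actually queued, and every bail-out path returns the
 * reservation via add_credits_and_wake_if().
 */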
2555
2556 static int
2557 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2558 {
2559         int rc;
2560         unsigned int xid;
2561
2562         xid = get_xid();
2563 /* BB add check for wbc flags */
2564         get_page(page);
2565         if (!PageUptodate(page))
2566                 cifs_dbg(FYI, "ppw - page not up to date\n");
2567
2568         /*
2569          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2570          *
2571          * A writepage() implementation always needs to do either this,
2572          * or re-dirty the page with "redirty_page_for_writepage()" in
2573          * the case of a failure.
2574          *
2575          * Just unlocking the page will cause the radix tree tag-bits
2576          * to fail to update with the state of the page correctly.
2577          */
2578         set_page_writeback(page);
2579 retry_write:
2580         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2581         if (is_retryable_error(rc)) {
2582                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2583                         goto retry_write;
2584                 redirty_page_for_writepage(wbc, page);
2585         } else if (rc != 0) {
2586                 SetPageError(page);
2587                 mapping_set_error(page->mapping, rc);
2588         } else {
2589                 SetPageUptodate(page);
2590         }
2591         end_page_writeback(page);
2592         put_page(page);
2593         free_xid(xid);
2594         return rc;
2595 }
2596
2597 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2598 {
2599         int rc = cifs_writepage_locked(page, wbc);
2600         unlock_page(page);
2601         return rc;
2602 }
2603
2604 static int cifs_write_end(struct file *file, struct address_space *mapping,
2605                         loff_t pos, unsigned len, unsigned copied,
2606                         struct page *page, void *fsdata)
2607 {
2608         int rc;
2609         struct inode *inode = mapping->host;
2610         struct cifsFileInfo *cfile = file->private_data;
2611         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2612         __u32 pid;
2613
2614         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2615                 pid = cfile->pid;
2616         else
2617                 pid = current->tgid;
2618
2619         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2620                  page, pos, copied);
2621
2622         if (PageChecked(page)) {
2623                 if (copied == len)
2624                         SetPageUptodate(page);
2625                 ClearPageChecked(page);
2626         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2627                 SetPageUptodate(page);
2628
2629         if (!PageUptodate(page)) {
2630                 char *page_data;
2631                 unsigned offset = pos & (PAGE_SIZE - 1);
2632                 unsigned int xid;
2633
2634                 xid = get_xid();
2635                 /* calling cifs_write() here is probably better than calling
2636                    cifs_partialpagewrite() directly, since in this function the
2637                    file handle is known and we might as well use it */
2638                 /* BB check if anything else missing out of ppw
2639                    such as updating last write time */
2640                 page_data = kmap(page);
2641                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2642                 /* if (rc < 0) should we set writebehind rc? */
2643                 kunmap(page);
2644
2645                 free_xid(xid);
2646         } else {
2647                 rc = copied;
2648                 pos += copied;
2649                 set_page_dirty(page);
2650         }
2651
2652         if (rc > 0) {
2653                 spin_lock(&inode->i_lock);
2654                 if (pos > inode->i_size) {
2655                         i_size_write(inode, pos);
2656                         inode->i_blocks = (512 - 1 + pos) >> 9;
2657                 }
2658                 spin_unlock(&inode->i_lock);
2659         }
2660
2661         unlock_page(page);
2662         put_page(page);
2663         /* Indication to update ctime and mtime as close is deferred */
2664         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2665
2666         return rc;
2667 }
2668
2669 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2670                       int datasync)
2671 {
2672         unsigned int xid;
2673         int rc = 0;
2674         struct cifs_tcon *tcon;
2675         struct TCP_Server_Info *server;
2676         struct cifsFileInfo *smbfile = file->private_data;
2677         struct inode *inode = file_inode(file);
2678         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2679
2680         rc = file_write_and_wait_range(file, start, end);
2681         if (rc) {
2682                 trace_cifs_fsync_err(inode->i_ino, rc);
2683                 return rc;
2684         }
2685
2686         xid = get_xid();
2687
2688         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2689                  file, datasync);
2690
2691         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2692                 rc = cifs_zap_mapping(inode);
2693                 if (rc) {
2694                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2695                         rc = 0; /* don't care about it in fsync */
2696                 }
2697         }
2698
2699         tcon = tlink_tcon(smbfile->tlink);
2700         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2701                 server = tcon->ses->server;
2702                 if (server->ops->flush)
2703                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2704                 else
2705                         rc = -ENOSYS;
2706         }
2707
2708         free_xid(xid);
2709         return rc;
2710 }
2711
2712 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2713 {
2714         unsigned int xid;
2715         int rc = 0;
2716         struct cifs_tcon *tcon;
2717         struct TCP_Server_Info *server;
2718         struct cifsFileInfo *smbfile = file->private_data;
2719         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2720
2721         rc = file_write_and_wait_range(file, start, end);
2722         if (rc) {
2723                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2724                 return rc;
2725         }
2726
2727         xid = get_xid();
2728
2729         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2730                  file, datasync);
2731
2732         tcon = tlink_tcon(smbfile->tlink);
2733         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2734                 server = tcon->ses->server;
2735                 if (server->ops->flush)
2736                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2737                 else
2738                         rc = -ENOSYS;
2739         }
2740
2741         free_xid(xid);
2742         return rc;
2743 }
2744
2745 /*
2746  * As the file closes, flush all cached write data for this inode,
2747  * checking for write-behind errors.
2748  */
2749 int cifs_flush(struct file *file, fl_owner_t id)
2750 {
2751         struct inode *inode = file_inode(file);
2752         int rc = 0;
2753
2754         if (file->f_mode & FMODE_WRITE)
2755                 rc = filemap_write_and_wait(inode->i_mapping);
2756
2757         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2758         if (rc)
2759                 trace_cifs_flush_err(inode->i_ino, rc);
2760         return rc;
2761 }
2762
2763 static int
2764 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2765 {
2766         int rc = 0;
2767         unsigned long i;
2768
2769         for (i = 0; i < num_pages; i++) {
2770                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2771                 if (!pages[i]) {
2772                         /*
2773                          * save number of pages we have already allocated and
2774                          * return with ENOMEM error
2775                          */
2776                         num_pages = i;
2777                         rc = -ENOMEM;
2778                         break;
2779                 }
2780         }
2781
2782         if (rc) {
2783                 for (i = 0; i < num_pages; i++)
2784                         put_page(pages[i]);
2785         }
2786         return rc;
2787 }
2788
2789 static inline
2790 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2791 {
2792         size_t num_pages;
2793         size_t clen;
2794
2795         clen = min_t(const size_t, len, wsize);
2796         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2797
2798         if (cur_len)
2799                 *cur_len = clen;
2800
2801         return num_pages;
2802 }
2803
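/*
 * Free an uncached writedata once its last reference is dropped: put
 * our reference on the owning aio context, release the data pages and
 * hand the rest off to cifs_writedata_release().
 */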
2804 static void
2805 cifs_uncached_writedata_release(struct kref *refcount)
2806 {
2807         int i;
2808         struct cifs_writedata *wdata = container_of(refcount,
2809                                         struct cifs_writedata, refcount);
2810
2811         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2812         for (i = 0; i < wdata->nr_pages; i++)
2813                 put_page(wdata->pages[i]);
2814         cifs_writedata_release(refcount);
2815 }
2816
2817 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2818
2819 static void
2820 cifs_uncached_writev_complete(struct work_struct *work)
2821 {
2822         struct cifs_writedata *wdata = container_of(work,
2823                                         struct cifs_writedata, work);
2824         struct inode *inode = d_inode(wdata->cfile->dentry);
2825         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2826
2827         spin_lock(&inode->i_lock);
2828         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2829         if (cifsi->server_eof > inode->i_size)
2830                 i_size_write(inode, cifsi->server_eof);
2831         spin_unlock(&inode->i_lock);
2832
2833         complete(&wdata->done);
2834         collect_uncached_write_data(wdata->ctx);
2835         /* the call below can possibly free the last ref to the aio ctx */
2836         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2837 }
2838
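/*
 * Copy up to *len bytes from the source iterator into the wdata's
 * pre-allocated pages. On success, *len holds the number of bytes
 * actually copied and *num_pages the number of pages used.
 */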
2839 static int
2840 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2841                       size_t *len, unsigned long *num_pages)
2842 {
2843         size_t save_len, copied, bytes, cur_len = *len;
2844         unsigned long i, nr_pages = *num_pages;
2845
2846         save_len = cur_len;
2847         for (i = 0; i < nr_pages; i++) {
2848                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2849                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2850                 cur_len -= copied;
2851                 /*
2852                  * If we didn't copy as much as we expected, then that
2853                  * may mean we trod into an unmapped area. Stop copying
2854                  * at that point. On the next pass through the big
2855                  * loop, we'll likely end up getting a zero-length
2856                  * write and bailing out of it.
2857                  */
2858                 if (copied < bytes)
2859                         break;
2860         }
2861         cur_len = save_len - cur_len;
2862         *len = cur_len;
2863
2864         /*
2865          * If we have no data to send, then that probably means that
2866          * the copy above failed altogether. That's most likely because
2867          * the address in the iovec was bogus. Return -EFAULT and let
2868          * the caller free anything we allocated and bail out.
2869          */
2870         if (!cur_len)
2871                 return -EFAULT;
2872
2873         /*
2874          * i + 1 now represents the number of pages we actually used in
2875          * the copy phase above.
2876          */
2877         *num_pages = i + 1;
2878         return 0;
2879 }
2880
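/*
 * Resend a failed wdata in one piece: reopen the file handle if it was
 * invalidated, wait until enough credits are available for the whole
 * write, then reissue the async write, retrying on -EAGAIN.
 */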
2881 static int
2882 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2883         struct cifs_aio_ctx *ctx)
2884 {
2885         unsigned int wsize;
2886         struct cifs_credits credits;
2887         int rc;
2888         struct TCP_Server_Info *server = wdata->server;
2889
2890         do {
2891                 if (wdata->cfile->invalidHandle) {
2892                         rc = cifs_reopen_file(wdata->cfile, false);
2893                         if (rc == -EAGAIN)
2894                                 continue;
2895                         else if (rc)
2896                                 break;
2897                 }
2898
2900                 /*
2901                  * Wait for credits to resend this wdata.
2902                  * Note: we are attempting to resend the whole wdata,
2903                  * not in segments.
2904                  */
2905                 do {
2906                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2907                                                 &wsize, &credits);
2908                         if (rc)
2909                                 goto fail;
2910
2911                         if (wsize < wdata->bytes) {
2912                                 add_credits_and_wake_if(server, &credits, 0);
2913                                 msleep(1000);
2914                         }
2915                 } while (wsize < wdata->bytes);
2916                 wdata->credits = credits;
2917
2918                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2919
2920                 if (!rc) {
2921                         if (wdata->cfile->invalidHandle)
2922                                 rc = -EAGAIN;
2923                         else {
2924 #ifdef CONFIG_CIFS_SMB_DIRECT
2925                                 if (wdata->mr) {
2926                                         wdata->mr->need_invalidate = true;
2927                                         smbd_deregister_mr(wdata->mr);
2928                                         wdata->mr = NULL;
2929                                 }
2930 #endif
2931                                 rc = server->ops->async_writev(wdata,
2932                                         cifs_uncached_writedata_release);
2933                         }
2934                 }
2935
2936                 /* If the write was successfully sent, we are done */
2937                 if (!rc) {
2938                         list_add_tail(&wdata->list, wdata_list);
2939                         return 0;
2940                 }
2941
2942                 /* Roll back credits and retry if needed */
2943                 add_credits_and_wake_if(server, &wdata->credits, 0);
2944         } while (rc == -EAGAIN);
2945
2946 fail:
2947         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2948         return rc;
2949 }
2950
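/*
 * Carve an uncached or direct write up into wsize-sized requests and
 * send each one asynchronously, queueing the resulting wdatas on
 * wdata_list for collect_uncached_write_data() to reap.
 */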
2951 static int
2952 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2953                      struct cifsFileInfo *open_file,
2954                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2955                      struct cifs_aio_ctx *ctx)
2956 {
2957         int rc = 0;
2958         size_t cur_len;
2959         unsigned long nr_pages, num_pages, i;
2960         struct cifs_writedata *wdata;
2961         struct iov_iter saved_from = *from;
2962         loff_t saved_offset = offset;
2963         pid_t pid;
2964         struct TCP_Server_Info *server;
2965         struct page **pagevec;
2966         size_t start;
2967         unsigned int xid;
2968
2969         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2970                 pid = open_file->pid;
2971         else
2972                 pid = current->tgid;
2973
2974         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2975         xid = get_xid();
2976
2977         do {
2978                 unsigned int wsize;
2979                 struct cifs_credits credits_on_stack;
2980                 struct cifs_credits *credits = &credits_on_stack;
2981
2982                 if (open_file->invalidHandle) {
2983                         rc = cifs_reopen_file(open_file, false);
2984                         if (rc == -EAGAIN)
2985                                 continue;
2986                         else if (rc)
2987                                 break;
2988                 }
2989
2990                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2991                                                    &wsize, credits);
2992                 if (rc)
2993                         break;
2994
2995                 cur_len = min_t(const size_t, len, wsize);
2996
2997                 if (ctx->direct_io) {
2998                         ssize_t result;
2999
3000                         result = iov_iter_get_pages_alloc(
3001                                 from, &pagevec, cur_len, &start);
3002                         if (result < 0) {
3003                                 cifs_dbg(VFS,
3004                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3005                                          result, iov_iter_type(from),
3006                                          from->iov_offset, from->count);
3007                                 dump_stack();
3008
3009                                 rc = result;
3010                                 add_credits_and_wake_if(server, credits, 0);
3011                                 break;
3012                         }
3013                         cur_len = (size_t)result;
3014                         iov_iter_advance(from, cur_len);
3015
3016                         nr_pages =
3017                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3018
3019                         wdata = cifs_writedata_direct_alloc(pagevec,
3020                                              cifs_uncached_writev_complete);
3021                         if (!wdata) {
3022                                 rc = -ENOMEM;
3023                                 add_credits_and_wake_if(server, credits, 0);
3024                                 break;
3025                         }
3026
3028                         wdata->page_offset = start;
3029                         wdata->tailsz =
3030                                 nr_pages > 1 ?
3031                                         cur_len - (PAGE_SIZE - start) -
3032                                         (nr_pages - 2) * PAGE_SIZE :
3033                                         cur_len;
3034                 } else {
3035                         nr_pages = get_numpages(wsize, len, &cur_len);
3036                         wdata = cifs_writedata_alloc(nr_pages,
3037                                              cifs_uncached_writev_complete);
3038                         if (!wdata) {
3039                                 rc = -ENOMEM;
3040                                 add_credits_and_wake_if(server, credits, 0);
3041                                 break;
3042                         }
3043
3044                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3045                         if (rc) {
3046                                 kvfree(wdata->pages);
3047                                 kfree(wdata);
3048                                 add_credits_and_wake_if(server, credits, 0);
3049                                 break;
3050                         }
3051
3052                         num_pages = nr_pages;
3053                         rc = wdata_fill_from_iovec(
3054                                 wdata, from, &cur_len, &num_pages);
3055                         if (rc) {
3056                                 for (i = 0; i < nr_pages; i++)
3057                                         put_page(wdata->pages[i]);
3058                                 kvfree(wdata->pages);
3059                                 kfree(wdata);
3060                                 add_credits_and_wake_if(server, credits, 0);
3061                                 break;
3062                         }
3063
3064                         /*
3065                          * Bring nr_pages down to the number of pages we
3066                          * actually used, and free any pages that we didn't use.
3067                          */
3068                         for ( ; nr_pages > num_pages; nr_pages--)
3069                                 put_page(wdata->pages[nr_pages - 1]);
3070
3071                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3072                 }
3073
3074                 wdata->sync_mode = WB_SYNC_ALL;
3075                 wdata->nr_pages = nr_pages;
3076                 wdata->offset = (__u64)offset;
3077                 wdata->cfile = cifsFileInfo_get(open_file);
3078                 wdata->server = server;
3079                 wdata->pid = pid;
3080                 wdata->bytes = cur_len;
3081                 wdata->pagesz = PAGE_SIZE;
3082                 wdata->credits = credits_on_stack;
3083                 wdata->ctx = ctx;
3084                 kref_get(&ctx->refcount);
3085
3086                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3087
3088                 if (!rc) {
3089                         if (wdata->cfile->invalidHandle)
3090                                 rc = -EAGAIN;
3091                         else
3092                                 rc = server->ops->async_writev(wdata,
3093                                         cifs_uncached_writedata_release);
3094                 }
3095
3096                 if (rc) {
3097                         add_credits_and_wake_if(server, &wdata->credits, 0);
3098                         kref_put(&wdata->refcount,
3099                                  cifs_uncached_writedata_release);
3100                         if (rc == -EAGAIN) {
3101                                 *from = saved_from;
3102                                 iov_iter_advance(from, offset - saved_offset);
3103                                 continue;
3104                         }
3105                         break;
3106                 }
3107
3108                 list_add_tail(&wdata->list, wdata_list);
3109                 offset += cur_len;
3110                 len -= cur_len;
3111         } while (len > 0);
3112
3113         free_xid(xid);
3114         return rc;
3115 }
3116
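/*
 * Reap the async writes belonging to an aio context: tally the bytes
 * written, resend any piece that failed with -EAGAIN and, once every
 * piece is accounted for, complete the context.
 */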
3117 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3118 {
3119         struct cifs_writedata *wdata, *tmp;
3120         struct cifs_tcon *tcon;
3121         struct cifs_sb_info *cifs_sb;
3122         struct dentry *dentry = ctx->cfile->dentry;
3123         int rc;
3124
3125         tcon = tlink_tcon(ctx->cfile->tlink);
3126         cifs_sb = CIFS_SB(dentry->d_sb);
3127
3128         mutex_lock(&ctx->aio_mutex);
3129
3130         if (list_empty(&ctx->list)) {
3131                 mutex_unlock(&ctx->aio_mutex);
3132                 return;
3133         }
3134
3135         rc = ctx->rc;
3136         /*
3137          * Wait for and collect replies for any successful sends in order of
3138          * increasing offset. Once an error is hit, return without waiting
3139          * for any more replies.
3140          */
3141 restart_loop:
3142         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3143                 if (!rc) {
3144                         if (!try_wait_for_completion(&wdata->done)) {
3145                                 mutex_unlock(&ctx->aio_mutex);
3146                                 return;
3147                         }
3148
3149                         if (wdata->result)
3150                                 rc = wdata->result;
3151                         else
3152                                 ctx->total_len += wdata->bytes;
3153
3154                         /* resend call if it's a retryable error */
3155                         if (rc == -EAGAIN) {
3156                                 struct list_head tmp_list;
3157                                 struct iov_iter tmp_from = ctx->iter;
3158
3159                                 INIT_LIST_HEAD(&tmp_list);
3160                                 list_del_init(&wdata->list);
3161
3162                                 if (ctx->direct_io)
3163                                         rc = cifs_resend_wdata(
3164                                                 wdata, &tmp_list, ctx);
3165                                 else {
3166                                         iov_iter_advance(&tmp_from,
3167                                                  wdata->offset - ctx->pos);
3168
3169                                         rc = cifs_write_from_iter(wdata->offset,
3170                                                 wdata->bytes, &tmp_from,
3171                                                 ctx->cfile, cifs_sb, &tmp_list,
3172                                                 ctx);
3173
3174                                         kref_put(&wdata->refcount,
3175                                                 cifs_uncached_writedata_release);
3176                                 }
3177
3178                                 list_splice(&tmp_list, &ctx->list);
3179                                 goto restart_loop;
3180                         }
3181                 }
3182                 list_del_init(&wdata->list);
3183                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3184         }
3185
3186         cifs_stats_bytes_written(tcon, ctx->total_len);
3187         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3188
3189         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3190
3191         mutex_unlock(&ctx->aio_mutex);
3192
3193         if (ctx->iocb && ctx->iocb->ki_complete)
3194                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3195         else
3196                 complete(&ctx->done);
3197 }
3198
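/*
 * Common body of the uncached and direct write paths: build an aio
 * context for the request, issue the async writes, then either return
 * -EIOCBQUEUED for an async iocb or wait for the result.
 */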
3199 static ssize_t __cifs_writev(
3200         struct kiocb *iocb, struct iov_iter *from, bool direct)
3201 {
3202         struct file *file = iocb->ki_filp;
3203         ssize_t total_written = 0;
3204         struct cifsFileInfo *cfile;
3205         struct cifs_tcon *tcon;
3206         struct cifs_sb_info *cifs_sb;
3207         struct cifs_aio_ctx *ctx;
3208         struct iov_iter saved_from = *from;
3209         size_t len = iov_iter_count(from);
3210         int rc;
3211
3212         /*
3213          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3214          * In this case, fall back to the non-direct write function.
3215          * This could be improved by getting pages directly in ITER_KVEC.
3216          */
3217         if (direct && iov_iter_is_kvec(from)) {
3218                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3219                 direct = false;
3220         }
3221
3222         rc = generic_write_checks(iocb, from);
3223         if (rc <= 0)
3224                 return rc;
3225
3226         cifs_sb = CIFS_FILE_SB(file);
3227         cfile = file->private_data;
3228         tcon = tlink_tcon(cfile->tlink);
3229
3230         if (!tcon->ses->server->ops->async_writev)
3231                 return -ENOSYS;
3232
3233         ctx = cifs_aio_ctx_alloc();
3234         if (!ctx)
3235                 return -ENOMEM;
3236
3237         ctx->cfile = cifsFileInfo_get(cfile);
3238
3239         if (!is_sync_kiocb(iocb))
3240                 ctx->iocb = iocb;
3241
3242         ctx->pos = iocb->ki_pos;
3243
3244         if (direct) {
3245                 ctx->direct_io = true;
3246                 ctx->iter = *from;
3247                 ctx->len = len;
3248         } else {
3249                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3250                 if (rc) {
3251                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3252                         return rc;
3253                 }
3254         }
3255
3256         /* grab a lock here because write response handlers can access ctx */
3257         mutex_lock(&ctx->aio_mutex);
3258
3259         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3260                                   cfile, cifs_sb, &ctx->list, ctx);
3261
3262         /*
3263          * If at least one write was successfully sent, then discard any rc
3264          * value from the later writes. If the remaining writes succeed,
3265          * we'll end up returning whatever was written; if one fails, we'll
3266          * pick up a new rc value from it.
3267          */
3268         if (!list_empty(&ctx->list))
3269                 rc = 0;
3270
3271         mutex_unlock(&ctx->aio_mutex);
3272
3273         if (rc) {
3274                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3275                 return rc;
3276         }
3277
3278         if (!is_sync_kiocb(iocb)) {
3279                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3280                 return -EIOCBQUEUED;
3281         }
3282
3283         rc = wait_for_completion_killable(&ctx->done);
3284         if (rc) {
3285                 mutex_lock(&ctx->aio_mutex);
3286                 ctx->rc = rc = -EINTR;
3287                 total_written = ctx->total_len;
3288                 mutex_unlock(&ctx->aio_mutex);
3289         } else {
3290                 rc = ctx->rc;
3291                 total_written = ctx->total_len;
3292         }
3293
3294         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3295
3296         if (unlikely(!total_written))
3297                 return rc;
3298
3299         iocb->ki_pos += total_written;
3300         return total_written;
3301 }
3302
3303 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3304 {
3305         return __cifs_writev(iocb, from, true);
3306 }
3307
3308 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3309 {
3310         return __cifs_writev(iocb, from, false);
3311 }
3312
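/*
 * Write through the page cache for oplocked files, holding lock_sem
 * so the write cannot race with a mandatory brlock that would forbid
 * it.
 */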
3313 static ssize_t
3314 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3315 {
3316         struct file *file = iocb->ki_filp;
3317         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3318         struct inode *inode = file->f_mapping->host;
3319         struct cifsInodeInfo *cinode = CIFS_I(inode);
3320         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3321         ssize_t rc;
3322
3323         inode_lock(inode);
3324         /*
3325          * We need to hold the sem to be sure nobody modifies lock list
3326          * with a brlock that prevents writing.
3327          */
3328         down_read(&cinode->lock_sem);
3329
3330         rc = generic_write_checks(iocb, from);
3331         if (rc <= 0)
3332                 goto out;
3333
3334         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3335                                      server->vals->exclusive_lock_type, 0,
3336                                      NULL, CIFS_WRITE_OP))
3337                 rc = __generic_file_write_iter(iocb, from);
3338         else
3339                 rc = -EACCES;
3340 out:
3341         up_read(&cinode->lock_sem);
3342         inode_unlock(inode);
3343
3344         if (rc > 0)
3345                 rc = generic_write_sync(iocb, rc);
3346         return rc;
3347 }
3348
3349 ssize_t
3350 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3351 {
3352         struct inode *inode = file_inode(iocb->ki_filp);
3353         struct cifsInodeInfo *cinode = CIFS_I(inode);
3354         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3355         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3356                                                 iocb->ki_filp->private_data;
3357         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3358         ssize_t written;
3359
3360         written = cifs_get_writer(cinode);
3361         if (written)
3362                 return written;
3363
3364         if (CIFS_CACHE_WRITE(cinode)) {
3365                 if (cap_unix(tcon->ses) &&
3366                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3367                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3368                         written = generic_file_write_iter(iocb, from);
3369                         goto out;
3370                 }
3371                 written = cifs_writev(iocb, from);
3372                 goto out;
3373         }
3374         /*
3375          * For non-oplocked files in strict cache mode we need to write the data
3376          * to the server exactly from pos to pos+len-1 rather than flush all
3377          * affected pages because it may cause an error with mandatory locks on
3378          * these pages but not on the region from pos to pos+len-1.
3379          */
3380         written = cifs_user_writev(iocb, from);
3381         if (CIFS_CACHE_READ(cinode)) {
3382                 /*
3383                  * We have read level caching and we have just sent a write
3384                  * request to the server thus making data in the cache stale.
3385                  * Zap the cache and set oplock/lease level to NONE to avoid
3386                  * reading stale data from the cache. All subsequent read
3387                  * operations will read new data from the server.
3388                  */
3389                 cifs_zap_mapping(inode);
3390                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3391                          inode);
3392                 cinode->oplock = 0;
3393         }
3394 out:
3395         cifs_put_writer(cinode);
3396         return written;
3397 }
3398
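/*
 * Allocate a readdata around a caller-supplied page array, as used by
 * the direct I/O path where the pages come from the user buffer.
 */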
3399 static struct cifs_readdata *
3400 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3401 {
3402         struct cifs_readdata *rdata;
3403
3404         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3405         if (rdata != NULL) {
3406                 rdata->pages = pages;
3407                 kref_init(&rdata->refcount);
3408                 INIT_LIST_HEAD(&rdata->list);
3409                 init_completion(&rdata->done);
3410                 INIT_WORK(&rdata->work, complete);
3411         }
3412
3413         return rdata;
3414 }
3415
3416 static struct cifs_readdata *
3417 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3418 {
3419         struct page **pages =
3420                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3421         struct cifs_readdata *ret = NULL;
3422
3423         if (pages) {
3424                 ret = cifs_readdata_direct_alloc(pages, complete);
3425                 if (!ret)
3426                         kfree(pages);
3427         }
3428
3429         return ret;
3430 }
3431
3432 void
3433 cifs_readdata_release(struct kref *refcount)
3434 {
3435         struct cifs_readdata *rdata = container_of(refcount,
3436                                         struct cifs_readdata, refcount);
3437 #ifdef CONFIG_CIFS_SMB_DIRECT
3438         if (rdata->mr) {
3439                 smbd_deregister_mr(rdata->mr);
3440                 rdata->mr = NULL;
3441         }
3442 #endif
3443         if (rdata->cfile)
3444                 cifsFileInfo_put(rdata->cfile);
3445
3446         kvfree(rdata->pages);
3447         kfree(rdata);
3448 }
3449
3450 static int
3451 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3452 {
3453         int rc = 0;
3454         struct page *page;
3455         unsigned int i;
3456
3457         for (i = 0; i < nr_pages; i++) {
3458                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3459                 if (!page) {
3460                         rc = -ENOMEM;
3461                         break;
3462                 }
3463                 rdata->pages[i] = page;
3464         }
3465
3466         if (rc) {
3467                 unsigned int nr_page_failed = i;
3468
3469                 for (i = 0; i < nr_page_failed; i++) {
3470                         put_page(rdata->pages[i]);
3471                         rdata->pages[i] = NULL;
3472                 }
3473         }
3474         return rc;
3475 }
3476
3477 static void
3478 cifs_uncached_readdata_release(struct kref *refcount)
3479 {
3480         struct cifs_readdata *rdata = container_of(refcount,
3481                                         struct cifs_readdata, refcount);
3482         unsigned int i;
3483
3484         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3485         for (i = 0; i < rdata->nr_pages; i++) {
3486                 put_page(rdata->pages[i]);
3487         }
3488         cifs_readdata_release(refcount);
3489 }
3490
3491 /**
3492  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3493  * @rdata:      the readdata response with list of pages holding data
3494  * @iter:       destination for our data
3495  *
3496  * This function copies data from a list of pages in a readdata response into
3497  * an array of iovecs. It will first calculate where the data should go
3498  * based on the info in the readdata and then copy the data into that spot.
3499  */
3500 static int
3501 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3502 {
3503         size_t remaining = rdata->got_bytes;
3504         unsigned int i;
3505
3506         for (i = 0; i < rdata->nr_pages; i++) {
3507                 struct page *page = rdata->pages[i];
3508                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3509                 size_t written;
3510
3511                 if (unlikely(iov_iter_is_pipe(iter))) {
3512                         void *addr = kmap_atomic(page);
3513
3514                         written = copy_to_iter(addr, copy, iter);
3515                         kunmap_atomic(addr);
3516                 } else
3517                         written = copy_page_to_iter(page, 0, copy, iter);
3518                 remaining -= written;
3519                 if (written < copy && iov_iter_count(iter) > 0)
3520                         break;
3521         }
3522         return remaining ? -EFAULT : 0;
3523 }
3524
3525 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3526
3527 static void
3528 cifs_uncached_readv_complete(struct work_struct *work)
3529 {
3530         struct cifs_readdata *rdata = container_of(work,
3531                                                 struct cifs_readdata, work);
3532
3533         complete(&rdata->done);
3534         collect_uncached_read_data(rdata->ctx);
3535         /* the call below can possibly free the last ref to the aio ctx */
3536         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3537 }
3538
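/*
 * Fill the pages of an uncached read with up to len bytes, copying
 * from @iter when the payload has already been received (e.g. after
 * decryption) or reading straight from the socket otherwise; for
 * smbdirect the RDMA transfer has already placed the data, so only
 * the byte counts are updated.
 */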
3539 static int
3540 uncached_fill_pages(struct TCP_Server_Info *server,
3541                     struct cifs_readdata *rdata, struct iov_iter *iter,
3542                     unsigned int len)
3543 {
3544         int result = 0;
3545         unsigned int i;
3546         unsigned int nr_pages = rdata->nr_pages;
3547         unsigned int page_offset = rdata->page_offset;
3548
3549         rdata->got_bytes = 0;
3550         rdata->tailsz = PAGE_SIZE;
3551         for (i = 0; i < nr_pages; i++) {
3552                 struct page *page = rdata->pages[i];
3553                 size_t n;
3554                 unsigned int segment_size = rdata->pagesz;
3555
3556                 if (i == 0)
3557                         segment_size -= page_offset;
3558                 else
3559                         page_offset = 0;
3560
3562                 if (len <= 0) {
3563                         /* no need to hold page hostage */
3564                         rdata->pages[i] = NULL;
3565                         rdata->nr_pages--;
3566                         put_page(page);
3567                         continue;
3568                 }
3569
3570                 n = len;
3571                 if (len >= segment_size)
3572                         /* enough data to fill the page */
3573                         n = segment_size;
3574                 else
3575                         rdata->tailsz = len;
3576                 len -= n;
3577
3578                 if (iter)
3579                         result = copy_page_from_iter(
3580                                         page, page_offset, n, iter);
3581 #ifdef CONFIG_CIFS_SMB_DIRECT
3582                 else if (rdata->mr)
3583                         result = n;
3584 #endif
3585                 else
3586                         result = cifs_read_page_from_socket(
3587                                         server, page, page_offset, n);
3588                 if (result < 0)
3589                         break;
3590
3591                 rdata->got_bytes += result;
3592         }
3593
3594         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3595                                                 rdata->got_bytes : result;
3596 }
3597
3598 static int
3599 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3600                               struct cifs_readdata *rdata, unsigned int len)
3601 {
3602         return uncached_fill_pages(server, rdata, NULL, len);
3603 }
3604
3605 static int
3606 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3607                               struct cifs_readdata *rdata,
3608                               struct iov_iter *iter)
3609 {
3610         return uncached_fill_pages(server, rdata, iter, iter->count);
3611 }
3612
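/*
 * Read-side counterpart of cifs_resend_wdata(): reissue a failed rdata
 * in one piece once credits for the whole read are available.
 */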
3613 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3614                         struct list_head *rdata_list,
3615                         struct cifs_aio_ctx *ctx)
3616 {
3617         unsigned int rsize;
3618         struct cifs_credits credits;
3619         int rc;
3620         struct TCP_Server_Info *server;
3621
3622         /* XXX: should we pick a new channel here? */
3623         server = rdata->server;
3624
3625         do {
3626                 if (rdata->cfile->invalidHandle) {
3627                         rc = cifs_reopen_file(rdata->cfile, true);
3628                         if (rc == -EAGAIN)
3629                                 continue;
3630                         else if (rc)
3631                                 break;
3632                 }
3633
3634                 /*
3635                  * Wait for credits to resend this rdata.
3636                  * Note: we are attempting to resend the whole rdata,
3637                  * not in segments.
3638                  */
3639                 do {
3640                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3641                                                 &rsize, &credits);
3642
3643                         if (rc)
3644                                 goto fail;
3645
3646                         if (rsize < rdata->bytes) {
3647                                 add_credits_and_wake_if(server, &credits, 0);
3648                                 msleep(1000);
3649                         }
3650                 } while (rsize < rdata->bytes);
3651                 rdata->credits = credits;
3652
3653                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3654                 if (!rc) {
3655                         if (rdata->cfile->invalidHandle)
3656                                 rc = -EAGAIN;
3657                         else {
3658 #ifdef CONFIG_CIFS_SMB_DIRECT
3659                                 if (rdata->mr) {
3660                                         rdata->mr->need_invalidate = true;
3661                                         smbd_deregister_mr(rdata->mr);
3662                                         rdata->mr = NULL;
3663                                 }
3664 #endif
3665                                 rc = server->ops->async_readv(rdata);
3666                         }
3667                 }
3668
3669                 /* If the read was successfully sent, we are done */
3670                 if (!rc) {
3671                         /* Add to aio pending list */
3672                         list_add_tail(&rdata->list, rdata_list);
3673                         return 0;
3674                 }
3675
3676                 /* Roll back credits and retry if needed */
3677                 add_credits_and_wake_if(server, &rdata->credits, 0);
3678         } while (rc == -EAGAIN);
3679
3680 fail:
3681         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3682         return rc;
3683 }
3684
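/*
 * Carve an uncached or direct read up into rsize-sized requests and
 * send each one asynchronously, queueing the resulting rdatas on
 * rdata_list for collect_uncached_read_data() to reap.
 */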
3685 static int
3686 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3687                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3688                      struct cifs_aio_ctx *ctx)
3689 {
3690         struct cifs_readdata *rdata;
3691         unsigned int npages, rsize;
3692         struct cifs_credits credits_on_stack;
3693         struct cifs_credits *credits = &credits_on_stack;
3694         size_t cur_len;
3695         int rc;
3696         pid_t pid;
3697         struct TCP_Server_Info *server;
3698         struct page **pagevec;
3699         size_t start;
3700         struct iov_iter direct_iov = ctx->iter;
3701
3702         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3703
3704         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3705                 pid = open_file->pid;
3706         else
3707                 pid = current->tgid;
3708
3709         if (ctx->direct_io)
3710                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3711
3712         do {
3713                 if (open_file->invalidHandle) {
3714                         rc = cifs_reopen_file(open_file, true);
3715                         if (rc == -EAGAIN)
3716                                 continue;
3717                         else if (rc)
3718                                 break;
3719                 }
3720
3721                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3722                                                    &rsize, credits);
3723                 if (rc)
3724                         break;
3725
3726                 cur_len = min_t(const size_t, len, rsize);
3727
3728                 if (ctx->direct_io) {
3729                         ssize_t result;
3730
3731                         result = iov_iter_get_pages_alloc(
3732                                         &direct_iov, &pagevec,
3733                                         cur_len, &start);
3734                         if (result < 0) {
3735                                 cifs_dbg(VFS,
3736                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3737                                          result, iov_iter_type(&direct_iov),
3738                                          direct_iov.iov_offset,
3739                                          direct_iov.count);
3740                                 dump_stack();
3741
3742                                 rc = result;
3743                                 add_credits_and_wake_if(server, credits, 0);
3744                                 break;
3745                         }
3746                         cur_len = (size_t)result;
3747                         iov_iter_advance(&direct_iov, cur_len);
3748
3749                         rdata = cifs_readdata_direct_alloc(
3750                                         pagevec, cifs_uncached_readv_complete);
3751                         if (!rdata) {
3752                                 add_credits_and_wake_if(server, credits, 0);
3753                                 rc = -ENOMEM;
3754                                 break;
3755                         }
3756
3757                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3758                         rdata->page_offset = start;
3759                         rdata->tailsz = npages > 1 ?
3760                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3761                                 cur_len;
3762
3763                 } else {
3764
3765                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3766                         /* allocate a readdata struct */
3767                         rdata = cifs_readdata_alloc(npages,
3768                                             cifs_uncached_readv_complete);
3769                         if (!rdata) {
3770                                 add_credits_and_wake_if(server, credits, 0);
3771                                 rc = -ENOMEM;
3772                                 break;
3773                         }
3774
3775                         rc = cifs_read_allocate_pages(rdata, npages);
3776                         if (rc) {
3777                                 kvfree(rdata->pages);
3778                                 kfree(rdata);
3779                                 add_credits_and_wake_if(server, credits, 0);
3780                                 break;
3781                         }
3782
3783                         rdata->tailsz = PAGE_SIZE;
3784                 }
3785
3786                 rdata->server = server;
3787                 rdata->cfile = cifsFileInfo_get(open_file);
3788                 rdata->nr_pages = npages;
3789                 rdata->offset = offset;
3790                 rdata->bytes = cur_len;
3791                 rdata->pid = pid;
3792                 rdata->pagesz = PAGE_SIZE;
3793                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3794                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3795                 rdata->credits = credits_on_stack;
3796                 rdata->ctx = ctx;
3797                 kref_get(&ctx->refcount);
3798
3799                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3800
3801                 if (!rc) {
3802                         if (rdata->cfile->invalidHandle)
3803                                 rc = -EAGAIN;
3804                         else
3805                                 rc = server->ops->async_readv(rdata);
3806                 }
3807
3808                 if (rc) {
3809                         add_credits_and_wake_if(server, &rdata->credits, 0);
3810                         kref_put(&rdata->refcount,
3811                                 cifs_uncached_readdata_release);
3812                         if (rc == -EAGAIN) {
3813                                 iov_iter_revert(&direct_iov, cur_len);
3814                                 continue;
3815                         }
3816                         break;
3817                 }
3818
3819                 list_add_tail(&rdata->list, rdata_list);
3820                 offset += cur_len;
3821                 len -= cur_len;
3822         } while (len > 0);
3823
3824         return rc;
3825 }
3826
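/*
 * Reap the async reads belonging to an aio context: copy the data to
 * the destination iterator (unless this is direct I/O), resend short
 * or -EAGAIN pieces, and complete the context when everything is in.
 */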
3827 static void
3828 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3829 {
3830         struct cifs_readdata *rdata, *tmp;
3831         struct iov_iter *to = &ctx->iter;
3832         struct cifs_sb_info *cifs_sb;
3833         int rc;
3834
3835         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3836
3837         mutex_lock(&ctx->aio_mutex);
3838
3839         if (list_empty(&ctx->list)) {
3840                 mutex_unlock(&ctx->aio_mutex);
3841                 return;
3842         }
3843
3844         rc = ctx->rc;
3845         /* the loop below should proceed in the order of increasing offsets */
3846 again:
3847         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3848                 if (!rc) {
3849                         if (!try_wait_for_completion(&rdata->done)) {
3850                                 mutex_unlock(&ctx->aio_mutex);
3851                                 return;
3852                         }
3853
3854                         if (rdata->result == -EAGAIN) {
3855                                 /* resend call if it's a retryable error */
3856                                 struct list_head tmp_list;
3857                                 unsigned int got_bytes = rdata->got_bytes;
3858
3859                                 list_del_init(&rdata->list);
3860                                 INIT_LIST_HEAD(&tmp_list);
3861
3862                                 /*
3863                                  * Got part of the data and then a reconnect
3864                                  * happened -- fill the buffer and continue
3865                                  * reading.
3866                                  */
3867                                 if (got_bytes && got_bytes < rdata->bytes) {
3868                                         rc = 0;
3869                                         if (!ctx->direct_io)
3870                                                 rc = cifs_readdata_to_iov(rdata, to);
3871                                         if (rc) {
3872                                                 kref_put(&rdata->refcount,
3873                                                         cifs_uncached_readdata_release);
3874                                                 continue;
3875                                         }
3876                                 }
3877
3878                                 if (ctx->direct_io) {
3879                                         /*
3880                                          * Re-use rdata as this is a
3881                                          * direct I/O
3882                                          */
3883                                         rc = cifs_resend_rdata(
3884                                                 rdata,
3885                                                 &tmp_list, ctx);
3886                                 } else {
3887                                         rc = cifs_send_async_read(
3888                                                 rdata->offset + got_bytes,
3889                                                 rdata->bytes - got_bytes,
3890                                                 rdata->cfile, cifs_sb,
3891                                                 &tmp_list, ctx);
3892
3893                                         kref_put(&rdata->refcount,
3894                                                 cifs_uncached_readdata_release);
3895                                 }
3896
3897                                 list_splice(&tmp_list, &ctx->list);
3898
3899                                 goto again;
3900                         } else if (rdata->result)
3901                                 rc = rdata->result;
3902                         else if (!ctx->direct_io)
3903                                 rc = cifs_readdata_to_iov(rdata, to);
3904
3905                         /* if there was a short read -- discard anything left */
3906                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3907                                 rc = -ENODATA;
3908
3909                         ctx->total_len += rdata->got_bytes;
3910                 }
3911                 list_del_init(&rdata->list);
3912                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3913         }
3914
3915         if (!ctx->direct_io)
3916                 ctx->total_len = ctx->len - iov_iter_count(to);
3917
3918         /* mask nodata case */
3919         if (rc == -ENODATA)
3920                 rc = 0;
3921
3922         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3923
3924         mutex_unlock(&ctx->aio_mutex);
3925
3926         if (ctx->iocb && ctx->iocb->ki_complete)
3927                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3928         else
3929                 complete(&ctx->done);
3930 }
3931
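/*
 * Common body of the uncached and direct read paths, mirroring
 * __cifs_writev(): build the aio context, issue the async reads, then
 * either return -EIOCBQUEUED or wait for completion.
 */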
3932 static ssize_t __cifs_readv(
3933         struct kiocb *iocb, struct iov_iter *to, bool direct)
3934 {
3935         size_t len;
3936         struct file *file = iocb->ki_filp;
3937         struct cifs_sb_info *cifs_sb;
3938         struct cifsFileInfo *cfile;
3939         struct cifs_tcon *tcon;
3940         ssize_t rc, total_read = 0;
3941         loff_t offset = iocb->ki_pos;
3942         struct cifs_aio_ctx *ctx;
3943
3944         /*
3945          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3946          * so fall back to the data copy read path.
3947          * This could be improved by getting pages directly in ITER_KVEC.
3948          */
3949         if (direct && iov_iter_is_kvec(to)) {
3950                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3951                 direct = false;
3952         }
3953
3954         len = iov_iter_count(to);
3955         if (!len)
3956                 return 0;
3957
3958         cifs_sb = CIFS_FILE_SB(file);
3959         cfile = file->private_data;
3960         tcon = tlink_tcon(cfile->tlink);
3961
3962         if (!tcon->ses->server->ops->async_readv)
3963                 return -ENOSYS;
3964
3965         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3966                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3967
3968         ctx = cifs_aio_ctx_alloc();
3969         if (!ctx)
3970                 return -ENOMEM;
3971
3972         ctx->cfile = cifsFileInfo_get(cfile);
3973
3974         if (!is_sync_kiocb(iocb))
3975                 ctx->iocb = iocb;
3976
3977         if (iter_is_iovec(to))
3978                 ctx->should_dirty = true;
3979
3980         if (direct) {
3981                 ctx->pos = offset;
3982                 ctx->direct_io = true;
3983                 ctx->iter = *to;
3984                 ctx->len = len;
3985         } else {
3986                 rc = setup_aio_ctx_iter(ctx, to, READ);
3987                 if (rc) {
3988                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3989                         return rc;
3990                 }
3991                 len = ctx->len;
3992         }
3993
3994         /* grab a lock here because read response handlers can access ctx */
3995         mutex_lock(&ctx->aio_mutex);
3996
3997         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3998
3999         /* if at least one read request was successfully sent, reset rc */
4000         if (!list_empty(&ctx->list))
4001                 rc = 0;
4002
4003         mutex_unlock(&ctx->aio_mutex);
4004
4005         if (rc) {
4006                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4007                 return rc;
4008         }
4009
4010         if (!is_sync_kiocb(iocb)) {
4011                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4012                 return -EIOCBQUEUED;
4013         }
4014
4015         rc = wait_for_completion_killable(&ctx->done);
4016         if (rc) {
4017                 mutex_lock(&ctx->aio_mutex);
4018                 ctx->rc = rc = -EINTR;
4019                 total_read = ctx->total_len;
4020                 mutex_unlock(&ctx->aio_mutex);
4021         } else {
4022                 rc = ctx->rc;
4023                 total_read = ctx->total_len;
4024         }
4025
4026         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4027
4028         if (total_read) {
4029                 iocb->ki_pos += total_read;
4030                 return total_read;
4031         }
4032         return rc;
4033 }
4034
4035 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4036 {
4037         return __cifs_readv(iocb, to, true);
4038 }
4039
4040 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4041 {
4042         return __cifs_readv(iocb, to, false);
4043 }
4044
4045 ssize_t
4046 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4047 {
4048         struct inode *inode = file_inode(iocb->ki_filp);
4049         struct cifsInodeInfo *cinode = CIFS_I(inode);
4050         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4051         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4052                                                 iocb->ki_filp->private_data;
4053         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4054         int rc = -EACCES;
4055
4056         /*
4057          * In strict cache mode we need to read from the server all the time
4058          * if we don't have a level II oplock because the server can delay mtime
4059          * changes - so we can't make a decision about invalidating the inode.
4060          * We can also fail with page reading if there are mandatory locks
4061          * on pages affected by this read but not on the region from pos to
4062          * pos+len-1.
4063          */
4064         if (!CIFS_CACHE_READ(cinode))
4065                 return cifs_user_readv(iocb, to);
4066
4067         if (cap_unix(tcon->ses) &&
4068             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4069             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4070                 return generic_file_read_iter(iocb, to);
4071
4072         /*
4073          * We need to hold the sem to be sure nobody modifies lock list
4074          * with a brlock that prevents reading.
4075          */
4076         down_read(&cinode->lock_sem);
4077         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4078                                      tcon->ses->server->vals->shared_lock_type,
4079                                      0, NULL, CIFS_READ_OP))
4080                 rc = generic_file_read_iter(iocb, to);
4081         up_read(&cinode->lock_sem);
4082         return rc;
4083 }
4084
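/*
 * Legacy synchronous read path: loop issuing server->ops->sync_read()
 * requests of at most rsize bytes until the buffer is filled, an error
 * occurs, or a zero-length read signals EOF.
 */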
4085 static ssize_t
4086 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4087 {
4088         int rc = -EACCES;
4089         unsigned int bytes_read = 0;
4090         unsigned int total_read;
4091         unsigned int current_read_size;
4092         unsigned int rsize;
4093         struct cifs_sb_info *cifs_sb;
4094         struct cifs_tcon *tcon;
4095         struct TCP_Server_Info *server;
4096         unsigned int xid;
4097         char *cur_offset;
4098         struct cifsFileInfo *open_file;
4099         struct cifs_io_parms io_parms = {0};
4100         int buf_type = CIFS_NO_BUFFER;
4101         __u32 pid;
4102
4103         xid = get_xid();
4104         cifs_sb = CIFS_FILE_SB(file);
4105
4106         /* FIXME: set up handlers for larger reads and/or convert to async */
4107         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4108
4109         if (file->private_data == NULL) {
4110                 rc = -EBADF;
4111                 free_xid(xid);
4112                 return rc;
4113         }
4114         open_file = file->private_data;
4115         tcon = tlink_tcon(open_file->tlink);
4116         server = cifs_pick_channel(tcon->ses);
4117
4118         if (!server->ops->sync_read) {
4119                 free_xid(xid);
4120                 return -ENOSYS;
4121         }
4122
4123         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4124                 pid = open_file->pid;
4125         else
4126                 pid = current->tgid;
4127
4128         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4129                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4130
4131         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4132              total_read += bytes_read, cur_offset += bytes_read) {
4133                 do {
4134                         current_read_size = min_t(uint, read_size - total_read,
4135                                                   rsize);
4136                         /*
4137                          * For Windows ME and 9x we do not want to request
4138                          * more than the server negotiated, since it will
4139                          * refuse the read then.
4140                          */
4141                         if (!(tcon->ses->capabilities &
4142                                 tcon->ses->server->vals->cap_large_files)) {
4143                                 current_read_size = min_t(uint,
4144                                         current_read_size, CIFSMaxBufSize);
4145                         }
4146                         if (open_file->invalidHandle) {
4147                                 rc = cifs_reopen_file(open_file, true);
4148                                 if (rc != 0)
4149                                         break;
4150                         }
4151                         io_parms.pid = pid;
4152                         io_parms.tcon = tcon;
4153                         io_parms.offset = *offset;
4154                         io_parms.length = current_read_size;
4155                         io_parms.server = server;
4156                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4157                                                     &bytes_read, &cur_offset,
4158                                                     &buf_type);
4159                 } while (rc == -EAGAIN);
4160
4161                 if (rc || (bytes_read == 0)) {
4162                         if (total_read) {
4163                                 break;
4164                         } else {
4165                                 free_xid(xid);
4166                                 return rc;
4167                         }
4168                 } else {
4169                         cifs_stats_bytes_read(tcon, bytes_read);
4170                         *offset += bytes_read;
4171                 }
4172         }
4173         free_xid(xid);
4174         return total_read;
4175 }
4176
4177 /*
4178  * If the page is mmap'ed into a process' page tables, then we need to make
4179  * sure that it doesn't change while being written back.
4180  */
4181 static vm_fault_t
4182 cifs_page_mkwrite(struct vm_fault *vmf)
4183 {
4184         struct page *page = vmf->page;
4185
4186         lock_page(page);
4187         return VM_FAULT_LOCKED;
4188 }
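/*
 * Note: VM_FAULT_LOCKED tells the fault path that the page was left
 * locked; the MM core is then responsible for dirtying and unlocking
 * the page itself, so writeback cannot observe it mid-modification.
 */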
4189
4190 static const struct vm_operations_struct cifs_file_vm_ops = {
4191         .fault = filemap_fault,
4192         .map_pages = filemap_map_pages,
4193         .page_mkwrite = cifs_page_mkwrite,
4194 };
4195
4196 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4197 {
4198         int xid, rc = 0;
4199         struct inode *inode = file_inode(file);
4200
4201         xid = get_xid();
4202
4203         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4204                 rc = cifs_zap_mapping(inode);
4205         if (!rc)
4206                 rc = generic_file_mmap(file, vma);
4207         if (!rc)
4208                 vma->vm_ops = &cifs_file_vm_ops;
4209
4210         free_xid(xid);
4211         return rc;
4212 }
4213
4214 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4215 {
4216         int rc, xid;
4217
4218         xid = get_xid();
4219
4220         rc = cifs_revalidate_file(file);
4221         if (rc)
4222                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4223                          rc);
4224         if (!rc)
4225                 rc = generic_file_mmap(file, vma);
4226         if (!rc)
4227                 vma->vm_ops = &cifs_file_vm_ops;
4228
4229         free_xid(xid);
4230         return rc;
4231 }
4232
4233 static void
4234 cifs_readv_complete(struct work_struct *work)
4235 {
4236         unsigned int i, got_bytes;
4237         struct cifs_readdata *rdata = container_of(work,
4238                                                 struct cifs_readdata, work);
4239
4240         got_bytes = rdata->got_bytes;
4241         for (i = 0; i < rdata->nr_pages; i++) {
4242                 struct page *page = rdata->pages[i];
4243
4244                 lru_cache_add(page);
4245
4246                 if (rdata->result == 0 ||
4247                     (rdata->result == -EAGAIN && got_bytes)) {
4248                         flush_dcache_page(page);
4249                         SetPageUptodate(page);
4250                 }
4251
4252                 unlock_page(page);
4253
4254                 if (rdata->result == 0 ||
4255                     (rdata->result == -EAGAIN && got_bytes))
4256                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4257
4258                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4259
4260                 put_page(page);
4261                 rdata->pages[i] = NULL;
4262         }
4263         kref_put(&rdata->refcount, cifs_readdata_release);
4264 }
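/*
 * Worked example of the got_bytes bookkeeping above, assuming 4K pages
 * and an -EAGAIN result after 6000 bytes landed in a 3-page rdata:
 *
 *	page 0: got_bytes = 6000 -> uptodate (full page of data)
 *	page 1: got_bytes = 1904 -> uptodate (partial data; the tail was
 *	                            zeroed by readpages_fill_pages())
 *	page 2: got_bytes = 0    -> left !uptodate, will be re-read
 *
 * Every page is unlocked and released regardless, so short or failed
 * reads do not leak page references.
 */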
4265
4266 static int
4267 readpages_fill_pages(struct TCP_Server_Info *server,
4268                      struct cifs_readdata *rdata, struct iov_iter *iter,
4269                      unsigned int len)
4270 {
4271         int result = 0;
4272         unsigned int i;
4273         u64 eof;
4274         pgoff_t eof_index;
4275         unsigned int nr_pages = rdata->nr_pages;
4276         unsigned int page_offset = rdata->page_offset;
4277
4278         /* determine the eof that the server (probably) has */
4279         eof = CIFS_I(rdata->mapping->host)->server_eof;
4280         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4281         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4282
4283         rdata->got_bytes = 0;
4284         rdata->tailsz = PAGE_SIZE;
4285         for (i = 0; i < nr_pages; i++) {
4286                 struct page *page = rdata->pages[i];
4287                 unsigned int to_read = rdata->pagesz;
4288                 size_t n;
4289
4290                 if (i == 0)
4291                         to_read -= page_offset;
4292                 else
4293                         page_offset = 0;
4294
4295                 n = to_read;
4296
4297                 if (len >= to_read) {
4298                         len -= to_read;
4299                 } else if (len > 0) {
4300                         /* enough for partial page, fill and zero the rest */
4301                         zero_user(page, len + page_offset, to_read - len);
4302                         n = rdata->tailsz = len;
4303                         len = 0;
4304                 } else if (page->index > eof_index) {
4305                         /*
4306                          * The VFS will not try to do readahead past the
4307                          * i_size, but it's possible that we have outstanding
4308                          * writes with gaps in the middle and the i_size hasn't
4309                          * caught up yet. Populate those with zeroed out pages
4310                          * to prevent the VFS from repeatedly attempting to
4311                          * fill them until the writes are flushed.
4312                          */
4313                         zero_user(page, 0, PAGE_SIZE);
4314                         lru_cache_add(page);
4315                         flush_dcache_page(page);
4316                         SetPageUptodate(page);
4317                         unlock_page(page);
4318                         put_page(page);
4319                         rdata->pages[i] = NULL;
4320                         rdata->nr_pages--;
4321                         continue;
4322                 } else {
4323                         /* no need to hold page hostage */
4324                         lru_cache_add(page);
4325                         unlock_page(page);
4326                         put_page(page);
4327                         rdata->pages[i] = NULL;
4328                         rdata->nr_pages--;
4329                         continue;
4330                 }
4331
4332                 if (iter)
4333                         result = copy_page_from_iter(
4334                                         page, page_offset, n, iter);
4335 #ifdef CONFIG_CIFS_SMB_DIRECT
4336                 else if (rdata->mr)
4337                         result = n;
4338 #endif
4339                 else
4340                         result = cifs_read_page_from_socket(
4341                                         server, page, page_offset, n);
4342                 if (result < 0)
4343                         break;
4344
4345                 rdata->got_bytes += result;
4346         }
4347
4348         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4349                                                 rdata->got_bytes : result;
4350 }
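/*
 * Minimal example of the eof_index math above, assuming PAGE_SHIFT ==
 * 12 (4K pages): with server_eof = 4096 the last page holding data is
 * (4096 - 1) >> 12 == 0, while server_eof = 4097 gives eof_index 1.
 * Pages past eof_index are zero-filled locally instead of being
 * requested from the server.
 */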
4351
4352 static int
4353 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4354                                struct cifs_readdata *rdata, unsigned int len)
4355 {
4356         return readpages_fill_pages(server, rdata, NULL, len);
4357 }
4358
4359 static int
4360 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4361                                struct cifs_readdata *rdata,
4362                                struct iov_iter *iter)
4363 {
4364         return readpages_fill_pages(server, rdata, iter, iter->count);
4365 }
4366
4367 static int
4368 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4369                     unsigned int rsize, struct list_head *tmplist,
4370                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4371 {
4372         struct page *page, *tpage;
4373         unsigned int expected_index;
4374         int rc;
4375         gfp_t gfp = readahead_gfp_mask(mapping);
4376
4377         INIT_LIST_HEAD(tmplist);
4378
4379         page = lru_to_page(page_list);
4380
4381         /*
4382          * Lock the page and put it in the cache. Since no one else
4383          * should have access to this page, we're safe to simply set
4384          * PG_locked without checking it first.
4385          */
4386         __SetPageLocked(page);
4387         rc = add_to_page_cache_locked(page, mapping,
4388                                       page->index, gfp);
4389
4390         /* give up if we can't stick it in the cache */
4391         if (rc) {
4392                 __ClearPageLocked(page);
4393                 return rc;
4394         }
4395
4396         /* move first page to the tmplist */
4397         *offset = (loff_t)page->index << PAGE_SHIFT;
4398         *bytes = PAGE_SIZE;
4399         *nr_pages = 1;
4400         list_move_tail(&page->lru, tmplist);
4401
4402         /* now try and add more pages onto the request */
4403         expected_index = page->index + 1;
4404         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4405                 /* discontinuity? */
4406                 if (page->index != expected_index)
4407                         break;
4408
4409                 /* would this page push the read over the rsize? */
4410                 if (*bytes + PAGE_SIZE > rsize)
4411                         break;
4412
4413                 __SetPageLocked(page);
4414                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4415                 if (rc) {
4416                         __ClearPageLocked(page);
4417                         break;
4418                 }
4419                 list_move_tail(&page->lru, tmplist);
4420                 (*bytes) += PAGE_SIZE;
4421                 expected_index++;
4422                 (*nr_pages)++;
4423         }
4424         return rc;
4425 }
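/*
 * Sketch of the batching this helper performs, with hypothetical
 * numbers: given rsize = 65536 and 4K pages, up to 16 consecutively
 * indexed pages are moved onto tmplist, so one async read can cover
 * indexes [n, n+15].  An index discontinuity (e.g. n+3 missing) or an
 * add_to_page_cache_locked() failure ends the batch early; the caller
 * then issues the read for whatever was gathered.
 */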
4426
4427 static int cifs_readpages(struct file *file, struct address_space *mapping,
4428         struct list_head *page_list, unsigned num_pages)
4429 {
4430         int rc;
4431         int err = 0;
4432         struct list_head tmplist;
4433         struct cifsFileInfo *open_file = file->private_data;
4434         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4435         struct TCP_Server_Info *server;
4436         pid_t pid;
4437         unsigned int xid;
4438
4439         xid = get_xid();
4440         /*
4441          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4442          * immediately if the cookie is negative.
4443          *
4444          * After this point, every page in the list might have PG_fscache set,
4445          * so we will need to clean that up on every page we don't use.
4446          */
4447         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4448                                          &num_pages);
4449         if (rc == 0) {
4450                 free_xid(xid);
4451                 return rc;
4452         }
4453
4454         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4455                 pid = open_file->pid;
4456         else
4457                 pid = current->tgid;
4458
4459         rc = 0;
4460         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4461
4462         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4463                  __func__, file, mapping, num_pages);
4464
4465         /*
4466          * Start with the page at end of list and move it to private
4467          * list. Do the same with any following pages until we hit
4468          * the rsize limit, hit an index discontinuity, or run out of
4469          * pages. Issue the async read and then start the loop again
4470          * until the list is empty.
4471          *
4472          * Note that list order is important. The page_list is in
4473          * order of declining indexes; when we put the pages into
4474          * rdata->pages, we want them in increasing order.
4475          */
4476         while (!list_empty(page_list) && !err) {
4477                 unsigned int i, nr_pages, bytes, rsize;
4478                 loff_t offset;
4479                 struct page *page, *tpage;
4480                 struct cifs_readdata *rdata;
4481                 struct cifs_credits credits_on_stack;
4482                 struct cifs_credits *credits = &credits_on_stack;
4483
4484                 if (open_file->invalidHandle) {
4485                         rc = cifs_reopen_file(open_file, true);
4486                         if (rc == -EAGAIN)
4487                                 continue;
4488                         else if (rc)
4489                                 break;
4490                 }
4491
4492                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4493                                                    &rsize, credits);
4494                 if (rc)
4495                         break;
4496
4497                 /*
4498                  * Give up immediately if rsize is too small to read an entire
4499                  * page. The VFS will fall back to readpage. However, we should
4500                  * never reach this point, since we set ra_pages to 0 when the
4501                  * rsize is smaller than a cache page.
4502                  */
4503                 if (unlikely(rsize < PAGE_SIZE)) {
4504                         add_credits_and_wake_if(server, credits, 0);
4505                         free_xid(xid);
4506                         return 0;
4507                 }
4508
4509                 nr_pages = 0;
4510                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4511                                          &nr_pages, &offset, &bytes);
4512                 if (!nr_pages) {
4513                         add_credits_and_wake_if(server, credits, 0);
4514                         break;
4515                 }
4516
4517                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4518                 if (!rdata) {
4519                         /* best to give up if we're out of mem */
4520                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4521                                 list_del(&page->lru);
4522                                 lru_cache_add(page);
4523                                 unlock_page(page);
4524                                 put_page(page);
4525                         }
4526                         rc = -ENOMEM;
4527                         add_credits_and_wake_if(server, credits, 0);
4528                         break;
4529                 }
4530
4531                 rdata->cfile = cifsFileInfo_get(open_file);
4532                 rdata->server = server;
4533                 rdata->mapping = mapping;
4534                 rdata->offset = offset;
4535                 rdata->bytes = bytes;
4536                 rdata->pid = pid;
4537                 rdata->pagesz = PAGE_SIZE;
4538                 rdata->tailsz = PAGE_SIZE;
4539                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4540                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4541                 rdata->credits = credits_on_stack;
4542
4543                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4544                         list_del(&page->lru);
4545                         rdata->pages[rdata->nr_pages++] = page;
4546                 }
4547
4548                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4549
4550                 if (!rc) {
4551                         if (rdata->cfile->invalidHandle)
4552                                 rc = -EAGAIN;
4553                         else
4554                                 rc = server->ops->async_readv(rdata);
4555                 }
4556
4557                 if (rc) {
4558                         add_credits_and_wake_if(server, &rdata->credits, 0);
4559                         for (i = 0; i < rdata->nr_pages; i++) {
4560                                 page = rdata->pages[i];
4561                                 lru_cache_add(page);
4562                                 unlock_page(page);
4563                                 put_page(page);
4564                         }
4565                         /* Fallback to the readpage in error/reconnect cases */
4566                         kref_put(&rdata->refcount, cifs_readdata_release);
4567                         break;
4568                 }
4569
4570                 kref_put(&rdata->refcount, cifs_readdata_release);
4571         }
4572
4573         /* Any pages that have been shown to fscache but didn't get added to
4574          * the pagecache must be uncached before they get returned to the
4575          * allocator.
4576          */
4577         cifs_fscache_readpages_cancel(mapping->host, page_list);
4578         free_xid(xid);
4579         return rc;
4580 }
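/*
 * Credit lifecycle in the loop above, summarized:
 *
 *	wait_mtu_credits()        reserve credits for up to rsize bytes
 *	readpages_get_pages()     decide how many bytes are really needed
 *	adjust_credits()          trim the reservation to rdata->bytes
 *	async_readv()             credits travel with rdata on success
 *	add_credits_and_wake_if() return the reservation on failure paths
 */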
4581
4582 /*
4583  * cifs_readpage_worker must be called with the page pinned
4584  */
4585 static int cifs_readpage_worker(struct file *file, struct page *page,
4586         loff_t *poffset)
4587 {
4588         char *read_data;
4589         int rc;
4590
4591         /* Is the page cached? */
4592         rc = cifs_readpage_from_fscache(file_inode(file), page);
4593         if (rc == 0)
4594                 goto read_complete;
4595
4596         read_data = kmap(page);
4597         /* for reads over a certain size we could initiate async read ahead */
4598
4599         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4600
4601         if (rc < 0)
4602                 goto io_error;
4603         else
4604                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4605
4606         /* we do not want atime to be less than mtime, as that broke some apps */
4607         file_inode(file)->i_atime = current_time(file_inode(file));
4608         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4609                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4610         else
4611                 file_inode(file)->i_atime = current_time(file_inode(file));
4612
4613         if (PAGE_SIZE > rc)
4614                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4615
4616         flush_dcache_page(page);
4617         SetPageUptodate(page);
4618
4619         /* send this page to the cache */
4620         cifs_readpage_to_fscache(file_inode(file), page);
4621
4622         rc = 0;
4623
4624 io_error:
4625         kunmap(page);
4626         unlock_page(page);
4627
4628 read_complete:
4629         return rc;
4630 }
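/*
 * Example of the short-read handling above, assuming 4K pages: if
 * cifs_read() returns rc = 1000, bytes 1000..4095 of the kmap()ed page
 * are memset() to zero before SetPageUptodate(), so the tail of a file
 * never exposes stale page contents.
 */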
4631
4632 static int cifs_readpage(struct file *file, struct page *page)
4633 {
4634         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4635         int rc = -EACCES;
4636         unsigned int xid;
4637
4638         xid = get_xid();
4639
4640         if (file->private_data == NULL) {
4641                 rc = -EBADF;
4642                 free_xid(xid);
4643                 return rc;
4644         }
4645
4646         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4647                  page, (int)offset, (int)offset);
4648
4649         rc = cifs_readpage_worker(file, page, &offset);
4650
4651         free_xid(xid);
4652         return rc;
4653 }
4654
4655 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4656 {
4657         struct cifsFileInfo *open_file;
4658
4659         spin_lock(&cifs_inode->open_file_lock);
4660         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4661                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4662                         spin_unlock(&cifs_inode->open_file_lock);
4663                         return 1;
4664                 }
4665         }
4666         spin_unlock(&cifs_inode->open_file_lock);
4667         return 0;
4668 }
4669
4670 /* We do not want to update the file size from the server for inodes
4671    open for write, to avoid races with writepage extending the file.
4672    In the future we could consider allowing refreshes of the inode
4673    only on increases in the file size, but this is tricky to do
4674    without racing with writebehind page caching in the current
4675    Linux kernel design. */
4676 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4677 {
4678         if (!cifsInode)
4679                 return true;
4680
4681         if (is_inode_writable(cifsInode)) {
4682                 /* This inode is open for write at least once */
4683                 struct cifs_sb_info *cifs_sb;
4684
4685                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4686                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4687                         /* since there is no page cache to corrupt on
4688                            directio we can change the size safely */
4689                         return true;
4690                 }
4691
4692                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4693                         return true;
4694
4695                 return false;
4696         } else
4697                 return true;
4698 }
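/*
 * Illustration of the policy above, with hypothetical sizes: if the
 * inode is open for write with a local i_size of 8192 and the server
 * reports an end of file of 4096, the update is rejected (false) so
 * that dirty, not-yet-flushed page cache data is not hidden; a reported
 * end of file of 16384 is accepted (true), since growing the size
 * cannot mask local writeback.
 */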
4699
4700 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4701                         loff_t pos, unsigned len, unsigned flags,
4702                         struct page **pagep, void **fsdata)
4703 {
4704         int oncethru = 0;
4705         pgoff_t index = pos >> PAGE_SHIFT;
4706         loff_t offset = pos & (PAGE_SIZE - 1);
4707         loff_t page_start = pos & PAGE_MASK;
4708         loff_t i_size;
4709         struct page *page;
4710         int rc = 0;
4711
4712         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4713
4714 start:
4715         page = grab_cache_page_write_begin(mapping, index, flags);
4716         if (!page) {
4717                 rc = -ENOMEM;
4718                 goto out;
4719         }
4720
4721         if (PageUptodate(page))
4722                 goto out;
4723
4724         /*
4725          * If we write a full page it will be up to date, no need to read from
4726          * the server. If the write is short, we'll end up doing a sync write
4727          * instead.
4728          */
4729         if (len == PAGE_SIZE)
4730                 goto out;
4731
4732         /*
4733          * optimize away the read when we have an oplock, and we're not
4734          * expecting to use any of the data we'd be reading in. That
4735          * is, when the page lies beyond the EOF, or straddles the EOF
4736          * and the write will cover all of the existing data.
4737          */
4738         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4739                 i_size = i_size_read(mapping->host);
4740                 if (page_start >= i_size ||
4741                     (offset == 0 && (pos + len) >= i_size)) {
4742                         zero_user_segments(page, 0, offset,
4743                                            offset + len,
4744                                            PAGE_SIZE);
4745                         /*
4746                          * PageChecked means that the parts of the page
4747                          * to which we're not writing are considered up
4748                          * to date. Once the data is copied to the
4749                          * page, it can be set uptodate.
4750                          */
4751                         SetPageChecked(page);
4752                         goto out;
4753                 }
4754         }
4755
4756         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4757                 /*
4758                  * might as well read a page, it is fast enough. If we get
4759                  * an error, we don't need to return it. cifs_write_end will
4760                  * do a sync write instead since PG_uptodate isn't set.
4761                  */
4762                 cifs_readpage_worker(file, page, &page_start);
4763                 put_page(page);
4764                 oncethru = 1;
4765                 goto start;
4766         } else {
4767                 /* we could try using another file handle if there is one,
4768                    but how would we lock it to prevent a close of that
4769                    handle racing with this read? In any case this will be
4770                    written out by write_end, so it is fine */
4771         }
4772 out:
4773         *pagep = page;
4774         return rc;
4775 }
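/*
 * The offset math above, with a hypothetical pos = 5000 and len = 100
 * on 4K pages:
 *
 *	index      = 5000 >> PAGE_SHIFT     = 1
 *	offset     = 5000 & (PAGE_SIZE - 1) = 904
 *	page_start = 5000 & PAGE_MASK       = 4096
 *
 * Since the write is short and does not start on the page boundary,
 * the page is read in (or zeroed under an oplock) before the copy.
 */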
4776
4777 static int cifs_release_page(struct page *page, gfp_t gfp)
4778 {
4779         if (PagePrivate(page))
4780                 return 0;
4781
4782         return cifs_fscache_release_page(page, gfp);
4783 }
4784
4785 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4786                                  unsigned int length)
4787 {
4788         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4789
4790         if (offset == 0 && length == PAGE_SIZE)
4791                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4792 }
4793
4794 static int cifs_launder_page(struct page *page)
4795 {
4796         int rc = 0;
4797         loff_t range_start = page_offset(page);
4798         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4799         struct writeback_control wbc = {
4800                 .sync_mode = WB_SYNC_ALL,
4801                 .nr_to_write = 0,
4802                 .range_start = range_start,
4803                 .range_end = range_end,
4804         };
4805
4806         cifs_dbg(FYI, "Launder page: %p\n", page);
4807
4808         if (clear_page_dirty_for_io(page))
4809                 rc = cifs_writepage_locked(page, &wbc);
4810
4811         cifs_fscache_invalidate_page(page, page->mapping->host);
4812         return rc;
4813 }
4814
4815 void cifs_oplock_break(struct work_struct *work)
4816 {
4817         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4818                                                   oplock_break);
4819         struct inode *inode = d_inode(cfile->dentry);
4820         struct cifsInodeInfo *cinode = CIFS_I(inode);
4821         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4822         struct TCP_Server_Info *server = tcon->ses->server;
4823         int rc = 0;
4824         bool purge_cache = false;
4825         bool is_deferred = false;
4826         struct cifs_deferred_close *dclose;
4827
4828         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4829                         TASK_UNINTERRUPTIBLE);
4830
4831         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4832                                       cfile->oplock_epoch, &purge_cache);
4833
4834         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4835                                                 cifs_has_mand_locks(cinode)) {
4836                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4837                          inode);
4838                 cinode->oplock = 0;
4839         }
4840
4841         if (inode && S_ISREG(inode->i_mode)) {
4842                 if (CIFS_CACHE_READ(cinode))
4843                         break_lease(inode, O_RDONLY);
4844                 else
4845                         break_lease(inode, O_WRONLY);
4846                 rc = filemap_fdatawrite(inode->i_mapping);
4847                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4848                         rc = filemap_fdatawait(inode->i_mapping);
4849                         mapping_set_error(inode->i_mapping, rc);
4850                         cifs_zap_mapping(inode);
4851                 }
4852                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4853                 if (CIFS_CACHE_WRITE(cinode))
4854                         goto oplock_break_ack;
4855         }
4856
4857         rc = cifs_push_locks(cfile);
4858         if (rc)
4859                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4860
4861 oplock_break_ack:
4862         /*
4863          * Releasing a stale oplock after a recent reconnect of the SMB session,
4864          * using a now-incorrect file handle, is not a data integrity issue; but
4865          * do not bother sending an oplock release if the session to the server
4866          * is still disconnected, since the server has already released the oplock.
4867          */
4868         if (!cfile->oplock_break_cancelled) {
4869                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4870                                                              cinode);
4871                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4872         }
4873         /*
4874          * When an oplock break is received and there are no active file
4875          * handles, only cached ones, schedule the deferred close immediately
4876          * so that a new open will not reuse the cached handle.
4877          */
4878         spin_lock(&CIFS_I(inode)->deferred_lock);
4879         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4880         if (is_deferred &&
4881             cfile->deferred_close_scheduled &&
4882             delayed_work_pending(&cfile->deferred)) {
4883                 /*
4884                  * If there is no pending work, mod_delayed_work queues new
4885                  * work, so increase the ref count to avoid a use-after-free.
4886                  */
4887                 if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
4888                         cifsFileInfo_get(cfile);
4889         }
4890         spin_unlock(&CIFS_I(inode)->deferred_lock);
4891         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
4892         cifs_done_oplock_break(cinode);
4893 }
4894
4895 /*
4896  * The presence of cifs_direct_io() in the address space ops vector
4897  * allows open() O_DIRECT flags which would have failed otherwise.
4898  *
4899  * In the non-cached mode (mount with cache=none), we shunt off direct
4900  * read and write requests, so this method should never be called.
4901  *
4902  * Direct IO is not yet supported in the cached mode.
4903  */
4904 static ssize_t
4905 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4906 {
4907         /*
4908          * FIXME
4909          * Eventually need to support direct IO for non forcedirectio mounts
4910          */
4911         return -EINVAL;
4912 }
4913
4914 static int cifs_swap_activate(struct swap_info_struct *sis,
4915                               struct file *swap_file, sector_t *span)
4916 {
4917         struct cifsFileInfo *cfile = swap_file->private_data;
4918         struct inode *inode = swap_file->f_mapping->host;
4919         unsigned long blocks;
4920         long long isize;
4921
4922         cifs_dbg(FYI, "swap activate\n");
4923
4924         spin_lock(&inode->i_lock);
4925         blocks = inode->i_blocks;
4926         isize = inode->i_size;
4927         spin_unlock(&inode->i_lock);
4928         if (blocks*512 < isize) {
4929                 pr_warn("swap activate: swapfile has holes\n");
4930                 return -EINVAL;
4931         }
4932         *span = sis->pages;
4933
4934         pr_warn_once("Swap support over SMB3 is experimental\n");
4935
4936         /*
4937          * TODO: consider adding an ACL (or documenting how) to prevent other
4938          * users (on this or other systems) from reading it
4939          */
4940
4941
4942         /* TODO: add sk_set_memalloc(inet) or similar */
4943
4944         if (cfile)
4945                 cfile->swapfile = true;
4946         /*
4947          * TODO: Since the file is already open, we can't open it with
4948          * DENY_ALL here, but we could add a call to grab a byte-range lock
4949          * to prevent others from reading or writing the file
4950          */
4951
4952         return 0;
4953 }
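/*
 * The hole check above relies on i_blocks counting 512-byte sectors: a
 * fully allocated 1 MiB swapfile has i_blocks >= 2048, so blocks * 512
 * covers isize.  A smaller product implies unallocated ranges, which
 * swap cannot tolerate.
 */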
4954
4955 static void cifs_swap_deactivate(struct file *file)
4956 {
4957         struct cifsFileInfo *cfile = file->private_data;
4958
4959         cifs_dbg(FYI, "swap deactivate\n");
4960
4961         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4962
4963         if (cfile)
4964                 cfile->swapfile = false;
4965
4966         /* do we need to unpin (or unlock) the file? */
4967 }
4968
4969 const struct address_space_operations cifs_addr_ops = {
4970         .readpage = cifs_readpage,
4971         .readpages = cifs_readpages,
4972         .writepage = cifs_writepage,
4973         .writepages = cifs_writepages,
4974         .write_begin = cifs_write_begin,
4975         .write_end = cifs_write_end,
4976         .set_page_dirty = __set_page_dirty_nobuffers,
4977         .releasepage = cifs_release_page,
4978         .direct_IO = cifs_direct_io,
4979         .invalidatepage = cifs_invalidate_page,
4980         .launder_page = cifs_launder_page,
4981         /*
4982          * TODO: investigate and, if useful, add a cifs_migratePage helper
4983          * (under CONFIG_MIGRATION) in the future, and also investigate
4984          * and add an is_dirty_writeback helper if needed
4985          */
4986         .swap_activate = cifs_swap_activate,
4987         .swap_deactivate = cifs_swap_deactivate,
4988 };
4989
4990 /*
4991  * cifs_readpages requires the server to support a buffer large enough to
4992  * contain the header plus one complete page of data.  Otherwise, we need
4993  * to leave cifs_readpages out of the address space operations.
4994  */
4995 const struct address_space_operations cifs_addr_ops_smallbuf = {
4996         .readpage = cifs_readpage,
4997         .writepage = cifs_writepage,
4998         .writepages = cifs_writepages,
4999         .write_begin = cifs_write_begin,
5000         .write_end = cifs_write_end,
5001         .set_page_dirty = __set_page_dirty_nobuffers,
5002         .releasepage = cifs_release_page,
5003         .invalidatepage = cifs_invalidate_page,
5004         .launder_page = cifs_launder_page,
5005 };