Merge tag 'i3c/for-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux
[linux-2.6-microblaze.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47 #include "fs_context.h"
48
49 static inline int cifs_convert_flags(unsigned int flags)
50 {
51         if ((flags & O_ACCMODE) == O_RDONLY)
52                 return GENERIC_READ;
53         else if ((flags & O_ACCMODE) == O_WRONLY)
54                 return GENERIC_WRITE;
55         else if ((flags & O_ACCMODE) == O_RDWR) {
56                 /* GENERIC_ALL is too much permission to request
57                    can cause unnecessary access denied on create */
58                 /* return GENERIC_ALL; */
59                 return (GENERIC_READ | GENERIC_WRITE);
60         }
61
62         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
63                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
64                 FILE_READ_DATA);
65 }
66
67 static u32 cifs_posix_convert_flags(unsigned int flags)
68 {
69         u32 posix_flags = 0;
70
71         if ((flags & O_ACCMODE) == O_RDONLY)
72                 posix_flags = SMB_O_RDONLY;
73         else if ((flags & O_ACCMODE) == O_WRONLY)
74                 posix_flags = SMB_O_WRONLY;
75         else if ((flags & O_ACCMODE) == O_RDWR)
76                 posix_flags = SMB_O_RDWR;
77
78         if (flags & O_CREAT) {
79                 posix_flags |= SMB_O_CREAT;
80                 if (flags & O_EXCL)
81                         posix_flags |= SMB_O_EXCL;
82         } else if (flags & O_EXCL)
83                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
84                          current->comm, current->tgid);
85
86         if (flags & O_TRUNC)
87                 posix_flags |= SMB_O_TRUNC;
88         /* be safe and imply O_SYNC for O_DSYNC */
89         if (flags & O_DSYNC)
90                 posix_flags |= SMB_O_SYNC;
91         if (flags & O_DIRECTORY)
92                 posix_flags |= SMB_O_DIRECTORY;
93         if (flags & O_NOFOLLOW)
94                 posix_flags |= SMB_O_NOFOLLOW;
95         if (flags & O_DIRECT)
96                 posix_flags |= SMB_O_DIRECT;
97
98         return posix_flags;
99 }
100
101 static inline int cifs_get_disposition(unsigned int flags)
102 {
103         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
104                 return FILE_CREATE;
105         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
106                 return FILE_OVERWRITE_IF;
107         else if ((flags & O_CREAT) == O_CREAT)
108                 return FILE_OPEN_IF;
109         else if ((flags & O_TRUNC) == O_TRUNC)
110                 return FILE_OVERWRITE;
111         else
112                 return FILE_OPEN;
113 }
114
/*
 * Open @full_path via the SMB1 unix extensions (CIFSPOSIXCreate), using the
 * umask-adjusted @mode and the POSIX-converted @f_flags.
 *
 * On success *poplock and *pnetfid are filled in by the create call.  If
 * @pinode is non-NULL the inode is instantiated (when *pinode is NULL) or
 * refreshed from the FILE_UNIX_BASIC_INFO the server returned; a reply of
 * Type == -1 means the server sent no info and the caller must do
 * qpathinfo itself.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
175
/*
 * Open @full_path via the regular (non-posix) path: convert VFS flags to an
 * NT desired access mask and create disposition, issue the open through the
 * dialect's ->open op, then refresh the inode metadata for the new handle.
 *
 * If the metadata refresh fails the just-opened server handle is closed
 * again; -ESTALE from the refresh is mapped to -EOPENSTALE so the VFS can
 * retry the open.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* scratch buffer for the file info returned by ->open */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* metadata refresh failed: drop the handle we just opened */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
265
266 static bool
267 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
268 {
269         struct cifs_fid_locks *cur;
270         bool has_locks = false;
271
272         down_read(&cinode->lock_sem);
273         list_for_each_entry(cur, &cinode->llist, llist) {
274                 if (!list_empty(&cur->locks)) {
275                         has_locks = true;
276                         break;
277                 }
278         }
279         up_read(&cinode->lock_sem);
280         return has_locks;
281 }
282
/*
 * Acquire @sem for writing by polling down_write_trylock() with a short
 * sleep between attempts, rather than blocking in down_write().
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
289
290 static void cifsFileInfo_put_work(struct work_struct *work);
291
/*
 * Allocate and initialize the per-open-file private data (cifsFileInfo) for
 * a successful open, wire it into the tcon and inode open-file lists, and
 * hand the (possibly downgraded) oplock to the dialect's ->set_fid.
 *
 * Returns the new cifsFileInfo (also stored in file->private_data), or NULL
 * on allocation failure.  The returned object starts with a reference count
 * of 1; drop it with cifsFileInfo_put().
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;	/* initial reference; see cifsFileInfo_put() */
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	/* lock ordering: tcon->open_file_lock before cinode->open_file_lock */
	spin_lock(&tcon->open_file_lock);
	/* a pending open (e.g. lease break) may override the granted oplock */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;	/* ->set_fid may set this back to true */
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
371
/*
 * Take an additional reference on an open file's private data.  The count
 * is protected by file_info_lock; drop the reference with cifsFileInfo_put().
 * Returns @cifs_file for caller convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
380
/*
 * Final teardown of a cifsFileInfo after its refcount reached zero and any
 * server-side close has already been issued (see _cifsFileInfo_put()).
 * Frees cached byte-range lock records, unhooks this open's lock list from
 * the inode, and drops the tlink, dentry and superblock references taken
 * in cifs_new_fileinfo().
 */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
407
408 static void cifsFileInfo_put_work(struct work_struct *work)
409 {
410         struct cifsFileInfo *cifs_file = container_of(work,
411                         struct cifsFileInfo, put);
412
413         cifsFileInfo_put_final(cifs_file);
414 }
415
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Convenience wrapper around _cifsFileInfo_put(): always potentially waits
 * for the oplock handler and offloads the final teardown to the
 * fileinfo_put workqueue. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
427
428 /**
429  * _cifsFileInfo_put - release a reference of file priv data
430  *
431  * This may involve closing the filehandle @cifs_file out on the
432  * server. Must be called without holding tcon->open_file_lock,
433  * cinode->open_file_lock and cifs_file->file_info_lock.
434  *
435  * If @wait_for_oplock_handler is true and we are releasing the last
436  * reference, wait for any running oplock break handler of the file
437  * and cancel any pending one.
438  *
439  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
440  * @wait_oplock_handler: must be false if called from oplock_break_handler
441  * @offload:    not offloaded on close and oplock breaks
442  *
443  */
444 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
445                        bool wait_oplock_handler, bool offload)
446 {
447         struct inode *inode = d_inode(cifs_file->dentry);
448         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
449         struct TCP_Server_Info *server = tcon->ses->server;
450         struct cifsInodeInfo *cifsi = CIFS_I(inode);
451         struct super_block *sb = inode->i_sb;
452         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
453         struct cifs_fid fid;
454         struct cifs_pending_open open;
455         bool oplock_break_cancelled;
456
457         spin_lock(&tcon->open_file_lock);
458         spin_lock(&cifsi->open_file_lock);
459         spin_lock(&cifs_file->file_info_lock);
460         if (--cifs_file->count > 0) {
461                 spin_unlock(&cifs_file->file_info_lock);
462                 spin_unlock(&cifsi->open_file_lock);
463                 spin_unlock(&tcon->open_file_lock);
464                 return;
465         }
466         spin_unlock(&cifs_file->file_info_lock);
467
468         if (server->ops->get_lease_key)
469                 server->ops->get_lease_key(inode, &fid);
470
471         /* store open in pending opens to make sure we don't miss lease break */
472         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
473
474         /* remove it from the lists */
475         list_del(&cifs_file->flist);
476         list_del(&cifs_file->tlist);
477         atomic_dec(&tcon->num_local_opens);
478
479         if (list_empty(&cifsi->openFileList)) {
480                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
481                          d_inode(cifs_file->dentry));
482                 /*
483                  * In strict cache mode we need invalidate mapping on the last
484                  * close  because it may cause a error when we open this file
485                  * again and get at least level II oplock.
486                  */
487                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
488                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
489                 cifs_set_oplock_level(cifsi, 0);
490         }
491
492         spin_unlock(&cifsi->open_file_lock);
493         spin_unlock(&tcon->open_file_lock);
494
495         oplock_break_cancelled = wait_oplock_handler ?
496                 cancel_work_sync(&cifs_file->oplock_break) : false;
497
498         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
499                 struct TCP_Server_Info *server = tcon->ses->server;
500                 unsigned int xid;
501
502                 xid = get_xid();
503                 if (server->ops->close_getattr)
504                         server->ops->close_getattr(xid, tcon, cifs_file);
505                 else if (server->ops->close)
506                         server->ops->close(xid, tcon, &cifs_file->fid);
507                 _free_xid(xid);
508         }
509
510         if (oplock_break_cancelled)
511                 cifs_done_oplock_break(cifsi);
512
513         cifs_del_pending_open(&open);
514
515         if (offload)
516                 queue_work(fileinfo_put_wq, &cifs_file->put);
517         else
518                 cifsFileInfo_put_final(cifs_file);
519 }
520
/*
 * VFS ->open for cifs: open @file on the server, preferring the SMB1
 * unix/posix open path when the server advertises it and falling back to
 * the regular NT open otherwise, then attach the resulting handle state
 * (cifsFileInfo) to file->private_data.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* direct I/O under strict cache mode needs the uncached file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	/* try the SMB1 unix-extensions posix open first, if supported */
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* track this open so a concurrent lease break is not lost */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server-side open and pending-open bookkeeping */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
647
648 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
649
650 /*
651  * Try to reacquire byte range locks that were released when session
652  * to server was lost.
653  */
654 static int
655 cifs_relock_file(struct cifsFileInfo *cfile)
656 {
657         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
658         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
659         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
660         int rc = 0;
661
662         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
663         if (cinode->can_cache_brlcks) {
664                 /* can cache locks - no need to relock */
665                 up_read(&cinode->lock_sem);
666                 return rc;
667         }
668
669         if (cap_unix(tcon->ses) &&
670             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
671             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
672                 rc = cifs_push_posix_locks(cfile);
673         else
674                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
675
676         up_read(&cinode->lock_sem);
677         return rc;
678 }
679
/*
 * Reopen a file whose server handle became invalid (e.g. after a session
 * reconnect).  Tries the SMB1 posix reopen first when supported, otherwise
 * re-issues the NT open with the same access/flags (masking off O_CREAT,
 * O_EXCL and O_TRUNC which already had their effect).  When @can_flush is
 * true the mapping is written back and the inode metadata re-fetched; any
 * cached byte-range locks are then re-pushed to the server.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened it under fh_mutex */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
844
845 int cifs_close(struct inode *inode, struct file *file)
846 {
847         if (file->private_data != NULL) {
848                 _cifsFileInfo_put(file->private_data, true, false);
849                 file->private_data = NULL;
850         }
851
852         /* return code from the ->release op is always ignored */
853         return 0;
854 }
855
856 void
857 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
858 {
859         struct cifsFileInfo *open_file;
860         struct list_head *tmp;
861         struct list_head *tmp1;
862         struct list_head tmp_list;
863
864         if (!tcon->use_persistent || !tcon->need_reopen_files)
865                 return;
866
867         tcon->need_reopen_files = false;
868
869         cifs_dbg(FYI, "Reopen persistent handles\n");
870         INIT_LIST_HEAD(&tmp_list);
871
872         /* list all files open on tree connection, reopen resilient handles  */
873         spin_lock(&tcon->open_file_lock);
874         list_for_each(tmp, &tcon->openFileList) {
875                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
876                 if (!open_file->invalidHandle)
877                         continue;
878                 cifsFileInfo_get(open_file);
879                 list_add_tail(&open_file->rlist, &tmp_list);
880         }
881         spin_unlock(&tcon->open_file_lock);
882
883         list_for_each_safe(tmp, tmp1, &tmp_list) {
884                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
885                 if (cifs_reopen_file(open_file, false /* do not flush */))
886                         tcon->need_reopen_files = true;
887                 list_del_init(&open_file->rlist);
888                 cifsFileInfo_put(open_file);
889         }
890 }
891
/*
 * ->release handler for directories opened for readdir.
 *
 * Closes the on-the-wire search handle if one is still open, frees the
 * cached network buffer holding unread search results, and releases the
 * private cifsFileInfo.  Always returns 0 to the VFS.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	/* nothing to tear down if the open never attached private data */
	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		/*
		 * Mark the handle invalid before dropping the spinlock so
		 * no other user of this cfile trusts it while we close it.
		 */
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	/* free the response buffer still held by the search state, if any */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
942
943 static struct cifsLockInfo *
944 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
945 {
946         struct cifsLockInfo *lock =
947                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
948         if (!lock)
949                 return lock;
950         lock->offset = offset;
951         lock->length = length;
952         lock->type = type;
953         lock->pid = current->tgid;
954         lock->flags = flags;
955         INIT_LIST_HEAD(&lock->blist);
956         init_waitqueue_head(&lock->block_q);
957         return lock;
958 }
959
960 void
961 cifs_del_lock_waiters(struct cifsLockInfo *lock)
962 {
963         struct cifsLockInfo *li, *tmp;
964         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
965                 list_del_init(&li->blist);
966                 wake_up(&li->block_q);
967         }
968 }
969
970 #define CIFS_LOCK_OP    0
971 #define CIFS_READ_OP    1
972 #define CIFS_WRITE_OP   2
973
/* @rw_check : CIFS_LOCK_OP (0) - lock op, CIFS_READ_OP (1) - read, CIFS_WRITE_OP (2) - write */
/*
 * Scan one fd's lock list for a lock overlapping [offset, offset+length)
 * that conflicts with the described request.  On conflict, optionally
 * report the offending lock through @conf_lock and return true.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* ranges do not overlap - keep scanning */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/*
		 * A shared request coexists with a same-type lock, or with
		 * any lock held through the same fid by the same tgid.
		 */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* OFD locks taken through the same fid do not conflict */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
1010
1011 bool
1012 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1013                         __u8 type, __u16 flags,
1014                         struct cifsLockInfo **conf_lock, int rw_check)
1015 {
1016         bool rc = false;
1017         struct cifs_fid_locks *cur;
1018         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1019
1020         list_for_each_entry(cur, &cinode->llist, llist) {
1021                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1022                                                  flags, cfile, conf_lock,
1023                                                  rw_check);
1024                 if (rc)
1025                         break;
1026         }
1027
1028         return rc;
1029 }
1030
1031 /*
1032  * Check if there is another lock that prevents us to set the lock (mandatory
1033  * style). If such a lock exists, update the flock structure with its
1034  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1035  * or leave it the same if we can't. Returns 0 if we don't need to request to
1036  * the server or 1 otherwise.
1037  */
1038 static int
1039 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1040                __u8 type, struct file_lock *flock)
1041 {
1042         int rc = 0;
1043         struct cifsLockInfo *conf_lock;
1044         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1045         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1046         bool exist;
1047
1048         down_read(&cinode->lock_sem);
1049
1050         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1051                                         flock->fl_flags, &conf_lock,
1052                                         CIFS_LOCK_OP);
1053         if (exist) {
1054                 flock->fl_start = conf_lock->offset;
1055                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1056                 flock->fl_pid = conf_lock->pid;
1057                 if (conf_lock->type & server->vals->shared_lock_type)
1058                         flock->fl_type = F_RDLCK;
1059                 else
1060                         flock->fl_type = F_WRLCK;
1061         } else if (!cinode->can_cache_brlcks)
1062                 rc = 1;
1063         else
1064                 flock->fl_type = F_UNLCK;
1065
1066         up_read(&cinode->lock_sem);
1067         return rc;
1068 }
1069
1070 static void
1071 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1072 {
1073         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1074         cifs_down_write(&cinode->lock_sem);
1075         list_add_tail(&lock->llist, &cfile->llist->locks);
1076         up_write(&cinode->lock_sem);
1077 }
1078
1079 /*
1080  * Set the byte-range lock (mandatory style). Returns:
1081  * 1) 0, if we set the lock and don't need to request to the server;
1082  * 2) 1, if no locks prevent us but we need to request to the server;
1083  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1084  */
1085 static int
1086 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1087                  bool wait)
1088 {
1089         struct cifsLockInfo *conf_lock;
1090         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1091         bool exist;
1092         int rc = 0;
1093
1094 try_again:
1095         exist = false;
1096         cifs_down_write(&cinode->lock_sem);
1097
1098         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1099                                         lock->type, lock->flags, &conf_lock,
1100                                         CIFS_LOCK_OP);
1101         if (!exist && cinode->can_cache_brlcks) {
1102                 list_add_tail(&lock->llist, &cfile->llist->locks);
1103                 up_write(&cinode->lock_sem);
1104                 return rc;
1105         }
1106
1107         if (!exist)
1108                 rc = 1;
1109         else if (!wait)
1110                 rc = -EACCES;
1111         else {
1112                 list_add_tail(&lock->blist, &conf_lock->blist);
1113                 up_write(&cinode->lock_sem);
1114                 rc = wait_event_interruptible(lock->block_q,
1115                                         (lock->blist.prev == &lock->blist) &&
1116                                         (lock->blist.next == &lock->blist));
1117                 if (!rc)
1118                         goto try_again;
1119                 cifs_down_write(&cinode->lock_sem);
1120                 list_del_init(&lock->blist);
1121         }
1122
1123         up_write(&cinode->lock_sem);
1124         return rc;
1125 }
1126
1127 /*
1128  * Check if there is another lock that prevents us to set the lock (posix
1129  * style). If such a lock exists, update the flock structure with its
1130  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1131  * or leave it the same if we can't. Returns 0 if we don't need to request to
1132  * the server or 1 otherwise.
1133  */
1134 static int
1135 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1136 {
1137         int rc = 0;
1138         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1139         unsigned char saved_type = flock->fl_type;
1140
1141         if ((flock->fl_flags & FL_POSIX) == 0)
1142                 return 1;
1143
1144         down_read(&cinode->lock_sem);
1145         posix_test_lock(file, flock);
1146
1147         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1148                 flock->fl_type = saved_type;
1149                 rc = 1;
1150         }
1151
1152         up_read(&cinode->lock_sem);
1153         return rc;
1154 }
1155
1156 /*
1157  * Set the byte-range lock (posix style). Returns:
1158  * 1) <0, if the error occurs while setting the lock;
1159  * 2) 0, if we set the lock and don't need to request to the server;
1160  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1161  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1162  */
1163 static int
1164 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1165 {
1166         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1167         int rc = FILE_LOCK_DEFERRED + 1;
1168
1169         if ((flock->fl_flags & FL_POSIX) == 0)
1170                 return rc;
1171
1172         cifs_down_write(&cinode->lock_sem);
1173         if (!cinode->can_cache_brlcks) {
1174                 up_write(&cinode->lock_sem);
1175                 return rc;
1176         }
1177
1178         rc = posix_lock_file(file, flock, NULL);
1179         up_write(&cinode->lock_sem);
1180         return rc;
1181 }
1182
/*
 * Send all cached byte-range locks for @cfile to the server via
 * LOCKING_ANDX requests, batching as many ranges per request as the
 * server's buffer size allows.  Exclusive and shared locks are pushed in
 * two separate passes since one request carries a single lock type.
 * Returns 0 on success or the last error encountered.
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	/* number of LOCKING_ANDX_RANGE elements that fit in one request */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	/* pass 0: exclusive locks, pass 1: shared locks */
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				/* batch is full - flush it to the server */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			/* flush the final partial batch for this lock type */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
1259
1260 static __u32
1261 hash_lockowner(fl_owner_t owner)
1262 {
1263         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1264 }
1265
/*
 * Snapshot of one POSIX lock, queued by cifs_push_posix_locks() so the
 * lock can be sent to the server outside of flc_lock.
 */
struct lock_to_push {
	struct list_head llist;	/* entry in the locks_to_send list */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* hashed lock owner, used as the lock pid */
	__u16 netfid;		/* file handle the lock is taken on */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1274
/*
 * Push all cached POSIX locks on this inode to the server.  The locks
 * are counted under flc_lock, one lock_to_push is preallocated per lock
 * (we may not sleep while holding flc_lock), the lock properties are
 * copied under the spinlock, and finally each lock is sent via
 * CIFSSMBPosixLock().  Returns 0 or the last error encountered.
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	/* no lock context means nothing to push */
	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	/* consume one preallocated element per lock, under the spinlock */
	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	/* now the spinlock is dropped, send each lock to the server */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	/* allocation failed part-way - release what we already allocated */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
1362
1363 static int
1364 cifs_push_locks(struct cifsFileInfo *cfile)
1365 {
1366         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1367         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1368         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1369         int rc = 0;
1370
1371         /* we are going to update can_cache_brlcks here - need a write access */
1372         cifs_down_write(&cinode->lock_sem);
1373         if (!cinode->can_cache_brlcks) {
1374                 up_write(&cinode->lock_sem);
1375                 return rc;
1376         }
1377
1378         if (cap_unix(tcon->ses) &&
1379             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1380             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1381                 rc = cifs_push_posix_locks(cfile);
1382         else
1383                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1384
1385         cinode->can_cache_brlcks = false;
1386         up_write(&cinode->lock_sem);
1387         return rc;
1388 }
1389
1390 static void
1391 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1392                 bool *wait_flag, struct TCP_Server_Info *server)
1393 {
1394         if (flock->fl_flags & FL_POSIX)
1395                 cifs_dbg(FYI, "Posix\n");
1396         if (flock->fl_flags & FL_FLOCK)
1397                 cifs_dbg(FYI, "Flock\n");
1398         if (flock->fl_flags & FL_SLEEP) {
1399                 cifs_dbg(FYI, "Blocking lock\n");
1400                 *wait_flag = true;
1401         }
1402         if (flock->fl_flags & FL_ACCESS)
1403                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1404         if (flock->fl_flags & FL_LEASE)
1405                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1406         if (flock->fl_flags &
1407             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1408                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1409                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1410
1411         *type = server->vals->large_lock_type;
1412         if (flock->fl_type == F_WRLCK) {
1413                 cifs_dbg(FYI, "F_WRLCK\n");
1414                 *type |= server->vals->exclusive_lock_type;
1415                 *lock = 1;
1416         } else if (flock->fl_type == F_UNLCK) {
1417                 cifs_dbg(FYI, "F_UNLCK\n");
1418                 *type |= server->vals->unlock_lock_type;
1419                 *unlock = 1;
1420                 /* Check if unlock includes more than one lock range */
1421         } else if (flock->fl_type == F_RDLCK) {
1422                 cifs_dbg(FYI, "F_RDLCK\n");
1423                 *type |= server->vals->shared_lock_type;
1424                 *lock = 1;
1425         } else if (flock->fl_type == F_EXLCK) {
1426                 cifs_dbg(FYI, "F_EXLCK\n");
1427                 *type |= server->vals->exclusive_lock_type;
1428                 *lock = 1;
1429         } else if (flock->fl_type == F_SHLCK) {
1430                 cifs_dbg(FYI, "F_SHLCK\n");
1431                 *type |= server->vals->shared_lock_type;
1432                 *lock = 1;
1433         } else
1434                 cifs_dbg(FYI, "Unknown type of lock\n");
1435 }
1436
/*
 * Handle a lock-test (F_GETLK style) request.  For POSIX-capable mounts,
 * test locally and, if needed, on the server via CIFSSMBPosixLock.  For
 * mandatory locking, probe the server by briefly acquiring the range:
 * success with the requested type means the range is free (F_UNLCK);
 * otherwise retry with a shared probe to distinguish a shared holder
 * (F_RDLCK) from an exclusive one (F_WRLCK).  The mandatory path always
 * returns 0 with the answer encoded in flock->fl_type.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0 means the local cache already gave a final answer */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	/* rc == 0 means the cached mandatory locks gave a final answer */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		/* probe succeeded: nothing holds the range - undo the probe */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	/* a shared probe already failed - the holder must be exclusive */
	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed - retry shared to classify the holder */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1505
1506 void
1507 cifs_move_llist(struct list_head *source, struct list_head *dest)
1508 {
1509         struct list_head *li, *tmp;
1510         list_for_each_safe(li, tmp, source)
1511                 list_move(li, dest);
1512 }
1513
1514 void
1515 cifs_free_llist(struct list_head *llist)
1516 {
1517         struct cifsLockInfo *li, *tmp;
1518         list_for_each_entry_safe(li, tmp, llist, llist) {
1519                 cifs_del_lock_waiters(li);
1520                 list_del(&li->llist);
1521                 kfree(li);
1522         }
1523 }
1524
1525 int
1526 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1527                   unsigned int xid)
1528 {
1529         int rc = 0, stored_rc;
1530         static const int types[] = {
1531                 LOCKING_ANDX_LARGE_FILES,
1532                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1533         };
1534         unsigned int i;
1535         unsigned int max_num, num, max_buf;
1536         LOCKING_ANDX_RANGE *buf, *cur;
1537         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1538         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1539         struct cifsLockInfo *li, *tmp;
1540         __u64 length = 1 + flock->fl_end - flock->fl_start;
1541         struct list_head tmp_llist;
1542
1543         INIT_LIST_HEAD(&tmp_llist);
1544
1545         /*
1546          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1547          * and check it before using.
1548          */
1549         max_buf = tcon->ses->server->maxBuf;
1550         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1551                 return -EINVAL;
1552
1553         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1554                      PAGE_SIZE);
1555         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1556                         PAGE_SIZE);
1557         max_num = (max_buf - sizeof(struct smb_hdr)) /
1558                                                 sizeof(LOCKING_ANDX_RANGE);
1559         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1560         if (!buf)
1561                 return -ENOMEM;
1562
1563         cifs_down_write(&cinode->lock_sem);
1564         for (i = 0; i < 2; i++) {
1565                 cur = buf;
1566                 num = 0;
1567                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1568                         if (flock->fl_start > li->offset ||
1569                             (flock->fl_start + length) <
1570                             (li->offset + li->length))
1571                                 continue;
1572                         if (current->tgid != li->pid)
1573                                 continue;
1574                         if (types[i] != li->type)
1575                                 continue;
1576                         if (cinode->can_cache_brlcks) {
1577                                 /*
1578                                  * We can cache brlock requests - simply remove
1579                                  * a lock from the file's list.
1580                                  */
1581                                 list_del(&li->llist);
1582                                 cifs_del_lock_waiters(li);
1583                                 kfree(li);
1584                                 continue;
1585                         }
1586                         cur->Pid = cpu_to_le16(li->pid);
1587                         cur->LengthLow = cpu_to_le32((u32)li->length);
1588                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1589                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1590                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1591                         /*
1592                          * We need to save a lock here to let us add it again to
1593                          * the file's list if the unlock range request fails on
1594                          * the server.
1595                          */
1596                         list_move(&li->llist, &tmp_llist);
1597                         if (++num == max_num) {
1598                                 stored_rc = cifs_lockv(xid, tcon,
1599                                                        cfile->fid.netfid,
1600                                                        li->type, num, 0, buf);
1601                                 if (stored_rc) {
1602                                         /*
1603                                          * We failed on the unlock range
1604                                          * request - add all locks from the tmp
1605                                          * list to the head of the file's list.
1606                                          */
1607                                         cifs_move_llist(&tmp_llist,
1608                                                         &cfile->llist->locks);
1609                                         rc = stored_rc;
1610                                 } else
1611                                         /*
1612                                          * The unlock range request succeed -
1613                                          * free the tmp list.
1614                                          */
1615                                         cifs_free_llist(&tmp_llist);
1616                                 cur = buf;
1617                                 num = 0;
1618                         } else
1619                                 cur++;
1620                 }
1621                 if (num) {
1622                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1623                                                types[i], num, 0, buf);
1624                         if (stored_rc) {
1625                                 cifs_move_llist(&tmp_llist,
1626                                                 &cfile->llist->locks);
1627                                 rc = stored_rc;
1628                         } else
1629                                 cifs_free_llist(&tmp_llist);
1630                 }
1631         }
1632
1633         up_write(&cinode->lock_sem);
1634         kfree(buf);
1635         return rc;
1636 }
1637
/*
 * Apply or remove a byte-range lock described by @flock.
 *
 * When @posix_lck, the request is first offered to the local VFS POSIX
 * lock layer and then (if not fully handled there) sent to the server
 * via the unix-extensions POSIX lock call.  Otherwise a mandatory
 * (Windows-style) brlock is used via server->ops.  @lock / @unlock
 * select the operation; @wait_flag requests blocking behavior.
 * Returns 0 on success or a negative errno.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        /* fl_end is inclusive, hence the +1 */
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct inode *inode = d_inode(cfile->dentry);

        if (posix_lck) {
                int posix_lock_type;

                /*
                 * Try the local POSIX lock layer first; anything at or
                 * below FILE_LOCK_DEFERRED (errors included) means the
                 * request does not need to go to the server.
                 */
                rc = cifs_posix_lock_set(file, flock);
                if (rc <= FILE_LOCK_DEFERRED)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;

                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length,
                                      NULL, posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type,
                                      flock->fl_flags);
                if (!lock)
                        return -ENOMEM;

                /*
                 * Check for conflicts with existing locks; rc == 0 means
                 * the lock was handled locally (cached), rc > 0 means we
                 * must ask the server, rc < 0 is an error.
                 */
                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0) {
                        kfree(lock);
                        return rc;
                }
                if (!rc)
                        goto out;

                /*
                 * Windows 7 server can delay breaking lease from read to None
                 * if we set a byte-range lock on a file - break it explicitly
                 * before sending the lock to the server to be sure the next
                 * read won't conflict with non-overlapted locks due to
                 * pagereading.
                 */
                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
                                        CIFS_CACHE_READ(CIFS_I(inode))) {
                        cifs_zap_mapping(inode);
                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
                                 inode);
                        CIFS_I(inode)->oplock = 0;
                }

                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 1, 0, wait_flag);
                if (rc) {
                        kfree(lock);
                        return rc;
                }

                /* server granted the lock - record it in the file's list */
                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
        if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
                /*
                 * If this is a request to remove all locks because we
                 * are closing the file, it doesn't matter if the
                 * unlocking failed as both cifs.ko and the SMB server
                 * remove the lock on file close
                 */
                if (rc) {
                        cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
                        if (!(flock->fl_flags & FL_CLOSE))
                                return rc;
                }
                /* mirror the result into the local VFS lock state */
                rc = locks_lock_file_wait(file, flock);
        }
        return rc;
}
1731
1732 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1733 {
1734         int rc, xid;
1735         int lock = 0, unlock = 0;
1736         bool wait_flag = false;
1737         bool posix_lck = false;
1738         struct cifs_sb_info *cifs_sb;
1739         struct cifs_tcon *tcon;
1740         struct cifsFileInfo *cfile;
1741         __u32 type;
1742
1743         rc = -EACCES;
1744         xid = get_xid();
1745
1746         if (!(fl->fl_flags & FL_FLOCK))
1747                 return -ENOLCK;
1748
1749         cfile = (struct cifsFileInfo *)file->private_data;
1750         tcon = tlink_tcon(cfile->tlink);
1751
1752         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1753                         tcon->ses->server);
1754         cifs_sb = CIFS_FILE_SB(file);
1755
1756         if (cap_unix(tcon->ses) &&
1757             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1758             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1759                 posix_lck = true;
1760
1761         if (!lock && !unlock) {
1762                 /*
1763                  * if no lock or unlock then nothing to do since we do not
1764                  * know what it is
1765                  */
1766                 free_xid(xid);
1767                 return -EOPNOTSUPP;
1768         }
1769
1770         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1771                         xid);
1772         free_xid(xid);
1773         return rc;
1774
1775
1776 }
1777
/*
 * fcntl(2) byte-range lock handler (F_GETLK / F_SETLK / F_SETLKW).
 *
 * Decides between POSIX locks (unix extensions) and mandatory brlocks,
 * then dispatches to cifs_getlk() for queries or cifs_setlk() for
 * set/unset requests.  Returns 0 or a negative errno.
 */
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
        int rc, xid;
        int lock = 0, unlock = 0;
        bool wait_flag = false;
        bool posix_lck = false;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsFileInfo *cfile;
        __u32 type;

        rc = -EACCES;
        xid = get_xid();

        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
                 cmd, flock->fl_flags, flock->fl_type,
                 flock->fl_start, flock->fl_end);

        cfile = (struct cifsFileInfo *)file->private_data;
        tcon = tlink_tcon(cfile->tlink);

        /* decode the request into lock/unlock/type/wait for this server */
        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
                        tcon->ses->server);
        cifs_sb = CIFS_FILE_SB(file);

        /* POSIX lock semantics need unix extensions and brlocks enabled */
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                posix_lck = true;
        /*
         * BB add code here to normalize offset and length to account for
         * negative length which we can not accept over the wire.
         */
        if (IS_GETLK(cmd)) {
                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
                free_xid(xid);
                return rc;
        }

        if (!lock && !unlock) {
                /*
                 * if no lock or unlock then nothing to do since we do not
                 * know what it is
                 */
                free_xid(xid);
                return -EOPNOTSUPP;
        }

        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
                        xid);
        free_xid(xid);
        return rc;
}
1831
1832 /*
1833  * update the file size (if needed) after a write. Should be called with
1834  * the inode->i_lock held
1835  */
1836 void
1837 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1838                       unsigned int bytes_written)
1839 {
1840         loff_t end_of_write = offset + bytes_written;
1841
1842         if (end_of_write > cifsi->server_eof)
1843                 cifsi->server_eof = end_of_write;
1844 }
1845
/*
 * Synchronously write @write_size bytes from @write_data to the server
 * at *@offset, on behalf of @pid, retrying on -EAGAIN and reopening an
 * invalidated handle as needed.  On success advances *@offset and the
 * cached inode/server EOF, and returns the number of bytes written;
 * returns a negative errno if nothing could be written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
           size_t write_size, loff_t *offset)
{
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        struct dentry *dentry = open_file->dentry;
        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
        struct cifs_io_parms io_parms = {0};

        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
                 write_size, *offset, dentry);

        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        if (!server->ops->sync_write)
                return -ENOSYS;

        xid = get_xid();

        /* keep issuing writes until everything requested has gone out */
        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
                while (rc == -EAGAIN) {
                        struct kvec iov[2];
                        unsigned int len;

                        if (open_file->invalidHandle) {
                                /* we could deadlock if we called
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to
                                   server now */
                                rc = cifs_reopen_file(open_file, false);
                                if (rc != 0)
                                        break;
                        }

                        /* cap each request at the server's retry size */
                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
                                  (unsigned int)write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
                        iov[1].iov_len = len;
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = len;
                        rc = server->ops->sync_write(xid, &open_file->fid,
                                        &io_parms, &bytes_written, iov, 1);
                }
                if (rc || (bytes_written == 0)) {
                        /* report partial progress if any, otherwise the error */
                        if (total_written)
                                break;
                        else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        /* i_lock protects the EOF update (see cifs_update_eof) */
                        spin_lock(&d_inode(dentry)->i_lock);
                        cifs_update_eof(cifsi, *offset, bytes_written);
                        spin_unlock(&d_inode(dentry)->i_lock);
                        *offset += bytes_written;
                }
        }

        cifs_stats_bytes_written(tcon, total_written);

        if (total_written > 0) {
                /* extend the local i_size if the write grew the file */
                spin_lock(&d_inode(dentry)->i_lock);
                if (*offset > d_inode(dentry)->i_size)
                        i_size_write(d_inode(dentry), *offset);
                spin_unlock(&d_inode(dentry)->i_lock);
        }
        mark_inode_dirty_sync(d_inode(dentry));
        free_xid(xid);
        return total_written;
}
1927
/*
 * Find an open handle on @cifs_inode that is valid and readable.
 * If @fsuid_only (and the mount is multiuser), only consider handles
 * opened by the current fsuid.  Returns the handle with a reference
 * taken (caller must cifsFileInfo_put), or NULL if none is found.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file = NULL;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_inode->open_file_lock);
        /* we could simply get the first_list_entry since write-only entries
           are always at the end of the list but since the first entry might
           have a close pending, we go through the whole list */
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
                        if (!open_file->invalidHandle) {
                                /* found a good file */
                                /* lock it so it will not be closed on us */
                                cifsFileInfo_get(open_file);
                                spin_unlock(&cifs_inode->open_file_lock);
                                return open_file;
                        } /* else might as well continue, and look for
                             another, or simply have the caller reopen it
                             again rather than trying to fix this handle */
                } else /* write only file */
                        break; /* write only files are last so must be done */
        }
        spin_unlock(&cifs_inode->open_file_lock);
        return NULL;
}
1961
/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
                       struct cifsFileInfo **ret_file)
{
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc = -EBADF;
        unsigned int refind = 0;        /* bounds reopen attempts (MAX_REOPEN_ATT) */
        bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
        bool with_delete = flags & FIND_WR_WITH_DELETE;
        *ret_file = NULL;

        /*
         * Having a null inode here (because mapping->host was set to zero by
         * the VFS or MM) should not happen but we had reports of on oops (due
         * to it being zero) during stress testcases so we need to check for it
         */

        if (cifs_inode == NULL) {
                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
                dump_stack();
                return rc;
        }

        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_inode->open_file_lock);
refind_writable:
        /* give up after too many reopen attempts */
        if (refind > MAX_REOPEN_ATT) {
                spin_unlock(&cifs_inode->open_file_lock);
                return rc;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                /* first pass restricts the search to the caller's tgid */
                if (!any_available && open_file->pid != current->tgid)
                        continue;
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (with_delete && !(open_file->fid.access & DELETE))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get(open_file);
                                spin_unlock(&cifs_inode->open_file_lock);
                                *ret_file = open_file;
                                return 0;
                        } else {
                                /* remember an invalid handle as a fallback */
                                if (!inv_file)
                                        inv_file = open_file;
                        }
                }
        }
        /* couldn't find useable FH with same pid, try any available */
        if (!any_available) {
                any_available = true;
                goto refind_writable;
        }

        if (inv_file) {
                any_available = false;
                cifsFileInfo_get(inv_file);
        }

        spin_unlock(&cifs_inode->open_file_lock);

        if (inv_file) {
                /* try reviving the invalid handle (outside the spinlock) */
                rc = cifs_reopen_file(inv_file, false);
                if (!rc) {
                        *ret_file = inv_file;
                        return 0;
                }

                /* reopen failed: demote this handle to the tail and rescan */
                spin_lock(&cifs_inode->open_file_lock);
                list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
                spin_unlock(&cifs_inode->open_file_lock);
                cifsFileInfo_put(inv_file);
                ++refind;
                inv_file = NULL;
                spin_lock(&cifs_inode->open_file_lock);
                goto refind_writable;
        }

        return rc;
}
2052
2053 struct cifsFileInfo *
2054 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2055 {
2056         struct cifsFileInfo *cfile;
2057         int rc;
2058
2059         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2060         if (rc)
2061                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2062
2063         return cfile;
2064 }
2065
/*
 * Look up an open file on @tcon whose dentry path matches @name and
 * return a writable handle for it via cifs_get_writable_file().
 * Returns 0 on success, -ENOENT if no open file matches, or -ENOMEM.
 */
int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
                       int flags,
                       struct cifsFileInfo **ret_file)
{
        struct list_head *tmp;
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode;
        char *full_path;

        *ret_file = NULL;

        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                cfile = list_entry(tmp, struct cifsFileInfo,
                             tlist);
                /*
                 * NOTE(review): build_path_from_dentry() allocates the path
                 * string while tcon->open_file_lock (a spinlock) is held —
                 * confirm the allocation cannot sleep in this context.
                 */
                full_path = build_path_from_dentry(cfile->dentry);
                if (full_path == NULL) {
                        spin_unlock(&tcon->open_file_lock);
                        return -ENOMEM;
                }
                if (strcmp(full_path, name)) {
                        kfree(full_path);
                        continue;
                }

                kfree(full_path);
                cinode = CIFS_I(d_inode(cfile->dentry));
                spin_unlock(&tcon->open_file_lock);
                return cifs_get_writable_file(cinode, flags, ret_file);
        }

        spin_unlock(&tcon->open_file_lock);
        return -ENOENT;
}
2101
/*
 * Look up an open file on @tcon whose dentry path matches @name and
 * return a readable handle for it via find_readable_file().
 * Returns 0 on success, -ENOENT if none matches, or -ENOMEM.
 */
int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
                       struct cifsFileInfo **ret_file)
{
        struct list_head *tmp;
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode;
        char *full_path;

        *ret_file = NULL;

        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                cfile = list_entry(tmp, struct cifsFileInfo,
                             tlist);
                /*
                 * NOTE(review): build_path_from_dentry() allocates the path
                 * string while tcon->open_file_lock (a spinlock) is held —
                 * confirm the allocation cannot sleep in this context.
                 */
                full_path = build_path_from_dentry(cfile->dentry);
                if (full_path == NULL) {
                        spin_unlock(&tcon->open_file_lock);
                        return -ENOMEM;
                }
                if (strcmp(full_path, name)) {
                        kfree(full_path);
                        continue;
                }

                kfree(full_path);
                cinode = CIFS_I(d_inode(cfile->dentry));
                spin_unlock(&tcon->open_file_lock);
                *ret_file = find_readable_file(cinode, 0);
                return *ret_file ? 0 : -ENOENT;
        }

        spin_unlock(&tcon->open_file_lock);
        return -ENOENT;
}
2137
/*
 * Write the dirty byte range [from, to) of @page back to the server
 * using any writable open handle on the inode.  Used by the writepage
 * path for partial-page writeback.  Returns 0 on success, 0 if the
 * page is past EOF (racing truncate), or a negative errno.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
        struct address_space *mapping = page->mapping;
        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
        char *write_data;
        int rc = -EFAULT;
        int bytes_written = 0;
        struct inode *inode;
        struct cifsFileInfo *open_file;

        if (!mapping || !mapping->host)
                return -EFAULT;

        inode = page->mapping->host;

        /* file offset of the first byte we will write */
        offset += (loff_t)from;
        write_data = kmap(page);
        write_data += from;

        if ((to > PAGE_SIZE) || (from > to)) {
                kunmap(page);
                return -EIO;
        }

        /* racing with truncate? */
        if (offset > mapping->host->i_size) {
                kunmap(page);
                return 0; /* don't care */
        }

        /* check to make sure that we are not extending the file */
        if (mapping->host->i_size - offset < (loff_t)to)
                to = (unsigned)(mapping->host->i_size - offset);

        rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
                                    &open_file);
        if (!rc) {
                bytes_written = cifs_write(open_file, open_file->pid,
                                           write_data, to - from, &offset);
                cifsFileInfo_put(open_file);
                /* Does mm or vfs already set times? */
                inode->i_atime = inode->i_mtime = current_time(inode);
                if ((bytes_written > 0) && (offset))
                        rc = 0;
                else if (bytes_written < 0)
                        rc = bytes_written;
                else
                        rc = -EFAULT;
        } else {
                cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
                /* mask non-retryable errors so writeback reports -EIO */
                if (!is_retryable_error(rc))
                        rc = -EIO;
        }

        kunmap(page);
        return rc;
}
2195
2196 static struct cifs_writedata *
2197 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2198                           pgoff_t end, pgoff_t *index,
2199                           unsigned int *found_pages)
2200 {
2201         struct cifs_writedata *wdata;
2202
2203         wdata = cifs_writedata_alloc((unsigned int)tofind,
2204                                      cifs_writev_complete);
2205         if (!wdata)
2206                 return NULL;
2207
2208         *found_pages = find_get_pages_range_tag(mapping, index, end,
2209                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2210         return wdata;
2211 }
2212
/*
 * From the @found_pages candidates in @wdata, lock and mark for
 * writeback the longest consecutive run of dirty, in-range pages
 * starting at the first one.  Pages beyond the run are released.
 * Updates *next (expected next index), *index (rescan point when
 * nothing was usable) and *done.  Returns the number of pages kept.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
                    struct address_space *mapping,
                    struct writeback_control *wbc,
                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
        unsigned int nr_pages = 0, i;
        struct page *page;

        for (i = 0; i < found_pages; i++) {
                page = wdata->pages[i];
                /*
                 * At this point we hold neither the i_pages lock nor the
                 * page lock: the page may be truncated or invalidated
                 * (changing page->mapping to NULL), or even swizzled
                 * back from swapper_space to tmpfs file mapping
                 */

                /* block on the first page only; never stall mid-batch */
                if (nr_pages == 0)
                        lock_page(page);
                else if (!trylock_page(page))
                        break;

                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
                        break;
                }

                if (!wbc->range_cyclic && page->index > end) {
                        *done = true;
                        unlock_page(page);
                        break;
                }

                if (*next && (page->index != *next)) {
                        /* Not next consecutive page */
                        unlock_page(page);
                        break;
                }

                /* for data-integrity writeback, wait out prior writeback */
                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                if (PageWriteback(page) ||
                                !clear_page_dirty_for_io(page)) {
                        unlock_page(page);
                        break;
                }

                /*
                 * This actually clears the dirty bit in the radix tree.
                 * See cifs_writepage() for more commentary.
                 */
                set_page_writeback(page);
                if (page_offset(page) >= i_size_read(mapping->host)) {
                        *done = true;
                        unlock_page(page);
                        end_page_writeback(page);
                        break;
                }

                wdata->pages[i] = page;
                *next = page->index + 1;
                ++nr_pages;
        }

        /* reset index to refind any pages skipped */
        if (nr_pages == 0)
                *index = wdata->pages[0]->index + 1;

        /* put any pages we aren't going to use */
        for (i = nr_pages; i < found_pages; i++) {
                put_page(wdata->pages[i]);
                wdata->pages[i] = NULL;
        }

        return nr_pages;
}
2291
/*
 * Finalize @wdata (offset, sizes, pid) for @nr_pages prepared pages and
 * issue the asynchronous write to the server.  The last page may be
 * partial, capped at i_size (tailsz).  Returns 0 on successful submit,
 * -EAGAIN if the handle was invalidated, or a negative errno.
 */
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
                 struct address_space *mapping, struct writeback_control *wbc)
{
        int rc;

        wdata->sync_mode = wbc->sync_mode;
        wdata->nr_pages = nr_pages;
        wdata->offset = page_offset(wdata->pages[0]);
        wdata->pagesz = PAGE_SIZE;
        /* tail page may end short of a full page at EOF */
        wdata->tailsz = min(i_size_read(mapping->host) -
                        page_offset(wdata->pages[nr_pages - 1]),
                        (loff_t)PAGE_SIZE);
        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
        wdata->pid = wdata->cfile->pid;

        /* trim our credit reservation down to what we actually send */
        rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
        if (rc)
                return rc;

        if (wdata->cfile->invalidHandle)
                rc = -EAGAIN;
        else
                rc = wdata->server->ops->async_writev(wdata,
                                                      cifs_writedata_release);

        return rc;
}
2320
/*
 * Flush dirty pages of @mapping to the server, batching contiguous dirty
 * pages into wsize-sized async write requests (wdata).  This is the
 * ->writepages address_space operation.
 *
 * Returns 0, or the first non-retryable error hit while sending.  A
 * transient -EAGAIN is retried in place for data-integrity (WB_SYNC_ALL)
 * writeback; other retryable errors leave the pages redirtied for a
 * later pass.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct cifsFileInfo *cfile = NULL;
	int rc = 0;
	int saved_rc = 0;	/* first error seen; reported once the scan ends */
	unsigned int xid;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->ctx->wsize < PAGE_SIZE)
		return generic_writepages(mapping, wbc);

	xid = get_xid();
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		/* explicit range: no need to wrap back to index 0 below */
		scanned = true;
	}
	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);

retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize;
		pgoff_t next = 0, tofind, saved_index = index;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		int get_file_rc = 0;

		/* drop the handle taken by the previous iteration, if any */
		if (cfile)
			cifsFileInfo_put(cfile);

		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);

		/* in case of an error store it to return later */
		if (rc)
			get_file_rc = rc;

		/* reserve send credits; wsize caps this request's size */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc != 0) {
			done = true;
			break;
		}

		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			done = true;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		/* ownership of the reserved credits moves into the wdata */
		wdata->credits = credits_on_stack;
		wdata->cfile = cfile;
		wdata->server = server;
		cfile = NULL;

		if (!wdata->cfile) {
			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
				 get_file_rc);
			if (is_retryable_error(get_file_rc))
				rc = get_file_rc;
			else
				rc = -EBADF;
		} else
			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		/* unlock the gathered pages whether or not the send was issued */
		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (is_retryable_error(rc))
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				put_page(wdata->pages[i]);
			}
			if (!is_retryable_error(rc))
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		/* data-integrity sync must retry a transient -EAGAIN here */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		/* Return immediately if we received a signal during writing */
		if (is_interrupt_error(rc)) {
			done = true;
			break;
		}

		if (rc != 0 && saved_rc == 0)
			saved_rc = rc;

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (saved_rc != 0)
		rc = saved_rc;

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	if (cfile)
		cifsFileInfo_put(cfile);
	free_xid(xid);
	return rc;
}
2483
/*
 * Write a single page synchronously to the server.
 *
 * The caller holds the page lock and it is NOT released here — see
 * cifs_writepage() for the unlocking wrapper.  On a retryable error the
 * page is redirtied, except that a data-integrity sync (WB_SYNC_ALL)
 * loops on -EAGAIN until the write succeeds or fails hard.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	/* extra page ref so it survives until we finish below */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		/* the full page made it to the server */
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}
2524
/*
 * ->writepage: write the locked page via cifs_writepage_locked(), then
 * drop the page lock as the writepage contract requires (the locked
 * variant deliberately leaves it held).
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
2531
/*
 * ->write_end address_space operation: commit @copied bytes placed in
 * @page at @pos by the preceding write_begin.
 *
 * If the page is fully up to date the data is just marked dirty and left
 * for writeback; otherwise the copied range is written straight to the
 * server with cifs_write() using this file's handle.
 *
 * Returns the number of bytes committed or a negative error.  Always
 * unlocks and releases the page.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;	/* pid to stamp on the server write */

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/* PG_checked presumably set by write_begin on a partially-read
	   page — TODO confirm against cifs_write_begin */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		/* cifs_write() advances pos itself; mirror that here */
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		/* extend the cached size if the write went past EOF */
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);

	return rc;
}
2592
/*
 * fsync for strict cache mode: flush dirty pages in the given range,
 * drop the cached pages entirely when we no longer hold read caching
 * rights on the inode, then ask the server to flush the open handle
 * (unless the mount disables server-side sync).
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	/* push dirty pages and wait before touching server state */
	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(inode->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	/* without read caching rights the pagecache may be stale: zap it */
	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	return rc;
}
2635
2636 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2637 {
2638         unsigned int xid;
2639         int rc = 0;
2640         struct cifs_tcon *tcon;
2641         struct TCP_Server_Info *server;
2642         struct cifsFileInfo *smbfile = file->private_data;
2643         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2644
2645         rc = file_write_and_wait_range(file, start, end);
2646         if (rc) {
2647                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2648                 return rc;
2649         }
2650
2651         xid = get_xid();
2652
2653         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2654                  file, datasync);
2655
2656         tcon = tlink_tcon(smbfile->tlink);
2657         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2658                 server = tcon->ses->server;
2659                 if (server->ops->flush)
2660                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2661                 else
2662                         rc = -ENOSYS;
2663         }
2664
2665         free_xid(xid);
2666         return rc;
2667 }
2668
2669 /*
2670  * As file closes, flush all cached write data for this inode checking
2671  * for write behind errors.
2672  */
2673 int cifs_flush(struct file *file, fl_owner_t id)
2674 {
2675         struct inode *inode = file_inode(file);
2676         int rc = 0;
2677
2678         if (file->f_mode & FMODE_WRITE)
2679                 rc = filemap_write_and_wait(inode->i_mapping);
2680
2681         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2682         if (rc)
2683                 trace_cifs_flush_err(inode->i_ino, rc);
2684         return rc;
2685 }
2686
2687 static int
2688 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2689 {
2690         int rc = 0;
2691         unsigned long i;
2692
2693         for (i = 0; i < num_pages; i++) {
2694                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2695                 if (!pages[i]) {
2696                         /*
2697                          * save number of pages we have already allocated and
2698                          * return with ENOMEM error
2699                          */
2700                         num_pages = i;
2701                         rc = -ENOMEM;
2702                         break;
2703                 }
2704         }
2705
2706         if (rc) {
2707                 for (i = 0; i < num_pages; i++)
2708                         put_page(pages[i]);
2709         }
2710         return rc;
2711 }
2712
2713 static inline
2714 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2715 {
2716         size_t num_pages;
2717         size_t clen;
2718
2719         clen = min_t(const size_t, len, wsize);
2720         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2721
2722         if (cur_len)
2723                 *cur_len = clen;
2724
2725         return num_pages;
2726 }
2727
2728 static void
2729 cifs_uncached_writedata_release(struct kref *refcount)
2730 {
2731         int i;
2732         struct cifs_writedata *wdata = container_of(refcount,
2733                                         struct cifs_writedata, refcount);
2734
2735         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2736         for (i = 0; i < wdata->nr_pages; i++)
2737                 put_page(wdata->pages[i]);
2738         cifs_writedata_release(refcount);
2739 }
2740
/* defined further down; needed by the completion handler below */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);

/*
 * Work-queue completion handler for one uncached (direct/aio) write
 * request: advance the cached server EOF / inode size past the written
 * range, wake any synchronous waiter, then try to collect the overall
 * result for the aio context.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/* i_lock guards both server_eof bookkeeping and i_size */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2762
/*
 * Copy up to *len bytes from the user iterator @from into the pages of
 * @wdata, using at most *num_pages pages.
 *
 * On return *len holds the number of bytes actually copied and
 * *num_pages the number of pages used.  Returns -EFAULT if nothing at
 * all could be copied, else 0.
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	/* convert "bytes remaining" back into "bytes copied" */
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}
2804
/*
 * Resend a previously-failed uncached write request as a whole (not in
 * smaller segments): reopen the handle if it was invalidated, wait —
 * polling once a second — until the server grants enough credits to
 * cover the full wdata->bytes, then reissue the async write.
 *
 * On success the wdata is re-queued on @wdata_list and 0 is returned;
 * on hard failure the wdata reference is dropped and the error returned.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			/* not enough yet: give the credits back and wait */
			if (wsize < wdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* the old memory registration is stale; a new
				   one is set up by the resent writev */
				if (wdata->mr) {
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}
2874
/*
 * Split an uncached/direct write of @len bytes at @offset into
 * wsize-sized async write requests and issue them.
 *
 * For direct I/O the user pages are pinned in place
 * (iov_iter_get_pages_alloc); otherwise the data is copied into freshly
 * allocated pages.  Each successfully issued wdata is queued on
 * @wdata_list for collect_uncached_write_data() to reap.
 *
 * Returns 0 if everything was issued, else the first fatal error.
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from = *from;	/* for rewinding on -EAGAIN */
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* reserve credits; wsize caps this request's size */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			ssize_t result;

			/* pin the user pages; no copy for direct I/O */
			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(from),
					 from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}


			/* data may start mid-page; the last page may be
			   partial — tailsz is the bytes in the final page */
			wdata->page_offset = start;
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->server = server;
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		/* each in-flight wdata holds a ref on the aio context */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator to where this
				   request started and try again */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3040
3041 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3042 {
3043         struct cifs_writedata *wdata, *tmp;
3044         struct cifs_tcon *tcon;
3045         struct cifs_sb_info *cifs_sb;
3046         struct dentry *dentry = ctx->cfile->dentry;
3047         int rc;
3048
3049         tcon = tlink_tcon(ctx->cfile->tlink);
3050         cifs_sb = CIFS_SB(dentry->d_sb);
3051
3052         mutex_lock(&ctx->aio_mutex);
3053
3054         if (list_empty(&ctx->list)) {
3055                 mutex_unlock(&ctx->aio_mutex);
3056                 return;
3057         }
3058
3059         rc = ctx->rc;
3060         /*
3061          * Wait for and collect replies for any successful sends in order of
3062          * increasing offset. Once an error is hit, then return without waiting
3063          * for any more replies.
3064          */
3065 restart_loop:
3066         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3067                 if (!rc) {
3068                         if (!try_wait_for_completion(&wdata->done)) {
3069                                 mutex_unlock(&ctx->aio_mutex);
3070                                 return;
3071                         }
3072
3073                         if (wdata->result)
3074                                 rc = wdata->result;
3075                         else
3076                                 ctx->total_len += wdata->bytes;
3077
3078                         /* resend call if it's a retryable error */
3079                         if (rc == -EAGAIN) {
3080                                 struct list_head tmp_list;
3081                                 struct iov_iter tmp_from = ctx->iter;
3082
3083                                 INIT_LIST_HEAD(&tmp_list);
3084                                 list_del_init(&wdata->list);
3085
3086                                 if (ctx->direct_io)
3087                                         rc = cifs_resend_wdata(
3088                                                 wdata, &tmp_list, ctx);
3089                                 else {
3090                                         iov_iter_advance(&tmp_from,
3091                                                  wdata->offset - ctx->pos);
3092
3093                                         rc = cifs_write_from_iter(wdata->offset,
3094                                                 wdata->bytes, &tmp_from,
3095                                                 ctx->cfile, cifs_sb, &tmp_list,
3096                                                 ctx);
3097
3098                                         kref_put(&wdata->refcount,
3099                                                 cifs_uncached_writedata_release);
3100                                 }
3101
3102                                 list_splice(&tmp_list, &ctx->list);
3103                                 goto restart_loop;
3104                         }
3105                 }
3106                 list_del_init(&wdata->list);
3107                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3108         }
3109
3110         cifs_stats_bytes_written(tcon, ctx->total_len);
3111         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3112
3113         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3114
3115         mutex_unlock(&ctx->aio_mutex);
3116
3117         if (ctx->iocb && ctx->iocb->ki_complete)
3118                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3119         else
3120                 complete(&ctx->done);
3121 }
3122
/*
 * Common implementation of the uncached write paths (cache=none and
 * O_DIRECT).  Splits @from into one or more async write requests tracked
 * by a cifs_aio_ctx, then either waits for completion (sync kiocb) or
 * returns -EIOCBQUEUED and lets collect_uncached_write_data() complete
 * the iocb later.
 *
 * @iocb:   target file and position
 * @from:   source data
 * @direct: true to send the caller's pages directly to the server
 *          instead of copying them into ctx-owned bounce pages
 *
 * Returns bytes written, -EIOCBQUEUED for queued async I/O, or a
 * negative error code.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	/* cifs_write_from_iter() consumes its iter, so keep a copy */
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(from)) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* only stash the iocb when completion is signalled via ki_complete */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		/* copies the user data into ctx-owned pages; sets ctx->len */
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here due to write response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: remaining ctx ref is held by the in-flight writes */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR, keep partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3226
/* O_DIRECT write entry point: send the caller's pages to the server directly. */
ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, true);
}
3231
/* Uncached write entry point: data is first copied into ctx-owned pages. */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, false);
}
3236
/*
 * Write through the page cache while checking for conflicting mandatory
 * byte-range locks.  Takes the inode lock, then lock_sem (read side) so
 * the brlock list cannot change under us; refuses the write with -EACCES
 * if an exclusive lock covers the target range.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* O_SYNC/O_DSYNC handling, done after both locks are dropped */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
3272
/*
 * Write entry point for strict cache mode.  Chooses between the cached
 * write paths (when we hold a write oplock/lease) and the uncached path,
 * and zaps the page cache after an uncached write if we still held read
 * caching, since that cached data is now stale.  cifs_get_writer() /
 * cifs_put_writer() bracket the write so oplock breaks can wait for us.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/* POSIX (unix extension) locks don't conflict like brlocks,
		 * so the generic cached path is safe in that case */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause an error with mandatory locks on
	 * these pages but not on the region from pos to pos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3322
3323 static struct cifs_readdata *
3324 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3325 {
3326         struct cifs_readdata *rdata;
3327
3328         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3329         if (rdata != NULL) {
3330                 rdata->pages = pages;
3331                 kref_init(&rdata->refcount);
3332                 INIT_LIST_HEAD(&rdata->list);
3333                 init_completion(&rdata->done);
3334                 INIT_WORK(&rdata->work, complete);
3335         }
3336
3337         return rdata;
3338 }
3339
3340 static struct cifs_readdata *
3341 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3342 {
3343         struct page **pages =
3344                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3345         struct cifs_readdata *ret = NULL;
3346
3347         if (pages) {
3348                 ret = cifs_readdata_direct_alloc(pages, complete);
3349                 if (!ret)
3350                         kfree(pages);
3351         }
3352
3353         return ret;
3354 }
3355
/*
 * kref release callback for a cifs_readdata: tear down the SMB-direct
 * memory registration (if any), drop the file reference, then free the
 * page-pointer array and the readdata itself.  Callers release via
 * kref_put(), never directly.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	/* pages array may come from kcalloc or iov_iter_get_pages_alloc */
	kvfree(rdata->pages);
	kfree(rdata);
}
3373
3374 static int
3375 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3376 {
3377         int rc = 0;
3378         struct page *page;
3379         unsigned int i;
3380
3381         for (i = 0; i < nr_pages; i++) {
3382                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3383                 if (!page) {
3384                         rc = -ENOMEM;
3385                         break;
3386                 }
3387                 rdata->pages[i] = page;
3388         }
3389
3390         if (rc) {
3391                 unsigned int nr_page_failed = i;
3392
3393                 for (i = 0; i < nr_page_failed; i++) {
3394                         put_page(rdata->pages[i]);
3395                         rdata->pages[i] = NULL;
3396                 }
3397         }
3398         return rc;
3399 }
3400
3401 static void
3402 cifs_uncached_readdata_release(struct kref *refcount)
3403 {
3404         struct cifs_readdata *rdata = container_of(refcount,
3405                                         struct cifs_readdata, refcount);
3406         unsigned int i;
3407
3408         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3409         for (i = 0; i < rdata->nr_pages; i++) {
3410                 put_page(rdata->pages[i]);
3411         }
3412         cifs_readdata_release(refcount);
3413 }
3414
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iter:       destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 *
 * Returns 0 when all of rdata->got_bytes was copied, -EFAULT otherwise.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
	size_t remaining = rdata->got_bytes;
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
		/* last page may hold less than a full page of data */
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
		size_t written;

		if (unlikely(iov_iter_is_pipe(iter))) {
			/* pipe iters need the mapped-address variant */
			void *addr = kmap_atomic(page);

			written = copy_to_iter(addr, copy, iter);
			kunmap_atomic(addr);
		} else
			written = copy_page_to_iter(page, 0, copy, iter);
		remaining -= written;
		/* short copy with space left in iter means a fault */
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	return remaining ? -EFAULT : 0;
}
3448
3449 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3450
/*
 * Work callback run when an uncached read request finishes: mark this
 * rdata done, then let the collector try to gather all completed reads
 * for the aio context.  Ordering matters — the completion must be
 * signalled before collect_uncached_read_data() checks it.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3462
/*
 * Fill rdata's pages with up to @len bytes of response data, taken from
 * @iter when non-NULL (copy path) or read from the server socket
 * otherwise.  For SMB-direct reads the data was already placed by RDMA,
 * so the bytes are just accounted.  Pages beyond the received length are
 * released and rdata->nr_pages shrunk accordingly.
 *
 * Returns the number of bytes placed in the pages, or a negative error
 * (got_bytes is still reported unless the connection aborted).
 */
static int
uncached_fill_pages(struct TCP_Server_Info *server,
		    struct cifs_readdata *rdata, struct iov_iter *iter,
		    unsigned int len)
{
	int result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t n;
		unsigned int segment_size = rdata->pagesz;

		/* only the first page starts at a non-zero offset */
		if (i == 0)
			segment_size -= page_offset;
		else
			page_offset = 0;


		if (len <= 0) {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		n = len;
		if (len >= segment_size)
			/* enough data to fill the page */
			n = segment_size;
		else
			rdata->tailsz = len;
		len -= n;

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			/* data already delivered via RDMA; just account it */
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
3521
/* Socket variant: read response data for an uncached read off the wire. */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}
3528
/* Copy variant: fill pages from an already-received buffer in @iter. */
static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}
3536
/*
 * Resend a previously-failed read request (direct I/O path, where the
 * rdata and its pages are reused as-is).  Reopens an invalidated handle,
 * waits until enough credits are available to send the whole request in
 * one go, and retries on -EAGAIN.  On success the rdata is queued on
 * @rdata_list; on failure the caller's reference is dropped.
 *
 * Returns 0 if the read was (re)sent, negative error otherwise.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			/* not enough yet — return credits and wait a bit */
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* old registration is stale; async_readv
				 * will set up a fresh one */
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}
3608
/*
 * Split an uncached read of @len bytes at @offset into rsize-bounded
 * async read requests and queue each issued rdata on @rdata_list.  For
 * direct I/O the user's pages are pinned and used in place; otherwise
 * bounce pages are allocated per request.  Each rdata takes a reference
 * on @ctx which is dropped when the rdata is released.
 *
 * Returns 0 when everything was sent, or the error from the first
 * request that could not be issued (earlier requests may still be in
 * flight on @rdata_list).
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* this may be a resend starting partway into the original iter */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			/* pin the user pages backing this segment */
			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(&direct_iov),
					 direct_iov.iov_offset,
					 direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			/* may have pinned fewer bytes than requested */
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			/* bytes used in the last page */
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		/* released in cifs_uncached_readdata_release() */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* undo the advance so the retry re-pins */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3750
/*
 * Gather the results of all read requests issued for @ctx.  Called from
 * each request's completion work as well as from __cifs_readv(); returns
 * early (without completing the iocb) while any request is still in
 * flight.  Requests that failed with -EAGAIN are resent — reusing the
 * rdata for direct I/O, or via fresh requests otherwise — and the scan
 * restarts so results are consumed in order of increasing offset.
 * When everything is done, sets ctx->rc and completes the iocb or the
 * ctx->done completion.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* another invocation already collected everything */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* still in flight — bail out; the completion work
			 * for that request will call us again */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* for the copy path, what the iter consumed is authoritative */
	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3855
/*
 * Common implementation of the uncached read paths (cache=none and
 * O_DIRECT), mirroring __cifs_writev().  Issues async read requests via
 * cifs_send_async_read(), then either waits (sync kiocb) or returns
 * -EIOCBQUEUED and lets collect_uncached_read_data() finish the iocb.
 *
 * @iocb:   source file and position
 * @to:     destination for the data
 * @direct: true to read straight into the caller's pages
 *
 * Returns bytes read, -EIOCBQUEUED for queued async I/O, or a negative
 * error code.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* only stash the iocb when completion is signalled via ki_complete */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed pages must be marked dirty after we write into them */
	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		/* allocates bounce pages; ctx->len may be trimmed */
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: remaining ctx ref is held by the in-flight reads */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR, keep partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3958
/* O_DIRECT read entry point: read from the server into the caller's pages. */
ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}
3963
/* Uncached read entry point: data lands in bounce pages, then is copied out. */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}
3968
/*
 * Strict-cache read: serve from the local page cache only when it is
 * known coherent (we hold a read oplock/lease) and no conflicting
 * byte-range lock covers the request; otherwise read from the server.
 * Returns bytes read, or -EACCES when a mandatory brlock conflicts.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/*
	 * With unix extensions and POSIX fcntl lock semantics enabled,
	 * presumably brlocks cannot be mandatory here, so the cached read
	 * is safe without the conflict scan — confirm against locking code.
	 */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
4008
/*
 * Synchronous, uncached read of up to @read_size bytes at *@offset into
 * @read_data.  Issues sync_read calls of at most rsize bytes each,
 * transparently reopening the handle and retrying on -EAGAIN.  Advances
 * *@offset past the bytes read.  Returns the total number of bytes read,
 * or a negative errno when nothing could be read at all.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms = {0};
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = cifs_pick_channel(tcon->ses);

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* forward the opener's pid if the mount requested rwpidforward */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			/* handle may have been invalidated by a reconnect */
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			io_parms.server = server;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* report a short read if some data already arrived */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			/*
			 * NOTE(review): total_read here is the running total
			 * from *before* this iteration's bytes are added, so
			 * the statistics look mis-counted; confirm whether
			 * bytes_read was intended instead.
			 */
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
4100
/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	/* take the page lock and tell the MM we return it locked */
	lock_page(page);
	return VM_FAULT_LOCKED;
}
4113
/* mmap handlers: generic faulting plus mkwrite to pin pages under writeback */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
4119
4120 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4121 {
4122         int xid, rc = 0;
4123         struct inode *inode = file_inode(file);
4124
4125         xid = get_xid();
4126
4127         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4128                 rc = cifs_zap_mapping(inode);
4129         if (!rc)
4130                 rc = generic_file_mmap(file, vma);
4131         if (!rc)
4132                 vma->vm_ops = &cifs_file_vm_ops;
4133
4134         free_xid(xid);
4135         return rc;
4136 }
4137
4138 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4139 {
4140         int rc, xid;
4141
4142         xid = get_xid();
4143
4144         rc = cifs_revalidate_file(file);
4145         if (rc)
4146                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4147                          rc);
4148         if (!rc)
4149                 rc = generic_file_mmap(file, vma);
4150         if (!rc)
4151                 vma->vm_ops = &cifs_file_vm_ops;
4152
4153         free_xid(xid);
4154         return rc;
4155 }
4156
/*
 * Work item run when an async readahead request completes.  For each page:
 * mark it uptodate if its data arrived (fully successful request, or a
 * partially satisfied -EAGAIN one), add it to the page-cache LRU, push it
 * to fscache, then drop the page reference.  Finally drop the rdata ref.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i, got_bytes;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	got_bytes = rdata->got_bytes;
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add(page);

		/* this page holds valid data if the read reached it */
		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		/* consume up to one page's worth of the received byte count */
		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);

		put_page(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
4189
/*
 * Fill the pages of @rdata with up to @len bytes of response data, either
 * copied from @iter (when the payload was already received into a buffer)
 * or read directly from the server socket when @iter is NULL.  Pages past
 * the received length are zero-filled or released (see below).  Returns
 * the number of bytes placed into pages, or a negative error.
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page may start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			/* RDMA already placed the data; just account for it */
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* report partial progress unless the connection was torn down */
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
4275
/* Fill rdata pages straight from the server socket (no staging iterator). */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
4282
/* Fill rdata pages by copying from an already-received iov_iter. */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
4290
/*
 * Pull a run of index-contiguous pages off @page_list, insert each into
 * the page cache (locked), and move them to @tmplist for one async read
 * request.  Stops at an index discontinuity, at the @rsize byte limit, or
 * on a page-cache insertion failure.  On return *offset/*bytes/*nr_pages
 * describe the request; a non-zero return with *nr_pages > 0 still means
 * a usable (shorter) run was collected.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4350
/*
 * ->readpages(): satisfy as much of @page_list as possible from fscache,
 * then batch the remaining contiguous pages into async read requests of
 * at most rsize bytes each, throttled by the server's mtu credits.
 * Completion is handled by cifs_readv_complete().  Returns 0 or the
 * first fatal error; pages not consumed remain on @page_list for the
 * VFS to fall back on.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	/* forward the opener's pid if the mount requested rwpidforward */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		/* handle may have been invalidated by a reconnect */
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* blocks until the server grants credits for an rsize read */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		/* trim the credit grant down to what this request needs */
		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our ref; the completion work owns the other */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4505
4506 /*
4507  * cifs_readpage_worker must be called with the page pinned
4508  */
4509 static int cifs_readpage_worker(struct file *file, struct page *page,
4510         loff_t *poffset)
4511 {
4512         char *read_data;
4513         int rc;
4514
4515         /* Is the page cached? */
4516         rc = cifs_readpage_from_fscache(file_inode(file), page);
4517         if (rc == 0)
4518                 goto read_complete;
4519
4520         read_data = kmap(page);
4521         /* for reads over a certain size could initiate async read ahead */
4522
4523         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4524
4525         if (rc < 0)
4526                 goto io_error;
4527         else
4528                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4529
4530         /* we do not want atime to be less than mtime, it broke some apps */
4531         file_inode(file)->i_atime = current_time(file_inode(file));
4532         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4533                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4534         else
4535                 file_inode(file)->i_atime = current_time(file_inode(file));
4536
4537         if (PAGE_SIZE > rc)
4538                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4539
4540         flush_dcache_page(page);
4541         SetPageUptodate(page);
4542
4543         /* send this page to the cache */
4544         cifs_readpage_to_fscache(file_inode(file), page);
4545
4546         rc = 0;
4547
4548 io_error:
4549         kunmap(page);
4550         unlock_page(page);
4551
4552 read_complete:
4553         return rc;
4554 }
4555
4556 static int cifs_readpage(struct file *file, struct page *page)
4557 {
4558         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4559         int rc = -EACCES;
4560         unsigned int xid;
4561
4562         xid = get_xid();
4563
4564         if (file->private_data == NULL) {
4565                 rc = -EBADF;
4566                 free_xid(xid);
4567                 return rc;
4568         }
4569
4570         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4571                  page, (int)offset, (int)offset);
4572
4573         rc = cifs_readpage_worker(file, page, &offset);
4574
4575         free_xid(xid);
4576         return rc;
4577 }
4578
4579 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4580 {
4581         struct cifsFileInfo *open_file;
4582
4583         spin_lock(&cifs_inode->open_file_lock);
4584         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4585                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4586                         spin_unlock(&cifs_inode->open_file_lock);
4587                         return 1;
4588                 }
4589         }
4590         spin_unlock(&cifs_inode->open_file_lock);
4591         return 0;
4592 }
4593
4594 /* We do not want to update the file size from server for inodes
4595    open for write - to avoid races with writepage extending
4596    the file - in the future we could consider allowing
4597    refreshing the inode only on increases in the file size
4598    but this is tricky to do without racing with writebehind
4599    page caching in the current Linux kernel design */
4600 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4601 {
4602         if (!cifsInode)
4603                 return true;
4604
4605         if (is_inode_writable(cifsInode)) {
4606                 /* This inode is open for write at least once */
4607                 struct cifs_sb_info *cifs_sb;
4608
4609                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4610                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4611                         /* since no page cache to corrupt on directio
4612                         we can change size safely */
4613                         return true;
4614                 }
4615
4616                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4617                         return true;
4618
4619                 return false;
4620         } else
4621                 return true;
4622 }
4623
/*
 * ->write_begin(): pin the target page and decide whether its current
 * contents must be read before the copy.  No read is needed when the
 * page is already uptodate, the write covers a full page, or (with a
 * read oplock) the page lies beyond EOF / the write covers all existing
 * data.  Otherwise try one synchronous read; if it fails, write_end
 * falls back to a sync write since PG_uptodate stays clear.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		/*
		 * the worker unlocked the page; drop our ref and loop once
		 * (oncethru guards against retrying forever) to re-grab it
		 * and re-run the uptodate check
		 */
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4700
4701 static int cifs_release_page(struct page *page, gfp_t gfp)
4702 {
4703         if (PagePrivate(page))
4704                 return 0;
4705
4706         return cifs_fscache_release_page(page, gfp);
4707 }
4708
4709 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4710                                  unsigned int length)
4711 {
4712         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4713
4714         if (offset == 0 && length == PAGE_SIZE)
4715                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4716 }
4717
/*
 * ->launder_page(): synchronously write back a dirty page that is about
 * to be invalidated, then drop it from fscache.  Returns the writepage
 * result, or 0 if the page was not dirty.
 */
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	/* WB_SYNC_ALL writeback restricted to exactly this page's range */
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
4738
/*
 * Work handler for a server-initiated oplock break on @cfile.
 *
 * Sequence matters here: wait for pending writers to drain, downgrade the
 * cached oplock level, break any local VFS leases and flush/invalidate
 * cached pages as the new level requires, push cached byte-range locks to
 * the server, and finally acknowledge the break (unless cancelled).
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* Let in-flight writers finish before changing the oplock state. */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	/* May ask us to purge cached data via purge_cache (protocol-specific). */
	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	/*
	 * Mandatory byte-range locks cannot be cached safely with only a
	 * read oplock, so drop the oplock entirely in that case.
	 */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* Break local leases down to what we can still cache. */
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* Lost read caching: wait out writeback, drop pages. */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/* Still hold write caching: locks stay cached, skip the push. */
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
4798
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests elsewhere, so this method should never be
 * called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
4817
4818 static int cifs_swap_activate(struct swap_info_struct *sis,
4819                               struct file *swap_file, sector_t *span)
4820 {
4821         struct cifsFileInfo *cfile = swap_file->private_data;
4822         struct inode *inode = swap_file->f_mapping->host;
4823         unsigned long blocks;
4824         long long isize;
4825
4826         cifs_dbg(FYI, "swap activate\n");
4827
4828         spin_lock(&inode->i_lock);
4829         blocks = inode->i_blocks;
4830         isize = inode->i_size;
4831         spin_unlock(&inode->i_lock);
4832         if (blocks*512 < isize) {
4833                 pr_warn("swap activate: swapfile has holes\n");
4834                 return -EINVAL;
4835         }
4836         *span = sis->pages;
4837
4838         pr_warn_once("Swap support over SMB3 is experimental\n");
4839
4840         /*
4841          * TODO: consider adding ACL (or documenting how) to prevent other
4842          * users (on this or other systems) from reading it
4843          */
4844
4845
4846         /* TODO: add sk_set_memalloc(inet) or similar */
4847
4848         if (cfile)
4849                 cfile->swapfile = true;
4850         /*
4851          * TODO: Since file already open, we can't open with DENY_ALL here
4852          * but we could add call to grab a byte range lock to prevent others
4853          * from reading or writing the file
4854          */
4855
4856         return 0;
4857 }
4858
4859 static void cifs_swap_deactivate(struct file *file)
4860 {
4861         struct cifsFileInfo *cfile = file->private_data;
4862
4863         cifs_dbg(FYI, "swap deactivate\n");
4864
4865         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4866
4867         if (cfile)
4868                 cfile->swapfile = false;
4869
4870         /* do we need to unpin (or unlock) the file */
4871 }
4872
/*
 * Address space operations used for the normal (cached) case, where the
 * server buffer is large enough for full-page reads via cifs_readpages.
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/*
	 * TODO: investigate and if useful we could add an cifs_migratePage
	 * helper (under an CONFIG_MIGRATION) in the future, and also
	 * investigate and add an is_dirty_writeback helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};
4893
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 *
 * This variant is identical to cifs_addr_ops except that .readpages,
 * .direct_IO, and the swap hooks are omitted.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};