// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it
                   can cause unnecessary access-denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
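
/*
 * Illustrative example (not part of the driver flow): for an open(2)
 * with flags O_RDWR | O_CREAT, cifs_convert_flags() above returns
 *
 *         desired_access = GENERIC_READ | GENERIC_WRITE;
 *
 * while the O_CREAT half of the flag word is handled separately by
 * cifs_get_disposition() below, which would return FILE_OPEN_IF. The
 * access mode and the create disposition are derived independently
 * from the same flag word.
 */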

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
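
/*
 * The SMB_O_* bits produced by cifs_posix_convert_flags() above are the
 * open flags of the SMB1 POSIX extensions; they are sent to the server
 * by CIFSPOSIXCreate() in cifs_posix_open() below.
 */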

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open
 *      call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info by passing in the fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

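/*
 * Scan every open fid's lock list on this inode and return true as soon
 * as any cached byte-range (mandatory) lock is found. Callers use this
 * to downgrade a read oplock to None, since caching reads would break
 * mandatory locking semantics.
 */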
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

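/*
 * Write-lock lock_sem by polling: try the lock and sleep 10ms between
 * attempts instead of blocking in down_write(), so the task never sits
 * on the rwsem wait queue.
 */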
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

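/*
 * Allocate and initialize per-open-file state and link it into the
 * per-inode and per-tcon lists. Note the lock ordering used below:
 * tcon->open_file_lock is taken before cinode->open_file_lock, and the
 * pending open recorded for this fid is resolved (its oplock folded in)
 * and removed while the tcon lock is held.
 */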
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload:    if true, queue the final release on a workqueue; callers on
 *              close and oplock break paths pass false
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

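/*
 * Reopen cfile's server handle after it was invalidated (e.g. by a
 * reconnect). An SMB1 POSIX reopen is tried first when the server
 * supports it, else a regular open with the original flags; O_CREAT,
 * O_EXCL and O_TRUNC are masked off since they already took effect at
 * the original open. If @can_flush, dirty pages are written back and
 * the inode metadata refreshed before the handle is reused.
 */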
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means
         * we end up here; we can never tell if the caller already has the
         * rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Otherwise we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if we
         * have data that would invalidate the current end of file on the
         * server we can not go to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

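/*
 * Last release of a file: if the inode holds a read+handle+write (RHW)
 * lease, the server handle is not closed immediately but parked on the
 * inode's deferred-close list and closed after acregmax, so that a
 * quick reopen of the same path can reuse it (see
 * cifs_get_readable_path() in cifs_open()).
 */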
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so increase the ref count to
                                 * avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles  */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP (set/unset a lock), CIFS_READ_OP or CIFS_WRITE_OP */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to send a request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to send a request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

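/*
 * Push all cached byte-range locks for this fid to the server using
 * legacy LOCKING_ANDX requests. Two passes are made, exclusive ranges
 * first and then shared ones, with up to max_num ranges packed into
 * each request.
 */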
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

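/*
 * Fold the opaque lock-owner pointer into a 32-bit value, mixed with
 * the module-wide cifs_lock_secret, so that it can stand in for a pid
 * in lock requests without leaking a kernel pointer.
 */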
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

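/*
 * Replay cached POSIX (fcntl) locks to the server: count them under
 * flc_lock, preallocate one lock_to_push per lock (the caller holds
 * cinode->lock_sem, so no new FL_POSIX locks can appear meanwhile),
 * then fill the entries and send them one CIFSSMBPosixLock() at a time.
 */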
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}
1434
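     /*
      * Push any cached byte-range locks for this open file to the server
      * and stop caching brlocks on the inode; the POSIX path is used when
      * the server's UNIX extensions and the mount options allow it.
      */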
1435 static int
1436 cifs_push_locks(struct cifsFileInfo *cfile)
1437 {
1438         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1439         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1440         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1441         int rc = 0;
1442
1443         /* we are going to update can_cache_brlcks here - need write access */
1444         cifs_down_write(&cinode->lock_sem);
1445         if (!cinode->can_cache_brlcks) {
1446                 up_write(&cinode->lock_sem);
1447                 return rc;
1448         }
1449
1450         if (cap_unix(tcon->ses) &&
1451             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1452             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1453                 rc = cifs_push_posix_locks(cfile);
1454         else
1455                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1456
1457         cinode->can_cache_brlcks = false;
1458         up_write(&cinode->lock_sem);
1459         return rc;
1460 }
1461
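     /*
      * Decode a VFS file_lock into the on-the-wire lock type for this
      * server and report whether it is a lock or an unlock request and
      * whether the caller is willing to block.
      */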
1462 static void
1463 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1464                 bool *wait_flag, struct TCP_Server_Info *server)
1465 {
1466         if (flock->fl_flags & FL_POSIX)
1467                 cifs_dbg(FYI, "Posix\n");
1468         if (flock->fl_flags & FL_FLOCK)
1469                 cifs_dbg(FYI, "Flock\n");
1470         if (flock->fl_flags & FL_SLEEP) {
1471                 cifs_dbg(FYI, "Blocking lock\n");
1472                 *wait_flag = true;
1473         }
1474         if (flock->fl_flags & FL_ACCESS)
1475                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1476         if (flock->fl_flags & FL_LEASE)
1477                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1478         if (flock->fl_flags &
1479             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1480                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1481                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1482
1483         *type = server->vals->large_lock_type;
1484         if (flock->fl_type == F_WRLCK) {
1485                 cifs_dbg(FYI, "F_WRLCK\n");
1486                 *type |= server->vals->exclusive_lock_type;
1487                 *lock = 1;
1488         } else if (flock->fl_type == F_UNLCK) {
1489                 cifs_dbg(FYI, "F_UNLCK\n");
1490                 *type |= server->vals->unlock_lock_type;
1491                 *unlock = 1;
1492                 /* Check if unlock includes more than one lock range */
1493         } else if (flock->fl_type == F_RDLCK) {
1494                 cifs_dbg(FYI, "F_RDLCK\n");
1495                 *type |= server->vals->shared_lock_type;
1496                 *lock = 1;
1497         } else if (flock->fl_type == F_EXLCK) {
1498                 cifs_dbg(FYI, "F_EXLCK\n");
1499                 *type |= server->vals->exclusive_lock_type;
1500                 *lock = 1;
1501         } else if (flock->fl_type == F_SHLCK) {
1502                 cifs_dbg(FYI, "F_SHLCK\n");
1503                 *type |= server->vals->shared_lock_type;
1504                 *lock = 1;
1505         } else
1506                 cifs_dbg(FYI, "Unknown type of lock\n");
1507 }
1508
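     /*
      * Handle F_GETLK: probe for conflicts by trying to set (and then
      * immediately release) a server-side lock of the requested type; if
      * an exclusive probe fails, retry with a shared one to tell read
      * conflicts from write conflicts.
      */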
1509 static int
1510 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1511            bool wait_flag, bool posix_lck, unsigned int xid)
1512 {
1513         int rc = 0;
1514         __u64 length = 1 + flock->fl_end - flock->fl_start;
1515         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1516         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1517         struct TCP_Server_Info *server = tcon->ses->server;
1518         __u16 netfid = cfile->fid.netfid;
1519
1520         if (posix_lck) {
1521                 int posix_lock_type;
1522
1523                 rc = cifs_posix_lock_test(file, flock);
1524                 if (!rc)
1525                         return rc;
1526
1527                 if (type & server->vals->shared_lock_type)
1528                         posix_lock_type = CIFS_RDLCK;
1529                 else
1530                         posix_lock_type = CIFS_WRLCK;
1531                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1532                                       hash_lockowner(flock->fl_owner),
1533                                       flock->fl_start, length, flock,
1534                                       posix_lock_type, wait_flag);
1535                 return rc;
1536         }
1537
1538         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1539         if (!rc)
1540                 return rc;
1541
1542         /* BB we could chain these into one lock request BB */
1543         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1544                                     1, 0, false);
1545         if (rc == 0) {
1546                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1547                                             type, 0, 1, false);
1548                 flock->fl_type = F_UNLCK;
1549                 if (rc != 0)
1550                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1551                                  rc);
1552                 return 0;
1553         }
1554
1555         if (type & server->vals->shared_lock_type) {
1556                 flock->fl_type = F_WRLCK;
1557                 return 0;
1558         }
1559
1560         type &= ~server->vals->exclusive_lock_type;
1561
1562         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1563                                     type | server->vals->shared_lock_type,
1564                                     1, 0, false);
1565         if (rc == 0) {
1566                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1567                         type | server->vals->shared_lock_type, 0, 1, false);
1568                 flock->fl_type = F_RDLCK;
1569                 if (rc != 0)
1570                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1571                                  rc);
1572         } else
1573                 flock->fl_type = F_WRLCK;
1574
1575         return 0;
1576 }
1577
1578 void
1579 cifs_move_llist(struct list_head *source, struct list_head *dest)
1580 {
1581         struct list_head *li, *tmp;
1582         list_for_each_safe(li, tmp, source)
1583                 list_move(li, dest);
1584 }
1585
1586 void
1587 cifs_free_llist(struct list_head *llist)
1588 {
1589         struct cifsLockInfo *li, *tmp;
1590         list_for_each_entry_safe(li, tmp, llist, llist) {
1591                 cifs_del_lock_waiters(li);
1592                 list_del(&li->llist);
1593                 kfree(li);
1594         }
1595 }
1596
1597 int
1598 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1599                   unsigned int xid)
1600 {
1601         int rc = 0, stored_rc;
1602         static const int types[] = {
1603                 LOCKING_ANDX_LARGE_FILES,
1604                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1605         };
1606         unsigned int i;
1607         unsigned int max_num, num, max_buf;
1608         LOCKING_ANDX_RANGE *buf, *cur;
1609         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1610         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1611         struct cifsLockInfo *li, *tmp;
1612         __u64 length = 1 + flock->fl_end - flock->fl_start;
1613         struct list_head tmp_llist;
1614
1615         INIT_LIST_HEAD(&tmp_llist);
1616
1617         /*
1618          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1619          * and check it before using.
1620          */
1621         max_buf = tcon->ses->server->maxBuf;
1622         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1623                 return -EINVAL;
1624
1625         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1626                      PAGE_SIZE);
1627         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1628                         PAGE_SIZE);
1629         max_num = (max_buf - sizeof(struct smb_hdr)) /
1630                                                 sizeof(LOCKING_ANDX_RANGE);
1631         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1632         if (!buf)
1633                 return -ENOMEM;
1634
1635         cifs_down_write(&cinode->lock_sem);
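             /*
              * Two passes, one per lock type (exclusive, then shared), since
              * a single LOCKING_ANDX request carries ranges of one type only.
              */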
1636         for (i = 0; i < 2; i++) {
1637                 cur = buf;
1638                 num = 0;
1639                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1640                         if (flock->fl_start > li->offset ||
1641                             (flock->fl_start + length) <
1642                             (li->offset + li->length))
1643                                 continue;
1644                         if (current->tgid != li->pid)
1645                                 continue;
1646                         if (types[i] != li->type)
1647                                 continue;
1648                         if (cinode->can_cache_brlcks) {
1649                                 /*
1650                                  * We can cache brlock requests - simply remove
1651                                  * a lock from the file's list.
1652                                  */
1653                                 list_del(&li->llist);
1654                                 cifs_del_lock_waiters(li);
1655                                 kfree(li);
1656                                 continue;
1657                         }
1658                         cur->Pid = cpu_to_le16(li->pid);
1659                         cur->LengthLow = cpu_to_le32((u32)li->length);
1660                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1661                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1662                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1663                         /*
1664                          * We need to save a lock here to let us add it again to
1665                          * the file's list if the unlock range request fails on
1666                          * the server.
1667                          */
1668                         list_move(&li->llist, &tmp_llist);
1669                         if (++num == max_num) {
1670                                 stored_rc = cifs_lockv(xid, tcon,
1671                                                        cfile->fid.netfid,
1672                                                        li->type, num, 0, buf);
1673                                 if (stored_rc) {
1674                                         /*
1675                                          * We failed on the unlock range
1676                                          * request - add all locks from the tmp
1677                                          * list to the head of the file's list.
1678                                          */
1679                                         cifs_move_llist(&tmp_llist,
1680                                                         &cfile->llist->locks);
1681                                         rc = stored_rc;
1682                                 } else
1683                                         /*
1684                                          * The unlock range request succeeded -
1685                                          * free the tmp list.
1686                                          */
1687                                         cifs_free_llist(&tmp_llist);
1688                                 cur = buf;
1689                                 num = 0;
1690                         } else
1691                                 cur++;
1692                 }
1693                 if (num) {
1694                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1695                                                types[i], num, 0, buf);
1696                         if (stored_rc) {
1697                                 cifs_move_llist(&tmp_llist,
1698                                                 &cfile->llist->locks);
1699                                 rc = stored_rc;
1700                         } else
1701                                 cifs_free_llist(&tmp_llist);
1702                 }
1703         }
1704
1705         up_write(&cinode->lock_sem);
1706         kfree(buf);
1707         return rc;
1708 }
1709
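     /*
      * Handle F_SETLK/F_SETLKW: use POSIX semantics when the server
      * supports UNIX extension fcntl locks; otherwise cache the lock
      * locally while brlock caching is still allowed, or send a mandatory
      * byte-range lock/unlock to the server.
      */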
1710 static int
1711 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1712            bool wait_flag, bool posix_lck, int lock, int unlock,
1713            unsigned int xid)
1714 {
1715         int rc = 0;
1716         __u64 length = 1 + flock->fl_end - flock->fl_start;
1717         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1718         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1719         struct TCP_Server_Info *server = tcon->ses->server;
1720         struct inode *inode = d_inode(cfile->dentry);
1721
1722         if (posix_lck) {
1723                 int posix_lock_type;
1724
1725                 rc = cifs_posix_lock_set(file, flock);
1726                 if (rc <= FILE_LOCK_DEFERRED)
1727                         return rc;
1728
1729                 if (type & server->vals->shared_lock_type)
1730                         posix_lock_type = CIFS_RDLCK;
1731                 else
1732                         posix_lock_type = CIFS_WRLCK;
1733
1734                 if (unlock == 1)
1735                         posix_lock_type = CIFS_UNLCK;
1736
1737                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1738                                       hash_lockowner(flock->fl_owner),
1739                                       flock->fl_start, length,
1740                                       NULL, posix_lock_type, wait_flag);
1741                 goto out;
1742         }
1743
1744         if (lock) {
1745                 struct cifsLockInfo *lock;
1746
1747                 lock = cifs_lock_init(flock->fl_start, length, type,
1748                                       flock->fl_flags);
1749                 if (!lock)
1750                         return -ENOMEM;
1751
1752                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1753                 if (rc < 0) {
1754                         kfree(lock);
1755                         return rc;
1756                 }
1757                 if (!rc)
1758                         goto out;
1759
1760                 /*
1761                  * A Windows 7 server can delay breaking a lease from read to
1762                  * None if we set a byte-range lock on a file - break it
1763                  * explicitly before sending the lock to the server to be sure
1764                  * the next read won't conflict with non-overlapping locks due
1765                  * to page reading.
1766                  */
1767                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1768                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1769                         cifs_zap_mapping(inode);
1770                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1771                                  inode);
1772                         CIFS_I(inode)->oplock = 0;
1773                 }
1774
1775                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1776                                             type, 1, 0, wait_flag);
1777                 if (rc) {
1778                         kfree(lock);
1779                         return rc;
1780                 }
1781
1782                 cifs_lock_add(cfile, lock);
1783         } else if (unlock)
1784                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1785
1786 out:
1787         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1788                 /*
1789                  * If this is a request to remove all locks because we
1790                  * are closing the file, it doesn't matter if the
1791                  * unlocking failed as both cifs.ko and the SMB server
1792                  * remove the lock on file close
1793                  */
1794                 if (rc) {
1795                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1796                         if (!(flock->fl_flags & FL_CLOSE))
1797                                 return rc;
1798                 }
1799                 rc = locks_lock_file_wait(file, flock);
1800         }
1801         return rc;
1802 }
1803
1804 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1805 {
1806         int rc, xid;
1807         int lock = 0, unlock = 0;
1808         bool wait_flag = false;
1809         bool posix_lck = false;
1810         struct cifs_sb_info *cifs_sb;
1811         struct cifs_tcon *tcon;
1812         struct cifsFileInfo *cfile;
1813         __u32 type;
1814
1815         if (!(fl->fl_flags & FL_FLOCK))
1816                 return -ENOLCK;
1817
1818         rc = -EACCES;
1819         xid = get_xid();
1820
1821         cfile = (struct cifsFileInfo *)file->private_data;
1822         tcon = tlink_tcon(cfile->tlink);
1823
1824         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1825                         tcon->ses->server);
1826         cifs_sb = CIFS_FILE_SB(file);
1827
1828         if (cap_unix(tcon->ses) &&
1829             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1830             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1831                 posix_lck = true;
1832
1833         if (!lock && !unlock) {
1834                 /*
1835                  * neither a lock nor an unlock request - nothing to do since
1836                  * we do not know what it is
1837                  */
1838                 free_xid(xid);
1839                 return -EOPNOTSUPP;
1840         }
1841
1842         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1843                         xid);
1844         free_xid(xid);
1845         return rc;
1848 }
1849
1850 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1851 {
1852         int rc, xid;
1853         int lock = 0, unlock = 0;
1854         bool wait_flag = false;
1855         bool posix_lck = false;
1856         struct cifs_sb_info *cifs_sb;
1857         struct cifs_tcon *tcon;
1858         struct cifsFileInfo *cfile;
1859         __u32 type;
1860
1861         rc = -EACCES;
1862         xid = get_xid();
1863
1864         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1865                  cmd, flock->fl_flags, flock->fl_type,
1866                  flock->fl_start, flock->fl_end);
1867
1868         cfile = (struct cifsFileInfo *)file->private_data;
1869         tcon = tlink_tcon(cfile->tlink);
1870
1871         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1872                         tcon->ses->server);
1873         cifs_sb = CIFS_FILE_SB(file);
1874         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1875
1876         if (cap_unix(tcon->ses) &&
1877             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1878             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1879                 posix_lck = true;
1880         /*
1881          * BB add code here to normalize offset and length to account for
1882          * negative length which we cannot accept over the wire.
1883          */
1884         if (IS_GETLK(cmd)) {
1885                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1886                 free_xid(xid);
1887                 return rc;
1888         }
1889
1890         if (!lock && !unlock) {
1891                 /*
1892                  * neither a lock nor an unlock request - nothing to do since
1893                  * we do not know what it is
1894                  */
1895                 free_xid(xid);
1896                 return -EOPNOTSUPP;
1897         }
1898
1899         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1900                         xid);
1901         free_xid(xid);
1902         return rc;
1903 }
1904
1905 /*
1906  * update the file size (if needed) after a write. Should be called with
1907  * the inode->i_lock held
1908  */
1909 void
1910 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1911                       unsigned int bytes_written)
1912 {
1913         loff_t end_of_write = offset + bytes_written;
1914
1915         if (end_of_write > cifsi->server_eof)
1916                 cifsi->server_eof = end_of_write;
1917 }
1918
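     /*
      * Synchronously write write_size bytes at *offset in chunks bounded
      * by wp_retry_size, reopening the handle and retrying on -EAGAIN;
      * *offset and the cached server EOF advance as bytes are committed.
      */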
1919 static ssize_t
1920 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1921            size_t write_size, loff_t *offset)
1922 {
1923         int rc = 0;
1924         unsigned int bytes_written = 0;
1925         unsigned int total_written;
1926         struct cifs_tcon *tcon;
1927         struct TCP_Server_Info *server;
1928         unsigned int xid;
1929         struct dentry *dentry = open_file->dentry;
1930         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1931         struct cifs_io_parms io_parms = {0};
1932
1933         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1934                  write_size, *offset, dentry);
1935
1936         tcon = tlink_tcon(open_file->tlink);
1937         server = tcon->ses->server;
1938
1939         if (!server->ops->sync_write)
1940                 return -ENOSYS;
1941
1942         xid = get_xid();
1943
1944         for (total_written = 0; write_size > total_written;
1945              total_written += bytes_written) {
1946                 rc = -EAGAIN;
1947                 while (rc == -EAGAIN) {
1948                         struct kvec iov[2];
1949                         unsigned int len;
1950
1951                         if (open_file->invalidHandle) {
1952                                 /* we could deadlock if we called
1953                                    filemap_fdatawait from here so tell
1954                                    reopen_file not to flush data to
1955                                    the server now */
1956                                 rc = cifs_reopen_file(open_file, false);
1957                                 if (rc != 0)
1958                                         break;
1959                         }
1960
1961                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1962                                   (unsigned int)write_size - total_written);
1963                         /* iov[0] is reserved for smb header */
1964                         iov[1].iov_base = (char *)write_data + total_written;
1965                         iov[1].iov_len = len;
1966                         io_parms.pid = pid;
1967                         io_parms.tcon = tcon;
1968                         io_parms.offset = *offset;
1969                         io_parms.length = len;
1970                         rc = server->ops->sync_write(xid, &open_file->fid,
1971                                         &io_parms, &bytes_written, iov, 1);
1972                 }
1973                 if (rc || (bytes_written == 0)) {
1974                         if (total_written)
1975                                 break;
1976                         else {
1977                                 free_xid(xid);
1978                                 return rc;
1979                         }
1980                 } else {
1981                         spin_lock(&d_inode(dentry)->i_lock);
1982                         cifs_update_eof(cifsi, *offset, bytes_written);
1983                         spin_unlock(&d_inode(dentry)->i_lock);
1984                         *offset += bytes_written;
1985                 }
1986         }
1987
1988         cifs_stats_bytes_written(tcon, total_written);
1989
1990         if (total_written > 0) {
1991                 spin_lock(&d_inode(dentry)->i_lock);
1992                 if (*offset > d_inode(dentry)->i_size) {
1993                         i_size_write(d_inode(dentry), *offset);
1994                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1995                 }
1996                 spin_unlock(&d_inode(dentry)->i_lock);
1997         }
1998         mark_inode_dirty_sync(d_inode(dentry));
1999         free_xid(xid);
2000         return total_written;
2001 }
2002
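     /*
      * Return a referenced open handle usable for reading, or NULL; on
      * multiuser mounts the search can be restricted to handles opened by
      * the current fsuid.
      */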
2003 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2004                                         bool fsuid_only)
2005 {
2006         struct cifsFileInfo *open_file = NULL;
2007         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2008
2009         /* only filter by fsuid on multiuser mounts */
2010         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2011                 fsuid_only = false;
2012
2013         spin_lock(&cifs_inode->open_file_lock);
2014         /* we could simply get the first list entry since write-only entries
2015            are always at the end of the list but since the first entry might
2016            have a close pending, we go through the whole list */
2017         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2018                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2019                         continue;
2020                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2021                         if (!open_file->invalidHandle) {
2022                                 /* found a good file */
2023                                 /* lock it so it will not be closed on us */
2024                                 cifsFileInfo_get(open_file);
2025                                 spin_unlock(&cifs_inode->open_file_lock);
2026                                 return open_file;
2027                         } /* else might as well continue, and look for
2028                              another, or simply have the caller reopen it
2029                              again rather than trying to fix this handle */
2030                 } else /* write only file */
2031                         break; /* write only files are last so must be done */
2032         }
2033         spin_unlock(&cifs_inode->open_file_lock);
2034         return NULL;
2035 }
2036
2037 /* Return -EBADF if no handle is found and general rc otherwise */
2038 int
2039 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2040                        struct cifsFileInfo **ret_file)
2041 {
2042         struct cifsFileInfo *open_file, *inv_file = NULL;
2043         struct cifs_sb_info *cifs_sb;
2044         bool any_available = false;
2045         int rc = -EBADF;
2046         unsigned int refind = 0;
2047         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2048         bool with_delete = flags & FIND_WR_WITH_DELETE;
2049         *ret_file = NULL;
2050
2051         /*
2052          * Having a null inode here (because mapping->host was set to zero by
2053          * the VFS or MM) should not happen but we had reports of an oops (due
2054          * to it being zero) during stress test cases so we need to check for it
2055          */
2056
2057         if (cifs_inode == NULL) {
2058                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2059                 dump_stack();
2060                 return rc;
2061         }
2062
2063         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2064
2065         /* only filter by fsuid on multiuser mounts */
2066         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2067                 fsuid_only = false;
2068
2069         spin_lock(&cifs_inode->open_file_lock);
2070 refind_writable:
2071         if (refind > MAX_REOPEN_ATT) {
2072                 spin_unlock(&cifs_inode->open_file_lock);
2073                 return rc;
2074         }
2075         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2076                 if (!any_available && open_file->pid != current->tgid)
2077                         continue;
2078                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2079                         continue;
2080                 if (with_delete && !(open_file->fid.access & DELETE))
2081                         continue;
2082                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2083                         if (!open_file->invalidHandle) {
2084                                 /* found a good writable file */
2085                                 cifsFileInfo_get(open_file);
2086                                 spin_unlock(&cifs_inode->open_file_lock);
2087                                 *ret_file = open_file;
2088                                 return 0;
2089                         } else {
2090                                 if (!inv_file)
2091                                         inv_file = open_file;
2092                         }
2093                 }
2094         }
2095         /* couldn't find usable FH with same pid, try any available */
2096         if (!any_available) {
2097                 any_available = true;
2098                 goto refind_writable;
2099         }
2100
2101         if (inv_file) {
2102                 any_available = false;
2103                 cifsFileInfo_get(inv_file);
2104         }
2105
2106         spin_unlock(&cifs_inode->open_file_lock);
2107
2108         if (inv_file) {
2109                 rc = cifs_reopen_file(inv_file, false);
2110                 if (!rc) {
2111                         *ret_file = inv_file;
2112                         return 0;
2113                 }
2114
2115                 spin_lock(&cifs_inode->open_file_lock);
2116                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2117                 spin_unlock(&cifs_inode->open_file_lock);
2118                 cifsFileInfo_put(inv_file);
2119                 ++refind;
2120                 inv_file = NULL;
2121                 spin_lock(&cifs_inode->open_file_lock);
2122                 goto refind_writable;
2123         }
2124
2125         return rc;
2126 }
2127
2128 struct cifsFileInfo *
2129 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2130 {
2131         struct cifsFileInfo *cfile;
2132         int rc;
2133
2134         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2135         if (rc)
2136                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2137
2138         return cfile;
2139 }
2140
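     /*
      * Find an open file on this tcon by full path and return a writable
      * handle for its inode.
      */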
2141 int
2142 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2143                        int flags,
2144                        struct cifsFileInfo **ret_file)
2145 {
2146         struct cifsFileInfo *cfile;
2147         void *page = alloc_dentry_path();
2148
2149         *ret_file = NULL;
2150
2151         spin_lock(&tcon->open_file_lock);
2152         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2153                 struct cifsInodeInfo *cinode;
2154                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2155                 if (IS_ERR(full_path)) {
2156                         spin_unlock(&tcon->open_file_lock);
2157                         free_dentry_path(page);
2158                         return PTR_ERR(full_path);
2159                 }
2160                 if (strcmp(full_path, name))
2161                         continue;
2162
2163                 cinode = CIFS_I(d_inode(cfile->dentry));
2164                 spin_unlock(&tcon->open_file_lock);
2165                 free_dentry_path(page);
2166                 return cifs_get_writable_file(cinode, flags, ret_file);
2167         }
2168
2169         spin_unlock(&tcon->open_file_lock);
2170         free_dentry_path(page);
2171         return -ENOENT;
2172 }
2173
2174 int
2175 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2176                        struct cifsFileInfo **ret_file)
2177 {
2178         struct cifsFileInfo *cfile;
2179         void *page = alloc_dentry_path();
2180
2181         *ret_file = NULL;
2182
2183         spin_lock(&tcon->open_file_lock);
2184         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2185                 struct cifsInodeInfo *cinode;
2186                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2187                 if (IS_ERR(full_path)) {
2188                         spin_unlock(&tcon->open_file_lock);
2189                         free_dentry_path(page);
2190                         return PTR_ERR(full_path);
2191                 }
2192                 if (strcmp(full_path, name))
2193                         continue;
2194
2195                 cinode = CIFS_I(d_inode(cfile->dentry));
2196                 spin_unlock(&tcon->open_file_lock);
2197                 free_dentry_path(page);
2198                 *ret_file = find_readable_file(cinode, 0);
2199                 return *ret_file ? 0 : -ENOENT;
2200         }
2201
2202         spin_unlock(&tcon->open_file_lock);
2203         free_dentry_path(page);
2204         return -ENOENT;
2205 }
2206
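     /*
      * Write the byte range [from, to) of a cached page back to the server
      * through any writable handle for the inode, clamping the range so
      * the write never extends the file.
      */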
2207 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2208 {
2209         struct address_space *mapping = page->mapping;
2210         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2211         char *write_data;
2212         int rc = -EFAULT;
2213         int bytes_written = 0;
2214         struct inode *inode;
2215         struct cifsFileInfo *open_file;
2216
2217         if (!mapping || !mapping->host)
2218                 return -EFAULT;
2219
2220         inode = page->mapping->host;
2221
2222         offset += (loff_t)from;
2223         write_data = kmap(page);
2224         write_data += from;
2225
2226         if ((to > PAGE_SIZE) || (from > to)) {
2227                 kunmap(page);
2228                 return -EIO;
2229         }
2230
2231         /* racing with truncate? */
2232         if (offset > mapping->host->i_size) {
2233                 kunmap(page);
2234                 return 0; /* don't care */
2235         }
2236
2237         /* check to make sure that we are not extending the file */
2238         if (mapping->host->i_size - offset < (loff_t)to)
2239                 to = (unsigned)(mapping->host->i_size - offset);
2240
2241         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2242                                     &open_file);
2243         if (!rc) {
2244                 bytes_written = cifs_write(open_file, open_file->pid,
2245                                            write_data, to - from, &offset);
2246                 cifsFileInfo_put(open_file);
2247                 /* Does mm or vfs already set times? */
2248                 inode->i_atime = inode->i_mtime = current_time(inode);
2249                 if ((bytes_written > 0) && (offset))
2250                         rc = 0;
2251                 else if (bytes_written < 0)
2252                         rc = bytes_written;
2253                 else
2254                         rc = -EFAULT;
2255         } else {
2256                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2257                 if (!is_retryable_error(rc))
2258                         rc = -EIO;
2259         }
2260
2261         kunmap(page);
2262         return rc;
2263 }
2264
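     /*
      * Allocate a write request sized for up to tofind pages and fill it
      * with the next batch of pages tagged dirty in the mapping.
      */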
2265 static struct cifs_writedata *
2266 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2267                           pgoff_t end, pgoff_t *index,
2268                           unsigned int *found_pages)
2269 {
2270         struct cifs_writedata *wdata;
2271
2272         wdata = cifs_writedata_alloc((unsigned int)tofind,
2273                                      cifs_writev_complete);
2274         if (!wdata)
2275                 return NULL;
2276
2277         *found_pages = find_get_pages_range_tag(mapping, index, end,
2278                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2279         return wdata;
2280 }
2281
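     /*
      * Lock a run of consecutive dirty pages from the batch, mark them for
      * writeback and release the rest; returns how many pages are ready to
      * be sent.
      */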
2282 static unsigned int
2283 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2284                     struct address_space *mapping,
2285                     struct writeback_control *wbc,
2286                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2287 {
2288         unsigned int nr_pages = 0, i;
2289         struct page *page;
2290
2291         for (i = 0; i < found_pages; i++) {
2292                 page = wdata->pages[i];
2293                 /*
2294                  * At this point we hold neither the i_pages lock nor the
2295                  * page lock: the page may be truncated or invalidated
2296                  * (changing page->mapping to NULL), or even swizzled
2297                  * back from swapper_space to tmpfs file mapping
2298                  */
2299
2300                 if (nr_pages == 0)
2301                         lock_page(page);
2302                 else if (!trylock_page(page))
2303                         break;
2304
2305                 if (unlikely(page->mapping != mapping)) {
2306                         unlock_page(page);
2307                         break;
2308                 }
2309
2310                 if (!wbc->range_cyclic && page->index > end) {
2311                         *done = true;
2312                         unlock_page(page);
2313                         break;
2314                 }
2315
2316                 if (*next && (page->index != *next)) {
2317                         /* Not next consecutive page */
2318                         unlock_page(page);
2319                         break;
2320                 }
2321
2322                 if (wbc->sync_mode != WB_SYNC_NONE)
2323                         wait_on_page_writeback(page);
2324
2325                 if (PageWriteback(page) ||
2326                                 !clear_page_dirty_for_io(page)) {
2327                         unlock_page(page);
2328                         break;
2329                 }
2330
2331                 /*
2332                  * This actually clears the dirty bit in the radix tree.
2333                  * See cifs_writepage() for more commentary.
2334                  */
2335                 set_page_writeback(page);
2336                 if (page_offset(page) >= i_size_read(mapping->host)) {
2337                         *done = true;
2338                         unlock_page(page);
2339                         end_page_writeback(page);
2340                         break;
2341                 }
2342
2343                 wdata->pages[i] = page;
2344                 *next = page->index + 1;
2345                 ++nr_pages;
2346         }
2347
2348         /* reset index to refind any pages skipped */
2349         if (nr_pages == 0)
2350                 *index = wdata->pages[0]->index + 1;
2351
2352         /* put any pages we aren't going to use */
2353         for (i = nr_pages; i < found_pages; i++) {
2354                 put_page(wdata->pages[i]);
2355                 wdata->pages[i] = NULL;
2356         }
2357
2358         return nr_pages;
2359 }
2360
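     /*
      * Fill in the remaining write request fields (offset, page and tail
      * sizes, total bytes) for the page run and hand it to the server's
      * async write op; the last page may be partial if it straddles EOF.
      */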
2361 static int
2362 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2363                  struct address_space *mapping, struct writeback_control *wbc)
2364 {
2365         int rc;
2366
2367         wdata->sync_mode = wbc->sync_mode;
2368         wdata->nr_pages = nr_pages;
2369         wdata->offset = page_offset(wdata->pages[0]);
2370         wdata->pagesz = PAGE_SIZE;
2371         wdata->tailsz = min(i_size_read(mapping->host) -
2372                         page_offset(wdata->pages[nr_pages - 1]),
2373                         (loff_t)PAGE_SIZE);
2374         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2375         wdata->pid = wdata->cfile->pid;
2376
2377         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2378         if (rc)
2379                 return rc;
2380
2381         if (wdata->cfile->invalidHandle)
2382                 rc = -EAGAIN;
2383         else
2384                 rc = wdata->server->ops->async_writev(wdata,
2385                                                       cifs_writedata_release);
2386
2387         return rc;
2388 }
2389
2390 static int cifs_writepages(struct address_space *mapping,
2391                            struct writeback_control *wbc)
2392 {
2393         struct inode *inode = mapping->host;
2394         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2395         struct TCP_Server_Info *server;
2396         bool done = false, scanned = false, range_whole = false;
2397         pgoff_t end, index;
2398         struct cifs_writedata *wdata;
2399         struct cifsFileInfo *cfile = NULL;
2400         int rc = 0;
2401         int saved_rc = 0;
2402         unsigned int xid;
2403
2404         /*
2405          * If wsize is smaller than the page cache size, default to writing
2406          * one page at a time via cifs_writepage
2407          */
2408         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2409                 return generic_writepages(mapping, wbc);
2410
2411         xid = get_xid();
2412         if (wbc->range_cyclic) {
2413                 index = mapping->writeback_index; /* Start from prev offset */
2414                 end = -1;
2415         } else {
2416                 index = wbc->range_start >> PAGE_SHIFT;
2417                 end = wbc->range_end >> PAGE_SHIFT;
2418                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2419                         range_whole = true;
2420                 scanned = true;
2421         }
2422         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2423
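             /*
              * Gather runs of contiguous dirty pages (at most wsize bytes per
              * request), mark them for writeback and send each run as one
              * asynchronous write, re-acquiring credits on every iteration.
              */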
2424 retry:
2425         while (!done && index <= end) {
2426                 unsigned int i, nr_pages, found_pages, wsize;
2427                 pgoff_t next = 0, tofind, saved_index = index;
2428                 struct cifs_credits credits_on_stack;
2429                 struct cifs_credits *credits = &credits_on_stack;
2430                 int get_file_rc = 0;
2431
2432                 if (cfile)
2433                         cifsFileInfo_put(cfile);
2434
2435                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2436
2437                 /* in case of an error store it to return later */
2438                 if (rc)
2439                         get_file_rc = rc;
2440
2441                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2442                                                    &wsize, credits);
2443                 if (rc != 0) {
2444                         done = true;
2445                         break;
2446                 }
2447
2448                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2449
2450                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2451                                                   &found_pages);
2452                 if (!wdata) {
2453                         rc = -ENOMEM;
2454                         done = true;
2455                         add_credits_and_wake_if(server, credits, 0);
2456                         break;
2457                 }
2458
2459                 if (found_pages == 0) {
2460                         kref_put(&wdata->refcount, cifs_writedata_release);
2461                         add_credits_and_wake_if(server, credits, 0);
2462                         break;
2463                 }
2464
2465                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2466                                                end, &index, &next, &done);
2467
2468                 /* nothing to write? */
2469                 if (nr_pages == 0) {
2470                         kref_put(&wdata->refcount, cifs_writedata_release);
2471                         add_credits_and_wake_if(server, credits, 0);
2472                         continue;
2473                 }
2474
2475                 wdata->credits = credits_on_stack;
2476                 wdata->cfile = cfile;
2477                 wdata->server = server;
2478                 cfile = NULL;
2479
2480                 if (!wdata->cfile) {
2481                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2482                                  get_file_rc);
2483                         if (is_retryable_error(get_file_rc))
2484                                 rc = get_file_rc;
2485                         else
2486                                 rc = -EBADF;
2487                 } else
2488                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2489
2490                 for (i = 0; i < nr_pages; ++i)
2491                         unlock_page(wdata->pages[i]);
2492
2493                 /* send failure -- clean up the mess */
2494                 if (rc != 0) {
2495                         add_credits_and_wake_if(server, &wdata->credits, 0);
2496                         for (i = 0; i < nr_pages; ++i) {
2497                                 if (is_retryable_error(rc))
2498                                         redirty_page_for_writepage(wbc,
2499                                                            wdata->pages[i]);
2500                                 else
2501                                         SetPageError(wdata->pages[i]);
2502                                 end_page_writeback(wdata->pages[i]);
2503                                 put_page(wdata->pages[i]);
2504                         }
2505                         if (!is_retryable_error(rc))
2506                                 mapping_set_error(mapping, rc);
2507                 }
2508                 kref_put(&wdata->refcount, cifs_writedata_release);
2509
2510                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2511                         index = saved_index;
2512                         continue;
2513                 }
2514
2515                 /* Return immediately if we received a signal during writing */
2516                 if (is_interrupt_error(rc)) {
2517                         done = true;
2518                         break;
2519                 }
2520
2521                 if (rc != 0 && saved_rc == 0)
2522                         saved_rc = rc;
2523
2524                 wbc->nr_to_write -= nr_pages;
2525                 if (wbc->nr_to_write <= 0)
2526                         done = true;
2527
2528                 index = next;
2529         }
2530
2531         if (!scanned && !done) {
2532                 /*
2533                  * We hit the last page and there is more work to be done: wrap
2534                  * back to the start of the file
2535                  */
2536                 scanned = true;
2537                 index = 0;
2538                 goto retry;
2539         }
2540
2541         if (saved_rc != 0)
2542                 rc = saved_rc;
2543
2544         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2545                 mapping->writeback_index = index;
2546
2547         if (cfile)
2548                 cifsFileInfo_put(cfile);
2549         free_xid(xid);
2550         /* Indication to update ctime and mtime as close is deferred */
2551         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2552         return rc;
2553 }
2554
2555 static int
2556 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2557 {
2558         int rc;
2559         unsigned int xid;
2560
2561         xid = get_xid();
2562 /* BB add check for wbc flags */
2563         get_page(page);
2564         if (!PageUptodate(page))
2565                 cifs_dbg(FYI, "ppw - page not up to date\n");
2566
2567         /*
2568          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2569          *
2570          * A writepage() implementation always needs to do either this,
2571          * or re-dirty the page with "redirty_page_for_writepage()" in
2572          * the case of a failure.
2573          *
2574          * Just unlocking the page will cause the radix tree tag-bits
2575          * to fail to update with the state of the page correctly.
2576          */
2577         set_page_writeback(page);
2578 retry_write:
2579         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2580         if (is_retryable_error(rc)) {
2581                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2582                         goto retry_write;
2583                 redirty_page_for_writepage(wbc, page);
2584         } else if (rc != 0) {
2585                 SetPageError(page);
2586                 mapping_set_error(page->mapping, rc);
2587         } else {
2588                 SetPageUptodate(page);
2589         }
2590         end_page_writeback(page);
2591         put_page(page);
2592         free_xid(xid);
2593         return rc;
2594 }
2595
2596 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2597 {
2598         int rc = cifs_writepage_locked(page, wbc);
2599         unlock_page(page);
2600         return rc;
2601 }
2602
2603 static int cifs_write_end(struct file *file, struct address_space *mapping,
2604                         loff_t pos, unsigned len, unsigned copied,
2605                         struct page *page, void *fsdata)
2606 {
2607         int rc;
2608         struct inode *inode = mapping->host;
2609         struct cifsFileInfo *cfile = file->private_data;
2610         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2611         __u32 pid;
2612
2613         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2614                 pid = cfile->pid;
2615         else
2616                 pid = current->tgid;
2617
2618         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2619                  page, pos, copied);
2620
2621         if (PageChecked(page)) {
2622                 if (copied == len)
2623                         SetPageUptodate(page);
2624                 ClearPageChecked(page);
2625         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2626                 SetPageUptodate(page);
2627
2628         if (!PageUptodate(page)) {
2629                 char *page_data;
2630                 unsigned offset = pos & (PAGE_SIZE - 1);
2631                 unsigned int xid;
2632
2633                 xid = get_xid();
2634                 /* this is probably better than directly calling
2635                    cifs_partialpagewrite() since here the file handle is
2636                    known, which we might as well leverage */
2637                 /* BB check if anything else missing out of ppw
2638                    such as updating last write time */
2639                 page_data = kmap(page);
2640                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2641                 /* if (rc < 0) should we set writebehind rc? */
2642                 kunmap(page);
2643
2644                 free_xid(xid);
2645         } else {
2646                 rc = copied;
2647                 pos += copied;
2648                 set_page_dirty(page);
2649         }
2650
2651         if (rc > 0) {
2652                 spin_lock(&inode->i_lock);
2653                 if (pos > inode->i_size) {
2654                         i_size_write(inode, pos);
2655                         inode->i_blocks = (512 - 1 + pos) >> 9;
2656                 }
2657                 spin_unlock(&inode->i_lock);
2658         }
2659
2660         unlock_page(page);
2661         put_page(page);
2662         /* Indication to update ctime and mtime as close is deferred */
2663         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2664
2665         return rc;
2666 }
2667
2668 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2669                       int datasync)
2670 {
2671         unsigned int xid;
2672         int rc = 0;
2673         struct cifs_tcon *tcon;
2674         struct TCP_Server_Info *server;
2675         struct cifsFileInfo *smbfile = file->private_data;
2676         struct inode *inode = file_inode(file);
2677         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2678
2679         rc = file_write_and_wait_range(file, start, end);
2680         if (rc) {
2681                 trace_cifs_fsync_err(inode->i_ino, rc);
2682                 return rc;
2683         }
2684
2685         xid = get_xid();
2686
2687         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2688                  file, datasync);
2689
2690         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2691                 rc = cifs_zap_mapping(inode);
2692                 if (rc) {
2693                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2694                         rc = 0; /* don't care about it in fsync */
2695                 }
2696         }
2697
2698         tcon = tlink_tcon(smbfile->tlink);
2699         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2700                 server = tcon->ses->server;
2701                 if (server->ops->flush == NULL) {
2702                         rc = -ENOSYS;
2703                         goto strict_fsync_exit;
2704                 }
2705
2706                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2707                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2708                         if (smbfile) {
2709                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2710                                 cifsFileInfo_put(smbfile);
2711                         } else
2712                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2713                 } else
2714                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2715         }
2716
2717 strict_fsync_exit:
2718         free_xid(xid);
2719         return rc;
2720 }
2721
2722 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2723 {
2724         unsigned int xid;
2725         int rc = 0;
2726         struct cifs_tcon *tcon;
2727         struct TCP_Server_Info *server;
2728         struct cifsFileInfo *smbfile = file->private_data;
2729         struct inode *inode = file_inode(file);
2730         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2731
2732         rc = file_write_and_wait_range(file, start, end);
2733         if (rc) {
2734                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2735                 return rc;
2736         }
2737
2738         xid = get_xid();
2739
2740         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2741                  file, datasync);
2742
2743         tcon = tlink_tcon(smbfile->tlink);
2744         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2745                 server = tcon->ses->server;
2746                 if (server->ops->flush == NULL) {
2747                         rc = -ENOSYS;
2748                         goto fsync_exit;
2749                 }
2750
2751                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2752                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2753                         if (smbfile) {
2754                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2755                                 cifsFileInfo_put(smbfile);
2756                         } else
2757                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2758                 } else
2759                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2760         }
2761
2762 fsync_exit:
2763         free_xid(xid);
2764         return rc;
2765 }
2766
2767 /*
2768  * As file closes, flush all cached write data for this inode checking
2769  * for write behind errors.
2770  */
2771 int cifs_flush(struct file *file, fl_owner_t id)
2772 {
2773         struct inode *inode = file_inode(file);
2774         int rc = 0;
2775
2776         if (file->f_mode & FMODE_WRITE)
2777                 rc = filemap_write_and_wait(inode->i_mapping);
2778
2779         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2780         if (rc)
2781                 trace_cifs_flush_err(inode->i_ino, rc);
2782         return rc;
2783 }
2784
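/*
 * Allocate num_pages pages for an uncached write. On failure, put any
 * pages already allocated and return -ENOMEM.
 */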
2785 static int
2786 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2787 {
2788         int rc = 0;
2789         unsigned long i;
2790
2791         for (i = 0; i < num_pages; i++) {
2792                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2793                 if (!pages[i]) {
2794                         /*
2795                          * save the number of pages we have already
2796                          * allocated and return -ENOMEM
2797                          */
2798                         num_pages = i;
2799                         rc = -ENOMEM;
2800                         break;
2801                 }
2802         }
2803
2804         if (rc) {
2805                 for (i = 0; i < num_pages; i++)
2806                         put_page(pages[i]);
2807         }
2808         return rc;
2809 }
2810
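/*
 * Clamp len to the negotiated wsize and return the number of pages needed
 * to hold that many bytes; the clamped length is optionally reported via
 * cur_len.
 */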
2811 static inline
2812 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2813 {
2814         size_t num_pages;
2815         size_t clen;
2816
2817         clen = min_t(const size_t, len, wsize);
2818         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2819
2820         if (cur_len)
2821                 *cur_len = clen;
2822
2823         return num_pages;
2824 }
2825
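/*
 * Last-reference release for a wdata used by an uncached write: drop the
 * reference on the owning aio context and put the data pages before
 * freeing the structure.
 */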
2826 static void
2827 cifs_uncached_writedata_release(struct kref *refcount)
2828 {
2829         int i;
2830         struct cifs_writedata *wdata = container_of(refcount,
2831                                         struct cifs_writedata, refcount);
2832
2833         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2834         for (i = 0; i < wdata->nr_pages; i++)
2835                 put_page(wdata->pages[i]);
2836         cifs_writedata_release(refcount);
2837 }
2838
2839 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2840
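/*
 * Completion work for an uncached write: advance the server EOF (and
 * i_size) if the write extended the file, then let the aio context
 * collect the result.
 */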
2841 static void
2842 cifs_uncached_writev_complete(struct work_struct *work)
2843 {
2844         struct cifs_writedata *wdata = container_of(work,
2845                                         struct cifs_writedata, work);
2846         struct inode *inode = d_inode(wdata->cfile->dentry);
2847         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2848
2849         spin_lock(&inode->i_lock);
2850         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2851         if (cifsi->server_eof > inode->i_size)
2852                 i_size_write(inode, cifsi->server_eof);
2853         spin_unlock(&inode->i_lock);
2854
2855         complete(&wdata->done);
2856         collect_uncached_write_data(wdata->ctx);
2857         /* the below call can possibly free the last ref to aio ctx */
2858         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2859 }
2860
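/*
 * Copy up to *len bytes from the iterator into the wdata pages. On
 * return, *len holds the number of bytes actually copied and *num_pages
 * the number of pages used; -EFAULT means nothing could be copied at all.
 */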
2861 static int
2862 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2863                       size_t *len, unsigned long *num_pages)
2864 {
2865         size_t save_len, copied, bytes, cur_len = *len;
2866         unsigned long i, nr_pages = *num_pages;
2867
2868         save_len = cur_len;
2869         for (i = 0; i < nr_pages; i++) {
2870                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2871                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2872                 cur_len -= copied;
2873                 /*
2874                  * If we didn't copy as much as we expected, then that
2875                  * may mean we trod into an unmapped area. Stop copying
2876                  * at that point. On the next pass through the big
2877                  * loop, we'll likely end up getting a zero-length
2878                  * write and bailing out of it.
2879                  */
2880                 if (copied < bytes)
2881                         break;
2882         }
2883         cur_len = save_len - cur_len;
2884         *len = cur_len;
2885
2886         /*
2887          * If we have no data to send, then that probably means that
2888          * the copy above failed altogether. That's most likely because
2889          * the address in the iovec was bogus. Return -EFAULT and let
2890          * the caller free anything we allocated and bail out.
2891          */
2892         if (!cur_len)
2893                 return -EFAULT;
2894
2895         /*
2896          * i + 1 now represents the number of pages we actually used in
2897          * the copy phase above.
2898          */
2899         *num_pages = i + 1;
2900         return 0;
2901 }
2902
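/*
 * Resend a wdata after a reconnect: wait until enough credits are
 * available to cover the whole wdata, then reissue the async write,
 * retrying as long as the result is -EAGAIN.
 */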
2903 static int
2904 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2905         struct cifs_aio_ctx *ctx)
2906 {
2907         unsigned int wsize;
2908         struct cifs_credits credits;
2909         int rc;
2910         struct TCP_Server_Info *server = wdata->server;
2911
2912         do {
2913                 if (wdata->cfile->invalidHandle) {
2914                         rc = cifs_reopen_file(wdata->cfile, false);
2915                         if (rc == -EAGAIN)
2916                                 continue;
2917                         else if (rc)
2918                                 break;
2919                 }
2920
2922                 /*
2923                  * Wait for credits to resend this wdata.
2924                  * Note: we attempt to resend the whole wdata, not in
2925                  * segments.
2926                  */
2927                 do {
2928                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2929                                                 &wsize, &credits);
2930                         if (rc)
2931                                 goto fail;
2932
2933                         if (wsize < wdata->bytes) {
2934                                 add_credits_and_wake_if(server, &credits, 0);
2935                                 msleep(1000);
2936                         }
2937                 } while (wsize < wdata->bytes);
2938                 wdata->credits = credits;
2939
2940                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2941
2942                 if (!rc) {
2943                         if (wdata->cfile->invalidHandle)
2944                                 rc = -EAGAIN;
2945                         else {
2946 #ifdef CONFIG_CIFS_SMB_DIRECT
2947                                 if (wdata->mr) {
2948                                         wdata->mr->need_invalidate = true;
2949                                         smbd_deregister_mr(wdata->mr);
2950                                         wdata->mr = NULL;
2951                                 }
2952 #endif
2953                                 rc = server->ops->async_writev(wdata,
2954                                         cifs_uncached_writedata_release);
2955                         }
2956                 }
2957
2958                 /* If the write was successfully sent, we are done */
2959                 if (!rc) {
2960                         list_add_tail(&wdata->list, wdata_list);
2961                         return 0;
2962                 }
2963
2964                 /* Roll back credits and retry if needed */
2965                 add_credits_and_wake_if(server, &wdata->credits, 0);
2966         } while (rc == -EAGAIN);
2967
2968 fail:
2969         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2970         return rc;
2971 }
2972
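/*
 * Split an uncached or direct write into wsize-bounded chunks, build a
 * wdata for each chunk and send it asynchronously. Requests that were
 * successfully sent are queued on wdata_list for the caller to collect.
 */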
2973 static int
2974 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2975                      struct cifsFileInfo *open_file,
2976                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2977                      struct cifs_aio_ctx *ctx)
2978 {
2979         int rc = 0;
2980         size_t cur_len;
2981         unsigned long nr_pages, num_pages, i;
2982         struct cifs_writedata *wdata;
2983         struct iov_iter saved_from = *from;
2984         loff_t saved_offset = offset;
2985         pid_t pid;
2986         struct TCP_Server_Info *server;
2987         struct page **pagevec;
2988         size_t start;
2989         unsigned int xid;
2990
2991         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2992                 pid = open_file->pid;
2993         else
2994                 pid = current->tgid;
2995
2996         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2997         xid = get_xid();
2998
2999         do {
3000                 unsigned int wsize;
3001                 struct cifs_credits credits_on_stack;
3002                 struct cifs_credits *credits = &credits_on_stack;
3003
3004                 if (open_file->invalidHandle) {
3005                         rc = cifs_reopen_file(open_file, false);
3006                         if (rc == -EAGAIN)
3007                                 continue;
3008                         else if (rc)
3009                                 break;
3010                 }
3011
3012                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3013                                                    &wsize, credits);
3014                 if (rc)
3015                         break;
3016
3017                 cur_len = min_t(const size_t, len, wsize);
3018
3019                 if (ctx->direct_io) {
3020                         ssize_t result;
3021
3022                         result = iov_iter_get_pages_alloc(
3023                                 from, &pagevec, cur_len, &start);
3024                         if (result < 0) {
3025                                 cifs_dbg(VFS,
3026                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3027                                          result, iov_iter_type(from),
3028                                          from->iov_offset, from->count);
3029                                 dump_stack();
3030
3031                                 rc = result;
3032                                 add_credits_and_wake_if(server, credits, 0);
3033                                 break;
3034                         }
3035                         cur_len = (size_t)result;
3036                         iov_iter_advance(from, cur_len);
3037
3038                         nr_pages =
3039                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3040
3041                         wdata = cifs_writedata_direct_alloc(pagevec,
3042                                              cifs_uncached_writev_complete);
3043                         if (!wdata) {
3044                                 rc = -ENOMEM;
3045                                 add_credits_and_wake_if(server, credits, 0);
3046                                 break;
3047                         }
3048
3050                         wdata->page_offset = start;
3051                         wdata->tailsz =
3052                                 nr_pages > 1 ?
3053                                         cur_len - (PAGE_SIZE - start) -
3054                                         (nr_pages - 2) * PAGE_SIZE :
3055                                         cur_len;
3056                 } else {
3057                         nr_pages = get_numpages(wsize, len, &cur_len);
3058                         wdata = cifs_writedata_alloc(nr_pages,
3059                                              cifs_uncached_writev_complete);
3060                         if (!wdata) {
3061                                 rc = -ENOMEM;
3062                                 add_credits_and_wake_if(server, credits, 0);
3063                                 break;
3064                         }
3065
3066                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3067                         if (rc) {
3068                                 kvfree(wdata->pages);
3069                                 kfree(wdata);
3070                                 add_credits_and_wake_if(server, credits, 0);
3071                                 break;
3072                         }
3073
3074                         num_pages = nr_pages;
3075                         rc = wdata_fill_from_iovec(
3076                                 wdata, from, &cur_len, &num_pages);
3077                         if (rc) {
3078                                 for (i = 0; i < nr_pages; i++)
3079                                         put_page(wdata->pages[i]);
3080                                 kvfree(wdata->pages);
3081                                 kfree(wdata);
3082                                 add_credits_and_wake_if(server, credits, 0);
3083                                 break;
3084                         }
3085
3086                         /*
3087                          * Bring nr_pages down to the number of pages we
3088                          * actually used, and free any pages that we didn't use.
3089                          */
3090                         for ( ; nr_pages > num_pages; nr_pages--)
3091                                 put_page(wdata->pages[nr_pages - 1]);
3092
3093                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3094                 }
3095
3096                 wdata->sync_mode = WB_SYNC_ALL;
3097                 wdata->nr_pages = nr_pages;
3098                 wdata->offset = (__u64)offset;
3099                 wdata->cfile = cifsFileInfo_get(open_file);
3100                 wdata->server = server;
3101                 wdata->pid = pid;
3102                 wdata->bytes = cur_len;
3103                 wdata->pagesz = PAGE_SIZE;
3104                 wdata->credits = credits_on_stack;
3105                 wdata->ctx = ctx;
3106                 kref_get(&ctx->refcount);
3107
3108                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3109
3110                 if (!rc) {
3111                         if (wdata->cfile->invalidHandle)
3112                                 rc = -EAGAIN;
3113                         else
3114                                 rc = server->ops->async_writev(wdata,
3115                                         cifs_uncached_writedata_release);
3116                 }
3117
3118                 if (rc) {
3119                         add_credits_and_wake_if(server, &wdata->credits, 0);
3120                         kref_put(&wdata->refcount,
3121                                  cifs_uncached_writedata_release);
3122                         if (rc == -EAGAIN) {
3123                                 *from = saved_from;
3124                                 iov_iter_advance(from, offset - saved_offset);
3125                                 continue;
3126                         }
3127                         break;
3128                 }
3129
3130                 list_add_tail(&wdata->list, wdata_list);
3131                 offset += cur_len;
3132                 len -= cur_len;
3133         } while (len > 0);
3134
3135         free_xid(xid);
3136         return rc;
3137 }
3138
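/*
 * Collect the results of all outstanding uncached write requests for an
 * aio context, resending any chunk that failed with -EAGAIN, then
 * complete the iocb or wake up the synchronous waiter.
 */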
3139 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3140 {
3141         struct cifs_writedata *wdata, *tmp;
3142         struct cifs_tcon *tcon;
3143         struct cifs_sb_info *cifs_sb;
3144         struct dentry *dentry = ctx->cfile->dentry;
3145         ssize_t rc;
3146
3147         tcon = tlink_tcon(ctx->cfile->tlink);
3148         cifs_sb = CIFS_SB(dentry->d_sb);
3149
3150         mutex_lock(&ctx->aio_mutex);
3151
3152         if (list_empty(&ctx->list)) {
3153                 mutex_unlock(&ctx->aio_mutex);
3154                 return;
3155         }
3156
3157         rc = ctx->rc;
3158         /*
3159          * Wait for and collect replies for any successful sends in order of
3160          * increasing offset. Once an error is hit, return without waiting
3161          * for any more replies.
3162          */
3163 restart_loop:
3164         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3165                 if (!rc) {
3166                         if (!try_wait_for_completion(&wdata->done)) {
3167                                 mutex_unlock(&ctx->aio_mutex);
3168                                 return;
3169                         }
3170
3171                         if (wdata->result)
3172                                 rc = wdata->result;
3173                         else
3174                                 ctx->total_len += wdata->bytes;
3175
3176                         /* resend call if it's a retryable error */
3177                         if (rc == -EAGAIN) {
3178                                 struct list_head tmp_list;
3179                                 struct iov_iter tmp_from = ctx->iter;
3180
3181                                 INIT_LIST_HEAD(&tmp_list);
3182                                 list_del_init(&wdata->list);
3183
3184                                 if (ctx->direct_io)
3185                                         rc = cifs_resend_wdata(
3186                                                 wdata, &tmp_list, ctx);
3187                                 else {
3188                                         iov_iter_advance(&tmp_from,
3189                                                  wdata->offset - ctx->pos);
3190
3191                                         rc = cifs_write_from_iter(wdata->offset,
3192                                                 wdata->bytes, &tmp_from,
3193                                                 ctx->cfile, cifs_sb, &tmp_list,
3194                                                 ctx);
3195
3196                                         kref_put(&wdata->refcount,
3197                                                 cifs_uncached_writedata_release);
3198                                 }
3199
3200                                 list_splice(&tmp_list, &ctx->list);
3201                                 goto restart_loop;
3202                         }
3203                 }
3204                 list_del_init(&wdata->list);
3205                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3206         }
3207
3208         cifs_stats_bytes_written(tcon, ctx->total_len);
3209         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3210
3211         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3212
3213         mutex_unlock(&ctx->aio_mutex);
3214
3215         if (ctx->iocb && ctx->iocb->ki_complete)
3216                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3217         else
3218                 complete(&ctx->done);
3219 }
3220
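/*
 * Common back end for uncached and direct writes: set up a cifs_aio_ctx,
 * send the async write requests, then either return -EIOCBQUEUED for an
 * async iocb or wait for completion and return the number of bytes
 * written.
 */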
3221 static ssize_t __cifs_writev(
3222         struct kiocb *iocb, struct iov_iter *from, bool direct)
3223 {
3224         struct file *file = iocb->ki_filp;
3225         ssize_t total_written = 0;
3226         struct cifsFileInfo *cfile;
3227         struct cifs_tcon *tcon;
3228         struct cifs_sb_info *cifs_sb;
3229         struct cifs_aio_ctx *ctx;
3230         struct iov_iter saved_from = *from;
3231         size_t len = iov_iter_count(from);
3232         int rc;
3233
3234         /*
3235          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3236          * In this case, fall back to the non-direct write path.
3237          * This could be improved by getting pages directly from ITER_KVEC.
3238          */
3239         if (direct && iov_iter_is_kvec(from)) {
3240                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3241                 direct = false;
3242         }
3243
3244         rc = generic_write_checks(iocb, from);
3245         if (rc <= 0)
3246                 return rc;
3247
3248         cifs_sb = CIFS_FILE_SB(file);
3249         cfile = file->private_data;
3250         tcon = tlink_tcon(cfile->tlink);
3251
3252         if (!tcon->ses->server->ops->async_writev)
3253                 return -ENOSYS;
3254
3255         ctx = cifs_aio_ctx_alloc();
3256         if (!ctx)
3257                 return -ENOMEM;
3258
3259         ctx->cfile = cifsFileInfo_get(cfile);
3260
3261         if (!is_sync_kiocb(iocb))
3262                 ctx->iocb = iocb;
3263
3264         ctx->pos = iocb->ki_pos;
3265
3266         if (direct) {
3267                 ctx->direct_io = true;
3268                 ctx->iter = *from;
3269                 ctx->len = len;
3270         } else {
3271                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3272                 if (rc) {
3273                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3274                         return rc;
3275                 }
3276         }
3277
3278         /* grab a lock here because write response handlers can access ctx */
3279         mutex_lock(&ctx->aio_mutex);
3280
3281         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3282                                   cfile, cifs_sb, &ctx->list, ctx);
3283
3284         /*
3285          * If at least one write was successfully sent, then discard any rc
3286          * value from the later writes. If the remaining writes succeed, we
3287          * will end up returning whatever was written. If one fails, we will
3288          * get a new rc value from that.
3289          */
3290         if (!list_empty(&ctx->list))
3291                 rc = 0;
3292
3293         mutex_unlock(&ctx->aio_mutex);
3294
3295         if (rc) {
3296                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3297                 return rc;
3298         }
3299
3300         if (!is_sync_kiocb(iocb)) {
3301                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3302                 return -EIOCBQUEUED;
3303         }
3304
3305         rc = wait_for_completion_killable(&ctx->done);
3306         if (rc) {
3307                 mutex_lock(&ctx->aio_mutex);
3308                 ctx->rc = rc = -EINTR;
3309                 total_written = ctx->total_len;
3310                 mutex_unlock(&ctx->aio_mutex);
3311         } else {
3312                 rc = ctx->rc;
3313                 total_written = ctx->total_len;
3314         }
3315
3316         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3317
3318         if (unlikely(!total_written))
3319                 return rc;
3320
3321         iocb->ki_pos += total_written;
3322         return total_written;
3323 }
3324
3325 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3326 {
3327         return __cifs_writev(iocb, from, true);
3328 }
3329
3330 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3331 {
3332         return __cifs_writev(iocb, from, false);
3333 }
3334
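/*
 * Cached write for oplocked files: hold lock_sem so the brlock list
 * cannot change under us, fail with -EACCES on a mandatory lock
 * conflict, otherwise write through the page cache.
 */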
3335 static ssize_t
3336 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3337 {
3338         struct file *file = iocb->ki_filp;
3339         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3340         struct inode *inode = file->f_mapping->host;
3341         struct cifsInodeInfo *cinode = CIFS_I(inode);
3342         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3343         ssize_t rc;
3344
3345         inode_lock(inode);
3346         /*
3347          * We need to hold the sem to be sure nobody modifies lock list
3348          * with a brlock that prevents writing.
3349          */
3350         down_read(&cinode->lock_sem);
3351
3352         rc = generic_write_checks(iocb, from);
3353         if (rc <= 0)
3354                 goto out;
3355
3356         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3357                                      server->vals->exclusive_lock_type, 0,
3358                                      NULL, CIFS_WRITE_OP))
3359                 rc = __generic_file_write_iter(iocb, from);
3360         else
3361                 rc = -EACCES;
3362 out:
3363         up_read(&cinode->lock_sem);
3364         inode_unlock(inode);
3365
3366         if (rc > 0)
3367                 rc = generic_write_sync(iocb, rc);
3368         return rc;
3369 }
3370
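/*
 * Entry point for writes in strict cache mode: write through the page
 * cache only while we hold a write oplock/lease, otherwise send the data
 * uncached and zap any read-cached pages afterwards.
 */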
3371 ssize_t
3372 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3373 {
3374         struct inode *inode = file_inode(iocb->ki_filp);
3375         struct cifsInodeInfo *cinode = CIFS_I(inode);
3376         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3377         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3378                                                 iocb->ki_filp->private_data;
3379         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3380         ssize_t written;
3381
3382         written = cifs_get_writer(cinode);
3383         if (written)
3384                 return written;
3385
3386         if (CIFS_CACHE_WRITE(cinode)) {
3387                 if (cap_unix(tcon->ses) &&
3388                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3389                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3390                         written = generic_file_write_iter(iocb, from);
3391                         goto out;
3392                 }
3393                 written = cifs_writev(iocb, from);
3394                 goto out;
3395         }
3396         /*
3397          * For non-oplocked files in strict cache mode we need to write the data
3398  * to the server exactly from pos to pos+len-1 rather than flush all
3399  * affected pages, because the flush may cause an error with mandatory
3400  * locks on these pages but not on the region from pos to pos+len-1.
3401          */
3402         written = cifs_user_writev(iocb, from);
3403         if (CIFS_CACHE_READ(cinode)) {
3404                 /*
3405                  * We have read level caching and we have just sent a write
3406                  * request to the server thus making data in the cache stale.
3407                  * Zap the cache and set oplock/lease level to NONE to avoid
3408                  * reading stale data from the cache. All subsequent read
3409                  * operations will read new data from the server.
3410                  */
3411                 cifs_zap_mapping(inode);
3412                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3413                          inode);
3414                 cinode->oplock = 0;
3415         }
3416 out:
3417         cifs_put_writer(cinode);
3418         return written;
3419 }
3420
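/*
 * Allocate a cifs_readdata that wraps a caller-supplied page array (used
 * by the direct I/O path, where the pages are pinned user pages).
 */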
3421 static struct cifs_readdata *
3422 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3423 {
3424         struct cifs_readdata *rdata;
3425
3426         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3427         if (rdata != NULL) {
3428                 rdata->pages = pages;
3429                 kref_init(&rdata->refcount);
3430                 INIT_LIST_HEAD(&rdata->list);
3431                 init_completion(&rdata->done);
3432                 INIT_WORK(&rdata->work, complete);
3433         }
3434
3435         return rdata;
3436 }
3437
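/* Allocate a cifs_readdata plus a fresh array of nr_pages page pointers. */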
3438 static struct cifs_readdata *
3439 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3440 {
3441         struct page **pages =
3442                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3443         struct cifs_readdata *ret = NULL;
3444
3445         if (pages) {
3446                 ret = cifs_readdata_direct_alloc(pages, complete);
3447                 if (!ret)
3448                         kfree(pages);
3449         }
3450
3451         return ret;
3452 }
3453
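/*
 * Last-reference release for a cifs_readdata: deregister any smbdirect
 * memory region, drop the file reference and free the page array.
 */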
3454 void
3455 cifs_readdata_release(struct kref *refcount)
3456 {
3457         struct cifs_readdata *rdata = container_of(refcount,
3458                                         struct cifs_readdata, refcount);
3459 #ifdef CONFIG_CIFS_SMB_DIRECT
3460         if (rdata->mr) {
3461                 smbd_deregister_mr(rdata->mr);
3462                 rdata->mr = NULL;
3463         }
3464 #endif
3465         if (rdata->cfile)
3466                 cifsFileInfo_put(rdata->cfile);
3467
3468         kvfree(rdata->pages);
3469         kfree(rdata);
3470 }
3471
3472 static int
3473 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3474 {
3475         int rc = 0;
3476         struct page *page;
3477         unsigned int i;
3478
3479         for (i = 0; i < nr_pages; i++) {
3480                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3481                 if (!page) {
3482                         rc = -ENOMEM;
3483                         break;
3484                 }
3485                 rdata->pages[i] = page;
3486         }
3487
3488         if (rc) {
3489                 unsigned int nr_page_failed = i;
3490
3491                 for (i = 0; i < nr_page_failed; i++) {
3492                         put_page(rdata->pages[i]);
3493                         rdata->pages[i] = NULL;
3494                 }
3495         }
3496         return rc;
3497 }
3498
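/*
 * Last-reference release for an rdata used by an uncached read: also
 * drops the aio context reference and puts the data pages.
 */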
3499 static void
3500 cifs_uncached_readdata_release(struct kref *refcount)
3501 {
3502         struct cifs_readdata *rdata = container_of(refcount,
3503                                         struct cifs_readdata, refcount);
3504         unsigned int i;
3505
3506         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3507         for (i = 0; i < rdata->nr_pages; i++) {
3508                 put_page(rdata->pages[i]);
3509         }
3510         cifs_readdata_release(refcount);
3511 }
3512
3513 /**
3514  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3515  * @rdata:      the readdata response with list of pages holding data
3516  * @iter:       destination for our data
3517  *
3518  * This function copies data from a list of pages in a readdata response into
3519  * an array of iovecs. It will first calculate where the data should go
3520  * based on the info in the readdata and then copy the data into that spot.
3521  */
3522 static int
3523 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3524 {
3525         size_t remaining = rdata->got_bytes;
3526         unsigned int i;
3527
3528         for (i = 0; i < rdata->nr_pages; i++) {
3529                 struct page *page = rdata->pages[i];
3530                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3531                 size_t written;
3532
3533                 if (unlikely(iov_iter_is_pipe(iter))) {
3534                         void *addr = kmap_atomic(page);
3535
3536                         written = copy_to_iter(addr, copy, iter);
3537                         kunmap_atomic(addr);
3538                 } else
3539                         written = copy_page_to_iter(page, 0, copy, iter);
3540                 remaining -= written;
3541                 if (written < copy && iov_iter_count(iter) > 0)
3542                         break;
3543         }
3544         return remaining ? -EFAULT : 0;
3545 }
3546
3547 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3548
3549 static void
3550 cifs_uncached_readv_complete(struct work_struct *work)
3551 {
3552         struct cifs_readdata *rdata = container_of(work,
3553                                                 struct cifs_readdata, work);
3554
3555         complete(&rdata->done);
3556         collect_uncached_read_data(rdata->ctx);
3557         /* the below call can possibly free the last ref to aio ctx */
3558         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3559 }
3560
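/*
 * Fill the rdata pages with up to len bytes, either copied from the
 * supplied iterator or read straight from the server socket (smbdirect
 * RDMA transfers are already in place), releasing any pages a short read
 * leaves unused.
 */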
3561 static int
3562 uncached_fill_pages(struct TCP_Server_Info *server,
3563                     struct cifs_readdata *rdata, struct iov_iter *iter,
3564                     unsigned int len)
3565 {
3566         int result = 0;
3567         unsigned int i;
3568         unsigned int nr_pages = rdata->nr_pages;
3569         unsigned int page_offset = rdata->page_offset;
3570
3571         rdata->got_bytes = 0;
3572         rdata->tailsz = PAGE_SIZE;
3573         for (i = 0; i < nr_pages; i++) {
3574                 struct page *page = rdata->pages[i];
3575                 size_t n;
3576                 unsigned int segment_size = rdata->pagesz;
3577
3578                 if (i == 0)
3579                         segment_size -= page_offset;
3580                 else
3581                         page_offset = 0;
3582
3584                 if (len <= 0) {
3585                         /* no need to hold page hostage */
3586                         rdata->pages[i] = NULL;
3587                         rdata->nr_pages--;
3588                         put_page(page);
3589                         continue;
3590                 }
3591
3592                 n = len;
3593                 if (len >= segment_size)
3594                         /* enough data to fill the page */
3595                         n = segment_size;
3596                 else
3597                         rdata->tailsz = len;
3598                 len -= n;
3599
3600                 if (iter)
3601                         result = copy_page_from_iter(
3602                                         page, page_offset, n, iter);
3603 #ifdef CONFIG_CIFS_SMB_DIRECT
3604                 else if (rdata->mr)
3605                         result = n;
3606 #endif
3607                 else
3608                         result = cifs_read_page_from_socket(
3609                                         server, page, page_offset, n);
3610                 if (result < 0)
3611                         break;
3612
3613                 rdata->got_bytes += result;
3614         }
3615
3616         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3617                                                 rdata->got_bytes : result;
3618 }
3619
3620 static int
3621 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3622                               struct cifs_readdata *rdata, unsigned int len)
3623 {
3624         return uncached_fill_pages(server, rdata, NULL, len);
3625 }
3626
3627 static int
3628 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3629                               struct cifs_readdata *rdata,
3630                               struct iov_iter *iter)
3631 {
3632         return uncached_fill_pages(server, rdata, iter, iter->count);
3633 }
3634
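/*
 * Resend an rdata after a reconnect: wait until enough credits are
 * available to cover the whole rdata, then reissue the async read,
 * retrying as long as the result is -EAGAIN.
 */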
3635 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3636                         struct list_head *rdata_list,
3637                         struct cifs_aio_ctx *ctx)
3638 {
3639         unsigned int rsize;
3640         struct cifs_credits credits;
3641         int rc;
3642         struct TCP_Server_Info *server;
3643
3644         /* XXX: should we pick a new channel here? */
3645         server = rdata->server;
3646
3647         do {
3648                 if (rdata->cfile->invalidHandle) {
3649                         rc = cifs_reopen_file(rdata->cfile, true);
3650                         if (rc == -EAGAIN)
3651                                 continue;
3652                         else if (rc)
3653                                 break;
3654                 }
3655
3656                 /*
3657                  * Wait for credits to resend this rdata.
3658                  * Note: we attempt to resend the whole rdata, not in
3659                  * segments.
3660                  */
3661                 do {
3662                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3663                                                 &rsize, &credits);
3664
3665                         if (rc)
3666                                 goto fail;
3667
3668                         if (rsize < rdata->bytes) {
3669                                 add_credits_and_wake_if(server, &credits, 0);
3670                                 msleep(1000);
3671                         }
3672                 } while (rsize < rdata->bytes);
3673                 rdata->credits = credits;
3674
3675                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3676                 if (!rc) {
3677                         if (rdata->cfile->invalidHandle)
3678                                 rc = -EAGAIN;
3679                         else {
3680 #ifdef CONFIG_CIFS_SMB_DIRECT
3681                                 if (rdata->mr) {
3682                                         rdata->mr->need_invalidate = true;
3683                                         smbd_deregister_mr(rdata->mr);
3684                                         rdata->mr = NULL;
3685                                 }
3686 #endif
3687                                 rc = server->ops->async_readv(rdata);
3688                         }
3689                 }
3690
3691                 /* If the read was successfully sent, we are done */
3692                 if (!rc) {
3693                         /* Add to aio pending list */
3694                         list_add_tail(&rdata->list, rdata_list);
3695                         return 0;
3696                 }
3697
3698                 /* Roll back credits and retry if needed */
3699                 add_credits_and_wake_if(server, &rdata->credits, 0);
3700         } while (rc == -EAGAIN);
3701
3702 fail:
3703         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3704         return rc;
3705 }
3706
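/*
 * Split an uncached or direct read into rsize-bounded chunks, build an
 * rdata for each chunk and issue it asynchronously. Requests that were
 * successfully sent are queued on rdata_list for the caller to collect.
 */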
3707 static int
3708 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3709                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3710                      struct cifs_aio_ctx *ctx)
3711 {
3712         struct cifs_readdata *rdata;
3713         unsigned int npages, rsize;
3714         struct cifs_credits credits_on_stack;
3715         struct cifs_credits *credits = &credits_on_stack;
3716         size_t cur_len;
3717         int rc;
3718         pid_t pid;
3719         struct TCP_Server_Info *server;
3720         struct page **pagevec;
3721         size_t start;
3722         struct iov_iter direct_iov = ctx->iter;
3723
3724         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3725
3726         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3727                 pid = open_file->pid;
3728         else
3729                 pid = current->tgid;
3730
3731         if (ctx->direct_io)
3732                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3733
3734         do {
3735                 if (open_file->invalidHandle) {
3736                         rc = cifs_reopen_file(open_file, true);
3737                         if (rc == -EAGAIN)
3738                                 continue;
3739                         else if (rc)
3740                                 break;
3741                 }
3742
3743                 if (cifs_sb->ctx->rsize == 0)
3744                         cifs_sb->ctx->rsize =
3745                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3746                                                              cifs_sb->ctx);
3747
3748                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3749                                                    &rsize, credits);
3750                 if (rc)
3751                         break;
3752
3753                 cur_len = min_t(const size_t, len, rsize);
3754
3755                 if (ctx->direct_io) {
3756                         ssize_t result;
3757
3758                         result = iov_iter_get_pages_alloc(
3759                                         &direct_iov, &pagevec,
3760                                         cur_len, &start);
3761                         if (result < 0) {
3762                                 cifs_dbg(VFS,
3763                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3764                                          result, iov_iter_type(&direct_iov),
3765                                          direct_iov.iov_offset,
3766                                          direct_iov.count);
3767                                 dump_stack();
3768
3769                                 rc = result;
3770                                 add_credits_and_wake_if(server, credits, 0);
3771                                 break;
3772                         }
3773                         cur_len = (size_t)result;
3774                         iov_iter_advance(&direct_iov, cur_len);
3775
3776                         rdata = cifs_readdata_direct_alloc(
3777                                         pagevec, cifs_uncached_readv_complete);
3778                         if (!rdata) {
3779                                 add_credits_and_wake_if(server, credits, 0);
3780                                 rc = -ENOMEM;
3781                                 break;
3782                         }
3783
3784                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3785                         rdata->page_offset = start;
3786                         rdata->tailsz = npages > 1 ?
3787                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3788                                 cur_len;
3789
3790                 } else {
3791
3792                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3793                         /* allocate a readdata struct */
3794                         rdata = cifs_readdata_alloc(npages,
3795                                             cifs_uncached_readv_complete);
3796                         if (!rdata) {
3797                                 add_credits_and_wake_if(server, credits, 0);
3798                                 rc = -ENOMEM;
3799                                 break;
3800                         }
3801
3802                         rc = cifs_read_allocate_pages(rdata, npages);
3803                         if (rc) {
3804                                 kvfree(rdata->pages);
3805                                 kfree(rdata);
3806                                 add_credits_and_wake_if(server, credits, 0);
3807                                 break;
3808                         }
3809
3810                         rdata->tailsz = PAGE_SIZE;
3811                 }
3812
3813                 rdata->server = server;
3814                 rdata->cfile = cifsFileInfo_get(open_file);
3815                 rdata->nr_pages = npages;
3816                 rdata->offset = offset;
3817                 rdata->bytes = cur_len;
3818                 rdata->pid = pid;
3819                 rdata->pagesz = PAGE_SIZE;
3820                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3821                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3822                 rdata->credits = credits_on_stack;
3823                 rdata->ctx = ctx;
3824                 kref_get(&ctx->refcount);
3825
3826                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3827
3828                 if (!rc) {
3829                         if (rdata->cfile->invalidHandle)
3830                                 rc = -EAGAIN;
3831                         else
3832                                 rc = server->ops->async_readv(rdata);
3833                 }
3834
3835                 if (rc) {
3836                         add_credits_and_wake_if(server, &rdata->credits, 0);
3837                         kref_put(&rdata->refcount,
3838                                 cifs_uncached_readdata_release);
3839                         if (rc == -EAGAIN) {
3840                                 iov_iter_revert(&direct_iov, cur_len);
3841                                 continue;
3842                         }
3843                         break;
3844                 }
3845
3846                 list_add_tail(&rdata->list, rdata_list);
3847                 offset += cur_len;
3848                 len -= cur_len;
3849         } while (len > 0);
3850
3851         return rc;
3852 }
3853
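/*
 * Collect the results of all outstanding uncached read requests for an
 * aio context, resending any chunk that failed with -EAGAIN, then
 * complete the iocb or wake up the synchronous waiter.
 */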
3854 static void
3855 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3856 {
3857         struct cifs_readdata *rdata, *tmp;
3858         struct iov_iter *to = &ctx->iter;
3859         struct cifs_sb_info *cifs_sb;
3860         int rc;
3861
3862         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3863
3864         mutex_lock(&ctx->aio_mutex);
3865
3866         if (list_empty(&ctx->list)) {
3867                 mutex_unlock(&ctx->aio_mutex);
3868                 return;
3869         }
3870
3871         rc = ctx->rc;
3872         /* the loop below should proceed in the order of increasing offsets */
3873 again:
3874         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3875                 if (!rc) {
3876                         if (!try_wait_for_completion(&rdata->done)) {
3877                                 mutex_unlock(&ctx->aio_mutex);
3878                                 return;
3879                         }
3880
3881                         if (rdata->result == -EAGAIN) {
3882                                 /* resend call if it's a retryable error */
3883                                 struct list_head tmp_list;
3884                                 unsigned int got_bytes = rdata->got_bytes;
3885
3886                                 list_del_init(&rdata->list);
3887                                 INIT_LIST_HEAD(&tmp_list);
3888
3889                                 /*
3890                                  * Got part of the data and then a reconnect
3891                                  * happened -- fill the buffer and continue
3892                                  * reading.
3893                                  */
3894                                 if (got_bytes && got_bytes < rdata->bytes) {
3895                                         rc = 0;
3896                                         if (!ctx->direct_io)
3897                                                 rc = cifs_readdata_to_iov(rdata, to);
3898                                         if (rc) {
3899                                                 kref_put(&rdata->refcount,
3900                                                         cifs_uncached_readdata_release);
3901                                                 continue;
3902                                         }
3903                                 }
3904
3905                                 if (ctx->direct_io) {
3906                                         /*
3907                                          * Re-use rdata as this is a
3908                                          * direct I/O
3909                                          */
3910                                         rc = cifs_resend_rdata(
3911                                                 rdata,
3912                                                 &tmp_list, ctx);
3913                                 } else {
3914                                         rc = cifs_send_async_read(
3915                                                 rdata->offset + got_bytes,
3916                                                 rdata->bytes - got_bytes,
3917                                                 rdata->cfile, cifs_sb,
3918                                                 &tmp_list, ctx);
3919
3920                                         kref_put(&rdata->refcount,
3921                                                 cifs_uncached_readdata_release);
3922                                 }
3923
3924                                 list_splice(&tmp_list, &ctx->list);
3925
3926                                 goto again;
3927                         } else if (rdata->result)
3928                                 rc = rdata->result;
3929                         else if (!ctx->direct_io)
3930                                 rc = cifs_readdata_to_iov(rdata, to);
3931
3932                         /* if there was a short read -- discard anything left */
3933                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3934                                 rc = -ENODATA;
3935
3936                         ctx->total_len += rdata->got_bytes;
3937                 }
3938                 list_del_init(&rdata->list);
3939                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3940         }
3941
3942         if (!ctx->direct_io)
3943                 ctx->total_len = ctx->len - iov_iter_count(to);
3944
3945         /* mask nodata case */
3946         if (rc == -ENODATA)
3947                 rc = 0;
3948
3949         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3950
3951         mutex_unlock(&ctx->aio_mutex);
3952
3953         if (ctx->iocb && ctx->iocb->ki_complete)
3954                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3955         else
3956                 complete(&ctx->done);
3957 }
3958
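/*
 * Common back end for uncached and direct reads: set up a cifs_aio_ctx,
 * send the async read requests, then either return -EIOCBQUEUED for an
 * async iocb or wait for completion and return the number of bytes read.
 */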
3959 static ssize_t __cifs_readv(
3960         struct kiocb *iocb, struct iov_iter *to, bool direct)
3961 {
3962         size_t len;
3963         struct file *file = iocb->ki_filp;
3964         struct cifs_sb_info *cifs_sb;
3965         struct cifsFileInfo *cfile;
3966         struct cifs_tcon *tcon;
3967         ssize_t rc, total_read = 0;
3968         loff_t offset = iocb->ki_pos;
3969         struct cifs_aio_ctx *ctx;
3970
3971         /*
3972          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3973          * fall back to data copy read path
3974          * this could be improved by getting pages directly in ITER_KVEC
3975          */
3976         if (direct && iov_iter_is_kvec(to)) {
3977                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3978                 direct = false;
3979         }
3980
3981         len = iov_iter_count(to);
3982         if (!len)
3983                 return 0;
3984
3985         cifs_sb = CIFS_FILE_SB(file);
3986         cfile = file->private_data;
3987         tcon = tlink_tcon(cfile->tlink);
3988
3989         if (!tcon->ses->server->ops->async_readv)
3990                 return -ENOSYS;
3991
3992         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3993                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3994
3995         ctx = cifs_aio_ctx_alloc();
3996         if (!ctx)
3997                 return -ENOMEM;
3998
3999         ctx->cfile = cifsFileInfo_get(cfile);
4000
4001         if (!is_sync_kiocb(iocb))
4002                 ctx->iocb = iocb;
4003
4004         if (iter_is_iovec(to))
4005                 ctx->should_dirty = true;
4006
4007         if (direct) {
4008                 ctx->pos = offset;
4009                 ctx->direct_io = true;
4010                 ctx->iter = *to;
4011                 ctx->len = len;
4012         } else {
4013                 rc = setup_aio_ctx_iter(ctx, to, READ);
4014                 if (rc) {
4015                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4016                         return rc;
4017                 }
4018                 len = ctx->len;
4019         }
4020
4021         /* grab a lock here because read response handlers can access ctx */
4022         mutex_lock(&ctx->aio_mutex);
4023
4024         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4025
4026         /* if at least one read request was sent successfully, reset rc */
4027         if (!list_empty(&ctx->list))
4028                 rc = 0;
4029
4030         mutex_unlock(&ctx->aio_mutex);
4031
4032         if (rc) {
4033                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4034                 return rc;
4035         }
4036
4037         if (!is_sync_kiocb(iocb)) {
4038                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4039                 return -EIOCBQUEUED;
4040         }
4041
4042         rc = wait_for_completion_killable(&ctx->done);
4043         if (rc) {
4044                 mutex_lock(&ctx->aio_mutex);
4045                 ctx->rc = rc = -EINTR;
4046                 total_read = ctx->total_len;
4047                 mutex_unlock(&ctx->aio_mutex);
4048         } else {
4049                 rc = ctx->rc;
4050                 total_read = ctx->total_len;
4051         }
4052
4053         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4054
4055         if (total_read) {
4056                 iocb->ki_pos += total_read;
4057                 return total_read;
4058         }
4059         return rc;
4060 }
4061
4062 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4063 {
4064         return __cifs_readv(iocb, to, true);
4065 }
4066
4067 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4068 {
4069         return __cifs_readv(iocb, to, false);
4070 }
4071
4072 ssize_t
4073 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4074 {
4075         struct inode *inode = file_inode(iocb->ki_filp);
4076         struct cifsInodeInfo *cinode = CIFS_I(inode);
4077         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4078         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4079                                                 iocb->ki_filp->private_data;
4080         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4081         int rc = -EACCES;
4082
4083         /*
4084          * In strict cache mode we need to read from the server all the time
4085          * if we don't have a level II oplock because the server can delay the
4086          * mtime change, so we can't decide whether to invalidate the inode.
4087          * Page reads can also fail if there are mandatory locks
4088          * on pages affected by this read but not on the region from pos to
4089          * pos+len-1.
4090          */
4091         if (!CIFS_CACHE_READ(cinode))
4092                 return cifs_user_readv(iocb, to);
4093
4094         if (cap_unix(tcon->ses) &&
4095             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4096             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4097                 return generic_file_read_iter(iocb, to);
4098
4099         /*
4100          * We need to hold the sem to be sure nobody modifies lock list
4101          * with a brlock that prevents reading.
4102          */
4103         down_read(&cinode->lock_sem);
4104         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4105                                      tcon->ses->server->vals->shared_lock_type,
4106                                      0, NULL, CIFS_READ_OP))
4107                 rc = generic_file_read_iter(iocb, to);
4108         up_read(&cinode->lock_sem);
4109         return rc;
4110 }
4111
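/*
 * Synchronous read helper used by the cached read paths: issue sync_read
 * calls of at most rsize bytes, reopening an invalidated handle as
 * needed, until the request is satisfied or an error occurs.
 */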
4112 static ssize_t
4113 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4114 {
4115         int rc = -EACCES;
4116         unsigned int bytes_read = 0;
4117         unsigned int total_read;
4118         unsigned int current_read_size;
4119         unsigned int rsize;
4120         struct cifs_sb_info *cifs_sb;
4121         struct cifs_tcon *tcon;
4122         struct TCP_Server_Info *server;
4123         unsigned int xid;
4124         char *cur_offset;
4125         struct cifsFileInfo *open_file;
4126         struct cifs_io_parms io_parms = {0};
4127         int buf_type = CIFS_NO_BUFFER;
4128         __u32 pid;
4129
4130         xid = get_xid();
4131         cifs_sb = CIFS_FILE_SB(file);
4132
4133         /* FIXME: set up handlers for larger reads and/or convert to async */
4134         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4135
4136         if (file->private_data == NULL) {
4137                 rc = -EBADF;
4138                 free_xid(xid);
4139                 return rc;
4140         }
4141         open_file = file->private_data;
4142         tcon = tlink_tcon(open_file->tlink);
4143         server = cifs_pick_channel(tcon->ses);
4144
4145         if (!server->ops->sync_read) {
4146                 free_xid(xid);
4147                 return -ENOSYS;
4148         }
4149
4150         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4151                 pid = open_file->pid;
4152         else
4153                 pid = current->tgid;
4154
4155         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4156                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4157
4158         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4159              total_read += bytes_read, cur_offset += bytes_read) {
4160                 do {
4161                         current_read_size = min_t(uint, read_size - total_read,
4162                                                   rsize);
4163                         /*
4164                          * For Windows ME and 9x we do not want to request
4165                          * more than the server negotiated, since it would
4166                          * refuse the read otherwise.
4167                          */
4168                         if (!(tcon->ses->capabilities &
4169                                 tcon->ses->server->vals->cap_large_files)) {
4170                                 current_read_size = min_t(uint,
4171                                         current_read_size, CIFSMaxBufSize);
4172                         }
4173                         if (open_file->invalidHandle) {
4174                                 rc = cifs_reopen_file(open_file, true);
4175                                 if (rc != 0)
4176                                         break;
4177                         }
4178                         io_parms.pid = pid;
4179                         io_parms.tcon = tcon;
4180                         io_parms.offset = *offset;
4181                         io_parms.length = current_read_size;
4182                         io_parms.server = server;
4183                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4184                                                     &bytes_read, &cur_offset,
4185                                                     &buf_type);
4186                 } while (rc == -EAGAIN);
4187
4188                 if (rc || (bytes_read == 0)) {
4189                         if (total_read) {
4190                                 break;
4191                         } else {
4192                                 free_xid(xid);
4193                                 return rc;
4194                         }
4195                 } else {
4196                         cifs_stats_bytes_read(tcon, total_read);
4197                         *offset += bytes_read;
4198                 }
4199         }
4200         free_xid(xid);
4201         return total_read;
4202 }
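
/*
 * Worked example for cifs_read() above, assuming a negotiated rsize of
 * 64 KiB and the large-file capability: a 1 MiB request is issued as 16
 * synchronous reads of 64 KiB each, advancing cur_offset and *offset by
 * bytes_read after every server round trip.
 */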
4203
4204 /*
4205  * If the page is mmap'ed into a process' page tables, then we need to make
4206  * sure that it doesn't change while being written back.
4207  */
4208 static vm_fault_t
4209 cifs_page_mkwrite(struct vm_fault *vmf)
4210 {
4211         struct page *page = vmf->page;
4212
4213         /* Wait for the page to be written to the cache before we allow it to
4214          * be modified.  We then assume the entire page will need writing back.
4215          */
4216 #ifdef CONFIG_CIFS_FSCACHE
4217         if (PageFsCache(page) &&
4218             wait_on_page_fscache_killable(page) < 0)
4219                 return VM_FAULT_RETRY;
4220 #endif
4221
4222         wait_on_page_writeback(page);
4223
4224         if (lock_page_killable(page) < 0)
4225                 return VM_FAULT_RETRY;
4226         return VM_FAULT_LOCKED;
4227 }
4228
4229 static const struct vm_operations_struct cifs_file_vm_ops = {
4230         .fault = filemap_fault,
4231         .map_pages = filemap_map_pages,
4232         .page_mkwrite = cifs_page_mkwrite,
4233 };
4234
4235 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4236 {
4237         int xid, rc = 0;
4238         struct inode *inode = file_inode(file);
4239
4240         xid = get_xid();
4241
4242         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4243                 rc = cifs_zap_mapping(inode);
4244         if (!rc)
4245                 rc = generic_file_mmap(file, vma);
4246         if (!rc)
4247                 vma->vm_ops = &cifs_file_vm_ops;
4248
4249         free_xid(xid);
4250         return rc;
4251 }
4252
4253 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4254 {
4255         int rc, xid;
4256
4257         xid = get_xid();
4258
4259         rc = cifs_revalidate_file(file);
4260         if (rc)
4261                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4262                          rc);
4263         if (!rc)
4264                 rc = generic_file_mmap(file, vma);
4265         if (!rc)
4266                 vma->vm_ops = &cifs_file_vm_ops;
4267
4268         free_xid(xid);
4269         return rc;
4270 }
4271
4272 static void
4273 cifs_readv_complete(struct work_struct *work)
4274 {
4275         unsigned int i, got_bytes;
4276         struct cifs_readdata *rdata = container_of(work,
4277                                                 struct cifs_readdata, work);
4278
4279         got_bytes = rdata->got_bytes;
4280         for (i = 0; i < rdata->nr_pages; i++) {
4281                 struct page *page = rdata->pages[i];
4282
4283                 if (rdata->result == 0 ||
4284                     (rdata->result == -EAGAIN && got_bytes)) {
4285                         flush_dcache_page(page);
4286                         SetPageUptodate(page);
4287                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4288                 } else {
4289                         SetPageError(page);
4290                 }
4293
4294                 unlock_page(page);
4295
4296                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4297
4298                 put_page(page);
4299                 rdata->pages[i] = NULL;
4300         }
4301         kref_put(&rdata->refcount, cifs_readdata_release);
4302 }
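
/*
 * Worked example of the accounting above, assuming 4 KiB pages: a
 * three-page rdata that finished with result == -EAGAIN and
 * got_bytes == 6144 marks page 0 (4096 bytes) and page 1 (the remaining
 * 2048 bytes) uptodate, then sees got_bytes reach zero and marks page 2
 * with SetPageError().
 */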
4303
4304 static int
4305 readpages_fill_pages(struct TCP_Server_Info *server,
4306                      struct cifs_readdata *rdata, struct iov_iter *iter,
4307                      unsigned int len)
4308 {
4309         int result = 0;
4310         unsigned int i;
4311         u64 eof;
4312         pgoff_t eof_index;
4313         unsigned int nr_pages = rdata->nr_pages;
4314         unsigned int page_offset = rdata->page_offset;
4315
4316         /* determine the eof that the server (probably) has */
4317         eof = CIFS_I(rdata->mapping->host)->server_eof;
4318         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4319         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4320
4321         rdata->got_bytes = 0;
4322         rdata->tailsz = PAGE_SIZE;
4323         for (i = 0; i < nr_pages; i++) {
4324                 struct page *page = rdata->pages[i];
4325                 unsigned int to_read = rdata->pagesz;
4326                 size_t n;
4327
4328                 if (i == 0)
4329                         to_read -= page_offset;
4330                 else
4331                         page_offset = 0;
4332
4333                 n = to_read;
4334
4335                 if (len >= to_read) {
4336                         len -= to_read;
4337                 } else if (len > 0) {
4338                         /* enough for partial page, fill and zero the rest */
4339                         zero_user(page, len + page_offset, to_read - len);
4340                         n = rdata->tailsz = len;
4341                         len = 0;
4342                 } else if (page->index > eof_index) {
4343                         /*
4344                          * The VFS will not try to do readahead past the
4345                          * i_size, but it's possible that we have outstanding
4346                          * writes with gaps in the middle and the i_size hasn't
4347                          * caught up yet. Populate those with zeroed out pages
4348                          * to prevent the VFS from repeatedly attempting to
4349                          * fill them until the writes are flushed.
4350                          */
4351                         zero_user(page, 0, PAGE_SIZE);
4352                         flush_dcache_page(page);
4353                         SetPageUptodate(page);
4354                         unlock_page(page);
4355                         put_page(page);
4356                         rdata->pages[i] = NULL;
4357                         rdata->nr_pages--;
4358                         continue;
4359                 } else {
4360                         /* no need to hold page hostage */
4361                         unlock_page(page);
4362                         put_page(page);
4363                         rdata->pages[i] = NULL;
4364                         rdata->nr_pages--;
4365                         continue;
4366                 }
4367
4368                 if (iter)
4369                         result = copy_page_from_iter(
4370                                         page, page_offset, n, iter);
4371 #ifdef CONFIG_CIFS_SMB_DIRECT
4372                 else if (rdata->mr)
4373                         result = n;
4374 #endif
4375                 else
4376                         result = cifs_read_page_from_socket(
4377                                         server, page, page_offset, n);
4378                 if (result < 0)
4379                         break;
4380
4381                 rdata->got_bytes += result;
4382         }
4383
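        /*
         * Report partial progress if any bytes landed, unless the
         * connection was aborted; otherwise propagate the error code.
         */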
4384         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4385                                                 rdata->got_bytes : result;
4386 }
4387
4388 static int
4389 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4390                                struct cifs_readdata *rdata, unsigned int len)
4391 {
4392         return readpages_fill_pages(server, rdata, NULL, len);
4393 }
4394
4395 static int
4396 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4397                                struct cifs_readdata *rdata,
4398                                struct iov_iter *iter)
4399 {
4400         return readpages_fill_pages(server, rdata, iter, iter->count);
4401 }
4402
4403 static void cifs_readahead(struct readahead_control *ractl)
4404 {
4405         int rc;
4406         struct cifsFileInfo *open_file = ractl->file->private_data;
4407         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4408         struct TCP_Server_Info *server;
4409         pid_t pid;
4410         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4411         pgoff_t next_cached = ULONG_MAX;
4412         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4413                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4414         bool check_cache = caching;
4415
4416         xid = get_xid();
4417
4418         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4419                 pid = open_file->pid;
4420         else
4421                 pid = current->tgid;
4422
4423         rc = 0;
4424         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4425
4426         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4427                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4428
4429         /*
4430          * Chop the readahead request up into rsize-sized read requests.
4431          */
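        /*
         * Worked example, assuming 4 KiB pages and an rsize of 64 KiB: a
         * 1 MiB readahead window is consumed in batches of at most 16
         * pages, each batch becoming one async read request below.
         */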
4432         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4433                 unsigned int i, got, rsize;
4434                 struct page *page;
4435                 struct cifs_readdata *rdata;
4436                 struct cifs_credits credits_on_stack;
4437                 struct cifs_credits *credits = &credits_on_stack;
4438                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4439
4440                 /*
4441                  * Find out if we have anything cached in the range of
4442                  * interest, and if so, where the next chunk of cached data is.
4443                  */
4444                 if (caching) {
4445                         if (check_cache) {
4446                                 rc = cifs_fscache_query_occupancy(
4447                                         ractl->mapping->host, index, nr_pages,
4448                                         &next_cached, &cache_nr_pages);
4449                                 if (rc < 0)
4450                                         caching = false;
4451                                 check_cache = false;
4452                         }
4453
4454                         if (index == next_cached) {
4455                                 /*
4456                                  * TODO: Send a whole batch of pages to be read
4457                                  * by the cache.
4458                                  */
4459                                 page = readahead_page(ractl);
4460                                 last_batch_size = 1 << thp_order(page);
4461                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4462                                                                page) < 0) {
4463                                         /*
4464                                          * TODO: Deal with cache read failure
4465                                          * here, but for the moment, delegate
4466                                          * that to readpage.
4467                                          */
4468                                         caching = false;
4469                                 }
4470                                 unlock_page(page);
4471                                 next_cached++;
4472                                 cache_nr_pages--;
4473                                 if (cache_nr_pages == 0)
4474                                         check_cache = true;
4475                                 continue;
4476                         }
4477                 }
4478
4479                 if (open_file->invalidHandle) {
4480                         rc = cifs_reopen_file(open_file, true);
4481                         if (rc) {
4482                                 if (rc == -EAGAIN)
4483                                         continue;
4484                                 break;
4485                         }
4486                 }
4487
4488                 if (cifs_sb->ctx->rsize == 0)
4489                         cifs_sb->ctx->rsize =
4490                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4491                                                              cifs_sb->ctx);
4492
4493                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4494                                                    &rsize, credits);
4495                 if (rc)
4496                         break;
4497                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4498                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4499
4500                 /*
4501                  * Give up immediately if rsize is too small to read an entire
4502                  * page. The VFS will fall back to readpage. We should never
4503                  * reach this point, however, since we set ra_pages to 0
4504                  * when the rsize is smaller than a cache page.
4505                  */
4506                 if (unlikely(!nr_pages)) {
4507                         add_credits_and_wake_if(server, credits, 0);
4508                         break;
4509                 }
4510
4511                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4512                 if (!rdata) {
4513                         /* best to give up if we're out of mem */
4514                         add_credits_and_wake_if(server, credits, 0);
4515                         break;
4516                 }
4517
4518                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4519                 if (got != nr_pages) {
4520                         pr_warn("__readahead_batch() returned %u/%u\n",
4521                                 got, nr_pages);
4522                         nr_pages = got;
4523                 }
4524
4525                 rdata->nr_pages = nr_pages;
4526                 rdata->bytes    = readahead_batch_length(ractl);
4527                 rdata->cfile    = cifsFileInfo_get(open_file);
4528                 rdata->server   = server;
4529                 rdata->mapping  = ractl->mapping;
4530                 rdata->offset   = readahead_pos(ractl);
4531                 rdata->pid      = pid;
4532                 rdata->pagesz   = PAGE_SIZE;
4533                 rdata->tailsz   = PAGE_SIZE;
4534                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4535                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4536                 rdata->credits  = credits_on_stack;
4537
4538                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4539                 if (!rc) {
4540                         if (rdata->cfile->invalidHandle)
4541                                 rc = -EAGAIN;
4542                         else
4543                                 rc = server->ops->async_readv(rdata);
4544                 }
4545
4546                 if (rc) {
4547                         add_credits_and_wake_if(server, &rdata->credits, 0);
4548                         for (i = 0; i < rdata->nr_pages; i++) {
4549                                 page = rdata->pages[i];
4550                                 unlock_page(page);
4551                                 put_page(page);
4552                         }
4553                         /* Fall back to readpage in error/reconnect cases */
4554                         kref_put(&rdata->refcount, cifs_readdata_release);
4555                         break;
4556                 }
4557
4558                 kref_put(&rdata->refcount, cifs_readdata_release);
4559                 last_batch_size = nr_pages;
4560         }
4561
4562         free_xid(xid);
4563 }
4564
4565 /*
4566  * cifs_readpage_worker must be called with the page pinned
4567  */
4568 static int cifs_readpage_worker(struct file *file, struct page *page,
4569         loff_t *poffset)
4570 {
4571         char *read_data;
4572         int rc;
4573
4574         /* Is the page cached? */
4575         rc = cifs_readpage_from_fscache(file_inode(file), page);
4576         if (rc == 0)
4577                 goto read_complete;
4578
4579         read_data = kmap(page);
4580         /* for reads over a certain size we could initiate async readahead */
4581
4582         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4583
4584         if (rc < 0)
4585                 goto io_error;
4586         else
4587                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4588
4589         /* we do not want atime to be less than mtime, that broke some apps */
4590         file_inode(file)->i_atime = current_time(file_inode(file));
4591         if (timespec64_compare(&(file_inode(file)->i_atime),
4592                                &(file_inode(file)->i_mtime)) < 0)
4593                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4595
4596         if (PAGE_SIZE > rc)
4597                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4598
4599         flush_dcache_page(page);
4600         SetPageUptodate(page);
4601
4602         /* send this page to the cache */
4603         cifs_readpage_to_fscache(file_inode(file), page);
4604
4605         rc = 0;
4606
4607 io_error:
4608         kunmap(page);
4609         unlock_page(page);
4610
4611 read_complete:
4612         return rc;
4613 }
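
/*
 * Worked example, assuming 4 KiB pages: reading the single page of a
 * 100-byte file returns rc = 100 above, so the remaining 3996 bytes of
 * the kmapped page are zeroed before the page is marked uptodate.
 */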
4614
4615 static int cifs_readpage(struct file *file, struct page *page)
4616 {
4617         loff_t offset = page_file_offset(page);
4618         int rc = -EACCES;
4619         unsigned int xid;
4620
4621         xid = get_xid();
4622
4623         if (file->private_data == NULL) {
4624                 rc = -EBADF;
4625                 free_xid(xid);
4626                 return rc;
4627         }
4628
4629         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4630                  page, (int)offset, (int)offset);
4631
4632         rc = cifs_readpage_worker(file, page, &offset);
4633
4634         free_xid(xid);
4635         return rc;
4636 }
4637
4638 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4639 {
4640         struct cifsFileInfo *open_file;
4641
4642         spin_lock(&cifs_inode->open_file_lock);
4643         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4644                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4645                         spin_unlock(&cifs_inode->open_file_lock);
4646                         return 1;
4647                 }
4648         }
4649         spin_unlock(&cifs_inode->open_file_lock);
4650         return 0;
4651 }
4652
4653 /* We do not want to update the file size from the server for inodes
4654    open for write, to avoid races with writepage extending the file.
4655    In the future we could consider allowing a refresh of the inode
4656    only on increases in the file size, but that is tricky to do
4657    without racing with writebehind page caching in the current
4658    Linux kernel design. */
4659 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4660 {
4661         if (!cifsInode)
4662                 return true;
4663
4664         if (is_inode_writable(cifsInode)) {
4665                 /* This inode is open for write at least once */
4666                 struct cifs_sb_info *cifs_sb;
4667
4668                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4669                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4670                         /* since there is no page cache to corrupt on
4671                            direct I/O, we can change the size safely */
4672                         return true;
4673                 }
4674
4675                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4676                         return true;
4677
4678                 return false;
4679         } else
4680                 return true;
4681 }
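
/*
 * Example of the race avoided above (hypothetical timeline): writepage
 * has just extended the file to 8 KiB locally while a stat-triggered
 * revalidation still sees 4 KiB from the server; shrinking i_size to the
 * stale server value would throw away page cache that has not yet been
 * written back.
 */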
4682
4683 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4684                         loff_t pos, unsigned len, unsigned flags,
4685                         struct page **pagep, void **fsdata)
4686 {
4687         int oncethru = 0;
4688         pgoff_t index = pos >> PAGE_SHIFT;
4689         loff_t offset = pos & (PAGE_SIZE - 1);
4690         loff_t page_start = pos & PAGE_MASK;
4691         loff_t i_size;
4692         struct page *page;
4693         int rc = 0;
4694
4695         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4696
4697 start:
4698         page = grab_cache_page_write_begin(mapping, index, flags);
4699         if (!page) {
4700                 rc = -ENOMEM;
4701                 goto out;
4702         }
4703
4704         if (PageUptodate(page))
4705                 goto out;
4706
4707         /*
4708          * If we write a full page it will be up to date, no need to read from
4709          * the server. If the write is short, we'll end up doing a sync write
4710          * instead.
4711          */
4712         if (len == PAGE_SIZE)
4713                 goto out;
4714
4715         /*
4716          * optimize away the read when we have an oplock, and we're not
4717          * expecting to use any of the data we'd be reading in. That
4718          * is, when the page lies beyond the EOF, or straddles the EOF
4719          * and the write will cover all of the existing data.
4720          */
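        /*
         * For instance, with i_size = 100 and a write beginning at
         * pos = 4096: the page lies wholly beyond EOF, so zeroing it
         * locally is as good as reading it from the server.
         */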
4721         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4722                 i_size = i_size_read(mapping->host);
4723                 if (page_start >= i_size ||
4724                     (offset == 0 && (pos + len) >= i_size)) {
4725                         zero_user_segments(page, 0, offset,
4726                                            offset + len,
4727                                            PAGE_SIZE);
4728                         /*
4729                          * PageChecked means that the parts of the page
4730                          * to which we're not writing are considered up
4731                          * to date. Once the data is copied to the
4732                          * page, it can be set uptodate.
4733                          */
4734                         SetPageChecked(page);
4735                         goto out;
4736                 }
4737         }
4738
4739         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4740                 /*
4741                  * might as well read a page, it is fast enough. If we get
4742                  * an error, we don't need to return it. cifs_write_end will
4743                  * do a sync write instead since PG_uptodate isn't set.
4744                  */
4745                 cifs_readpage_worker(file, page, &page_start);
4746                 put_page(page);
4747                 oncethru = 1;
4748                 goto start;
4749         } else {
4750                 /* we could try using another file handle if there is
4751                    one - but how would we lock it to prevent a close of
4752                    that handle racing with this read? In any case this
4753                    will be written out by write_end so it is fine */
4754         }
4755 out:
4756         *pagep = page;
4757         return rc;
4758 }
4759
4760 static int cifs_release_page(struct page *page, gfp_t gfp)
4761 {
4762         if (PagePrivate(page))
4763                 return 0;
4764         if (PageFsCache(page)) {
4765                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4766                         return 0;
4767                 wait_on_page_fscache(page);
4768         }
4769         fscache_note_page_release(cifs_inode_cookie(page->mapping->host));
4770         return 1;
4771 }
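
/*
 * Note on the gfp checks above: releasepage can be called from memory
 * reclaim contexts that must not block on filesystem I/O, so kswapd and
 * non-__GFP_FS callers refuse the page rather than wait for fscache.
 */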
4772
4773 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4774                                  size_t length)
4775 {
4776         folio_wait_fscache(folio);
4777 }
4778
4779 static int cifs_launder_folio(struct folio *folio)
4780 {
4781         int rc = 0;
4782         loff_t range_start = folio_pos(folio);
4783         loff_t range_end = range_start + folio_size(folio);
4784         struct writeback_control wbc = {
4785                 .sync_mode = WB_SYNC_ALL,
4786                 .nr_to_write = 0,
4787                 .range_start = range_start,
4788                 .range_end = range_end,
4789         };
4790
4791         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4792
4793         if (folio_clear_dirty_for_io(folio))
4794                 rc = cifs_writepage_locked(&folio->page, &wbc);
4795
4796         folio_wait_fscache(folio);
4797         return rc;
4798 }
4799
4800 void cifs_oplock_break(struct work_struct *work)
4801 {
4802         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4803                                                   oplock_break);
4804         struct inode *inode = d_inode(cfile->dentry);
4805         struct cifsInodeInfo *cinode = CIFS_I(inode);
4806         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4807         struct TCP_Server_Info *server = tcon->ses->server;
4808         int rc = 0;
4809         bool purge_cache = false;
4810         bool is_deferred = false;
4811         struct cifs_deferred_close *dclose;
4812
4813         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4814                         TASK_UNINTERRUPTIBLE);
4815
4816         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4817                                       cfile->oplock_epoch, &purge_cache);
4818
4819         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4820                                                 cifs_has_mand_locks(cinode)) {
4821                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4822                          inode);
4823                 cinode->oplock = 0;
4824         }
4825
4826         if (inode && S_ISREG(inode->i_mode)) {
4827                 if (CIFS_CACHE_READ(cinode))
4828                         break_lease(inode, O_RDONLY);
4829                 else
4830                         break_lease(inode, O_WRONLY);
4831                 rc = filemap_fdatawrite(inode->i_mapping);
4832                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4833                         rc = filemap_fdatawait(inode->i_mapping);
4834                         mapping_set_error(inode->i_mapping, rc);
4835                         cifs_zap_mapping(inode);
4836                 }
4837                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4838                 if (CIFS_CACHE_WRITE(cinode))
4839                         goto oplock_break_ack;
4840         }
4841
4842         rc = cifs_push_locks(cfile);
4843         if (rc)
4844                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4845
4846 oplock_break_ack:
4847         /*
4848          * When an oplock break is received and there are no active file
4849          * handles, only cached ones, schedule the deferred close immediately
4850          * so that a new open will not use the cached handle.
4851          */
4852         spin_lock(&CIFS_I(inode)->deferred_lock);
4853         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4854         spin_unlock(&CIFS_I(inode)->deferred_lock);
4855         if (is_deferred &&
4856             cfile->deferred_close_scheduled &&
4857             delayed_work_pending(&cfile->deferred)) {
4858                 if (cancel_delayed_work(&cfile->deferred)) {
4859                         _cifsFileInfo_put(cfile, false, false);
4860                         goto oplock_break_done;
4861                 }
4862         }
4863         /*
4864          * Releasing a stale oplock after a recent reconnect of the SMB
4865          * session, using a now-incorrect file handle, is not a data
4866          * integrity issue, but don't send an oplock release if the session
4867          * is still disconnected: the server has already released the oplock.
4868          */
4869         if (!cfile->oplock_break_cancelled) {
4870                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4871                                                              cinode);
4872                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4873         }
4874 oplock_break_done:
4875         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4876         cifs_done_oplock_break(cinode);
4877 }
4878
4879 /*
4880  * The presence of cifs_direct_io() in the address space ops vector
4881  * allows open() with the O_DIRECT flag, which would have failed otherwise.
4882  *
4883  * In the non-cached mode (mount with cache=none), we shunt off direct
4884  * read and write requests, so this method should never be called.
4885  *
4886  * Direct IO is not yet supported in the cached mode.
4887  */
4888 static ssize_t
4889 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4890 {
4891         /*
4892          * FIXME
4893          * Eventually need to support direct IO for non forcedirectio mounts
4894          */
4895         return -EINVAL;
4896 }
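
/*
 * Illustration: without a ->direct_IO method in the aops, an open such
 * as open(path, O_RDWR | O_DIRECT) on a cached mount would be rejected
 * at open time; with it, the open succeeds and an unexpected direct I/O
 * request fails here with -EINVAL instead.
 */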
4897
4898 static int cifs_swap_activate(struct swap_info_struct *sis,
4899                               struct file *swap_file, sector_t *span)
4900 {
4901         struct cifsFileInfo *cfile = swap_file->private_data;
4902         struct inode *inode = swap_file->f_mapping->host;
4903         unsigned long blocks;
4904         long long isize;
4905
4906         cifs_dbg(FYI, "swap activate\n");
4907
4908         spin_lock(&inode->i_lock);
4909         blocks = inode->i_blocks;
4910         isize = inode->i_size;
4911         spin_unlock(&inode->i_lock);
4912         if (blocks * 512 < isize) {
4913                 pr_warn("swap activate: swapfile has holes\n");
4914                 return -EINVAL;
4915         }
4916         *span = sis->pages;
4917
4918         pr_warn_once("Swap support over SMB3 is experimental\n");
4919
4920         /*
4921          * TODO: consider adding ACL (or documenting how) to prevent other
4922          * users (on this or other systems) from reading it
4923          */
4924
4926         /* TODO: add sk_set_memalloc(inet) or similar */
4927
4928         if (cfile)
4929                 cfile->swapfile = true;
4930         /*
4931          * TODO: Since file already open, we can't open with DENY_ALL here
4932          * but we could add call to grab a byte range lock to prevent others
4933          * from reading or writing the file
4934          */
4935
4936         return 0;
4937 }
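
/*
 * Example of the holes check above (hypothetical): a 1 MiB swapfile
 * created by truncate alone reports i_blocks == 0, so blocks * 512 is
 * less than i_size and activation is refused; a fully allocated file
 * of the same size passes.
 */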
4938
4939 static void cifs_swap_deactivate(struct file *file)
4940 {
4941         struct cifsFileInfo *cfile = file->private_data;
4942
4943         cifs_dbg(FYI, "swap deactivate\n");
4944
4945         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4946
4947         if (cfile)
4948                 cfile->swapfile = false;
4949
4950         /* do we need to unpin (or unlock) the file? */
4951 }
4952
4953 /*
4954  * Mark a page as having been made dirty and thus needing writeback.  We also
4955  * need to pin the cache object to write back to.
4956  */
4957 #ifdef CONFIG_CIFS_FSCACHE
4958 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
4959 {
4960         return fscache_dirty_folio(mapping, folio,
4961                                         cifs_inode_cookie(mapping->host));
4962 }
4963 #else
4964 #define cifs_dirty_folio filemap_dirty_folio
4965 #endif
4966
4967 const struct address_space_operations cifs_addr_ops = {
4968         .readpage = cifs_readpage,
4969         .readahead = cifs_readahead,
4970         .writepage = cifs_writepage,
4971         .writepages = cifs_writepages,
4972         .write_begin = cifs_write_begin,
4973         .write_end = cifs_write_end,
4974         .dirty_folio = cifs_dirty_folio,
4975         .releasepage = cifs_release_page,
4976         .direct_IO = cifs_direct_io,
4977         .invalidate_folio = cifs_invalidate_folio,
4978         .launder_folio = cifs_launder_folio,
4979         /*
4980          * TODO: investigate and, if useful, add a cifs_migratePage
4981          * helper (under CONFIG_MIGRATION) in the future, and also
4982          * investigate and add an is_dirty_writeback helper if needed
4983          */
4984         .swap_activate = cifs_swap_activate,
4985         .swap_deactivate = cifs_swap_deactivate,
4986 };
4987
4988 /*
4989  * cifs_readahead requires the server to support a buffer large enough
4990  * to contain the header plus one complete page of data.  Otherwise, we
4991  * need to leave cifs_readahead out of the address space operations.
4992  */
4993 const struct address_space_operations cifs_addr_ops_smallbuf = {
4994         .readpage = cifs_readpage,
4995         .writepage = cifs_writepage,
4996         .writepages = cifs_writepages,
4997         .write_begin = cifs_write_begin,
4998         .write_end = cifs_write_end,
4999         .dirty_folio = cifs_dirty_folio,
5000         .releasepage = cifs_release_page,
5001         .invalidate_folio = cifs_invalidate_folio,
5002         .launder_folio = cifs_launder_folio,
5003 };