// SPDX-License-Identifier: LGPL-2.1
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause unnecessary access-denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
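
/*
 * Illustrative sketch (not part of the build, hence #if 0): spot checks of
 * the access-mode mapping implemented by cifs_convert_flags() above. The
 * function name check_convert_flags() is hypothetical and exists only for
 * this example.
 */
#if 0
static void check_convert_flags(void)
{
        /* read-only opens request only GENERIC_READ */
        WARN_ON(cifs_convert_flags(O_RDONLY) != GENERIC_READ);
        /* write-only opens request only GENERIC_WRITE */
        WARN_ON(cifs_convert_flags(O_WRONLY) != GENERIC_WRITE);
        /* read-write opens combine the two instead of asking for GENERIC_ALL */
        WARN_ON(cifs_convert_flags(O_RDWR) != (GENERIC_READ | GENERIC_WRITE));
}
#endif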

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
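
/*
 * Illustrative sketch (not part of the build): the disposition mapping
 * above written out as spot checks; it mirrors the open-flag mapping table
 * documented in cifs_nt_open() below. check_get_disposition() is a
 * hypothetical name used only for this example.
 */
#if 0
static void check_get_disposition(void)
{
        WARN_ON(cifs_get_disposition(O_CREAT | O_EXCL) != FILE_CREATE);
        WARN_ON(cifs_get_disposition(O_CREAT | O_TRUNC) != FILE_OVERWRITE_IF);
        WARN_ON(cifs_get_disposition(O_CREAT) != FILE_OPEN_IF);
        WARN_ON(cifs_get_disposition(O_TRUNC) != FILE_OVERWRITE);
        WARN_ON(cifs_get_disposition(0) != FILE_OPEN);
}
#endif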

int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);
/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it with a new file as FILE_SUPERSEDE
 *      does (FILE_SUPERSEDE uses the attributes / metadata passed in
 *      on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because large file support is always used by this
 *      client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has the bit for O_DSYNC, so the following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info, but passing in the fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

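/*
 * Acquire @sem for writing by polling: keep retrying the trylock, sleeping
 * 10 ms between attempts, until it succeeds.
 */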
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if the file instance is readable, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}
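
/*
 * Illustrative sketch (not part of the build): the reference-counting
 * contract. Every cifsFileInfo_get() must be balanced by a
 * cifsFileInfo_put(); the final put tears the handle down. "cfile" here
 * stands for any valid open-file reference.
 */
#if 0
        struct cifsFileInfo *ref = cifsFileInfo_get(cfile);

        /* ... use ref, e.g. issue I/O against ref->fid ... */

        cifsFileInfo_put(ref);  /* may close the handle on the server */
#endif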

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload:    if true, queue the final release on a workqueue;
 *              the close and oplock-break paths pass false
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}
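
/*
 * Illustrative sketch (not part of the build): the _cifsFileInfo_put()
 * argument combinations suggested by the kernel-doc above and the callers
 * in this file.
 */
#if 0
        /* generic last-reference drop: wait for oplock work, offload free */
        _cifsFileInfo_put(cfile, true, true);

        /* close and deferred-close paths: wait, but free synchronously */
        _cifsFileInfo_put(cfile, true, false);

        /* from the oplock break handler: must not wait on that handler */
        _cifsFileInfo_put(cfile, false, false);
#endif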

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto out;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* cannot refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Otherwise fall through and retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we could not set earlier due
                 * to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * We cannot grab the rename sem here, because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called. If the server was down, that means we end up here, and we
         * can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through and retry the open the old way on errors;
                 * in the reconnect path it is especially important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has the bit for O_DSYNC, so the following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * We cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty
         * locally, we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Otherwise we are already writing data out to the server and could
         * deadlock if we tried to flush it. Since we do not know whether we
         * have data that would invalidate the current end of file on the
         * server, we cannot go to the server for new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    dclose) {
                        if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * mod_delayed_work() queues new work if none
                                 * is pending, so take an extra reference in
                                 * that case to avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
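
/*
 * Illustrative sketch (not part of the build): the deferred-close behavior
 * above from an application's point of view. With a granted RHW lease the
 * handle is parked for up to acregmax after close(), so a quick reopen with
 * the same flags is served from the cached handle (see
 * cifs_get_readable_path() in cifs_open()). The path is hypothetical.
 */
#if 0
        int fd = open("/mnt/cifs/scratch.txt", O_RDONLY);

        close(fd);      /* handle goes onto the deferred-close list */

        /* reopened before acregmax expires: no new open on the wire */
        fd = open("/mnt/cifs/scratch.txt", O_RDONLY);
#endif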

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: 0 - lock op, 1 - read op, 2 - write op */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}
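
/*
 * Illustrative sketch (not part of the build): the interval-overlap test
 * that gates cifs_find_fid_lock_conflict() above. Two byte ranges conflict
 * only when neither ends at or before the start of the other;
 * ranges_overlap() is a hypothetical helper written for this example.
 */
#if 0
static bool ranges_overlap(__u64 off1, __u64 len1, __u64 off2, __u64 len2)
{
        /* mirrors the "continue" test at the top of the list walk */
        return !(off1 + len1 <= off2 || off1 >= off2 + len2);
}

/* ranges_overlap(100, 50, 150, 50) == false: [100,150) only touches [150,200) */
/* ranges_overlap(120, 40, 100, 50) == true:  [120,160) overlaps [100,150)     */
#endif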

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}
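
/*
 * Illustrative sketch (not part of the build): cifs_lock_test() implements
 * the F_GETLK side of fcntl(). The caller fills a file_lock with the range
 * to probe; on conflict the flock is rewritten to describe the blocking
 * lock. "type" stands for the server->vals lock-type bits.
 */
#if 0
        struct file_lock probe = {
                .fl_start = 0,
                .fl_end   = 4095,
                .fl_type  = F_WRLCK,
                .fl_flags = FL_POSIX,
        };

        /* rc == 1: ask the server; rc == 0: the answer is already in  */
        /* "probe" (F_UNLCK, or the conflicting lock's range and type) */
        rc = cifs_lock_test(cfile, 0, 4096, type, &probe);
#endif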

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to send a request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to send a request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}
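
/*
 * Illustrative sketch (not part of the build): how a caller is expected to
 * interpret the cifs_posix_lock_set() return convention documented above.
 */
#if 0
        rc = cifs_posix_lock_set(file, flock);
        if (rc < 0)
                return rc;              /* error while setting the lock */
        if (rc == 0)
                return 0;               /* cached locally, nothing to send */
        if (rc == FILE_LOCK_DEFERRED)
                ;                       /* blocked on another file_lock */
        else
                ;                       /* FILE_LOCK_DEFERRED + 1: go to server */
#endif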

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
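
/*
 * Illustrative sketch (not part of the build): the batching arithmetic in
 * cifs_push_mandatory_locks() above. The number of ranges per LOCKING_ANDX
 * request is bounded by both the server's maxBuf and PAGE_SIZE; the maxBuf
 * figure below is made up to show the shape of the computation.
 */
#if 0
        /* Suppose maxBuf = 16644 and PAGE_SIZE = 4096. Then:          */
        /* max_buf = min(16644 - sizeof(struct smb_hdr), 4096) = 4096  */
        /* max_num = (4096 - sizeof(struct smb_hdr)) /                 */
        /*           sizeof(LOCKING_ANDX_RANGE)                        */
        /* Every max_num-th range triggers a cifs_lockv() flush; any   */
        /* remainder is sent after the walk completes.                 */
#endif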

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem, which
         * protects locking operations on this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated element */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}
1424
1425 static int
1426 cifs_push_locks(struct cifsFileInfo *cfile)
1427 {
1428         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1429         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1430         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1431         int rc = 0;
1432
1433         /* we are going to update can_cache_brlcks here - need write access */
1434         cifs_down_write(&cinode->lock_sem);
1435         if (!cinode->can_cache_brlcks) {
1436                 up_write(&cinode->lock_sem);
1437                 return rc;
1438         }
1439
1440         if (cap_unix(tcon->ses) &&
1441             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1442             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1443                 rc = cifs_push_posix_locks(cfile);
1444         else
1445                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1446
1447         cinode->can_cache_brlcks = false;
1448         up_write(&cinode->lock_sem);
1449         return rc;
1450 }
1451
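/*
 * The eligibility test above (Unix extensions active, the server
 * advertising CIFS_UNIX_FCNTL_CAP, and the noposixbrl mount flag clear)
 * is repeated verbatim in cifs_flock() and cifs_lock() below. Purely as
 * an illustrative sketch, it could live in a hypothetical helper such
 * as:
 *
 *	static bool cifs_can_use_posix_brlocks(struct cifs_tcon *tcon,
 *					       struct cifs_sb_info *cifs_sb)
 *	{
 *		return cap_unix(tcon->ses) &&
 *		       (CIFS_UNIX_FCNTL_CAP &
 *			le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
 *		       !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL);
 *	}
 */
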
1452 static void
1453 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1454                 bool *wait_flag, struct TCP_Server_Info *server)
1455 {
1456         if (flock->fl_flags & FL_POSIX)
1457                 cifs_dbg(FYI, "Posix\n");
1458         if (flock->fl_flags & FL_FLOCK)
1459                 cifs_dbg(FYI, "Flock\n");
1460         if (flock->fl_flags & FL_SLEEP) {
1461                 cifs_dbg(FYI, "Blocking lock\n");
1462                 *wait_flag = true;
1463         }
1464         if (flock->fl_flags & FL_ACCESS)
1465                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1466         if (flock->fl_flags & FL_LEASE)
1467                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1468         if (flock->fl_flags &
1469             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1470                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1471                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1472
1473         *type = server->vals->large_lock_type;
1474         if (flock->fl_type == F_WRLCK) {
1475                 cifs_dbg(FYI, "F_WRLCK\n");
1476                 *type |= server->vals->exclusive_lock_type;
1477                 *lock = 1;
1478         } else if (flock->fl_type == F_UNLCK) {
1479                 cifs_dbg(FYI, "F_UNLCK\n");
1480                 *type |= server->vals->unlock_lock_type;
1481                 *unlock = 1;
1482                 /* Check if unlock includes more than one lock range */
1483         } else if (flock->fl_type == F_RDLCK) {
1484                 cifs_dbg(FYI, "F_RDLCK\n");
1485                 *type |= server->vals->shared_lock_type;
1486                 *lock = 1;
1487         } else if (flock->fl_type == F_EXLCK) {
1488                 cifs_dbg(FYI, "F_EXLCK\n");
1489                 *type |= server->vals->exclusive_lock_type;
1490                 *lock = 1;
1491         } else if (flock->fl_type == F_SHLCK) {
1492                 cifs_dbg(FYI, "F_SHLCK\n");
1493                 *type |= server->vals->shared_lock_type;
1494                 *lock = 1;
1495         } else
1496                 cifs_dbg(FYI, "Unknown type of lock\n");
1497 }
1498
1499 static int
1500 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1501            bool wait_flag, bool posix_lck, unsigned int xid)
1502 {
1503         int rc = 0;
1504         __u64 length = 1 + flock->fl_end - flock->fl_start;
1505         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1506         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1507         struct TCP_Server_Info *server = tcon->ses->server;
1508         __u16 netfid = cfile->fid.netfid;
1509
1510         if (posix_lck) {
1511                 int posix_lock_type;
1512
1513                 rc = cifs_posix_lock_test(file, flock);
1514                 if (!rc)
1515                         return rc;
1516
1517                 if (type & server->vals->shared_lock_type)
1518                         posix_lock_type = CIFS_RDLCK;
1519                 else
1520                         posix_lock_type = CIFS_WRLCK;
1521                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1522                                       hash_lockowner(flock->fl_owner),
1523                                       flock->fl_start, length, flock,
1524                                       posix_lock_type, wait_flag);
1525                 return rc;
1526         }
1527
1528         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1529         if (!rc)
1530                 return rc;
1531
1532         /* BB we could chain these into one lock request BB */
1533         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1534                                     1, 0, false);
1535         if (rc == 0) {
1536                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1537                                             type, 0, 1, false);
1538                 flock->fl_type = F_UNLCK;
1539                 if (rc != 0)
1540                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1541                                  rc);
1542                 return 0;
1543         }
1544
1545         if (type & server->vals->shared_lock_type) {
1546                 flock->fl_type = F_WRLCK;
1547                 return 0;
1548         }
1549
1550         type &= ~server->vals->exclusive_lock_type;
1551
1552         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1553                                     type | server->vals->shared_lock_type,
1554                                     1, 0, false);
1555         if (rc == 0) {
1556                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1557                         type | server->vals->shared_lock_type, 0, 1, false);
1558                 flock->fl_type = F_RDLCK;
1559                 if (rc != 0)
1560                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1561                                  rc);
1562         } else
1563                 flock->fl_type = F_WRLCK;
1564
1565         return 0;
1566 }
1567
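/*
 * On the mandatory-lock half of cifs_getlk() above: there is no
 * dedicated "test lock" operation on the wire, so the test is performed
 * by taking the lock non-blocking and, if that succeeds, immediately
 * unlocking and reporting F_UNLCK; on failure the conflicting mode is
 * reported back in flock->fl_type. In outline (arguments elided):
 *
 *	rc = server->ops->mand_lock(..., type, 1, 0, false);
 *	if (rc == 0) {
 *		server->ops->mand_lock(..., type, 0, 1, false);
 *		flock->fl_type = F_UNLCK;
 *	}
 */
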
1568 void
1569 cifs_move_llist(struct list_head *source, struct list_head *dest)
1570 {
1571         struct list_head *li, *tmp;
1572         list_for_each_safe(li, tmp, source)
1573                 list_move(li, dest);
1574 }
1575
1576 void
1577 cifs_free_llist(struct list_head *llist)
1578 {
1579         struct cifsLockInfo *li, *tmp;
1580         list_for_each_entry_safe(li, tmp, llist, llist) {
1581                 cifs_del_lock_waiters(li);
1582                 list_del(&li->llist);
1583                 kfree(li);
1584         }
1585 }
1586
1587 int
1588 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1589                   unsigned int xid)
1590 {
1591         int rc = 0, stored_rc;
1592         static const int types[] = {
1593                 LOCKING_ANDX_LARGE_FILES,
1594                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1595         };
1596         unsigned int i;
1597         unsigned int max_num, num, max_buf;
1598         LOCKING_ANDX_RANGE *buf, *cur;
1599         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1600         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1601         struct cifsLockInfo *li, *tmp;
1602         __u64 length = 1 + flock->fl_end - flock->fl_start;
1603         struct list_head tmp_llist;
1604
1605         INIT_LIST_HEAD(&tmp_llist);
1606
1607         /*
1608          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1609          * and check it before using.
1610          */
1611         max_buf = tcon->ses->server->maxBuf;
1612         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1613                 return -EINVAL;
1614
1615         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1616                      PAGE_SIZE);
1617         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1618                         PAGE_SIZE);
1619         max_num = (max_buf - sizeof(struct smb_hdr)) /
1620                                                 sizeof(LOCKING_ANDX_RANGE);
1621         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1622         if (!buf)
1623                 return -ENOMEM;
1624
1625         cifs_down_write(&cinode->lock_sem);
1626         for (i = 0; i < 2; i++) {
1627                 cur = buf;
1628                 num = 0;
1629                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1630                         if (flock->fl_start > li->offset ||
1631                             (flock->fl_start + length) <
1632                             (li->offset + li->length))
1633                                 continue;
1634                         if (current->tgid != li->pid)
1635                                 continue;
1636                         if (types[i] != li->type)
1637                                 continue;
1638                         if (cinode->can_cache_brlcks) {
1639                                 /*
1640                                  * We can cache brlock requests - simply remove
1641                                  * a lock from the file's list.
1642                                  */
1643                                 list_del(&li->llist);
1644                                 cifs_del_lock_waiters(li);
1645                                 kfree(li);
1646                                 continue;
1647                         }
1648                         cur->Pid = cpu_to_le16(li->pid);
1649                         cur->LengthLow = cpu_to_le32((u32)li->length);
1650                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1651                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1652                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1653                         /*
1654                          * We need to save a lock here to let us add it again to
1655                          * the file's list if the unlock range request fails on
1656                          * the server.
1657                          */
1658                         list_move(&li->llist, &tmp_llist);
1659                         if (++num == max_num) {
1660                                 stored_rc = cifs_lockv(xid, tcon,
1661                                                        cfile->fid.netfid,
1662                                                        li->type, num, 0, buf);
1663                                 if (stored_rc) {
1664                                         /*
1665                                          * We failed on the unlock range
1666                                          * request - add all locks from the tmp
1667                                          * list to the head of the file's list.
1668                                          */
1669                                         cifs_move_llist(&tmp_llist,
1670                                                         &cfile->llist->locks);
1671                                         rc = stored_rc;
1672                                 } else
1673                                         /*
1674                                          * The unlock range request succeeded -
1675                                          * free the tmp list.
1676                                          */
1677                                         cifs_free_llist(&tmp_llist);
1678                                 cur = buf;
1679                                 num = 0;
1680                         } else
1681                                 cur++;
1682                 }
1683                 if (num) {
1684                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1685                                                types[i], num, 0, buf);
1686                         if (stored_rc) {
1687                                 cifs_move_llist(&tmp_llist,
1688                                                 &cfile->llist->locks);
1689                                 rc = stored_rc;
1690                         } else
1691                                 cifs_free_llist(&tmp_llist);
1692                 }
1693         }
1694
1695         up_write(&cinode->lock_sem);
1696         kfree(buf);
1697         return rc;
1698 }
1699
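/*
 * cifs_unlock_range() above batches ranges: up to max_num
 * LOCKING_ANDX_RANGE entries are packed into buf, flushed via
 * cifs_lockv() whenever the buffer fills, and flushed once more for any
 * remainder after the walk. Reduced to a sketch, with flush() standing
 * in for the cifs_lockv() call:
 *
 *	for each matching lock {
 *		*cur = encode(lock);
 *		if (++num == max_num) {
 *			flush(buf, num);
 *			cur = buf;
 *			num = 0;
 *		} else
 *			cur++;
 *	}
 *	if (num)
 *		flush(buf, num);
 */
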
1700 static int
1701 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1702            bool wait_flag, bool posix_lck, int lock, int unlock,
1703            unsigned int xid)
1704 {
1705         int rc = 0;
1706         __u64 length = 1 + flock->fl_end - flock->fl_start;
1707         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1708         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1709         struct TCP_Server_Info *server = tcon->ses->server;
1710         struct inode *inode = d_inode(cfile->dentry);
1711
1712         if (posix_lck) {
1713                 int posix_lock_type;
1714
1715                 rc = cifs_posix_lock_set(file, flock);
1716                 if (rc <= FILE_LOCK_DEFERRED)
1717                         return rc;
1718
1719                 if (type & server->vals->shared_lock_type)
1720                         posix_lock_type = CIFS_RDLCK;
1721                 else
1722                         posix_lock_type = CIFS_WRLCK;
1723
1724                 if (unlock == 1)
1725                         posix_lock_type = CIFS_UNLCK;
1726
1727                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1728                                       hash_lockowner(flock->fl_owner),
1729                                       flock->fl_start, length,
1730                                       NULL, posix_lock_type, wait_flag);
1731                 goto out;
1732         }
1733
1734         if (lock) {
1735                 struct cifsLockInfo *lock;
1736
1737                 lock = cifs_lock_init(flock->fl_start, length, type,
1738                                       flock->fl_flags);
1739                 if (!lock)
1740                         return -ENOMEM;
1741
1742                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1743                 if (rc < 0) {
1744                         kfree(lock);
1745                         return rc;
1746                 }
1747                 if (!rc)
1748                         goto out;
1749
1750                 /*
1751                  * Windows 7 server can delay breaking lease from read to None
1752                  * if we set a byte-range lock on a file - break it explicitly
1753                  * before sending the lock to the server to be sure the next
1754                  * read won't conflict with non-overlapping locks due to
1755                  * page-granular reads.
1756                  */
1757                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1758                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1759                         cifs_zap_mapping(inode);
1760                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1761                                  inode);
1762                         CIFS_I(inode)->oplock = 0;
1763                 }
1764
1765                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1766                                             type, 1, 0, wait_flag);
1767                 if (rc) {
1768                         kfree(lock);
1769                         return rc;
1770                 }
1771
1772                 cifs_lock_add(cfile, lock);
1773         } else if (unlock)
1774                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1775
1776 out:
1777         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1778                 /*
1779                  * If this is a request to remove all locks because we
1780                  * are closing the file, it doesn't matter if the
1781                  * unlocking failed as both cifs.ko and the SMB server
1782                  * remove the lock on file close
1783                  */
1784                 if (rc) {
1785                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1786                         if (!(flock->fl_flags & FL_CLOSE))
1787                                 return rc;
1788                 }
1789                 rc = locks_lock_file_wait(file, flock);
1790         }
1791         return rc;
1792 }
1793
1794 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1795 {
1796         int rc, xid;
1797         int lock = 0, unlock = 0;
1798         bool wait_flag = false;
1799         bool posix_lck = false;
1800         struct cifs_sb_info *cifs_sb;
1801         struct cifs_tcon *tcon;
1802         struct cifsFileInfo *cfile;
1803         __u32 type;
1804
1805         rc = -EACCES;
1806         xid = get_xid();
1807
1808         if (!(fl->fl_flags & FL_FLOCK))
1809                 return -ENOLCK;
1810
1811         cfile = (struct cifsFileInfo *)file->private_data;
1812         tcon = tlink_tcon(cfile->tlink);
1813
1814         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1815                         tcon->ses->server);
1816         cifs_sb = CIFS_FILE_SB(file);
1817
1818         if (cap_unix(tcon->ses) &&
1819             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1820             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1821                 posix_lck = true;
1822
1823         if (!lock && !unlock) {
1824                 /*
1825                  * if no lock or unlock then nothing to do since we do not
1826                  * know what it is
1827                  */
1828                 free_xid(xid);
1829                 return -EOPNOTSUPP;
1830         }
1831
1832         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1833                         xid);
1834         free_xid(xid);
1835         return rc;
1838 }
1839
1840 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1841 {
1842         int rc, xid;
1843         int lock = 0, unlock = 0;
1844         bool wait_flag = false;
1845         bool posix_lck = false;
1846         struct cifs_sb_info *cifs_sb;
1847         struct cifs_tcon *tcon;
1848         struct cifsFileInfo *cfile;
1849         __u32 type;
1850
1851         rc = -EACCES;
1852         xid = get_xid();
1853
1854         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1855                  cmd, flock->fl_flags, flock->fl_type,
1856                  flock->fl_start, flock->fl_end);
1857
1858         cfile = (struct cifsFileInfo *)file->private_data;
1859         tcon = tlink_tcon(cfile->tlink);
1860
1861         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1862                         tcon->ses->server);
1863         cifs_sb = CIFS_FILE_SB(file);
1864
1865         if (cap_unix(tcon->ses) &&
1866             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1867             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1868                 posix_lck = true;
1869         /*
1870          * BB add code here to normalize offset and length to account for
1871          * negative length, which we cannot accept over the wire.
1872          */
1873         if (IS_GETLK(cmd)) {
1874                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1875                 free_xid(xid);
1876                 return rc;
1877         }
1878
1879         if (!lock && !unlock) {
1880                 /*
1881                  * if no lock or unlock then nothing to do since we do not
1882                  * know what it is
1883                  */
1884                 free_xid(xid);
1885                 return -EOPNOTSUPP;
1886         }
1887
1888         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1889                         xid);
1890         free_xid(xid);
1891         return rc;
1892 }
1893
1894 /*
1895  * update the file size (if needed) after a write. Should be called with
1896  * the inode->i_lock held
1897  */
1898 void
1899 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1900                       unsigned int bytes_written)
1901 {
1902         loff_t end_of_write = offset + bytes_written;
1903
1904         if (end_of_write > cifsi->server_eof)
1905                 cifsi->server_eof = end_of_write;
1906 }
1907
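/*
 * Usage sketch, exactly as cifs_write() below does it:
 *
 *	spin_lock(&d_inode(dentry)->i_lock);
 *	cifs_update_eof(cifsi, *offset, bytes_written);
 *	spin_unlock(&d_inode(dentry)->i_lock);
 */
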
1908 static ssize_t
1909 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1910            size_t write_size, loff_t *offset)
1911 {
1912         int rc = 0;
1913         unsigned int bytes_written = 0;
1914         unsigned int total_written;
1915         struct cifs_tcon *tcon;
1916         struct TCP_Server_Info *server;
1917         unsigned int xid;
1918         struct dentry *dentry = open_file->dentry;
1919         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1920         struct cifs_io_parms io_parms = {0};
1921
1922         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1923                  write_size, *offset, dentry);
1924
1925         tcon = tlink_tcon(open_file->tlink);
1926         server = tcon->ses->server;
1927
1928         if (!server->ops->sync_write)
1929                 return -ENOSYS;
1930
1931         xid = get_xid();
1932
1933         for (total_written = 0; write_size > total_written;
1934              total_written += bytes_written) {
1935                 rc = -EAGAIN;
1936                 while (rc == -EAGAIN) {
1937                         struct kvec iov[2];
1938                         unsigned int len;
1939
1940                         if (open_file->invalidHandle) {
1941                                 /* we could deadlock if we called
1942                                    filemap_fdatawait from here, so tell
1943                                    cifs_reopen_file not to flush data to
1944                                    the server now */
1945                                 rc = cifs_reopen_file(open_file, false);
1946                                 if (rc != 0)
1947                                         break;
1948                         }
1949
1950                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1951                                   (unsigned int)write_size - total_written);
1952                         /* iov[0] is reserved for smb header */
1953                         iov[1].iov_base = (char *)write_data + total_written;
1954                         iov[1].iov_len = len;
1955                         io_parms.pid = pid;
1956                         io_parms.tcon = tcon;
1957                         io_parms.offset = *offset;
1958                         io_parms.length = len;
1959                         rc = server->ops->sync_write(xid, &open_file->fid,
1960                                         &io_parms, &bytes_written, iov, 1);
1961                 }
1962                 if (rc || (bytes_written == 0)) {
1963                         if (total_written)
1964                                 break;
1965                         else {
1966                                 free_xid(xid);
1967                                 return rc;
1968                         }
1969                 } else {
1970                         spin_lock(&d_inode(dentry)->i_lock);
1971                         cifs_update_eof(cifsi, *offset, bytes_written);
1972                         spin_unlock(&d_inode(dentry)->i_lock);
1973                         *offset += bytes_written;
1974                 }
1975         }
1976
1977         cifs_stats_bytes_written(tcon, total_written);
1978
1979         if (total_written > 0) {
1980                 spin_lock(&d_inode(dentry)->i_lock);
1981                 if (*offset > d_inode(dentry)->i_size) {
1982                         i_size_write(d_inode(dentry), *offset);
1983                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1984                 }
1985                 spin_unlock(&d_inode(dentry)->i_lock);
1986         }
1987         mark_inode_dirty_sync(d_inode(dentry));
1988         free_xid(xid);
1989         return total_written;
1990 }
1991
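/*
 * The inner while (rc == -EAGAIN) loop above is the usual reconnect
 * pattern for synchronous requests: if a reconnect invalidated the
 * handle, reopen it (without flushing, per the deadlock note) and
 * reissue the same chunk until it succeeds or fails with something
 * other than -EAGAIN. Skeleton form:
 *
 *	rc = -EAGAIN;
 *	while (rc == -EAGAIN) {
 *		if (open_file->invalidHandle) {
 *			rc = cifs_reopen_file(open_file, false);
 *			if (rc != 0)
 *				break;
 *		}
 *		rc = issue the request;
 *	}
 */
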
1992 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1993                                         bool fsuid_only)
1994 {
1995         struct cifsFileInfo *open_file = NULL;
1996         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1997
1998         /* only filter by fsuid on multiuser mounts */
1999         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2000                 fsuid_only = false;
2001
2002         spin_lock(&cifs_inode->open_file_lock);
2003         /* we could simply take the first list entry since write-only entries
2004            are always at the end of the list, but since the first entry might
2005            have a close pending, we go through the whole list */
2006         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2007                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2008                         continue;
2009                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2010                         if (!open_file->invalidHandle) {
2011                                 /* found a good file */
2012                                 /* lock it so it will not be closed on us */
2013                                 cifsFileInfo_get(open_file);
2014                                 spin_unlock(&cifs_inode->open_file_lock);
2015                                 return open_file;
2016                         } /* else might as well continue, and look for
2017                              another, or simply have the caller reopen it
2018                              again rather than trying to fix this handle */
2019                 } else /* write only file */
2020                         break; /* write only files are last so must be done */
2021         }
2022         spin_unlock(&cifs_inode->open_file_lock);
2023         return NULL;
2024 }
2025
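/*
 * A file returned by find_readable_file() carries a reference taken
 * under open_file_lock, so it cannot be closed underneath the caller,
 * who must drop it with cifsFileInfo_put(). Typical use, sketched:
 *
 *	open_file = find_readable_file(cifs_inode, false);
 *	if (open_file) {
 *		read through open_file;
 *		cifsFileInfo_put(open_file);
 *	}
 */
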
2026 /* Return -EBADF if no handle is found and general rc otherwise */
2027 int
2028 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2029                        struct cifsFileInfo **ret_file)
2030 {
2031         struct cifsFileInfo *open_file, *inv_file = NULL;
2032         struct cifs_sb_info *cifs_sb;
2033         bool any_available = false;
2034         int rc = -EBADF;
2035         unsigned int refind = 0;
2036         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2037         bool with_delete = flags & FIND_WR_WITH_DELETE;
2038         *ret_file = NULL;
2039
2040         /*
2041          * Having a null inode here (because mapping->host was set to zero by
2042          * the VFS or MM) should not happen, but we had reports of an oops (due
2043          * to it being zero) during stress test cases, so we need to check for it
2044          */
2045
2046         if (cifs_inode == NULL) {
2047                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2048                 dump_stack();
2049                 return rc;
2050         }
2051
2052         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2053
2054         /* only filter by fsuid on multiuser mounts */
2055         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2056                 fsuid_only = false;
2057
2058         spin_lock(&cifs_inode->open_file_lock);
2059 refind_writable:
2060         if (refind > MAX_REOPEN_ATT) {
2061                 spin_unlock(&cifs_inode->open_file_lock);
2062                 return rc;
2063         }
2064         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2065                 if (!any_available && open_file->pid != current->tgid)
2066                         continue;
2067                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2068                         continue;
2069                 if (with_delete && !(open_file->fid.access & DELETE))
2070                         continue;
2071                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2072                         if (!open_file->invalidHandle) {
2073                                 /* found a good writable file */
2074                                 cifsFileInfo_get(open_file);
2075                                 spin_unlock(&cifs_inode->open_file_lock);
2076                                 *ret_file = open_file;
2077                                 return 0;
2078                         } else {
2079                                 if (!inv_file)
2080                                         inv_file = open_file;
2081                         }
2082                 }
2083         }
2084         /* couldn't find usable FH with same pid, try any available */
2085         if (!any_available) {
2086                 any_available = true;
2087                 goto refind_writable;
2088         }
2089
2090         if (inv_file) {
2091                 any_available = false;
2092                 cifsFileInfo_get(inv_file);
2093         }
2094
2095         spin_unlock(&cifs_inode->open_file_lock);
2096
2097         if (inv_file) {
2098                 rc = cifs_reopen_file(inv_file, false);
2099                 if (!rc) {
2100                         *ret_file = inv_file;
2101                         return 0;
2102                 }
2103
2104                 spin_lock(&cifs_inode->open_file_lock);
2105                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2106                 spin_unlock(&cifs_inode->open_file_lock);
2107                 cifsFileInfo_put(inv_file);
2108                 ++refind;
2109                 inv_file = NULL;
2110                 spin_lock(&cifs_inode->open_file_lock);
2111                 goto refind_writable;
2112         }
2113
2114         return rc;
2115 }
2116
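/*
 * Usage sketch (FIND_WR_ANY as cifs_partialpagewrite() below uses it;
 * the reference must be dropped with cifsFileInfo_put() when done):
 *
 *	struct cifsFileInfo *wfile;
 *
 *	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &wfile);
 *	if (!rc) {
 *		write through wfile;
 *		cifsFileInfo_put(wfile);
 *	}
 */
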
2117 struct cifsFileInfo *
2118 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2119 {
2120         struct cifsFileInfo *cfile;
2121         int rc;
2122
2123         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2124         if (rc)
2125                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2126
2127         return cfile;
2128 }
2129
2130 int
2131 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2132                        int flags,
2133                        struct cifsFileInfo **ret_file)
2134 {
2135         struct cifsFileInfo *cfile;
2136         void *page = alloc_dentry_path();
2137
2138         *ret_file = NULL;
2139
2140         spin_lock(&tcon->open_file_lock);
2141         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2142                 struct cifsInodeInfo *cinode;
2143                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2144                 if (IS_ERR(full_path)) {
2145                         spin_unlock(&tcon->open_file_lock);
2146                         free_dentry_path(page);
2147                         return PTR_ERR(full_path);
2148                 }
2149                 if (strcmp(full_path, name))
2150                         continue;
2151
2152                 cinode = CIFS_I(d_inode(cfile->dentry));
2153                 spin_unlock(&tcon->open_file_lock);
2154                 free_dentry_path(page);
2155                 return cifs_get_writable_file(cinode, flags, ret_file);
2156         }
2157
2158         spin_unlock(&tcon->open_file_lock);
2159         free_dentry_path(page);
2160         return -ENOENT;
2161 }
2162
2163 int
2164 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2165                        struct cifsFileInfo **ret_file)
2166 {
2167         struct cifsFileInfo *cfile;
2168         void *page = alloc_dentry_path();
2169
2170         *ret_file = NULL;
2171
2172         spin_lock(&tcon->open_file_lock);
2173         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2174                 struct cifsInodeInfo *cinode;
2175                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2176                 if (IS_ERR(full_path)) {
2177                         spin_unlock(&tcon->open_file_lock);
2178                         free_dentry_path(page);
2179                         return PTR_ERR(full_path);
2180                 }
2181                 if (strcmp(full_path, name))
2182                         continue;
2183
2184                 cinode = CIFS_I(d_inode(cfile->dentry));
2185                 spin_unlock(&tcon->open_file_lock);
2186                 free_dentry_path(page);
2187                 *ret_file = find_readable_file(cinode, 0);
2188                 return *ret_file ? 0 : -ENOENT;
2189         }
2190
2191         spin_unlock(&tcon->open_file_lock);
2192         free_dentry_path(page);
2193         return -ENOENT;
2194 }
2195
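/*
 * Both path-based lookups above walk the tcon's open file list,
 * comparing each handle's rebuilt dentry path against 'name', then
 * delegate to the inode-based helpers; the returned handle is
 * referenced and must be released. Sketch:
 *
 *	struct cifsFileInfo *cfile = NULL;
 *
 *	rc = cifs_get_readable_path(tcon, full_path, &cfile);
 *	if (!rc) {
 *		use cfile;
 *		cifsFileInfo_put(cfile);
 *	}
 */
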
2196 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2197 {
2198         struct address_space *mapping = page->mapping;
2199         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2200         char *write_data;
2201         int rc = -EFAULT;
2202         int bytes_written = 0;
2203         struct inode *inode;
2204         struct cifsFileInfo *open_file;
2205
2206         if (!mapping || !mapping->host)
2207                 return -EFAULT;
2208
2209         inode = page->mapping->host;
2210
2211         offset += (loff_t)from;
2212         write_data = kmap(page);
2213         write_data += from;
2214
2215         if ((to > PAGE_SIZE) || (from > to)) {
2216                 kunmap(page);
2217                 return -EIO;
2218         }
2219
2220         /* racing with truncate? */
2221         if (offset > mapping->host->i_size) {
2222                 kunmap(page);
2223                 return 0; /* don't care */
2224         }
2225
2226         /* check to make sure that we are not extending the file */
2227         if (mapping->host->i_size - offset < (loff_t)to)
2228                 to = (unsigned)(mapping->host->i_size - offset);
2229
2230         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2231                                     &open_file);
2232         if (!rc) {
2233                 bytes_written = cifs_write(open_file, open_file->pid,
2234                                            write_data, to - from, &offset);
2235                 cifsFileInfo_put(open_file);
2236                 /* Does mm or vfs already set times? */
2237                 inode->i_atime = inode->i_mtime = current_time(inode);
2238                 if ((bytes_written > 0) && (offset))
2239                         rc = 0;
2240                 else if (bytes_written < 0)
2241                         rc = bytes_written;
2242                 else
2243                         rc = -EFAULT;
2244         } else {
2245                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2246                 if (!is_retryable_error(rc))
2247                         rc = -EIO;
2248         }
2249
2250         kunmap(page);
2251         return rc;
2252 }
2253
2254 static struct cifs_writedata *
2255 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2256                           pgoff_t end, pgoff_t *index,
2257                           unsigned int *found_pages)
2258 {
2259         struct cifs_writedata *wdata;
2260
2261         wdata = cifs_writedata_alloc((unsigned int)tofind,
2262                                      cifs_writev_complete);
2263         if (!wdata)
2264                 return NULL;
2265
2266         *found_pages = find_get_pages_range_tag(mapping, index, end,
2267                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2268         return wdata;
2269 }
2270
2271 static unsigned int
2272 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2273                     struct address_space *mapping,
2274                     struct writeback_control *wbc,
2275                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2276 {
2277         unsigned int nr_pages = 0, i;
2278         struct page *page;
2279
2280         for (i = 0; i < found_pages; i++) {
2281                 page = wdata->pages[i];
2282                 /*
2283                  * At this point we hold neither the i_pages lock nor the
2284                  * page lock: the page may be truncated or invalidated
2285                  * (changing page->mapping to NULL), or even swizzled
2286                  * back from swapper_space to tmpfs file mapping
2287                  */
2288
2289                 if (nr_pages == 0)
2290                         lock_page(page);
2291                 else if (!trylock_page(page))
2292                         break;
2293
2294                 if (unlikely(page->mapping != mapping)) {
2295                         unlock_page(page);
2296                         break;
2297                 }
2298
2299                 if (!wbc->range_cyclic && page->index > end) {
2300                         *done = true;
2301                         unlock_page(page);
2302                         break;
2303                 }
2304
2305                 if (*next && (page->index != *next)) {
2306                         /* Not the next consecutive page */
2307                         unlock_page(page);
2308                         break;
2309                 }
2310
2311                 if (wbc->sync_mode != WB_SYNC_NONE)
2312                         wait_on_page_writeback(page);
2313
2314                 if (PageWriteback(page) ||
2315                                 !clear_page_dirty_for_io(page)) {
2316                         unlock_page(page);
2317                         break;
2318                 }
2319
2320                 /*
2321                  * This actually clears the dirty bit in the radix tree.
2322                  * See cifs_writepage() for more commentary.
2323                  */
2324                 set_page_writeback(page);
2325                 if (page_offset(page) >= i_size_read(mapping->host)) {
2326                         *done = true;
2327                         unlock_page(page);
2328                         end_page_writeback(page);
2329                         break;
2330                 }
2331
2332                 wdata->pages[i] = page;
2333                 *next = page->index + 1;
2334                 ++nr_pages;
2335         }
2336
2337         /* reset index to refind any pages skipped */
2338         if (nr_pages == 0)
2339                 *index = wdata->pages[0]->index + 1;
2340
2341         /* put any pages we aren't going to use */
2342         for (i = nr_pages; i < found_pages; i++) {
2343                 put_page(wdata->pages[i]);
2344                 wdata->pages[i] = NULL;
2345         }
2346
2347         return nr_pages;
2348 }
2349
2350 static int
2351 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2352                  struct address_space *mapping, struct writeback_control *wbc)
2353 {
2354         int rc;
2355
2356         wdata->sync_mode = wbc->sync_mode;
2357         wdata->nr_pages = nr_pages;
2358         wdata->offset = page_offset(wdata->pages[0]);
2359         wdata->pagesz = PAGE_SIZE;
2360         wdata->tailsz = min(i_size_read(mapping->host) -
2361                         page_offset(wdata->pages[nr_pages - 1]),
2362                         (loff_t)PAGE_SIZE);
2363         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2364         wdata->pid = wdata->cfile->pid;
2365
2366         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2367         if (rc)
2368                 return rc;
2369
2370         if (wdata->cfile->invalidHandle)
2371                 rc = -EAGAIN;
2372         else
2373                 rc = wdata->server->ops->async_writev(wdata,
2374                                                       cifs_writedata_release);
2375
2376         return rc;
2377 }
2378
2379 static int cifs_writepages(struct address_space *mapping,
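/*
 * Credit accounting in the writeback path below: credits are reserved
 * with wait_mtu_credits() before pages are gathered, consumed by a
 * successful async_writev(), and must be handed back on every
 * early-exit path. Reduced to a sketch:
 *
 *	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
 *					   &wsize, credits);
 *	...
 *	if (failure before the request is issued)
 *		add_credits_and_wake_if(server, credits, 0);
 */
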
2380                            struct writeback_control *wbc)
2381 {
2382         struct inode *inode = mapping->host;
2383         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2384         struct TCP_Server_Info *server;
2385         bool done = false, scanned = false, range_whole = false;
2386         pgoff_t end, index;
2387         struct cifs_writedata *wdata;
2388         struct cifsFileInfo *cfile = NULL;
2389         int rc = 0;
2390         int saved_rc = 0;
2391         unsigned int xid;
2392
2393         /*
2394          * If wsize is smaller than the page cache size, default to writing
2395          * one page at a time via cifs_writepage
2396          */
2397         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2398                 return generic_writepages(mapping, wbc);
2399
2400         xid = get_xid();
2401         if (wbc->range_cyclic) {
2402                 index = mapping->writeback_index; /* Start from prev offset */
2403                 end = -1;
2404         } else {
2405                 index = wbc->range_start >> PAGE_SHIFT;
2406                 end = wbc->range_end >> PAGE_SHIFT;
2407                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2408                         range_whole = true;
2409                 scanned = true;
2410         }
2411         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2412
2413 retry:
2414         while (!done && index <= end) {
2415                 unsigned int i, nr_pages, found_pages, wsize;
2416                 pgoff_t next = 0, tofind, saved_index = index;
2417                 struct cifs_credits credits_on_stack;
2418                 struct cifs_credits *credits = &credits_on_stack;
2419                 int get_file_rc = 0;
2420
2421                 if (cfile)
2422                         cifsFileInfo_put(cfile);
2423
2424                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2425
2426                 /* in case of an error store it to return later */
2427                 if (rc)
2428                         get_file_rc = rc;
2429
2430                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2431                                                    &wsize, credits);
2432                 if (rc != 0) {
2433                         done = true;
2434                         break;
2435                 }
2436
2437                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2438
2439                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2440                                                   &found_pages);
2441                 if (!wdata) {
2442                         rc = -ENOMEM;
2443                         done = true;
2444                         add_credits_and_wake_if(server, credits, 0);
2445                         break;
2446                 }
2447
2448                 if (found_pages == 0) {
2449                         kref_put(&wdata->refcount, cifs_writedata_release);
2450                         add_credits_and_wake_if(server, credits, 0);
2451                         break;
2452                 }
2453
2454                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2455                                                end, &index, &next, &done);
2456
2457                 /* nothing to write? */
2458                 if (nr_pages == 0) {
2459                         kref_put(&wdata->refcount, cifs_writedata_release);
2460                         add_credits_and_wake_if(server, credits, 0);
2461                         continue;
2462                 }
2463
2464                 wdata->credits = credits_on_stack;
2465                 wdata->cfile = cfile;
2466                 wdata->server = server;
2467                 cfile = NULL;
2468
2469                 if (!wdata->cfile) {
2470                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2471                                  get_file_rc);
2472                         if (is_retryable_error(get_file_rc))
2473                                 rc = get_file_rc;
2474                         else
2475                                 rc = -EBADF;
2476                 } else
2477                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2478
2479                 for (i = 0; i < nr_pages; ++i)
2480                         unlock_page(wdata->pages[i]);
2481
2482                 /* send failure -- clean up the mess */
2483                 if (rc != 0) {
2484                         add_credits_and_wake_if(server, &wdata->credits, 0);
2485                         for (i = 0; i < nr_pages; ++i) {
2486                                 if (is_retryable_error(rc))
2487                                         redirty_page_for_writepage(wbc,
2488                                                            wdata->pages[i]);
2489                                 else
2490                                         SetPageError(wdata->pages[i]);
2491                                 end_page_writeback(wdata->pages[i]);
2492                                 put_page(wdata->pages[i]);
2493                         }
2494                         if (!is_retryable_error(rc))
2495                                 mapping_set_error(mapping, rc);
2496                 }
2497                 kref_put(&wdata->refcount, cifs_writedata_release);
2498
2499                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2500                         index = saved_index;
2501                         continue;
2502                 }
2503
2504                 /* Return immediately if we received a signal during writing */
2505                 if (is_interrupt_error(rc)) {
2506                         done = true;
2507                         break;
2508                 }
2509
2510                 if (rc != 0 && saved_rc == 0)
2511                         saved_rc = rc;
2512
2513                 wbc->nr_to_write -= nr_pages;
2514                 if (wbc->nr_to_write <= 0)
2515                         done = true;
2516
2517                 index = next;
2518         }
2519
2520         if (!scanned && !done) {
2521                 /*
2522                  * We hit the last page and there is more work to be done: wrap
2523                  * back to the start of the file
2524                  */
2525                 scanned = true;
2526                 index = 0;
2527                 goto retry;
2528         }
2529
2530         if (saved_rc != 0)
2531                 rc = saved_rc;
2532
2533         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2534                 mapping->writeback_index = index;
2535
2536         if (cfile)
2537                 cifsFileInfo_put(cfile);
2538         free_xid(xid);
2539         /* Indication to update ctime and mtime as close is deferred */
2540         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2541         return rc;
2542 }
2543
2544 static int
2545 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2546 {
2547         int rc;
2548         unsigned int xid;
2549
2550         xid = get_xid();
2551 /* BB add check for wbc flags */
2552         get_page(page);
2553         if (!PageUptodate(page))
2554                 cifs_dbg(FYI, "ppw - page not up to date\n");
2555
2556         /*
2557          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2558          *
2559          * A writepage() implementation always needs to do either this,
2560          * or re-dirty the page with "redirty_page_for_writepage()" in
2561          * the case of a failure.
2562          *
2563          * Just unlocking the page will cause the radix tree tag-bits
2564          * to fail to update with the state of the page correctly.
2565          */
2566         set_page_writeback(page);
2567 retry_write:
2568         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2569         if (is_retryable_error(rc)) {
2570                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2571                         goto retry_write;
2572                 redirty_page_for_writepage(wbc, page);
2573         } else if (rc != 0) {
2574                 SetPageError(page);
2575                 mapping_set_error(page->mapping, rc);
2576         } else {
2577                 SetPageUptodate(page);
2578         }
2579         end_page_writeback(page);
2580         put_page(page);
2581         free_xid(xid);
2582         return rc;
2583 }
2584
2585 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2586 {
2587         int rc = cifs_writepage_locked(page, wbc);
2588         unlock_page(page);
2589         return rc;
2590 }
2591
2592 static int cifs_write_end(struct file *file, struct address_space *mapping,
2593                         loff_t pos, unsigned len, unsigned copied,
2594                         struct page *page, void *fsdata)
2595 {
2596         int rc;
2597         struct inode *inode = mapping->host;
2598         struct cifsFileInfo *cfile = file->private_data;
2599         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2600         __u32 pid;
2601
2602         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2603                 pid = cfile->pid;
2604         else
2605                 pid = current->tgid;
2606
2607         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2608                  page, pos, copied);
2609
2610         if (PageChecked(page)) {
2611                 if (copied == len)
2612                         SetPageUptodate(page);
2613                 ClearPageChecked(page);
2614         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2615                 SetPageUptodate(page);
2616
2617         if (!PageUptodate(page)) {
2618                 char *page_data;
2619                 unsigned offset = pos & (PAGE_SIZE - 1);
2620                 unsigned int xid;
2621
2622                 xid = get_xid();
2623                 /* this is probably better than directly calling
2624                    cifs_partialpagewrite since in this function the file
2625                    handle is known, which we might as well leverage */
2626                 /* BB check if anything else missing out of ppw
2627                    such as updating last write time */
2628                 page_data = kmap(page);
2629                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2630                 /* if (rc < 0) should we set writebehind rc? */
2631                 kunmap(page);
2632
2633                 free_xid(xid);
2634         } else {
2635                 rc = copied;
2636                 pos += copied;
2637                 set_page_dirty(page);
2638         }
2639
2640         if (rc > 0) {
2641                 spin_lock(&inode->i_lock);
2642                 if (pos > inode->i_size) {
2643                         i_size_write(inode, pos);
2644                         inode->i_blocks = (512 - 1 + pos) >> 9;
2645                 }
2646                 spin_unlock(&inode->i_lock);
2647         }
2648
2649         unlock_page(page);
2650         put_page(page);
2651         /* Indication to update ctime and mtime as close is deferred */
2652         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2653
2654         return rc;
2655 }
2656
2657 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2658                       int datasync)
2659 {
2660         unsigned int xid;
2661         int rc = 0;
2662         struct cifs_tcon *tcon;
2663         struct TCP_Server_Info *server;
2664         struct cifsFileInfo *smbfile = file->private_data;
2665         struct inode *inode = file_inode(file);
2666         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2667
2668         rc = file_write_and_wait_range(file, start, end);
2669         if (rc) {
2670                 trace_cifs_fsync_err(inode->i_ino, rc);
2671                 return rc;
2672         }
2673
2674         xid = get_xid();
2675
2676         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2677                  file, datasync);
2678
2679         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2680                 rc = cifs_zap_mapping(inode);
2681                 if (rc) {
2682                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2683                         rc = 0; /* don't care about it in fsync */
2684                 }
2685         }
2686
2687         tcon = tlink_tcon(smbfile->tlink);
2688         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2689                 server = tcon->ses->server;
2690                 if (server->ops->flush)
2691                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2692                 else
2693                         rc = -ENOSYS;
2694         }
2695
2696         free_xid(xid);
2697         return rc;
2698 }
2699
2700 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2701 {
2702         unsigned int xid;
2703         int rc = 0;
2704         struct cifs_tcon *tcon;
2705         struct TCP_Server_Info *server;
2706         struct cifsFileInfo *smbfile = file->private_data;
2707         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2708
2709         rc = file_write_and_wait_range(file, start, end);
2710         if (rc) {
2711                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2712                 return rc;
2713         }
2714
2715         xid = get_xid();
2716
2717         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2718                  file, datasync);
2719
2720         tcon = tlink_tcon(smbfile->tlink);
2721         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2722                 server = tcon->ses->server;
2723                 if (server->ops->flush)
2724                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2725                 else
2726                         rc = -ENOSYS;
2727         }
2728
2729         free_xid(xid);
2730         return rc;
2731 }
2732
2733 /*
2734  * As the file closes, flush all cached write data for this inode, checking
2735  * for write-behind errors.
2736  */
2737 int cifs_flush(struct file *file, fl_owner_t id)
2738 {
2739         struct inode *inode = file_inode(file);
2740         int rc = 0;
2741
2742         if (file->f_mode & FMODE_WRITE)
2743                 rc = filemap_write_and_wait(inode->i_mapping);
2744
2745         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2746         if (rc)
2747                 trace_cifs_flush_err(inode->i_ino, rc);
2748         return rc;
2749 }
2750
2751 static int
2752 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2753 {
2754         int rc = 0;
2755         unsigned long i;
2756
2757         for (i = 0; i < num_pages; i++) {
2758                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2759                 if (!pages[i]) {
2760                         /*
2761                          * save number of pages we have already allocated and
2762                          * return with ENOMEM error
2763                          */
2764                         num_pages = i;
2765                         rc = -ENOMEM;
2766                         break;
2767                 }
2768         }
2769
2770         if (rc) {
2771                 for (i = 0; i < num_pages; i++)
2772                         put_page(pages[i]);
2773         }
2774         return rc;
2775 }
2776
2777 static inline
2778 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2779 {
2780         size_t num_pages;
2781         size_t clen;
2782
2783         clen = min_t(const size_t, len, wsize);
2784         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2785
2786         if (cur_len)
2787                 *cur_len = clen;
2788
2789         return num_pages;
2790 }
2791
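/*
 * Worked example assuming 4K pages: len = 20000 with wsize = 65536
 * gives clen = 20000 and DIV_ROUND_UP(20000, 4096) = 5 pages.
 */
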
2792 static void
2793 cifs_uncached_writedata_release(struct kref *refcount)
2794 {
2795         int i;
2796         struct cifs_writedata *wdata = container_of(refcount,
2797                                         struct cifs_writedata, refcount);
2798
2799         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2800         for (i = 0; i < wdata->nr_pages; i++)
2801                 put_page(wdata->pages[i]);
2802         cifs_writedata_release(refcount);
2803 }
2804
2805 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2806
2807 static void
2808 cifs_uncached_writev_complete(struct work_struct *work)
2809 {
2810         struct cifs_writedata *wdata = container_of(work,
2811                                         struct cifs_writedata, work);
2812         struct inode *inode = d_inode(wdata->cfile->dentry);
2813         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2814
2815         spin_lock(&inode->i_lock);
2816         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2817         if (cifsi->server_eof > inode->i_size)
2818                 i_size_write(inode, cifsi->server_eof);
2819         spin_unlock(&inode->i_lock);
2820
2821         complete(&wdata->done);
2822         collect_uncached_write_data(wdata->ctx);
2823         /* the call below may free the last ref to the aio ctx */
2824         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2825 }
2826
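/*
 * Copy up to *len bytes from the user iovec into the wdata pages. On
 * return, *len holds the number of bytes actually copied and *num_pages
 * the number of pages used; returns -EFAULT if nothing could be copied.
 */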
2827 static int
2828 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2829                       size_t *len, unsigned long *num_pages)
2830 {
2831         size_t save_len, copied, bytes, cur_len = *len;
2832         unsigned long i, nr_pages = *num_pages;
2833
2834         save_len = cur_len;
2835         for (i = 0; i < nr_pages; i++) {
2836                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2837                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2838                 cur_len -= copied;
2839                 /*
2840                  * If we didn't copy as much as we expected, then that
2841                  * may mean we trod into an unmapped area. Stop copying
2842                  * at that point. On the next pass through the big
2843                  * loop, we'll likely end up getting a zero-length
2844                  * write and bailing out of it.
2845                  */
2846                 if (copied < bytes)
2847                         break;
2848         }
2849         cur_len = save_len - cur_len;
2850         *len = cur_len;
2851
2852         /*
2853          * If we have no data to send, then that probably means that
2854          * the copy above failed altogether. That's most likely because
2855          * the address in the iovec was bogus. Return -EFAULT and let
2856          * the caller free anything we allocated and bail out.
2857          */
2858         if (!cur_len)
2859                 return -EFAULT;
2860
2861         /*
2862          * i + 1 now represents the number of pages we actually used in
2863          * the copy phase above.
2864          */
2865         *num_pages = i + 1;
2866         return 0;
2867 }
2868
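/*
 * Resend a wdata that previously failed with -EAGAIN: reopen the file if
 * the handle went stale, wait until enough credits are available to send
 * the whole wdata in one request, then reissue the async write.
 */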
2869 static int
2870 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2871         struct cifs_aio_ctx *ctx)
2872 {
2873         unsigned int wsize;
2874         struct cifs_credits credits;
2875         int rc;
2876         struct TCP_Server_Info *server = wdata->server;
2877
2878         do {
2879                 if (wdata->cfile->invalidHandle) {
2880                         rc = cifs_reopen_file(wdata->cfile, false);
2881                         if (rc == -EAGAIN)
2882                                 continue;
2883                         else if (rc)
2884                                 break;
2885                 }
2886
2888                 /*
2889                  * Wait for credits to resend this wdata.
2890                  * Note: we attempt to resend the whole wdata rather than
2891                  * in segments.
2892                  */
2893                 do {
2894                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2895                                                 &wsize, &credits);
2896                         if (rc)
2897                                 goto fail;
2898
2899                         if (wsize < wdata->bytes) {
2900                                 add_credits_and_wake_if(server, &credits, 0);
2901                                 msleep(1000);
2902                         }
2903                 } while (wsize < wdata->bytes);
2904                 wdata->credits = credits;
2905
2906                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2907
2908                 if (!rc) {
2909                         if (wdata->cfile->invalidHandle)
2910                                 rc = -EAGAIN;
2911                         else {
2912 #ifdef CONFIG_CIFS_SMB_DIRECT
2913                                 if (wdata->mr) {
2914                                         wdata->mr->need_invalidate = true;
2915                                         smbd_deregister_mr(wdata->mr);
2916                                         wdata->mr = NULL;
2917                                 }
2918 #endif
2919                                 rc = server->ops->async_writev(wdata,
2920                                         cifs_uncached_writedata_release);
2921                         }
2922                 }
2923
2924                 /* If the write was successfully sent, we are done */
2925                 if (!rc) {
2926                         list_add_tail(&wdata->list, wdata_list);
2927                         return 0;
2928                 }
2929
2930                 /* Roll back credits and retry if needed */
2931                 add_credits_and_wake_if(server, &wdata->credits, 0);
2932         } while (rc == -EAGAIN);
2933
2934 fail:
2935         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2936         return rc;
2937 }
2938
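/*
 * Split an uncached write into wsize-sized wdata requests and send them
 * asynchronously, appending each successfully issued wdata to @wdata_list.
 * For direct I/O the user pages are pinned; otherwise the data is copied
 * into freshly allocated pages first.
 */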
2939 static int
2940 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2941                      struct cifsFileInfo *open_file,
2942                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2943                      struct cifs_aio_ctx *ctx)
2944 {
2945         int rc = 0;
2946         size_t cur_len;
2947         unsigned long nr_pages, num_pages, i;
2948         struct cifs_writedata *wdata;
2949         struct iov_iter saved_from = *from;
2950         loff_t saved_offset = offset;
2951         pid_t pid;
2952         struct TCP_Server_Info *server;
2953         struct page **pagevec;
2954         size_t start;
2955         unsigned int xid;
2956
2957         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2958                 pid = open_file->pid;
2959         else
2960                 pid = current->tgid;
2961
2962         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2963         xid = get_xid();
2964
2965         do {
2966                 unsigned int wsize;
2967                 struct cifs_credits credits_on_stack;
2968                 struct cifs_credits *credits = &credits_on_stack;
2969
2970                 if (open_file->invalidHandle) {
2971                         rc = cifs_reopen_file(open_file, false);
2972                         if (rc == -EAGAIN)
2973                                 continue;
2974                         else if (rc)
2975                                 break;
2976                 }
2977
2978                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2979                                                    &wsize, credits);
2980                 if (rc)
2981                         break;
2982
2983                 cur_len = min_t(const size_t, len, wsize);
2984
2985                 if (ctx->direct_io) {
2986                         ssize_t result;
2987
2988                         result = iov_iter_get_pages_alloc(
2989                                 from, &pagevec, cur_len, &start);
2990                         if (result < 0) {
2991                                 cifs_dbg(VFS,
2992                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2993                                          result, iov_iter_type(from),
2994                                          from->iov_offset, from->count);
2995                                 dump_stack();
2996
2997                                 rc = result;
2998                                 add_credits_and_wake_if(server, credits, 0);
2999                                 break;
3000                         }
3001                         cur_len = (size_t)result;
3002                         iov_iter_advance(from, cur_len);
3003
3004                         nr_pages =
3005                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3006
3007                         wdata = cifs_writedata_direct_alloc(pagevec,
3008                                              cifs_uncached_writev_complete);
3009                         if (!wdata) {
3010                                 rc = -ENOMEM;
3011                                 add_credits_and_wake_if(server, credits, 0);
3012                                 break;
3013                         }
3014
3016                         wdata->page_offset = start;
3017                         wdata->tailsz =
3018                                 nr_pages > 1 ?
3019                                         cur_len - (PAGE_SIZE - start) -
3020                                         (nr_pages - 2) * PAGE_SIZE :
3021                                         cur_len;
3022                 } else {
3023                         nr_pages = get_numpages(wsize, len, &cur_len);
3024                         wdata = cifs_writedata_alloc(nr_pages,
3025                                              cifs_uncached_writev_complete);
3026                         if (!wdata) {
3027                                 rc = -ENOMEM;
3028                                 add_credits_and_wake_if(server, credits, 0);
3029                                 break;
3030                         }
3031
3032                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3033                         if (rc) {
3034                                 kvfree(wdata->pages);
3035                                 kfree(wdata);
3036                                 add_credits_and_wake_if(server, credits, 0);
3037                                 break;
3038                         }
3039
3040                         num_pages = nr_pages;
3041                         rc = wdata_fill_from_iovec(
3042                                 wdata, from, &cur_len, &num_pages);
3043                         if (rc) {
3044                                 for (i = 0; i < nr_pages; i++)
3045                                         put_page(wdata->pages[i]);
3046                                 kvfree(wdata->pages);
3047                                 kfree(wdata);
3048                                 add_credits_and_wake_if(server, credits, 0);
3049                                 break;
3050                         }
3051
3052                         /*
3053                          * Bring nr_pages down to the number of pages we
3054                          * actually used, and free any pages that we didn't use.
3055                          */
3056                         for ( ; nr_pages > num_pages; nr_pages--)
3057                                 put_page(wdata->pages[nr_pages - 1]);
3058
3059                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3060                 }
3061
3062                 wdata->sync_mode = WB_SYNC_ALL;
3063                 wdata->nr_pages = nr_pages;
3064                 wdata->offset = (__u64)offset;
3065                 wdata->cfile = cifsFileInfo_get(open_file);
3066                 wdata->server = server;
3067                 wdata->pid = pid;
3068                 wdata->bytes = cur_len;
3069                 wdata->pagesz = PAGE_SIZE;
3070                 wdata->credits = credits_on_stack;
3071                 wdata->ctx = ctx;
3072                 kref_get(&ctx->refcount);
3073
3074                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3075
3076                 if (!rc) {
3077                         if (wdata->cfile->invalidHandle)
3078                                 rc = -EAGAIN;
3079                         else
3080                                 rc = server->ops->async_writev(wdata,
3081                                         cifs_uncached_writedata_release);
3082                 }
3083
3084                 if (rc) {
3085                         add_credits_and_wake_if(server, &wdata->credits, 0);
3086                         kref_put(&wdata->refcount,
3087                                  cifs_uncached_writedata_release);
3088                         if (rc == -EAGAIN) {
3089                                 *from = saved_from;
3090                                 iov_iter_advance(from, offset - saved_offset);
3091                                 continue;
3092                         }
3093                         break;
3094                 }
3095
3096                 list_add_tail(&wdata->list, wdata_list);
3097                 offset += cur_len;
3098                 len -= cur_len;
3099         } while (len > 0);
3100
3101         free_xid(xid);
3102         return rc;
3103 }
3104
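/*
 * Collect the results of the wdata requests issued for an uncached write,
 * resending any that failed with -EAGAIN, and complete the aio ctx once
 * every request has finished.
 */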
3105 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3106 {
3107         struct cifs_writedata *wdata, *tmp;
3108         struct cifs_tcon *tcon;
3109         struct cifs_sb_info *cifs_sb;
3110         struct dentry *dentry = ctx->cfile->dentry;
3111         int rc;
3112
3113         tcon = tlink_tcon(ctx->cfile->tlink);
3114         cifs_sb = CIFS_SB(dentry->d_sb);
3115
3116         mutex_lock(&ctx->aio_mutex);
3117
3118         if (list_empty(&ctx->list)) {
3119                 mutex_unlock(&ctx->aio_mutex);
3120                 return;
3121         }
3122
3123         rc = ctx->rc;
3124         /*
3125          * Wait for and collect replies for any successful sends in order of
3126          * increasing offset. Once an error is hit, return without waiting
3127          * for any more replies.
3128          */
3129 restart_loop:
3130         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3131                 if (!rc) {
3132                         if (!try_wait_for_completion(&wdata->done)) {
3133                                 mutex_unlock(&ctx->aio_mutex);
3134                                 return;
3135                         }
3136
3137                         if (wdata->result)
3138                                 rc = wdata->result;
3139                         else
3140                                 ctx->total_len += wdata->bytes;
3141
3142                         /* resend call if it's a retryable error */
3143                         if (rc == -EAGAIN) {
3144                                 struct list_head tmp_list;
3145                                 struct iov_iter tmp_from = ctx->iter;
3146
3147                                 INIT_LIST_HEAD(&tmp_list);
3148                                 list_del_init(&wdata->list);
3149
3150                                 if (ctx->direct_io)
3151                                         rc = cifs_resend_wdata(
3152                                                 wdata, &tmp_list, ctx);
3153                                 else {
3154                                         iov_iter_advance(&tmp_from,
3155                                                  wdata->offset - ctx->pos);
3156
3157                                         rc = cifs_write_from_iter(wdata->offset,
3158                                                 wdata->bytes, &tmp_from,
3159                                                 ctx->cfile, cifs_sb, &tmp_list,
3160                                                 ctx);
3161
3162                                         kref_put(&wdata->refcount,
3163                                                 cifs_uncached_writedata_release);
3164                                 }
3165
3166                                 list_splice(&tmp_list, &ctx->list);
3167                                 goto restart_loop;
3168                         }
3169                 }
3170                 list_del_init(&wdata->list);
3171                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3172         }
3173
3174         cifs_stats_bytes_written(tcon, ctx->total_len);
3175         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3176
3177         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3178
3179         mutex_unlock(&ctx->aio_mutex);
3180
3181         if (ctx->iocb && ctx->iocb->ki_complete)
3182                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3183         else
3184                 complete(&ctx->done);
3185 }
3186
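/*
 * Common back end for cifs_user_writev() and cifs_direct_writev(): set up
 * an aio ctx, issue the uncached/direct write requests, and either return
 * -EIOCBQUEUED for async iocbs or wait for completion for sync ones.
 */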
3187 static ssize_t __cifs_writev(
3188         struct kiocb *iocb, struct iov_iter *from, bool direct)
3189 {
3190         struct file *file = iocb->ki_filp;
3191         ssize_t total_written = 0;
3192         struct cifsFileInfo *cfile;
3193         struct cifs_tcon *tcon;
3194         struct cifs_sb_info *cifs_sb;
3195         struct cifs_aio_ctx *ctx;
3196         struct iov_iter saved_from = *from;
3197         size_t len = iov_iter_count(from);
3198         int rc;
3199
3200         /*
3201          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
3202          * In this case, fall back to the non-direct write function.
3203          * This could be improved by getting pages directly in ITER_KVEC.
3204          */
3205         if (direct && iov_iter_is_kvec(from)) {
3206                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3207                 direct = false;
3208         }
3209
3210         rc = generic_write_checks(iocb, from);
3211         if (rc <= 0)
3212                 return rc;
3213
3214         cifs_sb = CIFS_FILE_SB(file);
3215         cfile = file->private_data;
3216         tcon = tlink_tcon(cfile->tlink);
3217
3218         if (!tcon->ses->server->ops->async_writev)
3219                 return -ENOSYS;
3220
3221         ctx = cifs_aio_ctx_alloc();
3222         if (!ctx)
3223                 return -ENOMEM;
3224
3225         ctx->cfile = cifsFileInfo_get(cfile);
3226
3227         if (!is_sync_kiocb(iocb))
3228                 ctx->iocb = iocb;
3229
3230         ctx->pos = iocb->ki_pos;
3231
3232         if (direct) {
3233                 ctx->direct_io = true;
3234                 ctx->iter = *from;
3235                 ctx->len = len;
3236         } else {
3237                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3238                 if (rc) {
3239                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3240                         return rc;
3241                 }
3242         }
3243
3244         /* grab a lock here because write response handlers can access ctx */
3245         mutex_lock(&ctx->aio_mutex);
3246
3247         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3248                                   cfile, cifs_sb, &ctx->list, ctx);
3249
3250         /*
3251          * If at least one write was successfully sent, then discard any rc
3252          * value from the later writes. If the remaining writes succeed, then
3253          * we'll end up returning whatever was written. If they fail, then
3254          * we'll get a new rc value from them.
3255          */
3256         if (!list_empty(&ctx->list))
3257                 rc = 0;
3258
3259         mutex_unlock(&ctx->aio_mutex);
3260
3261         if (rc) {
3262                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3263                 return rc;
3264         }
3265
3266         if (!is_sync_kiocb(iocb)) {
3267                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3268                 return -EIOCBQUEUED;
3269         }
3270
3271         rc = wait_for_completion_killable(&ctx->done);
3272         if (rc) {
3273                 mutex_lock(&ctx->aio_mutex);
3274                 ctx->rc = rc = -EINTR;
3275                 total_written = ctx->total_len;
3276                 mutex_unlock(&ctx->aio_mutex);
3277         } else {
3278                 rc = ctx->rc;
3279                 total_written = ctx->total_len;
3280         }
3281
3282         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3283
3284         if (unlikely(!total_written))
3285                 return rc;
3286
3287         iocb->ki_pos += total_written;
3288         return total_written;
3289 }
3290
3291 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3292 {
3293         return __cifs_writev(iocb, from, true);
3294 }
3295
3296 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3297 {
3298         return __cifs_writev(iocb, from, false);
3299 }
3300
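/*
 * Write through the page cache while holding inode_lock and lock_sem:
 * refuse with -EACCES if a mandatory brlock conflicts with the write,
 * otherwise use the generic path and sync if O_SYNC/O_DSYNC requires it.
 */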
3301 static ssize_t
3302 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3303 {
3304         struct file *file = iocb->ki_filp;
3305         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3306         struct inode *inode = file->f_mapping->host;
3307         struct cifsInodeInfo *cinode = CIFS_I(inode);
3308         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3309         ssize_t rc;
3310
3311         inode_lock(inode);
3312         /*
3313          * We need to hold the sem to be sure nobody modifies lock list
3314          * with a brlock that prevents writing.
3315          */
3316         down_read(&cinode->lock_sem);
3317
3318         rc = generic_write_checks(iocb, from);
3319         if (rc <= 0)
3320                 goto out;
3321
3322         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3323                                      server->vals->exclusive_lock_type, 0,
3324                                      NULL, CIFS_WRITE_OP))
3325                 rc = __generic_file_write_iter(iocb, from);
3326         else
3327                 rc = -EACCES;
3328 out:
3329         up_read(&cinode->lock_sem);
3330         inode_unlock(inode);
3331
3332         if (rc > 0)
3333                 rc = generic_write_sync(iocb, rc);
3334         return rc;
3335 }
3336
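/*
 * Strict cache mode write entry point: use the cached write path only when
 * we hold write caching rights, otherwise send the data straight to the
 * server and zap any read cache that the write has made stale.
 */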
3337 ssize_t
3338 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3339 {
3340         struct inode *inode = file_inode(iocb->ki_filp);
3341         struct cifsInodeInfo *cinode = CIFS_I(inode);
3342         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3343         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3344                                                 iocb->ki_filp->private_data;
3345         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3346         ssize_t written;
3347
3348         written = cifs_get_writer(cinode);
3349         if (written)
3350                 return written;
3351
3352         if (CIFS_CACHE_WRITE(cinode)) {
3353                 if (cap_unix(tcon->ses) &&
3354                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3355                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3356                         written = generic_file_write_iter(iocb, from);
3357                         goto out;
3358                 }
3359                 written = cifs_writev(iocb, from);
3360                 goto out;
3361         }
3362         /*
3363          * For non-oplocked files in strict cache mode we need to write the data
3364          * to the server exactly from pos to pos+len-1 rather than flush all
3365          * affected pages, because flushing may cause an error with mandatory
3366          * locks on these pages but not on the region from pos to pos+len-1.
3367          */
3368         written = cifs_user_writev(iocb, from);
3369         if (CIFS_CACHE_READ(cinode)) {
3370                 /*
3371                  * We have read level caching and we have just sent a write
3372                  * request to the server thus making data in the cache stale.
3373                  * Zap the cache and set oplock/lease level to NONE to avoid
3374                  * reading stale data from the cache. All subsequent read
3375                  * operations will read new data from the server.
3376                  */
3377                 cifs_zap_mapping(inode);
3378                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3379                          inode);
3380                 cinode->oplock = 0;
3381         }
3382 out:
3383         cifs_put_writer(cinode);
3384         return written;
3385 }
3386
3387 static struct cifs_readdata *
3388 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3389 {
3390         struct cifs_readdata *rdata;
3391
3392         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3393         if (rdata != NULL) {
3394                 rdata->pages = pages;
3395                 kref_init(&rdata->refcount);
3396                 INIT_LIST_HEAD(&rdata->list);
3397                 init_completion(&rdata->done);
3398                 INIT_WORK(&rdata->work, complete);
3399         }
3400
3401         return rdata;
3402 }
3403
3404 static struct cifs_readdata *
3405 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3406 {
3407         struct page **pages =
3408                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3409         struct cifs_readdata *ret = NULL;
3410
3411         if (pages) {
3412                 ret = cifs_readdata_direct_alloc(pages, complete);
3413                 if (!ret)
3414                         kfree(pages);
3415         }
3416
3417         return ret;
3418 }
3419
3420 void
3421 cifs_readdata_release(struct kref *refcount)
3422 {
3423         struct cifs_readdata *rdata = container_of(refcount,
3424                                         struct cifs_readdata, refcount);
3425 #ifdef CONFIG_CIFS_SMB_DIRECT
3426         if (rdata->mr) {
3427                 smbd_deregister_mr(rdata->mr);
3428                 rdata->mr = NULL;
3429         }
3430 #endif
3431         if (rdata->cfile)
3432                 cifsFileInfo_put(rdata->cfile);
3433
3434         kvfree(rdata->pages);
3435         kfree(rdata);
3436 }
3437
3438 static int
3439 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3440 {
3441         int rc = 0;
3442         struct page *page;
3443         unsigned int i;
3444
3445         for (i = 0; i < nr_pages; i++) {
3446                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3447                 if (!page) {
3448                         rc = -ENOMEM;
3449                         break;
3450                 }
3451                 rdata->pages[i] = page;
3452         }
3453
3454         if (rc) {
3455                 unsigned int nr_page_failed = i;
3456
3457                 for (i = 0; i < nr_page_failed; i++) {
3458                         put_page(rdata->pages[i]);
3459                         rdata->pages[i] = NULL;
3460                 }
3461         }
3462         return rc;
3463 }
3464
3465 static void
3466 cifs_uncached_readdata_release(struct kref *refcount)
3467 {
3468         struct cifs_readdata *rdata = container_of(refcount,
3469                                         struct cifs_readdata, refcount);
3470         unsigned int i;
3471
3472         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3473         for (i = 0; i < rdata->nr_pages; i++) {
3474                 put_page(rdata->pages[i]);
3475         }
3476         cifs_readdata_release(refcount);
3477 }
3478
3479 /**
3480  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3481  * @rdata:      the readdata response with list of pages holding data
3482  * @iter:       destination for our data
3483  *
3484  * This function copies data from a list of pages in a readdata response into
3485  * an array of iovecs. It will first calculate where the data should go
3486  * based on the info in the readdata and then copy the data into that spot.
3487  */
3488 static int
3489 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3490 {
3491         size_t remaining = rdata->got_bytes;
3492         unsigned int i;
3493
3494         for (i = 0; i < rdata->nr_pages; i++) {
3495                 struct page *page = rdata->pages[i];
3496                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3497                 size_t written;
3498
3499                 if (unlikely(iov_iter_is_pipe(iter))) {
3500                         void *addr = kmap_atomic(page);
3501
3502                         written = copy_to_iter(addr, copy, iter);
3503                         kunmap_atomic(addr);
3504                 } else
3505                         written = copy_page_to_iter(page, 0, copy, iter);
3506                 remaining -= written;
3507                 if (written < copy && iov_iter_count(iter) > 0)
3508                         break;
3509         }
3510         return remaining ? -EFAULT : 0;
3511 }
3512
3513 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3514
3515 static void
3516 cifs_uncached_readv_complete(struct work_struct *work)
3517 {
3518         struct cifs_readdata *rdata = container_of(work,
3519                                                 struct cifs_readdata, work);
3520
3521         complete(&rdata->done);
3522         collect_uncached_read_data(rdata->ctx);
3523         /* the call below may free the last ref to the aio ctx */
3524         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3525 }
3526
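/*
 * Fill the rdata pages with up to @len bytes of response data, either
 * copied from @iter when the payload is already buffered or read directly
 * from the socket; any wholly unused pages at the tail are released.
 */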
3527 static int
3528 uncached_fill_pages(struct TCP_Server_Info *server,
3529                     struct cifs_readdata *rdata, struct iov_iter *iter,
3530                     unsigned int len)
3531 {
3532         int result = 0;
3533         unsigned int i;
3534         unsigned int nr_pages = rdata->nr_pages;
3535         unsigned int page_offset = rdata->page_offset;
3536
3537         rdata->got_bytes = 0;
3538         rdata->tailsz = PAGE_SIZE;
3539         for (i = 0; i < nr_pages; i++) {
3540                 struct page *page = rdata->pages[i];
3541                 size_t n;
3542                 unsigned int segment_size = rdata->pagesz;
3543
3544                 if (i == 0)
3545                         segment_size -= page_offset;
3546                 else
3547                         page_offset = 0;
3548
3550                 if (len <= 0) {
3551                         /* no need to hold page hostage */
3552                         rdata->pages[i] = NULL;
3553                         rdata->nr_pages--;
3554                         put_page(page);
3555                         continue;
3556                 }
3557
3558                 n = len;
3559                 if (len >= segment_size)
3560                         /* enough data to fill the page */
3561                         n = segment_size;
3562                 else
3563                         rdata->tailsz = len;
3564                 len -= n;
3565
3566                 if (iter)
3567                         result = copy_page_from_iter(
3568                                         page, page_offset, n, iter);
3569 #ifdef CONFIG_CIFS_SMB_DIRECT
3570                 else if (rdata->mr)
3571                         result = n;
3572 #endif
3573                 else
3574                         result = cifs_read_page_from_socket(
3575                                         server, page, page_offset, n);
3576                 if (result < 0)
3577                         break;
3578
3579                 rdata->got_bytes += result;
3580         }
3581
3582         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3583                                                 rdata->got_bytes : result;
3584 }
3585
3586 static int
3587 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3588                               struct cifs_readdata *rdata, unsigned int len)
3589 {
3590         return uncached_fill_pages(server, rdata, NULL, len);
3591 }
3592
3593 static int
3594 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3595                               struct cifs_readdata *rdata,
3596                               struct iov_iter *iter)
3597 {
3598         return uncached_fill_pages(server, rdata, iter, iter->count);
3599 }
3600
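/*
 * Resend an rdata that previously failed with -EAGAIN, mirroring
 * cifs_resend_wdata(): reopen the stale handle if needed, wait for enough
 * credits to cover the whole rdata, then reissue the async read.
 */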
3601 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3602                         struct list_head *rdata_list,
3603                         struct cifs_aio_ctx *ctx)
3604 {
3605         unsigned int rsize;
3606         struct cifs_credits credits;
3607         int rc;
3608         struct TCP_Server_Info *server;
3609
3610         /* XXX: should we pick a new channel here? */
3611         server = rdata->server;
3612
3613         do {
3614                 if (rdata->cfile->invalidHandle) {
3615                         rc = cifs_reopen_file(rdata->cfile, true);
3616                         if (rc == -EAGAIN)
3617                                 continue;
3618                         else if (rc)
3619                                 break;
3620                 }
3621
3622                 /*
3623                  * Wait for credits to resend this rdata.
3624                  * Note: we attempt to resend the whole rdata rather than
3625                  * in segments.
3626                  */
3627                 do {
3628                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3629                                                 &rsize, &credits);
3630
3631                         if (rc)
3632                                 goto fail;
3633
3634                         if (rsize < rdata->bytes) {
3635                                 add_credits_and_wake_if(server, &credits, 0);
3636                                 msleep(1000);
3637                         }
3638                 } while (rsize < rdata->bytes);
3639                 rdata->credits = credits;
3640
3641                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3642                 if (!rc) {
3643                         if (rdata->cfile->invalidHandle)
3644                                 rc = -EAGAIN;
3645                         else {
3646 #ifdef CONFIG_CIFS_SMB_DIRECT
3647                                 if (rdata->mr) {
3648                                         rdata->mr->need_invalidate = true;
3649                                         smbd_deregister_mr(rdata->mr);
3650                                         rdata->mr = NULL;
3651                                 }
3652 #endif
3653                                 rc = server->ops->async_readv(rdata);
3654                         }
3655                 }
3656
3657                 /* If the read was successfully sent, we are done */
3658                 if (!rc) {
3659                         /* Add to aio pending list */
3660                         list_add_tail(&rdata->list, rdata_list);
3661                         return 0;
3662                 }
3663
3664                 /* Roll back credits and retry if needed */
3665                 add_credits_and_wake_if(server, &rdata->credits, 0);
3666         } while (rc == -EAGAIN);
3667
3668 fail:
3669         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3670         return rc;
3671 }
3672
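/*
 * Split an uncached read into rsize-sized rdata requests and issue them
 * asynchronously, appending each successfully sent rdata to @rdata_list.
 * For direct I/O the destination user pages are pinned up front.
 */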
3673 static int
3674 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3675                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3676                      struct cifs_aio_ctx *ctx)
3677 {
3678         struct cifs_readdata *rdata;
3679         unsigned int npages, rsize;
3680         struct cifs_credits credits_on_stack;
3681         struct cifs_credits *credits = &credits_on_stack;
3682         size_t cur_len;
3683         int rc;
3684         pid_t pid;
3685         struct TCP_Server_Info *server;
3686         struct page **pagevec;
3687         size_t start;
3688         struct iov_iter direct_iov = ctx->iter;
3689
3690         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3691
3692         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3693                 pid = open_file->pid;
3694         else
3695                 pid = current->tgid;
3696
3697         if (ctx->direct_io)
3698                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3699
3700         do {
3701                 if (open_file->invalidHandle) {
3702                         rc = cifs_reopen_file(open_file, true);
3703                         if (rc == -EAGAIN)
3704                                 continue;
3705                         else if (rc)
3706                                 break;
3707                 }
3708
3709                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3710                                                    &rsize, credits);
3711                 if (rc)
3712                         break;
3713
3714                 cur_len = min_t(const size_t, len, rsize);
3715
3716                 if (ctx->direct_io) {
3717                         ssize_t result;
3718
3719                         result = iov_iter_get_pages_alloc(
3720                                         &direct_iov, &pagevec,
3721                                         cur_len, &start);
3722                         if (result < 0) {
3723                                 cifs_dbg(VFS,
3724                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3725                                          result, iov_iter_type(&direct_iov),
3726                                          direct_iov.iov_offset,
3727                                          direct_iov.count);
3728                                 dump_stack();
3729
3730                                 rc = result;
3731                                 add_credits_and_wake_if(server, credits, 0);
3732                                 break;
3733                         }
3734                         cur_len = (size_t)result;
3735                         iov_iter_advance(&direct_iov, cur_len);
3736
3737                         rdata = cifs_readdata_direct_alloc(
3738                                         pagevec, cifs_uncached_readv_complete);
3739                         if (!rdata) {
3740                                 add_credits_and_wake_if(server, credits, 0);
3741                                 rc = -ENOMEM;
3742                                 break;
3743                         }
3744
3745                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3746                         rdata->page_offset = start;
3747                         rdata->tailsz = npages > 1 ?
3748                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3749                                 cur_len;
3750
3751                 } else {
3753                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3754                         /* allocate a readdata struct */
3755                         rdata = cifs_readdata_alloc(npages,
3756                                             cifs_uncached_readv_complete);
3757                         if (!rdata) {
3758                                 add_credits_and_wake_if(server, credits, 0);
3759                                 rc = -ENOMEM;
3760                                 break;
3761                         }
3762
3763                         rc = cifs_read_allocate_pages(rdata, npages);
3764                         if (rc) {
3765                                 kvfree(rdata->pages);
3766                                 kfree(rdata);
3767                                 add_credits_and_wake_if(server, credits, 0);
3768                                 break;
3769                         }
3770
3771                         rdata->tailsz = PAGE_SIZE;
3772                 }
3773
3774                 rdata->server = server;
3775                 rdata->cfile = cifsFileInfo_get(open_file);
3776                 rdata->nr_pages = npages;
3777                 rdata->offset = offset;
3778                 rdata->bytes = cur_len;
3779                 rdata->pid = pid;
3780                 rdata->pagesz = PAGE_SIZE;
3781                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3782                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3783                 rdata->credits = credits_on_stack;
3784                 rdata->ctx = ctx;
3785                 kref_get(&ctx->refcount);
3786
3787                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3788
3789                 if (!rc) {
3790                         if (rdata->cfile->invalidHandle)
3791                                 rc = -EAGAIN;
3792                         else
3793                                 rc = server->ops->async_readv(rdata);
3794                 }
3795
3796                 if (rc) {
3797                         add_credits_and_wake_if(server, &rdata->credits, 0);
3798                         kref_put(&rdata->refcount,
3799                                 cifs_uncached_readdata_release);
3800                         if (rc == -EAGAIN) {
3801                                 iov_iter_revert(&direct_iov, cur_len);
3802                                 continue;
3803                         }
3804                         break;
3805                 }
3806
3807                 list_add_tail(&rdata->list, rdata_list);
3808                 offset += cur_len;
3809                 len -= cur_len;
3810         } while (len > 0);
3811
3812         return rc;
3813 }
3814
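/*
 * Collect the results of the rdata requests issued for an uncached read:
 * copy the received data to the destination iterator (non-direct case),
 * resend anything that failed with -EAGAIN, and complete the aio ctx.
 */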
3815 static void
3816 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3817 {
3818         struct cifs_readdata *rdata, *tmp;
3819         struct iov_iter *to = &ctx->iter;
3820         struct cifs_sb_info *cifs_sb;
3821         int rc;
3822
3823         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3824
3825         mutex_lock(&ctx->aio_mutex);
3826
3827         if (list_empty(&ctx->list)) {
3828                 mutex_unlock(&ctx->aio_mutex);
3829                 return;
3830         }
3831
3832         rc = ctx->rc;
3833         /* the loop below should proceed in the order of increasing offsets */
3834 again:
3835         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3836                 if (!rc) {
3837                         if (!try_wait_for_completion(&rdata->done)) {
3838                                 mutex_unlock(&ctx->aio_mutex);
3839                                 return;
3840                         }
3841
3842                         if (rdata->result == -EAGAIN) {
3843                                 /* resend call if it's a retryable error */
3844                                 struct list_head tmp_list;
3845                                 unsigned int got_bytes = rdata->got_bytes;
3846
3847                                 list_del_init(&rdata->list);
3848                                 INIT_LIST_HEAD(&tmp_list);
3849
3850                                 /*
3851                                  * Got part of the data and then a reconnect
3852                                  * happened -- fill the buffer and continue
3853                                  * reading.
3854                                  */
3855                                 if (got_bytes && got_bytes < rdata->bytes) {
3856                                         rc = 0;
3857                                         if (!ctx->direct_io)
3858                                                 rc = cifs_readdata_to_iov(rdata, to);
3859                                         if (rc) {
3860                                                 kref_put(&rdata->refcount,
3861                                                         cifs_uncached_readdata_release);
3862                                                 continue;
3863                                         }
3864                                 }
3865
3866                                 if (ctx->direct_io) {
3867                                         /*
3868                                          * Re-use rdata as this is a
3869                                          * direct I/O
3870                                          */
3871                                         rc = cifs_resend_rdata(
3872                                                 rdata,
3873                                                 &tmp_list, ctx);
3874                                 } else {
3875                                         rc = cifs_send_async_read(
3876                                                 rdata->offset + got_bytes,
3877                                                 rdata->bytes - got_bytes,
3878                                                 rdata->cfile, cifs_sb,
3879                                                 &tmp_list, ctx);
3880
3881                                         kref_put(&rdata->refcount,
3882                                                 cifs_uncached_readdata_release);
3883                                 }
3884
3885                                 list_splice(&tmp_list, &ctx->list);
3886
3887                                 goto again;
3888                         } else if (rdata->result)
3889                                 rc = rdata->result;
3890                         else if (!ctx->direct_io)
3891                                 rc = cifs_readdata_to_iov(rdata, to);
3892
3893                         /* if there was a short read -- discard anything left */
3894                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3895                                 rc = -ENODATA;
3896
3897                         ctx->total_len += rdata->got_bytes;
3898                 }
3899                 list_del_init(&rdata->list);
3900                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3901         }
3902
3903         if (!ctx->direct_io)
3904                 ctx->total_len = ctx->len - iov_iter_count(to);
3905
3906         /* mask nodata case */
3907         if (rc == -ENODATA)
3908                 rc = 0;
3909
3910         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3911
3912         mutex_unlock(&ctx->aio_mutex);
3913
3914         if (ctx->iocb && ctx->iocb->ki_complete)
3915                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3916         else
3917                 complete(&ctx->done);
3918 }
3919
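/*
 * Common back end for cifs_user_readv() and cifs_direct_readv(): set up an
 * aio ctx, issue the uncached/direct read requests, and either return
 * -EIOCBQUEUED for async iocbs or wait for completion for sync ones.
 */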
3920 static ssize_t __cifs_readv(
3921         struct kiocb *iocb, struct iov_iter *to, bool direct)
3922 {
3923         size_t len;
3924         struct file *file = iocb->ki_filp;
3925         struct cifs_sb_info *cifs_sb;
3926         struct cifsFileInfo *cfile;
3927         struct cifs_tcon *tcon;
3928         ssize_t rc, total_read = 0;
3929         loff_t offset = iocb->ki_pos;
3930         struct cifs_aio_ctx *ctx;
3931
3932         /*
3933          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3934          * so fall back to the data copy read path.
3935          * This could be improved by getting pages directly in ITER_KVEC.
3936          */
3937         if (direct && iov_iter_is_kvec(to)) {
3938                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3939                 direct = false;
3940         }
3941
3942         len = iov_iter_count(to);
3943         if (!len)
3944                 return 0;
3945
3946         cifs_sb = CIFS_FILE_SB(file);
3947         cfile = file->private_data;
3948         tcon = tlink_tcon(cfile->tlink);
3949
3950         if (!tcon->ses->server->ops->async_readv)
3951                 return -ENOSYS;
3952
3953         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3954                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3955
3956         ctx = cifs_aio_ctx_alloc();
3957         if (!ctx)
3958                 return -ENOMEM;
3959
3960         ctx->cfile = cifsFileInfo_get(cfile);
3961
3962         if (!is_sync_kiocb(iocb))
3963                 ctx->iocb = iocb;
3964
3965         if (iter_is_iovec(to))
3966                 ctx->should_dirty = true;
3967
3968         if (direct) {
3969                 ctx->pos = offset;
3970                 ctx->direct_io = true;
3971                 ctx->iter = *to;
3972                 ctx->len = len;
3973         } else {
3974                 rc = setup_aio_ctx_iter(ctx, to, READ);
3975                 if (rc) {
3976                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3977                         return rc;
3978                 }
3979                 len = ctx->len;
3980         }
3981
3982         /* grab a lock here because read response handlers can access ctx */
3983         mutex_lock(&ctx->aio_mutex);
3984
3985         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3986
3987         /* if at least one read request was successfully sent, then reset rc */
3988         if (!list_empty(&ctx->list))
3989                 rc = 0;
3990
3991         mutex_unlock(&ctx->aio_mutex);
3992
3993         if (rc) {
3994                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3995                 return rc;
3996         }
3997
3998         if (!is_sync_kiocb(iocb)) {
3999                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4000                 return -EIOCBQUEUED;
4001         }
4002
4003         rc = wait_for_completion_killable(&ctx->done);
4004         if (rc) {
4005                 mutex_lock(&ctx->aio_mutex);
4006                 ctx->rc = rc = -EINTR;
4007                 total_read = ctx->total_len;
4008                 mutex_unlock(&ctx->aio_mutex);
4009         } else {
4010                 rc = ctx->rc;
4011                 total_read = ctx->total_len;
4012         }
4013
4014         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4015
4016         if (total_read) {
4017                 iocb->ki_pos += total_read;
4018                 return total_read;
4019         }
4020         return rc;
4021 }
4022
4023 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4024 {
4025         return __cifs_readv(iocb, to, true);
4026 }
4027
4028 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4029 {
4030         return __cifs_readv(iocb, to, false);
4031 }
4032
4033 ssize_t
4034 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4035 {
4036         struct inode *inode = file_inode(iocb->ki_filp);
4037         struct cifsInodeInfo *cinode = CIFS_I(inode);
4038         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4039         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4040                                                 iocb->ki_filp->private_data;
4041         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4042         int rc = -EACCES;
4043
4044         /*
4045          * In strict cache mode we need to read from the server all the time
4046          * if we don't have a level II oplock, because the server can delay the
4047          * mtime change - so we can't make a decision about invalidating the
4048          * inode. We can also fail when reading pages if there are mandatory
4049          * locks on pages affected by this read but not on the region from pos
4050          * to pos+len-1.
4051          */
4052         if (!CIFS_CACHE_READ(cinode))
4053                 return cifs_user_readv(iocb, to);
4054
4055         if (cap_unix(tcon->ses) &&
4056             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4057             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4058                 return generic_file_read_iter(iocb, to);
4059
4060         /*
4061          * We need to hold the sem to be sure nobody modifies lock list
4062          * with a brlock that prevents reading.
4063          */
4064         down_read(&cinode->lock_sem);
4065         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4066                                      tcon->ses->server->vals->shared_lock_type,
4067                                      0, NULL, CIFS_READ_OP))
4068                 rc = generic_file_read_iter(iocb, to);
4069         up_read(&cinode->lock_sem);
4070         return rc;
4071 }
4072
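/*
 * Synchronous read path: issue sync_read requests of at most rsize bytes,
 * retrying on -EAGAIN, until read_size bytes have been read or the server
 * returns an error or a zero-length read.
 */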
4073 static ssize_t
4074 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4075 {
4076         int rc = -EACCES;
4077         unsigned int bytes_read = 0;
4078         unsigned int total_read;
4079         unsigned int current_read_size;
4080         unsigned int rsize;
4081         struct cifs_sb_info *cifs_sb;
4082         struct cifs_tcon *tcon;
4083         struct TCP_Server_Info *server;
4084         unsigned int xid;
4085         char *cur_offset;
4086         struct cifsFileInfo *open_file;
4087         struct cifs_io_parms io_parms = {0};
4088         int buf_type = CIFS_NO_BUFFER;
4089         __u32 pid;
4090
4091         xid = get_xid();
4092         cifs_sb = CIFS_FILE_SB(file);
4093
4094         /* FIXME: set up handlers for larger reads and/or convert to async */
4095         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4096
4097         if (file->private_data == NULL) {
4098                 rc = -EBADF;
4099                 free_xid(xid);
4100                 return rc;
4101         }
4102         open_file = file->private_data;
4103         tcon = tlink_tcon(open_file->tlink);
4104         server = cifs_pick_channel(tcon->ses);
4105
4106         if (!server->ops->sync_read) {
4107                 free_xid(xid);
4108                 return -ENOSYS;
4109         }
4110
4111         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4112                 pid = open_file->pid;
4113         else
4114                 pid = current->tgid;
4115
4116         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4117                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4118
4119         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4120              total_read += bytes_read, cur_offset += bytes_read) {
4121                 do {
4122                         current_read_size = min_t(uint, read_size - total_read,
4123                                                   rsize);
4124                         /*
4125                          * For Windows ME and 9x we do not want to request more
4126                          * than was negotiated, since the server will refuse
4127                          * the read otherwise.
4128                          */
4129                         if (!(tcon->ses->capabilities &
4130                                 tcon->ses->server->vals->cap_large_files)) {
4131                                 current_read_size = min_t(uint,
4132                                         current_read_size, CIFSMaxBufSize);
4133                         }
4134                         if (open_file->invalidHandle) {
4135                                 rc = cifs_reopen_file(open_file, true);
4136                                 if (rc != 0)
4137                                         break;
4138                         }
4139                         io_parms.pid = pid;
4140                         io_parms.tcon = tcon;
4141                         io_parms.offset = *offset;
4142                         io_parms.length = current_read_size;
4143                         io_parms.server = server;
4144                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4145                                                     &bytes_read, &cur_offset,
4146                                                     &buf_type);
4147                 } while (rc == -EAGAIN);
4148
4149                 if (rc || (bytes_read == 0)) {
4150                         if (total_read) {
4151                                 break;
4152                         } else {
4153                                 free_xid(xid);
4154                                 return rc;
4155                         }
4156                 } else {
4157                         cifs_stats_bytes_read(tcon, total_read);
4158                         *offset += bytes_read;
4159                 }
4160         }
4161         free_xid(xid);
4162         return total_read;
4163 }
4164
4165 /*
4166  * If the page is mmap'ed into a process' page tables, then we need to make
4167  * sure that it doesn't change while being written back.
4168  */
4169 static vm_fault_t
4170 cifs_page_mkwrite(struct vm_fault *vmf)
4171 {
4172         struct page *page = vmf->page;
4173
4174         lock_page(page);
4175         return VM_FAULT_LOCKED;
4176 }
4177
4178 static const struct vm_operations_struct cifs_file_vm_ops = {
4179         .fault = filemap_fault,
4180         .map_pages = filemap_map_pages,
4181         .page_mkwrite = cifs_page_mkwrite,
4182 };
4183
4184 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4185 {
4186         int xid, rc = 0;
4187         struct inode *inode = file_inode(file);
4188
4189         xid = get_xid();
4190
4191         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4192                 rc = cifs_zap_mapping(inode);
4193         if (!rc)
4194                 rc = generic_file_mmap(file, vma);
4195         if (!rc)
4196                 vma->vm_ops = &cifs_file_vm_ops;
4197
4198         free_xid(xid);
4199         return rc;
4200 }
4201
4202 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4203 {
4204         int rc, xid;
4205
4206         xid = get_xid();
4207
4208         rc = cifs_revalidate_file(file);
4209         if (rc)
4210                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4211                          rc);
4212         if (!rc)
4213                 rc = generic_file_mmap(file, vma);
4214         if (!rc)
4215                 vma->vm_ops = &cifs_file_vm_ops;
4216
4217         free_xid(xid);
4218         return rc;
4219 }
4220
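/*
 * Completion work for an async read: for each page, put it on the LRU,
 * mark it uptodate if the read succeeded (fully, or partially before an
 * -EAGAIN), unlock it, hand successful pages to fscache, and drop the
 * page reference taken for the read.
 */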
4221 static void
4222 cifs_readv_complete(struct work_struct *work)
4223 {
4224         unsigned int i, got_bytes;
4225         struct cifs_readdata *rdata = container_of(work,
4226                                                 struct cifs_readdata, work);
4227
4228         got_bytes = rdata->got_bytes;
4229         for (i = 0; i < rdata->nr_pages; i++) {
4230                 struct page *page = rdata->pages[i];
4231
4232                 lru_cache_add(page);
4233
4234                 if (rdata->result == 0 ||
4235                     (rdata->result == -EAGAIN && got_bytes)) {
4236                         flush_dcache_page(page);
4237                         SetPageUptodate(page);
4238                 }
4239
4240                 unlock_page(page);
4241
4242                 if (rdata->result == 0 ||
4243                     (rdata->result == -EAGAIN && got_bytes))
4244                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4245
4246                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4247
4248                 put_page(page);
4249                 rdata->pages[i] = NULL;
4250         }
4251         kref_put(&rdata->refcount, cifs_readdata_release);
4252 }
4253
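/*
 * Fill the pages of an rdata from one of three sources: an iov_iter
 * (payload already in memory), an SMBDirect memory registration (data
 * already placed by RDMA), or directly from the TCP socket. A short
 * tail is zero-filled, pages past the server's EOF are zeroed and
 * marked uptodate, and pages for which no data will arrive are
 * released.
 */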
4254 static int
4255 readpages_fill_pages(struct TCP_Server_Info *server,
4256                      struct cifs_readdata *rdata, struct iov_iter *iter,
4257                      unsigned int len)
4258 {
4259         int result = 0;
4260         unsigned int i;
4261         u64 eof;
4262         pgoff_t eof_index;
4263         unsigned int nr_pages = rdata->nr_pages;
4264         unsigned int page_offset = rdata->page_offset;
4265
4266         /* determine the eof that the server (probably) has */
4267         eof = CIFS_I(rdata->mapping->host)->server_eof;
4268         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4269         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4270
4271         rdata->got_bytes = 0;
4272         rdata->tailsz = PAGE_SIZE;
4273         for (i = 0; i < nr_pages; i++) {
4274                 struct page *page = rdata->pages[i];
4275                 unsigned int to_read = rdata->pagesz;
4276                 size_t n;
4277
4278                 if (i == 0)
4279                         to_read -= page_offset;
4280                 else
4281                         page_offset = 0;
4282
4283                 n = to_read;
4284
4285                 if (len >= to_read) {
4286                         len -= to_read;
4287                 } else if (len > 0) {
4288                         /* enough for partial page, fill and zero the rest */
4289                         zero_user(page, len + page_offset, to_read - len);
4290                         n = rdata->tailsz = len;
4291                         len = 0;
4292                 } else if (page->index > eof_index) {
4293                         /*
4294                          * The VFS will not try to do readahead past the
4295                          * i_size, but it's possible that we have outstanding
4296                          * writes with gaps in the middle and the i_size hasn't
4297                          * caught up yet. Populate those with zeroed out pages
4298                          * to prevent the VFS from repeatedly attempting to
4299                          * fill them until the writes are flushed.
4300                          */
4301                         zero_user(page, 0, PAGE_SIZE);
4302                         lru_cache_add(page);
4303                         flush_dcache_page(page);
4304                         SetPageUptodate(page);
4305                         unlock_page(page);
4306                         put_page(page);
4307                         rdata->pages[i] = NULL;
4308                         rdata->nr_pages--;
4309                         continue;
4310                 } else {
4311                         /* no need to hold page hostage */
4312                         lru_cache_add(page);
4313                         unlock_page(page);
4314                         put_page(page);
4315                         rdata->pages[i] = NULL;
4316                         rdata->nr_pages--;
4317                         continue;
4318                 }
4319
4320                 if (iter)
4321                         result = copy_page_from_iter(
4322                                         page, page_offset, n, iter);
4323 #ifdef CONFIG_CIFS_SMB_DIRECT
4324                 else if (rdata->mr)
4325                         result = n;
4326 #endif
4327                 else
4328                         result = cifs_read_page_from_socket(
4329                                         server, page, page_offset, n);
4330                 if (result < 0)
4331                         break;
4332
4333                 rdata->got_bytes += result;
4334         }
4335
4336         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4337                                                 rdata->got_bytes : result;
4338 }
4339
4340 static int
4341 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4342                                struct cifs_readdata *rdata, unsigned int len)
4343 {
4344         return readpages_fill_pages(server, rdata, NULL, len);
4345 }
4346
4347 static int
4348 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4349                                struct cifs_readdata *rdata,
4350                                struct iov_iter *iter)
4351 {
4352         return readpages_fill_pages(server, rdata, iter, iter->count);
4353 }
4354
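/*
 * Peel consecutive pages off @page_list into @tmplist while their
 * indexes stay contiguous and the total stays within @rsize. On return,
 * *offset, *bytes and *nr_pages describe the single contiguous read to
 * issue for the batch.
 */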
4355 static int
4356 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4357                     unsigned int rsize, struct list_head *tmplist,
4358                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4359 {
4360         struct page *page, *tpage;
4361         unsigned int expected_index;
4362         int rc;
4363         gfp_t gfp = readahead_gfp_mask(mapping);
4364
4365         INIT_LIST_HEAD(tmplist);
4366
4367         page = lru_to_page(page_list);
4368
4369         /*
4370          * Lock the page and put it in the cache. Since no one else
4371          * should have access to this page, we're safe to simply set
4372          * PG_locked without checking it first.
4373          */
4374         __SetPageLocked(page);
4375         rc = add_to_page_cache_locked(page, mapping,
4376                                       page->index, gfp);
4377
4378         /* give up if we can't stick it in the cache */
4379         if (rc) {
4380                 __ClearPageLocked(page);
4381                 return rc;
4382         }
4383
4384         /* move first page to the tmplist */
4385         *offset = (loff_t)page->index << PAGE_SHIFT;
4386         *bytes = PAGE_SIZE;
4387         *nr_pages = 1;
4388         list_move_tail(&page->lru, tmplist);
4389
4390         /* now try and add more pages onto the request */
4391         expected_index = page->index + 1;
4392         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4393                 /* discontinuity? */
4394                 if (page->index != expected_index)
4395                         break;
4396
4397                 /* would this page push the read over the rsize? */
4398                 if (*bytes + PAGE_SIZE > rsize)
4399                         break;
4400
4401                 __SetPageLocked(page);
4402                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4403                 if (rc) {
4404                         __ClearPageLocked(page);
4405                         break;
4406                 }
4407                 list_move_tail(&page->lru, tmplist);
4408                 (*bytes) += PAGE_SIZE;
4409                 expected_index++;
4410                 (*nr_pages)++;
4411         }
4412         return rc;
4413 }
4414
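/*
 * ->readpages() address_space op: the VFS hands us a list of pages (in
 * order of declining index) that are not yet in the page cache; batches
 * of contiguous pages are sent to the server as async reads.
 */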
4415 static int cifs_readpages(struct file *file, struct address_space *mapping,
4416         struct list_head *page_list, unsigned num_pages)
4417 {
4418         int rc;
4419         int err = 0;
4420         struct list_head tmplist;
4421         struct cifsFileInfo *open_file = file->private_data;
4422         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4423         struct TCP_Server_Info *server;
4424         pid_t pid;
4425         unsigned int xid;
4426
4427         xid = get_xid();
4428         /*
4429          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4430          * immediately if the cookie is negative.
4431          *
4432          * After this point, every page in the list might have PG_fscache set,
4433          * so we will need to clean that up on every page we don't use.
4434          */
4435         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4436                                          &num_pages);
4437         if (rc == 0) {
4438                 free_xid(xid);
4439                 return rc;
4440         }
4441
4442         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4443                 pid = open_file->pid;
4444         else
4445                 pid = current->tgid;
4446
4447         rc = 0;
4448         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4449
4450         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4451                  __func__, file, mapping, num_pages);
4452
4453         /*
4454          * Start with the page at end of list and move it to private
4455          * list. Do the same with any following pages until we hit
4456          * the rsize limit, hit an index discontinuity, or run out of
4457          * pages. Issue the async read and then start the loop again
4458          * until the list is empty.
4459          *
4460          * Note that list order is important. The page_list is in
4461          * the order of declining indexes. When we put the pages in
4462          * the rdata->pages, then we want them in increasing order.
4463          */
4464         while (!list_empty(page_list) && !err) {
4465                 unsigned int i, nr_pages, bytes, rsize;
4466                 loff_t offset;
4467                 struct page *page, *tpage;
4468                 struct cifs_readdata *rdata;
4469                 struct cifs_credits credits_on_stack;
4470                 struct cifs_credits *credits = &credits_on_stack;
4471
4472                 if (open_file->invalidHandle) {
4473                         rc = cifs_reopen_file(open_file, true);
4474                         if (rc == -EAGAIN)
4475                                 continue;
4476                         else if (rc)
4477                                 break;
4478                 }
4479
4480                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4481                                                    &rsize, credits);
4482                 if (rc)
4483                         break;
4484
4485                 /*
4486                  * Give up immediately if rsize is too small to read an entire
4487                  * page. The VFS will fall back to readpage. We should never
4488                  * reach this point, however, since we set ra_pages to 0 when
4489                  * the rsize is smaller than a cache page.
4490                  */
4491                 if (unlikely(rsize < PAGE_SIZE)) {
4492                         add_credits_and_wake_if(server, credits, 0);
4493                         free_xid(xid);
4494                         return 0;
4495                 }
4496
4497                 nr_pages = 0;
4498                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4499                                          &nr_pages, &offset, &bytes);
4500                 if (!nr_pages) {
4501                         add_credits_and_wake_if(server, credits, 0);
4502                         break;
4503                 }
4504
4505                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4506                 if (!rdata) {
4507                         /* best to give up if we're out of mem */
4508                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4509                                 list_del(&page->lru);
4510                                 lru_cache_add(page);
4511                                 unlock_page(page);
4512                                 put_page(page);
4513                         }
4514                         rc = -ENOMEM;
4515                         add_credits_and_wake_if(server, credits, 0);
4516                         break;
4517                 }
4518
4519                 rdata->cfile = cifsFileInfo_get(open_file);
4520                 rdata->server = server;
4521                 rdata->mapping = mapping;
4522                 rdata->offset = offset;
4523                 rdata->bytes = bytes;
4524                 rdata->pid = pid;
4525                 rdata->pagesz = PAGE_SIZE;
4526                 rdata->tailsz = PAGE_SIZE;
4527                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4528                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4529                 rdata->credits = credits_on_stack;
4530
4531                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4532                         list_del(&page->lru);
4533                         rdata->pages[rdata->nr_pages++] = page;
4534                 }
4535
4536                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4537
4538                 if (!rc) {
4539                         if (rdata->cfile->invalidHandle)
4540                                 rc = -EAGAIN;
4541                         else
4542                                 rc = server->ops->async_readv(rdata);
4543                 }
4544
4545                 if (rc) {
4546                         add_credits_and_wake_if(server, &rdata->credits, 0);
4547                         for (i = 0; i < rdata->nr_pages; i++) {
4548                                 page = rdata->pages[i];
4549                                 lru_cache_add(page);
4550                                 unlock_page(page);
4551                                 put_page(page);
4552                         }
4553                         /* Fall back to readpage in error/reconnect cases */
4554                         kref_put(&rdata->refcount, cifs_readdata_release);
4555                         break;
4556                 }
4557
4558                 kref_put(&rdata->refcount, cifs_readdata_release);
4559         }
4560
4561         /* Any pages that have been shown to fscache but didn't get added to
4562          * the pagecache must be uncached before they get returned to the
4563          * allocator.
4564          */
4565         cifs_fscache_readpages_cancel(mapping->host, page_list);
4566         free_xid(xid);
4567         return rc;
4568 }
4569
4570 /*
4571  * cifs_readpage_worker must be called with the page pinned
4572  */
4573 static int cifs_readpage_worker(struct file *file, struct page *page,
4574         loff_t *poffset)
4575 {
4576         char *read_data;
4577         int rc;
4578
4579         /* Is the page cached? */
4580         rc = cifs_readpage_from_fscache(file_inode(file), page);
4581         if (rc == 0)
4582                 goto read_complete;
4583
4584         read_data = kmap(page);
4585         /* for reads over a certain size could initiate async read ahead */
4586         /* for reads over a certain size we could initiate async read-ahead */
4587         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4588
4589         if (rc < 0)
4590                 goto io_error;
4591         else
4592                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4593
4594         /* we do not want atime to be less than mtime, it broke some apps */
4595         file_inode(file)->i_atime = current_time(file_inode(file));
4596         if (timespec64_compare(&(file_inode(file)->i_atime),
4597                                &(file_inode(file)->i_mtime)) < 0)
4598                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4599         /* else atime keeps the current_time() value assigned above */
4600
4601         if (PAGE_SIZE > rc)
4602                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4603
4604         flush_dcache_page(page);
4605         SetPageUptodate(page);
4606
4607         /* send this page to the cache */
4608         cifs_readpage_to_fscache(file_inode(file), page);
4609
4610         rc = 0;
4611
4612 io_error:
4613         kunmap(page);
4614         unlock_page(page);
4615
4616 read_complete:
4617         return rc;
4618 }
4619
4620 static int cifs_readpage(struct file *file, struct page *page)
4621 {
4622         loff_t offset = page_file_offset(page);
4623         int rc = -EACCES;
4624         unsigned int xid;
4625
4626         xid = get_xid();
4627
4628         if (file->private_data == NULL) {
4629                 rc = -EBADF;
4630                 free_xid(xid);
4631                 return rc;
4632         }
4633
4634         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4635                  page, (int)offset, (int)offset);
4636
4637         rc = cifs_readpage_worker(file, page, &offset);
4638
4639         free_xid(xid);
4640         return rc;
4641 }
4642
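/* Return 1 if at least one open handle on the inode allows writing. */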
4643 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4644 {
4645         struct cifsFileInfo *open_file;
4646
4647         spin_lock(&cifs_inode->open_file_lock);
4648         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4649                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4650                         spin_unlock(&cifs_inode->open_file_lock);
4651                         return 1;
4652                 }
4653         }
4654         spin_unlock(&cifs_inode->open_file_lock);
4655         return 0;
4656 }
4657
4658 /* We do not want to update the file size from the server for inodes
4659    open for write, to avoid races with writepage extending the file.
4660    In the future we could consider refreshing the inode only on
4661    increases in the file size, but this is tricky to do without
4662    racing with writebehind page caching in the current Linux kernel
4663    design. */
4664 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4665 {
4666         if (!cifsInode)
4667                 return true;
4668
4669         if (is_inode_writable(cifsInode)) {
4670                 /* This inode is open for write at least once */
4671                 struct cifs_sb_info *cifs_sb;
4672
4673                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4674                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4675                         /* since there is no page cache to corrupt on
4676                            directio, we can change the size safely */
4677                         return true;
4678                 }
4679
4680                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4681                         return true;
4682
4683                 return false;
4684         } else
4685                 return true;
4686 }
4687
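/*
 * ->write_begin() op: try hard to avoid a read-modify-write cycle. A
 * full-page write needs no read at all; with a read oplock, a write at
 * or past EOF only needs the untouched parts zeroed (PG_checked records
 * that). Otherwise read the page once; if it still is not uptodate,
 * write_end falls back to a synchronous write of just the copied range.
 */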
4688 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4689                         loff_t pos, unsigned len, unsigned flags,
4690                         struct page **pagep, void **fsdata)
4691 {
4692         int oncethru = 0;
4693         pgoff_t index = pos >> PAGE_SHIFT;
4694         loff_t offset = pos & (PAGE_SIZE - 1);
4695         loff_t page_start = pos & PAGE_MASK;
4696         loff_t i_size;
4697         struct page *page;
4698         int rc = 0;
4699
4700         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4701
4702 start:
4703         page = grab_cache_page_write_begin(mapping, index, flags);
4704         if (!page) {
4705                 rc = -ENOMEM;
4706                 goto out;
4707         }
4708
4709         if (PageUptodate(page))
4710                 goto out;
4711
4712         /*
4713          * If we write a full page it will be up to date, no need to read from
4714          * the server. If the write is short, we'll end up doing a sync write
4715          * instead.
4716          */
4717         if (len == PAGE_SIZE)
4718                 goto out;
4719
4720         /*
4721          * optimize away the read when we have an oplock, and we're not
4722          * expecting to use any of the data we'd be reading in. That
4723          * is, when the page lies beyond the EOF, or straddles the EOF
4724          * and the write will cover all of the existing data.
4725          */
4726         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4727                 i_size = i_size_read(mapping->host);
4728                 if (page_start >= i_size ||
4729                     (offset == 0 && (pos + len) >= i_size)) {
4730                         zero_user_segments(page, 0, offset,
4731                                            offset + len,
4732                                            PAGE_SIZE);
4733                         /*
4734                          * PageChecked means that the parts of the page
4735                          * to which we're not writing are considered up
4736                          * to date. Once the data is copied to the
4737                          * page, it can be set uptodate.
4738                          */
4739                         SetPageChecked(page);
4740                         goto out;
4741                 }
4742         }
4743
4744         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4745                 /*
4746                  * might as well read a page, it is fast enough. If we get
4747                  * an error, we don't need to return it. cifs_write_end will
4748                  * do a sync write instead since PG_uptodate isn't set.
4749                  */
4750                 cifs_readpage_worker(file, page, &page_start);
4751                 put_page(page);
4752                 oncethru = 1;
4753                 goto start;
4754         } else {
4755                 /* we could try using another file handle if there is one,
4756                    but how would we lock it to prevent a close of that
4757                    handle racing with this read? In any case, this page
4758                    will be written out by write_end, so this is fine */
4759         }
4760 out:
4761         *pagep = page;
4762         return rc;
4763 }
4764
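/*
 * ->releasepage() op: refuse to release pages that still carry private
 * data; otherwise let fscache decide whether the page can go.
 */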
4765 static int cifs_release_page(struct page *page, gfp_t gfp)
4766 {
4767         if (PagePrivate(page))
4768                 return 0;
4769
4770         return cifs_fscache_release_page(page, gfp);
4771 }
4772
4773 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4774                                  unsigned int length)
4775 {
4776         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4777
4778         if (offset == 0 && length == PAGE_SIZE)
4779                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4780 }
4781
4782 static int cifs_launder_page(struct page *page)
4783 {
4784         int rc = 0;
4785         loff_t range_start = page_offset(page);
4786         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4787         struct writeback_control wbc = {
4788                 .sync_mode = WB_SYNC_ALL,
4789                 .nr_to_write = 0,
4790                 .range_start = range_start,
4791                 .range_end = range_end,
4792         };
4793
4794         cifs_dbg(FYI, "Launder page: %p\n", page);
4795
4796         if (clear_page_dirty_for_io(page))
4797                 rc = cifs_writepage_locked(page, &wbc);
4798
4799         cifs_fscache_invalidate_page(page, page->mapping->host);
4800         return rc;
4801 }
4802
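/*
 * Worker run when the server breaks our oplock/lease: wait out pending
 * writers, downgrade the cached oplock state, flush (and, if we lost
 * read caching, invalidate) the page cache, push cached byte-range
 * locks to the server, and finally acknowledge the break.
 */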
4803 void cifs_oplock_break(struct work_struct *work)
4804 {
4805         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4806                                                   oplock_break);
4807         struct inode *inode = d_inode(cfile->dentry);
4808         struct cifsInodeInfo *cinode = CIFS_I(inode);
4809         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4810         struct TCP_Server_Info *server = tcon->ses->server;
4811         int rc = 0;
4812         bool purge_cache = false;
4813         bool is_deferred = false;
4814         struct cifs_deferred_close *dclose;
4815
4816         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4817                         TASK_UNINTERRUPTIBLE);
4818
4819         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4820                                       cfile->oplock_epoch, &purge_cache);
4821
4822         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4823                                                 cifs_has_mand_locks(cinode)) {
4824                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4825                          inode);
4826                 cinode->oplock = 0;
4827         }
4828
4829         if (inode && S_ISREG(inode->i_mode)) {
4830                 if (CIFS_CACHE_READ(cinode))
4831                         break_lease(inode, O_RDONLY);
4832                 else
4833                         break_lease(inode, O_WRONLY);
4834                 rc = filemap_fdatawrite(inode->i_mapping);
4835                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4836                         rc = filemap_fdatawait(inode->i_mapping);
4837                         mapping_set_error(inode->i_mapping, rc);
4838                         cifs_zap_mapping(inode);
4839                 }
4840                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4841                 if (CIFS_CACHE_WRITE(cinode))
4842                         goto oplock_break_ack;
4843         }
4844
4845         rc = cifs_push_locks(cfile);
4846         if (rc)
4847                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4848
4849 oplock_break_ack:
4850         /*
4851          * When an oplock break is received and there are no active file
4852          * handles, only a cached one, schedule the deferred close
4853          * immediately so that a new open will not reuse the cached handle.
4854          */
4855         spin_lock(&CIFS_I(inode)->deferred_lock);
4856         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4857         spin_unlock(&CIFS_I(inode)->deferred_lock);
4858         if (is_deferred &&
4859             cfile->deferred_close_scheduled &&
4860             delayed_work_pending(&cfile->deferred)) {
4861                 if (cancel_delayed_work(&cfile->deferred)) {
4862                         _cifsFileInfo_put(cfile, false, false);
4863                         goto oplock_break_done;
4864                 }
4865         }
4866         /*
4867          * Releasing a stale oplock after a recent reconnect of the SMB
4868          * session, using a now incorrect file handle, is not a data
4869          * integrity issue, but do not bother sending an oplock release if
4870          * the session is still disconnected: the server already released it.
4871          */
4872         if (!cfile->oplock_break_cancelled) {
4873                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4874                                                              cinode);
4875                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4876         }
4877 oplock_break_done:
4878         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4879         cifs_done_oplock_break(cinode);
4880 }
4881
4882 /*
4883  * The presence of cifs_direct_io() in the address space ops vector
4884  * allows open() with the O_DIRECT flag, which would have failed otherwise.
4885  *
4886  * In the non-cached mode (mount with cache=none), we shunt off direct
4887  * read and write requests, so this method should never be called.
4888  *
4889  * Direct IO is not yet supported in the cached mode.
4890  */
4891 static ssize_t
4892 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4893 {
4894         /*
4895          * FIXME
4896          * Eventually need to support direct IO for non forcedirectio mounts
4897          */
4898         return -EINVAL;
4899 }
4900
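/*
 * Swap-over-SMB3 hook, called when user space enables swap on a file
 * that lives on a CIFS mount, e.g. (hypothetical) from C:
 *
 *     swapon("/mnt/cifs/swapfile", 0);   (see <sys/swap.h>)
 *
 * A file with fewer allocated 512-byte blocks than i_size is sparse,
 * and swapping to a file with holes is refused.
 */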
4901 static int cifs_swap_activate(struct swap_info_struct *sis,
4902                               struct file *swap_file, sector_t *span)
4903 {
4904         struct cifsFileInfo *cfile = swap_file->private_data;
4905         struct inode *inode = swap_file->f_mapping->host;
4906         unsigned long blocks;
4907         long long isize;
4908
4909         cifs_dbg(FYI, "swap activate\n");
4910
4911         spin_lock(&inode->i_lock);
4912         blocks = inode->i_blocks;
4913         isize = inode->i_size;
4914         spin_unlock(&inode->i_lock);
4915         if (blocks * 512 < isize) {
4916                 pr_warn("swap activate: swapfile has holes\n");
4917                 return -EINVAL;
4918         }
4919         *span = sis->pages;
4920
4921         pr_warn_once("Swap support over SMB3 is experimental\n");
4922
4923         /*
4924          * TODO: consider adding ACL (or documenting how) to prevent other
4925          * users (on this or other systems) from reading it
4926          */
4927
4928
4929         /* TODO: add sk_set_memalloc(inet) or similar */
4930
4931         if (cfile)
4932                 cfile->swapfile = true;
4933         /*
4934          * TODO: Since file already open, we can't open with DENY_ALL here
4935          * but we could add call to grab a byte range lock to prevent others
4936          * from reading or writing the file
4937          */
4938
4939         return 0;
4940 }
4941
4942 static void cifs_swap_deactivate(struct file *file)
4943 {
4944         struct cifsFileInfo *cfile = file->private_data;
4945
4946         cifs_dbg(FYI, "swap deactivate\n");
4947
4948         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4949
4950         if (cfile)
4951                 cfile->swapfile = false;
4952
4953         /* do we need to unpin (or unlock) the file */
4954 }
4955
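/*
 * Note that .direct_IO points at the stub above: real O_DIRECT
 * semantics come from the cache=none read/write paths rather than
 * from this table.
 */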
4956 const struct address_space_operations cifs_addr_ops = {
4957         .readpage = cifs_readpage,
4958         .readpages = cifs_readpages,
4959         .writepage = cifs_writepage,
4960         .writepages = cifs_writepages,
4961         .write_begin = cifs_write_begin,
4962         .write_end = cifs_write_end,
4963         .set_page_dirty = __set_page_dirty_nobuffers,
4964         .releasepage = cifs_release_page,
4965         .direct_IO = cifs_direct_io,
4966         .invalidatepage = cifs_invalidate_page,
4967         .launder_page = cifs_launder_page,
4968         /*
4969          * TODO: investigate and, if useful, add a cifs_migratePage
4970          * helper (under CONFIG_MIGRATION) in the future, and also
4971          * investigate and add an is_dirty_writeback helper if needed
4972          */
4973         .swap_activate = cifs_swap_activate,
4974         .swap_deactivate = cifs_swap_deactivate,
4975 };
4976
4977 /*
4978  * cifs_readpages requires the server to support a buffer large enough to
4979  * contain the header plus one complete page of data.  Otherwise, we need
4980  * to leave cifs_readpages out of the address space operations.
4981  */
4982 const struct address_space_operations cifs_addr_ops_smallbuf = {
4983         .readpage = cifs_readpage,
4984         .writepage = cifs_writepage,
4985         .writepages = cifs_writepages,
4986         .write_begin = cifs_write_begin,
4987         .write_end = cifs_write_end,
4988         .set_page_dirty = __set_page_dirty_nobuffers,
4989         .releasepage = cifs_release_page,
4990         .invalidatepage = cifs_invalidate_page,
4991         .launder_page = cifs_launder_page,
4992 };