/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

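/*
 * Map the POSIX access mode bits (O_RDONLY/O_WRONLY/O_RDWR) in the open
 * flags to the NT access mask requested from the server.
 */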
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

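/*
 * Map POSIX open flags to the SMB_O_* flags used by the CIFS POSIX
 * extensions open call.
 */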
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

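/*
 * Map the O_CREAT/O_EXCL/O_TRUNC combination in the open flags to the
 * matching CIFS create disposition (see the mapping table in
 * cifs_nt_open() below).
 */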
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

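/*
 * Open a file using the CIFS POSIX extensions. On success, optionally
 * return a new or refreshed inode via @pinode.
 */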
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

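/*
 * Open a file with a regular NT-style create call and refresh the inode
 * information from the metadata the server returns.
 */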
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the FILE_SUPERSEDE
 *      disposition (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing file
 *      rather than replacing it with a new one as FILE_SUPERSEDE does
 *      (using the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because large file support is always used by this
 *      client. The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        /* O_SYNC also includes the bit for O_DSYNC, so the following
           check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

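/* Return true if any open instance of the inode holds cached byte-range locks. */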
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

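/*
 * Allocate and initialize the per-open cifsFileInfo, link it into the
 * tcon and inode open-file lists, and apply the oplock granted by the
 * server (or a pending lease break, if one arrived first).
 */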
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

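/* Take an additional reference on a cifsFileInfo under its spinlock. */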
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock and
 * cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        spin_lock(&cifsi->open_file_lock);
        list_del(&cifs_file->flist);
        spin_unlock(&cifsi->open_file_lock);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

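/*
 * The ->open() method for regular files: try a POSIX-extensions open
 * when the server supports it, falling back to a regular NT open.
 */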
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* cannot refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through and retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode which we cannot set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

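/*
 * Reopen a file whose handle was invalidated (e.g. after a reconnect).
 * If @can_flush is true, flush write-behind data and refresh the inode
 * information from the server.
 */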
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Cannot grab the rename sem here, because various ops, including
         * those that already hold the rename sem, can end up causing
         * writepage to be called; and if the server was down, that means we
         * end up here and can never tell whether the caller already holds
         * the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout has expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush it; and since we do not know whether
         * we have data that would invalidate the current end of file on the
         * server, we cannot go to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

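/* The ->release() method for regular files: drop the cifsFileInfo reference. */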
int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

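/*
 * Walk the open files on a tree connection and reopen any invalidated
 * persistent handles; on failure, mark the tcon so this is retried.
 */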
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

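/*
 * The ->release() method for directories: close the search handle on the
 * server and free any buffered search results.
 */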
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

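/* Allocate and initialize a cifsLockInfo for the given byte range. */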
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

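/* Wake up all waiters blocked on the given lock. */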
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

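/*
 * Check every open instance of the inode for a lock conflicting with the
 * given range; see cifs_find_fid_lock_conflict() for the conflict rules.
 */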
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

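/* Add a lock to this open instance's lock list without conflict checks. */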
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

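/*
 * Send all cached byte-range locks of this open instance to the server,
 * batching as many LOCKING_ANDX ranges per request as the server's
 * maximum buffer size allows.
 */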
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

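/*
 * Hash the local lock owner pointer with a secret so raw kernel pointers
 * are not sent to the server as lock owner ids.
 */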
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

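/*
 * Send all cached POSIX locks of the inode to the server. The locks are
 * first copied under the flc_lock spinlock into preallocated local
 * structures, since the lock requests themselves can block.
 */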
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem, which
         * protects locking operations on this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated structure */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

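/*
 * Push all cached byte-range locks for this open instance to the server
 * and stop caching locks locally.
 */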
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

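/*
 * Decode a file_lock into the lock type, lock/unlock operation and wait
 * flag used by the mandatory locking calls, logging the flags seen.
 */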
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

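/*
 * Handle lock testing (F_GETLK): check for a conflicting lock locally
 * and, if necessary, on the server by trying to set (and then undo) the
 * requested lock.
 */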
1399 static int
1400 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1401            bool wait_flag, bool posix_lck, unsigned int xid)
1402 {
1403         int rc = 0;
1404         __u64 length = 1 + flock->fl_end - flock->fl_start;
1405         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1406         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1407         struct TCP_Server_Info *server = tcon->ses->server;
1408         __u16 netfid = cfile->fid.netfid;
1409
1410         if (posix_lck) {
1411                 int posix_lock_type;
1412
1413                 rc = cifs_posix_lock_test(file, flock);
1414                 if (!rc)
1415                         return rc;
1416
1417                 if (type & server->vals->shared_lock_type)
1418                         posix_lock_type = CIFS_RDLCK;
1419                 else
1420                         posix_lock_type = CIFS_WRLCK;
1421                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1422                                       hash_lockowner(flock->fl_owner),
1423                                       flock->fl_start, length, flock,
1424                                       posix_lock_type, wait_flag);
1425                 return rc;
1426         }
1427
1428         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1429         if (!rc)
1430                 return rc;
1431
1432         /* BB we could chain these into one lock request BB */
1433         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1434                                     1, 0, false);
1435         if (rc == 0) {
1436                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1437                                             type, 0, 1, false);
1438                 flock->fl_type = F_UNLCK;
1439                 if (rc != 0)
1440                         cifs_dbg(VFS, "Error %d unlocking previously locked range during test of lock\n",
1441                                  rc);
1442                 return 0;
1443         }
1444
1445         if (type & server->vals->shared_lock_type) {
1446                 flock->fl_type = F_WRLCK;
1447                 return 0;
1448         }
1449
1450         type &= ~server->vals->exclusive_lock_type;
1451
1452         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1453                                     type | server->vals->shared_lock_type,
1454                                     1, 0, false);
1455         if (rc == 0) {
1456                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1457                         type | server->vals->shared_lock_type, 0, 1, false);
1458                 flock->fl_type = F_RDLCK;
1459                 if (rc != 0)
1460                         cifs_dbg(VFS, "Error %d unlocking previously locked range during test of lock\n",
1461                                  rc);
1462         } else
1463                 flock->fl_type = F_WRLCK;
1464
1465         return 0;
1466 }
1467
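/* Move all byte-range lock entries from @source onto @dest */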
1468 void
1469 cifs_move_llist(struct list_head *source, struct list_head *dest)
1470 {
1471         struct list_head *li, *tmp;
1472         list_for_each_safe(li, tmp, source)
1473                 list_move(li, dest);
1474 }
1475
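/* Free a list of byte-range locks, waking up any waiters first */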
1476 void
1477 cifs_free_llist(struct list_head *llist)
1478 {
1479         struct cifsLockInfo *li, *tmp;
1480         list_for_each_entry_safe(li, tmp, llist, llist) {
1481                 cifs_del_lock_waiters(li);
1482                 list_del(&li->llist);
1483                 kfree(li);
1484         }
1485 }
1486
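/*
 * Unlock a byte range previously locked via LOCKING_ANDX: walk the file's
 * lock list once per lock type, batch matching entries into a
 * LOCKING_ANDX_RANGE array bounded by the server's maxBuf, and re-add any
 * entries whose unlock request fails on the server.
 */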
1487 int
1488 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1489                   unsigned int xid)
1490 {
1491         int rc = 0, stored_rc;
1492         static const int types[] = {
1493                 LOCKING_ANDX_LARGE_FILES,
1494                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1495         };
1496         unsigned int i;
1497         unsigned int max_num, num, max_buf;
1498         LOCKING_ANDX_RANGE *buf, *cur;
1499         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1500         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1501         struct cifsLockInfo *li, *tmp;
1502         __u64 length = 1 + flock->fl_end - flock->fl_start;
1503         struct list_head tmp_llist;
1504
1505         INIT_LIST_HEAD(&tmp_llist);
1506
1507         /*
1508          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1509          * and check it before using.
1510          */
1511         max_buf = tcon->ses->server->maxBuf;
1512         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1513                 return -EINVAL;
1514
1515         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1516                      PAGE_SIZE);
1517         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1518                         PAGE_SIZE);
1519         max_num = (max_buf - sizeof(struct smb_hdr)) /
1520                                                 sizeof(LOCKING_ANDX_RANGE);
1521         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1522         if (!buf)
1523                 return -ENOMEM;
1524
1525         down_write(&cinode->lock_sem);
1526         for (i = 0; i < 2; i++) {
1527                 cur = buf;
1528                 num = 0;
1529                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1530                         if (flock->fl_start > li->offset ||
1531                             (flock->fl_start + length) <
1532                             (li->offset + li->length))
1533                                 continue;
1534                         if (current->tgid != li->pid)
1535                                 continue;
1536                         if (types[i] != li->type)
1537                                 continue;
1538                         if (cinode->can_cache_brlcks) {
1539                                 /*
1540                                  * We can cache brlock requests - simply remove
1541                                  * a lock from the file's list.
1542                                  */
1543                                 list_del(&li->llist);
1544                                 cifs_del_lock_waiters(li);
1545                                 kfree(li);
1546                                 continue;
1547                         }
1548                         cur->Pid = cpu_to_le16(li->pid);
1549                         cur->LengthLow = cpu_to_le32((u32)li->length);
1550                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1551                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1552                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1553                         /*
1554                          * We need to save a lock here to let us add it again to
1555                          * the file's list if the unlock range request fails on
1556                          * the server.
1557                          */
1558                         list_move(&li->llist, &tmp_llist);
1559                         if (++num == max_num) {
1560                                 stored_rc = cifs_lockv(xid, tcon,
1561                                                        cfile->fid.netfid,
1562                                                        li->type, num, 0, buf);
1563                                 if (stored_rc) {
1564                                         /*
1565                                          * We failed on the unlock range
1566                                          * request - add all locks from the tmp
1567                                          * list to the head of the file's list.
1568                                          */
1569                                         cifs_move_llist(&tmp_llist,
1570                                                         &cfile->llist->locks);
1571                                         rc = stored_rc;
1572                                 } else
1573                                         /*
1574                                          * The unlock range request succeeded -
1575                                          * free the tmp list.
1576                                          */
1577                                         cifs_free_llist(&tmp_llist);
1578                                 cur = buf;
1579                                 num = 0;
1580                         } else
1581                                 cur++;
1582                 }
1583                 if (num) {
1584                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1585                                                types[i], num, 0, buf);
1586                         if (stored_rc) {
1587                                 cifs_move_llist(&tmp_llist,
1588                                                 &cfile->llist->locks);
1589                                 rc = stored_rc;
1590                         } else
1591                                 cifs_free_llist(&tmp_llist);
1592                 }
1593         }
1594
1595         up_write(&cinode->lock_sem);
1596         kfree(buf);
1597         return rc;
1598 }
1599
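/*
 * Set or release a byte-range lock. POSIX locks are sent via
 * CIFSSMBPosixLock; mandatory locks are first checked against (and, when
 * brlocks can be cached, satisfied from) the local lock list before being
 * sent to the server with ops->mand_lock or ops->mand_unlock_range.
 */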
1600 static int
1601 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1602            bool wait_flag, bool posix_lck, int lock, int unlock,
1603            unsigned int xid)
1604 {
1605         int rc = 0;
1606         __u64 length = 1 + flock->fl_end - flock->fl_start;
1607         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1608         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1609         struct TCP_Server_Info *server = tcon->ses->server;
1610         struct inode *inode = d_inode(cfile->dentry);
1611
1612         if (posix_lck) {
1613                 int posix_lock_type;
1614
1615                 rc = cifs_posix_lock_set(file, flock);
1616                 if (rc <= 0)
1617                         return rc;
1618
1619                 if (type & server->vals->shared_lock_type)
1620                         posix_lock_type = CIFS_RDLCK;
1621                 else
1622                         posix_lock_type = CIFS_WRLCK;
1623
1624                 if (unlock == 1)
1625                         posix_lock_type = CIFS_UNLCK;
1626
1627                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1628                                       hash_lockowner(flock->fl_owner),
1629                                       flock->fl_start, length,
1630                                       NULL, posix_lock_type, wait_flag);
1631                 goto out;
1632         }
1633
1634         if (lock) {
1635                 struct cifsLockInfo *lock;
1636
1637                 lock = cifs_lock_init(flock->fl_start, length, type,
1638                                       flock->fl_flags);
1639                 if (!lock)
1640                         return -ENOMEM;
1641
1642                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1643                 if (rc < 0) {
1644                         kfree(lock);
1645                         return rc;
1646                 }
1647                 if (!rc)
1648                         goto out;
1649
1650                 /*
1651                  * Windows 7 server can delay breaking lease from read to None
1652                  * if we set a byte-range lock on a file - break it explicitly
1653                  * before sending the lock to the server to be sure the next
1654                  * read won't conflict with non-overlapping locks due to
1655                  * page reading.
1656                  */
1657                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1658                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1659                         cifs_zap_mapping(inode);
1660                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1661                                  inode);
1662                         CIFS_I(inode)->oplock = 0;
1663                 }
1664
1665                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1666                                             type, 1, 0, wait_flag);
1667                 if (rc) {
1668                         kfree(lock);
1669                         return rc;
1670                 }
1671
1672                 cifs_lock_add(cfile, lock);
1673         } else if (unlock)
1674                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1675
1676 out:
1677         if (flock->fl_flags & FL_POSIX) {
1678                 /*
1679                  * If this is a request to remove all locks because we
1680                  * are closing the file, it doesn't matter if the
1681                  * unlocking failed as both cifs.ko and the SMB server
1682                  * remove the lock on file close
1683                  */
1684                 if (rc) {
1685                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1686                         if (!(flock->fl_flags & FL_CLOSE))
1687                                 return rc;
1688                 }
1689                 rc = locks_lock_file_wait(file, flock);
1690         }
1691         return rc;
1692 }
1693
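/*
 * VFS ->lock entry point: translate the file_lock flags and type into CIFS
 * lock semantics, then dispatch to cifs_getlk for lock queries or
 * cifs_setlk for lock and unlock requests.
 */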
1694 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1695 {
1696         int rc, xid;
1697         int lock = 0, unlock = 0;
1698         bool wait_flag = false;
1699         bool posix_lck = false;
1700         struct cifs_sb_info *cifs_sb;
1701         struct cifs_tcon *tcon;
1702         struct cifsFileInfo *cfile;
1703         __u32 type;
1704
1705         rc = -EACCES;
1706         xid = get_xid();
1707
1708         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1709                  cmd, flock->fl_flags, flock->fl_type,
1710                  flock->fl_start, flock->fl_end);
1711
1712         cfile = (struct cifsFileInfo *)file->private_data;
1713         tcon = tlink_tcon(cfile->tlink);
1714
1715         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1716                         tcon->ses->server);
1717         cifs_sb = CIFS_FILE_SB(file);
1718
1719         if (cap_unix(tcon->ses) &&
1720             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1721             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1722                 posix_lck = true;
1723         /*
1724          * BB add code here to normalize offset and length to account for
1725          * negative length which we can not accept over the wire.
1726          */
1727         if (IS_GETLK(cmd)) {
1728                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1729                 free_xid(xid);
1730                 return rc;
1731         }
1732
1733         if (!lock && !unlock) {
1734                 /*
1735                  * if this is neither a lock nor an unlock request, there is
1736                  * nothing to do since we do not know what it is
1737                  */
1738                 free_xid(xid);
1739                 return -EOPNOTSUPP;
1740         }
1741
1742         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1743                         xid);
1744         free_xid(xid);
1745         return rc;
1746 }
1747
1748 /*
1749  * update the file size (if needed) after a write. Should be called with
1750  * the inode->i_lock held
1751  */
1752 void
1753 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1754                       unsigned int bytes_written)
1755 {
1756         loff_t end_of_write = offset + bytes_written;
1757
1758         if (end_of_write > cifsi->server_eof)
1759                 cifsi->server_eof = end_of_write;
1760 }
1761
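/*
 * Synchronously write @write_size bytes at *@offset, reopening an
 * invalidated handle and retrying on -EAGAIN, and updating the cached
 * server EOF and inode size as the data goes out.
 */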
1762 static ssize_t
1763 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1764            size_t write_size, loff_t *offset)
1765 {
1766         int rc = 0;
1767         unsigned int bytes_written = 0;
1768         unsigned int total_written;
1769         struct cifs_tcon *tcon;
1770         struct TCP_Server_Info *server;
1771         unsigned int xid;
1772         struct dentry *dentry = open_file->dentry;
1773         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1774         struct cifs_io_parms io_parms;
1775
1776         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1777                  write_size, *offset, dentry);
1778
1779         tcon = tlink_tcon(open_file->tlink);
1780         server = tcon->ses->server;
1781
1782         if (!server->ops->sync_write)
1783                 return -ENOSYS;
1784
1785         xid = get_xid();
1786
1787         for (total_written = 0; write_size > total_written;
1788              total_written += bytes_written) {
1789                 rc = -EAGAIN;
1790                 while (rc == -EAGAIN) {
1791                         struct kvec iov[2];
1792                         unsigned int len;
1793
1794                         if (open_file->invalidHandle) {
1795                                 /* we could deadlock if we called
1796                                    filemap_fdatawait from here so tell
1797                                    reopen_file not to flush data to
1798                                    server now */
1799                                 rc = cifs_reopen_file(open_file, false);
1800                                 if (rc != 0)
1801                                         break;
1802                         }
1803
1804                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1805                                   (unsigned int)write_size - total_written);
1806                         /* iov[0] is reserved for smb header */
1807                         iov[1].iov_base = (char *)write_data + total_written;
1808                         iov[1].iov_len = len;
1809                         io_parms.pid = pid;
1810                         io_parms.tcon = tcon;
1811                         io_parms.offset = *offset;
1812                         io_parms.length = len;
1813                         rc = server->ops->sync_write(xid, &open_file->fid,
1814                                         &io_parms, &bytes_written, iov, 1);
1815                 }
1816                 if (rc || (bytes_written == 0)) {
1817                         if (total_written)
1818                                 break;
1819                         else {
1820                                 free_xid(xid);
1821                                 return rc;
1822                         }
1823                 } else {
1824                         spin_lock(&d_inode(dentry)->i_lock);
1825                         cifs_update_eof(cifsi, *offset, bytes_written);
1826                         spin_unlock(&d_inode(dentry)->i_lock);
1827                         *offset += bytes_written;
1828                 }
1829         }
1830
1831         cifs_stats_bytes_written(tcon, total_written);
1832
1833         if (total_written > 0) {
1834                 spin_lock(&d_inode(dentry)->i_lock);
1835                 if (*offset > d_inode(dentry)->i_size)
1836                         i_size_write(d_inode(dentry), *offset);
1837                 spin_unlock(&d_inode(dentry)->i_lock);
1838         }
1839         mark_inode_dirty_sync(d_inode(dentry));
1840         free_xid(xid);
1841         return total_written;
1842 }
1843
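/*
 * Find a valid, readable open handle for this inode and take a reference
 * on it so it cannot be closed underneath us; returns NULL if none exists.
 */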
1844 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1845                                         bool fsuid_only)
1846 {
1847         struct cifsFileInfo *open_file = NULL;
1848         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1849
1850         /* only filter by fsuid on multiuser mounts */
1851         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1852                 fsuid_only = false;
1853
1854         spin_lock(&cifs_inode->open_file_lock);
1855         /* we could simply take the first list entry since write-only entries
1856            are always at the end of the list, but the first entry might have
1857            a close pending, so we walk the whole list */
1858         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1859                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1860                         continue;
1861                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1862                         if (!open_file->invalidHandle) {
1863                                 /* found a good file */
1864                                 /* lock it so it will not be closed on us */
1865                                 cifsFileInfo_get(open_file);
1866                                 spin_unlock(&cifs_inode->open_file_lock);
1867                                 return open_file;
1868                         } /* else might as well continue, and look for
1869                              another, or simply have the caller reopen it
1870                              again rather than trying to fix this handle */
1871                 } else /* write only file */
1872                         break; /* write only files are last so must be done */
1873         }
1874         spin_unlock(&cifs_inode->open_file_lock);
1875         return NULL;
1876 }
1877
1878 /* Return -EBADF if no handle is found and general rc otherwise */
1879 int
1880 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
1881                        struct cifsFileInfo **ret_file)
1882 {
1883         struct cifsFileInfo *open_file, *inv_file = NULL;
1884         struct cifs_sb_info *cifs_sb;
1885         bool any_available = false;
1886         int rc = -EBADF;
1887         unsigned int refind = 0;
1888
1889         *ret_file = NULL;
1890
1891         /*
1892          * Having a null inode here (because mapping->host was set to zero by
1893          * the VFS or MM) should not happen but we had reports of an oops (due
1894          * to it being zero) during stress testcases so we need to check for it
1895          */
1896
1897         if (cifs_inode == NULL) {
1898                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
1899                 dump_stack();
1900                 return rc;
1901         }
1902
1903         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1904
1905         /* only filter by fsuid on multiuser mounts */
1906         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1907                 fsuid_only = false;
1908
1909         spin_lock(&cifs_inode->open_file_lock);
1910 refind_writable:
1911         if (refind > MAX_REOPEN_ATT) {
1912                 spin_unlock(&cifs_inode->open_file_lock);
1913                 return rc;
1914         }
1915         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1916                 if (!any_available && open_file->pid != current->tgid)
1917                         continue;
1918                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1919                         continue;
1920                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1921                         if (!open_file->invalidHandle) {
1922                                 /* found a good writable file */
1923                                 cifsFileInfo_get(open_file);
1924                                 spin_unlock(&cifs_inode->open_file_lock);
1925                                 *ret_file = open_file;
1926                                 return 0;
1927                         } else {
1928                                 if (!inv_file)
1929                                         inv_file = open_file;
1930                         }
1931                 }
1932         }
1933         /* couldn't find a usable FH with the same pid, try any available */
1934         if (!any_available) {
1935                 any_available = true;
1936                 goto refind_writable;
1937         }
1938
1939         if (inv_file) {
1940                 any_available = false;
1941                 cifsFileInfo_get(inv_file);
1942         }
1943
1944         spin_unlock(&cifs_inode->open_file_lock);
1945
1946         if (inv_file) {
1947                 rc = cifs_reopen_file(inv_file, false);
1948                 if (!rc) {
1949                         *ret_file = inv_file;
1950                         return 0;
1951                 }
1952
1953                 spin_lock(&cifs_inode->open_file_lock);
1954                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
1955                 spin_unlock(&cifs_inode->open_file_lock);
1956                 cifsFileInfo_put(inv_file);
1957                 ++refind;
1958                 inv_file = NULL;
1959                 spin_lock(&cifs_inode->open_file_lock);
1960                 goto refind_writable;
1961         }
1962
1963         return rc;
1964 }
1965
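/*
 * Convenience wrapper around cifs_get_writable_file() that returns the
 * writable handle, or NULL when none could be found or reopened.
 */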
1966 struct cifsFileInfo *
1967 find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
1968 {
1969         struct cifsFileInfo *cfile;
1970         int rc;
1971
1972         rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
1973         if (rc)
1974                 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
1975
1976         return cfile;
1977 }
1978
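/*
 * Look up an open file on this tcon by pathname and return a writable
 * handle for it via cifs_get_writable_file().
 */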
1979 int
1980 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
1981                        struct cifsFileInfo **ret_file)
1982 {
1984         struct cifsFileInfo *cfile;
1985         struct cifsInodeInfo *cinode;
1986         char *full_path;
1987
1988         *ret_file = NULL;
1989
1990         spin_lock(&tcon->open_file_lock);
1991         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
1994                 full_path = build_path_from_dentry(cfile->dentry);
1995                 if (full_path == NULL) {
1996                         spin_unlock(&tcon->open_file_lock);
1997                         return -ENOMEM;
1998                 }
1999                 if (strcmp(full_path, name)) {
2000                         kfree(full_path);
2001                         continue;
2002                 }
2003
2004                 kfree(full_path);
2005                 cinode = CIFS_I(d_inode(cfile->dentry));
2006                 spin_unlock(&tcon->open_file_lock);
2007                 return cifs_get_writable_file(cinode, false, ret_file);
2008         }
2009
2010         spin_unlock(&tcon->open_file_lock);
2011         return -ENOENT;
2012 }
2013
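/*
 * Look up an open file on this tcon by pathname and return a readable
 * handle for it via find_readable_file().
 */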
2014 int
2015 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2016                        struct cifsFileInfo **ret_file)
2017 {
2019         struct cifsFileInfo *cfile;
2020         struct cifsInodeInfo *cinode;
2021         char *full_path;
2022
2023         *ret_file = NULL;
2024
2025         spin_lock(&tcon->open_file_lock);
2026         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2029                 full_path = build_path_from_dentry(cfile->dentry);
2030                 if (full_path == NULL) {
2031                         spin_unlock(&tcon->open_file_lock);
2032                         return -ENOMEM;
2033                 }
2034                 if (strcmp(full_path, name)) {
2035                         kfree(full_path);
2036                         continue;
2037                 }
2038
2039                 kfree(full_path);
2040                 cinode = CIFS_I(d_inode(cfile->dentry));
2041                 spin_unlock(&tcon->open_file_lock);
2042                 *ret_file = find_readable_file(cinode, false);
2043                 return *ret_file ? 0 : -ENOENT;
2044         }
2045
2046         spin_unlock(&tcon->open_file_lock);
2047         return -ENOENT;
2048 }
2049
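/*
 * Write the byte range [from, to) of a single page back to the server
 * through any writable handle for the inode, clamping the range so that
 * the file is never extended from here.
 */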
2050 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2051 {
2052         struct address_space *mapping = page->mapping;
2053         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2054         char *write_data;
2055         int rc = -EFAULT;
2056         int bytes_written = 0;
2057         struct inode *inode;
2058         struct cifsFileInfo *open_file;
2059
2060         if (!mapping || !mapping->host)
2061                 return -EFAULT;
2062
2063         inode = page->mapping->host;
2064
2065         offset += (loff_t)from;
2066         write_data = kmap(page);
2067         write_data += from;
2068
2069         if ((to > PAGE_SIZE) || (from > to)) {
2070                 kunmap(page);
2071                 return -EIO;
2072         }
2073
2074         /* racing with truncate? */
2075         if (offset > mapping->host->i_size) {
2076                 kunmap(page);
2077                 return 0; /* don't care */
2078         }
2079
2080         /* check to make sure that we are not extending the file */
2081         if (mapping->host->i_size - offset < (loff_t)to)
2082                 to = (unsigned)(mapping->host->i_size - offset);
2083
2084         rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
2085         if (!rc) {
2086                 bytes_written = cifs_write(open_file, open_file->pid,
2087                                            write_data, to - from, &offset);
2088                 cifsFileInfo_put(open_file);
2089                 /* Does mm or vfs already set times? */
2090                 inode->i_atime = inode->i_mtime = current_time(inode);
2091                 if ((bytes_written > 0) && (offset))
2092                         rc = 0;
2093                 else if (bytes_written < 0)
2094                         rc = bytes_written;
2095                 else
2096                         rc = -EFAULT;
2097         } else {
2098                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2099                 if (!is_retryable_error(rc))
2100                         rc = -EIO;
2101         }
2102
2103         kunmap(page);
2104         return rc;
2105 }
2106
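/*
 * Allocate a writedata and grab up to @tofind dirty-tagged pages from the
 * mapping, reporting how many were found via @found_pages.
 */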
2107 static struct cifs_writedata *
2108 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2109                           pgoff_t end, pgoff_t *index,
2110                           unsigned int *found_pages)
2111 {
2112         struct cifs_writedata *wdata;
2113
2114         wdata = cifs_writedata_alloc((unsigned int)tofind,
2115                                      cifs_writev_complete);
2116         if (!wdata)
2117                 return NULL;
2118
2119         *found_pages = find_get_pages_range_tag(mapping, index, end,
2120                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2121         return wdata;
2122 }
2123
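/*
 * Lock a run of consecutive dirty pages and mark them for writeback,
 * stopping at the first page that is non-consecutive, beyond the requested
 * range, past EOF, or already under writeback; unused pages are released.
 */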
2124 static unsigned int
2125 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2126                     struct address_space *mapping,
2127                     struct writeback_control *wbc,
2128                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2129 {
2130         unsigned int nr_pages = 0, i;
2131         struct page *page;
2132
2133         for (i = 0; i < found_pages; i++) {
2134                 page = wdata->pages[i];
2135                 /*
2136                  * At this point we hold neither the i_pages lock nor the
2137                  * page lock: the page may be truncated or invalidated
2138                  * (changing page->mapping to NULL), or even swizzled
2139                  * back from swapper_space to tmpfs file mapping
2140                  */
2141
2142                 if (nr_pages == 0)
2143                         lock_page(page);
2144                 else if (!trylock_page(page))
2145                         break;
2146
2147                 if (unlikely(page->mapping != mapping)) {
2148                         unlock_page(page);
2149                         break;
2150                 }
2151
2152                 if (!wbc->range_cyclic && page->index > end) {
2153                         *done = true;
2154                         unlock_page(page);
2155                         break;
2156                 }
2157
2158                 if (*next && (page->index != *next)) {
2159                         /* Not next consecutive page */
2160                         unlock_page(page);
2161                         break;
2162                 }
2163
2164                 if (wbc->sync_mode != WB_SYNC_NONE)
2165                         wait_on_page_writeback(page);
2166
2167                 if (PageWriteback(page) ||
2168                                 !clear_page_dirty_for_io(page)) {
2169                         unlock_page(page);
2170                         break;
2171                 }
2172
2173                 /*
2174                  * This actually clears the dirty bit in the radix tree.
2175                  * See cifs_writepage() for more commentary.
2176                  */
2177                 set_page_writeback(page);
2178                 if (page_offset(page) >= i_size_read(mapping->host)) {
2179                         *done = true;
2180                         unlock_page(page);
2181                         end_page_writeback(page);
2182                         break;
2183                 }
2184
2185                 wdata->pages[i] = page;
2186                 *next = page->index + 1;
2187                 ++nr_pages;
2188         }
2189
2190         /* reset index to refind any pages skipped */
2191         if (nr_pages == 0)
2192                 *index = wdata->pages[0]->index + 1;
2193
2194         /* put any pages we aren't going to use */
2195         for (i = nr_pages; i < found_pages; i++) {
2196                 put_page(wdata->pages[i]);
2197                 wdata->pages[i] = NULL;
2198         }
2199
2200         return nr_pages;
2201 }
2202
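/*
 * Fill in the remaining writedata fields (offset, sizes, pid) for the
 * prepared pages, adjust the credits to the actual request size, and issue
 * the asynchronous write.
 */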
2203 static int
2204 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2205                  struct address_space *mapping, struct writeback_control *wbc)
2206 {
2207         int rc;
2208         struct TCP_Server_Info *server =
2209                                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2210
2211         wdata->sync_mode = wbc->sync_mode;
2212         wdata->nr_pages = nr_pages;
2213         wdata->offset = page_offset(wdata->pages[0]);
2214         wdata->pagesz = PAGE_SIZE;
2215         wdata->tailsz = min(i_size_read(mapping->host) -
2216                         page_offset(wdata->pages[nr_pages - 1]),
2217                         (loff_t)PAGE_SIZE);
2218         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2219         wdata->pid = wdata->cfile->pid;
2220
2221         rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2222         if (rc)
2223                 return rc;
2224
2225         if (wdata->cfile->invalidHandle)
2226                 rc = -EAGAIN;
2227         else
2228                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2229
2230         return rc;
2231 }
2232
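/*
 * address_space ->writepages: gather runs of dirty pages into
 * wsize-bounded asynchronous write requests, falling back to
 * generic_writepages() when wsize is smaller than a page.
 */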
2233 static int cifs_writepages(struct address_space *mapping,
2234                            struct writeback_control *wbc)
2235 {
2236         struct inode *inode = mapping->host;
2237         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2238         struct TCP_Server_Info *server;
2239         bool done = false, scanned = false, range_whole = false;
2240         pgoff_t end, index;
2241         struct cifs_writedata *wdata;
2242         struct cifsFileInfo *cfile = NULL;
2243         int rc = 0;
2244         int saved_rc = 0;
2245         unsigned int xid;
2246
2247         /*
2248          * If wsize is smaller than the page cache size, default to writing
2249          * one page at a time via cifs_writepage
2250          */
2251         if (cifs_sb->wsize < PAGE_SIZE)
2252                 return generic_writepages(mapping, wbc);
2253
2254         xid = get_xid();
2255         if (wbc->range_cyclic) {
2256                 index = mapping->writeback_index; /* Start from prev offset */
2257                 end = -1;
2258         } else {
2259                 index = wbc->range_start >> PAGE_SHIFT;
2260                 end = wbc->range_end >> PAGE_SHIFT;
2261                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2262                         range_whole = true;
2263                 scanned = true;
2264         }
2265         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2266 retry:
2267         while (!done && index <= end) {
2268                 unsigned int i, nr_pages, found_pages, wsize;
2269                 pgoff_t next = 0, tofind, saved_index = index;
2270                 struct cifs_credits credits_on_stack;
2271                 struct cifs_credits *credits = &credits_on_stack;
2272                 int get_file_rc = 0;
2273
2274                 if (cfile)
2275                         cifsFileInfo_put(cfile);
2276
2277                 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2278
2279                 /* in case of an error store it to return later */
2280                 if (rc)
2281                         get_file_rc = rc;
2282
2283                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2284                                                    &wsize, credits);
2285                 if (rc != 0) {
2286                         done = true;
2287                         break;
2288                 }
2289
2290                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2291
2292                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2293                                                   &found_pages);
2294                 if (!wdata) {
2295                         rc = -ENOMEM;
2296                         done = true;
2297                         add_credits_and_wake_if(server, credits, 0);
2298                         break;
2299                 }
2300
2301                 if (found_pages == 0) {
2302                         kref_put(&wdata->refcount, cifs_writedata_release);
2303                         add_credits_and_wake_if(server, credits, 0);
2304                         break;
2305                 }
2306
2307                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2308                                                end, &index, &next, &done);
2309
2310                 /* nothing to write? */
2311                 if (nr_pages == 0) {
2312                         kref_put(&wdata->refcount, cifs_writedata_release);
2313                         add_credits_and_wake_if(server, credits, 0);
2314                         continue;
2315                 }
2316
2317                 wdata->credits = credits_on_stack;
2318                 wdata->cfile = cfile;
2319                 cfile = NULL;
2320
2321                 if (!wdata->cfile) {
2322                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2323                                  get_file_rc);
2324                         if (is_retryable_error(get_file_rc))
2325                                 rc = get_file_rc;
2326                         else
2327                                 rc = -EBADF;
2328                 } else
2329                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2330
2331                 for (i = 0; i < nr_pages; ++i)
2332                         unlock_page(wdata->pages[i]);
2333
2334                 /* send failure -- clean up the mess */
2335                 if (rc != 0) {
2336                         add_credits_and_wake_if(server, &wdata->credits, 0);
2337                         for (i = 0; i < nr_pages; ++i) {
2338                                 if (is_retryable_error(rc))
2339                                         redirty_page_for_writepage(wbc,
2340                                                            wdata->pages[i]);
2341                                 else
2342                                         SetPageError(wdata->pages[i]);
2343                                 end_page_writeback(wdata->pages[i]);
2344                                 put_page(wdata->pages[i]);
2345                         }
2346                         if (!is_retryable_error(rc))
2347                                 mapping_set_error(mapping, rc);
2348                 }
2349                 kref_put(&wdata->refcount, cifs_writedata_release);
2350
2351                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2352                         index = saved_index;
2353                         continue;
2354                 }
2355
2356                 /* Return immediately if we received a signal during writing */
2357                 if (is_interrupt_error(rc)) {
2358                         done = true;
2359                         break;
2360                 }
2361
2362                 if (rc != 0 && saved_rc == 0)
2363                         saved_rc = rc;
2364
2365                 wbc->nr_to_write -= nr_pages;
2366                 if (wbc->nr_to_write <= 0)
2367                         done = true;
2368
2369                 index = next;
2370         }
2371
2372         if (!scanned && !done) {
2373                 /*
2374                  * We hit the last page and there is more work to be done: wrap
2375                  * back to the start of the file
2376                  */
2377                 scanned = true;
2378                 index = 0;
2379                 goto retry;
2380         }
2381
2382         if (saved_rc != 0)
2383                 rc = saved_rc;
2384
2385         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2386                 mapping->writeback_index = index;
2387
2388         if (cfile)
2389                 cifsFileInfo_put(cfile);
2390         free_xid(xid);
2391         return rc;
2392 }
2393
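/*
 * Write back a single locked page: retry -EAGAIN for WB_SYNC_ALL,
 * redirty the page for other retryable errors, and flag a page and
 * mapping error otherwise.
 */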
2394 static int
2395 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2396 {
2397         int rc;
2398         unsigned int xid;
2399
2400         xid = get_xid();
2401 /* BB add check for wbc flags */
2402         get_page(page);
2403         if (!PageUptodate(page))
2404                 cifs_dbg(FYI, "ppw - page not up to date\n");
2405
2406         /*
2407          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2408          *
2409          * A writepage() implementation always needs to do either this,
2410          * or re-dirty the page with "redirty_page_for_writepage()" in
2411          * the case of a failure.
2412          *
2413          * Just unlocking the page will cause the radix tree tag-bits
2414          * to fail to update with the state of the page correctly.
2415          */
2416         set_page_writeback(page);
2417 retry_write:
2418         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2419         if (is_retryable_error(rc)) {
2420                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2421                         goto retry_write;
2422                 redirty_page_for_writepage(wbc, page);
2423         } else if (rc != 0) {
2424                 SetPageError(page);
2425                 mapping_set_error(page->mapping, rc);
2426         } else {
2427                 SetPageUptodate(page);
2428         }
2429         end_page_writeback(page);
2430         put_page(page);
2431         free_xid(xid);
2432         return rc;
2433 }
2434
2435 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2436 {
2437         int rc = cifs_writepage_locked(page, wbc);
2438         unlock_page(page);
2439         return rc;
2440 }
2441
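/*
 * ->write_end: if the page never became uptodate, push the copied bytes
 * straight to the server via cifs_write(); otherwise just dirty the page
 * and let writeback handle it, extending i_size as needed.
 */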
2442 static int cifs_write_end(struct file *file, struct address_space *mapping,
2443                         loff_t pos, unsigned len, unsigned copied,
2444                         struct page *page, void *fsdata)
2445 {
2446         int rc;
2447         struct inode *inode = mapping->host;
2448         struct cifsFileInfo *cfile = file->private_data;
2449         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2450         __u32 pid;
2451
2452         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2453                 pid = cfile->pid;
2454         else
2455                 pid = current->tgid;
2456
2457         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2458                  page, pos, copied);
2459
2460         if (PageChecked(page)) {
2461                 if (copied == len)
2462                         SetPageUptodate(page);
2463                 ClearPageChecked(page);
2464         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2465                 SetPageUptodate(page);
2466
2467         if (!PageUptodate(page)) {
2468                 char *page_data;
2469                 unsigned offset = pos & (PAGE_SIZE - 1);
2470                 unsigned int xid;
2471
2472                 xid = get_xid();
2473                 /* this is probably better than directly calling
2474                    cifs_partialpagewrite since in this function the file
2475                    handle is known, which we might as well leverage */
2476                 /* BB check if anything else missing out of ppw
2477                    such as updating last write time */
2478                 page_data = kmap(page);
2479                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2480                 /* if (rc < 0) should we set writebehind rc? */
2481                 kunmap(page);
2482
2483                 free_xid(xid);
2484         } else {
2485                 rc = copied;
2486                 pos += copied;
2487                 set_page_dirty(page);
2488         }
2489
2490         if (rc > 0) {
2491                 spin_lock(&inode->i_lock);
2492                 if (pos > inode->i_size)
2493                         i_size_write(inode, pos);
2494                 spin_unlock(&inode->i_lock);
2495         }
2496
2497         unlock_page(page);
2498         put_page(page);
2499
2500         return rc;
2501 }
2502
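/*
 * Strict fsync: flush dirty pages, invalidate the page cache when we do
 * not hold a read oplock/lease, and ask the server to flush the file.
 */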
2503 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2504                       int datasync)
2505 {
2506         unsigned int xid;
2507         int rc = 0;
2508         struct cifs_tcon *tcon;
2509         struct TCP_Server_Info *server;
2510         struct cifsFileInfo *smbfile = file->private_data;
2511         struct inode *inode = file_inode(file);
2512         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2513
2514         rc = file_write_and_wait_range(file, start, end);
2515         if (rc)
2516                 return rc;
2517
2518         xid = get_xid();
2519
2520         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2521                  file, datasync);
2522
2523         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2524                 rc = cifs_zap_mapping(inode);
2525                 if (rc) {
2526                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2527                         rc = 0; /* don't care about it in fsync */
2528                 }
2529         }
2530
2531         tcon = tlink_tcon(smbfile->tlink);
2532         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2533                 server = tcon->ses->server;
2534                 if (server->ops->flush)
2535                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2536                 else
2537                         rc = -ENOSYS;
2538         }
2539
2540         free_xid(xid);
2541         return rc;
2542 }
2543
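/*
 * Regular fsync: flush dirty pages and ask the server to flush the file,
 * but keep the local page cache intact.
 */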
2544 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2545 {
2546         unsigned int xid;
2547         int rc = 0;
2548         struct cifs_tcon *tcon;
2549         struct TCP_Server_Info *server;
2550         struct cifsFileInfo *smbfile = file->private_data;
2551         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2552
2553         rc = file_write_and_wait_range(file, start, end);
2554         if (rc)
2555                 return rc;
2556
2557         xid = get_xid();
2558
2559         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2560                  file, datasync);
2561
2562         tcon = tlink_tcon(smbfile->tlink);
2563         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2564                 server = tcon->ses->server;
2565                 if (server->ops->flush)
2566                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2567                 else
2568                         rc = -ENOSYS;
2569         }
2570
2571         free_xid(xid);
2572         return rc;
2573 }
2574
2575 /*
2576  * As the file closes, flush all cached write data for this inode,
2577  * checking for write-behind errors.
2578  */
2579 int cifs_flush(struct file *file, fl_owner_t id)
2580 {
2581         struct inode *inode = file_inode(file);
2582         int rc = 0;
2583
2584         if (file->f_mode & FMODE_WRITE)
2585                 rc = filemap_write_and_wait(inode->i_mapping);
2586
2587         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2588
2589         return rc;
2590 }
2591
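/*
 * Allocate @num_pages pages for an uncached write, releasing everything on
 * failure so the caller sees all-or-nothing semantics.
 */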
2592 static int
2593 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2594 {
2595         int rc = 0;
2596         unsigned long i;
2597
2598         for (i = 0; i < num_pages; i++) {
2599                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2600                 if (!pages[i]) {
2601                         /*
2602                          * save number of pages we have already allocated and
2603                          * return with ENOMEM error
2604                          */
2605                         num_pages = i;
2606                         rc = -ENOMEM;
2607                         break;
2608                 }
2609         }
2610
2611         if (rc) {
2612                 for (i = 0; i < num_pages; i++)
2613                         put_page(pages[i]);
2614         }
2615         return rc;
2616 }
2617
2618 static inline
2619 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2620 {
2621         size_t num_pages;
2622         size_t clen;
2623
2624         clen = min_t(const size_t, len, wsize);
2625         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2626
2627         if (cur_len)
2628                 *cur_len = clen;
2629
2630         return num_pages;
2631 }
2632
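/* Drop the aio-context and page references held by an uncached wdata */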
2633 static void
2634 cifs_uncached_writedata_release(struct kref *refcount)
2635 {
2636         int i;
2637         struct cifs_writedata *wdata = container_of(refcount,
2638                                         struct cifs_writedata, refcount);
2639
2640         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2641         for (i = 0; i < wdata->nr_pages; i++)
2642                 put_page(wdata->pages[i]);
2643         cifs_writedata_release(refcount);
2644 }
2645
2646 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2647
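/*
 * Completion work for an uncached write: update the cached server EOF and
 * inode size, then let the aio context collect this wdata's result.
 */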
2648 static void
2649 cifs_uncached_writev_complete(struct work_struct *work)
2650 {
2651         struct cifs_writedata *wdata = container_of(work,
2652                                         struct cifs_writedata, work);
2653         struct inode *inode = d_inode(wdata->cfile->dentry);
2654         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2655
2656         spin_lock(&inode->i_lock);
2657         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2658         if (cifsi->server_eof > inode->i_size)
2659                 i_size_write(inode, cifsi->server_eof);
2660         spin_unlock(&inode->i_lock);
2661
2662         complete(&wdata->done);
2663         collect_uncached_write_data(wdata->ctx);
2664         /* the below call can possibly free the last ref to aio ctx */
2665         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2666 }
2667
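/*
 * Copy up to *@len bytes from the user iovec into the wdata pages,
 * trimming *@len and *@num_pages to what was actually copied; returns
 * -EFAULT if nothing could be copied at all.
 */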
2668 static int
2669 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2670                       size_t *len, unsigned long *num_pages)
2671 {
2672         size_t save_len, copied, bytes, cur_len = *len;
2673         unsigned long i, nr_pages = *num_pages;
2674
2675         save_len = cur_len;
2676         for (i = 0; i < nr_pages; i++) {
2677                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2678                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2679                 cur_len -= copied;
2680                 /*
2681                  * If we didn't copy as much as we expected, then that
2682                  * may mean we trod into an unmapped area. Stop copying
2683                  * at that point. On the next pass through the big
2684                  * loop, we'll likely end up getting a zero-length
2685                  * write and bailing out of it.
2686                  */
2687                 if (copied < bytes)
2688                         break;
2689         }
2690         cur_len = save_len - cur_len;
2691         *len = cur_len;
2692
2693         /*
2694          * If we have no data to send, then that probably means that
2695          * the copy above failed altogether. That's most likely because
2696          * the address in the iovec was bogus. Return -EFAULT and let
2697          * the caller free anything we allocated and bail out.
2698          */
2699         if (!cur_len)
2700                 return -EFAULT;
2701
2702         /*
2703          * i + 1 now represents the number of pages we actually used in
2704          * the copy phase above.
2705          */
2706         *num_pages = i + 1;
2707         return 0;
2708 }
2709
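/*
 * Resend a failed uncached write as a whole: reopen the handle if needed,
 * wait until enough credits are available for the full wdata->bytes, and
 * retry the asynchronous write for as long as it returns -EAGAIN.
 */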
2710 static int
2711 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2712         struct cifs_aio_ctx *ctx)
2713 {
2714         unsigned int wsize;
2715         struct cifs_credits credits;
2716         int rc;
2717         struct TCP_Server_Info *server =
2718                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2719
2720         do {
2721                 if (wdata->cfile->invalidHandle) {
2722                         rc = cifs_reopen_file(wdata->cfile, false);
2723                         if (rc == -EAGAIN)
2724                                 continue;
2725                         else if (rc)
2726                                 break;
2727                 }
2728
2730                 /*
2731                  * Wait for credits to resend this wdata.
2732                  * Note: we are attempting to resend the whole wdata not in
2733                  * segments
2734                  */
2735                 do {
2736                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2737                                                 &wsize, &credits);
2738                         if (rc)
2739                                 goto fail;
2740
2741                         if (wsize < wdata->bytes) {
2742                                 add_credits_and_wake_if(server, &credits, 0);
2743                                 msleep(1000);
2744                         }
2745                 } while (wsize < wdata->bytes);
2746                 wdata->credits = credits;
2747
2748                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2749
2750                 if (!rc) {
2751                         if (wdata->cfile->invalidHandle)
2752                                 rc = -EAGAIN;
2753                         else
2754                                 rc = server->ops->async_writev(wdata,
2755                                         cifs_uncached_writedata_release);
2756                 }
2757
2758                 /* If the write was successfully sent, we are done */
2759                 if (!rc) {
2760                         list_add_tail(&wdata->list, wdata_list);
2761                         return 0;
2762                 }
2763
2764                 /* Roll back credits and retry if needed */
2765                 add_credits_and_wake_if(server, &wdata->credits, 0);
2766         } while (rc == -EAGAIN);
2767
2768 fail:
2769         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2770         return rc;
2771 }
2772
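/*
 * Split an uncached write into wsize-bounded wdata requests, either
 * pinning the user pages directly (direct I/O) or copying the iovec into
 * freshly allocated pages, and queue each piece for asynchronous write.
 */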
2773 static int
2774 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2775                      struct cifsFileInfo *open_file,
2776                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2777                      struct cifs_aio_ctx *ctx)
2778 {
2779         int rc = 0;
2780         size_t cur_len;
2781         unsigned long nr_pages, num_pages, i;
2782         struct cifs_writedata *wdata;
2783         struct iov_iter saved_from = *from;
2784         loff_t saved_offset = offset;
2785         pid_t pid;
2786         struct TCP_Server_Info *server;
2787         struct page **pagevec;
2788         size_t start;
2789         unsigned int xid;
2790
2791         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2792                 pid = open_file->pid;
2793         else
2794                 pid = current->tgid;
2795
2796         server = tlink_tcon(open_file->tlink)->ses->server;
2797         xid = get_xid();
2798
2799         do {
2800                 unsigned int wsize;
2801                 struct cifs_credits credits_on_stack;
2802                 struct cifs_credits *credits = &credits_on_stack;
2803
2804                 if (open_file->invalidHandle) {
2805                         rc = cifs_reopen_file(open_file, false);
2806                         if (rc == -EAGAIN)
2807                                 continue;
2808                         else if (rc)
2809                                 break;
2810                 }
2811
2812                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2813                                                    &wsize, credits);
2814                 if (rc)
2815                         break;
2816
2817                 cur_len = min_t(const size_t, len, wsize);
2818
2819                 if (ctx->direct_io) {
2820                         ssize_t result;
2821
2822                         result = iov_iter_get_pages_alloc(
2823                                 from, &pagevec, cur_len, &start);
2824                         if (result < 0) {
2825                                 cifs_dbg(VFS,
2826                                         "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2827                                         result, from->type,
2828                                         from->iov_offset, from->count);
2831                                 dump_stack();
2832
2833                                 rc = result;
2834                                 add_credits_and_wake_if(server, credits, 0);
2835                                 break;
2836                         }
2837                         cur_len = (size_t)result;
2838                         iov_iter_advance(from, cur_len);
2839
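                             /*
                              * Round up so partial first (start) and last pages
                              * are counted, e.g. with 4K pages, cur_len = 10000
                              * and start = 512 span 3 pages.
                              */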
2840                         nr_pages =
2841                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2842
2843                         wdata = cifs_writedata_direct_alloc(pagevec,
2844                                              cifs_uncached_writev_complete);
2845                         if (!wdata) {
2846                                 rc = -ENOMEM;
2847                                 add_credits_and_wake_if(server, credits, 0);
2848                                 break;
2849                         }
2850
2852                         wdata->page_offset = start;
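                             /*
                              * The tail page holds what is left after the
                              * partial first page and the full middle pages,
                              * e.g. for the case above: 10000 - (4096 - 512) -
                              * 4096 = 2320.
                              */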
2853                         wdata->tailsz =
2854                                 nr_pages > 1 ?
2855                                         cur_len - (PAGE_SIZE - start) -
2856                                         (nr_pages - 2) * PAGE_SIZE :
2857                                         cur_len;
2858                 } else {
2859                         nr_pages = get_numpages(wsize, len, &cur_len);
2860                         wdata = cifs_writedata_alloc(nr_pages,
2861                                              cifs_uncached_writev_complete);
2862                         if (!wdata) {
2863                                 rc = -ENOMEM;
2864                                 add_credits_and_wake_if(server, credits, 0);
2865                                 break;
2866                         }
2867
2868                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2869                         if (rc) {
2870                                 kvfree(wdata->pages);
2871                                 kfree(wdata);
2872                                 add_credits_and_wake_if(server, credits, 0);
2873                                 break;
2874                         }
2875
2876                         num_pages = nr_pages;
2877                         rc = wdata_fill_from_iovec(
2878                                 wdata, from, &cur_len, &num_pages);
2879                         if (rc) {
2880                                 for (i = 0; i < nr_pages; i++)
2881                                         put_page(wdata->pages[i]);
2882                                 kvfree(wdata->pages);
2883                                 kfree(wdata);
2884                                 add_credits_and_wake_if(server, credits, 0);
2885                                 break;
2886                         }
2887
2888                         /*
2889                          * Bring nr_pages down to the number of pages we
2890                          * actually used, and free any pages that we didn't use.
2891                          */
2892                         for ( ; nr_pages > num_pages; nr_pages--)
2893                                 put_page(wdata->pages[nr_pages - 1]);
2894
2895                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2896                 }
2897
2898                 wdata->sync_mode = WB_SYNC_ALL;
2899                 wdata->nr_pages = nr_pages;
2900                 wdata->offset = (__u64)offset;
2901                 wdata->cfile = cifsFileInfo_get(open_file);
2902                 wdata->pid = pid;
2903                 wdata->bytes = cur_len;
2904                 wdata->pagesz = PAGE_SIZE;
2905                 wdata->credits = credits_on_stack;
2906                 wdata->ctx = ctx;
2907                 kref_get(&ctx->refcount);
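                     /* each in-flight wdata pins the aio ctx until released */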
2908
2909                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2910
2911                 if (!rc) {
2912                         if (wdata->cfile->invalidHandle)
2913                                 rc = -EAGAIN;
2914                         else
2915                                 rc = server->ops->async_writev(wdata,
2916                                         cifs_uncached_writedata_release);
2917                 }
2918
2919                 if (rc) {
2920                         add_credits_and_wake_if(server, &wdata->credits, 0);
2921                         kref_put(&wdata->refcount,
2922                                  cifs_uncached_writedata_release);
2923                         if (rc == -EAGAIN) {
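                                     /*
                                      * Rewind the iterator to the start of this
                                      * request: restore the saved copy, then
                                      * re-advance to the offset already reached.
                                      */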
2924                                 *from = saved_from;
2925                                 iov_iter_advance(from, offset - saved_offset);
2926                                 continue;
2927                         }
2928                         break;
2929                 }
2930
2931                 list_add_tail(&wdata->list, wdata_list);
2932                 offset += cur_len;
2933                 len -= cur_len;
2934         } while (len > 0);
2935
2936         free_xid(xid);
2937         return rc;
2938 }
2939
2940 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2941 {
2942         struct cifs_writedata *wdata, *tmp;
2943         struct cifs_tcon *tcon;
2944         struct cifs_sb_info *cifs_sb;
2945         struct dentry *dentry = ctx->cfile->dentry;
2946         int rc;
2947
2948         tcon = tlink_tcon(ctx->cfile->tlink);
2949         cifs_sb = CIFS_SB(dentry->d_sb);
2950
2951         mutex_lock(&ctx->aio_mutex);
2952
2953         if (list_empty(&ctx->list)) {
2954                 mutex_unlock(&ctx->aio_mutex);
2955                 return;
2956         }
2957
2958         rc = ctx->rc;
2959         /*
2960          * Wait for and collect replies for any successful sends in order of
2961          * increasing offset. Once an error is hit, then return without waiting
2962          * for any more replies.
2963          */
2964 restart_loop:
2965         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2966                 if (!rc) {
2967                         if (!try_wait_for_completion(&wdata->done)) {
2968                                 mutex_unlock(&ctx->aio_mutex);
2969                                 return;
2970                         }
2971
2972                         if (wdata->result)
2973                                 rc = wdata->result;
2974                         else
2975                                 ctx->total_len += wdata->bytes;
2976
2977                         /* resend call if it's a retryable error */
2978                         if (rc == -EAGAIN) {
2979                                 struct list_head tmp_list;
2980                                 struct iov_iter tmp_from = ctx->iter;
2981
2982                                 INIT_LIST_HEAD(&tmp_list);
2983                                 list_del_init(&wdata->list);
2984
2985                                 if (ctx->direct_io)
2986                                         rc = cifs_resend_wdata(
2987                                                 wdata, &tmp_list, ctx);
2988                                 else {
2989                                         iov_iter_advance(&tmp_from,
2990                                                  wdata->offset - ctx->pos);
2991
2992                                         rc = cifs_write_from_iter(wdata->offset,
2993                                                 wdata->bytes, &tmp_from,
2994                                                 ctx->cfile, cifs_sb, &tmp_list,
2995                                                 ctx);
2996
2997                                         kref_put(&wdata->refcount,
2998                                                 cifs_uncached_writedata_release);
2999                                 }
3000
3001                                 list_splice(&tmp_list, &ctx->list);
3002                                 goto restart_loop;
3003                         }
3004                 }
3005                 list_del_init(&wdata->list);
3006                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3007         }
3008
3009         cifs_stats_bytes_written(tcon, ctx->total_len);
3010         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3011
3012         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3013
3014         mutex_unlock(&ctx->aio_mutex);
3015
3016         if (ctx->iocb && ctx->iocb->ki_complete)
3017                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3018         else
3019                 complete(&ctx->done);
3020 }
3021
3022 static ssize_t __cifs_writev(
3023         struct kiocb *iocb, struct iov_iter *from, bool direct)
3024 {
3025         struct file *file = iocb->ki_filp;
3026         ssize_t total_written = 0;
3027         struct cifsFileInfo *cfile;
3028         struct cifs_tcon *tcon;
3029         struct cifs_sb_info *cifs_sb;
3030         struct cifs_aio_ctx *ctx;
3031         struct iov_iter saved_from = *from;
3032         size_t len = iov_iter_count(from);
3033         int rc;
3034
3035         /*
3036          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
3037          * In this case, fall back to the non-direct write function.
3038          * This could be improved by getting pages directly in ITER_KVEC.
3039          */
3040         if (direct && from->type & ITER_KVEC) {
3041                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3042                 direct = false;
3043         }
3044
3045         rc = generic_write_checks(iocb, from);
3046         if (rc <= 0)
3047                 return rc;
3048
3049         cifs_sb = CIFS_FILE_SB(file);
3050         cfile = file->private_data;
3051         tcon = tlink_tcon(cfile->tlink);
3052
3053         if (!tcon->ses->server->ops->async_writev)
3054                 return -ENOSYS;
3055
3056         ctx = cifs_aio_ctx_alloc();
3057         if (!ctx)
3058                 return -ENOMEM;
3059
3060         ctx->cfile = cifsFileInfo_get(cfile);
3061
3062         if (!is_sync_kiocb(iocb))
3063                 ctx->iocb = iocb;
3064
3065         ctx->pos = iocb->ki_pos;
3066
3067         if (direct) {
3068                 ctx->direct_io = true;
3069                 ctx->iter = *from;
3070                 ctx->len = len;
3071         } else {
3072                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3073                 if (rc) {
3074                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3075                         return rc;
3076                 }
3077         }
3078
3079         /* grab a lock here because the write response handlers can access ctx */
3080         mutex_lock(&ctx->aio_mutex);
3081
3082         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3083                                   cfile, cifs_sb, &ctx->list, ctx);
3084
3085         /*
3086          * If at least one write was successfully sent, then discard any rc
3087          * value from the later writes. If the other writes succeed, then
3088          * we'll end up returning whatever was written. If they fail, then
3089          * we'll get a new rc value from the collection step.
3090          */
3091         if (!list_empty(&ctx->list))
3092                 rc = 0;
3093
3094         mutex_unlock(&ctx->aio_mutex);
3095
3096         if (rc) {
3097                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3098                 return rc;
3099         }
3100
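             /*
              * For async kiocbs the completion callback reports the result;
              * drop our ctx reference and tell the caller the I/O is queued.
              */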
3101         if (!is_sync_kiocb(iocb)) {
3102                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3103                 return -EIOCBQUEUED;
3104         }
3105
3106         rc = wait_for_completion_killable(&ctx->done);
3107         if (rc) {
3108                 mutex_lock(&ctx->aio_mutex);
3109                 ctx->rc = rc = -EINTR;
3110                 total_written = ctx->total_len;
3111                 mutex_unlock(&ctx->aio_mutex);
3112         } else {
3113                 rc = ctx->rc;
3114                 total_written = ctx->total_len;
3115         }
3116
3117         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3118
3119         if (unlikely(!total_written))
3120                 return rc;
3121
3122         iocb->ki_pos += total_written;
3123         return total_written;
3124 }
3125
3126 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3127 {
3128         return __cifs_writev(iocb, from, true);
3129 }
3130
3131 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3132 {
3133         return __cifs_writev(iocb, from, false);
3134 }
3135
3136 static ssize_t
3137 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3138 {
3139         struct file *file = iocb->ki_filp;
3140         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3141         struct inode *inode = file->f_mapping->host;
3142         struct cifsInodeInfo *cinode = CIFS_I(inode);
3143         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3144         ssize_t rc;
3145
3146         inode_lock(inode);
3147         /*
3148          * We need to hold the sem to be sure nobody modifies lock list
3149          * with a brlock that prevents writing.
3150          */
3151         down_read(&cinode->lock_sem);
3152
3153         rc = generic_write_checks(iocb, from);
3154         if (rc <= 0)
3155                 goto out;
3156
3157         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3158                                      server->vals->exclusive_lock_type, 0,
3159                                      NULL, CIFS_WRITE_OP))
3160                 rc = __generic_file_write_iter(iocb, from);
3161         else
3162                 rc = -EACCES;
3163 out:
3164         up_read(&cinode->lock_sem);
3165         inode_unlock(inode);
3166
3167         if (rc > 0)
3168                 rc = generic_write_sync(iocb, rc);
3169         return rc;
3170 }
3171
3172 ssize_t
3173 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3174 {
3175         struct inode *inode = file_inode(iocb->ki_filp);
3176         struct cifsInodeInfo *cinode = CIFS_I(inode);
3177         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3178         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3179                                                 iocb->ki_filp->private_data;
3180         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3181         ssize_t written;
3182
3183         written = cifs_get_writer(cinode);
3184         if (written)
3185                 return written;
3186
3187         if (CIFS_CACHE_WRITE(cinode)) {
3188                 if (cap_unix(tcon->ses) &&
3189                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3190                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3191                         written = generic_file_write_iter(iocb, from);
3192                         goto out;
3193                 }
3194                 written = cifs_writev(iocb, from);
3195                 goto out;
3196         }
3197         /*
3198          * For non-oplocked files in strict cache mode we need to write the data
3199          * to the server exactly from the pos to pos+len-1 rather than flush all
3200          * affected pages because it may cause an error with mandatory locks on
3201          * these pages but not on the region from pos to pos+len-1.
3202          */
3203         written = cifs_user_writev(iocb, from);
3204         if (CIFS_CACHE_READ(cinode)) {
3205                 /*
3206                  * We have read level caching and we have just sent a write
3207                  * request to the server thus making data in the cache stale.
3208                  * Zap the cache and set oplock/lease level to NONE to avoid
3209                  * reading stale data from the cache. All subsequent read
3210                  * operations will read new data from the server.
3211                  */
3212                 cifs_zap_mapping(inode);
3213                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3214                          inode);
3215                 cinode->oplock = 0;
3216         }
3217 out:
3218         cifs_put_writer(cinode);
3219         return written;
3220 }
3221
3222 static struct cifs_readdata *
3223 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3224 {
3225         struct cifs_readdata *rdata;
3226
3227         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3228         if (rdata != NULL) {
3229                 rdata->pages = pages;
3230                 kref_init(&rdata->refcount);
3231                 INIT_LIST_HEAD(&rdata->list);
3232                 init_completion(&rdata->done);
3233                 INIT_WORK(&rdata->work, complete);
3234         }
3235
3236         return rdata;
3237 }
3238
3239 static struct cifs_readdata *
3240 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3241 {
3242         struct page **pages =
3243                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3244         struct cifs_readdata *ret = NULL;
3245
3246         if (pages) {
3247                 ret = cifs_readdata_direct_alloc(pages, complete);
3248                 if (!ret)
3249                         kfree(pages);
3250         }
3251
3252         return ret;
3253 }
3254
3255 void
3256 cifs_readdata_release(struct kref *refcount)
3257 {
3258         struct cifs_readdata *rdata = container_of(refcount,
3259                                         struct cifs_readdata, refcount);
3260 #ifdef CONFIG_CIFS_SMB_DIRECT
3261         if (rdata->mr) {
3262                 smbd_deregister_mr(rdata->mr);
3263                 rdata->mr = NULL;
3264         }
3265 #endif
3266         if (rdata->cfile)
3267                 cifsFileInfo_put(rdata->cfile);
3268
3269         kvfree(rdata->pages);
3270         kfree(rdata);
3271 }
3272
3273 static int
3274 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3275 {
3276         int rc = 0;
3277         struct page *page;
3278         unsigned int i;
3279
3280         for (i = 0; i < nr_pages; i++) {
3281                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3282                 if (!page) {
3283                         rc = -ENOMEM;
3284                         break;
3285                 }
3286                 rdata->pages[i] = page;
3287         }
3288
3289         if (rc) {
3290                 unsigned int nr_page_failed = i;
3291
3292                 for (i = 0; i < nr_page_failed; i++) {
3293                         put_page(rdata->pages[i]);
3294                         rdata->pages[i] = NULL;
3295                 }
3296         }
3297         return rc;
3298 }
3299
3300 static void
3301 cifs_uncached_readdata_release(struct kref *refcount)
3302 {
3303         struct cifs_readdata *rdata = container_of(refcount,
3304                                         struct cifs_readdata, refcount);
3305         unsigned int i;
3306
3307         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3308         for (i = 0; i < rdata->nr_pages; i++) {
3309                 put_page(rdata->pages[i]);
3310         }
3311         cifs_readdata_release(refcount);
3312 }
3313
3314 /**
3315  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3316  * @rdata:      the readdata response with list of pages holding data
3317  * @iter:       destination for our data
3318  *
3319  * This function copies data from a list of pages in a readdata response into
3320  * an array of iovecs. It will first calculate where the data should go
3321  * based on the info in the readdata and then copy the data into that spot.
3322  */
3323 static int
3324 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3325 {
3326         size_t remaining = rdata->got_bytes;
3327         unsigned int i;
3328
3329         for (i = 0; i < rdata->nr_pages; i++) {
3330                 struct page *page = rdata->pages[i];
3331                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3332                 size_t written;
3333
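                     /*
                      * For a pipe destination copy_page_to_iter() would splice
                      * the page itself into the pipe, but these pages are owned
                      * and recycled by cifs, so copy the bytes out via a
                      * temporary mapping instead.
                      */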
3334                 if (unlikely(iov_iter_is_pipe(iter))) {
3335                         void *addr = kmap_atomic(page);
3336
3337                         written = copy_to_iter(addr, copy, iter);
3338                         kunmap_atomic(addr);
3339                 } else
3340                         written = copy_page_to_iter(page, 0, copy, iter);
3341                 remaining -= written;
3342                 if (written < copy && iov_iter_count(iter) > 0)
3343                         break;
3344         }
3345         return remaining ? -EFAULT : 0;
3346 }
3347
3348 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3349
3350 static void
3351 cifs_uncached_readv_complete(struct work_struct *work)
3352 {
3353         struct cifs_readdata *rdata = container_of(work,
3354                                                 struct cifs_readdata, work);
3355
3356         complete(&rdata->done);
3357         collect_uncached_read_data(rdata->ctx);
3358         /* the below call can possibly free the last ref to aio ctx */
3359         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3360 }
3361
3362 static int
3363 uncached_fill_pages(struct TCP_Server_Info *server,
3364                     struct cifs_readdata *rdata, struct iov_iter *iter,
3365                     unsigned int len)
3366 {
3367         int result = 0;
3368         unsigned int i;
3369         unsigned int nr_pages = rdata->nr_pages;
3370         unsigned int page_offset = rdata->page_offset;
3371
3372         rdata->got_bytes = 0;
3373         rdata->tailsz = PAGE_SIZE;
3374         for (i = 0; i < nr_pages; i++) {
3375                 struct page *page = rdata->pages[i];
3376                 size_t n;
3377                 unsigned int segment_size = rdata->pagesz;
3378
3379                 if (i == 0)
3380                         segment_size -= page_offset;
3381                 else
3382                         page_offset = 0;
3383
3385                 if (len <= 0) {
3386                         /* no need to hold page hostage */
3387                         rdata->pages[i] = NULL;
3388                         rdata->nr_pages--;
3389                         put_page(page);
3390                         continue;
3391                 }
3392
3393                 n = len;
3394                 if (len >= segment_size)
3395                         /* enough data to fill the page */
3396                         n = segment_size;
3397                 else
3398                         rdata->tailsz = len;
3399                 len -= n;
3400
3401                 if (iter)
3402                         result = copy_page_from_iter(
3403                                         page, page_offset, n, iter);
3404 #ifdef CONFIG_CIFS_SMB_DIRECT
3405                 else if (rdata->mr)
3406                         result = n;
3407 #endif
3408                 else
3409                         result = cifs_read_page_from_socket(
3410                                         server, page, page_offset, n);
3411                 if (result < 0)
3412                         break;
3413
3414                 rdata->got_bytes += result;
3415         }
3416
3417         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3418                                                 rdata->got_bytes : result;
3419 }
3420
3421 static int
3422 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3423                               struct cifs_readdata *rdata, unsigned int len)
3424 {
3425         return uncached_fill_pages(server, rdata, NULL, len);
3426 }
3427
3428 static int
3429 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3430                               struct cifs_readdata *rdata,
3431                               struct iov_iter *iter)
3432 {
3433         return uncached_fill_pages(server, rdata, iter, iter->count);
3434 }
3435
3436 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3437                         struct list_head *rdata_list,
3438                         struct cifs_aio_ctx *ctx)
3439 {
3440         unsigned int rsize;
3441         struct cifs_credits credits;
3442         int rc;
3443         struct TCP_Server_Info *server =
3444                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3445
3446         do {
3447                 if (rdata->cfile->invalidHandle) {
3448                         rc = cifs_reopen_file(rdata->cfile, true);
3449                         if (rc == -EAGAIN)
3450                                 continue;
3451                         else if (rc)
3452                                 break;
3453                 }
3454
3455                 /*
3456                  * Wait for credits to resend this rdata.
3457                  * Note: we are attempting to resend the whole rdata rather
3458                  * than in segments.
3459                  */
3460                 do {
3461                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3462                                                 &rsize, &credits);
3463
3464                         if (rc)
3465                                 goto fail;
3466
3467                         if (rsize < rdata->bytes) {
3468                                 add_credits_and_wake_if(server, &credits, 0);
3469                                 msleep(1000);
3470                         }
3471                 } while (rsize < rdata->bytes);
3472                 rdata->credits = credits;
3473
3474                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3475                 if (!rc) {
3476                         if (rdata->cfile->invalidHandle)
3477                                 rc = -EAGAIN;
3478                         else
3479                                 rc = server->ops->async_readv(rdata);
3480                 }
3481
3482                 /* If the read was successfully sent, we are done */
3483                 if (!rc) {
3484                         /* Add to aio pending list */
3485                         list_add_tail(&rdata->list, rdata_list);
3486                         return 0;
3487                 }
3488
3489                 /* Roll back credits and retry if needed */
3490                 add_credits_and_wake_if(server, &rdata->credits, 0);
3491         } while (rc == -EAGAIN);
3492
3493 fail:
3494         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3495         return rc;
3496 }
3497
3498 static int
3499 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3500                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3501                      struct cifs_aio_ctx *ctx)
3502 {
3503         struct cifs_readdata *rdata;
3504         unsigned int npages, rsize;
3505         struct cifs_credits credits_on_stack;
3506         struct cifs_credits *credits = &credits_on_stack;
3507         size_t cur_len;
3508         int rc;
3509         pid_t pid;
3510         struct TCP_Server_Info *server;
3511         struct page **pagevec;
3512         size_t start;
3513         struct iov_iter direct_iov = ctx->iter;
3514
3515         server = tlink_tcon(open_file->tlink)->ses->server;
3516
3517         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3518                 pid = open_file->pid;
3519         else
3520                 pid = current->tgid;
3521
3522         if (ctx->direct_io)
3523                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3524
3525         do {
3526                 if (open_file->invalidHandle) {
3527                         rc = cifs_reopen_file(open_file, true);
3528                         if (rc == -EAGAIN)
3529                                 continue;
3530                         else if (rc)
3531                                 break;
3532                 }
3533
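                     /* the granted rsize may be smaller than cifs_sb->rsize */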
3534                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3535                                                    &rsize, credits);
3536                 if (rc)
3537                         break;
3538
3539                 cur_len = min_t(const size_t, len, rsize);
3540
3541                 if (ctx->direct_io) {
3542                         ssize_t result;
3543
3544                         result = iov_iter_get_pages_alloc(
3545                                         &direct_iov, &pagevec,
3546                                         cur_len, &start);
3547                         if (result < 0) {
3548                                 cifs_dbg(VFS,
3549                                         "couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3552                                         result, direct_iov.type,
3553                                         direct_iov.iov_offset,
3554                                         direct_iov.count);
3555                                 dump_stack();
3556
3557                                 rc = result;
3558                                 add_credits_and_wake_if(server, credits, 0);
3559                                 break;
3560                         }
3561                         cur_len = (size_t)result;
3562                         iov_iter_advance(&direct_iov, cur_len);
3563
3564                         rdata = cifs_readdata_direct_alloc(
3565                                         pagevec, cifs_uncached_readv_complete);
3566                         if (!rdata) {
3567                                 add_credits_and_wake_if(server, credits, 0);
3568                                 rc = -ENOMEM;
3569                                 break;
3570                         }
3571
3572                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3573                         rdata->page_offset = start;
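                             /*
                              * As in the write path, the tail page holds what
                              * remains after the partial first page and the
                              * full middle pages.
                              */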
3574                         rdata->tailsz = npages > 1 ?
3575                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3576                                 cur_len;
3577
3578                 } else {
3579
3580                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3581                         /* allocate a readdata struct */
3582                         rdata = cifs_readdata_alloc(npages,
3583                                             cifs_uncached_readv_complete);
3584                         if (!rdata) {
3585                                 add_credits_and_wake_if(server, credits, 0);
3586                                 rc = -ENOMEM;
3587                                 break;
3588                         }
3589
3590                         rc = cifs_read_allocate_pages(rdata, npages);
3591                         if (rc) {
3592                                 kvfree(rdata->pages);
3593                                 kfree(rdata);
3594                                 add_credits_and_wake_if(server, credits, 0);
3595                                 break;
3596                         }
3597
3598                         rdata->tailsz = PAGE_SIZE;
3599                 }
3600
3601                 rdata->cfile = cifsFileInfo_get(open_file);
3602                 rdata->nr_pages = npages;
3603                 rdata->offset = offset;
3604                 rdata->bytes = cur_len;
3605                 rdata->pid = pid;
3606                 rdata->pagesz = PAGE_SIZE;
3607                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3608                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3609                 rdata->credits = credits_on_stack;
3610                 rdata->ctx = ctx;
3611                 kref_get(&ctx->refcount);
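                     /* each in-flight rdata pins the aio ctx until released */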
3612
3613                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3614
3615                 if (!rc) {
3616                         if (rdata->cfile->invalidHandle)
3617                                 rc = -EAGAIN;
3618                         else
3619                                 rc = server->ops->async_readv(rdata);
3620                 }
3621
3622                 if (rc) {
3623                         add_credits_and_wake_if(server, &rdata->credits, 0);
3624                         kref_put(&rdata->refcount,
3625                                 cifs_uncached_readdata_release);
3626                         if (rc == -EAGAIN) {
3627                                 iov_iter_revert(&direct_iov, cur_len);
3628                                 continue;
3629                         }
3630                         break;
3631                 }
3632
3633                 list_add_tail(&rdata->list, rdata_list);
3634                 offset += cur_len;
3635                 len -= cur_len;
3636         } while (len > 0);
3637
3638         return rc;
3639 }
3640
3641 static void
3642 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3643 {
3644         struct cifs_readdata *rdata, *tmp;
3645         struct iov_iter *to = &ctx->iter;
3646         struct cifs_sb_info *cifs_sb;
3647         int rc;
3648
3649         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3650
3651         mutex_lock(&ctx->aio_mutex);
3652
3653         if (list_empty(&ctx->list)) {
3654                 mutex_unlock(&ctx->aio_mutex);
3655                 return;
3656         }
3657
3658         rc = ctx->rc;
3659         /* the loop below should proceed in the order of increasing offsets */
3660 again:
3661         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3662                 if (!rc) {
3663                         if (!try_wait_for_completion(&rdata->done)) {
3664                                 mutex_unlock(&ctx->aio_mutex);
3665                                 return;
3666                         }
3667
3668                         if (rdata->result == -EAGAIN) {
3669                                 /* resend call if it's a retryable error */
3670                                 struct list_head tmp_list;
3671                                 unsigned int got_bytes = rdata->got_bytes;
3672
3673                                 list_del_init(&rdata->list);
3674                                 INIT_LIST_HEAD(&tmp_list);
3675
3676                                 /*
3677                                  * Got part of the data, then a reconnect
3678                                  * happened; fill the buffer and continue
3679                                  * reading.
3680                                  */
3681                                 if (got_bytes && got_bytes < rdata->bytes) {
3682                                         rc = 0;
3683                                         if (!ctx->direct_io)
3684                                                 rc = cifs_readdata_to_iov(rdata, to);
3685                                         if (rc) {
3686                                                 kref_put(&rdata->refcount,
3687                                                         cifs_uncached_readdata_release);
3688                                                 continue;
3689                                         }
3690                                 }
3691
3692                                 if (ctx->direct_io) {
3693                                         /*
3694                                          * Re-use rdata as this is a
3695                                          * direct I/O
3696                                          */
3697                                         rc = cifs_resend_rdata(
3698                                                 rdata,
3699                                                 &tmp_list, ctx);
3700                                 } else {
3701                                         rc = cifs_send_async_read(
3702                                                 rdata->offset + got_bytes,
3703                                                 rdata->bytes - got_bytes,
3704                                                 rdata->cfile, cifs_sb,
3705                                                 &tmp_list, ctx);
3706
3707                                         kref_put(&rdata->refcount,
3708                                                 cifs_uncached_readdata_release);
3709                                 }
3710
3711                                 list_splice(&tmp_list, &ctx->list);
3712
3713                                 goto again;
3714                         } else if (rdata->result)
3715                                 rc = rdata->result;
3716                         else if (!ctx->direct_io)
3717                                 rc = cifs_readdata_to_iov(rdata, to);
3718
3719                         /* if there was a short read -- discard anything left */
3720                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3721                                 rc = -ENODATA;
3722
3723                         ctx->total_len += rdata->got_bytes;
3724                 }
3725                 list_del_init(&rdata->list);
3726                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3727         }
3728
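             /*
              * For the buffered path the iterator tracks how much was actually
              * copied out, so derive total_len from what remains in it.
              */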
3729         if (!ctx->direct_io)
3730                 ctx->total_len = ctx->len - iov_iter_count(to);
3731
3732         /* mask nodata case */
3733         if (rc == -ENODATA)
3734                 rc = 0;
3735
3736         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3737
3738         mutex_unlock(&ctx->aio_mutex);
3739
3740         if (ctx->iocb && ctx->iocb->ki_complete)
3741                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3742         else
3743                 complete(&ctx->done);
3744 }
3745
3746 static ssize_t __cifs_readv(
3747         struct kiocb *iocb, struct iov_iter *to, bool direct)
3748 {
3749         size_t len;
3750         struct file *file = iocb->ki_filp;
3751         struct cifs_sb_info *cifs_sb;
3752         struct cifsFileInfo *cfile;
3753         struct cifs_tcon *tcon;
3754         ssize_t rc, total_read = 0;
3755         loff_t offset = iocb->ki_pos;
3756         struct cifs_aio_ctx *ctx;
3757
3758         /*
3759          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
3760          * fall back to the data copy read path.
3761          * This could be improved by getting pages directly in ITER_KVEC.
3762          */
3763         if (direct && to->type & ITER_KVEC) {
3764                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3765                 direct = false;
3766         }
3767
3768         len = iov_iter_count(to);
3769         if (!len)
3770                 return 0;
3771
3772         cifs_sb = CIFS_FILE_SB(file);
3773         cfile = file->private_data;
3774         tcon = tlink_tcon(cfile->tlink);
3775
3776         if (!tcon->ses->server->ops->async_readv)
3777                 return -ENOSYS;
3778
3779         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3780                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3781
3782         ctx = cifs_aio_ctx_alloc();
3783         if (!ctx)
3784                 return -ENOMEM;
3785
3786         ctx->cfile = cifsFileInfo_get(cfile);
3787
3788         if (!is_sync_kiocb(iocb))
3789                 ctx->iocb = iocb;
3790
3791         if (iter_is_iovec(to))
3792                 ctx->should_dirty = true;
3793
3794         if (direct) {
3795                 ctx->pos = offset;
3796                 ctx->direct_io = true;
3797                 ctx->iter = *to;
3798                 ctx->len = len;
3799         } else {
3800                 rc = setup_aio_ctx_iter(ctx, to, READ);
3801                 if (rc) {
3802                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3803                         return rc;
3804                 }
3805                 len = ctx->len;
3806         }
3807
3808         /* grab a lock here because the read response handlers can access ctx */
3809         mutex_lock(&ctx->aio_mutex);
3810
3811         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3812
3813         /* if at least one read request was successfully sent, then reset rc */
3814         if (!list_empty(&ctx->list))
3815                 rc = 0;
3816
3817         mutex_unlock(&ctx->aio_mutex);
3818
3819         if (rc) {
3820                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3821                 return rc;
3822         }
3823
3824         if (!is_sync_kiocb(iocb)) {
3825                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3826                 return -EIOCBQUEUED;
3827         }
3828
3829         rc = wait_for_completion_killable(&ctx->done);
3830         if (rc) {
3831                 mutex_lock(&ctx->aio_mutex);
3832                 ctx->rc = rc = -EINTR;
3833                 total_read = ctx->total_len;
3834                 mutex_unlock(&ctx->aio_mutex);
3835         } else {
3836                 rc = ctx->rc;
3837                 total_read = ctx->total_len;
3838         }
3839
3840         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3841
3842         if (total_read) {
3843                 iocb->ki_pos += total_read;
3844                 return total_read;
3845         }
3846         return rc;
3847 }
3848
3849 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3850 {
3851         return __cifs_readv(iocb, to, true);
3852 }
3853
3854 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3855 {
3856         return __cifs_readv(iocb, to, false);
3857 }
3858
3859 ssize_t
3860 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3861 {
3862         struct inode *inode = file_inode(iocb->ki_filp);
3863         struct cifsInodeInfo *cinode = CIFS_I(inode);
3864         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3865         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3866                                                 iocb->ki_filp->private_data;
3867         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3868         int rc = -EACCES;
3869
3870         /*
3871          * In strict cache mode we need to read from the server all the time
3872          * if we don't have a level II oplock because the server can delay mtime
3873          * change - so we can't make a decision about invalidating the inode.
3874          * And we can also fail with page reading if there are mandatory locks
3875          * on pages affected by this read but not on the region from pos to
3876          * pos+len-1.
3877          */
3878         if (!CIFS_CACHE_READ(cinode))
3879                 return cifs_user_readv(iocb, to);
3880
3881         if (cap_unix(tcon->ses) &&
3882             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3883             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3884                 return generic_file_read_iter(iocb, to);
3885
3886         /*
3887          * We need to hold the sem to be sure nobody modifies lock list
3888          * with a brlock that prevents reading.
3889          */
3890         down_read(&cinode->lock_sem);
3891         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3892                                      tcon->ses->server->vals->shared_lock_type,
3893                                      0, NULL, CIFS_READ_OP))
3894                 rc = generic_file_read_iter(iocb, to);
3895         up_read(&cinode->lock_sem);
3896         return rc;
3897 }
3898
3899 static ssize_t
3900 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3901 {
3902         int rc = -EACCES;
3903         unsigned int bytes_read = 0;
3904         unsigned int total_read;
3905         unsigned int current_read_size;
3906         unsigned int rsize;
3907         struct cifs_sb_info *cifs_sb;
3908         struct cifs_tcon *tcon;
3909         struct TCP_Server_Info *server;
3910         unsigned int xid;
3911         char *cur_offset;
3912         struct cifsFileInfo *open_file;
3913         struct cifs_io_parms io_parms;
3914         int buf_type = CIFS_NO_BUFFER;
3915         __u32 pid;
3916
3917         xid = get_xid();
3918         cifs_sb = CIFS_FILE_SB(file);
3919
3920         /* FIXME: set up handlers for larger reads and/or convert to async */
3921         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3922
3923         if (file->private_data == NULL) {
3924                 rc = -EBADF;
3925                 free_xid(xid);
3926                 return rc;
3927         }
3928         open_file = file->private_data;
3929         tcon = tlink_tcon(open_file->tlink);
3930         server = tcon->ses->server;
3931
3932         if (!server->ops->sync_read) {
3933                 free_xid(xid);
3934                 return -ENOSYS;
3935         }
3936
3937         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3938                 pid = open_file->pid;
3939         else
3940                 pid = current->tgid;
3941
3942         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3943                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3944
3945         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3946              total_read += bytes_read, cur_offset += bytes_read) {
3947                 do {
3948                         current_read_size = min_t(uint, read_size - total_read,
3949                                                   rsize);
3950                         /*
3951                          * For Windows ME and 9x we do not want to request more
3952                          * than was negotiated, since the server will refuse
3953                          * the read otherwise.
3954                          */
3955                         if ((tcon->ses) && !(tcon->ses->capabilities &
3956                                 tcon->ses->server->vals->cap_large_files)) {
3957                                 current_read_size = min_t(uint,
3958                                         current_read_size, CIFSMaxBufSize);
3959                         }
3960                         if (open_file->invalidHandle) {
3961                                 rc = cifs_reopen_file(open_file, true);
3962                                 if (rc != 0)
3963                                         break;
3964                         }
3965                         io_parms.pid = pid;
3966                         io_parms.tcon = tcon;
3967                         io_parms.offset = *offset;
3968                         io_parms.length = current_read_size;
3969                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3970                                                     &bytes_read, &cur_offset,
3971                                                     &buf_type);
3972                 } while (rc == -EAGAIN);
3973
3974                 if (rc || (bytes_read == 0)) {
3975                         if (total_read) {
3976                                 break;
3977                         } else {
3978                                 free_xid(xid);
3979                                 return rc;
3980                         }
3981                 } else {
3982                         cifs_stats_bytes_read(tcon, total_read);
3983                         *offset += bytes_read;
3984                 }
3985         }
3986         free_xid(xid);
3987         return total_read;
3988 }
3989
3990 /*
3991  * If the page is mmap'ed into a process' page tables, then we need to make
3992  * sure that it doesn't change while being written back.
3993  */
3994 static vm_fault_t
3995 cifs_page_mkwrite(struct vm_fault *vmf)
3996 {
3997         struct page *page = vmf->page;
3998
3999         lock_page(page);
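             /*
              * Return with the page still locked; holding the page lock
              * serializes this fault against writeback, which locks the page
              * while writing it out.
              */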
4000         return VM_FAULT_LOCKED;
4001 }
4002
4003 static const struct vm_operations_struct cifs_file_vm_ops = {
4004         .fault = filemap_fault,
4005         .map_pages = filemap_map_pages,
4006         .page_mkwrite = cifs_page_mkwrite,
4007 };
4008
4009 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4010 {
4011         int xid, rc = 0;
4012         struct inode *inode = file_inode(file);
4013
4014         xid = get_xid();
4015
4016         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4017                 rc = cifs_zap_mapping(inode);
4018         if (!rc)
4019                 rc = generic_file_mmap(file, vma);
4020         if (!rc)
4021                 vma->vm_ops = &cifs_file_vm_ops;
4022
4023         free_xid(xid);
4024         return rc;
4025 }
4026
4027 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4028 {
4029         int rc, xid;
4030
4031         xid = get_xid();
4032
4033         rc = cifs_revalidate_file(file);
4034         if (rc)
4035                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4036                          rc);
4037         if (!rc)
4038                 rc = generic_file_mmap(file, vma);
4039         if (!rc)
4040                 vma->vm_ops = &cifs_file_vm_ops;
4041
4042         free_xid(xid);
4043         return rc;
4044 }
4045
4046 static void
4047 cifs_readv_complete(struct work_struct *work)
4048 {
4049         unsigned int i, got_bytes;
4050         struct cifs_readdata *rdata = container_of(work,
4051                                                 struct cifs_readdata, work);
4052
4053         got_bytes = rdata->got_bytes;
4054         for (i = 0; i < rdata->nr_pages; i++) {
4055                 struct page *page = rdata->pages[i];
4056
4057                 lru_cache_add_file(page);
4058
4059                 if (rdata->result == 0 ||
4060                     (rdata->result == -EAGAIN && got_bytes)) {
4061                         flush_dcache_page(page);
4062                         SetPageUptodate(page);
4063                 }
4064
4065                 unlock_page(page);
4066
4067                 if (rdata->result == 0 ||
4068                     (rdata->result == -EAGAIN && got_bytes))
4069                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4070
4071                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4072
4073                 put_page(page);
4074                 rdata->pages[i] = NULL;
4075         }
4076         kref_put(&rdata->refcount, cifs_readdata_release);
4077 }
4078
4079 static int
4080 readpages_fill_pages(struct TCP_Server_Info *server,
4081                      struct cifs_readdata *rdata, struct iov_iter *iter,
4082                      unsigned int len)
4083 {
4084         int result = 0;
4085         unsigned int i;
4086         u64 eof;
4087         pgoff_t eof_index;
4088         unsigned int nr_pages = rdata->nr_pages;
4089         unsigned int page_offset = rdata->page_offset;
4090
4091         /* determine the eof that the server (probably) has */
4092         eof = CIFS_I(rdata->mapping->host)->server_eof;
4093         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
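             /*
              * e.g. with 4K pages an eof of 4097 bytes gives eof_index 1:
              * the last byte the server has lives in the second page.
              */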
4094         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4095
4096         rdata->got_bytes = 0;
4097         rdata->tailsz = PAGE_SIZE;
4098         for (i = 0; i < nr_pages; i++) {
4099                 struct page *page = rdata->pages[i];
4100                 unsigned int to_read = rdata->pagesz;
4101                 size_t n;
4102
4103                 if (i == 0)
4104                         to_read -= page_offset;
4105                 else
4106                         page_offset = 0;
4107
4108                 n = to_read;
4109
4110                 if (len >= to_read) {
4111                         len -= to_read;
4112                 } else if (len > 0) {
4113                         /* enough for partial page, fill and zero the rest */
4114                         zero_user(page, len + page_offset, to_read - len);
4115                         n = rdata->tailsz = len;
4116                         len = 0;
4117                 } else if (page->index > eof_index) {
4118                         /*
4119                          * The VFS will not try to do readahead past the
4120                          * i_size, but it's possible that we have outstanding
4121                          * writes with gaps in the middle and the i_size hasn't
4122                          * caught up yet. Populate those with zeroed out pages
4123                          * to prevent the VFS from repeatedly attempting to
4124                          * fill them until the writes are flushed.
4125                          */
4126                         zero_user(page, 0, PAGE_SIZE);
4127                         lru_cache_add_file(page);
4128                         flush_dcache_page(page);
4129                         SetPageUptodate(page);
4130                         unlock_page(page);
4131                         put_page(page);
4132                         rdata->pages[i] = NULL;
4133                         rdata->nr_pages--;
4134                         continue;
4135                 } else {
4136                         /* no need to hold page hostage */
4137                         lru_cache_add_file(page);
4138                         unlock_page(page);
4139                         put_page(page);
4140                         rdata->pages[i] = NULL;
4141                         rdata->nr_pages--;
4142                         continue;
4143                 }
4144
4145                 if (iter)
4146                         result = copy_page_from_iter(
4147                                         page, page_offset, n, iter);
4148 #ifdef CONFIG_CIFS_SMB_DIRECT
4149                 else if (rdata->mr)
4150                         result = n;
4151 #endif
4152                 else
4153                         result = cifs_read_page_from_socket(
4154                                         server, page, page_offset, n);
4155                 if (result < 0)
4156                         break;
4157
4158                 rdata->got_bytes += result;
4159         }
4160
4161         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4162                                                 rdata->got_bytes : result;
4163 }
4164
4165 static int
4166 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4167                                struct cifs_readdata *rdata, unsigned int len)
4168 {
4169         return readpages_fill_pages(server, rdata, NULL, len);
4170 }
4171
4172 static int
4173 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4174                                struct cifs_readdata *rdata,
4175                                struct iov_iter *iter)
4176 {
4177         return readpages_fill_pages(server, rdata, iter, iter->count);
4178 }
4179
4180 static int
4181 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4182                     unsigned int rsize, struct list_head *tmplist,
4183                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4184 {
4185         struct page *page, *tpage;
4186         unsigned int expected_index;
4187         int rc;
4188         gfp_t gfp = readahead_gfp_mask(mapping);
4189
4190         INIT_LIST_HEAD(tmplist);
4191
4192         page = lru_to_page(page_list);
4193
4194         /*
4195          * Lock the page and put it in the cache. Since no one else
4196          * should have access to this page, we're safe to simply set
4197          * PG_locked without checking it first.
4198          */
4199         __SetPageLocked(page);
4200         rc = add_to_page_cache_locked(page, mapping,
4201                                       page->index, gfp);
4202
4203         /* give up if we can't stick it in the cache */
4204         if (rc) {
4205                 __ClearPageLocked(page);
4206                 return rc;
4207         }
4208
4209         /* move first page to the tmplist */
4210         *offset = (loff_t)page->index << PAGE_SHIFT;
4211         *bytes = PAGE_SIZE;
4212         *nr_pages = 1;
4213         list_move_tail(&page->lru, tmplist);
4214
4215         /* now try and add more pages onto the request */
4216         expected_index = page->index + 1;
4217         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4218                 /* discontinuity? */
4219                 if (page->index != expected_index)
4220                         break;
4221
4222                 /* would this page push the read over the rsize? */
4223                 if (*bytes + PAGE_SIZE > rsize)
4224                         break;
4225
4226                 __SetPageLocked(page);
4227                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4228                         __ClearPageLocked(page);
4229                         break;
4230                 }
4231                 list_move_tail(&page->lru, tmplist);
4232                 (*bytes) += PAGE_SIZE;
4233                 expected_index++;
4234                 (*nr_pages)++;
4235         }
4236         return rc;
4237 }
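
/*
 * Worked example (sketch only, not compiled): with page_list holding
 * indexes {9, 7, 6, 5} in declining order and rsize = 3 * PAGE_SIZE,
 * the loop above batches {5, 6, 7} (contiguous and within rsize) and
 * stops at the discontinuity, leaving {9} for the next call.  The
 * append rule as a standalone predicate:
 */
#if 0
static bool can_append_page(pgoff_t index, pgoff_t expected_index,
			    unsigned int bytes, unsigned int rsize)
{
	/* must be the very next index and must still fit under rsize */
	return index == expected_index && bytes + PAGE_SIZE <= rsize;
}
#endif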

static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative.
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up on every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
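
/*
 * Sketch (not compiled) of the credit discipline used above: credits
 * obtained from wait_mtu_credits() must either be consumed by a
 * successfully issued async request or be handed back through
 * add_credits_and_wake_if() on every error path, or the server's
 * credit window slowly leaks away.
 */
#if 0
	rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
					   &rsize, credits);
	if (rc)
		return rc;			/* nothing held yet */
	rc = issue_async_read();		/* hypothetical helper */
	if (rc)
		add_credits_and_wake_if(server, credits, 0); /* give back */
#endif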

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size we could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);
	/* we do not want atime to be less than mtime, it broke some apps */
	file_inode(file)->i_atime = current_time(file_inode(file));
	if (timespec64_compare(&(file_inode(file)->i_atime),
			       &(file_inode(file)->i_mtime)) < 0)
		file_inode(file)->i_atime = file_inode(file)->i_mtime;
	else
		file_inode(file)->i_atime = current_time(file_inode(file));

	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}
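
/*
 * Sketch (not compiled): timespec64_compare() returns <0, 0 or >0 in
 * the manner of strcmp(), so the atime update above reads "if atime
 * would end up older than mtime, pin it to mtime".  Equivalently:
 */
#if 0
static struct timespec64 cifs_clamp_atime(struct timespec64 now,
					  struct timespec64 mtime)
{
	/* never report an atime older than mtime */
	return timespec64_compare(&now, &mtime) < 0 ? mtime : now;
}
#endif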

static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}

static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_inode->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return 0;
}
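
/*
 * Sketch (not compiled): the scan above must drop open_file_lock on
 * every exit path.  An equivalent single-unlock form, shown only to
 * make the locking rule explicit:
 */
#if 0
static int is_inode_writable_alt(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;
	int writable = 0;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			writable = 1;
			break;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return writable;
}
#endif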

/*
 * We do not want to update the file size from server for inodes
 * open for write - to avoid races with writepage extending the file.
 * In the future we could consider allowing refreshing the inode only
 * on increases in the file size, but this is tricky to do without
 * racing with writebehind page caching in the current Linux kernel
 * design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since there is no page cache to corrupt on
			   directio we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/*
		 * We could try using another file handle if there is one,
		 * but how would we lock it to prevent a close of that handle
		 * racing with this read? In any case this will be written
		 * out by write_end, so it is fine.
		 */
	}
out:
	*pagep = page;
	return rc;
}
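
/*
 * Worked example for the zero_user_segments() call above (sketch only,
 * with hypothetical numbers): on a 4K page, pos = 1024 and len = 512
 * give offset = 1024, so the ranges [0, 1024) and [1536, 4096) are
 * zeroed, exactly the bytes that the upcoming copy will not overwrite,
 * before the page is marked PageChecked.
 */
#if 0
	loff_t pos = 1024;			/* hypothetical */
	unsigned int len = 512;			/* hypothetical */
	loff_t off = pos & (PAGE_SIZE - 1);	/* 1024 */

	zero_user_segments(page, 0, off, off + len, PAGE_SIZE);
#endif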

static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}

void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode,
		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode)) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
	/*
	 * Releasing a stale oplock after a recent reconnect of the SMB
	 * session, using a now-incorrect file handle, is not a data
	 * integrity issue, but do not bother sending an oplock release
	 * if the session to the server is still down, since the server
	 * has already released the oplock.
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */);
	cifs_done_oplock_break(cinode);
}
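
/*
 * For reference, the flush rules above in brief: once the local lease
 * is broken, a regular file is always written back; only when read
 * caching is lost as well (!CIFS_CACHE_READ) do we also wait for the
 * writeback and zap the page cache, since cached pages can no longer
 * be trusted.
 */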

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with the O_DIRECT flag, which would have failed
 * otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
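
/*
 * Sketch (userspace, hypothetical path, not part of this file): the
 * only observable effect of wiring up ->direct_IO here is that open(2)
 * with O_DIRECT succeeds on a cached mount instead of failing:
 */
#if 0
	int fd = open("/mnt/cifs/file", O_RDWR | O_DIRECT);
	/*
	 * fd is valid, but I/O still goes through the page cache;
	 * cifs_direct_io() itself always returns -EINVAL.
	 */
#endif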

const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};