/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

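/*
 * Map POSIX open(2) access flags to the NT desired-access bits sent in an
 * SMB open request. The fallback mask at the end is only reached for an
 * invalid O_ACCMODE value and grants the individual read/write rights
 * explicitly.
 */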
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

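/*
 * Map POSIX open(2) flags to the SMB_O_* flags used by the CIFS POSIX
 * extensions. Note that O_DSYNC is conservatively widened to SMB_O_SYNC.
 */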
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

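/*
 * Derive the create disposition (FILE_CREATE, FILE_OVERWRITE_IF, ...) from
 * the O_CREAT/O_EXCL/O_TRUNC combination; see the mapping table in
 * cifs_nt_open() below.
 */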
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

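/*
 * Open a file via the CIFS POSIX extensions. On success *pnetfid and
 * *poplock are filled in and, if the caller passed pinode, the inode is
 * looked up or refreshed from the returned FILE_UNIX_BASIC_INFO.
 */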
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

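/*
 * Open a file with a traditional NT-style create request (used when the
 * POSIX open path is unavailable or fails) and refresh the inode from the
 * metadata the server returns.
 */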
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing file
 *      rather than replacing it the way FILE_SUPERSEDE does (which
 *      applies the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

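/* Return true if any open instance of this inode holds mandatory brlocks. */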
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

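/*
 * Allocate and initialize the per-open-file bookkeeping (cifsFileInfo plus
 * its lock list), link it into the inode and tcon lists, and apply the
 * oplock granted by the server (or by a pending open, if one exists).
 */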
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if it's a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

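/* Take an extra reference on the file private data. */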
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

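/*
 * VFS ->open() for regular files: try a POSIX open where the server
 * supports it, fall back to an NT-style open otherwise, then attach the
 * resulting handle to the struct file.
 */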
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

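/*
 * Reopen a file handle that was invalidated, e.g. by a reconnect. Once the
 * open succeeds, dirty pages are flushed and the inode metadata refreshed
 * when can_flush is set, and any byte-range locks are re-acquired.
 */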
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here, and we can never tell if the caller already has the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server version of the file size can be
         * stale. If we knew for sure that the inode was not dirty locally we
         * could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if we
         * have data that would invalidate the current end of file on the
         * server we can not go to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

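/*
 * VFS ->release() for directories: close the search handle on the server
 * if needed and free the private data attached at opendir time.
 */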
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

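/* Allocate and initialize a byte-range lock record for the current task. */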
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

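/*
 * Walk one fid's list of byte-range locks and report the first lock that
 * overlaps the given range and conflicts with the requested type and
 * operation.
 */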
/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

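/* Unconditionally attach a lock record to this open file's lock list. */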
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

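/*
 * Send all cached mandatory byte-range locks for this open file to the
 * server, batching as many LOCKING_ANDX_RANGE entries per request as the
 * negotiated buffer size allows.
 */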
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

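/*
 * Push all cached POSIX (fcntl) locks on this inode to the server. The
 * lock_to_push structures are preallocated so that nothing needs to be
 * allocated while flc_lock is held.
 */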
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we hold cinode->lock_sem, which protects
         * the locking operations on this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                /* advance to the next preallocated element */
                el = el->next;
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

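/*
 * Push cached byte-range locks to the server, choosing the POSIX or
 * mandatory flavor based on the negotiated capabilities, and clear
 * can_cache_brlcks so later lock requests go straight to the server.
 */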
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

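/*
 * Decode a struct file_lock into the lock type, lock/unlock operation and
 * wait flag used by the lower-level locking calls.
 */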
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

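/*
 * Handle F_GETLK: test for a conflicting lock locally and, when the cache
 * cannot answer, probe the server by briefly acquiring and releasing the
 * range.
 */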
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

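/*
 * Unlock every cached lock falling inside flock's range, batching the
 * ranges into as few LOCKING_ANDX requests as possible; on failure the
 * affected lock records are restored to the file's list.
 */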
1374 int
1375 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1376                   unsigned int xid)
1377 {
1378         int rc = 0, stored_rc;
1379         int types[] = {LOCKING_ANDX_LARGE_FILES,
1380                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1381         unsigned int i;
1382         unsigned int max_num, num, max_buf;
1383         LOCKING_ANDX_RANGE *buf, *cur;
1384         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1385         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1386         struct cifsLockInfo *li, *tmp;
1387         __u64 length = 1 + flock->fl_end - flock->fl_start;
1388         struct list_head tmp_llist;
1389
1390         INIT_LIST_HEAD(&tmp_llist);
1391
1392         /*
1393          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1394          * and check it for zero before using.
1395          */
1396         max_buf = tcon->ses->server->maxBuf;
1397         if (!max_buf)
1398                 return -EINVAL;
1399
1400         max_num = (max_buf - sizeof(struct smb_hdr)) /
1401                                                 sizeof(LOCKING_ANDX_RANGE);
1402         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1403         if (!buf)
1404                 return -ENOMEM;
1405
1406         down_write(&cinode->lock_sem);
1407         for (i = 0; i < 2; i++) {
1408                 cur = buf;
1409                 num = 0;
1410                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1411                         if (flock->fl_start > li->offset ||
1412                             (flock->fl_start + length) <
1413                             (li->offset + li->length))
1414                                 continue;
1415                         if (current->tgid != li->pid)
1416                                 continue;
1417                         if (types[i] != li->type)
1418                                 continue;
1419                         if (cinode->can_cache_brlcks) {
1420                                 /*
1421                                  * We can cache brlock requests - simply remove
1422                                  * a lock from the file's list.
1423                                  */
1424                                 list_del(&li->llist);
1425                                 cifs_del_lock_waiters(li);
1426                                 kfree(li);
1427                                 continue;
1428                         }
1429                         cur->Pid = cpu_to_le16(li->pid);
1430                         cur->LengthLow = cpu_to_le32((u32)li->length);
1431                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1432                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1433                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1434                         /*
1435                          * We need to save the lock here so that we can add it
1436                          * back to the file's list if the unlock range request
1437                          * fails on the server.
1438                          */
1439                         list_move(&li->llist, &tmp_llist);
1440                         if (++num == max_num) {
1441                                 stored_rc = cifs_lockv(xid, tcon,
1442                                                        cfile->fid.netfid,
1443                                                        li->type, num, 0, buf);
1444                                 if (stored_rc) {
1445                                         /*
1446                                          * We failed on the unlock range
1447                                          * request - add all locks from the tmp
1448                                          * list to the head of the file's list.
1449                                          */
1450                                         cifs_move_llist(&tmp_llist,
1451                                                         &cfile->llist->locks);
1452                                         rc = stored_rc;
1453                                 } else
1454                                         /*
1455                          * The unlock range request succeeded -
1456                                          * free the tmp list.
1457                                          */
1458                                         cifs_free_llist(&tmp_llist);
1459                                 cur = buf;
1460                                 num = 0;
1461                         } else
1462                                 cur++;
1463                 }
1464                 if (num) {
1465                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1466                                                types[i], num, 0, buf);
1467                         if (stored_rc) {
1468                                 cifs_move_llist(&tmp_llist,
1469                                                 &cfile->llist->locks);
1470                                 rc = stored_rc;
1471                         } else
1472                                 cifs_free_llist(&tmp_llist);
1473                 }
1474         }
1475
1476         up_write(&cinode->lock_sem);
1477         kfree(buf);
1478         return rc;
1479 }
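
/*
 * Editor's note: a worked example of the batching arithmetic above,
 * under assumed sizes (a hypothetical maxBuf of 16384 bytes, a 32-byte
 * struct smb_hdr and a 20-byte LOCKING_ANDX_RANGE):
 *
 *   max_num = (16384 - 32) / 20 = 817
 *
 * so up to 817 unlock ranges are coalesced into one LOCKING_ANDX
 * request before cifs_lockv() is called and the buffer is reused.
 */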
1480
1481 static int
1482 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1483            bool wait_flag, bool posix_lck, int lock, int unlock,
1484            unsigned int xid)
1485 {
1486         int rc = 0;
1487         __u64 length = 1 + flock->fl_end - flock->fl_start;
1488         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1489         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1490         struct TCP_Server_Info *server = tcon->ses->server;
1491         struct inode *inode = d_inode(cfile->dentry);
1492
1493         if (posix_lck) {
1494                 int posix_lock_type;
1495
1496                 rc = cifs_posix_lock_set(file, flock);
1497                 if (!rc || rc < 0)
1498                         return rc;
1499
1500                 if (type & server->vals->shared_lock_type)
1501                         posix_lock_type = CIFS_RDLCK;
1502                 else
1503                         posix_lock_type = CIFS_WRLCK;
1504
1505                 if (unlock == 1)
1506                         posix_lock_type = CIFS_UNLCK;
1507
1508                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1509                                       current->tgid, flock->fl_start, length,
1510                                       NULL, posix_lock_type, wait_flag);
1511                 goto out;
1512         }
1513
1514         if (lock) {
1515                 struct cifsLockInfo *lock;
1516
1517                 lock = cifs_lock_init(flock->fl_start, length, type);
1518                 if (!lock)
1519                         return -ENOMEM;
1520
1521                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1522                 if (rc < 0) {
1523                         kfree(lock);
1524                         return rc;
1525                 }
1526                 if (!rc)
1527                         goto out;
1528
1529                 /*
1530                  * Windows 7 server can delay breaking lease from read to None
1531                  * if we set a byte-range lock on a file - break it explicitly
1532                  * before sending the lock to the server to be sure the next
1533                  * read won't conflict with non-overlapping locks due to
1534                  * page reading.
1535                  */
1536                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1537                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1538                         cifs_zap_mapping(inode);
1539                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1540                                  inode);
1541                         CIFS_I(inode)->oplock = 0;
1542                 }
1543
1544                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1545                                             type, 1, 0, wait_flag);
1546                 if (rc) {
1547                         kfree(lock);
1548                         return rc;
1549                 }
1550
1551                 cifs_lock_add(cfile, lock);
1552         } else if (unlock)
1553                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1554
1555 out:
1556         if (flock->fl_flags & FL_POSIX)
1557                 posix_lock_file_wait(file, flock);
1558         return rc;
1559 }
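
/*
 * Editor's sketch (illustrative, not part of this file): the lock and
 * unlock paths above are driven by an application request such as
 *
 *   struct flock fl = {
 *           .l_type   = F_WRLCK,     /- F_UNLCK selects the unlock path
 *           .l_whence = SEEK_SET,
 *           .l_start  = 0,
 *           .l_len    = 4096,
 *   };
 *   fcntl(fd, F_SETLKW, &fl);        /- blocking variant: wait_flag true
 *
 * where "fd" is an assumed open descriptor on a cifs mount. On mounts
 * with POSIX extensions the request is sent via CIFSSMBPosixLock();
 * otherwise the mandatory mand_lock/mand_unlock_range ops are used.
 */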
1560
1561 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1562 {
1563         int rc, xid;
1564         int lock = 0, unlock = 0;
1565         bool wait_flag = false;
1566         bool posix_lck = false;
1567         struct cifs_sb_info *cifs_sb;
1568         struct cifs_tcon *tcon;
1569         struct cifsInodeInfo *cinode;
1570         struct cifsFileInfo *cfile;
1571         __u16 netfid;
1572         __u32 type;
1573
1574         rc = -EACCES;
1575         xid = get_xid();
1576
1577         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1578                  cmd, flock->fl_flags, flock->fl_type,
1579                  flock->fl_start, flock->fl_end);
1580
1581         cfile = (struct cifsFileInfo *)file->private_data;
1582         tcon = tlink_tcon(cfile->tlink);
1583
1584         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1585                         tcon->ses->server);
1586
1587         cifs_sb = CIFS_FILE_SB(file);
1588         netfid = cfile->fid.netfid;
1589         cinode = CIFS_I(file_inode(file));
1590
1591         if (cap_unix(tcon->ses) &&
1592             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1593             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1594                 posix_lck = true;
1595         /*
1596          * BB add code here to normalize offset and length to account for
1597          * negative length which we cannot accept over the wire.
1598          */
1599         if (IS_GETLK(cmd)) {
1600                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1601                 free_xid(xid);
1602                 return rc;
1603         }
1604
1605         if (!lock && !unlock) {
1606                 /*
1607                  * if neither a lock nor an unlock was requested then there
1608                  * is nothing to do since we do not know what it is
1609                  */
1610                 free_xid(xid);
1611                 return -EOPNOTSUPP;
1612         }
1613
1614         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1615                         xid);
1616         free_xid(xid);
1617         return rc;
1618 }
1619
1620 /*
1621  * Update the file size (if needed) after a write. Must be called with
1622  * the inode->i_lock held.
1623  */
1624 void
1625 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1626                       unsigned int bytes_written)
1627 {
1628         loff_t end_of_write = offset + bytes_written;
1629
1630         if (end_of_write > cifsi->server_eof)
1631                 cifsi->server_eof = end_of_write;
1632 }
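
/*
 * Editor's note: a quick worked example of the helper above. With
 * server_eof at 8192, a 512-byte write at offset 4096 gives
 * end_of_write == 4608 and leaves server_eof untouched, while a
 * 512-byte write at offset 8000 gives end_of_write == 8512 and
 * advances server_eof to 8512.
 */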
1633
1634 static ssize_t
1635 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1636            size_t write_size, loff_t *offset)
1637 {
1638         int rc = 0;
1639         unsigned int bytes_written = 0;
1640         unsigned int total_written;
1641         struct cifs_sb_info *cifs_sb;
1642         struct cifs_tcon *tcon;
1643         struct TCP_Server_Info *server;
1644         unsigned int xid;
1645         struct dentry *dentry = open_file->dentry;
1646         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1647         struct cifs_io_parms io_parms;
1648
1649         cifs_sb = CIFS_SB(dentry->d_sb);
1650
1651         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1652                  write_size, *offset, dentry);
1653
1654         tcon = tlink_tcon(open_file->tlink);
1655         server = tcon->ses->server;
1656
1657         if (!server->ops->sync_write)
1658                 return -ENOSYS;
1659
1660         xid = get_xid();
1661
1662         for (total_written = 0; write_size > total_written;
1663              total_written += bytes_written) {
1664                 rc = -EAGAIN;
1665                 while (rc == -EAGAIN) {
1666                         struct kvec iov[2];
1667                         unsigned int len;
1668
1669                         if (open_file->invalidHandle) {
1670                                 /* we could deadlock if we called
1671                                    filemap_fdatawait from here, so tell
1672                                    reopen_file not to flush data to the
1673                                    server now */
1674                                 rc = cifs_reopen_file(open_file, false);
1675                                 if (rc != 0)
1676                                         break;
1677                         }
1678
1679                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1680                                   (unsigned int)write_size - total_written);
1681                         /* iov[0] is reserved for smb header */
1682                         iov[1].iov_base = (char *)write_data + total_written;
1683                         iov[1].iov_len = len;
1684                         io_parms.pid = pid;
1685                         io_parms.tcon = tcon;
1686                         io_parms.offset = *offset;
1687                         io_parms.length = len;
1688                         rc = server->ops->sync_write(xid, &open_file->fid,
1689                                         &io_parms, &bytes_written, iov, 1);
1690                 }
1691                 if (rc || (bytes_written == 0)) {
1692                         if (total_written)
1693                                 break;
1694                         else {
1695                                 free_xid(xid);
1696                                 return rc;
1697                         }
1698                 } else {
1699                         spin_lock(&d_inode(dentry)->i_lock);
1700                         cifs_update_eof(cifsi, *offset, bytes_written);
1701                         spin_unlock(&d_inode(dentry)->i_lock);
1702                         *offset += bytes_written;
1703                 }
1704         }
1705
1706         cifs_stats_bytes_written(tcon, total_written);
1707
1708         if (total_written > 0) {
1709                 spin_lock(&d_inode(dentry)->i_lock);
1710                 if (*offset > d_inode(dentry)->i_size)
1711                         i_size_write(d_inode(dentry), *offset);
1712                 spin_unlock(&d_inode(dentry)->i_lock);
1713         }
1714         mark_inode_dirty_sync(d_inode(dentry));
1715         free_xid(xid);
1716         return total_written;
1717 }
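
/*
 * Editor's note on the loop above: each pass writes at most
 * wp_retry_size() bytes. Assuming, for illustration, that
 * wp_retry_size() returns 65536, a 1 MiB cifs_write() is split into
 * sixteen sync_write calls, advancing *offset and total_written after
 * each successful chunk, with -EAGAIN retried in the inner loop
 * (reopening the handle first if it has been invalidated).
 */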
1718
1719 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1720                                         bool fsuid_only)
1721 {
1722         struct cifsFileInfo *open_file = NULL;
1723         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1724
1725         /* only filter by fsuid on multiuser mounts */
1726         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1727                 fsuid_only = false;
1728
1729         spin_lock(&cifs_file_list_lock);
1730         /* we could simply get the first list entry since write-only entries
1731            are always at the end of the list, but since the first entry might
1732            have a close pending, we go through the whole list */
1733         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1734                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1735                         continue;
1736                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1737                         if (!open_file->invalidHandle) {
1738                                 /* found a good file */
1739                                 /* lock it so it will not be closed on us */
1740                                 cifsFileInfo_get_locked(open_file);
1741                                 spin_unlock(&cifs_file_list_lock);
1742                                 return open_file;
1743                         } /* else might as well continue, and look for
1744                              another, or simply have the caller reopen it
1745                              again rather than trying to fix this handle */
1746                 } else /* write only file */
1747                         break; /* write only files are last so must be done */
1748         }
1749         spin_unlock(&cifs_file_list_lock);
1750         return NULL;
1751 }
1752
1753 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1754                                         bool fsuid_only)
1755 {
1756         struct cifsFileInfo *open_file, *inv_file = NULL;
1757         struct cifs_sb_info *cifs_sb;
1758         bool any_available = false;
1759         int rc;
1760         unsigned int refind = 0;
1761
1762         /* Having a null inode here (because mapping->host was set to zero by
1763         the VFS or MM) should not happen, but we had reports of an oops (due to
1764         it being zero) during stress test cases, so we need to check for it */
1765
1766         if (cifs_inode == NULL) {
1767                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1768                 dump_stack();
1769                 return NULL;
1770         }
1771
1772         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1773
1774         /* only filter by fsuid on multiuser mounts */
1775         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1776                 fsuid_only = false;
1777
1778         spin_lock(&cifs_file_list_lock);
1779 refind_writable:
1780         if (refind > MAX_REOPEN_ATT) {
1781                 spin_unlock(&cifs_file_list_lock);
1782                 return NULL;
1783         }
1784         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1785                 if (!any_available && open_file->pid != current->tgid)
1786                         continue;
1787                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1788                         continue;
1789                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1790                         if (!open_file->invalidHandle) {
1791                                 /* found a good writable file */
1792                                 cifsFileInfo_get_locked(open_file);
1793                                 spin_unlock(&cifs_file_list_lock);
1794                                 return open_file;
1795                         } else {
1796                                 if (!inv_file)
1797                                         inv_file = open_file;
1798                         }
1799                 }
1800         }
1801         /* couldn't find a usable FH with the same pid, try any available */
1802         if (!any_available) {
1803                 any_available = true;
1804                 goto refind_writable;
1805         }
1806
1807         if (inv_file) {
1808                 any_available = false;
1809                 cifsFileInfo_get_locked(inv_file);
1810         }
1811
1812         spin_unlock(&cifs_file_list_lock);
1813
1814         if (inv_file) {
1815                 rc = cifs_reopen_file(inv_file, false);
1816                 if (!rc)
1817                         return inv_file;
1818                 else {
1819                         spin_lock(&cifs_file_list_lock);
1820                         list_move_tail(&inv_file->flist,
1821                                         &cifs_inode->openFileList);
1822                         spin_unlock(&cifs_file_list_lock);
1823                         cifsFileInfo_put(inv_file);
1824                         spin_lock(&cifs_file_list_lock);
1825                         ++refind;
1826                         inv_file = NULL;
1827                         goto refind_writable;
1828                 }
1829         }
1830
1831         return NULL;
1832 }
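
/*
 * Editor's note: the search above is two-pass. The first pass only
 * considers handles opened by the calling process (open_file->pid ==
 * current->tgid); if none is usable, any_available relaxes that
 * restriction. An invalidated handle found along the way is remembered
 * in inv_file and reopened, with up to MAX_REOPEN_ATT retries before
 * giving up and returning NULL.
 */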
1833
1834 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1835 {
1836         struct address_space *mapping = page->mapping;
1837         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1838         char *write_data;
1839         int rc = -EFAULT;
1840         int bytes_written = 0;
1841         struct inode *inode;
1842         struct cifsFileInfo *open_file;
1843
1844         if (!mapping || !mapping->host)
1845                 return -EFAULT;
1846
1847         inode = page->mapping->host;
1848
1849         offset += (loff_t)from;
1850         write_data = kmap(page);
1851         write_data += from;
1852
1853         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1854                 kunmap(page);
1855                 return -EIO;
1856         }
1857
1858         /* racing with truncate? */
1859         if (offset > mapping->host->i_size) {
1860                 kunmap(page);
1861                 return 0; /* don't care */
1862         }
1863
1864         /* check to make sure that we are not extending the file */
1865         if (mapping->host->i_size - offset < (loff_t)to)
1866                 to = (unsigned)(mapping->host->i_size - offset);
1867
1868         open_file = find_writable_file(CIFS_I(mapping->host), false);
1869         if (open_file) {
1870                 bytes_written = cifs_write(open_file, open_file->pid,
1871                                            write_data, to - from, &offset);
1872                 cifsFileInfo_put(open_file);
1873                 /* Does mm or vfs already set times? */
1874                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1875                 if ((bytes_written > 0) && (offset))
1876                         rc = 0;
1877                 else if (bytes_written < 0)
1878                         rc = bytes_written;
1879         } else {
1880                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1881                 rc = -EIO;
1882         }
1883
1884         kunmap(page);
1885         return rc;
1886 }
1887
1888 static struct cifs_writedata *
1889 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1890                           pgoff_t end, pgoff_t *index,
1891                           unsigned int *found_pages)
1892 {
1893         unsigned int nr_pages;
1894         struct page **pages;
1895         struct cifs_writedata *wdata;
1896
1897         wdata = cifs_writedata_alloc((unsigned int)tofind,
1898                                      cifs_writev_complete);
1899         if (!wdata)
1900                 return NULL;
1901
1902         /*
1903          * find_get_pages_tag seems to return a max of 256 on each
1904          * iteration, so we must call it several times in order to
1905          * fill the array or the wsize is effectively limited to
1906          * 256 * PAGE_CACHE_SIZE.
1907          */
1908         *found_pages = 0;
1909         pages = wdata->pages;
1910         do {
1911                 nr_pages = find_get_pages_tag(mapping, index,
1912                                               PAGECACHE_TAG_DIRTY, tofind,
1913                                               pages);
1914                 *found_pages += nr_pages;
1915                 tofind -= nr_pages;
1916                 pages += nr_pages;
1917         } while (nr_pages && tofind && *index <= end);
1918
1919         return wdata;
1920 }
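
/*
 * Editor's note: a worked example of the loop above. If tofind is 1024
 * and find_get_pages_tag() returns at most 256 pages per call, the
 * do/while body runs up to four times, decrementing tofind and
 * advancing the pages cursor until the array is full, the dirty tag
 * runs dry, or *index passes end.
 */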
1921
1922 static unsigned int
1923 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1924                     struct address_space *mapping,
1925                     struct writeback_control *wbc,
1926                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1927 {
1928         unsigned int nr_pages = 0, i;
1929         struct page *page;
1930
1931         for (i = 0; i < found_pages; i++) {
1932                 page = wdata->pages[i];
1933                 /*
1934                  * At this point we hold neither mapping->tree_lock nor
1935                  * lock on the page itself: the page may be truncated or
1936                  * invalidated (changing page->mapping to NULL), or even
1937                  * swizzled back from swapper_space to tmpfs file
1938                  * mapping
1939                  */
1940
1941                 if (nr_pages == 0)
1942                         lock_page(page);
1943                 else if (!trylock_page(page))
1944                         break;
1945
1946                 if (unlikely(page->mapping != mapping)) {
1947                         unlock_page(page);
1948                         break;
1949                 }
1950
1951                 if (!wbc->range_cyclic && page->index > end) {
1952                         *done = true;
1953                         unlock_page(page);
1954                         break;
1955                 }
1956
1957                 if (*next && (page->index != *next)) {
1958                         /* Not next consecutive page */
1959                         unlock_page(page);
1960                         break;
1961                 }
1962
1963                 if (wbc->sync_mode != WB_SYNC_NONE)
1964                         wait_on_page_writeback(page);
1965
1966                 if (PageWriteback(page) ||
1967                                 !clear_page_dirty_for_io(page)) {
1968                         unlock_page(page);
1969                         break;
1970                 }
1971
1972                 /*
1973                  * This actually clears the dirty bit in the radix tree.
1974                  * See cifs_writepage() for more commentary.
1975                  */
1976                 set_page_writeback(page);
1977                 if (page_offset(page) >= i_size_read(mapping->host)) {
1978                         *done = true;
1979                         unlock_page(page);
1980                         end_page_writeback(page);
1981                         break;
1982                 }
1983
1984                 wdata->pages[i] = page;
1985                 *next = page->index + 1;
1986                 ++nr_pages;
1987         }
1988
1989         /* reset index to refind any pages skipped */
1990         if (nr_pages == 0)
1991                 *index = wdata->pages[0]->index + 1;
1992
1993         /* put any pages we aren't going to use */
1994         for (i = nr_pages; i < found_pages; i++) {
1995                 page_cache_release(wdata->pages[i]);
1996                 wdata->pages[i] = NULL;
1997         }
1998
1999         return nr_pages;
2000 }
2001
2002 static int
2003 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2004                  struct address_space *mapping, struct writeback_control *wbc)
2005 {
2006         int rc = 0;
2007         struct TCP_Server_Info *server;
2008         unsigned int i;
2009
2010         wdata->sync_mode = wbc->sync_mode;
2011         wdata->nr_pages = nr_pages;
2012         wdata->offset = page_offset(wdata->pages[0]);
2013         wdata->pagesz = PAGE_CACHE_SIZE;
2014         wdata->tailsz = min(i_size_read(mapping->host) -
2015                         page_offset(wdata->pages[nr_pages - 1]),
2016                         (loff_t)PAGE_CACHE_SIZE);
2017         wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
2018
2019         if (wdata->cfile != NULL)
2020                 cifsFileInfo_put(wdata->cfile);
2021         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2022         if (!wdata->cfile) {
2023                 cifs_dbg(VFS, "No writable handles for inode\n");
2024                 rc = -EBADF;
2025         } else {
2026                 wdata->pid = wdata->cfile->pid;
2027                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2028                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2029         }
2030
2031         for (i = 0; i < nr_pages; ++i)
2032                 unlock_page(wdata->pages[i]);
2033
2034         return rc;
2035 }
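
/*
 * Editor's note: a worked example of the size math above, assuming
 * PAGE_CACHE_SIZE == 4096. With nr_pages == 4 and the file ending 100
 * bytes into the last page, tailsz == 100 and
 *
 *   wdata->bytes = (4 - 1) * 4096 + 100 = 12388
 *
 * i.e. three full pages plus the partial tail.
 */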
2036
2037 static int cifs_writepages(struct address_space *mapping,
2038                            struct writeback_control *wbc)
2039 {
2040         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2041         struct TCP_Server_Info *server;
2042         bool done = false, scanned = false, range_whole = false;
2043         pgoff_t end, index;
2044         struct cifs_writedata *wdata;
2045         int rc = 0;
2046
2047         /*
2048          * If wsize is smaller than the page cache size, default to writing
2049          * one page at a time via cifs_writepage
2050          */
2051         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2052                 return generic_writepages(mapping, wbc);
2053
2054         if (wbc->range_cyclic) {
2055                 index = mapping->writeback_index; /* Start from prev offset */
2056                 end = -1;
2057         } else {
2058                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2059                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2060                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2061                         range_whole = true;
2062                 scanned = true;
2063         }
2064         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2065 retry:
2066         while (!done && index <= end) {
2067                 unsigned int i, nr_pages, found_pages, wsize, credits;
2068                 pgoff_t next = 0, tofind, saved_index = index;
2069
2070                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2071                                                    &wsize, &credits);
2072                 if (rc)
2073                         break;
2074
2075                 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2076
2077                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2078                                                   &found_pages);
2079                 if (!wdata) {
2080                         rc = -ENOMEM;
2081                         add_credits_and_wake_if(server, credits, 0);
2082                         break;
2083                 }
2084
2085                 if (found_pages == 0) {
2086                         kref_put(&wdata->refcount, cifs_writedata_release);
2087                         add_credits_and_wake_if(server, credits, 0);
2088                         break;
2089                 }
2090
2091                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2092                                                end, &index, &next, &done);
2093
2094                 /* nothing to write? */
2095                 if (nr_pages == 0) {
2096                         kref_put(&wdata->refcount, cifs_writedata_release);
2097                         add_credits_and_wake_if(server, credits, 0);
2098                         continue;
2099                 }
2100
2101                 wdata->credits = credits;
2102
2103                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2104
2105                 /* send failure -- clean up the mess */
2106                 if (rc != 0) {
2107                         add_credits_and_wake_if(server, wdata->credits, 0);
2108                         for (i = 0; i < nr_pages; ++i) {
2109                                 if (rc == -EAGAIN)
2110                                         redirty_page_for_writepage(wbc,
2111                                                            wdata->pages[i]);
2112                                 else
2113                                         SetPageError(wdata->pages[i]);
2114                                 end_page_writeback(wdata->pages[i]);
2115                                 page_cache_release(wdata->pages[i]);
2116                         }
2117                         if (rc != -EAGAIN)
2118                                 mapping_set_error(mapping, rc);
2119                 }
2120                 kref_put(&wdata->refcount, cifs_writedata_release);
2121
2122                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2123                         index = saved_index;
2124                         continue;
2125                 }
2126
2127                 wbc->nr_to_write -= nr_pages;
2128                 if (wbc->nr_to_write <= 0)
2129                         done = true;
2130
2131                 index = next;
2132         }
2133
2134         if (!scanned && !done) {
2135                 /*
2136                  * We hit the last page and there is more work to be done: wrap
2137                  * back to the start of the file
2138                  */
2139                 scanned = true;
2140                 index = 0;
2141                 goto retry;
2142         }
2143
2144         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2145                 mapping->writeback_index = index;
2146
2147         return rc;
2148 }
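
/*
 * Editor's note: the per-iteration batch size above follows from the
 * negotiated wsize. Assuming wsize == 65536 and 4 KiB pages,
 *
 *   tofind = min((65536 / 4096) - 1, end - index) + 1
 *
 * caps each wdata at 16 pages, and wait_mtu_credits() may return a
 * smaller wsize when the server is short on credits.
 */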
2149
2150 static int
2151 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2152 {
2153         int rc;
2154         unsigned int xid;
2155
2156         xid = get_xid();
2157 /* BB add check for wbc flags */
2158         page_cache_get(page);
2159         if (!PageUptodate(page))
2160                 cifs_dbg(FYI, "ppw - page not up to date\n");
2161
2162         /*
2163          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2164          *
2165          * A writepage() implementation always needs to do either this,
2166          * or re-dirty the page with "redirty_page_for_writepage()" in
2167          * the case of a failure.
2168          *
2169          * Just unlocking the page will cause the radix tree tag-bits
2170          * to fail to update with the state of the page correctly.
2171          */
2172         set_page_writeback(page);
2173 retry_write:
2174         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2175         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2176                 goto retry_write;
2177         else if (rc == -EAGAIN)
2178                 redirty_page_for_writepage(wbc, page);
2179         else if (rc != 0)
2180                 SetPageError(page);
2181         else
2182                 SetPageUptodate(page);
2183         end_page_writeback(page);
2184         page_cache_release(page);
2185         free_xid(xid);
2186         return rc;
2187 }
2188
2189 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2190 {
2191         int rc = cifs_writepage_locked(page, wbc);
2192         unlock_page(page);
2193         return rc;
2194 }
2195
2196 static int cifs_write_end(struct file *file, struct address_space *mapping,
2197                         loff_t pos, unsigned len, unsigned copied,
2198                         struct page *page, void *fsdata)
2199 {
2200         int rc;
2201         struct inode *inode = mapping->host;
2202         struct cifsFileInfo *cfile = file->private_data;
2203         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2204         __u32 pid;
2205
2206         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2207                 pid = cfile->pid;
2208         else
2209                 pid = current->tgid;
2210
2211         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2212                  page, pos, copied);
2213
2214         if (PageChecked(page)) {
2215                 if (copied == len)
2216                         SetPageUptodate(page);
2217                 ClearPageChecked(page);
2218         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2219                 SetPageUptodate(page);
2220
2221         if (!PageUptodate(page)) {
2222                 char *page_data;
2223                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2224                 unsigned int xid;
2225
2226                 xid = get_xid();
2227                 /* this is probably better than directly calling
2228                    partialpage_write since in this function the file handle
2229                    is known, which we might as well leverage */
2230                 /* BB check if anything else is missing out of ppw,
2231                    such as updating the last write time */
2232                 page_data = kmap(page);
2233                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2234                 /* if (rc < 0) should we set writebehind rc? */
2235                 kunmap(page);
2236
2237                 free_xid(xid);
2238         } else {
2239                 rc = copied;
2240                 pos += copied;
2241                 set_page_dirty(page);
2242         }
2243
2244         if (rc > 0) {
2245                 spin_lock(&inode->i_lock);
2246                 if (pos > inode->i_size)
2247                         i_size_write(inode, pos);
2248                 spin_unlock(&inode->i_lock);
2249         }
2250
2251         unlock_page(page);
2252         page_cache_release(page);
2253
2254         return rc;
2255 }
2256
2257 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2258                       int datasync)
2259 {
2260         unsigned int xid;
2261         int rc = 0;
2262         struct cifs_tcon *tcon;
2263         struct TCP_Server_Info *server;
2264         struct cifsFileInfo *smbfile = file->private_data;
2265         struct inode *inode = file_inode(file);
2266         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2267
2268         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2269         if (rc)
2270                 return rc;
2271         mutex_lock(&inode->i_mutex);
2272
2273         xid = get_xid();
2274
2275         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2276                  file, datasync);
2277
2278         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2279                 rc = cifs_zap_mapping(inode);
2280                 if (rc) {
2281                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2282                         rc = 0; /* don't care about it in fsync */
2283                 }
2284         }
2285
2286         tcon = tlink_tcon(smbfile->tlink);
2287         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2288                 server = tcon->ses->server;
2289                 if (server->ops->flush)
2290                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2291                 else
2292                         rc = -ENOSYS;
2293         }
2294
2295         free_xid(xid);
2296         mutex_unlock(&inode->i_mutex);
2297         return rc;
2298 }
2299
2300 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2301 {
2302         unsigned int xid;
2303         int rc = 0;
2304         struct cifs_tcon *tcon;
2305         struct TCP_Server_Info *server;
2306         struct cifsFileInfo *smbfile = file->private_data;
2307         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2308         struct inode *inode = file->f_mapping->host;
2309
2310         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2311         if (rc)
2312                 return rc;
2313         mutex_lock(&inode->i_mutex);
2314
2315         xid = get_xid();
2316
2317         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2318                  file, datasync);
2319
2320         tcon = tlink_tcon(smbfile->tlink);
2321         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2322                 server = tcon->ses->server;
2323                 if (server->ops->flush)
2324                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2325                 else
2326                         rc = -ENOSYS;
2327         }
2328
2329         free_xid(xid);
2330         mutex_unlock(&inode->i_mutex);
2331         return rc;
2332 }
2333
2334 /*
2335  * As the file closes, flush all cached write data for this inode,
2336  * checking for write-behind errors.
2337  */
2338 int cifs_flush(struct file *file, fl_owner_t id)
2339 {
2340         struct inode *inode = file_inode(file);
2341         int rc = 0;
2342
2343         if (file->f_mode & FMODE_WRITE)
2344                 rc = filemap_write_and_wait(inode->i_mapping);
2345
2346         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2347
2348         return rc;
2349 }
2350
2351 static int
2352 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2353 {
2354         int rc = 0;
2355         unsigned long i;
2356
2357         for (i = 0; i < num_pages; i++) {
2358                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2359                 if (!pages[i]) {
2360                         /*
2361                          * save number of pages we have already allocated and
2362                          * return with ENOMEM error
2363                          */
2364                         num_pages = i;
2365                         rc = -ENOMEM;
2366                         break;
2367                 }
2368         }
2369
2370         if (rc) {
2371                 for (i = 0; i < num_pages; i++)
2372                         put_page(pages[i]);
2373         }
2374         return rc;
2375 }
2376
2377 static inline
2378 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2379 {
2380         size_t num_pages;
2381         size_t clen;
2382
2383         clen = min_t(const size_t, len, wsize);
2384         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2385
2386         if (cur_len)
2387                 *cur_len = clen;
2388
2389         return num_pages;
2390 }
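
/*
 * Editor's note: a quick example of get_numpages(). With wsize ==
 * 65536 (hypothetical) and len == 100000, clen == 65536 and, at 4 KiB
 * pages, num_pages == 16; with len == 5000, clen == 5000 and
 * num_pages == DIV_ROUND_UP(5000, 4096) == 2.
 */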
2391
2392 static void
2393 cifs_uncached_writedata_release(struct kref *refcount)
2394 {
2395         int i;
2396         struct cifs_writedata *wdata = container_of(refcount,
2397                                         struct cifs_writedata, refcount);
2398
2399         for (i = 0; i < wdata->nr_pages; i++)
2400                 put_page(wdata->pages[i]);
2401         cifs_writedata_release(refcount);
2402 }
2403
2404 static void
2405 cifs_uncached_writev_complete(struct work_struct *work)
2406 {
2407         struct cifs_writedata *wdata = container_of(work,
2408                                         struct cifs_writedata, work);
2409         struct inode *inode = d_inode(wdata->cfile->dentry);
2410         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2411
2412         spin_lock(&inode->i_lock);
2413         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2414         if (cifsi->server_eof > inode->i_size)
2415                 i_size_write(inode, cifsi->server_eof);
2416         spin_unlock(&inode->i_lock);
2417
2418         complete(&wdata->done);
2419
2420         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2421 }
2422
2423 static int
2424 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2425                       size_t *len, unsigned long *num_pages)
2426 {
2427         size_t save_len, copied, bytes, cur_len = *len;
2428         unsigned long i, nr_pages = *num_pages;
2429
2430         save_len = cur_len;
2431         for (i = 0; i < nr_pages; i++) {
2432                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2433                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2434                 cur_len -= copied;
2435                 /*
2436                  * If we didn't copy as much as we expected, then that
2437                  * may mean we trod into an unmapped area. Stop copying
2438                  * at that point. On the next pass through the big
2439                  * loop, we'll likely end up getting a zero-length
2440                  * write and bailing out of it.
2441                  */
2442                 if (copied < bytes)
2443                         break;
2444         }
2445         cur_len = save_len - cur_len;
2446         *len = cur_len;
2447
2448         /*
2449          * If we have no data to send, then that probably means that
2450          * the copy above failed altogether. That's most likely because
2451          * the address in the iovec was bogus. Return -EFAULT and let
2452          * the caller free anything we allocated and bail out.
2453          */
2454         if (!cur_len)
2455                 return -EFAULT;
2456
2457         /*
2458          * i + 1 now represents the number of pages we actually used in
2459          * the copy phase above.
2460          */
2461         *num_pages = i + 1;
2462         return 0;
2463 }
2464
2465 static int
2466 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2467                      struct cifsFileInfo *open_file,
2468                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2469 {
2470         int rc = 0;
2471         size_t cur_len;
2472         unsigned long nr_pages, num_pages, i;
2473         struct cifs_writedata *wdata;
2474         struct iov_iter saved_from;
2475         loff_t saved_offset = offset;
2476         pid_t pid;
2477         struct TCP_Server_Info *server;
2478
2479         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2480                 pid = open_file->pid;
2481         else
2482                 pid = current->tgid;
2483
2484         server = tlink_tcon(open_file->tlink)->ses->server;
2485         memcpy(&saved_from, from, sizeof(struct iov_iter));
2486
2487         do {
2488                 unsigned int wsize, credits;
2489
2490                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2491                                                    &wsize, &credits);
2492                 if (rc)
2493                         break;
2494
2495                 nr_pages = get_numpages(wsize, len, &cur_len);
2496                 wdata = cifs_writedata_alloc(nr_pages,
2497                                              cifs_uncached_writev_complete);
2498                 if (!wdata) {
2499                         rc = -ENOMEM;
2500                         add_credits_and_wake_if(server, credits, 0);
2501                         break;
2502                 }
2503
2504                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2505                 if (rc) {
2506                         kfree(wdata);
2507                         add_credits_and_wake_if(server, credits, 0);
2508                         break;
2509                 }
2510
2511                 num_pages = nr_pages;
2512                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2513                 if (rc) {
2514                         for (i = 0; i < nr_pages; i++)
2515                                 put_page(wdata->pages[i]);
2516                         kfree(wdata);
2517                         add_credits_and_wake_if(server, credits, 0);
2518                         break;
2519                 }
2520
2521                 /*
2522                  * Bring nr_pages down to the number of pages we actually used,
2523                  * and free any pages that we didn't use.
2524                  */
2525                 for ( ; nr_pages > num_pages; nr_pages--)
2526                         put_page(wdata->pages[nr_pages - 1]);
2527
2528                 wdata->sync_mode = WB_SYNC_ALL;
2529                 wdata->nr_pages = nr_pages;
2530                 wdata->offset = (__u64)offset;
2531                 wdata->cfile = cifsFileInfo_get(open_file);
2532                 wdata->pid = pid;
2533                 wdata->bytes = cur_len;
2534                 wdata->pagesz = PAGE_SIZE;
2535                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2536                 wdata->credits = credits;
2537
2538                 if (!wdata->cfile->invalidHandle ||
2539                     !cifs_reopen_file(wdata->cfile, false))
2540                         rc = server->ops->async_writev(wdata,
2541                                         cifs_uncached_writedata_release);
2542                 if (rc) {
2543                         add_credits_and_wake_if(server, wdata->credits, 0);
2544                         kref_put(&wdata->refcount,
2545                                  cifs_uncached_writedata_release);
2546                         if (rc == -EAGAIN) {
2547                                 memcpy(from, &saved_from,
2548                                        sizeof(struct iov_iter));
2549                                 iov_iter_advance(from, offset - saved_offset);
2550                                 continue;
2551                         }
2552                         break;
2553                 }
2554
2555                 list_add_tail(&wdata->list, wdata_list);
2556                 offset += cur_len;
2557                 len -= cur_len;
2558         } while (len > 0);
2559
2560         return rc;
2561 }
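
/*
 * Editor's note on the -EAGAIN handling above: because "from" is
 * consumed as pages are filled, a retry first restores the iterator
 * from saved_from and then calls iov_iter_advance(from, offset -
 * saved_offset). E.g. if the first 64 KiB chunk was queued and the
 * second fails with -EAGAIN, the iterator is rewound and re-advanced
 * by 64 KiB so the retry re-reads exactly the failed chunk.
 */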
2562
2563 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2564 {
2565         struct file *file = iocb->ki_filp;
2566         ssize_t total_written = 0;
2567         struct cifsFileInfo *open_file;
2568         struct cifs_tcon *tcon;
2569         struct cifs_sb_info *cifs_sb;
2570         struct cifs_writedata *wdata, *tmp;
2571         struct list_head wdata_list;
2572         struct iov_iter saved_from;
2573         int rc;
2574
2575         /*
2576          * BB - optimize this path for the case when signing is disabled. We
2577          * can drop this extra memory-to-memory copying and use iovec buffers
2578          * for constructing the write request.
2579          */
2580
2581         rc = generic_write_checks(iocb, from);
2582         if (rc <= 0)
2583                 return rc;
2584
2585         INIT_LIST_HEAD(&wdata_list);
2586         cifs_sb = CIFS_FILE_SB(file);
2587         open_file = file->private_data;
2588         tcon = tlink_tcon(open_file->tlink);
2589
2590         if (!tcon->ses->server->ops->async_writev)
2591                 return -ENOSYS;
2592
2593         memcpy(&saved_from, from, sizeof(struct iov_iter));
2594
2595         rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2596                                   open_file, cifs_sb, &wdata_list);
2597
2598         /*
2599          * If at least one write was successfully sent, then discard any rc
2600          * value from the later writes. If the other writes succeed, then
2601          * we'll end up returning whatever was written. If one fails, then
2602          * we'll get a new rc value from that.
2603          */
2604         if (!list_empty(&wdata_list))
2605                 rc = 0;
2606
2607         /*
2608          * Wait for and collect replies for any successful sends in order of
2609          * increasing offset. Once an error is hit or we get a fatal signal
2610          * while waiting, then return without waiting for any more replies.
2611          */
2612 restart_loop:
2613         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2614                 if (!rc) {
2615                         /* FIXME: freezable too? */
2616                         rc = wait_for_completion_killable(&wdata->done);
2617                         if (rc)
2618                                 rc = -EINTR;
2619                         else if (wdata->result)
2620                                 rc = wdata->result;
2621                         else
2622                                 total_written += wdata->bytes;
2623
2624                         /* resend call if it's a retryable error */
2625                         if (rc == -EAGAIN) {
2626                                 struct list_head tmp_list;
2627                                 struct iov_iter tmp_from;
2628
2629                                 INIT_LIST_HEAD(&tmp_list);
2630                                 list_del_init(&wdata->list);
2631
2632                                 memcpy(&tmp_from, &saved_from,
2633                                        sizeof(struct iov_iter));
2634                                 iov_iter_advance(&tmp_from,
2635                                                  wdata->offset - iocb->ki_pos);
2636
2637                                 rc = cifs_write_from_iter(wdata->offset,
2638                                                 wdata->bytes, &tmp_from,
2639                                                 open_file, cifs_sb, &tmp_list);
2640
2641                                 list_splice(&tmp_list, &wdata_list);
2642
2643                                 kref_put(&wdata->refcount,
2644                                          cifs_uncached_writedata_release);
2645                                 goto restart_loop;
2646                         }
2647                 }
2648                 list_del_init(&wdata->list);
2649                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2650         }
2651
2652         if (unlikely(!total_written))
2653                 return rc;
2654
2655         iocb->ki_pos += total_written;
2656         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2657         cifs_stats_bytes_written(tcon, total_written);
2658         return total_written;
2659 }
2660
2661 static ssize_t
2662 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2663 {
2664         struct file *file = iocb->ki_filp;
2665         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2666         struct inode *inode = file->f_mapping->host;
2667         struct cifsInodeInfo *cinode = CIFS_I(inode);
2668         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2669         ssize_t rc;
2670
2671         /*
2672          * We need to hold the sem to be sure nobody modifies the lock list
2673          * with a brlock that prevents writing.
2674          */
2675         down_read(&cinode->lock_sem);
2676         mutex_lock(&inode->i_mutex);
2677
2678         rc = generic_write_checks(iocb, from);
2679         if (rc <= 0)
2680                 goto out;
2681
2682         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2683                                      server->vals->exclusive_lock_type, NULL,
2684                                      CIFS_WRITE_OP))
2685                 rc = __generic_file_write_iter(iocb, from);
2686         else
2687                 rc = -EACCES;
2688 out:
2689         mutex_unlock(&inode->i_mutex);
2690
2691         if (rc > 0) {
2692                 ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2693                 if (err < 0)
2694                         rc = err;
2695         }
2696         up_read(&cinode->lock_sem);
2697         return rc;
2698 }
2699
2700 ssize_t
2701 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2702 {
2703         struct inode *inode = file_inode(iocb->ki_filp);
2704         struct cifsInodeInfo *cinode = CIFS_I(inode);
2705         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2706         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2707                                                 iocb->ki_filp->private_data;
2708         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2709         ssize_t written;
2710
2711         written = cifs_get_writer(cinode);
2712         if (written)
2713                 return written;
2714
2715         if (CIFS_CACHE_WRITE(cinode)) {
2716                 if (cap_unix(tcon->ses) &&
2717                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2718                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2719                         written = generic_file_write_iter(iocb, from);
2720                         goto out;
2721                 }
2722                 written = cifs_writev(iocb, from);
2723                 goto out;
2724         }
2725         /*
2726          * For non-oplocked files in strict cache mode we need to write the data
2727          * to the server exactly from pos to pos+len-1 rather than flush all
2728          * affected pages, because doing so may cause an error with mandatory
2729          * locks on these pages but not on the region from pos to pos+len-1.
2730          */
2731         written = cifs_user_writev(iocb, from);
2732         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2733                 /*
2734                  * Windows 7 server can delay breaking level2 oplock if a write
2735                  * request comes - break it on the client to prevent reading
2736                  * stale data.
2737                  */
2738                 cifs_zap_mapping(inode);
2739                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2740                          inode);
2741                 cinode->oplock = 0;
2742         }
2743 out:
2744         cifs_put_writer(cinode);
2745         return written;
2746 }
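
/*
 * Editor's note (illustrative, not from this file): the strict-cache
 * paths above are reached on mounts using cache=strict, e.g.
 *
 *   mount -t cifs //server/share /mnt -o cache=strict
 *
 * where the server and share names are placeholders. With an oplock
 * that allows caching writes, the generic (cached) write path is used;
 * otherwise the data goes uncached via cifs_user_writev().
 */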
2747
2748 static struct cifs_readdata *
2749 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2750 {
2751         struct cifs_readdata *rdata;
2752
2753         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2754                         GFP_KERNEL);
2755         if (rdata != NULL) {
2756                 kref_init(&rdata->refcount);
2757                 INIT_LIST_HEAD(&rdata->list);
2758                 init_completion(&rdata->done);
2759                 INIT_WORK(&rdata->work, complete);
2760         }
2761
2762         return rdata;
2763 }
2764
2765 void
2766 cifs_readdata_release(struct kref *refcount)
2767 {
2768         struct cifs_readdata *rdata = container_of(refcount,
2769                                         struct cifs_readdata, refcount);
2770
2771         if (rdata->cfile)
2772                 cifsFileInfo_put(rdata->cfile);
2773
2774         kfree(rdata);
2775 }
2776
2777 static int
2778 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2779 {
2780         int rc = 0;
2781         struct page *page;
2782         unsigned int i;
2783
2784         for (i = 0; i < nr_pages; i++) {
2785                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2786                 if (!page) {
2787                         rc = -ENOMEM;
2788                         break;
2789                 }
2790                 rdata->pages[i] = page;
2791         }
2792
2793         if (rc) {
2794                 for (i = 0; i < nr_pages; i++) {
2795                         put_page(rdata->pages[i]);
2796                         rdata->pages[i] = NULL;
2797                 }
2798         }
2799         return rc;
2800 }
2801
2802 static void
2803 cifs_uncached_readdata_release(struct kref *refcount)
2804 {
2805         struct cifs_readdata *rdata = container_of(refcount,
2806                                         struct cifs_readdata, refcount);
2807         unsigned int i;
2808
2809         for (i = 0; i < rdata->nr_pages; i++) {
2810                 put_page(rdata->pages[i]);
2811                 rdata->pages[i] = NULL;
2812         }
2813         cifs_readdata_release(refcount);
2814 }
2815
2816 /**
2817  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2818  * @rdata:      the readdata response with list of pages holding data
2819  * @iter:       destination for our data
2820  *
2821  * This function copies data from a list of pages in a readdata response into
2822  * an array of iovecs. It will first calculate where the data should go
2823  * based on the info in the readdata and then copy the data into that spot.
2824  */
2825 static int
2826 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2827 {
2828         size_t remaining = rdata->got_bytes;
2829         unsigned int i;
2830
2831         for (i = 0; i < rdata->nr_pages; i++) {
2832                 struct page *page = rdata->pages[i];
2833                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2834                 size_t written = copy_page_to_iter(page, 0, copy, iter);
2835                 remaining -= written;
2836                 if (written < copy && iov_iter_count(iter) > 0)
2837                         break;
2838         }
2839         return remaining ? -EFAULT : 0;
2840 }
2841
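/*
 * Worked example for cifs_readdata_to_iov() (editorial, assuming a
 * 4096-byte PAGE_SIZE): with got_bytes == 5000, the first iteration copies
 * min(5000, 4096) == 4096 bytes from page 0 and the second copies the
 * remaining 904 bytes from page 1, leaving remaining == 0 and returning 0.
 * If copy_page_to_iter() ever copies less than requested while the iter
 * still has room (a fault on the user buffer), the loop breaks early and
 * the nonzero remainder maps to -EFAULT.
 */
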
2842 static void
2843 cifs_uncached_readv_complete(struct work_struct *work)
2844 {
2845         struct cifs_readdata *rdata = container_of(work,
2846                                                 struct cifs_readdata, work);
2847
2848         complete(&rdata->done);
2849         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2850 }
2851
2852 static int
2853 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2854                         struct cifs_readdata *rdata, unsigned int len)
2855 {
2856         int result = 0;
2857         unsigned int i;
2858         unsigned int nr_pages = rdata->nr_pages;
2859         struct kvec iov;
2860
2861         rdata->got_bytes = 0;
2862         rdata->tailsz = PAGE_SIZE;
2863         for (i = 0; i < nr_pages; i++) {
2864                 struct page *page = rdata->pages[i];
2865
2866                 if (len >= PAGE_SIZE) {
2867                         /* enough data to fill the page */
2868                         iov.iov_base = kmap(page);
2869                         iov.iov_len = PAGE_SIZE;
2870                         cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2871                                  i, iov.iov_base, iov.iov_len);
2872                         len -= PAGE_SIZE;
2873                 } else if (len > 0) {
2874                         /* enough for partial page, fill and zero the rest */
2875                         iov.iov_base = kmap(page);
2876                         iov.iov_len = len;
2877                         cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2878                                  i, iov.iov_base, iov.iov_len);
2879                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2880                         rdata->tailsz = len;
2881                         len = 0;
2882                 } else {
2883                         /* no need to hold page hostage */
2884                         rdata->pages[i] = NULL;
2885                         rdata->nr_pages--;
2886                         put_page(page);
2887                         continue;
2888                 }
2889
2890                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2891                 kunmap(page);
2892                 if (result < 0)
2893                         break;
2894
2895                 rdata->got_bytes += result;
2896         }
2897
2898         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2899                                                 rdata->got_bytes : result;
2900 }
2901
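/*
 * Worked example for cifs_uncached_read_into_pages() (editorial, assuming
 * a 4096-byte PAGE_SIZE): for len == 6000 with two pages, page 0 is filled
 * completely, then page 1 receives the remaining 1904 bytes with its tail
 * memset() to zero and rdata->tailsz set to 1904. Any further pages are
 * released and nr_pages is trimmed, since the server returned less data
 * than the allocated pages could hold.
 */
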
2902 static int
2903 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2904                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2905 {
2906         struct cifs_readdata *rdata;
2907         unsigned int npages, rsize, credits;
2908         size_t cur_len;
2909         int rc;
2910         pid_t pid;
2911         struct TCP_Server_Info *server;
2912
2913         server = tlink_tcon(open_file->tlink)->ses->server;
2914
2915         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2916                 pid = open_file->pid;
2917         else
2918                 pid = current->tgid;
2919
2920         do {
2921                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2922                                                    &rsize, &credits);
2923                 if (rc)
2924                         break;
2925
2926                 cur_len = min_t(const size_t, len, rsize);
2927                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2928
2929                 /* allocate a readdata struct */
2930                 rdata = cifs_readdata_alloc(npages,
2931                                             cifs_uncached_readv_complete);
2932                 if (!rdata) {
2933                         add_credits_and_wake_if(server, credits, 0);
2934                         rc = -ENOMEM;
2935                         break;
2936                 }
2937
2938                 rc = cifs_read_allocate_pages(rdata, npages);
2939                 if (rc)
2940                         goto error;
2941
2942                 rdata->cfile = cifsFileInfo_get(open_file);
2943                 rdata->nr_pages = npages;
2944                 rdata->offset = offset;
2945                 rdata->bytes = cur_len;
2946                 rdata->pid = pid;
2947                 rdata->pagesz = PAGE_SIZE;
2948                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2949                 rdata->credits = credits;
2950
2951                 if (!rdata->cfile->invalidHandle ||
2952                     !cifs_reopen_file(rdata->cfile, true))
2953                         rc = server->ops->async_readv(rdata);
2954 error:
2955                 if (rc) {
2956                         add_credits_and_wake_if(server, rdata->credits, 0);
2957                         kref_put(&rdata->refcount,
2958                                  cifs_uncached_readdata_release);
2959                         if (rc == -EAGAIN)
2960                                 continue;
2961                         break;
2962                 }
2963
2964                 list_add_tail(&rdata->list, rdata_list);
2965                 offset += cur_len;
2966                 len -= cur_len;
2967         } while (len > 0);
2968
2969         return rc;
2970 }
2971
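/*
 * Editorial note on the chunking above: wait_mtu_credits() caps each
 * request at the granted rsize, so a single large read is split into a
 * chain of rdatas. For example (hypothetical sizes), a 1 MiB read with a
 * granted rsize of 64 KiB and 4 KiB pages is issued as 16 async requests
 * of 16 pages each, at offsets 0, 64K, 128K, and so on. Each rdata
 * carries the credits charged for it; on failure they are returned via
 * add_credits_and_wake_if().
 */
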
2972 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2973 {
2974         struct file *file = iocb->ki_filp;
2975         ssize_t rc;
2976         size_t len;
2977         ssize_t total_read = 0;
2978         loff_t offset = iocb->ki_pos;
2979         struct cifs_sb_info *cifs_sb;
2980         struct cifs_tcon *tcon;
2981         struct cifsFileInfo *open_file;
2982         struct cifs_readdata *rdata, *tmp;
2983         struct list_head rdata_list;
2984
2985         len = iov_iter_count(to);
2986         if (!len)
2987                 return 0;
2988
2989         INIT_LIST_HEAD(&rdata_list);
2990         cifs_sb = CIFS_FILE_SB(file);
2991         open_file = file->private_data;
2992         tcon = tlink_tcon(open_file->tlink);
2993
2994         if (!tcon->ses->server->ops->async_readv)
2995                 return -ENOSYS;
2996
2997         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2998                 cifs_dbg(FYI, "attempting read on write-only file instance\n");
2999
3000         rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3001
3002         /* if at least one read request was sent successfully, reset rc */
3003         if (!list_empty(&rdata_list))
3004                 rc = 0;
3005
3006         len = iov_iter_count(to);
3007         /* the loop below should proceed in the order of increasing offsets */
3008 again:
3009         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3010                 if (!rc) {
3011                         /* FIXME: freezable sleep too? */
3012                         rc = wait_for_completion_killable(&rdata->done);
3013                         if (rc)
3014                                 rc = -EINTR;
3015                         else if (rdata->result == -EAGAIN) {
3016                                 /* resend call if it's a retryable error */
3017                                 struct list_head tmp_list;
3018                                 unsigned int got_bytes = rdata->got_bytes;
3019
3020                                 list_del_init(&rdata->list);
3021                                 INIT_LIST_HEAD(&tmp_list);
3022
3023                                 /*
3024                                  * Got part of the data and then a reconnect
3025                                  * happened -- fill the buffer and continue
3026                                  * reading.
3027                                  */
3028                                 if (got_bytes && got_bytes < rdata->bytes) {
3029                                         rc = cifs_readdata_to_iov(rdata, to);
3030                                         if (rc) {
3031                                                 kref_put(&rdata->refcount,
3032                                                 cifs_uncached_readdata_release);
3033                                                 continue;
3034                                         }
3035                                 }
3036
3037                                 rc = cifs_send_async_read(
3038                                                 rdata->offset + got_bytes,
3039                                                 rdata->bytes - got_bytes,
3040                                                 rdata->cfile, cifs_sb,
3041                                                 &tmp_list);
3042
3043                                 list_splice(&tmp_list, &rdata_list);
3044
3045                                 kref_put(&rdata->refcount,
3046                                          cifs_uncached_readdata_release);
3047                                 goto again;
3048                         } else if (rdata->result)
3049                                 rc = rdata->result;
3050                         else
3051                                 rc = cifs_readdata_to_iov(rdata, to);
3052
3053                         /* if there was a short read -- discard anything left */
3054                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3055                                 rc = -ENODATA;
3056                 }
3057                 list_del_init(&rdata->list);
3058                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3059         }
3060
3061         total_read = len - iov_iter_count(to);
3062
3063         cifs_stats_bytes_read(tcon, total_read);
3064
3065         /* mask nodata case */
3066         if (rc == -ENODATA)
3067                 rc = 0;
3068
3069         if (total_read) {
3070                 iocb->ki_pos += total_read;
3071                 return total_read;
3072         }
3073         return rc;
3074 }
3075
3076 ssize_t
3077 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3078 {
3079         struct inode *inode = file_inode(iocb->ki_filp);
3080         struct cifsInodeInfo *cinode = CIFS_I(inode);
3081         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3082         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3083                                                 iocb->ki_filp->private_data;
3084         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3085         int rc = -EACCES;
3086
3087         /*
3088          * In strict cache mode we need to read from the server every time
3089          * if we don't have a level II oplock because the server can delay
3090          * the mtime change - so we can't decide whether to invalidate the
3091          * inode. We can also fail reading pages if there are mandatory
3092          * locks on pages affected by this read but not on the region from
3093          * pos to pos+len-1.
3094          */
3095         if (!CIFS_CACHE_READ(cinode))
3096                 return cifs_user_readv(iocb, to);
3097
3098         if (cap_unix(tcon->ses) &&
3099             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3100             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3101                 return generic_file_read_iter(iocb, to);
3102
3103         /*
3104          * We need to hold the lock_sem to be sure nobody modifies the lock
3105          * list with a brlock that prevents reading.
3106          */
3107         down_read(&cinode->lock_sem);
3108         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3109                                      tcon->ses->server->vals->shared_lock_type,
3110                                      NULL, CIFS_READ_OP))
3111                 rc = generic_file_read_iter(iocb, to);
3112         up_read(&cinode->lock_sem);
3113         return rc;
3114 }
3115
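/*
 * Editorial summary of the read-path selection in cifs_strict_readv():
 * without a level II oplock every read goes to the server via
 * cifs_user_readv(); with POSIX extensions (fcntl capability, POSIX brl
 * not disabled) POSIX locks cannot conflict with reads, so the page cache
 * is used directly; otherwise lock_sem is taken shared and the cached
 * read proceeds only if no mandatory brlock conflicts with the requested
 * range, else the read fails with -EACCES.
 */
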
3116 static ssize_t
3117 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3118 {
3119         int rc = -EACCES;
3120         unsigned int bytes_read = 0;
3121         unsigned int total_read;
3122         unsigned int current_read_size;
3123         unsigned int rsize;
3124         struct cifs_sb_info *cifs_sb;
3125         struct cifs_tcon *tcon;
3126         struct TCP_Server_Info *server;
3127         unsigned int xid;
3128         char *cur_offset;
3129         struct cifsFileInfo *open_file;
3130         struct cifs_io_parms io_parms;
3131         int buf_type = CIFS_NO_BUFFER;
3132         __u32 pid;
3133
3134         xid = get_xid();
3135         cifs_sb = CIFS_FILE_SB(file);
3136
3137         /* FIXME: set up handlers for larger reads and/or convert to async */
3138         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3139
3140         if (file->private_data == NULL) {
3141                 rc = -EBADF;
3142                 free_xid(xid);
3143                 return rc;
3144         }
3145         open_file = file->private_data;
3146         tcon = tlink_tcon(open_file->tlink);
3147         server = tcon->ses->server;
3148
3149         if (!server->ops->sync_read) {
3150                 free_xid(xid);
3151                 return -ENOSYS;
3152         }
3153
3154         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3155                 pid = open_file->pid;
3156         else
3157                 pid = current->tgid;
3158
3159         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3160                 cifs_dbg(FYI, "attempting read on write-only file instance\n");
3161
3162         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3163              total_read += bytes_read, cur_offset += bytes_read) {
3164                 do {
3165                         current_read_size = min_t(uint, read_size - total_read,
3166                                                   rsize);
3167                         /*
3168                          * For Windows ME and 9x we do not want to request
3169                          * more than the server negotiated since it will
3170                          * refuse the read otherwise.
3171                          */
3172                         if ((tcon->ses) && !(tcon->ses->capabilities &
3173                                 tcon->ses->server->vals->cap_large_files)) {
3174                                 current_read_size = min_t(uint,
3175                                         current_read_size, CIFSMaxBufSize);
3176                         }
3177                         if (open_file->invalidHandle) {
3178                                 rc = cifs_reopen_file(open_file, true);
3179                                 if (rc != 0)
3180                                         break;
3181                         }
3182                         io_parms.pid = pid;
3183                         io_parms.tcon = tcon;
3184                         io_parms.offset = *offset;
3185                         io_parms.length = current_read_size;
3186                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3187                                                     &bytes_read, &cur_offset,
3188                                                     &buf_type);
3189                 } while (rc == -EAGAIN);
3190
3191                 if (rc || (bytes_read == 0)) {
3192                         if (total_read) {
3193                                 break;
3194                         } else {
3195                                 free_xid(xid);
3196                                 return rc;
3197                         }
3198                 } else {
3199                         cifs_stats_bytes_read(tcon, total_read);
3200                         *offset += bytes_read;
3201                 }
3202         }
3203         free_xid(xid);
3204         return total_read;
3205 }
3206
3207 /*
3208  * If the page is mmap'ed into a process' page tables, then we need to make
3209  * sure that it doesn't change while being written back.
3210  */
3211 static int
3212 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3213 {
3214         struct page *page = vmf->page;
3215
3216         lock_page(page);
3217         return VM_FAULT_LOCKED;
3218 }
3219
3220 static const struct vm_operations_struct cifs_file_vm_ops = {
3221         .fault = filemap_fault,
3222         .map_pages = filemap_map_pages,
3223         .page_mkwrite = cifs_page_mkwrite,
3224 };
3225
3226 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3227 {
3228         int rc, xid;
3229         struct inode *inode = file_inode(file);
3230
3231         xid = get_xid();
3232
3233         rc = 0;
3234         if (!CIFS_CACHE_READ(CIFS_I(inode)))
3235                 rc = cifs_zap_mapping(inode);
3236         if (!rc)
3237                 rc = generic_file_mmap(file, vma);
3238         if (!rc)
3239                 vma->vm_ops = &cifs_file_vm_ops;
3240
3241         /* release the xid even when cifs_zap_mapping() fails */
3242         free_xid(xid);
3243         return rc;
3244 }
3245
3246 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3247 {
3248         int rc, xid;
3249
3250         xid = get_xid();
3251         rc = cifs_revalidate_file(file);
3252         if (rc) {
3253                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3254                          rc);
3255                 free_xid(xid);
3256                 return rc;
3257         }
3258         rc = generic_file_mmap(file, vma);
3259         if (rc == 0)
3260                 vma->vm_ops = &cifs_file_vm_ops;
3261         free_xid(xid);
3262         return rc;
3263 }
3264
3265 static void
3266 cifs_readv_complete(struct work_struct *work)
3267 {
3268         unsigned int i, got_bytes;
3269         struct cifs_readdata *rdata = container_of(work,
3270                                                 struct cifs_readdata, work);
3271
3272         got_bytes = rdata->got_bytes;
3273         for (i = 0; i < rdata->nr_pages; i++) {
3274                 struct page *page = rdata->pages[i];
3275
3276                 lru_cache_add_file(page);
3277
3278                 if (rdata->result == 0 ||
3279                     (rdata->result == -EAGAIN && got_bytes)) {
3280                         flush_dcache_page(page);
3281                         SetPageUptodate(page);
3282                 }
3283
3284                 unlock_page(page);
3285
3286                 if (rdata->result == 0 ||
3287                     (rdata->result == -EAGAIN && got_bytes))
3288                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3289
3290                 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3291
3292                 page_cache_release(page);
3293                 rdata->pages[i] = NULL;
3294         }
3295         kref_put(&rdata->refcount, cifs_readdata_release);
3296 }
3297
3298 static int
3299 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3300                         struct cifs_readdata *rdata, unsigned int len)
3301 {
3302         int result = 0;
3303         unsigned int i;
3304         u64 eof;
3305         pgoff_t eof_index;
3306         unsigned int nr_pages = rdata->nr_pages;
3307         struct kvec iov;
3308
3309         /* determine the eof that the server (probably) has */
3310         eof = CIFS_I(rdata->mapping->host)->server_eof;
3311         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3312         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3313
3314         rdata->got_bytes = 0;
3315         rdata->tailsz = PAGE_CACHE_SIZE;
3316         for (i = 0; i < nr_pages; i++) {
3317                 struct page *page = rdata->pages[i];
3318
3319                 if (len >= PAGE_CACHE_SIZE) {
3320                         /* enough data to fill the page */
3321                         iov.iov_base = kmap(page);
3322                         iov.iov_len = PAGE_CACHE_SIZE;
3323                         cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3324                                  i, page->index, iov.iov_base, iov.iov_len);
3325                         len -= PAGE_CACHE_SIZE;
3326                 } else if (len > 0) {
3327                         /* enough for partial page, fill and zero the rest */
3328                         iov.iov_base = kmap(page);
3329                         iov.iov_len = len;
3330                         cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3331                                  i, page->index, iov.iov_base, iov.iov_len);
3332                         memset(iov.iov_base + len,
3333                                 '\0', PAGE_CACHE_SIZE - len);
3334                         rdata->tailsz = len;
3335                         len = 0;
3336                 } else if (page->index > eof_index) {
3337                         /*
3338                          * The VFS will not try to do readahead past the
3339                          * i_size, but it's possible that we have outstanding
3340                          * writes with gaps in the middle and the i_size hasn't
3341                          * caught up yet. Populate those with zeroed out pages
3342                          * to prevent the VFS from repeatedly attempting to
3343                          * fill them until the writes are flushed.
3344                          */
3345                         zero_user(page, 0, PAGE_CACHE_SIZE);
3346                         lru_cache_add_file(page);
3347                         flush_dcache_page(page);
3348                         SetPageUptodate(page);
3349                         unlock_page(page);
3350                         page_cache_release(page);
3351                         rdata->pages[i] = NULL;
3352                         rdata->nr_pages--;
3353                         continue;
3354                 } else {
3355                         /* no need to hold page hostage */
3356                         lru_cache_add_file(page);
3357                         unlock_page(page);
3358                         page_cache_release(page);
3359                         rdata->pages[i] = NULL;
3360                         rdata->nr_pages--;
3361                         continue;
3362                 }
3363
3364                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3365                 kunmap(page);
3366                 if (result < 0)
3367                         break;
3368
3369                 rdata->got_bytes += result;
3370         }
3371
3372         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3373                                                 rdata->got_bytes : result;
3374 }
3375
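/*
 * Worked example for the eof_index handling above (editorial, with a
 * 4096-byte cache page): if the server's EOF is believed to be 10000,
 * eof_index == (10000 - 1) >> 12 == 2. Should the response run out of
 * data (len == 0) while pages with index > 2 remain in the rdata, those
 * pages are zero-filled and marked uptodate rather than read, so the VFS
 * does not keep retrying readahead into a not-yet-flushed gap.
 */
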
3376 static int
3377 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3378                     unsigned int rsize, struct list_head *tmplist,
3379                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3380 {
3381         struct page *page, *tpage;
3382         unsigned int expected_index;
3383         int rc;
3384
3385         INIT_LIST_HEAD(tmplist);
3386
3387         page = list_entry(page_list->prev, struct page, lru);
3388
3389         /*
3390          * Lock the page and put it in the cache. Since no one else
3391          * should have access to this page, we're safe to simply set
3392          * PG_locked without checking it first.
3393          */
3394         __set_page_locked(page);
3395         rc = add_to_page_cache_locked(page, mapping,
3396                                       page->index, GFP_KERNEL);
3397
3398         /* give up if we can't stick it in the cache */
3399         if (rc) {
3400                 __clear_page_locked(page);
3401                 return rc;
3402         }
3403
3404         /* move first page to the tmplist */
3405         *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3406         *bytes = PAGE_CACHE_SIZE;
3407         *nr_pages = 1;
3408         list_move_tail(&page->lru, tmplist);
3409
3410         /* now try to add more pages onto the request */
3411         expected_index = page->index + 1;
3412         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3413                 /* discontinuity? */
3414                 if (page->index != expected_index)
3415                         break;
3416
3417                 /* would this page push the read over the rsize? */
3418                 if (*bytes + PAGE_CACHE_SIZE > rsize)
3419                         break;
3420
3421                 __set_page_locked(page);
3422                 if (add_to_page_cache_locked(page, mapping, page->index,
3423                                                                 GFP_KERNEL)) {
3424                         __clear_page_locked(page);
3425                         break;
3426                 }
3427                 list_move_tail(&page->lru, tmplist);
3428                 (*bytes) += PAGE_CACHE_SIZE;
3429                 expected_index++;
3430                 (*nr_pages)++;
3431         }
3432         return rc;
3433 }
3434
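/*
 * Worked example for readpages_get_pages() (editorial, 4096-byte cache
 * pages): with rsize == 16384 and page_list holding indexes 7,6,5,4,3
 * (declining order), the batch starts at the tail (index 3) and grows
 * through indexes 4, 5 and 6, stopping there because adding index 7 would
 * push *bytes past rsize. A gap in the indexes would end the batch the
 * same way, so each rdata always describes one contiguous range.
 */
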
3435 static int cifs_readpages(struct file *file, struct address_space *mapping,
3436         struct list_head *page_list, unsigned num_pages)
3437 {
3438         int rc;
3439         struct list_head tmplist;
3440         struct cifsFileInfo *open_file = file->private_data;
3441         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3442         struct TCP_Server_Info *server;
3443         pid_t pid;
3444
3445         /*
3446          * Read as many pages as possible from fscache; this returns -ENOBUFS
3447          * immediately if the cookie is negative.
3448          *
3449          * After this point, every page in the list might have PG_fscache set,
3450          * so we will need to clear that flag on every page we don't use.
3451          */
3452         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3453                                          &num_pages);
3454         if (rc == 0)
3455                 return rc;
3456
3457         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3458                 pid = open_file->pid;
3459         else
3460                 pid = current->tgid;
3461
3462         rc = 0;
3463         server = tlink_tcon(open_file->tlink)->ses->server;
3464
3465         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3466                  __func__, file, mapping, num_pages);
3467
3468         /*
3469          * Start with the page at end of list and move it to private
3470          * list. Do the same with any following pages until we hit
3471          * the rsize limit, hit an index discontinuity, or run out of
3472          * pages. Issue the async read and then start the loop again
3473          * until the list is empty.
3474          *
3475          * Note that list order is important. The page_list is in
3476          * the order of declining indexes. When we put the pages in
3477          * the rdata->pages, then we want them in increasing order.
3478          */
3479         while (!list_empty(page_list)) {
3480                 unsigned int i, nr_pages, bytes, rsize;
3481                 loff_t offset;
3482                 struct page *page, *tpage;
3483                 struct cifs_readdata *rdata;
3484                 unsigned credits;
3485
3486                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3487                                                    &rsize, &credits);
3488                 if (rc)
3489                         break;
3490
3491                 /*
3492                  * Give up immediately if rsize is too small to read an entire
3493                  * page. The VFS will fall back to readpage. We should never
3494                  * reach this point, however, since we set ra_pages to 0 when
3495                  * the rsize is smaller than a cache page.
3496                  */
3497                 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3498                         add_credits_and_wake_if(server, credits, 0);
3499                         return 0;
3500                 }
3501
3502                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3503                                          &nr_pages, &offset, &bytes);
3504                 if (rc) {
3505                         add_credits_and_wake_if(server, credits, 0);
3506                         break;
3507                 }
3508
3509                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3510                 if (!rdata) {
3511                         /* best to give up if we're out of mem */
3512                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3513                                 list_del(&page->lru);
3514                                 lru_cache_add_file(page);
3515                                 unlock_page(page);
3516                                 page_cache_release(page);
3517                         }
3518                         rc = -ENOMEM;
3519                         add_credits_and_wake_if(server, credits, 0);
3520                         break;
3521                 }
3522
3523                 rdata->cfile = cifsFileInfo_get(open_file);
3524                 rdata->mapping = mapping;
3525                 rdata->offset = offset;
3526                 rdata->bytes = bytes;
3527                 rdata->pid = pid;
3528                 rdata->pagesz = PAGE_CACHE_SIZE;
3529                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3530                 rdata->credits = credits;
3531
3532                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3533                         list_del(&page->lru);
3534                         rdata->pages[rdata->nr_pages++] = page;
3535                 }
3536
3537                 if (!rdata->cfile->invalidHandle ||
3538                     !cifs_reopen_file(rdata->cfile, true))
3539                         rc = server->ops->async_readv(rdata);
3540                 if (rc) {
3541                         add_credits_and_wake_if(server, rdata->credits, 0);
3542                         for (i = 0; i < rdata->nr_pages; i++) {
3543                                 page = rdata->pages[i];
3544                                 lru_cache_add_file(page);
3545                                 unlock_page(page);
3546                                 page_cache_release(page);
3547                         }
3548                         /* Fall back to readpage in error/reconnect cases */
3549                         kref_put(&rdata->refcount, cifs_readdata_release);
3550                         break;
3551                 }
3552
3553                 kref_put(&rdata->refcount, cifs_readdata_release);
3554         }
3555
3556         /* Any pages that have been shown to fscache but didn't get added to
3557          * the pagecache must be uncached before they get returned to the
3558          * allocator.
3559          */
3560         cifs_fscache_readpages_cancel(mapping->host, page_list);
3561         return rc;
3562 }
3563
3564 /*
3565  * cifs_readpage_worker must be called with the page pinned
3566  */
3567 static int cifs_readpage_worker(struct file *file, struct page *page,
3568         loff_t *poffset)
3569 {
3570         char *read_data;
3571         int rc;
3572
3573         /* Is the page cached? */
3574         rc = cifs_readpage_from_fscache(file_inode(file), page);
3575         if (rc == 0)
3576                 goto read_complete;
3577
3578         read_data = kmap(page);
3579         /* for reads over a certain size we could initiate async read-ahead */
3580
3581         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3582
3583         if (rc < 0)
3584                 goto io_error;
3585         else
3586                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3587
3588         file_inode(file)->i_atime =
3589                 current_fs_time(file_inode(file)->i_sb);
3590
3591         if (PAGE_CACHE_SIZE > rc)
3592                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3593
3594         flush_dcache_page(page);
3595         SetPageUptodate(page);
3596
3597         /* send this page to the cache */
3598         cifs_readpage_to_fscache(file_inode(file), page);
3599
3600         rc = 0;
3601
3602 io_error:
3603         kunmap(page);
3604         unlock_page(page);
3605
3606 read_complete:
3607         return rc;
3608 }
3609
3610 static int cifs_readpage(struct file *file, struct page *page)
3611 {
3612         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3613         int rc = -EACCES;
3614         unsigned int xid;
3615
3616         xid = get_xid();
3617
3618         if (file->private_data == NULL) {
3619                 rc = -EBADF;
3620                 free_xid(xid);
3621                 return rc;
3622         }
3623
3624         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3625                  page, (int)offset, (int)offset);
3626
3627         rc = cifs_readpage_worker(file, page, &offset);
3628
3629         free_xid(xid);
3630         return rc;
3631 }
3632
3633 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3634 {
3635         struct cifsFileInfo *open_file;
3636
3637         spin_lock(&cifs_file_list_lock);
3638         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3639                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3640                         spin_unlock(&cifs_file_list_lock);
3641                         return 1;
3642                 }
3643         }
3644         spin_unlock(&cifs_file_list_lock);
3645         return 0;
3646 }
3647
3648 /* We do not want to update the file size from the server for inodes
3649    open for write - to avoid races with writepage extending the file.
3650    In the future we could consider refreshing the inode only on
3651    increases in the file size, but this is tricky to do without
3652    racing with writebehind page caching in the current Linux kernel
3653    design. */
3654 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3655 {
3656         if (!cifsInode)
3657                 return true;
3658
3659         if (is_inode_writable(cifsInode)) {
3660                 /* This inode is open for write at least once */
3661                 struct cifs_sb_info *cifs_sb;
3662
3663                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3664                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3665                         /* no page cache to corrupt on direct I/O,
3666                            so we can change the size safely */
3667                         return true;
3668                 }
3669
3670                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3671                         return true;
3672
3673                 return false;
3674         } else
3675                 return true;
3676 }
3677
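/*
 * Editorial summary of is_size_safe_to_change(): the size from the server
 * is accepted when the inode is not open for write, when the mount is
 * direct I/O (no page cache to corrupt), or when the server size is
 * strictly larger than the cached i_size. A writable inode whose cached
 * size already covers end_of_file keeps the local value, since writeback
 * may still be extending the file.
 */
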
3678 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3679                         loff_t pos, unsigned len, unsigned flags,
3680                         struct page **pagep, void **fsdata)
3681 {
3682         int oncethru = 0;
3683         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3684         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3685         loff_t page_start = pos & PAGE_MASK;
3686         loff_t i_size;
3687         struct page *page;
3688         int rc = 0;
3689
3690         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3691
3692 start:
3693         page = grab_cache_page_write_begin(mapping, index, flags);
3694         if (!page) {
3695                 rc = -ENOMEM;
3696                 goto out;
3697         }
3698
3699         if (PageUptodate(page))
3700                 goto out;
3701
3702         /*
3703          * If we write a full page it will be up to date, no need to read from
3704          * the server. If the write is short, we'll end up doing a sync write
3705          * instead.
3706          */
3707         if (len == PAGE_CACHE_SIZE)
3708                 goto out;
3709
3710         /*
3711          * optimize away the read when we have an oplock, and we're not
3712          * expecting to use any of the data we'd be reading in. That
3713          * is, when the page lies beyond the EOF, or straddles the EOF
3714          * and the write will cover all of the existing data.
3715          */
3716         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3717                 i_size = i_size_read(mapping->host);
3718                 if (page_start >= i_size ||
3719                     (offset == 0 && (pos + len) >= i_size)) {
3720                         zero_user_segments(page, 0, offset,
3721                                            offset + len,
3722                                            PAGE_CACHE_SIZE);
3723                         /*
3724                          * PageChecked means that the parts of the page
3725                          * to which we're not writing are considered up
3726                          * to date. Once the data is copied to the
3727                          * page, it can be set uptodate.
3728                          */
3729                         SetPageChecked(page);
3730                         goto out;
3731                 }
3732         }
3733
3734         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3735                 /*
3736                  * might as well read a page, it is fast enough. If we get
3737                  * an error, we don't need to return it. cifs_write_end will
3738                  * do a sync write instead since PG_uptodate isn't set.
3739                  */
3740                 cifs_readpage_worker(file, page, &page_start);
3741                 page_cache_release(page);
3742                 oncethru = 1;
3743                 goto start;
3744         } else {
3745                 /* we could try using another file handle if there is one -
3746                    but how would we lock it to prevent a close of that handle
3747                    racing with this read? In any case this will be written
3748                    out by write_end, so it is fine */
3749         }
3750 out:
3751         *pagep = page;
3752         return rc;
3753 }
3754
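/*
 * Worked example for cifs_write_begin() (editorial, 4096-byte cache
 * pages): a write of len == 1000 at pos == 10000 gives index == 2,
 * offset == 1808 and page_start == 8192. The short write cannot take the
 * full-page fast path; with a read oplock and i_size == 5000 the page
 * lies wholly beyond the EOF, so it is zeroed around the write range and
 * SetPageChecked() marks the untouched parts up to date. Without an
 * oplock (and the file not open write-only) the page is read in once via
 * cifs_readpage_worker() before the copy.
 */
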
3755 static int cifs_release_page(struct page *page, gfp_t gfp)
3756 {
3757         if (PagePrivate(page))
3758                 return 0;
3759
3760         return cifs_fscache_release_page(page, gfp);
3761 }
3762
3763 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3764                                  unsigned int length)
3765 {
3766         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3767
3768         if (offset == 0 && length == PAGE_CACHE_SIZE)
3769                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3770 }
3771
3772 static int cifs_launder_page(struct page *page)
3773 {
3774         int rc = 0;
3775         loff_t range_start = page_offset(page);
3776         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3777         struct writeback_control wbc = {
3778                 .sync_mode = WB_SYNC_ALL,
3779                 .nr_to_write = 0,
3780                 .range_start = range_start,
3781                 .range_end = range_end,
3782         };
3783
3784         cifs_dbg(FYI, "Launder page: %p\n", page);
3785
3786         if (clear_page_dirty_for_io(page))
3787                 rc = cifs_writepage_locked(page, &wbc);
3788
3789         cifs_fscache_invalidate_page(page, page->mapping->host);
3790         return rc;
3791 }
3792
3793 void cifs_oplock_break(struct work_struct *work)
3794 {
3795         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3796                                                   oplock_break);
3797         struct inode *inode = d_inode(cfile->dentry);
3798         struct cifsInodeInfo *cinode = CIFS_I(inode);
3799         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3800         struct TCP_Server_Info *server = tcon->ses->server;
3801         int rc = 0;
3802
3803         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3804                         TASK_UNINTERRUPTIBLE);
3805
3806         server->ops->downgrade_oplock(server, cinode,
3807                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3808
3809         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3810                                                 cifs_has_mand_locks(cinode)) {
3811                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3812                          inode);
3813                 cinode->oplock = 0;
3814         }
3815
3816         if (inode && S_ISREG(inode->i_mode)) {
3817                 if (CIFS_CACHE_READ(cinode))
3818                         break_lease(inode, O_RDONLY);
3819                 else
3820                         break_lease(inode, O_WRONLY);
3821                 rc = filemap_fdatawrite(inode->i_mapping);
3822                 if (!CIFS_CACHE_READ(cinode)) {
3823                         rc = filemap_fdatawait(inode->i_mapping);
3824                         mapping_set_error(inode->i_mapping, rc);
3825                         cifs_zap_mapping(inode);
3826                 }
3827                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3828         }
3829
3830         rc = cifs_push_locks(cfile);
3831         if (rc)
3832                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3833
3834         /*
3835          * Releasing a stale oplock after a recent reconnect of the SMB session,
3836          * using a now-incorrect file handle, is not a data integrity issue; but
3837          * do not bother sending an oplock release if the session is still
3838          * disconnected, since the server has already released the oplock.
3839          */
3840         if (!cfile->oplock_break_cancelled) {
3841                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3842                                                              cinode);
3843                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3844         }
3845         cifs_done_oplock_break(cinode);
3846 }
3847
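/*
 * Editorial summary of cifs_oplock_break(): wait for in-flight writers,
 * downgrade the oplock state, drop caching entirely if mandatory locks
 * forbid level II caching, flush (and, when read caching is lost, wait
 * for and zap) the page cache, re-push byte-range locks, and finally
 * acknowledge the break to the server unless the break was cancelled by
 * a reconnect.
 */
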
3848 /*
3849  * The presence of cifs_direct_io() in the address space ops vector
3850  * allows open() O_DIRECT flags which would have failed otherwise.
3851  *
3852  * In the non-cached mode (mount with cache=none), we shunt off direct
3853  * read and write requests so this method should never be called.
3854  *
3855  * Direct I/O is not yet supported in the cached mode.
3856  */
3857 static ssize_t
3858 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
3859 {
3860         /*
3861          * FIXME
3862          * Eventually need to support direct IO for non forcedirectio mounts
3863          */
3864         return -EINVAL;
3865 }
3866
3867
3868 const struct address_space_operations cifs_addr_ops = {
3869         .readpage = cifs_readpage,
3870         .readpages = cifs_readpages,
3871         .writepage = cifs_writepage,
3872         .writepages = cifs_writepages,
3873         .write_begin = cifs_write_begin,
3874         .write_end = cifs_write_end,
3875         .set_page_dirty = __set_page_dirty_nobuffers,
3876         .releasepage = cifs_release_page,
3877         .direct_IO = cifs_direct_io,
3878         .invalidatepage = cifs_invalidate_page,
3879         .launder_page = cifs_launder_page,
3880 };
3881
3882 /*
3883  * cifs_readpages requires the server to support a buffer large enough to
3884  * contain the header plus one complete page of data.  Otherwise, we need
3885  * to leave cifs_readpages out of the address space operations.
3886  */
3887 const struct address_space_operations cifs_addr_ops_smallbuf = {
3888         .readpage = cifs_readpage,
3889         .writepage = cifs_writepage,
3890         .writepages = cifs_writepages,
3891         .write_begin = cifs_write_begin,
3892         .write_end = cifs_write_end,
3893         .set_page_dirty = __set_page_dirty_nobuffers,
3894         .releasepage = cifs_release_page,
3895         .invalidatepage = cifs_invalidate_page,
3896         .launder_page = cifs_launder_page,
3897 };