cifs: Deferred close for files
[linux-2.6-microblaze.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47 #include "fs_context.h"
48
49 static inline int cifs_convert_flags(unsigned int flags)
50 {
51         if ((flags & O_ACCMODE) == O_RDONLY)
52                 return GENERIC_READ;
53         else if ((flags & O_ACCMODE) == O_WRONLY)
54                 return GENERIC_WRITE;
55         else if ((flags & O_ACCMODE) == O_RDWR) {
56                 /* GENERIC_ALL is too much permission to request
57                    can cause unnecessary access denied on create */
58                 /* return GENERIC_ALL; */
59                 return (GENERIC_READ | GENERIC_WRITE);
60         }
61
62         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
63                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
64                 FILE_READ_DATA);
65 }
66
67 static u32 cifs_posix_convert_flags(unsigned int flags)
68 {
69         u32 posix_flags = 0;
70
71         if ((flags & O_ACCMODE) == O_RDONLY)
72                 posix_flags = SMB_O_RDONLY;
73         else if ((flags & O_ACCMODE) == O_WRONLY)
74                 posix_flags = SMB_O_WRONLY;
75         else if ((flags & O_ACCMODE) == O_RDWR)
76                 posix_flags = SMB_O_RDWR;
77
78         if (flags & O_CREAT) {
79                 posix_flags |= SMB_O_CREAT;
80                 if (flags & O_EXCL)
81                         posix_flags |= SMB_O_EXCL;
82         } else if (flags & O_EXCL)
83                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
84                          current->comm, current->tgid);
85
86         if (flags & O_TRUNC)
87                 posix_flags |= SMB_O_TRUNC;
88         /* be safe and imply O_SYNC for O_DSYNC */
89         if (flags & O_DSYNC)
90                 posix_flags |= SMB_O_SYNC;
91         if (flags & O_DIRECTORY)
92                 posix_flags |= SMB_O_DIRECTORY;
93         if (flags & O_NOFOLLOW)
94                 posix_flags |= SMB_O_NOFOLLOW;
95         if (flags & O_DIRECT)
96                 posix_flags |= SMB_O_DIRECT;
97
98         return posix_flags;
99 }
100
101 static inline int cifs_get_disposition(unsigned int flags)
102 {
103         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
104                 return FILE_CREATE;
105         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
106                 return FILE_OVERWRITE_IF;
107         else if ((flags & O_CREAT) == O_CREAT)
108                 return FILE_OPEN_IF;
109         else if ((flags & O_TRUNC) == O_TRUNC)
110                 return FILE_OVERWRITE;
111         else
112                 return FILE_OPEN;
113 }
114
/*
 * cifs_posix_open - open a file using the SMB POSIX extensions
 * @full_path:	path of the file relative to the share root
 * @pinode:	in/out inode pointer; if *@pinode is NULL a new inode is
 *		built from the returned attributes, otherwise the existing
 *		inode's mapping is revalidated and its attrs refreshed.
 *		May be NULL when the caller does not need inode info.
 * @sb:		superblock of the mount
 * @mode:	create mode; masked with the caller's umask below
 * @f_flags:	VFS open flags, converted to SMB_O_* for the wire
 * @poplock:	returned oplock level
 * @pnetfid:	returned server file id
 * @xid:	transaction id for tracing
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(const char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type == -1 means the server returned no file attributes */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* existing inode: drop any stale cached pages, then refresh */
		cifs_revalidate_mapping(*pinode);
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
176
/*
 * cifs_nt_open - open a file on the server with an NT-style create request
 *
 * Builds cifs_open_parms from the VFS open flags (@f_flags), performs the
 * open through the server's ->open op, then refreshes the inode's metadata
 * from the server.  If the metadata query fails, the just-opened handle is
 * closed again (avoiding a handle leak) and -ESTALE is mapped to
 * -EOPENSTALE for the VFS open path.
 *
 * Returns 0 on success or a negative errno.
 */
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* metadata refresh failed: close the handle we just opened */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
266
267 static bool
268 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
269 {
270         struct cifs_fid_locks *cur;
271         bool has_locks = false;
272
273         down_read(&cinode->lock_sem);
274         list_for_each_entry(cur, &cinode->llist, llist) {
275                 if (!list_empty(&cur->locks)) {
276                         has_locks = true;
277                         break;
278                 }
279         }
280         up_read(&cinode->lock_sem);
281         return has_locks;
282 }
283
/*
 * Acquire @sem for writing by polling with a short sleep between
 * attempts rather than blocking in down_write().
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
290
291 static void cifsFileInfo_put_work(struct work_struct *work);
292
/*
 * cifs_new_fileinfo - allocate and wire up private data for an open file
 * @fid:	server file id returned by the open
 * @file:	the VFS file being opened
 * @tlink:	tree connection link (an extra reference is taken here)
 * @oplock:	oplock/lease state granted by the server
 *
 * Links the new cifsFileInfo into the inode's and tcon's open-file lists,
 * resolves the final oplock level (accounting for a pending-open oplock
 * update and for mandatory brlocks), and stores the result in
 * file->private_data.  Returns the cifsFileInfo or NULL on allocation
 * failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	/* per-fd byte-range lock list, linked into cinode->llist below */
	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;	/* initial reference; dropped via cifsFileInfo_put */
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->oplock_break_received = false;
	cfile->deferred_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	/* delayed work used by the deferred-close machinery */
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	/* an oplock/lease break may have arrived while the open was pending */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	/* set_fid above may have set purge_cache to flag stale cached data */
	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
375
/**
 * cifsFileInfo_get - take an extra reference on an open file's private data
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 *
 * Bumps the refcount under file_info_lock so it cannot race with the
 * final put.  Returns @cifs_file for caller convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
384
/*
 * cifsFileInfo_put_final - free a cifsFileInfo after its last reference
 *
 * Tears down the per-fd byte-range lock records, unlinks the fd's lock
 * list from the inode, then drops the tlink, dentry and superblock
 * references taken at open time.  Runs either inline or from the
 * fileinfo_put_wq worker (see cifsFileInfo_put_work / _cifsFileInfo_put).
 */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
411
412 static void cifsFileInfo_put_work(struct work_struct *work)
413 {
414         struct cifsFileInfo *cifs_file = container_of(work,
415                         struct cifsFileInfo, put);
416
417         cifsFileInfo_put_final(cifs_file);
418 }
419
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 * Passes wait_oplock_handler=true and offload=true, so the final free
 * (if this was the last reference) is queued to fileinfo_put_wq.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 *
 * Context: must not be called holding tcon->open_file_lock,
 * cinode->open_file_lock or cifs_file->file_info_lock (see
 * _cifsFileInfo_put()).
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
431
432 /**
433  * _cifsFileInfo_put - release a reference of file priv data
434  *
435  * This may involve closing the filehandle @cifs_file out on the
436  * server. Must be called without holding tcon->open_file_lock,
437  * cinode->open_file_lock and cifs_file->file_info_lock.
438  *
439  * If @wait_for_oplock_handler is true and we are releasing the last
440  * reference, wait for any running oplock break handler of the file
441  * and cancel any pending one.
442  *
443  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
444  * @wait_oplock_handler: must be false if called from oplock_break_handler
445  * @offload:    not offloaded on close and oplock breaks
446  *
447  */
448 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
449                        bool wait_oplock_handler, bool offload)
450 {
451         struct inode *inode = d_inode(cifs_file->dentry);
452         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
453         struct TCP_Server_Info *server = tcon->ses->server;
454         struct cifsInodeInfo *cifsi = CIFS_I(inode);
455         struct super_block *sb = inode->i_sb;
456         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
457         struct cifs_fid fid;
458         struct cifs_pending_open open;
459         bool oplock_break_cancelled;
460
461         spin_lock(&tcon->open_file_lock);
462         spin_lock(&cifsi->open_file_lock);
463         spin_lock(&cifs_file->file_info_lock);
464         if (--cifs_file->count > 0) {
465                 spin_unlock(&cifs_file->file_info_lock);
466                 spin_unlock(&cifsi->open_file_lock);
467                 spin_unlock(&tcon->open_file_lock);
468                 return;
469         }
470         spin_unlock(&cifs_file->file_info_lock);
471
472         if (server->ops->get_lease_key)
473                 server->ops->get_lease_key(inode, &fid);
474
475         /* store open in pending opens to make sure we don't miss lease break */
476         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
477
478         /* remove it from the lists */
479         list_del(&cifs_file->flist);
480         list_del(&cifs_file->tlist);
481         atomic_dec(&tcon->num_local_opens);
482
483         if (list_empty(&cifsi->openFileList)) {
484                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
485                          d_inode(cifs_file->dentry));
486                 /*
487                  * In strict cache mode we need invalidate mapping on the last
488                  * close  because it may cause a error when we open this file
489                  * again and get at least level II oplock.
490                  */
491                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
492                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
493                 cifs_set_oplock_level(cifsi, 0);
494         }
495
496         spin_unlock(&cifsi->open_file_lock);
497         spin_unlock(&tcon->open_file_lock);
498
499         oplock_break_cancelled = wait_oplock_handler ?
500                 cancel_work_sync(&cifs_file->oplock_break) : false;
501
502         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
503                 struct TCP_Server_Info *server = tcon->ses->server;
504                 unsigned int xid;
505
506                 xid = get_xid();
507                 if (server->ops->close_getattr)
508                         server->ops->close_getattr(xid, tcon, cifs_file);
509                 else if (server->ops->close)
510                         server->ops->close(xid, tcon, &cifs_file->fid);
511                 _free_xid(xid);
512         }
513
514         if (oplock_break_cancelled)
515                 cifs_done_oplock_break(cifsi);
516
517         cifs_del_pending_open(&open);
518
519         if (offload)
520                 queue_work(fileinfo_put_wq, &cifs_file->put);
521         else
522                 cifsFileInfo_put_final(cifs_file);
523 }
524
/*
 * cifs_open - VFS ->open for cifs regular files
 *
 * Tries, in order: reusing a cached handle whose close was deferred,
 * an SMB POSIX-extension open (when the tcon advertises support), and
 * finally an NT-style open via cifs_nt_open().  On success
 * file->private_data points at the new (or reused) cifsFileInfo.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* strict-cache O_DIRECT opens get dedicated file_operations */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	spin_lock(&CIFS_I(inode)->deferred_lock);
	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			/* flags match - reuse the handle, cancel deferred close */
			file->private_data = cfile;
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto out;
		} else {
			/* flags differ - drop the cached handle, open fresh */
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		}
	} else {
		spin_unlock(&CIFS_I(inode)->deferred_lock);
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server claims posix caps but can't do posix open */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* track the open so a lease break during it is not missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* allocation failed - close the server handle we opened */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
670
671 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
672
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.  Called with a reconnected handle; pushes either
 * POSIX (unix-extension) locks or mandatory locks back to the server.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* lockdep annotation: this read lock may nest under another lock_sem */
	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	/* POSIX locks when the server supports them and they aren't disabled */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
702
703 static int
704 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
705 {
706         int rc = -EACCES;
707         unsigned int xid;
708         __u32 oplock;
709         struct cifs_sb_info *cifs_sb;
710         struct cifs_tcon *tcon;
711         struct TCP_Server_Info *server;
712         struct cifsInodeInfo *cinode;
713         struct inode *inode;
714         void *page;
715         const char *full_path;
716         int desired_access;
717         int disposition = FILE_OPEN;
718         int create_options = CREATE_NOT_DIR;
719         struct cifs_open_parms oparms;
720
721         xid = get_xid();
722         mutex_lock(&cfile->fh_mutex);
723         if (!cfile->invalidHandle) {
724                 mutex_unlock(&cfile->fh_mutex);
725                 free_xid(xid);
726                 return 0;
727         }
728
729         inode = d_inode(cfile->dentry);
730         cifs_sb = CIFS_SB(inode->i_sb);
731         tcon = tlink_tcon(cfile->tlink);
732         server = tcon->ses->server;
733
734         /*
735          * Can not grab rename sem here because various ops, including those
736          * that already have the rename sem can end up causing writepage to get
737          * called and if the server was down that means we end up here, and we
738          * can never tell if the caller already has the rename_sem.
739          */
740         page = alloc_dentry_path();
741         full_path = build_path_from_dentry(cfile->dentry, page);
742         if (IS_ERR(full_path)) {
743                 mutex_unlock(&cfile->fh_mutex);
744                 free_dentry_path(page);
745                 free_xid(xid);
746                 return PTR_ERR(full_path);
747         }
748
749         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
750                  inode, cfile->f_flags, full_path);
751
752         if (tcon->ses->server->oplocks)
753                 oplock = REQ_OPLOCK;
754         else
755                 oplock = 0;
756
757         if (tcon->unix_ext && cap_unix(tcon->ses) &&
758             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
759                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
760                 /*
761                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
762                  * original open. Must mask them off for a reopen.
763                  */
764                 unsigned int oflags = cfile->f_flags &
765                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
766
767                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
768                                      cifs_sb->ctx->file_mode /* ignored */,
769                                      oflags, &oplock, &cfile->fid.netfid, xid);
770                 if (rc == 0) {
771                         cifs_dbg(FYI, "posix reopen succeeded\n");
772                         oparms.reconnect = true;
773                         goto reopen_success;
774                 }
775                 /*
776                  * fallthrough to retry open the old way on errors, especially
777                  * in the reconnect path it is important to retry hard
778                  */
779         }
780
781         desired_access = cifs_convert_flags(cfile->f_flags);
782
783         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
784         if (cfile->f_flags & O_SYNC)
785                 create_options |= CREATE_WRITE_THROUGH;
786
787         if (cfile->f_flags & O_DIRECT)
788                 create_options |= CREATE_NO_BUFFER;
789
790         if (server->ops->get_lease_key)
791                 server->ops->get_lease_key(inode, &cfile->fid);
792
793         oparms.tcon = tcon;
794         oparms.cifs_sb = cifs_sb;
795         oparms.desired_access = desired_access;
796         oparms.create_options = cifs_create_options(cifs_sb, create_options);
797         oparms.disposition = disposition;
798         oparms.path = full_path;
799         oparms.fid = &cfile->fid;
800         oparms.reconnect = true;
801
802         /*
803          * Can not refresh inode by passing in file_info buf to be returned by
804          * ops->open and then calling get_inode_info with returned buf since
805          * file might have write behind data that needs to be flushed and server
806          * version of file size can be stale. If we knew for sure that inode was
807          * not dirty locally we could do this.
808          */
809         rc = server->ops->open(xid, &oparms, &oplock, NULL);
810         if (rc == -ENOENT && oparms.reconnect == false) {
811                 /* durable handle timeout is expired - open the file again */
812                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
813                 /* indicate that we need to relock the file */
814                 oparms.reconnect = true;
815         }
816
817         if (rc) {
818                 mutex_unlock(&cfile->fh_mutex);
819                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
820                 cifs_dbg(FYI, "oplock: %d\n", oplock);
821                 goto reopen_error_exit;
822         }
823
824 reopen_success:
825         cfile->invalidHandle = false;
826         mutex_unlock(&cfile->fh_mutex);
827         cinode = CIFS_I(inode);
828
829         if (can_flush) {
830                 rc = filemap_write_and_wait(inode->i_mapping);
831                 if (!is_interrupt_error(rc))
832                         mapping_set_error(inode->i_mapping, rc);
833
834                 if (tcon->posix_extensions)
835                         rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
836                 else if (tcon->unix_ext)
837                         rc = cifs_get_inode_info_unix(&inode, full_path,
838                                                       inode->i_sb, xid);
839                 else
840                         rc = cifs_get_inode_info(&inode, full_path, NULL,
841                                                  inode->i_sb, xid, NULL);
842         }
843         /*
844          * Else we are writing out data to server already and could deadlock if
845          * we tried to flush data, and since we do not know if we have data that
846          * would invalidate the current end of file on the server we can not go
847          * to the server to get the new inode info.
848          */
849
850         /*
851          * If the server returned a read oplock and we have mandatory brlocks,
852          * set oplock level to None.
853          */
854         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
855                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
856                 oplock = 0;
857         }
858
859         server->ops->set_fid(cfile, &cfile->fid, oplock);
860         if (oparms.reconnect)
861                 cifs_relock_file(cfile);
862
863 reopen_error_exit:
864         free_dentry_path(page);
865         free_xid(xid);
866         return rc;
867 }
868
869 void smb2_deferred_work_close(struct work_struct *work)
870 {
871         struct cifsFileInfo *cfile = container_of(work,
872                         struct cifsFileInfo, deferred.work);
873
874         spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
875         cifs_del_deferred_close(cfile);
876         cfile->deferred_scheduled = false;
877         spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
878         _cifsFileInfo_put(cfile, true, false);
879 }
880
/*
 * ->release() for regular files.
 *
 * If the inode still holds a read+write caching oplock (CIFS_CACHE_RHW_FLG)
 * the SMB close is deferred: a tracking entry is recorded and the final
 * reference is dropped later by smb2_deferred_work_close(), scheduled
 * acregmax after this close, so a quick reopen can reuse the handle.
 * Otherwise the reference is dropped (closing the handle) immediately.
 */
int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		/* tracking struct for the deferred close; may be NULL on OOM */
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
		    dclose) {
			if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
				inode->i_ctime = inode->i_mtime = current_time(inode);
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_scheduled) {
				/* work already queued - just push the deadline out */
				mod_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->acregmax);
				cfile->deferred_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				/* reference is dropped by the delayed work */
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			/* no deferral benefit (or kmalloc failed) - close now */
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
920
921 void
922 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
923 {
924         struct cifsFileInfo *open_file;
925         struct list_head *tmp;
926         struct list_head *tmp1;
927         struct list_head tmp_list;
928
929         if (!tcon->use_persistent || !tcon->need_reopen_files)
930                 return;
931
932         tcon->need_reopen_files = false;
933
934         cifs_dbg(FYI, "Reopen persistent handles\n");
935         INIT_LIST_HEAD(&tmp_list);
936
937         /* list all files open on tree connection, reopen resilient handles  */
938         spin_lock(&tcon->open_file_lock);
939         list_for_each(tmp, &tcon->openFileList) {
940                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
941                 if (!open_file->invalidHandle)
942                         continue;
943                 cifsFileInfo_get(open_file);
944                 list_add_tail(&open_file->rlist, &tmp_list);
945         }
946         spin_unlock(&tcon->open_file_lock);
947
948         list_for_each_safe(tmp, tmp1, &tmp_list) {
949                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
950                 if (cifs_reopen_file(open_file, false /* do not flush */))
951                         tcon->need_reopen_files = true;
952                 list_del_init(&open_file->rlist);
953                 cifsFileInfo_put(open_file);
954         }
955 }
956
957 int cifs_closedir(struct inode *inode, struct file *file)
958 {
959         int rc = 0;
960         unsigned int xid;
961         struct cifsFileInfo *cfile = file->private_data;
962         struct cifs_tcon *tcon;
963         struct TCP_Server_Info *server;
964         char *buf;
965
966         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
967
968         if (cfile == NULL)
969                 return rc;
970
971         xid = get_xid();
972         tcon = tlink_tcon(cfile->tlink);
973         server = tcon->ses->server;
974
975         cifs_dbg(FYI, "Freeing private data in close dir\n");
976         spin_lock(&cfile->file_info_lock);
977         if (server->ops->dir_needs_close(cfile)) {
978                 cfile->invalidHandle = true;
979                 spin_unlock(&cfile->file_info_lock);
980                 if (server->ops->close_dir)
981                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
982                 else
983                         rc = -ENOSYS;
984                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
985                 /* not much we can do if it fails anyway, ignore rc */
986                 rc = 0;
987         } else
988                 spin_unlock(&cfile->file_info_lock);
989
990         buf = cfile->srch_inf.ntwrk_buf_start;
991         if (buf) {
992                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
993                 cfile->srch_inf.ntwrk_buf_start = NULL;
994                 if (cfile->srch_inf.smallBuf)
995                         cifs_small_buf_release(buf);
996                 else
997                         cifs_buf_release(buf);
998         }
999
1000         cifs_put_tlink(cfile->tlink);
1001         kfree(file->private_data);
1002         file->private_data = NULL;
1003         /* BB can we lock the filestruct while this is going on? */
1004         free_xid(xid);
1005         return rc;
1006 }
1007
1008 static struct cifsLockInfo *
1009 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1010 {
1011         struct cifsLockInfo *lock =
1012                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1013         if (!lock)
1014                 return lock;
1015         lock->offset = offset;
1016         lock->length = length;
1017         lock->type = type;
1018         lock->pid = current->tgid;
1019         lock->flags = flags;
1020         INIT_LIST_HEAD(&lock->blist);
1021         init_waitqueue_head(&lock->block_q);
1022         return lock;
1023 }
1024
1025 void
1026 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1027 {
1028         struct cifsLockInfo *li, *tmp;
1029         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1030                 list_del_init(&li->blist);
1031                 wake_up(&li->block_q);
1032         }
1033 }
1034
1035 #define CIFS_LOCK_OP    0
1036 #define CIFS_READ_OP    1
1037 #define CIFS_WRITE_OP   2
1038
1039 /* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	/*
	 * Walk all locks held through this fid and look for one that overlaps
	 * [offset, offset + length) and conflicts with the requested
	 * operation; rw_check selects lock-op, read or write semantics.
	 * On conflict, the offending lock is reported via *conf_lock.
	 */
	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* no byte-range overlap - cannot conflict */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/*
		 * A shared request coexists with a lock of the same type, or
		 * with any lock held by the same owner through the same fid.
		 */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* OFD locks taken through the same fid do not conflict */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
1075
1076 bool
1077 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1078                         __u8 type, __u16 flags,
1079                         struct cifsLockInfo **conf_lock, int rw_check)
1080 {
1081         bool rc = false;
1082         struct cifs_fid_locks *cur;
1083         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1084
1085         list_for_each_entry(cur, &cinode->llist, llist) {
1086                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1087                                                  flags, cfile, conf_lock,
1088                                                  rw_check);
1089                 if (rc)
1090                         break;
1091         }
1092
1093         return rc;
1094 }
1095
1096 /*
1097  * Check if there is another lock that prevents us to set the lock (mandatory
1098  * style). If such a lock exists, update the flock structure with its
1099  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1100  * or leave it the same if we can't. Returns 0 if we don't need to request to
1101  * the server or 1 otherwise.
1102  */
1103 static int
1104 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1105                __u8 type, struct file_lock *flock)
1106 {
1107         int rc = 0;
1108         struct cifsLockInfo *conf_lock;
1109         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1110         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1111         bool exist;
1112
1113         down_read(&cinode->lock_sem);
1114
1115         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1116                                         flock->fl_flags, &conf_lock,
1117                                         CIFS_LOCK_OP);
1118         if (exist) {
1119                 flock->fl_start = conf_lock->offset;
1120                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1121                 flock->fl_pid = conf_lock->pid;
1122                 if (conf_lock->type & server->vals->shared_lock_type)
1123                         flock->fl_type = F_RDLCK;
1124                 else
1125                         flock->fl_type = F_WRLCK;
1126         } else if (!cinode->can_cache_brlcks)
1127                 rc = 1;
1128         else
1129                 flock->fl_type = F_UNLCK;
1130
1131         up_read(&cinode->lock_sem);
1132         return rc;
1133 }
1134
1135 static void
1136 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1137 {
1138         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1139         cifs_down_write(&cinode->lock_sem);
1140         list_add_tail(&lock->llist, &cfile->llist->locks);
1141         up_write(&cinode->lock_sem);
1142 }
1143
1144 /*
1145  * Set the byte-range lock (mandatory style). Returns:
1146  * 1) 0, if we set the lock and don't need to request to the server;
1147  * 2) 1, if no locks prevent us but we need to request to the server;
1148  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1149  */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		/* no conflict and brlocks may be cached - done locally */
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/*
		 * Queue ourselves on the conflicting lock's blocked-waiters
		 * list and sleep until cifs_del_lock_waiters() detaches us
		 * (blist becomes an empty, self-pointing list head), then
		 * retry the whole conflict check from scratch.
		 */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		/* interrupted - unhook ourselves from the waiters list */
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
1191
1192 /*
1193  * Check if there is another lock that prevents us to set the lock (posix
1194  * style). If such a lock exists, update the flock structure with its
1195  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1196  * or leave it the same if we can't. Returns 0 if we don't need to request to
1197  * the server or 1 otherwise.
1198  */
1199 static int
1200 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1201 {
1202         int rc = 0;
1203         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1204         unsigned char saved_type = flock->fl_type;
1205
1206         if ((flock->fl_flags & FL_POSIX) == 0)
1207                 return 1;
1208
1209         down_read(&cinode->lock_sem);
1210         posix_test_lock(file, flock);
1211
1212         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1213                 flock->fl_type = saved_type;
1214                 rc = 1;
1215         }
1216
1217         up_read(&cinode->lock_sem);
1218         return rc;
1219 }
1220
1221 /*
1222  * Set the byte-range lock (posix style). Returns:
1223  * 1) <0, if the error occurs while setting the lock;
1224  * 2) 0, if we set the lock and don't need to request to the server;
1225  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1226  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1227  */
1228 static int
1229 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1230 {
1231         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1232         int rc = FILE_LOCK_DEFERRED + 1;
1233
1234         if ((flock->fl_flags & FL_POSIX) == 0)
1235                 return rc;
1236
1237         cifs_down_write(&cinode->lock_sem);
1238         if (!cinode->can_cache_brlcks) {
1239                 up_write(&cinode->lock_sem);
1240                 return rc;
1241         }
1242
1243         rc = posix_lock_file(file, flock, NULL);
1244         up_write(&cinode->lock_sem);
1245         return rc;
1246 }
1247
/*
 * Push all byte-range locks cached for @cfile to the server using SMB1
 * LOCKING_ANDX requests.  Ranges are batched: as many LOCKING_ANDX_RANGE
 * entries as fit in the negotiated buffer (capped at PAGE_SIZE) are packed
 * into each request.  Exclusive and shared locks go in separate requests,
 * hence the two passes over the list.  Returns 0 on success or the last
 * non-zero rc from cifs_lockv().
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	/* one pass per lock flavour: [0] exclusive, [1] shared */
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			/* pack this range into the current request buffer */
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				/* buffer full - flush this batch to the server */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			/* send the final, partially filled batch */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
1324
/*
 * Map a POSIX lock owner to a 32-bit id used as the pid sent to the server,
 * XORing the pointer hash with cifs_lock_secret so the ids are not simply
 * predictable kernel pointers.
 */
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
1330
/*
 * Snapshot of one cached POSIX byte-range lock, preallocated so that it can
 * be filled in while flc_lock is held (no allocation allowed there) and
 * sent to the server afterwards.  See cifs_push_posix_locks().
 */
struct lock_to_push {
	struct list_head llist;	/* linkage on the locks_to_send list */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* hashed lock owner (hash_lockowner()) */
	__u16 netfid;		/* file handle the lock is set on */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1339
/*
 * Push all cached POSIX (fcntl) locks on the inode to the server.  Because
 * no memory may be allocated while flc_lock is held, this works in two
 * passes: count the FL_POSIX locks, preallocate that many lock_to_push
 * entries, then walk the list again under flc_lock to snapshot each range
 * before sending them with CIFSSMBPosixLock().  The caller (cifs_push_locks)
 * holds cinode->lock_sem for write, which keeps new locks from appearing
 * between the two passes.  Returns 0 or the last non-zero rc.
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	/* no lock context means there is nothing to push */
	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	/* second pass: fill the preallocated entries under flc_lock */
	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	/* send the snapshots to the server, freeing each as we go */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	/* allocation failed part-way - release what we already have */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
1427
1428 static int
1429 cifs_push_locks(struct cifsFileInfo *cfile)
1430 {
1431         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1432         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1433         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1434         int rc = 0;
1435
1436         /* we are going to update can_cache_brlcks here - need a write access */
1437         cifs_down_write(&cinode->lock_sem);
1438         if (!cinode->can_cache_brlcks) {
1439                 up_write(&cinode->lock_sem);
1440                 return rc;
1441         }
1442
1443         if (cap_unix(tcon->ses) &&
1444             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1445             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1446                 rc = cifs_push_posix_locks(cfile);
1447         else
1448                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1449
1450         cinode->can_cache_brlcks = false;
1451         up_write(&cinode->lock_sem);
1452         return rc;
1453 }
1454
1455 static void
1456 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1457                 bool *wait_flag, struct TCP_Server_Info *server)
1458 {
1459         if (flock->fl_flags & FL_POSIX)
1460                 cifs_dbg(FYI, "Posix\n");
1461         if (flock->fl_flags & FL_FLOCK)
1462                 cifs_dbg(FYI, "Flock\n");
1463         if (flock->fl_flags & FL_SLEEP) {
1464                 cifs_dbg(FYI, "Blocking lock\n");
1465                 *wait_flag = true;
1466         }
1467         if (flock->fl_flags & FL_ACCESS)
1468                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1469         if (flock->fl_flags & FL_LEASE)
1470                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1471         if (flock->fl_flags &
1472             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1473                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1474                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1475
1476         *type = server->vals->large_lock_type;
1477         if (flock->fl_type == F_WRLCK) {
1478                 cifs_dbg(FYI, "F_WRLCK\n");
1479                 *type |= server->vals->exclusive_lock_type;
1480                 *lock = 1;
1481         } else if (flock->fl_type == F_UNLCK) {
1482                 cifs_dbg(FYI, "F_UNLCK\n");
1483                 *type |= server->vals->unlock_lock_type;
1484                 *unlock = 1;
1485                 /* Check if unlock includes more than one lock range */
1486         } else if (flock->fl_type == F_RDLCK) {
1487                 cifs_dbg(FYI, "F_RDLCK\n");
1488                 *type |= server->vals->shared_lock_type;
1489                 *lock = 1;
1490         } else if (flock->fl_type == F_EXLCK) {
1491                 cifs_dbg(FYI, "F_EXLCK\n");
1492                 *type |= server->vals->exclusive_lock_type;
1493                 *lock = 1;
1494         } else if (flock->fl_type == F_SHLCK) {
1495                 cifs_dbg(FYI, "F_SHLCK\n");
1496                 *type |= server->vals->shared_lock_type;
1497                 *lock = 1;
1498         } else
1499                 cifs_dbg(FYI, "Unknown type of lock\n");
1500 }
1501
/*
 * Handle an F_GETLK-style query.  On POSIX (unix extensions) mounts the
 * cached state is consulted first and, if inconclusive, the query is sent
 * to the server.  Mandatory-lock mounts have no wire "test" operation, so
 * the range is probed by actually taking and immediately releasing locks of
 * progressively weaker type; flock->fl_type is updated with the outcome.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0 means the cached state answered the query */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	/* mandatory locks: try the cached state first */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	/* probe: take the requested lock, then release it again */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	/* a shared request that failed means an exclusive lock is held */
	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive failed - probe again with a shared lock */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1570
1571 void
1572 cifs_move_llist(struct list_head *source, struct list_head *dest)
1573 {
1574         struct list_head *li, *tmp;
1575         list_for_each_safe(li, tmp, source)
1576                 list_move(li, dest);
1577 }
1578
1579 void
1580 cifs_free_llist(struct list_head *llist)
1581 {
1582         struct cifsLockInfo *li, *tmp;
1583         list_for_each_entry_safe(li, tmp, llist, llist) {
1584                 cifs_del_lock_waiters(li);
1585                 list_del(&li->llist);
1586                 kfree(li);
1587         }
1588 }
1589
/*
 * Send unlock requests to the server for all cached byte-range locks of
 * @cfile that fall entirely inside @flock's range and belong to the
 * current process, batching as many ranges as fit in one LOCKING_ANDX
 * request.  Locks whose unlock request fails on the server are re-added
 * to the file's lock list.  Returns 0 on success or the last server
 * error code.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	/* one pass for exclusive locks, one for shared locks */
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	/* number of LOCKING_ANDX_RANGE entries that fit in one request */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			/* skip locks not fully contained in the request */
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				/* request buffer full - flush this batch */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeed -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			/* flush the final partial batch for this lock type */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
1702
/*
 * Apply or remove a byte-range lock for an F_SETLK/F_SETLKW-style
 * request.  Uses POSIX locks when the server supports the Unix
 * extensions (@posix_lck), mandatory SMB locks otherwise, and mirrors
 * the result into the local VFS lock lists for FL_POSIX/FL_FLOCK
 * requests.  Returns 0 on success or a negative errno.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		/* try to set the lock locally before going to the server */
		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		/* rc == 0 means the lock could be satisfied locally */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* server granted the lock - cache it locally */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		/* record the result in the VFS lock lists as well */
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
1796
1797 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1798 {
1799         int rc, xid;
1800         int lock = 0, unlock = 0;
1801         bool wait_flag = false;
1802         bool posix_lck = false;
1803         struct cifs_sb_info *cifs_sb;
1804         struct cifs_tcon *tcon;
1805         struct cifsFileInfo *cfile;
1806         __u32 type;
1807
1808         rc = -EACCES;
1809         xid = get_xid();
1810
1811         if (!(fl->fl_flags & FL_FLOCK))
1812                 return -ENOLCK;
1813
1814         cfile = (struct cifsFileInfo *)file->private_data;
1815         tcon = tlink_tcon(cfile->tlink);
1816
1817         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1818                         tcon->ses->server);
1819         cifs_sb = CIFS_FILE_SB(file);
1820
1821         if (cap_unix(tcon->ses) &&
1822             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1823             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1824                 posix_lck = true;
1825
1826         if (!lock && !unlock) {
1827                 /*
1828                  * if no lock or unlock then nothing to do since we do not
1829                  * know what it is
1830                  */
1831                 free_xid(xid);
1832                 return -EOPNOTSUPP;
1833         }
1834
1835         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1836                         xid);
1837         free_xid(xid);
1838         return rc;
1839
1840
1841 }
1842
1843 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1844 {
1845         int rc, xid;
1846         int lock = 0, unlock = 0;
1847         bool wait_flag = false;
1848         bool posix_lck = false;
1849         struct cifs_sb_info *cifs_sb;
1850         struct cifs_tcon *tcon;
1851         struct cifsFileInfo *cfile;
1852         __u32 type;
1853
1854         rc = -EACCES;
1855         xid = get_xid();
1856
1857         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1858                  cmd, flock->fl_flags, flock->fl_type,
1859                  flock->fl_start, flock->fl_end);
1860
1861         cfile = (struct cifsFileInfo *)file->private_data;
1862         tcon = tlink_tcon(cfile->tlink);
1863
1864         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1865                         tcon->ses->server);
1866         cifs_sb = CIFS_FILE_SB(file);
1867
1868         if (cap_unix(tcon->ses) &&
1869             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1870             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1871                 posix_lck = true;
1872         /*
1873          * BB add code here to normalize offset and length to account for
1874          * negative length which we can not accept over the wire.
1875          */
1876         if (IS_GETLK(cmd)) {
1877                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1878                 free_xid(xid);
1879                 return rc;
1880         }
1881
1882         if (!lock && !unlock) {
1883                 /*
1884                  * if no lock or unlock then nothing to do since we do not
1885                  * know what it is
1886                  */
1887                 free_xid(xid);
1888                 return -EOPNOTSUPP;
1889         }
1890
1891         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1892                         xid);
1893         free_xid(xid);
1894         return rc;
1895 }
1896
1897 /*
1898  * update the file size (if needed) after a write. Should be called with
1899  * the inode->i_lock held
1900  */
1901 void
1902 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1903                       unsigned int bytes_written)
1904 {
1905         loff_t end_of_write = offset + bytes_written;
1906
1907         if (end_of_write > cifsi->server_eof)
1908                 cifsi->server_eof = end_of_write;
1909 }
1910
/*
 * Synchronously write @write_size bytes from @write_data to the file at
 * *@offset using the given open handle and @pid, retrying on -EAGAIN
 * (including reopening a stale handle) and looping over partial writes.
 * On success updates *@offset, the cached server EOF and i_size, and
 * returns the number of bytes written; returns a negative errno only
 * when nothing at all could be written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* cap each request at the server's retry size */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* report partial progress if any, else the error */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			/* i_lock protects the server_eof update */
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		/* extend the cached file size if we wrote past it */
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1992
1993 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1994                                         bool fsuid_only)
1995 {
1996         struct cifsFileInfo *open_file = NULL;
1997         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1998
1999         /* only filter by fsuid on multiuser mounts */
2000         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2001                 fsuid_only = false;
2002
2003         spin_lock(&cifs_inode->open_file_lock);
2004         /* we could simply get the first_list_entry since write-only entries
2005            are always at the end of the list but since the first entry might
2006            have a close pending, we go through the whole list */
2007         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2008                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2009                         continue;
2010                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2011                         if ((!open_file->invalidHandle) &&
2012                                 (!open_file->oplock_break_received)) {
2013                                 /* found a good file */
2014                                 /* lock it so it will not be closed on us */
2015                                 cifsFileInfo_get(open_file);
2016                                 spin_unlock(&cifs_inode->open_file_lock);
2017                                 return open_file;
2018                         } /* else might as well continue, and look for
2019                              another, or simply have the caller reopen it
2020                              again rather than trying to fix this handle */
2021                 } else /* write only file */
2022                         break; /* write only files are last so must be done */
2023         }
2024         spin_unlock(&cifs_inode->open_file_lock);
2025         return NULL;
2026 }
2027
/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of on oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* give up after MAX_REOPEN_ATT failed reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass only considers handles owned by this process */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* remember a stale handle we could reopen */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		/* try reviving the stale handle outside the spinlock */
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/* reopen failed - demote this handle and rescan the list */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
2118
2119 struct cifsFileInfo *
2120 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2121 {
2122         struct cifsFileInfo *cfile;
2123         int rc;
2124
2125         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2126         if (rc)
2127                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2128
2129         return cfile;
2130 }
2131
2132 int
2133 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2134                        int flags,
2135                        struct cifsFileInfo **ret_file)
2136 {
2137         struct cifsFileInfo *cfile;
2138         void *page = alloc_dentry_path();
2139
2140         *ret_file = NULL;
2141
2142         spin_lock(&tcon->open_file_lock);
2143         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2144                 struct cifsInodeInfo *cinode;
2145                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2146                 if (IS_ERR(full_path)) {
2147                         spin_unlock(&tcon->open_file_lock);
2148                         free_dentry_path(page);
2149                         return PTR_ERR(full_path);
2150                 }
2151                 if (strcmp(full_path, name))
2152                         continue;
2153
2154                 cinode = CIFS_I(d_inode(cfile->dentry));
2155                 spin_unlock(&tcon->open_file_lock);
2156                 free_dentry_path(page);
2157                 return cifs_get_writable_file(cinode, flags, ret_file);
2158         }
2159
2160         spin_unlock(&tcon->open_file_lock);
2161         free_dentry_path(page);
2162         return -ENOENT;
2163 }
2164
2165 int
2166 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2167                        struct cifsFileInfo **ret_file)
2168 {
2169         struct cifsFileInfo *cfile;
2170         void *page = alloc_dentry_path();
2171
2172         *ret_file = NULL;
2173
2174         spin_lock(&tcon->open_file_lock);
2175         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2176                 struct cifsInodeInfo *cinode;
2177                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2178                 if (IS_ERR(full_path)) {
2179                         spin_unlock(&tcon->open_file_lock);
2180                         free_dentry_path(page);
2181                         return PTR_ERR(full_path);
2182                 }
2183                 if (strcmp(full_path, name))
2184                         continue;
2185
2186                 cinode = CIFS_I(d_inode(cfile->dentry));
2187                 spin_unlock(&tcon->open_file_lock);
2188                 free_dentry_path(page);
2189                 *ret_file = find_readable_file(cinode, 0);
2190                 return *ret_file ? 0 : -ENOENT;
2191         }
2192
2193         spin_unlock(&tcon->open_file_lock);
2194         free_dentry_path(page);
2195         return -ENOENT;
2196 }
2197
2198 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2199 {
2200         struct address_space *mapping = page->mapping;
2201         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2202         char *write_data;
2203         int rc = -EFAULT;
2204         int bytes_written = 0;
2205         struct inode *inode;
2206         struct cifsFileInfo *open_file;
2207
2208         if (!mapping || !mapping->host)
2209                 return -EFAULT;
2210
2211         inode = page->mapping->host;
2212
2213         offset += (loff_t)from;
2214         write_data = kmap(page);
2215         write_data += from;
2216
2217         if ((to > PAGE_SIZE) || (from > to)) {
2218                 kunmap(page);
2219                 return -EIO;
2220         }
2221
2222         /* racing with truncate? */
2223         if (offset > mapping->host->i_size) {
2224                 kunmap(page);
2225                 return 0; /* don't care */
2226         }
2227
2228         /* check to make sure that we are not extending the file */
2229         if (mapping->host->i_size - offset < (loff_t)to)
2230                 to = (unsigned)(mapping->host->i_size - offset);
2231
2232         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2233                                     &open_file);
2234         if (!rc) {
2235                 bytes_written = cifs_write(open_file, open_file->pid,
2236                                            write_data, to - from, &offset);
2237                 cifsFileInfo_put(open_file);
2238                 /* Does mm or vfs already set times? */
2239                 inode->i_atime = inode->i_mtime = current_time(inode);
2240                 if ((bytes_written > 0) && (offset))
2241                         rc = 0;
2242                 else if (bytes_written < 0)
2243                         rc = bytes_written;
2244                 else
2245                         rc = -EFAULT;
2246         } else {
2247                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2248                 if (!is_retryable_error(rc))
2249                         rc = -EIO;
2250         }
2251
2252         kunmap(page);
2253         return rc;
2254 }
2255
2256 static struct cifs_writedata *
2257 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2258                           pgoff_t end, pgoff_t *index,
2259                           unsigned int *found_pages)
2260 {
2261         struct cifs_writedata *wdata;
2262
2263         wdata = cifs_writedata_alloc((unsigned int)tofind,
2264                                      cifs_writev_complete);
2265         if (!wdata)
2266                 return NULL;
2267
2268         *found_pages = find_get_pages_range_tag(mapping, index, end,
2269                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2270         return wdata;
2271 }
2272
/*
 * Lock and claim a run of consecutive dirty pages from wdata->pages for
 * writeback, marking each claimed page as under writeback and dropping
 * references on the pages that are not used.  Returns the number of
 * pages ready to send and updates *index, *next and *done for the
 * caller's scan loop.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		/* block on the first page only, never on the rest */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		/* a page wholly past EOF has nothing to write */
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2351
2352 static int
2353 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2354                  struct address_space *mapping, struct writeback_control *wbc)
2355 {
2356         int rc;
2357
2358         wdata->sync_mode = wbc->sync_mode;
2359         wdata->nr_pages = nr_pages;
2360         wdata->offset = page_offset(wdata->pages[0]);
2361         wdata->pagesz = PAGE_SIZE;
2362         wdata->tailsz = min(i_size_read(mapping->host) -
2363                         page_offset(wdata->pages[nr_pages - 1]),
2364                         (loff_t)PAGE_SIZE);
2365         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2366         wdata->pid = wdata->cfile->pid;
2367
2368         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2369         if (rc)
2370                 return rc;
2371
2372         if (wdata->cfile->invalidHandle)
2373                 rc = -EAGAIN;
2374         else
2375                 rc = wdata->server->ops->async_writev(wdata,
2376                                                       cifs_writedata_release);
2377
2378         return rc;
2379 }
2380
/*
 * ->writepages() for cifs: write back dirty pages of @mapping in the range
 * described by @wbc, batching runs of consecutive dirty pages into a single
 * async SMB write of up to wsize bytes (see wdata_send_pages()).
 *
 * Returns 0 on success or the first non-retryable error encountered
 * (saved_rc), after attempting the rest of the range.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct cifsFileInfo *cfile = NULL;
	int rc = 0;
	int saved_rc = 0;	/* first hard error; reported after the scan */
	unsigned int xid;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->ctx->wsize < PAGE_SIZE)
		return generic_writepages(mapping, wbc);

	xid = get_xid();
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		/* explicit range: no cyclic wrap-around pass needed */
		scanned = true;
	}
	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);

retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize;
		pgoff_t next = 0, tofind, saved_index = index;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		int get_file_rc = 0;

		/* drop the handle reference from the previous iteration */
		if (cfile)
			cifsFileInfo_put(cfile);

		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);

		/* in case of an error store it to return later */
		if (rc)
			get_file_rc = rc;

		/* reserve send credits before gathering pages */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc != 0) {
			done = true;
			break;
		}

		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			done = true;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			/* no dirty pages left in range: done */
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		/* wdata now owns the credits and the file handle reference */
		wdata->credits = credits_on_stack;
		wdata->cfile = cfile;
		wdata->server = server;
		cfile = NULL;

		if (!wdata->cfile) {
			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
				 get_file_rc);
			if (is_retryable_error(get_file_rc))
				rc = get_file_rc;
			else
				rc = -EBADF;
		} else
			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (is_retryable_error(rc))
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				put_page(wdata->pages[i]);
			}
			if (!is_retryable_error(rc))
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		/* data-integrity sync must retry a transient -EAGAIN batch */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		/* Return immediately if we received a signal during writing */
		if (is_interrupt_error(rc)) {
			done = true;
			break;
		}

		if (rc != 0 && saved_rc == 0)
			saved_rc = rc;

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (saved_rc != 0)
		rc = saved_rc;

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	if (cfile)
		cifsFileInfo_put(cfile);
	free_xid(xid);
	/* Indication to update ctime and mtime as close is deferred */
	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
	return rc;
}
2545
/*
 * Write one locked page to the server via cifs_partialpagewrite().
 * Takes an extra page reference for the duration of the write; the page
 * lock itself is NOT released here -- see cifs_writepage() for that.
 * Returns 0 on success or a negative error.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		/* WB_SYNC_ALL may not give up on a transient -EAGAIN */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}
2586
/* ->writepage(): write the locked page out, then drop the page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int retval;

	retval = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return retval;
}
2593
/*
 * ->write_end() for cifs: finish a buffered write of @copied bytes into
 * @page at @pos, started by ->write_begin().  If the page cannot be marked
 * uptodate, the data is written straight to the server via cifs_write();
 * otherwise the page is just dirtied for later writeback.  Returns the
 * number of bytes accepted (normally @copied) or a negative error.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* pick the pid the server-side write will be attributed to */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/*
	 * PageChecked means write_begin deferred reading the page in;
	 * it is uptodate only if this write covered the requested length.
	 */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		/* partial page, rest unknown: push the bytes synchronously */
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		/* extend the cached file size if we wrote past it */
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);
	/* Indication to update ctime and mtime as close is deferred */
	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);

	return rc;
}
2656
/*
 * fsync for strict cache mode: flush and wait on dirty pagecache in
 * [start, end], invalidate the local cache if we no longer hold read
 * caching (oplock/lease) on the inode, then ask the server to flush its
 * copy unless the mount disabled server-side sync (nostrictsync).
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(inode->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	return rc;
}
2699
/*
 * fsync for non-strict cache modes: flush and wait on dirty pagecache in
 * [start, end], then ask the server to flush its copy unless the mount
 * disabled server-side sync (nostrictsync).  Unlike cifs_strict_fsync(),
 * the local cache is never invalidated here.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	return rc;
}
2732
2733 /*
2734  * As file closes, flush all cached write data for this inode checking
2735  * for write behind errors.
2736  */
2737 int cifs_flush(struct file *file, fl_owner_t id)
2738 {
2739         struct inode *inode = file_inode(file);
2740         int rc = 0;
2741
2742         if (file->f_mode & FMODE_WRITE)
2743                 rc = filemap_write_and_wait(inode->i_mapping);
2744
2745         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2746         if (rc)
2747                 trace_cifs_flush_err(inode->i_ino, rc);
2748         return rc;
2749 }
2750
2751 static int
2752 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2753 {
2754         int rc = 0;
2755         unsigned long i;
2756
2757         for (i = 0; i < num_pages; i++) {
2758                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2759                 if (!pages[i]) {
2760                         /*
2761                          * save number of pages we have already allocated and
2762                          * return with ENOMEM error
2763                          */
2764                         num_pages = i;
2765                         rc = -ENOMEM;
2766                         break;
2767                 }
2768         }
2769
2770         if (rc) {
2771                 for (i = 0; i < num_pages; i++)
2772                         put_page(pages[i]);
2773         }
2774         return rc;
2775 }
2776
2777 static inline
2778 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2779 {
2780         size_t num_pages;
2781         size_t clen;
2782
2783         clen = min_t(const size_t, len, wsize);
2784         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2785
2786         if (cur_len)
2787                 *cur_len = clen;
2788
2789         return num_pages;
2790 }
2791
2792 static void
2793 cifs_uncached_writedata_release(struct kref *refcount)
2794 {
2795         int i;
2796         struct cifs_writedata *wdata = container_of(refcount,
2797                                         struct cifs_writedata, refcount);
2798
2799         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2800         for (i = 0; i < wdata->nr_pages; i++)
2801                 put_page(wdata->pages[i]);
2802         cifs_writedata_release(refcount);
2803 }
2804
2805 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2806
/*
 * Work callback run when an uncached async write completes: advance the
 * server EOF / cached i_size under i_lock, signal the waiter, and have the
 * aio context collect the result.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	/* wake anyone waiting on this wdata before collecting results */
	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2826
/*
 * Copy up to *len bytes from the user iterator @from into the pages already
 * attached to @wdata.  On return, *len is the number of bytes actually
 * copied and *num_pages the number of pages used.  Returns 0, or -EFAULT
 * when nothing at all could be copied (e.g. a bogus user address).
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	/* cur_len now becomes "bytes copied" rather than "bytes remaining" */
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}
2868
/*
 * Retry sending a previously-failed uncached write request.  Reopens the
 * file handle if needed, waits until enough credits are available to resend
 * the whole wdata in one go, and re-submits via async_writev.  On success
 * the wdata is added to @wdata_list; on permanent failure its reference is
 * dropped and the error returned.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				/* not enough yet: give back and retry later */
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* the old RDMA registration cannot be reused */
				if (wdata->mr) {
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}
2938
/*
 * Split an uncached write of @len bytes at @offset (data taken from @from)
 * into wsize-bounded async write requests and submit them.  For direct I/O
 * the user pages are pinned in place; otherwise kernel pages are allocated
 * and the data copied in.  Each submitted wdata is appended to @wdata_list
 * for later collection by collect_uncached_write_data().
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	/* saved copies allow rewinding the iterator on -EAGAIN */
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* reserve credits before building the request */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			/* pin the user's pages and send from them directly */
			ssize_t result;

			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(from),
					 from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}


			/* data may begin mid-page; tail page may be short */
			wdata->page_offset = start;
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			/* buffered path: allocate pages and copy data in */
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->server = server;
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		/* wdata holds a reference on the aio context */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator and retry this chunk */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3104
/*
 * Collect the results of previously issued uncached writes on @ctx.
 *
 * Called from __cifs_writev() and from each request's completion path.
 * Walks ctx->list reaping finished requests; a request that failed with
 * -EAGAIN has its byte range resent.  Once every request is accounted
 * for, the total byte count or the first error is stored in ctx->rc and
 * the aio iocb (or the synchronous waiter) is completed.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* an empty list means another caller already finished collection */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			/*
			 * Not done yet -- bail out; the completion path of
			 * the still-pending request will call us again.
			 */
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					/* direct I/O: reuse wdata's pinned pages */
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					/* re-copy the failed range from ctx->iter */
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	/* cached pages are now stale relative to what was just written */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	/* wake the aio submitter or the synchronous waiter */
	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3186
/*
 * Common implementation of uncached (cache-bypassing) writes used for
 * both direct (@direct == true) and non-direct user writes.
 *
 * Splits the write into one or more async requests via
 * cifs_write_from_iter(), then either waits for completion (sync kiocb)
 * or returns -EIOCBQUEUED and lets collect_uncached_write_data() finish
 * the aio from the completion handlers.
 *
 * Returns the number of bytes written, or a negative error.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(from)) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		/* direct I/O: send straight from the caller's iterator */
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		/* non-direct: set up a ctx-owned copy of the data */
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* aio: the completion handlers own ctx from here on */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting; report whatever completed so far */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3290
3291 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3292 {
3293         return __cifs_writev(iocb, from, true);
3294 }
3295
3296 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3297 {
3298         return __cifs_writev(iocb, from, false);
3299 }
3300
/*
 * Cached write that first checks for conflicting byte-range locks.
 *
 * Lock ordering: the inode lock is taken first, then lock_sem is held
 * shared across the conflict check and the write itself so no brlock
 * that would forbid this write can appear in between.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* O_SYNC/O_DSYNC flushing happens after both locks are dropped */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
3336
/*
 * Write entry point for strict cache mode: use the page cache only when
 * we hold write-caching state on the inode, otherwise send the data
 * uncached and invalidate any read-cached pages.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* bail out if we cannot register as a writer on this inode */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/*
		 * Unix extensions with POSIX brlocks allowed: the plain
		 * generic cached write path is safe.
		 */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		/* otherwise check for brlock conflicts before writing */
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3386
3387 static struct cifs_readdata *
3388 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3389 {
3390         struct cifs_readdata *rdata;
3391
3392         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3393         if (rdata != NULL) {
3394                 rdata->pages = pages;
3395                 kref_init(&rdata->refcount);
3396                 INIT_LIST_HEAD(&rdata->list);
3397                 init_completion(&rdata->done);
3398                 INIT_WORK(&rdata->work, complete);
3399         }
3400
3401         return rdata;
3402 }
3403
3404 static struct cifs_readdata *
3405 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3406 {
3407         struct page **pages =
3408                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3409         struct cifs_readdata *ret = NULL;
3410
3411         if (pages) {
3412                 ret = cifs_readdata_direct_alloc(pages, complete);
3413                 if (!ret)
3414                         kfree(pages);
3415         }
3416
3417         return ret;
3418 }
3419
/*
 * Final kref release for a readdata: tear down the smbdirect MR if one
 * is attached, drop the file reference, and free the page-pointer array
 * and the structure itself.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	/* frees only the pointer array; the pages are put by our callers */
	kvfree(rdata->pages);
	kfree(rdata);
}
3437
3438 static int
3439 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3440 {
3441         int rc = 0;
3442         struct page *page;
3443         unsigned int i;
3444
3445         for (i = 0; i < nr_pages; i++) {
3446                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3447                 if (!page) {
3448                         rc = -ENOMEM;
3449                         break;
3450                 }
3451                 rdata->pages[i] = page;
3452         }
3453
3454         if (rc) {
3455                 unsigned int nr_page_failed = i;
3456
3457                 for (i = 0; i < nr_page_failed; i++) {
3458                         put_page(rdata->pages[i]);
3459                         rdata->pages[i] = NULL;
3460                 }
3461         }
3462         return rc;
3463 }
3464
/*
 * kref release for readdata used by uncached reads: put the pinned or
 * allocated data pages, drop our reference on the owning aio context,
 * then hand off to cifs_readdata_release() for the rest.
 */
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
	}
	cifs_readdata_release(refcount);
}
3478
3479 /**
3480  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3481  * @rdata:      the readdata response with list of pages holding data
3482  * @iter:       destination for our data
3483  *
3484  * This function copies data from a list of pages in a readdata response into
3485  * an array of iovecs. It will first calculate where the data should go
3486  * based on the info in the readdata and then copy the data into that spot.
3487  */
3488 static int
3489 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3490 {
3491         size_t remaining = rdata->got_bytes;
3492         unsigned int i;
3493
3494         for (i = 0; i < rdata->nr_pages; i++) {
3495                 struct page *page = rdata->pages[i];
3496                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3497                 size_t written;
3498
3499                 if (unlikely(iov_iter_is_pipe(iter))) {
3500                         void *addr = kmap_atomic(page);
3501
3502                         written = copy_to_iter(addr, copy, iter);
3503                         kunmap_atomic(addr);
3504                 } else
3505                         written = copy_page_to_iter(page, 0, copy, iter);
3506                 remaining -= written;
3507                 if (written < copy && iov_iter_count(iter) > 0)
3508                         break;
3509         }
3510         return remaining ? -EFAULT : 0;
3511 }
3512
3513 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3514
/*
 * Work handler run when an uncached read finishes.  Mark this request
 * done *before* collecting, so collect_uncached_read_data() can reap it;
 * the trailing kref_put may drop the last reference and free everything.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3526
3527 static int
3528 uncached_fill_pages(struct TCP_Server_Info *server,
3529                     struct cifs_readdata *rdata, struct iov_iter *iter,
3530                     unsigned int len)
3531 {
3532         int result = 0;
3533         unsigned int i;
3534         unsigned int nr_pages = rdata->nr_pages;
3535         unsigned int page_offset = rdata->page_offset;
3536
3537         rdata->got_bytes = 0;
3538         rdata->tailsz = PAGE_SIZE;
3539         for (i = 0; i < nr_pages; i++) {
3540                 struct page *page = rdata->pages[i];
3541                 size_t n;
3542                 unsigned int segment_size = rdata->pagesz;
3543
3544                 if (i == 0)
3545                         segment_size -= page_offset;
3546                 else
3547                         page_offset = 0;
3548
3549
3550                 if (len <= 0) {
3551                         /* no need to hold page hostage */
3552                         rdata->pages[i] = NULL;
3553                         rdata->nr_pages--;
3554                         put_page(page);
3555                         continue;
3556                 }
3557
3558                 n = len;
3559                 if (len >= segment_size)
3560                         /* enough data to fill the page */
3561                         n = segment_size;
3562                 else
3563                         rdata->tailsz = len;
3564                 len -= n;
3565
3566                 if (iter)
3567                         result = copy_page_from_iter(
3568                                         page, page_offset, n, iter);
3569 #ifdef CONFIG_CIFS_SMB_DIRECT
3570                 else if (rdata->mr)
3571                         result = n;
3572 #endif
3573                 else
3574                         result = cifs_read_page_from_socket(
3575                                         server, page, page_offset, n);
3576                 if (result < 0)
3577                         break;
3578
3579                 rdata->got_bytes += result;
3580         }
3581
3582         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3583                                                 rdata->got_bytes : result;
3584 }
3585
3586 static int
3587 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3588                               struct cifs_readdata *rdata, unsigned int len)
3589 {
3590         return uncached_fill_pages(server, rdata, NULL, len);
3591 }
3592
3593 static int
3594 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3595                               struct cifs_readdata *rdata,
3596                               struct iov_iter *iter)
3597 {
3598         return uncached_fill_pages(server, rdata, iter, iter->count);
3599 }
3600
/*
 * Resend an entire read request after a retryable failure (-EAGAIN).
 *
 * Reopens an invalidated handle if necessary, waits until enough
 * credits to cover the whole rdata->bytes are available (deliberately
 * not splitting it into smaller segments), then reissues the async
 * read.  On success the rdata goes back on @rdata_list; on permanent
 * failure the rdata reference is dropped and the error returned.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			/* not enough for the full request -- return them
			   and wait before trying again */
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				if (rdata->mr) {
					/*
					 * NOTE(review): tear down the MR from
					 * the failed send; presumably the
					 * resend path registers a fresh one --
					 * confirm.
					 */
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}
3672
/*
 * Split an uncached read of [@offset, @offset + @len) into rsize-bounded
 * async requests and queue them on @rdata_list.
 *
 * For direct I/O the destination user pages are pinned with
 * iov_iter_get_pages_alloc(); otherwise fresh pages are allocated and
 * the response is copied/read into them later.  Each issued rdata holds
 * a reference on @ctx, and its completion runs
 * cifs_uncached_readv_complete().
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* skip the part of the iterator before @offset (resend case) */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(&direct_iov),
					 direct_iov.iov_offset,
					 direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			/* may have pinned fewer bytes than requested */
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			/* first page may start at @start; size the tail */
			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			/* handle may have been invalidated meanwhile */
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* rewind so the retry covers the same range */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3814
/*
 * Collect the results of previously issued uncached reads on @ctx.
 *
 * Called from __cifs_readv() and from each request's completion work.
 * Reaps finished requests in order of increasing offset, copying their
 * data to the destination iterator (non-direct case) and resending any
 * range that failed with -EAGAIN.  Once everything is accounted for,
 * the total or the first error lands in ctx->rc and the aio iocb (or
 * the synchronous waiter) is completed.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* an empty list means another caller already finished collection */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* not done yet -- its completion work will call us
			   back once it finishes */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					/* reissue only the unreceived tail */
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	/* wake the aio submitter or the synchronous waiter */
	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3919
/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv().
 * Splits the request into async reads tracked by a cifs_aio_ctx; for a
 * synchronous kiocb it waits for completion, otherwise it returns
 * -EIOCBQUEUED and the completion handler finishes the iocb.
 *
 * @direct: read server data straight into the caller's iov_iter instead of
 *          staging through pages set up by setup_aio_ctx_iter() (forced off
 *          for ITER_KVEC, see below).
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx holds its own reference on the open file */
	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		/* non-direct: pin/copy pages; may shrink len to what fit */
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* async kiocb: drop our ref and let the completion path finish */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: record -EINTR but keep partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	/* a partial read wins over any error code */
	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
4022
/* Uncached read that targets the caller's iov_iter directly (direct=true). */
ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}
4027
/* Uncached read that stages data through copied pages (direct=false). */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}
4032
/*
 * Read entry point for strict cache mode: only use the page cache when we
 * hold a read (level II) oplock/lease, and only when no conflicting brlock
 * covers the range being read.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/*
	 * With POSIX (fcntl-capable unix extensions, no-posix-brl not set)
	 * the server handles lock semantics, so the generic cached path is
	 * safe without a local conflict scan.
	 */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
4072
/*
 * Synchronous read: pull up to @read_size bytes at *@offset into
 * @read_data, looping over rsize-limited chunks via the server's
 * sync_read op.  Advances *@offset past the data read.  Returns the
 * total bytes read, or a negative errno only when nothing was read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms = {0};
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = cifs_pick_channel(tcon->ses);

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* forward the opener's pid if the mount asked for it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* inner loop retries after -EAGAIN (handle reopened) */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			io_parms.server = server;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* short read: return what we have; error only if none */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
4164
4165 /*
4166  * If the page is mmap'ed into a process' page tables, then we need to make
4167  * sure that it doesn't change while being written back.
4168  */
static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	/* return the page locked so it stays stable during writeback */
	lock_page(page);
	return VM_FAULT_LOCKED;
}
4177
/* mmap ops shared by cifs_file_mmap() and cifs_file_strict_mmap() */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
4183
4184 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4185 {
4186         int xid, rc = 0;
4187         struct inode *inode = file_inode(file);
4188
4189         xid = get_xid();
4190
4191         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4192                 rc = cifs_zap_mapping(inode);
4193         if (!rc)
4194                 rc = generic_file_mmap(file, vma);
4195         if (!rc)
4196                 vma->vm_ops = &cifs_file_vm_ops;
4197
4198         free_xid(xid);
4199         return rc;
4200 }
4201
4202 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4203 {
4204         int rc, xid;
4205
4206         xid = get_xid();
4207
4208         rc = cifs_revalidate_file(file);
4209         if (rc)
4210                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4211                          rc);
4212         if (!rc)
4213                 rc = generic_file_mmap(file, vma);
4214         if (!rc)
4215                 vma->vm_ops = &cifs_file_vm_ops;
4216
4217         free_xid(xid);
4218         return rc;
4219 }
4220
4221 static void
4222 cifs_readv_complete(struct work_struct *work)
4223 {
4224         unsigned int i, got_bytes;
4225         struct cifs_readdata *rdata = container_of(work,
4226                                                 struct cifs_readdata, work);
4227
4228         got_bytes = rdata->got_bytes;
4229         for (i = 0; i < rdata->nr_pages; i++) {
4230                 struct page *page = rdata->pages[i];
4231
4232                 lru_cache_add(page);
4233
4234                 if (rdata->result == 0 ||
4235                     (rdata->result == -EAGAIN && got_bytes)) {
4236                         flush_dcache_page(page);
4237                         SetPageUptodate(page);
4238                 }
4239
4240                 unlock_page(page);
4241
4242                 if (rdata->result == 0 ||
4243                     (rdata->result == -EAGAIN && got_bytes))
4244                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4245
4246                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4247
4248                 put_page(page);
4249                 rdata->pages[i] = NULL;
4250         }
4251         kref_put(&rdata->refcount, cifs_readdata_release);
4252 }
4253
/*
 * Fill the rdata's pages with @len bytes of response data, taken from
 * @iter when set (copy path / SMB direct), otherwise read from the
 * server socket.  Pages beyond the data and past the (probable) server
 * EOF are zeroed or dropped.  Returns bytes placed, or a negative error
 * when nothing was received.
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page can start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			/* SMB direct: data was RDMA'd in already */
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* partial data beats an error, except a dropped connection */
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
4339
/* read_into_pages hook: pull response data from the server socket */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
4346
/* copy_into_pages hook: take response data from an in-memory iterator */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
4354
/*
 * Peel a run of index-contiguous pages (at most @rsize bytes) off the
 * head of @page_list, add them to the page cache locked, and move them
 * onto @tmplist.  On return *offset/*bytes/*nr_pages describe the run.
 * Returns the add_to_page_cache_locked() error for the first page, or
 * the error that ended a partially-built run (callers check *nr_pages
 * to see whether anything was gathered).
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4414
/*
 * ->readpages: try fscache first, then repeatedly carve contiguous,
 * rsize-bounded runs off @page_list and issue one async read per run.
 * Completion (cifs_readv_complete) unlocks the pages; every error path
 * here must put back the pages it still owns.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* blocks until credits for an rsize-sized read are available */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		/* tmplist is in increasing index order; keep that in pages[] */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our ref; the async completion holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4569
4570 /*
4571  * cifs_readpage_worker must be called with the page pinned
4572  */
4573 static int cifs_readpage_worker(struct file *file, struct page *page,
4574         loff_t *poffset)
4575 {
4576         char *read_data;
4577         int rc;
4578
4579         /* Is the page cached? */
4580         rc = cifs_readpage_from_fscache(file_inode(file), page);
4581         if (rc == 0)
4582                 goto read_complete;
4583
4584         read_data = kmap(page);
4585         /* for reads over a certain size could initiate async read ahead */
4586
4587         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4588
4589         if (rc < 0)
4590                 goto io_error;
4591         else
4592                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4593
4594         /* we do not want atime to be less than mtime, it broke some apps */
4595         file_inode(file)->i_atime = current_time(file_inode(file));
4596         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4597                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4598         else
4599                 file_inode(file)->i_atime = current_time(file_inode(file));
4600
4601         if (PAGE_SIZE > rc)
4602                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4603
4604         flush_dcache_page(page);
4605         SetPageUptodate(page);
4606
4607         /* send this page to the cache */
4608         cifs_readpage_to_fscache(file_inode(file), page);
4609
4610         rc = 0;
4611
4612 io_error:
4613         kunmap(page);
4614         unlock_page(page);
4615
4616 read_complete:
4617         return rc;
4618 }
4619
4620 static int cifs_readpage(struct file *file, struct page *page)
4621 {
4622         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4623         int rc = -EACCES;
4624         unsigned int xid;
4625
4626         xid = get_xid();
4627
4628         if (file->private_data == NULL) {
4629                 rc = -EBADF;
4630                 free_xid(xid);
4631                 return rc;
4632         }
4633
4634         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4635                  page, (int)offset, (int)offset);
4636
4637         rc = cifs_readpage_worker(file, page, &offset);
4638
4639         free_xid(xid);
4640         return rc;
4641 }
4642
4643 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4644 {
4645         struct cifsFileInfo *open_file;
4646
4647         spin_lock(&cifs_inode->open_file_lock);
4648         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4649                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4650                         spin_unlock(&cifs_inode->open_file_lock);
4651                         return 1;
4652                 }
4653         }
4654         spin_unlock(&cifs_inode->open_file_lock);
4655         return 0;
4656 }
4657
4658 /* We do not want to update the file size from server for inodes
4659    open for write - to avoid races with writepage extending
4660    the file - in the future we could consider allowing
4661    refreshing the inode only on increases in the file size
4662    but this is tricky to do without racing with writebehind
4663    page caching in the current Linux kernel design */
4664 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4665 {
4666         if (!cifsInode)
4667                 return true;
4668
4669         if (is_inode_writable(cifsInode)) {
4670                 /* This inode is open for write at least once */
4671                 struct cifs_sb_info *cifs_sb;
4672
4673                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4674                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4675                         /* since no page cache to corrupt on directio
4676                         we can change size safely */
4677                         return true;
4678                 }
4679
4680                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4681                         return true;
4682
4683                 return false;
4684         } else
4685                 return true;
4686 }
4687
/*
 * ->write_begin: return a locked, cached page for the write at @pos.
 * Reads the page from the server first only when we'd otherwise lose
 * existing data around a short write; full-page writes and writes with
 * a read oplock past EOF skip the read.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;	/* guards the read-then-retry path below */
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4764
4765 static int cifs_release_page(struct page *page, gfp_t gfp)
4766 {
4767         if (PagePrivate(page))
4768                 return 0;
4769
4770         return cifs_fscache_release_page(page, gfp);
4771 }
4772
4773 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4774                                  unsigned int length)
4775 {
4776         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4777
4778         if (offset == 0 && length == PAGE_SIZE)
4779                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4780 }
4781
4782 static int cifs_launder_page(struct page *page)
4783 {
4784         int rc = 0;
4785         loff_t range_start = page_offset(page);
4786         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4787         struct writeback_control wbc = {
4788                 .sync_mode = WB_SYNC_ALL,
4789                 .nr_to_write = 0,
4790                 .range_start = range_start,
4791                 .range_end = range_end,
4792         };
4793
4794         cifs_dbg(FYI, "Launder page: %p\n", page);
4795
4796         if (clear_page_dirty_for_io(page))
4797                 rc = cifs_writepage_locked(page, &wbc);
4798
4799         cifs_fscache_invalidate_page(page, page->mapping->host);
4800         return rc;
4801 }
4802
4803 void cifs_oplock_break(struct work_struct *work)
4804 {
4805         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4806                                                   oplock_break);
4807         struct inode *inode = d_inode(cfile->dentry);
4808         struct cifsInodeInfo *cinode = CIFS_I(inode);
4809         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4810         struct TCP_Server_Info *server = tcon->ses->server;
4811         int rc = 0;
4812         bool purge_cache = false;
4813         bool is_deferred = false;
4814         struct cifs_deferred_close *dclose;
4815
4816         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4817                         TASK_UNINTERRUPTIBLE);
4818
4819         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4820                                       cfile->oplock_epoch, &purge_cache);
4821
4822         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4823                                                 cifs_has_mand_locks(cinode)) {
4824                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4825                          inode);
4826                 cinode->oplock = 0;
4827         }
4828
4829         if (inode && S_ISREG(inode->i_mode)) {
4830                 if (CIFS_CACHE_READ(cinode))
4831                         break_lease(inode, O_RDONLY);
4832                 else
4833                         break_lease(inode, O_WRONLY);
4834                 rc = filemap_fdatawrite(inode->i_mapping);
4835                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4836                         rc = filemap_fdatawait(inode->i_mapping);
4837                         mapping_set_error(inode->i_mapping, rc);
4838                         cifs_zap_mapping(inode);
4839                 }
4840                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4841                 if (CIFS_CACHE_WRITE(cinode))
4842                         goto oplock_break_ack;
4843         }
4844
4845         rc = cifs_push_locks(cfile);
4846         if (rc)
4847                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4848
4849 oplock_break_ack:
4850         /*
4851          * releasing stale oplock after recent reconnect of smb session using
4852          * a now incorrect file handle is not a data integrity issue but do
4853          * not bother sending an oplock release if session to server still is
4854          * disconnected since oplock already released by the server
4855          */
4856         if (!cfile->oplock_break_cancelled) {
4857                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4858                                                              cinode);
4859                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4860         }
4861         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4862         /*
4863          * When oplock break is received and there are no active
4864          * file handles but cached, then set the flag oplock_break_received.
4865          * So, new open will not use cached handle.
4866          */
4867         spin_lock(&CIFS_I(inode)->deferred_lock);
4868         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4869         if (is_deferred) {
4870                 cfile->oplock_break_received = true;
4871                 mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
4872         }
4873         spin_unlock(&CIFS_I(inode)->deferred_lock);
4874         cifs_done_oplock_break(cinode);
4875 }
4876
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
 * so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
4895
4896 static int cifs_swap_activate(struct swap_info_struct *sis,
4897                               struct file *swap_file, sector_t *span)
4898 {
4899         struct cifsFileInfo *cfile = swap_file->private_data;
4900         struct inode *inode = swap_file->f_mapping->host;
4901         unsigned long blocks;
4902         long long isize;
4903
4904         cifs_dbg(FYI, "swap activate\n");
4905
4906         spin_lock(&inode->i_lock);
4907         blocks = inode->i_blocks;
4908         isize = inode->i_size;
4909         spin_unlock(&inode->i_lock);
4910         if (blocks*512 < isize) {
4911                 pr_warn("swap activate: swapfile has holes\n");
4912                 return -EINVAL;
4913         }
4914         *span = sis->pages;
4915
4916         pr_warn_once("Swap support over SMB3 is experimental\n");
4917
4918         /*
4919          * TODO: consider adding ACL (or documenting how) to prevent other
4920          * users (on this or other systems) from reading it
4921          */
4922
4923
4924         /* TODO: add sk_set_memalloc(inet) or similar */
4925
4926         if (cfile)
4927                 cfile->swapfile = true;
4928         /*
4929          * TODO: Since file already open, we can't open with DENY_ALL here
4930          * but we could add call to grab a byte range lock to prevent others
4931          * from reading or writing the file
4932          */
4933
4934         return 0;
4935 }
4936
4937 static void cifs_swap_deactivate(struct file *file)
4938 {
4939         struct cifsFileInfo *cfile = file->private_data;
4940
4941         cifs_dbg(FYI, "swap deactivate\n");
4942
4943         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4944
4945         if (cfile)
4946                 cfile->swapfile = false;
4947
4948         /* do we need to unpin (or unlock) the file */
4949 }
4950
/*
 * Address space operations for mounts whose server buffers are large
 * enough to support cifs_readpages (see cifs_addr_ops_smallbuf below
 * for the reduced set used otherwise).
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/*
	 * TODO: investigate and if useful we could add an cifs_migratePage
	 * helper (under an CONFIG_MIGRATION) in the future, and also
	 * investigate and add an is_dirty_writeback helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};
4971
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 *
 * Note: .readpages, .direct_IO, and the swap hooks present in
 * cifs_addr_ops are intentionally omitted here.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};