memalloc_noio_restore(noio_flag);
}
- static void zonefs_io_error(struct inode *inode, bool write)
+ static struct kmem_cache *zonefs_inode_cachep;
+
+ static struct inode *zonefs_alloc_inode(struct super_block *sb)
{
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct zonefs_inode_info *zi;
+
+ zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
+ if (!zi)
+ return NULL;
+
+ inode_init_once(&zi->i_vnode);
+ mutex_init(&zi->i_truncate_mutex);
+ zi->i_wr_refcnt = 0;
- mutex_lock(&zi->i_truncate_mutex);
- __zonefs_io_error(inode, write);
- mutex_unlock(&zi->i_truncate_mutex);
+ return &zi->i_vnode;
}
- static int zonefs_file_truncate(struct inode *inode, loff_t isize)
+ static void zonefs_free_inode(struct inode *inode)
{
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- loff_t old_isize;
- enum req_op op;
- int ret = 0;
+ kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
+ }
- /*
- * Only sequential zone files can be truncated and truncation is allowed
- * only down to a 0 size, which is equivalent to a zone reset, and to
- * the maximum file size, which is equivalent to a zone finish.
- */
- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
- return -EPERM;
+ /*
+ * File system stat.
+ */
+ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
+ {
+ struct super_block *sb = dentry->d_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ enum zonefs_ztype t;
- if (!isize)
- op = REQ_OP_ZONE_RESET;
- else if (isize == zi->i_max_size)
- op = REQ_OP_ZONE_FINISH;
+ buf->f_type = ZONEFS_MAGIC;
+ buf->f_bsize = sb->s_blocksize;
+ buf->f_namelen = ZONEFS_NAME_MAX;
+
+ spin_lock(&sbi->s_lock);
+
+ buf->f_blocks = sbi->s_blocks;
+ if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
+ buf->f_bfree = 0;
else
- return -EPERM;
+ buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
+ buf->f_bavail = buf->f_bfree;
+
+ for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
+ if (sbi->s_zgroup[t].g_nr_zones)
+ buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1;
+ }
+ buf->f_ffree = 0;
- inode_dio_wait(inode);
+ spin_unlock(&sbi->s_lock);
- /* Serialize against page faults */
- filemap_invalidate_lock(inode->i_mapping);
+ buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
- /* Serialize against zonefs_iomap_begin() */
- mutex_lock(&zi->i_truncate_mutex);
+ return 0;
+ }
- old_isize = i_size_read(inode);
- if (isize == old_isize)
- goto unlock;
+ enum {
+ Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
+ Opt_explicit_open, Opt_err,
+ };
- ret = zonefs_zone_mgmt(inode, op);
- if (ret)
- goto unlock;
+ static const match_table_t tokens = {
+ { Opt_errors_ro, "errors=remount-ro"},
+ { Opt_errors_zro, "errors=zone-ro"},
+ { Opt_errors_zol, "errors=zone-offline"},
+ { Opt_errors_repair, "errors=repair"},
+ { Opt_explicit_open, "explicit-open" },
+ { Opt_err, NULL}
+ };
- /*
- * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
- * take care of open zones.
- */
- if (zi->i_flags & ZONEFS_ZONE_OPEN) {
- /*
- * Truncating a zone to EMPTY or FULL is the equivalent of
- * closing the zone. For a truncation to 0, we need to
- * re-open the zone to ensure new writes can be processed.
- * For a truncation to the maximum file size, the zone is
- * closed and writes cannot be accepted anymore, so clear
- * the open flag.
- */
- if (!isize)
- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
- else
- zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+ static int zonefs_parse_options(struct super_block *sb, char *options)
+ {
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ substring_t args[MAX_OPT_ARGS];
+ char *p;
+
+ if (!options)
+ return 0;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_errors_ro:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO;
+ break;
+ case Opt_errors_zro:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO;
+ break;
+ case Opt_errors_zol:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL;
+ break;
+ case Opt_errors_repair:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
+ break;
+ case Opt_explicit_open:
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
+ break;
+ default:
+ return -EINVAL;
+ }
}
- zonefs_update_stats(inode, isize);
- truncate_setsize(inode, isize);
- zi->i_wpoffset = isize;
- zonefs_account_active(inode);
+ return 0;
+ }
+
+ static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
+ {
+ struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
+
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
+ seq_puts(seq, ",errors=remount-ro");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
+ seq_puts(seq, ",errors=zone-ro");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
+ seq_puts(seq, ",errors=zone-offline");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
+ seq_puts(seq, ",errors=repair");
- unlock:
- mutex_unlock(&zi->i_truncate_mutex);
- filemap_invalidate_unlock(inode->i_mapping);
+ return 0;
+ }
- return ret;
+ static int zonefs_remount(struct super_block *sb, int *flags, char *data)
+ {
+ sync_filesystem(sb);
+
+ return zonefs_parse_options(sb, data);
}
-static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
+static int zonefs_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
return ret;
}
- setattr_copy(&init_user_ns, inode, iattr);
+ setattr_copy(&nop_mnt_idmap, inode, iattr);
+ if (S_ISREG(inode->i_mode)) {
+ struct zonefs_zone *z = zonefs_inode_zone(inode);
+
+ z->z_mode = inode->i_mode;
+ z->z_uid = inode->i_uid;
+ z->z_gid = inode->i_gid;
+ }
+
return 0;
}
.setattr = zonefs_inode_setattr,
};
- static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
- int datasync)
+ static long zonefs_fname_to_fno(const struct qstr *fname)
{
- struct inode *inode = file_inode(file);
- int ret = 0;
-
- if (unlikely(IS_IMMUTABLE(inode)))
- return -EPERM;
+ const char *name = fname->name;
+ unsigned int len = fname->len;
+ long fno = 0, shift = 1;
+ const char *rname;
+ char c = *name;
+ unsigned int i;
/*
- * Since only direct writes are allowed in sequential files, page cache
- * flush is needed only for conventional zone files.
+ * File names are always a base-10 number string without any
+ * leading 0s.
*/
- if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
- ret = file_write_and_wait_range(file, start, end);
- if (!ret)
- ret = blkdev_issue_flush(inode->i_sb->s_bdev);
+ if (!isdigit(c))
+ return -ENOENT;
- if (ret)
- zonefs_io_error(inode, true);
+ if (len > 1 && c == '0')
+ return -ENOENT;
- return ret;
- }
+ if (len == 1)
+ return c - '0';
- static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
- {
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- vm_fault_t ret;
-
- if (unlikely(IS_IMMUTABLE(inode)))
- return VM_FAULT_SIGBUS;
-
- /*
- * Sanity check: only conventional zone files can have shared
- * writeable mappings.
- */
- if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV))
- return VM_FAULT_NOPAGE;
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vmf->vma->vm_file);
-
- /* Serialize against truncates */
- filemap_invalidate_lock_shared(inode->i_mapping);
- ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
- filemap_invalidate_unlock_shared(inode->i_mapping);
-
- sb_end_pagefault(inode->i_sb);
- return ret;
- }
-
- static const struct vm_operations_struct zonefs_file_vm_ops = {
- .fault = filemap_fault,
- .map_pages = filemap_map_pages,
- .page_mkwrite = zonefs_filemap_page_mkwrite,
- };
-
- static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma)
- {
- /*
- * Conventional zones accept random writes, so their files can support
- * shared writable mappings. For sequential zone files, only read
- * mappings are possible since there are no guarantees for write
- * ordering between msync() and page cache writeback.
- */
- if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ &&
- (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
- return -EINVAL;
-
- file_accessed(file);
- vma->vm_ops = &zonefs_file_vm_ops;
-
- return 0;
- }
-
- static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
- {
- loff_t isize = i_size_read(file_inode(file));
-
- /*
- * Seeks are limited to below the zone size for conventional zones
- * and below the zone write pointer for sequential zones. In both
- * cases, this limit is the inode size.
- */
- return generic_file_llseek_size(file, offset, whence, isize, isize);
- }
-
- static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
- int error, unsigned int flags)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
-
- if (error) {
- zonefs_io_error(inode, true);
- return error;
- }
-
- if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) {
- /*
- * Note that we may be seeing completions out of order,
- * but that is not a problem since a write completed
- * successfully necessarily means that all preceding writes
- * were also successful. So we can safely increase the inode
- * size to the write end location.
- */
- mutex_lock(&zi->i_truncate_mutex);
- if (i_size_read(inode) < iocb->ki_pos + size) {
- zonefs_update_stats(inode, iocb->ki_pos + size);
- zonefs_i_size_write(inode, iocb->ki_pos + size);
- }
- mutex_unlock(&zi->i_truncate_mutex);
- }
-
- return 0;
- }
-
- static const struct iomap_dio_ops zonefs_write_dio_ops = {
- .end_io = zonefs_file_write_dio_end_io,
- };
-
- static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct block_device *bdev = inode->i_sb->s_bdev;
- unsigned int max = bdev_max_zone_append_sectors(bdev);
- struct bio *bio;
- ssize_t size;
- int nr_pages;
- ssize_t ret;
-
- max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
- iov_iter_truncate(from, max);
-
- nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
- if (!nr_pages)
- return 0;
-
- bio = bio_alloc(bdev, nr_pages,
- REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
- bio->bi_iter.bi_sector = zi->i_zsector;
- bio->bi_ioprio = iocb->ki_ioprio;
- if (iocb_is_dsync(iocb))
- bio->bi_opf |= REQ_FUA;
-
- ret = bio_iov_iter_get_pages(bio, from);
- if (unlikely(ret))
- goto out_release;
-
- size = bio->bi_iter.bi_size;
- task_io_account_write(size);
-
- if (iocb->ki_flags & IOCB_HIPRI)
- bio_set_polled(bio, iocb);
-
- ret = submit_bio_wait(bio);
-
- /*
- * If the file zone was written underneath the file system, the zone
- * write pointer may not be where we expect it to be, but the zone
- * append write can still succeed. So check manually that we wrote where
- * we intended to, that is, at zi->i_wpoffset.
- */
- if (!ret) {
- sector_t wpsector =
- zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
-
- if (bio->bi_iter.bi_sector != wpsector) {
- zonefs_warn(inode->i_sb,
- "Corrupted write pointer %llu for zone at %llu\n",
- wpsector, zi->i_zsector);
- ret = -EIO;
- }
- }
-
- zonefs_file_write_dio_end_io(iocb, size, ret, 0);
- trace_zonefs_file_dio_append(inode, size, ret);
-
- out_release:
- bio_release_pages(bio, false);
- bio_put(bio);
-
- if (ret >= 0) {
- iocb->ki_pos += size;
- return size;
- }
-
- return ret;
- }
-
- /*
- * Do not exceed the LFS limits nor the file zone size. If pos is under the
- * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
- */
- static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
- loff_t count)
- {
- struct inode *inode = file_inode(file);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- loff_t limit = rlimit(RLIMIT_FSIZE);
- loff_t max_size = zi->i_max_size;
-
- if (limit != RLIM_INFINITY) {
- if (pos >= limit) {
- send_sig(SIGXFSZ, current, 0);
- return -EFBIG;
- }
- count = min(count, limit - pos);
- }
-
- if (!(file->f_flags & O_LARGEFILE))
- max_size = min_t(loff_t, MAX_NON_LFS, max_size);
-
- if (unlikely(pos >= max_size))
- return -EFBIG;
-
- return min(count, max_size - pos);
- }
-
- static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
- {
- struct file *file = iocb->ki_filp;
- struct inode *inode = file_inode(file);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- loff_t count;
-
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
-
- if (!iov_iter_count(from))
- return 0;
-
- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
- return -EINVAL;
-
- if (iocb->ki_flags & IOCB_APPEND) {
- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
- return -EINVAL;
- mutex_lock(&zi->i_truncate_mutex);
- iocb->ki_pos = zi->i_wpoffset;
- mutex_unlock(&zi->i_truncate_mutex);
+ for (i = 0, rname = name + len - 1; i < len; i++, rname--) {
+ c = *rname;
+ if (!isdigit(c))
+ return -ENOENT;
+ fno += (c - '0') * shift;
+ shift *= 10;
}
- count = zonefs_write_check_limits(file, iocb->ki_pos,
- iov_iter_count(from));
- if (count < 0)
- return count;
-
- iov_iter_truncate(from, count);
- return iov_iter_count(from);
- }
-
- /*
- * Handle direct writes. For sequential zone files, this is the only possible
- * write path. For these files, check that the user is issuing writes
- * sequentially from the end of the file. This code assumes that the block layer
- * delivers write requests to the device in sequential order. This is always the
- * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE
- * elevator feature is being used (e.g. mq-deadline). The block layer always
- * automatically select such an elevator for zoned block devices during the
- * device initialization.
- */
- static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct super_block *sb = inode->i_sb;
- bool sync = is_sync_kiocb(iocb);
- bool append = false;
- ssize_t ret, count;
-
- /*
- * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
- * as this can cause write reordering (e.g. the first aio gets EAGAIN
- * on the inode lock but the second goes through but is now unaligned).
- */
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
- (iocb->ki_flags & IOCB_NOWAIT))
- return -EOPNOTSUPP;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!inode_trylock(inode))
- return -EAGAIN;
- } else {
- inode_lock(inode);
- }
-
- count = zonefs_write_checks(iocb, from);
- if (count <= 0) {
- ret = count;
- goto inode_unlock;
- }
-
- if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
- ret = -EINVAL;
- goto inode_unlock;
- }
-
- /* Enforce sequential writes (append only) in sequential zones */
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
- mutex_lock(&zi->i_truncate_mutex);
- if (iocb->ki_pos != zi->i_wpoffset) {
- mutex_unlock(&zi->i_truncate_mutex);
- ret = -EINVAL;
- goto inode_unlock;
- }
- mutex_unlock(&zi->i_truncate_mutex);
- append = sync;
- }
-
- if (append)
- ret = zonefs_file_dio_append(iocb, from);
- else
- ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
- &zonefs_write_dio_ops, 0, NULL, 0);
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
- (ret > 0 || ret == -EIOCBQUEUED)) {
- if (ret > 0)
- count = ret;
-
- /*
- * Update the zone write pointer offset assuming the write
- * operation succeeded. If it did not, the error recovery path
- * will correct it. Also do active seq file accounting.
- */
- mutex_lock(&zi->i_truncate_mutex);
- zi->i_wpoffset += count;
- zonefs_account_active(inode);
- mutex_unlock(&zi->i_truncate_mutex);
- }
-
- inode_unlock:
- inode_unlock(inode);
-
- return ret;
- }
-
- static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
- struct iov_iter *from)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- ssize_t ret;
-
- /*
- * Direct IO writes are mandatory for sequential zone files so that the
- * write IO issuing order is preserved.
- */
- if (zi->i_ztype != ZONEFS_ZTYPE_CNV)
- return -EIO;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!inode_trylock(inode))
- return -EAGAIN;
- } else {
- inode_lock(inode);
- }
-
- ret = zonefs_write_checks(iocb, from);
- if (ret <= 0)
- goto inode_unlock;
-
- ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
- if (ret > 0)
- iocb->ki_pos += ret;
- else if (ret == -EIO)
- zonefs_io_error(inode, true);
-
- inode_unlock:
- inode_unlock(inode);
- if (ret > 0)
- ret = generic_write_sync(iocb, ret);
-
- return ret;
+ return fno;
}
- static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ static struct inode *zonefs_get_file_inode(struct inode *dir,
+ struct dentry *dentry)
{
- struct inode *inode = file_inode(iocb->ki_filp);
-
- if (unlikely(IS_IMMUTABLE(inode)))
- return -EPERM;
-
- if (sb_rdonly(inode->i_sb))
- return -EROFS;
-
- /* Write operations beyond the zone size are not allowed */
- if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
- return -EFBIG;
-
- if (iocb->ki_flags & IOCB_DIRECT) {
- ssize_t ret = zonefs_file_dio_write(iocb, from);
- if (ret != -ENOTBLK)
- return ret;
- }
-
- return zonefs_file_buffered_write(iocb, from);
- }
-
- static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size,
- int error, unsigned int flags)
- {
- if (error) {
- zonefs_io_error(file_inode(iocb->ki_filp), false);
- return error;
- }
-
- return 0;
- }
-
- static const struct iomap_dio_ops zonefs_read_dio_ops = {
- .end_io = zonefs_file_read_dio_end_io,
- };
-
- static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
- {
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct super_block *sb = inode->i_sb;
- loff_t isize;
- ssize_t ret;
-
- /* Offline zones cannot be read */
- if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
- return -EPERM;
-
- if (iocb->ki_pos >= zi->i_max_size)
- return 0;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!inode_trylock_shared(inode))
- return -EAGAIN;
- } else {
- inode_lock_shared(inode);
- }
+ struct zonefs_zone_group *zgroup = dir->i_private;
+ struct super_block *sb = dir->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ struct zonefs_zone *z;
+ struct inode *inode;
+ ino_t ino;
+ long fno;
- /* Limit read operations to written data */
- mutex_lock(&zi->i_truncate_mutex);
- isize = i_size_read(inode);
- if (iocb->ki_pos >= isize) {
- mutex_unlock(&zi->i_truncate_mutex);
- ret = 0;
- goto inode_unlock;
- }
- iov_iter_truncate(to, isize - iocb->ki_pos);
- mutex_unlock(&zi->i_truncate_mutex);
+ /* Get the file number from the file name */
+ fno = zonefs_fname_to_fno(&dentry->d_name);
+ if (fno < 0)
+ return ERR_PTR(fno);
- if (iocb->ki_flags & IOCB_DIRECT) {
- size_t count = iov_iter_count(to);
+ if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones)
+ return ERR_PTR(-ENOENT);
- if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
- ret = -EINVAL;
- goto inode_unlock;
- }
- file_accessed(iocb->ki_filp);
- ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops,
- &zonefs_read_dio_ops, 0, NULL, 0);
- } else {
- ret = generic_file_read_iter(iocb, to);
- if (ret == -EIO)
- zonefs_io_error(inode, false);
+ z = &zgroup->g_zones[fno];
+ ino = z->z_sector >> sbi->s_zone_sectors_shift;
+ inode = iget_locked(sb, ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW)) {
+ WARN_ON_ONCE(inode->i_private != z);
+ return inode;
}
- inode_unlock:
- inode_unlock_shared(inode);
-
- return ret;
- }
+ inode->i_ino = ino;
+ inode->i_mode = z->z_mode;
+ inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
+ inode->i_uid = z->z_uid;
+ inode->i_gid = z->z_gid;
+ inode->i_size = z->z_wpoffset;
+ inode->i_blocks = z->z_capacity >> SECTOR_SHIFT;
+ inode->i_private = z;
- /*
- * Write open accounting is done only for sequential files.
- */
- static inline bool zonefs_seq_file_need_wro(struct inode *inode,
- struct file *file)
- {
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ inode->i_op = &zonefs_file_inode_operations;
+ inode->i_fop = &zonefs_file_operations;
+ inode->i_mapping->a_ops = &zonefs_file_aops;
- if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
- return false;
+ /* Update the inode access rights depending on the zone condition */
+ zonefs_inode_update_mode(inode);
- if (!(file->f_mode & FMODE_WRITE))
- return false;
+ unlock_new_inode(inode);
- return true;
+ return inode;
}
- static int zonefs_seq_file_write_open(struct inode *inode)
+ static struct inode *zonefs_get_zgroup_inode(struct super_block *sb,
+ enum zonefs_ztype ztype)
{
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- int ret = 0;
-
- mutex_lock(&zi->i_truncate_mutex);
-
- if (!zi->i_wr_refcnt) {
- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
- unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);
-
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
-
- if (sbi->s_max_wro_seq_files
- && wro > sbi->s_max_wro_seq_files) {
- atomic_dec(&sbi->s_wro_seq_files);
- ret = -EBUSY;
- goto unlock;
- }
+ struct inode *root = d_inode(sb->s_root);
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ struct inode *inode;
+ ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1;
- if (i_size_read(inode) < zi->i_max_size) {
- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
- if (ret) {
- atomic_dec(&sbi->s_wro_seq_files);
- goto unlock;
- }
- zi->i_flags |= ZONEFS_ZONE_OPEN;
- zonefs_account_active(inode);
- }
- }
- }
+ inode = iget_locked(sb, ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+
+ inode->i_ino = ino;
- inode_init_owner(&init_user_ns, inode, root, S_IFDIR | 0555);
++ inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555);
+ inode->i_size = sbi->s_zgroup[ztype].g_nr_zones;
+ inode->i_ctime = inode->i_mtime = inode->i_atime = root->i_ctime;
+ inode->i_private = &sbi->s_zgroup[ztype];
+ set_nlink(inode, 2);
- zi->i_wr_refcnt++;
+ inode->i_op = &zonefs_dir_inode_operations;
+ inode->i_fop = &zonefs_dir_operations;
- unlock:
- mutex_unlock(&zi->i_truncate_mutex);
+ unlock_new_inode(inode);
- return ret;
+ return inode;
}
- static int zonefs_file_open(struct inode *inode, struct file *file)
- {
- int ret;
-
- ret = generic_file_open(inode, file);
- if (ret)
- return ret;
-
- if (zonefs_seq_file_need_wro(inode, file))
- return zonefs_seq_file_write_open(inode);
-
- return 0;
- }
- static void zonefs_seq_file_write_close(struct inode *inode)
+ static struct inode *zonefs_get_dir_inode(struct inode *dir,
+ struct dentry *dentry)
{
- struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct super_block *sb = inode->i_sb;
+ struct super_block *sb = dir->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
- int ret = 0;
-
- mutex_lock(&zi->i_truncate_mutex);
-
- zi->i_wr_refcnt--;
- if (zi->i_wr_refcnt)
- goto unlock;
+ const char *name = dentry->d_name.name;
+ enum zonefs_ztype ztype;
/*
- * The file zone may not be open anymore (e.g. the file was truncated to
- * its maximum size or it was fully written). For this case, we only
- * need to decrement the write open count.
+ * We only need to check for the "seq" directory and
+ * the "cnv" directory if we have conventional zones.
*/
- if (zi->i_flags & ZONEFS_ZONE_OPEN) {
- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
- if (ret) {
- __zonefs_io_error(inode, false);
- /*
- * Leaving zones explicitly open may lead to a state
- * where most zones cannot be written (zone resources
- * exhausted). So take preventive action by remounting
- * read-only.
- */
- if (zi->i_flags & ZONEFS_ZONE_OPEN &&
- !(sb->s_flags & SB_RDONLY)) {
- zonefs_warn(sb,
- "closing zone at %llu failed %d\n",
- zi->i_zsector, ret);
- zonefs_warn(sb,
- "remounting filesystem read-only\n");
- sb->s_flags |= SB_RDONLY;
- }
- goto unlock;
- }
+ if (dentry->d_name.len != 3)
+ return ERR_PTR(-ENOENT);
- zi->i_flags &= ~ZONEFS_ZONE_OPEN;
- zonefs_account_active(inode);
+ for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
+ if (sbi->s_zgroup[ztype].g_nr_zones &&
+ memcmp(name, zonefs_zgroup_name(ztype), 3) == 0)
+ break;
}
+ if (ztype == ZONEFS_ZTYPE_MAX)
+ return ERR_PTR(-ENOENT);
- atomic_dec(&sbi->s_wro_seq_files);
-
- unlock:
- mutex_unlock(&zi->i_truncate_mutex);
- }
-
- static int zonefs_file_release(struct inode *inode, struct file *file)
- {
- /*
- * If we explicitly open a zone we must close it again as well, but the
- * zone management operation can fail (either due to an IO error or as
- * the zone has gone offline or read-only). Make sure we don't fail the
- * close(2) for user-space.
- */
- if (zonefs_seq_file_need_wro(inode, file))
- zonefs_seq_file_write_close(inode);
-
- return 0;
+ return zonefs_get_zgroup_inode(sb, ztype);
}
- static const struct file_operations zonefs_file_operations = {
- .open = zonefs_file_open,
- .release = zonefs_file_release,
- .fsync = zonefs_file_fsync,
- .mmap = zonefs_file_mmap,
- .llseek = zonefs_file_llseek,
- .read_iter = zonefs_file_read_iter,
- .write_iter = zonefs_file_write_iter,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .iopoll = iocb_bio_iopoll,
- };
-
- static struct kmem_cache *zonefs_inode_cachep;
-
- static struct inode *zonefs_alloc_inode(struct super_block *sb)
+ static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
{
- struct zonefs_inode_info *zi;
-
- zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
- if (!zi)
- return NULL;
-
- inode_init_once(&zi->i_vnode);
- mutex_init(&zi->i_truncate_mutex);
- zi->i_wr_refcnt = 0;
- zi->i_flags = 0;
-
- return &zi->i_vnode;
- }
-
- static void zonefs_free_inode(struct inode *inode)
- {
- kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
- }
-
- /*
- * File system stat.
- */
- static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
- {
- struct super_block *sb = dentry->d_sb;
- struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
- enum zonefs_ztype t;
+ struct inode *inode;
- buf->f_type = ZONEFS_MAGIC;
- buf->f_bsize = sb->s_blocksize;
- buf->f_namelen = ZONEFS_NAME_MAX;
+ if (dentry->d_name.len > ZONEFS_NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
- spin_lock(&sbi->s_lock);
-
- buf->f_blocks = sbi->s_blocks;
- if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
- buf->f_bfree = 0;
+ if (dir == d_inode(dir->i_sb->s_root))
+ inode = zonefs_get_dir_inode(dir, dentry);
else
- buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
- buf->f_bavail = buf->f_bfree;
+ inode = zonefs_get_file_inode(dir, dentry);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
- for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
- if (sbi->s_nr_files[t])
- buf->f_files += sbi->s_nr_files[t] + 1;
- }
- buf->f_ffree = 0;
-
- spin_unlock(&sbi->s_lock);
-
- buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
-
- return 0;
+ return d_splice_alias(inode, dentry);
}
- enum {
- Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
- Opt_explicit_open, Opt_err,
- };
-
- static const match_table_t tokens = {
- { Opt_errors_ro, "errors=remount-ro"},
- { Opt_errors_zro, "errors=zone-ro"},
- { Opt_errors_zol, "errors=zone-offline"},
- { Opt_errors_repair, "errors=repair"},
- { Opt_explicit_open, "explicit-open" },
- { Opt_err, NULL}
- };
-
- static int zonefs_parse_options(struct super_block *sb, char *options)
+ static int zonefs_readdir_root(struct file *file, struct dir_context *ctx)
{
+ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
- substring_t args[MAX_OPT_ARGS];
- char *p;
+ enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV;
+ ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1;
- if (!options)
+ if (ctx->pos >= inode->i_size)
return 0;
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
+ if (!dir_emit_dots(file, ctx))
+ return 0;
- if (!*p)
- continue;
+ if (ctx->pos == 2) {
+ if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones)
+ ztype = ZONEFS_ZTYPE_SEQ;
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_errors_ro:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO;
- break;
- case Opt_errors_zro:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO;
- break;
- case Opt_errors_zol:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL;
- break;
- case Opt_errors_repair:
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
- sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
- break;
- case Opt_explicit_open:
- sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
- break;
- default:
- return -EINVAL;
- }
+ if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3,
+ base_ino + ztype, DT_DIR))
+ return 0;
+ ctx->pos++;
}
- return 0;
- }
-
- static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
- {
- struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
-
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
- seq_puts(seq, ",errors=remount-ro");
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
- seq_puts(seq, ",errors=zone-ro");
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
- seq_puts(seq, ",errors=zone-offline");
- if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
- seq_puts(seq, ",errors=repair");
+ if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) {
+ ztype = ZONEFS_ZTYPE_SEQ;
+ if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3,
+ base_ino + ztype, DT_DIR))
+ return 0;
+ ctx->pos++;
+ }
return 0;
}