void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
struct vfsmount *(*d_automount)(struct path *path);
- int (*d_manage)(struct dentry *, bool);
+ int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, const struct inode *,
unsigned int);
not block. If it's not possible to reach a page without blocking,
filesystem should skip it. Filesystem should use do_set_pte() to setup
page table entry. Pointer to entry associated with the page is passed in
-"pte" field in fault_env structure. Pointers to entries for other offsets
+"pte" field in vm_fault structure. Pointers to entries for other offsets
should be calculated relative to "pte".
->page_mkwrite() is called when a previously read-only pte is
#include <linux/idr.h>
#include <linux/sched.h>
#include <linux/uio.h>
+#include <linux/bvec.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
- if (unlikely(copied < len)) {
- /*
- * zero out the rest of the area
- */
- unsigned from = pos & (PAGE_SIZE - 1);
-
- zero_user(page, from + copied, len - copied);
- flush_dcache_page(page);
+ if (unlikely(copied < len && !PageUptodate(page))) {
+ copied = 0;
+ goto out;
}
-
- if (!PageUptodate(page))
- SetPageUptodate(page);
/*
* No need to use i_size_read() here, the i_size
* cannot change under us because we hold the i_mutex.
i_size_write(inode, last_pos);
}
set_page_dirty(page);
+ out:
unlock_page(page);
put_page(page);
/* four bytes for CRC32 */
#define BTRFS_EMPTY_DIR_SIZE 0
-/* specific to btrfs_map_block(), therefore not in include/linux/blk_types.h */
-#define REQ_GET_READ_MIRRORS (1 << 30)
-
/* ioprio of readahead is set to idle */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
unsigned int need_commit_sem:1;
unsigned int skip_release_on_error:1;
};
-#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
+#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
sizeof(struct btrfs_item))
struct btrfs_dev_replace {
u64 replace_state; /* see #define above */
struct list_head ro_bgs;
struct list_head priority_tickets;
struct list_head tickets;
+ /*
+ * tickets_id just indicates the next ticket will be handled, so note
+ * it's not stored per ticket.
+ */
u64 tickets_id;
struct rw_semaphore groups_sem;
void *cur, *orig;
struct page *page;
struct page **pages;
- struct btrfs_root *root;
+ struct btrfs_fs_info *fs_info;
struct inode *inode;
unsigned long size;
int index;
spinlock_t super_lock;
struct btrfs_super_block *super_copy;
struct btrfs_super_block *super_for_commit;
- struct block_device *__bdev;
struct super_block *sb;
struct inode *btree_inode;
struct backing_dev_info bdi;
/* Used to record internally whether fs has been frozen */
int fs_frozen;
+
+ /* Cached block sizes */
+ u32 nodesize;
+ u32 sectorsize;
+ u32 stripesize;
};
+static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
struct btrfs_subvolume_writers {
struct percpu_counter counter;
wait_queue_head_t wait;
u64 objectid;
u64 last_trans;
- /* data allocations are done in sectorsize units */
- u32 sectorsize;
-
- /* node allocations are done in nodesize units */
- u32 nodesize;
-
- u32 stripesize;
-
u32 type;
u64 highest_objectid;
/* For qgroup metadata space reserve */
atomic_t qgroup_meta_rsv;
};
+static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
+{
+ return btrfs_sb(inode->i_sb)->sectorsize;
+}
static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 blocksize)
{
return blocksize - sizeof(struct btrfs_header);
}
-static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_root *root)
+static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
{
- return __BTRFS_LEAF_DATA_SIZE(root->nodesize);
+ return __BTRFS_LEAF_DATA_SIZE(info->nodesize);
}
-static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_root *root)
+static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
{
- return BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+ return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
}
-static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_root *root)
+static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info)
{
- return BTRFS_LEAF_DATA_SIZE(root) / sizeof(struct btrfs_key_ptr);
+ return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr);
}
#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
(offsetof(struct btrfs_file_extent_item, disk_bytenr))
-static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_root *root)
+static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info)
{
- return BTRFS_MAX_ITEM_SIZE(root) -
+ return BTRFS_MAX_ITEM_SIZE(info) -
BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
-static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_root *root)
+static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
{
- return BTRFS_MAX_ITEM_SIZE(root) - sizeof(struct btrfs_dir_item);
+ return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
}
/*
#ifdef CONFIG_BTRFS_DEBUG
static inline int
-btrfs_should_fragment_free_space(struct btrfs_root *root,
- struct btrfs_block_group_cache *block_group)
+btrfs_should_fragment_free_space(struct btrfs_block_group_cache *block_group)
{
- return (btrfs_test_opt(root->fs_info, FRAGMENT_METADATA) &&
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+
+ return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) &&
block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
- (btrfs_test_opt(root->fs_info, FRAGMENT_DATA) &&
+ (btrfs_test_opt(fs_info, FRAGMENT_DATA) &&
block_group->flags & BTRFS_BLOCK_GROUP_DATA);
}
#endif
cpu->target = le64_to_cpu(disk->target);
cpu->flags = le64_to_cpu(disk->flags);
cpu->limit = le64_to_cpu(disk->limit);
+ cpu->stripes_min = le32_to_cpu(disk->stripes_min);
+ cpu->stripes_max = le32_to_cpu(disk->stripes_max);
}
static inline void
disk->target = cpu_to_le64(cpu->target);
disk->flags = cpu_to_le64(cpu->flags);
disk->limit = cpu_to_le64(cpu->limit);
+ disk->stripes_min = cpu_to_le32(cpu->stripes_min);
+ disk->stripes_max = cpu_to_le32(cpu->stripes_max);
}
/* struct btrfs_super_block */
* this returns the address of the start of the last item,
* which is the stop of the leaf data stack
*/
-static inline unsigned int leaf_data_end(struct btrfs_root *root,
+static inline unsigned int leaf_data_end(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf)
{
u32 nr = btrfs_header_nritems(leaf);
if (nr == 0)
- return BTRFS_LEAF_DATA_SIZE(root);
+ return BTRFS_LEAF_DATA_SIZE(fs_info);
return btrfs_item_offset_nr(leaf, nr - 1);
}
BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
struct btrfs_dev_replace_item, cursor_right, 64);
-static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
-{
- return sb->s_fs_info;
-}
-
/* helper function to cast into the data area of the leaf. */
#define btrfs_item_ptr(leaf, slot, type) \
((type *)(btrfs_leaf_data(leaf) + \
/* extent-tree.c */
-u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
+u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes);
-static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
+static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
- return root->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
+ return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/*
* Doing a truncate won't result in new nodes or leaves, just what we need for
* COW.
*/
-static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
+static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
- return root->nodesize * BTRFS_MAX_LEVEL * num_items;
+ return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
}
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_fs_info *fs_info);
int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_fs_info *fs_info);
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
const u64 start);
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, unsigned long count);
-int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info, unsigned long count);
+int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
unsigned long count, u64 transid, int wait);
-int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
u64 offset, int metadata, u64 *refs, u64 *flags);
-int btrfs_pin_extent(struct btrfs_root *root,
+int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num, int reserved);
-int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
+int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes);
-int btrfs_exclude_logged_extents(struct btrfs_root *root,
+int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref);
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
u64 root_objectid, u64 owner,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins);
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins);
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 flags,
int level, int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset);
-int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
- int delalloc);
-int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
+int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
+ u64 start, u64 len, int delalloc);
+int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len);
void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_fs_info *fs_info);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_fs_info *fs_info);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_fs_info *fs_info);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
+ struct btrfs_fs_info *fs_info);
+int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
-int btrfs_read_block_groups(struct btrfs_root *root);
-int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
+int btrfs_read_block_groups(struct btrfs_fs_info *info);
+int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytes_used,
+ struct btrfs_fs_info *fs_info, u64 bytes_used,
u64 type, u64 chunk_objectid, u64 chunk_offset,
u64 size);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 group_start,
+ struct btrfs_fs_info *fs_info, u64 group_start,
struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_fs_info *fs_info);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
u64 len);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
+ struct btrfs_fs_info *fs_info);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode);
struct btrfs_block_rsv *rsv,
int nitems,
u64 *qgroup_reserved, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
+void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv,
u64 qgroup_reserved);
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len);
void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type);
-void btrfs_free_block_rsv(struct btrfs_root *root,
+void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush);
-int btrfs_block_rsv_check(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, int min_factor);
+int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor);
int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
enum btrfs_reserve_flush_enum flush);
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *dest, u64 num_bytes,
int min_factor);
-void btrfs_block_rsv_release(struct btrfs_root *root,
+void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_inc_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
-void btrfs_dec_block_group_ro(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache);
+void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
-int btrfs_error_unpin_extent_range(struct btrfs_root *root,
+int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end);
-int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 type);
-int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
+ struct btrfs_fs_info *fs_info, u64 type);
+int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const u64 type);
+ struct btrfs_fs_info *fs_info, const u64 type);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end);
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf);
-void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
u32 data_size);
-void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
- u32 new_size, int from_end);
+void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
{
return btrfs_next_old_item(root, p, 0);
}
-int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
+int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf);
int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
int update_ref, int for_reloc);
* anything except sleeping. This function is used to check the status of
* the fs.
*/
-static inline int btrfs_need_cleaner_sleep(struct btrfs_root *root)
+static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
{
- return (root->fs_info->sb->s_flags & MS_RDONLY ||
- btrfs_fs_closing(root->fs_info));
+ return fs_info->sb->s_flags & MS_RDONLY || btrfs_fs_closing(fs_info);
}
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
/* root-item.c */
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *tree_root,
+ struct btrfs_fs_info *fs_info,
u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
const char *name, int name_len);
int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *tree_root,
+ struct btrfs_fs_info *fs_info,
u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
const char *name, int name_len);
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
struct btrfs_path *path, struct btrfs_root_item *root_item,
struct btrfs_key *root_key);
-int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
+int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info);
void btrfs_set_root_node(struct btrfs_root_item *item,
struct extent_buffer *node);
void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
/* uuid-tree.c */
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
- struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+ struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
- struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+ struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
struct btrfs_path *path, u64 dir,
const char *name, u16 name_len,
int mod);
-int verify_dir_item(struct btrfs_root *root,
+int verify_dir_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf,
struct btrfs_dir_item *dir_item);
-struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
const char *name,
int name_len);
/* file-item.c */
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 logical_offset);
+ struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
+int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+ u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 file_start, int contig);
+int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+ u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
void btrfs_extent_item_to_extent_map(struct inode *inode,
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
void btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
-void btrfs_run_delayed_iputs(struct btrfs_root *root);
+void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct inode *inode, u64 start, u64 end);
int btrfs_release_file(struct inode *inode, struct file *file);
-int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
- struct page **pages, size_t num_pages,
- loff_t pos, size_t write_bytes,
+int btrfs_dirty_pages(struct inode *inode, struct page **pages,
+ size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
- ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags);
int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len);
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
-int btrfs_parse_options(struct btrfs_root *root, char *options,
+int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
/* Report first abort since mount */ \
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
&((trans)->fs_info->fs_state))) { \
- WARN(1, KERN_DEBUG \
- "BTRFS: Transaction aborted (error %d)\n", \
- (errno)); \
+ if ((errno) != -EIO) { \
+ WARN(1, KERN_DEBUG \
+ "BTRFS: Transaction aborted (error %d)\n", \
+ (errno)); \
+ } else { \
+ pr_debug("BTRFS: Transaction aborted (error %d)\n", \
+ (errno)); \
+ } \
} \
__btrfs_abort_transaction((trans), __func__, \
__LINE__, (errno)); \
#endif
/* relocation.c */
-int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
+int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start);
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace);
-void btrfs_scrub_pause(struct btrfs_root *root);
-void btrfs_scrub_continue(struct btrfs_root *root);
+void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
+void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
struct btrfs_device *dev);
-int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
+int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress);
/* dev-replace.c */
/* reada.c */
struct reada_control {
- struct btrfs_root *root; /* tree to prefetch */
+ struct btrfs_fs_info *fs_info; /* tree to prefetch */
struct btrfs_key key_start;
struct btrfs_key key_end; /* exclusive */
atomic_t elems;
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, u64 start, int err);
+ struct extent_buffer *eb, int err);
static inline int is_fstree(u64 rootid)
{
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
-#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
static int __btrfs_add_inode_defrag(struct inode *inode,
struct inode_defrag *defrag)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct inode_defrag *entry;
struct rb_node **p;
struct rb_node *parent = NULL;
int ret;
- p = &root->fs_info->defrag_inodes.rb_node;
+ p = &fs_info->defrag_inodes.rb_node;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
}
set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
rb_link_node(&defrag->rb_node, parent, p);
- rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
+ rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes);
return 0;
}
-static inline int __need_auto_defrag(struct btrfs_root *root)
+static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
{
- if (!btrfs_test_opt(root->fs_info, AUTO_DEFRAG))
+ if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
return 0;
- if (btrfs_fs_closing(root->fs_info))
+ if (btrfs_fs_closing(fs_info))
return 0;
return 1;
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct inode *inode)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct inode_defrag *defrag;
u64 transid;
int ret;
- if (!__need_auto_defrag(root))
+ if (!__need_auto_defrag(fs_info))
return 0;
if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
defrag->transid = transid;
defrag->root = root->root_key.objectid;
- spin_lock(&root->fs_info->defrag_inodes_lock);
+ spin_lock(&fs_info->defrag_inodes_lock);
if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
/*
* If we set IN_DEFRAG flag and evict the inode from memory,
} else {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}
- spin_unlock(&root->fs_info->defrag_inodes_lock);
+ spin_unlock(&fs_info->defrag_inodes_lock);
return 0;
}
static void btrfs_requeue_inode_defrag(struct inode *inode,
struct inode_defrag *defrag)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int ret;
- if (!__need_auto_defrag(root))
+ if (!__need_auto_defrag(fs_info))
goto out;
/*
* Here we don't check the IN_DEFRAG flag, because we need merge
* them together.
*/
- spin_lock(&root->fs_info->defrag_inodes_lock);
+ spin_lock(&fs_info->defrag_inodes_lock);
ret = __btrfs_add_inode_defrag(inode, defrag);
- spin_unlock(&root->fs_info->defrag_inodes_lock);
+ spin_unlock(&fs_info->defrag_inodes_lock);
if (ret)
goto out;
return;
&fs_info->fs_state))
break;
- if (!__need_auto_defrag(fs_info->tree_root))
+ if (!__need_auto_defrag(fs_info))
break;
/* find an inode to defrag */
* this also makes the decision about creating an inline extent vs
* doing real data extents, marking pages dirty and delalloc as required.
*/
-int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
- struct page **pages, size_t num_pages,
- loff_t pos, size_t write_bytes,
- struct extent_state **cached)
+int btrfs_dirty_pages(struct inode *inode, struct page **pages,
+ size_t num_pages, loff_t pos, size_t write_bytes,
+ struct extent_state **cached)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int err = 0;
int i;
u64 num_bytes;
u64 end_pos = pos + write_bytes;
loff_t isize = i_size_read(inode);
- start_pos = pos & ~((u64)root->sectorsize - 1);
- num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
+ start_pos = pos & ~((u64) fs_info->sectorsize - 1);
+ num_bytes = round_up(write_bytes + pos - start_pos,
+ fs_info->sectorsize);
end_of_last_block = start_pos + num_bytes - 1;
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
u32 extent_item_size,
int *key_inserted)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
u64 num_bytes = 0;
u64 extent_offset = 0;
u64 extent_end = 0;
+ u64 last_end = start;
int del_nr = 0;
int del_slot = 0;
int extent_type;
modify_tree = 0;
update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
- root == root->fs_info->tree_root);
+ root == fs_info->tree_root);
while (1) {
recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino,
* extent item in the call to setup_items_for_insert() later
* in this function.
*/
- if (extent_end == key.offset && extent_end >= search_start)
+ if (extent_end == key.offset && extent_end >= search_start) {
+ last_end = extent_end;
goto delete_extent_item;
+ }
if (extent_end <= search_start) {
path->slots[0]++;
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0) {
- ret = btrfs_inc_extent_ref(trans, root,
+ ret = btrfs_inc_extent_ref(trans, fs_info,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
}
key.offset = start;
}
+ /*
+ * From here on out we will have actually dropped something, so
+ * last_end can be updated.
+ */
+ last_end = extent_end;
+
/*
* | ---- range to drop ----- |
* | -------- extent -------- |
memcpy(&new_key, &key, sizeof(new_key));
new_key.offset = end;
- btrfs_set_item_key_safe(root->fs_info, path, &new_key);
+ btrfs_set_item_key_safe(fs_info, path, &new_key);
extent_offset += end - key.offset;
btrfs_set_file_extent_offset(leaf, fi, extent_offset);
inode_sub_bytes(inode,
extent_end - key.offset);
extent_end = ALIGN(extent_end,
- root->sectorsize);
+ fs_info->sectorsize);
} else if (update_refs && disk_bytenr > 0) {
- ret = btrfs_free_extent(trans, root,
+ ret = btrfs_free_extent(trans, fs_info,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
key.objectid, key.offset -
if (!ret && replace_extent && leafs_visited == 1 &&
(path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
path->locks[0] == BTRFS_WRITE_LOCK) &&
- btrfs_leaf_free_space(root, leaf) >=
+ btrfs_leaf_free_space(fs_info, leaf) >=
sizeof(struct btrfs_item) + extent_item_size) {
key.objectid = ino;
if (!replace_extent || !(*key_inserted))
btrfs_release_path(path);
if (drop_end)
- *drop_end = found ? min(end, extent_end) : end;
+ *drop_end = found ? min(end, last_end) : end;
return ret;
}
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct inode *inode, u64 start, u64 end)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_buffer *leaf;
struct btrfs_path *path;
ino, bytenr, orig_offset,
&other_start, &other_end)) {
new_key.offset = end;
- btrfs_set_item_key_safe(root->fs_info, path, &new_key);
+ btrfs_set_item_key_safe(fs_info, path, &new_key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_file_extent_generation(leaf, fi,
trans->transid);
path->slots[0]++;
new_key.offset = start;
- btrfs_set_item_key_safe(root->fs_info, path, &new_key);
+ btrfs_set_item_key_safe(fs_info, path, &new_key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
extent_end - split);
btrfs_mark_buffer_dirty(leaf);
- ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
- root->root_key.objectid,
+ ret = btrfs_inc_extent_ref(trans, fs_info, bytenr, num_bytes,
+ 0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
btrfs_abort_transaction(trans, ret);
extent_end = other_end;
del_slot = path->slots[0] + 1;
del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+ ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
key.offset = other_start;
del_slot = path->slots[0];
del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+ ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
u64 *lockstart, u64 *lockend,
struct extent_state **cached_state)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 start_pos;
u64 last_pos;
int i;
int ret = 0;
- start_pos = round_down(pos, root->sectorsize);
+ start_pos = round_down(pos, fs_info->sectorsize);
last_pos = start_pos
- + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
+ + round_up(pos + write_bytes - start_pos,
+ fs_info->sectorsize) - 1;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
static noinline int check_can_nocow(struct inode *inode, loff_t pos,
size_t *write_bytes)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered;
u64 lockstart, lockend;
if (!ret)
return -ENOSPC;
- lockstart = round_down(pos, root->sectorsize);
- lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;
+ lockstart = round_down(pos, fs_info->sectorsize);
+ lockend = round_up(pos + *write_bytes,
+ fs_info->sectorsize) - 1;
while (1) {
lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
loff_t pos)
{
struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
struct extent_state *cached_state = NULL;
break;
}
- sector_offset = pos & (root->sectorsize - 1);
+ sector_offset = pos & (fs_info->sectorsize - 1);
reserve_bytes = round_up(write_bytes + sector_offset,
- root->sectorsize);
+ fs_info->sectorsize);
ret = btrfs_check_data_free_space(inode, pos, write_bytes);
if (ret < 0) {
PAGE_SIZE);
reserve_bytes = round_up(write_bytes +
sector_offset,
- root->sectorsize);
+ fs_info->sectorsize);
} else {
break;
}
copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
- num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
- reserve_bytes);
+ num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
dirty_sectors = round_up(copied + sector_offset,
- root->sectorsize);
- dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
- dirty_sectors);
+ fs_info->sectorsize);
+ dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);
/*
* if we have trouble faulting in the pages, fall
* managed to copy.
*/
if (num_sectors > dirty_sectors) {
-
/* release everything except the sectors we dirtied */
release_bytes -= dirty_sectors <<
- root->fs_info->sb->s_blocksize_bits;
-
+ fs_info->sb->s_blocksize_bits;
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
} else {
u64 __pos;
- __pos = round_down(pos, root->sectorsize) +
+ __pos = round_down(pos,
+ fs_info->sectorsize) +
(dirty_pages << PAGE_SHIFT);
btrfs_delalloc_release_space(inode, __pos,
release_bytes);
}
release_bytes = round_up(copied + sector_offset,
- root->sectorsize);
+ fs_info->sectorsize);
if (copied > 0)
- ret = btrfs_dirty_pages(root, inode, pages,
- dirty_pages, pos, copied,
- NULL);
+ ret = btrfs_dirty_pages(inode, pages, dirty_pages,
+ pos, copied, NULL);
if (need_unlock)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state,
btrfs_end_write_no_snapshoting(root);
if (only_release_metadata && copied > 0) {
- lockstart = round_down(pos, root->sectorsize);
- lockend = round_up(pos + copied, root->sectorsize) - 1;
+ lockstart = round_down(pos,
+ fs_info->sectorsize);
+ lockend = round_up(pos + copied,
+ fs_info->sectorsize) - 1;
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_NORESERVE, NULL,
cond_resched();
balance_dirty_pages_ratelimited(inode->i_mapping);
- if (dirty_pages < (root->nodesize >> PAGE_SHIFT) + 1)
- btrfs_btree_balance_dirty(root);
+ if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1)
+ btrfs_btree_balance_dirty(fs_info);
pos += copied;
num_written += copied;
btrfs_delalloc_release_metadata(inode, release_bytes);
} else {
btrfs_delalloc_release_space(inode,
- round_down(pos, root->sectorsize),
+ round_down(pos, fs_info->sectorsize),
release_bytes);
}
}
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start_pos;
u64 end_pos;
* although we have opened a file as writable, we have
* to stop this write operation to ensure FS consistency.
*/
- if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
inode_unlock(inode);
err = -EROFS;
goto out;
pos = iocb->ki_pos;
count = iov_iter_count(from);
- start_pos = round_down(pos, root->sectorsize);
+ start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
- end_pos = round_up(pos + count, root->sectorsize);
+ end_pos = round_up(pos + count,
+ fs_info->sectorsize);
err = btrfs_cont_expand(inode, oldsize, end_pos);
if (err) {
inode_unlock(inode);
goto out;
}
- if (start_pos > round_up(oldsize, root->sectorsize))
+ if (start_pos > round_up(oldsize, fs_info->sectorsize))
clean_page = 1;
}
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
* commit does not start nor waits for ordered extents to complete.
*/
smp_mb();
- if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
+ if (btrfs_inode_in_log(inode, fs_info->generation) ||
(full_sync && BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed) ||
+ fs_info->last_trans_committed) ||
(!btrfs_have_ordered_extents_in_range(inode, start, len) &&
BTRFS_I(inode)->last_trans
- <= root->fs_info->last_trans_committed)) {
+ <= fs_info->last_trans_committed)) {
/*
* We've had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
* which are indicated by ctx.io_err.
*/
if (ctx.io_err) {
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
ret = ctx.io_err;
goto out;
}
if (!ret) {
ret = btrfs_sync_log(trans, root, &ctx);
if (!ret) {
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_end_transaction(trans);
goto out;
}
}
if (!full_sync) {
ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
goto out;
}
}
- ret = btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans);
} else {
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_end_transaction(trans);
}
out:
return ret > 0 ? -EIO : ret;
static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
struct btrfs_path *path, u64 offset, u64 end)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
int ret;
- if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
+ if (btrfs_fs_incompat(fs_info, NO_HOLES))
goto out;
key.objectid = btrfs_ino(inode);
key.offset = offset;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
- if (ret < 0)
+ if (ret <= 0) {
+ /*
+ * We should have dropped this offset, so if we find it then
+ * something has gone horribly wrong.
+ */
+ if (ret == 0)
+ ret = -EINVAL;
return ret;
- BUG_ON(!ret);
+ }
leaf = path->nodes[0];
if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
u64 num_bytes;
key.offset = offset;
- btrfs_set_item_key_safe(root->fs_info, path, &key);
+ btrfs_set_item_key_safe(fs_info, path, &key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->orig_block_len = 0;
- hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
+ hole_em->bdev = fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = trans->transid;
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
u64 tail_len;
u64 orig_start = offset;
u64 cur_offset;
- u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
+ u64 min_size = btrfs_calc_trans_metadata_size(fs_info, 1);
u64 drop_end;
int ret = 0;
int err = 0;
unsigned int rsv_count;
bool same_block;
- bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
+ bool no_holes = btrfs_fs_incompat(fs_info, NO_HOLES);
u64 ino_size;
bool truncated_block = false;
bool updated_inode = false;
return ret;
inode_lock(inode);
- ino_size = round_up(inode->i_size, root->sectorsize);
+ ino_size = round_up(inode->i_size, fs_info->sectorsize);
ret = find_first_non_hole(inode, &offset, &len);
if (ret < 0)
goto out_only_mutex;
goto out_only_mutex;
}
- lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
+ lockstart = round_up(offset, btrfs_inode_sectorsize(inode));
lockend = round_down(offset + len,
- BTRFS_I(inode)->root->sectorsize) - 1;
- same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
- == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
+ btrfs_inode_sectorsize(inode)) - 1;
+ same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset))
+ == (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1));
/*
* We needn't truncate any block which is beyond the end of the file
* because we are sure there is no data there.
* Only do this if we are in the same block and we aren't doing the
* entire block.
*/
- if (same_block && len < root->sectorsize) {
+ if (same_block && len < fs_info->sectorsize) {
if (offset < ino_size) {
truncated_block = true;
ret = btrfs_truncate_block(inode, offset, len, 0);
goto out;
}
- rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
+ rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
if (!rsv) {
ret = -ENOMEM;
goto out_free;
}
- rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
+ rsv->size = btrfs_calc_trans_metadata_size(fs_info, 1);
rsv->failfast = 1;
/*
goto out_free;
}
- ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
+ ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
min_size, 0);
BUG_ON(ret);
trans->block_rsv = rsv;
if (ret != -ENOSPC)
break;
- trans->block_rsv = &root->fs_info->trans_block_rsv;
+ trans->block_rsv = &fs_info->trans_block_rsv;
- if (cur_offset < ino_size) {
+ if (cur_offset < drop_end && cur_offset < ino_size) {
ret = fill_holes(trans, inode, path, cur_offset,
drop_end);
if (ret) {
+ /*
+ * If we failed then we didn't insert our hole
+ * entries for the area we dropped, so now the
+ * fs is corrupted, so we must abort the
+ * transaction.
+ */
+ btrfs_abort_transaction(trans, ret);
err = ret;
break;
}
break;
}
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root);
+ btrfs_end_transaction(trans);
+ btrfs_btree_balance_dirty(fs_info);
trans = btrfs_start_transaction(root, rsv_count);
if (IS_ERR(trans)) {
break;
}
- ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
+ ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
goto out_trans;
}
- trans->block_rsv = &root->fs_info->trans_block_rsv;
+ trans->block_rsv = &fs_info->trans_block_rsv;
/*
* If we are using the NO_HOLES feature we might have had already an
* hole that overlaps a part of the region [lockstart, lockend] and
if (cur_offset < ino_size && cur_offset < drop_end) {
ret = fill_holes(trans, inode, path, cur_offset, drop_end);
if (ret) {
+ /* Same comment as above. */
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_trans;
}
inode_inc_iversion(inode);
inode->i_mtime = inode->i_ctime = current_time(inode);
- trans->block_rsv = &root->fs_info->trans_block_rsv;
+ trans->block_rsv = &fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
updated_inode = true;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root);
+ btrfs_end_transaction(trans);
+ btrfs_btree_balance_dirty(fs_info);
out_free:
btrfs_free_path(path);
- btrfs_free_block_rsv(root, rsv);
+ btrfs_free_block_rsv(fs_info, rsv);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
err = PTR_ERR(trans);
} else {
err = btrfs_update_inode(trans, root, inode);
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_end_transaction(trans);
}
}
inode_unlock(inode);
u64 locked_end;
u64 actual_end = 0;
struct extent_map *em;
- int blocksize = BTRFS_I(inode)->root->sectorsize;
+ int blocksize = btrfs_inode_sectorsize(inode);
int ret;
alloc_start = round_down(offset, blocksize);
btrfs_ordered_update_i_size(inode, actual_end, NULL);
ret = btrfs_update_inode(trans, root, inode);
if (ret)
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
else
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_end_transaction(trans);
}
}
out_unlock:
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
u64 lockstart;
*/
start = max_t(loff_t, 0, *offset);
- lockstart = round_down(start, root->sectorsize);
- lockend = round_up(i_size_read(inode), root->sectorsize);
+ lockstart = round_down(start, fs_info->sectorsize);
+ lockend = round_up(i_size_read(inode),
+ fs_info->sectorsize);
if (lockend <= lockstart)
- lockend = lockstart + root->sectorsize;
+ lockend = lockstart + fs_info->sectorsize;
lockend--;
len = lockend - lockstart + 1;
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,
#endif
- .copy_file_range = btrfs_copy_file_range,
.clone_file_range = btrfs_clone_file_range,
.dedupe_file_range = btrfs_dedupe_file_range,
};
#include <linux/namei.h>
#include <linux/swap.h>
#include <linux/writeback.h>
-#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/bit_spinlock.h>
#include <linux/security.h>
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_inode *ip = BTRFS_I(inode);
struct btrfs_root *root = ip->root;
struct btrfs_trans_handle *trans;
ip->flags |= BTRFS_INODE_COMPRESS;
ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
- if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
+ if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
comp = "lzo";
else
comp = "zlib";
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
out_drop:
if (ret) {
ip->flags = ip_oldflags;
static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_device *device;
struct request_queue *q;
struct fstrim_range range;
range.len = min(range.len, total_bytes - range.start);
range.minlen = max(range.minlen, minlen);
- ret = btrfs_trim_fs(fs_info->tree_root, &range);
+ ret = btrfs_trim_fs(fs_info, &range);
if (ret < 0)
return ret;
u64 *async_transid,
struct btrfs_qgroup_inherit *inherit)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_root_item *root_item;
if (!root_item)
return -ENOMEM;
- ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
+ ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
if (ret)
goto fail_free;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(root, &block_rsv,
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv,
qgroup_reserved);
goto fail_free;
}
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
- ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, inherit);
+ ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit);
if (ret)
goto fail;
goto fail;
}
- memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
+ memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(leaf, leaf->start);
btrfs_set_header_generation(leaf, trans->transid);
btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(leaf, objectid);
- write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(),
- BTRFS_FSID_SIZE);
- write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
- btrfs_header_chunk_tree_uuid(leaf),
- BTRFS_UUID_SIZE);
+ write_extent_buffer_fsid(leaf, fs_info->fsid);
+ write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
btrfs_mark_buffer_dirty(leaf);
inode_item = &root_item->inode;
btrfs_set_stack_inode_generation(inode_item, 1);
btrfs_set_stack_inode_size(inode_item, 3);
btrfs_set_stack_inode_nlink(inode_item, 1);
- btrfs_set_stack_inode_nbytes(inode_item, root->nodesize);
+ btrfs_set_stack_inode_nbytes(inode_item,
+ fs_info->nodesize);
btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
btrfs_set_root_flags(root_item, 0);
key.objectid = objectid;
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
- ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
+ ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
root_item);
if (ret)
goto fail;
key.offset = (u64)-1;
- new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
+ new_root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
- ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
+ ret = btrfs_add_root_ref(trans, fs_info,
objectid, root->root_key.objectid,
btrfs_ino(dir), index, name, namelen);
BUG_ON(ret);
- ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
- root_item->uuid, BTRFS_UUID_KEY_SUBVOL,
- objectid);
+ ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid,
+ BTRFS_UUID_KEY_SUBVOL, objectid);
if (ret)
btrfs_abort_transaction(trans, ret);
kfree(root_item);
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved);
if (async_transid) {
*async_transid = trans->transid;
- err = btrfs_commit_transaction_async(trans, root, 1);
+ err = btrfs_commit_transaction_async(trans, 1);
if (err)
- err = btrfs_commit_transaction(trans, root);
+ err = btrfs_commit_transaction(trans);
} else {
- err = btrfs_commit_transaction(trans, root);
+ err = btrfs_commit_transaction(trans);
}
if (err && !ret)
ret = err;
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
goto fail;
}
- spin_lock(&root->fs_info->trans_lock);
+ spin_lock(&fs_info->trans_lock);
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
- spin_unlock(&root->fs_info->trans_lock);
+ spin_unlock(&fs_info->trans_lock);
if (async_transid) {
*async_transid = trans->transid;
- ret = btrfs_commit_transaction_async(trans,
- root->fs_info->extent_root, 1);
+ ret = btrfs_commit_transaction_async(trans, 1);
if (ret)
- ret = btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans);
} else {
- ret = btrfs_commit_transaction(trans,
- root->fs_info->extent_root);
+ ret = btrfs_commit_transaction(trans);
}
if (ret)
goto fail;
d_instantiate(dentry, inode);
ret = 0;
fail:
- btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
+ btrfs_subvolume_release_metadata(fs_info,
&pending_snapshot->block_rsv,
pending_snapshot->qgroup_reserved);
dec_and_free:
* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
* inside this filesystem so it's quite a bit simpler.
*/
- static noinline int btrfs_mksubvol(struct path *parent,
+ static noinline int btrfs_mksubvol(const struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
- struct inode *dir = d_inode(parent->dentry);
+ struct inode *dir = d_inode(parent->dentry);
+ struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct dentry *dentry;
int error;
if (error)
goto out_dput;
- down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
+ down_read(&fs_info->subvol_sem);
if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
goto out_up_read;
if (!error)
fsnotify_mkdir(dir, dentry);
out_up_read:
- up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
+ up_read(&fs_info->subvol_sem);
out_dput:
dput(dentry);
out_unlock:
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_to_defrag)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct file_ra_state *ra = NULL;
unsigned long last_index;
if (!(inode->i_sb->s_flags & MS_ACTIVE))
break;
- if (btrfs_defrag_cancelled(root->fs_info)) {
- btrfs_debug(root->fs_info, "defrag_file cancelled");
+ if (btrfs_defrag_cancelled(fs_info)) {
+ btrfs_debug(fs_info, "defrag_file cancelled");
ret = -EAGAIN;
break;
}
* we have to make sure the IO is actually started and that
* ordered extents get created before we return
*/
- atomic_inc(&root->fs_info->async_submit_draining);
- while (atomic_read(&root->fs_info->nr_async_submits) ||
- atomic_read(&root->fs_info->async_delalloc_pages)) {
- wait_event(root->fs_info->async_submit_wait,
- (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
- atomic_read(&root->fs_info->async_delalloc_pages) == 0));
+ atomic_inc(&fs_info->async_submit_draining);
+ while (atomic_read(&fs_info->nr_async_submits) ||
+ atomic_read(&fs_info->async_delalloc_pages)) {
+ wait_event(fs_info->async_submit_wait,
+ (atomic_read(&fs_info->nr_async_submits) == 0 &&
+ atomic_read(&fs_info->async_delalloc_pages) == 0));
}
- atomic_dec(&root->fs_info->async_submit_draining);
+ atomic_dec(&fs_info->async_submit_draining);
}
if (range->compress_type == BTRFS_COMPRESS_LZO) {
- btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
+ btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
}
ret = defrag_count;
static noinline int btrfs_ioctl_resize(struct file *file,
void __user *arg)
{
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 new_size;
u64 old_size;
u64 devid = 1;
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_vol_args *vol_args;
struct btrfs_trans_handle *trans;
struct btrfs_device *device = NULL;
if (ret)
return ret;
- if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
- 1)) {
+ if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
mnt_drop_write_file(file);
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
- mutex_lock(&root->fs_info->volume_mutex);
+ mutex_lock(&fs_info->volume_mutex);
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
ret = -EINVAL;
goto out_free;
}
- btrfs_info(root->fs_info, "resizing devid %llu", devid);
+ btrfs_info(fs_info, "resizing devid %llu", devid);
}
- device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
+ device = btrfs_find_device(fs_info, devid, NULL, NULL);
if (!device) {
- btrfs_info(root->fs_info, "resizer unable to find device %llu",
- devid);
+ btrfs_info(fs_info, "resizer unable to find device %llu",
+ devid);
ret = -ENODEV;
goto out_free;
}
if (!device->writeable) {
- btrfs_info(root->fs_info,
+ btrfs_info(fs_info,
"resizer unable to apply on readonly device %llu",
devid);
ret = -EPERM;
goto out_free;
}
- new_size = div_u64(new_size, root->sectorsize);
- new_size *= root->sectorsize;
+ new_size = div_u64(new_size, fs_info->sectorsize);
+ new_size *= fs_info->sectorsize;
- btrfs_info_in_rcu(root->fs_info, "new size for %s is %llu",
- rcu_str_deref(device->name), new_size);
+ btrfs_info_in_rcu(fs_info, "new size for %s is %llu",
+ rcu_str_deref(device->name), new_size);
if (new_size > old_size) {
trans = btrfs_start_transaction(root, 0);
goto out_free;
}
ret = btrfs_grow_device(trans, device, new_size);
- btrfs_commit_transaction(trans, root);
+ btrfs_commit_transaction(trans);
} else if (new_size < old_size) {
ret = btrfs_shrink_device(device, new_size);
} /* equal, nothing need to do */
out_free:
kfree(vol_args);
out:
- mutex_unlock(&root->fs_info->volume_mutex);
- atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+ mutex_unlock(&fs_info->volume_mutex);
+ atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret;
}
void __user *arg)
{
struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
u64 flags = 0;
if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
return -EINVAL;
- down_read(&root->fs_info->subvol_sem);
+ down_read(&fs_info->subvol_sem);
if (btrfs_root_readonly(root))
flags |= BTRFS_SUBVOL_RDONLY;
- up_read(&root->fs_info->subvol_sem);
+ up_read(&fs_info->subvol_sem);
if (copy_to_user(arg, &flags, sizeof(flags)))
ret = -EFAULT;
void __user *arg)
{
struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
u64 root_flags;
goto out_drop_write;
}
- down_write(&root->fs_info->subvol_sem);
+ down_write(&fs_info->subvol_sem);
/* nothing to do */
if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
spin_unlock(&root->root_item_lock);
} else {
spin_unlock(&root->root_item_lock);
- btrfs_warn(root->fs_info,
- "Attempt to set subvolume %llu read-write during send",
- root->root_key.objectid);
+ btrfs_warn(fs_info,
+ "Attempt to set subvolume %llu read-write during send",
+ root->root_key.objectid);
ret = -EPERM;
goto out_drop_sem;
}
goto out_reset;
}
- ret = btrfs_update_root(trans, root->fs_info->tree_root,
+ ret = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key, &root->root_item);
- btrfs_commit_transaction(trans, root);
+ btrfs_commit_transaction(trans);
out_reset:
if (ret)
btrfs_set_root_flags(&root->root_item, root_flags);
out_drop_sem:
- up_write(&root->fs_info->subvol_sem);
+ up_write(&fs_info->subvol_sem);
out_drop_write:
mnt_drop_write_file(file);
out:
*/
static noinline int may_destroy_subvol(struct btrfs_root *root)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct btrfs_dir_item *di;
struct btrfs_key key;
return -ENOMEM;
/* Make sure this root isn't set as the default subvol */
- dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
- di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path,
+ dir_id = btrfs_super_root_dir(fs_info->super_copy);
+ di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
dir_id, "default", 7, 0);
if (di && !IS_ERR(di)) {
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
if (key.objectid == root->root_key.objectid) {
ret = -EPERM;
- btrfs_err(root->fs_info,
+ btrfs_err(fs_info,
"deleting default subvolume %llu is not allowed",
key.objectid);
goto out;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = (u64)-1;
- ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
- &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
goto out;
BUG_ON(ret == 0);
size_t *buf_size,
char __user *ubuf)
{
+ struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
struct btrfs_root *root;
struct btrfs_key key;
struct btrfs_path *path;
- struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
int ret;
int num_found = 0;
unsigned long sk_offset = 0;
void __user *arg)
{
struct dentry *parent = file->f_path.dentry;
+ struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
struct dentry *dentry;
struct inode *dir = d_inode(parent);
struct inode *inode;
* rmdir(2).
*/
err = -EPERM;
- if (!btrfs_test_opt(root->fs_info, USER_SUBVOL_RM_ALLOWED))
+ if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
goto out_dput;
/*
spin_unlock(&dest->root_item_lock);
} else {
spin_unlock(&dest->root_item_lock);
- btrfs_warn(root->fs_info,
- "Attempt to delete subvolume %llu during send",
- dest->root_key.objectid);
+ btrfs_warn(fs_info,
+ "Attempt to delete subvolume %llu during send",
+ dest->root_key.objectid);
err = -EPERM;
goto out_unlock_inode;
}
- down_write(&root->fs_info->subvol_sem);
+ down_write(&fs_info->subvol_sem);
err = may_destroy_subvol(dest);
if (err)
if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
ret = btrfs_insert_orphan_item(trans,
- root->fs_info->tree_root,
+ fs_info->tree_root,
dest->root_key.objectid);
if (ret) {
btrfs_abort_transaction(trans, ret);
}
}
- ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
- dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
+ ret = btrfs_uuid_tree_rem(trans, fs_info, dest->root_item.uuid,
+ BTRFS_UUID_KEY_SUBVOL,
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
- ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+ ret = btrfs_uuid_tree_rem(trans, fs_info,
dest->root_item.received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
dest->root_key.objectid);
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_end_transaction(trans);
if (ret && !err)
err = ret;
inode->i_flags |= S_DEAD;
out_release:
- btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved);
out_up_write:
- up_write(&root->fs_info->subvol_sem);
+ up_write(&fs_info->subvol_sem);
if (err) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
return ret;
}
-static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
{
struct btrfs_ioctl_vol_args *vol_args;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
- 1)) {
+ if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1))
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
- }
- mutex_lock(&root->fs_info->volume_mutex);
+ mutex_lock(&fs_info->volume_mutex);
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- ret = btrfs_init_new_device(root, vol_args->name);
+ ret = btrfs_init_new_device(fs_info, vol_args->name);
if (!ret)
- btrfs_info(root->fs_info, "disk added %s",vol_args->name);
+ btrfs_info(fs_info, "disk added %s", vol_args->name);
kfree(vol_args);
out:
- mutex_unlock(&root->fs_info->volume_mutex);
- atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+ mutex_unlock(&fs_info->volume_mutex);
+ atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
return ret;
}
static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_vol_args_v2 *vol_args;
int ret;
if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
return -EOPNOTSUPP;
- if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
- 1)) {
+ if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out;
}
- mutex_lock(&root->fs_info->volume_mutex);
+ mutex_lock(&fs_info->volume_mutex);
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
- ret = btrfs_rm_device(root, NULL, vol_args->devid);
+ ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
} else {
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
- ret = btrfs_rm_device(root, vol_args->name, 0);
+ ret = btrfs_rm_device(fs_info, vol_args->name, 0);
}
- mutex_unlock(&root->fs_info->volume_mutex);
- atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+ mutex_unlock(&fs_info->volume_mutex);
+ atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
if (!ret) {
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
- btrfs_info(root->fs_info, "device deleted: id %llu",
+ btrfs_info(fs_info, "device deleted: id %llu",
vol_args->devid);
else
- btrfs_info(root->fs_info, "device deleted: %s",
+ btrfs_info(fs_info, "device deleted: %s",
vol_args->name);
}
out:
static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_vol_args *vol_args;
int ret;
if (ret)
return ret;
- if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
- 1)) {
+ if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out_drop_write;
}
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- mutex_lock(&root->fs_info->volume_mutex);
- ret = btrfs_rm_device(root, vol_args->name, 0);
- mutex_unlock(&root->fs_info->volume_mutex);
+ mutex_lock(&fs_info->volume_mutex);
+ ret = btrfs_rm_device(fs_info, vol_args->name, 0);
+ mutex_unlock(&fs_info->volume_mutex);
if (!ret)
- btrfs_info(root->fs_info, "disk deleted %s",vol_args->name);
+ btrfs_info(fs_info, "disk deleted %s", vol_args->name);
kfree(vol_args);
out:
- atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
+ atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
out_drop_write:
mnt_drop_write_file(file);
return ret;
}
-static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
struct btrfs_ioctl_fs_info_args *fi_args;
struct btrfs_device *device;
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
int ret = 0;
fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
mutex_lock(&fs_devices->device_list_mutex);
fi_args->num_devices = fs_devices->num_devices;
- memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
+ memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->devid > fi_args->max_id)
}
mutex_unlock(&fs_devices->device_list_mutex);
- fi_args->nodesize = root->fs_info->super_copy->nodesize;
- fi_args->sectorsize = root->fs_info->super_copy->sectorsize;
- fi_args->clone_alignment = root->fs_info->super_copy->sectorsize;
+ fi_args->nodesize = fs_info->super_copy->nodesize;
+ fi_args->sectorsize = fs_info->super_copy->sectorsize;
+ fi_args->clone_alignment = fs_info->super_copy->sectorsize;
if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
ret = -EFAULT;
return ret;
}
-static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
struct btrfs_ioctl_dev_info_args *di_args;
struct btrfs_device *dev;
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
int ret = 0;
char *s_uuid = NULL;
s_uuid = di_args->uuid;
mutex_lock(&fs_devices->device_list_mutex);
- dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL);
+ dev = btrfs_find_device(fs_info, di_args->devid, s_uuid, NULL);
if (!dev) {
ret = -ENODEV;
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
goto out;
}
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_end_transaction(trans);
out:
return ret;
}
const u64 size,
char *inline_data)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
struct btrfs_root *root = BTRFS_I(dst)->root;
const u64 aligned_end = ALIGN(new_key->offset + datal,
- root->sectorsize);
+ fs_info->sectorsize);
int ret;
struct btrfs_key key;
const u64 off, const u64 olen, const u64 olen_aligned,
const u64 destoff, int no_time_update)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path = NULL;
struct extent_buffer *leaf;
u64 last_dest_end = destoff;
ret = -ENOMEM;
- buf = kmalloc(root->nodesize, GFP_KERNEL | __GFP_NOWARN);
+ buf = kmalloc(fs_info->nodesize, GFP_KERNEL | __GFP_NOWARN);
if (!buf) {
- buf = vmalloc(root->nodesize);
+ buf = vmalloc(fs_info->nodesize);
if (!buf)
return ret;
}
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans,
ret);
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
goto out;
}
&new_key, size);
if (ret) {
btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
goto out;
}
if (disko) {
inode_add_bytes(inode, datal);
- ret = btrfs_inc_extent_ref(trans, root,
+ ret = btrfs_inc_extent_ref(trans,
+ fs_info,
disko, diskl, 0,
root->root_key.objectid,
btrfs_ino(inode),
if (ret) {
btrfs_abort_transaction(trans,
ret);
- btrfs_end_transaction(trans,
- root);
+ btrfs_end_transaction(trans);
goto out;
}
if (comp && (skip || trim)) {
ret = -EINVAL;
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
goto out;
}
size -= skip + trim;
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans,
ret);
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
goto out;
}
leaf = path->nodes[0];
btrfs_release_path(path);
last_dest_end = ALIGN(new_key.offset + datal,
- root->sectorsize);
+ fs_info->sectorsize);
ret = clone_finish_inode_update(trans, inode,
last_dest_end,
destoff, olen,
if (ret) {
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
goto out;
}
clone_update_extent_map(inode, trans, NULL, last_dest_end,
{
struct inode *inode = file_inode(file);
struct inode *src = file_inode(file_src);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
u64 len = olen;
- u64 bs = root->fs_info->sb->s_blocksize;
+ u64 bs = fs_info->sb->s_blocksize;
int same_inode = src == inode;
/*
return ret;
}
- ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags)
- {
- ssize_t ret;
-
- ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
- if (ret == 0)
- ret = len;
- return ret;
- }
-
int btrfs_clone_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, u64 len)
{
static long btrfs_ioctl_trans_start(struct file *file)
{
struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret;
if (ret)
goto out;
- atomic_inc(&root->fs_info->open_ioctl_trans);
+ atomic_inc(&fs_info->open_ioctl_trans);
ret = -ENOMEM;
trans = btrfs_start_ioctl_transaction(root);
return 0;
out_drop:
- atomic_dec(&root->fs_info->open_ioctl_trans);
+ atomic_dec(&fs_info->open_ioctl_trans);
mnt_drop_write_file(file);
out:
return ret;
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root *new_root;
struct btrfs_dir_item *di;
location.type = BTRFS_ROOT_ITEM_KEY;
location.offset = (u64)-1;
- new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
+ new_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
goto out;
goto out;
}
- dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
- di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
+ dir_id = btrfs_super_root_dir(fs_info->super_copy);
+ di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path,
dir_id, "default", 7, 1);
if (IS_ERR_OR_NULL(di)) {
btrfs_free_path(path);
- btrfs_end_transaction(trans, root);
- btrfs_err(new_root->fs_info,
+ btrfs_end_transaction(trans);
+ btrfs_err(fs_info,
"Umm, you don't have the default diritem, this isn't going to work");
ret = -ENOENT;
goto out;
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
- btrfs_end_transaction(trans, root);
+ btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
+ btrfs_end_transaction(trans);
out:
mnt_drop_write_file(file);
return ret;
}
}
-static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
struct btrfs_ioctl_space_args space_args;
struct btrfs_ioctl_space_info space;
info = NULL;
rcu_read_lock();
- list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+ list_for_each_entry_rcu(tmp, &fs_info->space_info,
list) {
if (tmp->flags == types[i]) {
info = tmp;
info = NULL;
rcu_read_lock();
- list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+ list_for_each_entry_rcu(tmp, &fs_info->space_info,
list) {
if (tmp->flags == types[i]) {
info = tmp;
* Add global block reserve
*/
if (slot_count) {
- struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv;
+ struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
spin_lock(&block_rsv->lock);
space.total_bytes = block_rsv->size;
return -EINVAL;
file->private_data = NULL;
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
atomic_dec(&root->fs_info->open_ioctl_trans);
goto out;
}
transid = trans->transid;
- ret = btrfs_commit_transaction_async(trans, root, 0);
+ ret = btrfs_commit_transaction_async(trans, 0);
if (ret) {
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
return ret;
}
out:
return 0;
}
-static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root,
+static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
void __user *argp)
{
u64 transid;
} else {
transid = 0; /* current trans */
}
- return btrfs_wait_for_commit(root, transid);
+ return btrfs_wait_for_commit(fs_info, transid);
}
static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
struct btrfs_ioctl_scrub_args *sa;
int ret;
goto out;
}
- ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end,
+ ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end,
&sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
0);
return ret;
}
-static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- return btrfs_scrub_cancel(root->fs_info);
+ return btrfs_scrub_cancel(fs_info);
}
-static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
+static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info,
void __user *arg)
{
struct btrfs_ioctl_scrub_args *sa;
if (IS_ERR(sa))
return PTR_ERR(sa);
- ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
+ ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress);
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
return ret;
}
-static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
+static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info,
void __user *arg)
{
struct btrfs_ioctl_get_dev_stats *sa;
return -EPERM;
}
- ret = btrfs_get_dev_stats(root, sa);
+ ret = btrfs_get_dev_stats(fs_info, sa);
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
return ret;
}
-static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
struct btrfs_ioctl_dev_replace_args *p;
int ret;
switch (p->cmd) {
case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
- if (root->fs_info->sb->s_flags & MS_RDONLY) {
+ if (fs_info->sb->s_flags & MS_RDONLY) {
ret = -EROFS;
goto out;
}
if (atomic_xchg(
- &root->fs_info->mutually_exclusive_operation_running,
- 1)) {
+ &fs_info->mutually_exclusive_operation_running, 1)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
} else {
- ret = btrfs_dev_replace_by_ioctl(root, p);
+ ret = btrfs_dev_replace_by_ioctl(fs_info, p);
atomic_set(
- &root->fs_info->mutually_exclusive_operation_running,
- 0);
+ &fs_info->mutually_exclusive_operation_running, 0);
}
break;
case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
- btrfs_dev_replace_status(root->fs_info, p);
+ btrfs_dev_replace_status(fs_info, p);
ret = 0;
break;
case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
- ret = btrfs_dev_replace_cancel(root->fs_info, p);
+ ret = btrfs_dev_replace_cancel(fs_info, p);
break;
default:
ret = -EINVAL;
return 0;
}
-static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
+static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
void __user *arg)
{
int ret = 0;
return -EPERM;
loi = memdup_user(arg, sizeof(*loi));
- if (IS_ERR(loi)) {
- ret = PTR_ERR(loi);
- loi = NULL;
- goto out;
- }
+ if (IS_ERR(loi))
+ return PTR_ERR(loi);
path = btrfs_alloc_path();
if (!path) {
goto out;
}
- ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path,
+ ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
build_ino_list, inodes);
if (ret == -EINVAL)
ret = -ENOENT;
return ret;
}
-static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
+static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info *fs_info, int cmd)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
switch (cmd) {
case BTRFS_BALANCE_CTL_PAUSE:
- return btrfs_pause_balance(root->fs_info);
+ return btrfs_pause_balance(fs_info);
case BTRFS_BALANCE_CTL_CANCEL:
- return btrfs_cancel_balance(root->fs_info);
+ return btrfs_cancel_balance(fs_info);
}
return -EINVAL;
}
-static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
+static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
void __user *arg)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
int ret = 0;
static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_quota_ctl_args *sa;
struct btrfs_trans_handle *trans = NULL;
int ret;
goto drop_write;
}
- down_write(&root->fs_info->subvol_sem);
- trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
+ down_write(&fs_info->subvol_sem);
+ trans = btrfs_start_transaction(fs_info->tree_root, 2);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
- ret = btrfs_quota_enable(trans, root->fs_info);
+ ret = btrfs_quota_enable(trans, fs_info);
break;
case BTRFS_QUOTA_CTL_DISABLE:
- ret = btrfs_quota_disable(trans, root->fs_info);
+ ret = btrfs_quota_disable(trans, fs_info);
break;
default:
ret = -EINVAL;
break;
}
- err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
+ err = btrfs_commit_transaction(trans);
if (err && !ret)
ret = err;
out:
kfree(sa);
- up_write(&root->fs_info->subvol_sem);
+ up_write(&fs_info->subvol_sem);
drop_write:
mnt_drop_write_file(file);
return ret;
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_assign_args *sa;
struct btrfs_trans_handle *trans;
int ret;
/* FIXME: check if the IDs really exist */
if (sa->assign) {
- ret = btrfs_add_qgroup_relation(trans, root->fs_info,
+ ret = btrfs_add_qgroup_relation(trans, fs_info,
sa->src, sa->dst);
} else {
- ret = btrfs_del_qgroup_relation(trans, root->fs_info,
+ ret = btrfs_del_qgroup_relation(trans, fs_info,
sa->src, sa->dst);
}
/* update qgroup status and info */
- err = btrfs_run_qgroups(trans, root->fs_info);
+ err = btrfs_run_qgroups(trans, fs_info);
if (err < 0)
- btrfs_handle_fs_error(root->fs_info, err,
- "failed to update qgroup status and info");
- err = btrfs_end_transaction(trans, root);
+ btrfs_handle_fs_error(fs_info, err,
+ "failed to update qgroup status and info");
+ err = btrfs_end_transaction(trans);
if (err && !ret)
ret = err;
static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_create_args *sa;
struct btrfs_trans_handle *trans;
int ret;
/* FIXME: check if the IDs really exist */
if (sa->create) {
- ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid);
+ ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid);
} else {
- ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
+ ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid);
}
- err = btrfs_end_transaction(trans, root);
+ err = btrfs_end_transaction(trans);
if (err && !ret)
ret = err;
static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_limit_args *sa;
struct btrfs_trans_handle *trans;
int ret;
}
/* FIXME: check if the IDs really exist */
- ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim);
+ ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim);
- err = btrfs_end_transaction(trans, root);
+ err = btrfs_end_transaction(trans);
if (err && !ret)
ret = err;
static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_quota_rescan_args *qsa;
int ret;
goto out;
}
- ret = btrfs_qgroup_rescan(root->fs_info);
+ ret = btrfs_qgroup_rescan(fs_info);
out:
kfree(qsa);
static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_quota_rescan_args *qsa;
int ret = 0;
if (!qsa)
return -ENOMEM;
- if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
qsa->flags = 1;
- qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
+ qsa->progress = fs_info->qgroup_rescan_progress.objectid;
}
if (copy_to_user(arg, qsa, sizeof(*qsa)))
static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- return btrfs_qgroup_wait_for_completion(root->fs_info, true);
+ return btrfs_qgroup_wait_for_completion(fs_info, true);
}
static long _btrfs_ioctl_set_received_subvol(struct file *file,
struct btrfs_ioctl_received_subvol_args *sa)
{
struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root_item *root_item = &root->root_item;
struct btrfs_trans_handle *trans;
if (ret < 0)
return ret;
- down_write(&root->fs_info->subvol_sem);
+ down_write(&fs_info->subvol_sem);
if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
BTRFS_UUID_SIZE);
if (received_uuid_changed &&
!btrfs_is_empty_uuid(root_item->received_uuid))
- btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
- root_item->received_uuid,
+ btrfs_uuid_tree_rem(trans, fs_info, root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);
- ret = btrfs_update_root(trans, root->fs_info->tree_root,
+ ret = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key, &root->root_item);
if (ret < 0) {
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans);
goto out;
}
if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
- ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
- sa->uuid,
+ ret = btrfs_uuid_tree_add(trans, fs_info, sa->uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
if (ret < 0 && ret != -EEXIST) {
goto out;
}
}
- ret = btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
out:
- up_write(&root->fs_info->subvol_sem);
+ up_write(&fs_info->subvol_sem);
mnt_drop_write_file(file);
return ret;
}
int ret = 0;
args32 = memdup_user(arg, sizeof(*args32));
- if (IS_ERR(args32)) {
- ret = PTR_ERR(args32);
- args32 = NULL;
- goto out;
- }
+ if (IS_ERR(args32))
+ return PTR_ERR(args32);
args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
if (!args64) {
int ret = 0;
sa = memdup_user(arg, sizeof(*sa));
- if (IS_ERR(sa)) {
- ret = PTR_ERR(sa);
- sa = NULL;
- goto out;
- }
+ if (IS_ERR(sa))
+ return PTR_ERR(sa);
ret = _btrfs_ioctl_set_received_subvol(file, sa);
static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
size_t len;
int ret;
char label[BTRFS_LABEL_SIZE];
- spin_lock(&root->fs_info->super_lock);
- memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE);
- spin_unlock(&root->fs_info->super_lock);
+ spin_lock(&fs_info->super_lock);
+ memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE);
+ spin_unlock(&fs_info->super_lock);
len = strnlen(label, BTRFS_LABEL_SIZE);
if (len == BTRFS_LABEL_SIZE) {
- btrfs_warn(root->fs_info,
- "label is too long, return the first %zu bytes", --len);
+ btrfs_warn(fs_info,
+ "label is too long, return the first %zu bytes",
+ --len);
}
ret = copy_to_user(arg, label, len);
static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
- struct btrfs_super_block *super_block = root->fs_info->super_copy;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_trans_handle *trans;
char label[BTRFS_LABEL_SIZE];
int ret;
return -EFAULT;
if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
- btrfs_err(root->fs_info,
+ btrfs_err(fs_info,
"unable to set label with more than %d bytes",
BTRFS_LABEL_SIZE - 1);
return -EINVAL;
goto out_unlock;
}
- spin_lock(&root->fs_info->super_lock);
+ spin_lock(&fs_info->super_lock);
strcpy(super_block->label, label);
- spin_unlock(&root->fs_info->super_lock);
- ret = btrfs_commit_transaction(trans, root);
+ spin_unlock(&fs_info->super_lock);
+ ret = btrfs_commit_transaction(trans);
out_unlock:
mnt_drop_write_file(file);
static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
- struct btrfs_super_block *super_block = root->fs_info->super_copy;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_ioctl_feature_flags features;
features.compat_flags = btrfs_super_compat_flags(super_block);
return 0;
}
-static int check_feature_bits(struct btrfs_root *root,
+static int check_feature_bits(struct btrfs_fs_info *fs_info,
enum btrfs_feature_set set,
u64 change_mask, u64 flags, u64 supported_flags,
u64 safe_set, u64 safe_clear)
if (unsupported) {
names = btrfs_printable_features(set, unsupported);
if (names) {
- btrfs_warn(root->fs_info,
- "this kernel does not support the %s feature bit%s",
- names, strchr(names, ',') ? "s" : "");
+ btrfs_warn(fs_info,
+ "this kernel does not support the %s feature bit%s",
+ names, strchr(names, ',') ? "s" : "");
kfree(names);
} else
- btrfs_warn(root->fs_info,
- "this kernel does not support %s bits 0x%llx",
- type, unsupported);
+ btrfs_warn(fs_info,
+ "this kernel does not support %s bits 0x%llx",
+ type, unsupported);
return -EOPNOTSUPP;
}
if (disallowed) {
names = btrfs_printable_features(set, disallowed);
if (names) {
- btrfs_warn(root->fs_info,
- "can't set the %s feature bit%s while mounted",
- names, strchr(names, ',') ? "s" : "");
+ btrfs_warn(fs_info,
+ "can't set the %s feature bit%s while mounted",
+ names, strchr(names, ',') ? "s" : "");
kfree(names);
} else
- btrfs_warn(root->fs_info,
- "can't set %s bits 0x%llx while mounted",
- type, disallowed);
+ btrfs_warn(fs_info,
+ "can't set %s bits 0x%llx while mounted",
+ type, disallowed);
return -EPERM;
}
if (disallowed) {
names = btrfs_printable_features(set, disallowed);
if (names) {
- btrfs_warn(root->fs_info,
- "can't clear the %s feature bit%s while mounted",
- names, strchr(names, ',') ? "s" : "");
+ btrfs_warn(fs_info,
+ "can't clear the %s feature bit%s while mounted",
+ names, strchr(names, ',') ? "s" : "");
kfree(names);
} else
- btrfs_warn(root->fs_info,
- "can't clear %s bits 0x%llx while mounted",
- type, disallowed);
+ btrfs_warn(fs_info,
+ "can't clear %s bits 0x%llx while mounted",
+ type, disallowed);
return -EPERM;
}
return 0;
}
-#define check_feature(root, change_mask, flags, mask_base) \
-check_feature_bits(root, FEAT_##mask_base, change_mask, flags, \
+#define check_feature(fs_info, change_mask, flags, mask_base) \
+check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \
BTRFS_FEATURE_ ## mask_base ## _SUPP, \
BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \
BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
- struct btrfs_super_block *super_block = root->fs_info->super_copy;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_ioctl_feature_flags flags[2];
struct btrfs_trans_handle *trans;
u64 newflags;
!flags[0].incompat_flags)
return 0;
- ret = check_feature(root, flags[0].compat_flags,
+ ret = check_feature(fs_info, flags[0].compat_flags,
flags[1].compat_flags, COMPAT);
if (ret)
return ret;
- ret = check_feature(root, flags[0].compat_ro_flags,
+ ret = check_feature(fs_info, flags[0].compat_ro_flags,
flags[1].compat_ro_flags, COMPAT_RO);
if (ret)
return ret;
- ret = check_feature(root, flags[0].incompat_flags,
+ ret = check_feature(fs_info, flags[0].incompat_flags,
flags[1].incompat_flags, INCOMPAT);
if (ret)
return ret;
goto out_drop_write;
}
- spin_lock(&root->fs_info->super_lock);
+ spin_lock(&fs_info->super_lock);
newflags = btrfs_super_compat_flags(super_block);
newflags |= flags[0].compat_flags & flags[1].compat_flags;
newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags);
newflags |= flags[0].incompat_flags & flags[1].incompat_flags;
newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags);
btrfs_set_super_incompat_flags(super_block, newflags);
- spin_unlock(&root->fs_info->super_lock);
+ spin_unlock(&fs_info->super_lock);
- ret = btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans);
out_drop_write:
mnt_drop_write_file(file);
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
void __user *argp = (void __user *)arg;
switch (cmd) {
case BTRFS_IOC_RESIZE:
return btrfs_ioctl_resize(file, argp);
case BTRFS_IOC_ADD_DEV:
- return btrfs_ioctl_add_dev(root, argp);
+ return btrfs_ioctl_add_dev(fs_info, argp);
case BTRFS_IOC_RM_DEV:
return btrfs_ioctl_rm_dev(file, argp);
case BTRFS_IOC_RM_DEV_V2:
return btrfs_ioctl_rm_dev_v2(file, argp);
case BTRFS_IOC_FS_INFO:
- return btrfs_ioctl_fs_info(root, argp);
+ return btrfs_ioctl_fs_info(fs_info, argp);
case BTRFS_IOC_DEV_INFO:
- return btrfs_ioctl_dev_info(root, argp);
+ return btrfs_ioctl_dev_info(fs_info, argp);
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_TRANS_START:
case BTRFS_IOC_INO_PATHS:
return btrfs_ioctl_ino_to_path(root, argp);
case BTRFS_IOC_LOGICAL_INO:
- return btrfs_ioctl_logical_to_ino(root, argp);
+ return btrfs_ioctl_logical_to_ino(fs_info, argp);
case BTRFS_IOC_SPACE_INFO:
- return btrfs_ioctl_space_info(root, argp);
+ return btrfs_ioctl_space_info(fs_info, argp);
case BTRFS_IOC_SYNC: {
int ret;
- ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1);
+ ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
if (ret)
return ret;
- ret = btrfs_sync_fs(file_inode(file)->i_sb, 1);
+ ret = btrfs_sync_fs(inode->i_sb, 1);
/*
* The transaction thread may want to do more work,
* namely it pokes the cleaner kthread that will start
* processing uncleaned subvols.
*/
- wake_up_process(root->fs_info->transaction_kthread);
+ wake_up_process(fs_info->transaction_kthread);
return ret;
}
case BTRFS_IOC_START_SYNC:
return btrfs_ioctl_start_sync(root, argp);
case BTRFS_IOC_WAIT_SYNC:
- return btrfs_ioctl_wait_sync(root, argp);
+ return btrfs_ioctl_wait_sync(fs_info, argp);
case BTRFS_IOC_SCRUB:
return btrfs_ioctl_scrub(file, argp);
case BTRFS_IOC_SCRUB_CANCEL:
- return btrfs_ioctl_scrub_cancel(root, argp);
+ return btrfs_ioctl_scrub_cancel(fs_info);
case BTRFS_IOC_SCRUB_PROGRESS:
- return btrfs_ioctl_scrub_progress(root, argp);
+ return btrfs_ioctl_scrub_progress(fs_info, argp);
case BTRFS_IOC_BALANCE_V2:
return btrfs_ioctl_balance(file, argp);
case BTRFS_IOC_BALANCE_CTL:
- return btrfs_ioctl_balance_ctl(root, arg);
+ return btrfs_ioctl_balance_ctl(fs_info, arg);
case BTRFS_IOC_BALANCE_PROGRESS:
- return btrfs_ioctl_balance_progress(root, argp);
+ return btrfs_ioctl_balance_progress(fs_info, argp);
case BTRFS_IOC_SET_RECEIVED_SUBVOL:
return btrfs_ioctl_set_received_subvol(file, argp);
#ifdef CONFIG_64BIT
case BTRFS_IOC_SEND:
return btrfs_ioctl_send(file, argp);
case BTRFS_IOC_GET_DEV_STATS:
- return btrfs_ioctl_get_dev_stats(root, argp);
+ return btrfs_ioctl_get_dev_stats(fs_info, argp);
case BTRFS_IOC_QUOTA_CTL:
return btrfs_ioctl_quota_ctl(file, argp);
case BTRFS_IOC_QGROUP_ASSIGN:
case BTRFS_IOC_QUOTA_RESCAN_WAIT:
return btrfs_ioctl_quota_rescan_wait(file, argp);
case BTRFS_IOC_DEV_REPLACE:
- return btrfs_ioctl_dev_replace(root, argp);
+ return btrfs_ioctl_dev_replace(fs_info, argp);
case BTRFS_IOC_GET_FSLABEL:
return btrfs_ioctl_get_fslabel(file, argp);
case BTRFS_IOC_SET_FSLABEL:
struct page **pages;
pgoff_t next_index;
int nr_pages = 0;
- int ret;
+ int got = 0;
+ int ret = 0;
+
+ if (!current->journal_info) {
+ /* caller of readpages does not hold buffer and read caps
+ * (fadvise, madvise and readahead cases) */
+ int want = CEPH_CAP_FILE_CACHE;
+ ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got);
+ if (ret < 0) {
+ dout("start_read %p, error getting cap\n", inode);
+ } else if (!(got & want)) {
+ dout("start_read %p, no cache cap\n", inode);
+ ret = 0;
+ }
+ if (ret <= 0) {
+ if (got)
+ ceph_put_cap_refs(ci, got);
+ while (!list_empty(page_list)) {
+ page = list_entry(page_list->prev,
+ struct page, lru);
+ list_del(&page->lru);
+ put_page(page);
+ }
+ return ret;
+ }
+ }
off = (u64) page_offset(page);
CEPH_OSD_FLAG_READ, NULL,
ci->i_truncate_seq, ci->i_truncate_size,
false);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ goto out;
+ }
/* build page vector */
nr_pages = calc_pages_for(0, len);
pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL);
- ret = -ENOMEM;
- if (!pages)
- goto out;
+ if (!pages) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
for (i = 0; i < nr_pages; ++i) {
page = list_entry(page_list->prev, struct page, lru);
BUG_ON(PageLocked(page));
if (ret < 0)
goto out_pages;
ceph_osdc_put_request(req);
+
+ /* After adding locked pages to page cache, the inode holds cache cap.
+ * So we can drop our cap refs. */
+ if (got)
+ ceph_put_cap_refs(ci, got);
+
return nr_pages;
out_pages:
unlock_page(pages[i]);
}
ceph_put_page_vector(pages, nr_pages, false);
-out:
+out_put:
ceph_osdc_put_request(req);
+out:
+ if (got)
+ ceph_put_cap_refs(ci, got);
return ret;
}
rc = start_read(inode, page_list, max);
if (rc < 0)
goto out;
- BUG_ON(rc == 0);
}
out:
ceph_fscache_readpages_cancel(inode, page_list);
* only snap context we are allowed to write back.
*/
static struct ceph_snap_context *get_oldest_context(struct inode *inode,
- loff_t *snap_size)
+ loff_t *snap_size,
+ u64 *truncate_size,
+ u32 *truncate_seq)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc = NULL;
snapc = ceph_get_snap_context(capsnap->context);
if (snap_size)
*snap_size = capsnap->size;
+ if (truncate_size)
+ *truncate_size = capsnap->truncate_size;
+ if (truncate_seq)
+ *truncate_seq = capsnap->truncate_seq;
break;
}
}
snapc = ceph_get_snap_context(ci->i_head_snapc);
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
+ if (truncate_size)
+ *truncate_size = capsnap->truncate_size;
+ if (truncate_seq)
+ *truncate_seq = capsnap->truncate_seq;
}
spin_unlock(&ci->i_ceph_lock);
return snapc;
dout("writepage %p page %p not dirty?\n", inode, page);
goto out;
}
- oldest = get_oldest_context(inode, &snap_size);
+ oldest = get_oldest_context(inode, &snap_size,
+ &truncate_size, &truncate_seq);
if (snapc->seq > oldest->seq) {
dout("writepage %p page %p snapc %p not writeable - noop\n",
inode, page, snapc);
}
ceph_put_snap_context(oldest);
- spin_lock(&ci->i_ceph_lock);
- truncate_seq = ci->i_truncate_seq;
- truncate_size = ci->i_truncate_size;
if (snap_size == -1)
snap_size = i_size_read(inode);
- spin_unlock(&ci->i_ceph_lock);
/* is this a partial page at end of file? */
if (page_off >= snap_size) {
/* find oldest snap context with dirty data */
ceph_put_snap_context(snapc);
snap_size = -1;
- snapc = get_oldest_context(inode, &snap_size);
+ snapc = get_oldest_context(inode, &snap_size,
+ &truncate_size, &truncate_seq);
if (!snapc) {
/* hmm, why does writepages get called when there
is no dirty data? */
dout(" oldest snapc is %p seq %lld (%d snaps)\n",
snapc, snapc->seq, snapc->num_snaps);
- spin_lock(&ci->i_ceph_lock);
- truncate_seq = ci->i_truncate_seq;
- truncate_size = ci->i_truncate_size;
i_size = i_size_read(inode);
- spin_unlock(&ci->i_ceph_lock);
if (last_snapc && snapc != last_snapc) {
/* if we switched to a newer snapc, restart our scan at the
static int context_is_writeable_or_written(struct inode *inode,
struct ceph_snap_context *snapc)
{
- struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
+ struct ceph_snap_context *oldest = get_oldest_context(inode, NULL,
+ NULL, NULL);
int ret = !oldest || snapc->seq <= oldest->seq;
ceph_put_snap_context(oldest);
* this page is already dirty in another (older) snap
* context! is it writeable now?
*/
- oldest = get_oldest_context(inode, NULL);
+ oldest = get_oldest_context(inode, NULL, NULL, NULL);
if (snapc->seq > oldest->seq) {
ceph_put_snap_context(oldest);
struct page *page, void *fsdata)
{
struct inode *inode = file_inode(file);
- unsigned from = pos & (PAGE_SIZE - 1);
int check_cap = 0;
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
inode, page, (int)pos, (int)copied, (int)len);
/* zero the stale part of the page if we did a short copy */
- if (copied < len)
- zero_user_segment(page, from+copied, len);
+ if (!PageUptodate(page)) {
+ if (copied < len) {
+ copied = 0;
+ goto out;
+ }
+ SetPageUptodate(page);
+ }
/* did file size increase? */
if (pos+copied > i_size_read(inode))
check_cap = ceph_inode_set_size(inode, pos+copied);
- if (!PageUptodate(page))
- SetPageUptodate(page);
-
set_page_dirty(page);
+ out:
unlock_page(page);
put_page(page);
inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
- ci->i_inline_version == CEPH_INLINE_NONE)
+ ci->i_inline_version == CEPH_INLINE_NONE) {
+ current->journal_info = vma->vm_file;
ret = filemap_fault(vma, vmf);
- else
+ current->journal_info = NULL;
+ } else
ret = -EAGAIN;
dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
struct ceph_string *pool_ns;
int ret, flags;
+ if (ci->i_vino.snap != CEPH_NOSNAP) {
+ /*
+ * Pool permission check needs to write to the first object.
+ * But for snapshot, head of the first object may have alread
+ * been deleted. Skip check to avoid creating orphan object.
+ */
+ return 0;
+ }
+
if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
NOPOOLPERM))
return 0;
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
- brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(sbi->s_qf_names[i]);
}
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
+ brelse(sbi->s_sbh);
sb->s_fs_info = NULL;
/*
* Now that we are completely done shutting down the
static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
void *fs_data)
{
- handle_t *handle;
- int res, res2;
+ handle_t *handle = fs_data;
+ int res, res2, retries = 0;
+
+ /*
+ * If a journal handle was specified, then the encryption context is
+ * being set on a new inode via inheritance and is part of a larger
+ * transaction to create the inode. Otherwise the encryption context is
+ * being set on an existing inode in its own transaction. Only in the
+ * latter case should the "retry on ENOSPC" logic be used.
+ */
- /* fs_data is null when internally used. */
- if (fs_data) {
- res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
- EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx,
- len, 0);
+ if (handle) {
+ res = ext4_xattr_set_handle(handle, inode,
+ EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
+ /*
+ * Update inode->i_flags - e.g. S_DAX may get disabled
+ */
+ ext4_set_inode_flags(inode);
}
return res;
}
+retry:
handle = ext4_journal_start(inode, EXT4_HT_MISC,
ext4_jbd2_credits_xattr(inode));
if (IS_ERR(handle))
return PTR_ERR(handle);
- res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
- EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx,
- len, 0);
+ res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+ /* Update inode->i_flags - e.g. S_DAX may get disabled */
+ ext4_set_inode_flags(inode);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
}
res2 = ext4_journal_stop(handle);
+
+ if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
if (!res)
res = res2;
return res;
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path);
+ const struct path *path);
static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
return 0;
}
}
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
- test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
- ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
- "in data=ordered mode");
- return 0;
- }
return 1;
}
struct ext4_super_block *es)
{
unsigned int s_flags = sb->s_flags;
- int nr_orphans = 0, nr_truncates = 0;
+ int ret, nr_orphans = 0, nr_truncates = 0;
#ifdef CONFIG_QUOTA
int i;
#endif
inode->i_ino, inode->i_size);
inode_lock(inode);
truncate_inode_pages(inode->i_mapping, inode->i_size);
- ext4_truncate(inode);
+ ret = ext4_truncate(inode);
+ if (ret)
+ ext4_std_error(inode->i_sb, ret);
inode_unlock(inode);
nr_truncates++;
} else {
ext4_set_bit(s++, buf);
count++;
}
- for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
- ext4_set_bit(EXT4_B2C(sbi, s++), buf);
- count++;
+ j = ext4_bg_num_gdb(sb, grp);
+ if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
+ ext4_error(sb, "Invalid number of block group "
+ "descriptor blocks: %d", j);
+ j = EXT4_BLOCKS_PER_GROUP(sb) - s;
}
+ count += j;
+ for (; j > 0; j--)
+ ext4_set_bit(EXT4_B2C(sbi, s++), buf);
}
if (!count)
return 0;
char *orig_data = kstrdup(data, GFP_KERNEL);
struct buffer_head *bh;
struct ext4_super_block *es = NULL;
- struct ext4_sb_info *sbi;
+ struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
ext4_fsblk_t block;
ext4_fsblk_t sb_block = get_sb_block(&data);
ext4_fsblk_t logical_sb_block;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
ext4_group_t first_not_zeroed;
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- goto out_free_orig;
+ if ((data && !orig_data) || !sbi)
+ goto out_free_base;
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
- if (!sbi->s_blockgroup_lock) {
- kfree(sbi);
- goto out_free_orig;
- }
+ if (!sbi->s_blockgroup_lock)
+ goto out_free_base;
+
sb->s_fs_info = sbi;
sbi->s_sb = sb;
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
*/
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
- if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
- &journal_devnum, &journal_ioprio, 0)) {
- ext4_msg(sb, KERN_WARNING,
- "failed to parse options in superblock: %s",
- sbi->s_es->s_mount_opts);
+ if (sbi->s_es->s_mount_opts[0]) {
+ char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
+ sizeof(sbi->s_es->s_mount_opts),
+ GFP_KERNEL);
+ if (!s_mount_opts)
+ goto failed_mount;
+ if (!parse_options(s_mount_opts, sb, &journal_devnum,
+ &journal_ioprio, 0)) {
+ ext4_msg(sb, KERN_WARNING,
+ "failed to parse options in superblock: %s",
+ s_mount_opts);
+ }
+ kfree(s_mount_opts);
}
sbi->s_def_mount_opt = sbi->s_mount_opt;
if (!parse_options((char *) data, sb, &journal_devnum,
"both data=journal and dax");
goto failed_mount;
}
+ if (ext4_has_feature_encrypt(sb)) {
+ ext4_msg(sb, KERN_WARNING,
+ "encrypted files will use data=ordered "
+ "instead of data journaling mode");
+ }
if (test_opt(sb, DELALLOC))
clear_opt(sb, DELALLOC);
} else {
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
- if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
- goto cantfind_ext4;
sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
if (sbi->s_inodes_per_block == 0)
goto cantfind_ext4;
+ if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
+ sbi->s_inodes_per_group > blocksize * 8) {
+ ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
+ sbi->s_blocks_per_group);
+ goto failed_mount;
+ }
sbi->s_itb_per_group = sbi->s_inodes_per_group /
sbi->s_inodes_per_block;
sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
}
sbi->s_cluster_ratio = clustersize / blocksize;
- if (sbi->s_inodes_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#inodes per group too big: %lu",
- sbi->s_inodes_per_group);
- goto failed_mount;
- }
-
/* Do we have standard group size of clustersize * 8 blocks ? */
if (sbi->s_blocks_per_group == clustersize << 3)
set_opt2(sb, STD_GROUP_SIZE);
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
+ if (ext4_has_feature_meta_bg(sb)) {
+ if (le32_to_cpu(es->s_first_meta_bg) >= db_count) {
+ ext4_msg(sb, KERN_WARNING,
+ "first meta block group too large: %u "
+ "(group descriptor block count %u)",
+ le32_to_cpu(es->s_first_meta_bg), db_count);
+ goto failed_mount;
+ }
+ }
sbi->s_group_desc = ext4_kvmalloc(db_count *
sizeof(struct buffer_head *),
GFP_KERNEL);
default:
break;
}
+
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
+ test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_async_commit in data=ordered mode");
+ goto failed_mount_wq;
+ }
+
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+ "Opts: %.*s%s%s", descr,
+ (int) sizeof(sbi->s_es->s_mount_opts),
+ sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
if (es->s_error_count)
out_fail:
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
+out_free_base:
kfree(sbi);
-out_free_orig:
kfree(orig_data);
return err ? err : ret;
}
&EXT4_SB(sb)->s_freeinodes_counter));
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
- lock_buffer(sbh);
+ if (sync)
+ lock_buffer(sbh);
if (buffer_write_io_error(sbh)) {
/*
* Oh, dear. A previous attempt to write the
set_buffer_uptodate(sbh);
}
mark_buffer_dirty(sbh);
- unlock_buffer(sbh);
if (sync) {
+ unlock_buffer(sbh);
error = __sync_dirty_buffer(sbh,
- test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
+ test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC);
if (error)
return error;
err = -EINVAL;
goto restore_opts;
}
+ } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
+ if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_async_commit in data=ordered mode");
+ err = -EINVAL;
+ goto restore_opts;
+ }
}
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
* Standard function to be called on quota_on
*/
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path)
+ const struct path *path)
{
int err;
handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
if (IS_ERR(handle))
goto out;
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
extern void *copy_mount_options(const void __user *);
extern char *copy_mount_string(const void __user *);
- extern struct vfsmount *lookup_mnt(struct path *);
+ extern struct vfsmount *lookup_mnt(const struct path *);
extern int finish_automount(struct vfsmount *, struct path *);
extern int sb_prepare_remount_readonly(struct super_block *);
loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap_ops *ops, void *data,
iomap_actor_t actor);
+
+/* direct-io.c: */
+int sb_init_dio_done_wq(struct super_block *sb);
if (managed & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
- ret = path->dentry->d_op->d_manage(path->dentry, false);
+ ret = path->dentry->d_op->d_manage(path, false);
if (ret < 0)
break;
}
}
EXPORT_SYMBOL(follow_down_one);
- static inline int managed_dentry_rcu(struct dentry *dentry)
+ static inline int managed_dentry_rcu(const struct path *path)
{
- return (dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
- dentry->d_op->d_manage(dentry, true) : 0;
+ return (path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
+ path->dentry->d_op->d_manage(path, true) : 0;
}
/*
* Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit.
*/
- switch (managed_dentry_rcu(path->dentry)) {
+ switch (managed_dentry_rcu(path)) {
case -ECHILD:
default:
return false;
if (managed & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
- ret = path->dentry->d_op->d_manage(
- path->dentry, false);
+ ret = path->dentry->d_op->d_manage(path, false);
if (ret < 0)
return ret == -EISDIR ? 0 : ret;
}
return 1;
}
+enum {WALK_FOLLOW = 1, WALK_MORE = 2};
+
/*
* Do we need to follow links? We _really_ want to be able
* to do this check without having to look at inode->i_op,
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
*/
-static inline int should_follow_link(struct nameidata *nd, struct path *link,
- int follow,
- struct inode *inode, unsigned seq)
+static inline int step_into(struct nameidata *nd, struct path *path,
+ int flags, struct inode *inode, unsigned seq)
{
- if (likely(!d_is_symlink(link->dentry)))
- return 0;
- if (!follow)
+ if (!(flags & WALK_MORE) && nd->depth)
+ put_link(nd);
+ if (likely(!d_is_symlink(path->dentry)) ||
+ !(flags & WALK_FOLLOW || nd->flags & LOOKUP_FOLLOW)) {
+ /* not a symlink or should not follow */
+ path_to_nameidata(path, nd);
+ nd->inode = inode;
+ nd->seq = seq;
return 0;
+ }
/* make sure that d_is_symlink above matches inode */
if (nd->flags & LOOKUP_RCU) {
- if (read_seqcount_retry(&link->dentry->d_seq, seq))
+ if (read_seqcount_retry(&path->dentry->d_seq, seq))
return -ECHILD;
}
- return pick_link(nd, link, inode, seq);
+ return pick_link(nd, path, inode, seq);
}
-enum {WALK_GET = 1, WALK_PUT = 2};
-
static int walk_component(struct nameidata *nd, int flags)
{
struct path path;
*/
if (unlikely(nd->last_type != LAST_NORM)) {
err = handle_dots(nd, nd->last_type);
- if (flags & WALK_PUT)
+ if (!(flags & WALK_MORE) && nd->depth)
put_link(nd);
return err;
}
inode = d_backing_inode(path.dentry);
}
- if (flags & WALK_PUT)
- put_link(nd);
- err = should_follow_link(nd, &path, flags & WALK_GET, inode, seq);
- if (unlikely(err))
- return err;
- path_to_nameidata(&path, nd);
- nd->inode = inode;
- nd->seq = seq;
- return 0;
+ return step_into(nd, &path, flags, inode, seq);
}
/*
if (!name)
return 0;
/* last component of nested symlink */
- err = walk_component(nd, WALK_GET | WALK_PUT);
+ err = walk_component(nd, WALK_FOLLOW);
} else {
- err = walk_component(nd, WALK_GET);
+ /* not the last component */
+ err = walk_component(nd, WALK_FOLLOW | WALK_MORE);
}
if (err < 0)
return err;
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
nd->flags &= ~LOOKUP_PARENT;
- return walk_component(nd,
- nd->flags & LOOKUP_FOLLOW
- ? nd->depth
- ? WALK_PUT | WALK_GET
- : WALK_GET
- : 0);
+ return walk_component(nd, 0);
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
}
EXPORT_SYMBOL(user_path_at_empty);
-/*
- * NB: most callers don't do anything directly with the reference to the
- * to struct filename, but the nd->last pointer points into the name string
- * allocated by getname. So we must hold the reference to it until all
- * path-walking is complete.
- */
-static inline struct filename *
-user_path_parent(int dfd, const char __user *path,
- struct path *parent,
- struct qstr *last,
- int *type,
- unsigned int flags)
-{
- /* only LOOKUP_REVAL is allowed in extra flags */
- return filename_parentat(dfd, getname(path), flags & LOOKUP_REVAL,
- parent, last, type);
-}
-
/**
* mountpoint_last - look up last component for umount
* @nd: pathwalk nameidata - currently pointing at parent directory of "last"
- * @path: pointer to container for result
*
* This is a special lookup_last function just for umount. In this case, we
* need to resolve the path without doing any revalidation.
*
* Returns:
* -error: if there was an error during lookup. This includes -ENOENT if the
- * lookup found a negative dentry. The nd->path reference will also be
- * put in this case.
+ * lookup found a negative dentry.
*
- * 0: if we successfully resolved nd->path and found it to not to be a
- * symlink that needs to be followed. "path" will also be populated.
- * The nd->path reference will also be put.
+ * 0: if we successfully resolved nd->last and found it to not to be a
+ * symlink that needs to be followed.
*
* 1: if we successfully resolved nd->last and found it to be a symlink
- * that needs to be followed. "path" will be populated with the path
- * to the link, and nd->path will *not* be put.
+ * that needs to be followed.
*/
static int
-mountpoint_last(struct nameidata *nd, struct path *path)
+mountpoint_last(struct nameidata *nd)
{
int error = 0;
- struct dentry *dentry;
struct dentry *dir = nd->path.dentry;
+ struct path path;
/* If we're in rcuwalk, drop out of it to handle last component */
if (nd->flags & LOOKUP_RCU) {
error = handle_dots(nd, nd->last_type);
if (error)
return error;
- dentry = dget(nd->path.dentry);
+ path.dentry = dget(nd->path.dentry);
} else {
- dentry = d_lookup(dir, &nd->last);
- if (!dentry) {
+ path.dentry = d_lookup(dir, &nd->last);
+ if (!path.dentry) {
/*
* No cached dentry. Mounted dentries are pinned in the
* cache, so that means that this dentry is probably
* a symlink or the path doesn't actually point
* to a mounted dentry.
*/
- dentry = lookup_slow(&nd->last, dir,
+ path.dentry = lookup_slow(&nd->last, dir,
nd->flags | LOOKUP_NO_REVAL);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ if (IS_ERR(path.dentry))
+ return PTR_ERR(path.dentry);
}
}
- if (d_is_negative(dentry)) {
- dput(dentry);
+ if (d_is_negative(path.dentry)) {
+ dput(path.dentry);
return -ENOENT;
}
- if (nd->depth)
- put_link(nd);
- path->dentry = dentry;
- path->mnt = nd->path.mnt;
- error = should_follow_link(nd, path, nd->flags & LOOKUP_FOLLOW,
- d_backing_inode(dentry), 0);
- if (unlikely(error))
- return error;
- mntget(path->mnt);
- follow_mount(path);
- return 0;
+ path.mnt = nd->path.mnt;
+ return step_into(nd, &path, 0, d_backing_inode(path.dentry), 0);
}
/**
if (IS_ERR(s))
return PTR_ERR(s);
while (!(err = link_path_walk(s, nd)) &&
- (err = mountpoint_last(nd, path)) > 0) {
+ (err = mountpoint_last(nd)) > 0) {
s = trailing_symlink(nd);
if (IS_ERR(s)) {
err = PTR_ERR(s);
break;
}
}
+ if (!err) {
+ *path = nd->path;
+ nd->path.mnt = NULL;
+ nd->path.dentry = NULL;
+ follow_mount(path);
+ }
terminate_walk(nd);
return err;
}
!(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
}
- static int may_open(struct path *path, int acc_mode, int flag)
+ static int may_open(const struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
struct inode *inode = dentry->d_inode;
static int handle_truncate(struct file *filp)
{
- struct path *path = &filp->f_path;
+ const struct path *path = &filp->f_path;
struct inode *inode = path->dentry->d_inode;
int error = get_write_access(inode);
if (error)
seq = 0; /* out of RCU mode, so the value doesn't matter */
inode = d_backing_inode(path.dentry);
finish_lookup:
- if (nd->depth)
- put_link(nd);
- error = should_follow_link(nd, &path, nd->flags & LOOKUP_FOLLOW,
- inode, seq);
+ error = step_into(nd, &path, 0, inode, seq);
if (unlikely(error))
return error;
-
- path_to_nameidata(&path, nd);
- nd->inode = inode;
- nd->seq = seq;
- /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
finish_open:
+ /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
error = complete_walk(nd);
if (error)
return error;
int type;
unsigned int lookup_flags = 0;
retry:
- name = user_path_parent(dfd, pathname,
- &path, &last, &type, lookup_flags);
+ name = filename_parentat(dfd, getname(pathname), lookup_flags,
+ &path, &last, &type);
if (IS_ERR(name))
return PTR_ERR(name);
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
- name = user_path_parent(dfd, pathname,
- &path, &last, &type, lookup_flags);
+ name = filename_parentat(dfd, getname(pathname), lookup_flags,
+ &path, &last, &type);
if (IS_ERR(name))
return PTR_ERR(name);
bool new_is_dir = false;
unsigned max_links = new_dir->i_sb->s_max_links;
- /*
- * Check source == target.
- * On overlayfs need to look at underlying inodes.
- */
- if (d_real_inode(old_dentry) == d_real_inode(new_dentry))
+ if (source == target)
return 0;
error = may_delete(old_dir, old_dentry, is_dir);
target_flags = 0;
retry:
- from = user_path_parent(olddfd, oldname,
- &old_path, &old_last, &old_type, lookup_flags);
+ from = filename_parentat(olddfd, getname(oldname), lookup_flags,
+ &old_path, &old_last, &old_type);
if (IS_ERR(from)) {
error = PTR_ERR(from);
goto exit;
}
- to = user_path_parent(newdfd, newname,
- &new_path, &new_last, &new_type, lookup_flags);
+ to = filename_parentat(newdfd, getname(newname), lookup_flags,
+ &new_path, &new_last, &new_type);
if (IS_ERR(to)) {
error = PTR_ERR(to);
goto exit1;
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
+ const unsigned long force_reval = NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED;
+ unsigned long cache_validity = nfsi->cache_validity;
- if (nfs_have_delegated_attributes(inode))
+ if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
+ (cache_validity & force_reval) != force_reval)
goto out_noreval;
if (filp->f_flags & O_DIRECT)
*/
if (!PageUptodate(page)) {
unsigned pglen = nfs_page_length(page);
- unsigned end = offset + len;
+ unsigned end = offset + copied;
if (pglen == 0) {
zero_user_segments(page, 0, offset,
trace_ocfs2_bmap((unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)block);
+ /*
+ * The swap code (ab-)uses ->bmap to get a block mapping and then
+ * bypasseѕ the file system for actual I/O. We really can't allow
+ * that on refcounted inodes, so we have to skip out here. And yes,
+ * 0 is the magic code for a bmap error..
+ */
+ if (ocfs2_is_refcount_inode(inode))
+ return 0;
+
/* We don't need to lock journal system files, since they aren't
* accessed concurrently from multiple nodes.
*/
if (!buffer_mapped(bh)) {
map_bh(bh, inode->i_sb, *p_blkno);
- unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+ clean_bdev_bh_alias(bh);
}
if (PageUptodate(page)) {
}
int ocfs2_write_end_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ loff_t pos, unsigned len, unsigned copied, void *fsdata)
{
int i, ret;
unsigned from, to, start = pos & (PAGE_SIZE - 1);
int ret;
struct inode *inode = mapping->host;
- ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
+ ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
ocfs2_inode_unlock(inode, 1);
dwc->dw_zero_count++;
}
- ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
+ ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc);
BUG_ON(ret != len);
ret = 0;
unlock:
return ret;
}
- static void ocfs2_dio_end_io_write(struct inode *inode,
- struct ocfs2_dio_write_ctxt *dwc,
- loff_t offset,
- ssize_t bytes)
+ static int ocfs2_dio_end_io_write(struct inode *inode,
+ struct ocfs2_dio_write_ctxt *dwc,
+ loff_t offset,
+ ssize_t bytes)
{
struct ocfs2_cached_dealloc_ctxt dealloc;
struct ocfs2_extent_tree et;
mlog_errno(ret);
}
- di = (struct ocfs2_dinode *)di_bh;
+ di = (struct ocfs2_dinode *)di_bh->b_data;
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
if (locked)
inode_unlock(inode);
ocfs2_dio_free_write_ctx(inode, dwc);
+
+ return ret;
}
/*
{
struct inode *inode = file_inode(iocb->ki_filp);
int level;
-
- if (bytes <= 0)
- return 0;
+ int ret = 0;
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
- if (private)
- ocfs2_dio_end_io_write(inode, private, offset, bytes);
+ if (bytes > 0 && private)
+ ret = ocfs2_dio_end_io_write(inode, private, offset, bytes);
ocfs2_iocb_clear_rw_locked(iocb);
level = ocfs2_iocb_rw_locked_level(iocb);
ocfs2_rw_unlock(inode, level);
- return 0;
+ return ret;
}
static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
#include "xattr.h"
#include "namei.h"
#include "ocfs2_trace.h"
+ #include "file.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
goto out;
}
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
di = (struct ocfs2_dinode *)di_bh->b_data;
*ref_blkno = le64_to_cpu(di->i_refcount_loc);
if (ret) {
mlog_errno(ret);
ocfs2_unlock_refcount_tree(osb, tree, rw);
- ocfs2_refcount_tree_put(tree);
goto out;
}
u32 num_got;
u64 suballoc_loc, first_blkno;
- BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+ BUG_ON(ocfs2_is_refcount_inode(inode));
trace_ocfs2_create_refcount_tree(
(unsigned long long)OCFS2_I(inode)->ip_blkno);
struct ocfs2_refcount_block *rb;
struct ocfs2_refcount_tree *ref_tree;
- BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+ BUG_ON(ocfs2_is_refcount_inode(inode));
ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
&ref_tree, &ref_root_bh);
u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
u16 bit = 0;
- if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
+ if (!ocfs2_is_refcount_inode(inode))
return 0;
BUG_ON(!ref_blkno);
{
int ret;
u64 ref_blkno;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *tree;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_get_refcount_block(inode, &ref_blkno);
if (ret) {
int *ref_blocks)
{
int ret;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *tree;
u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
goto out;
}
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
refcount_loc, &tree);
{
int ret;
u32 cow_start = 0, cow_len = 0;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *ref_tree;
struct ocfs2_cow_context *context = NULL;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
cpos, write_len, max_cpos,
{
int ret;
struct ocfs2_xattr_value_root *xv = vb->vb_xv;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_cow_context *context = NULL;
u32 cow_start, cow_len;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
cpos, write_len, UINT_MAX,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_alloc_context *meta_ac = NULL;
+ /* We need to be able to handle at least an extent tree split. */
+ ref_blocks = ocfs2_extend_meta_needed(data_et->et_root_el);
+
ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
ref_ci, ref_root_bh,
p_cluster, num_clusters,
ocfs2_init_dealloc_ctxt(&dealloc);
- if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) {
+ if (!ocfs2_is_refcount_inode(inode)) {
ret = ocfs2_create_refcount_tree(inode, di_bh);
if (ret) {
mlog_errno(ret);
ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
p_cluster, num_clusters,
meta_ac, dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ ret = dquot_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, num_clusters));
if (ret)
mlog_errno(ret);
return error;
}
+
+ /* Update destination inode size, if necessary. */
+ static int ocfs2_reflink_update_dest(struct inode *dest,
+ struct buffer_head *d_bh,
+ loff_t newlen)
+ {
+ handle_t *handle;
+ int ret;
+
+ dest->i_blocks = ocfs2_inode_sector_count(dest);
+
+ if (newlen <= i_size_read(dest))
+ return 0;
+
+ handle = ocfs2_start_trans(OCFS2_SB(dest->i_sb),
+ OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ return ret;
+ }
+
+ /* Extend i_size if needed. */
+ spin_lock(&OCFS2_I(dest)->ip_lock);
+ if (newlen > i_size_read(dest))
+ i_size_write(dest, newlen);
+ spin_unlock(&OCFS2_I(dest)->ip_lock);
+ dest->i_ctime = dest->i_mtime = current_time(dest);
+
+ ret = ocfs2_mark_inode_dirty(handle, dest, d_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ out_commit:
+ ocfs2_commit_trans(OCFS2_SB(dest->i_sb), handle);
+ return ret;
+ }
+
+ /* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
+ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
+ {
+ struct ocfs2_extent_tree s_et;
+ struct ocfs2_extent_tree t_et;
+ struct ocfs2_dinode *dis;
+ struct buffer_head *ref_root_bh = NULL;
+ struct ocfs2_refcount_tree *ref_tree;
+ struct ocfs2_super *osb;
+ loff_t pstart, plen;
+ u32 p_cluster, num_clusters, slast, spos, tpos;
+ unsigned int ext_flags;
+ int ret = 0;
+
+ osb = OCFS2_SB(s_inode->i_sb);
+ dis = (struct ocfs2_dinode *)s_bh->b_data;
+ ocfs2_init_dinode_extent_tree(&s_et, INODE_CACHE(s_inode), s_bh);
+ ocfs2_init_dinode_extent_tree(&t_et, INODE_CACHE(t_inode), t_bh);
+
+ spos = ocfs2_bytes_to_clusters(s_inode->i_sb, pos_in);
+ tpos = ocfs2_bytes_to_clusters(t_inode->i_sb, pos_out);
+ slast = ocfs2_clusters_for_bytes(s_inode->i_sb, pos_in + len);
+
+ while (spos < slast) {
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
+
+ /* Look up the extent. */
+ ret = ocfs2_get_clusters(s_inode, spos, &p_cluster,
+ &num_clusters, &ext_flags);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ num_clusters = min_t(u32, num_clusters, slast - spos);
+
+ /* Punch out the dest range. */
+ pstart = ocfs2_clusters_to_bytes(t_inode->i_sb, tpos);
+ plen = ocfs2_clusters_to_bytes(t_inode->i_sb, num_clusters);
+ ret = ocfs2_remove_inode_range(t_inode, t_bh, pstart, plen);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (p_cluster == 0)
+ goto next_loop;
+
+ /* Lock the refcount btree... */
+ ret = ocfs2_lock_refcount_tree(osb,
+ le64_to_cpu(dis->i_refcount_loc),
+ 1, &ref_tree, &ref_root_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* Mark s_inode's extent as refcounted. */
+ if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) {
+ ret = ocfs2_add_refcount_flag(s_inode, &s_et,
+ &ref_tree->rf_ci,
+ ref_root_bh, spos,
+ p_cluster, num_clusters,
+ dealloc, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_refcount;
+ }
+ }
+
+ /* Map in the new extent. */
+ ext_flags |= OCFS2_EXT_REFCOUNTED;
+ ret = ocfs2_add_refcounted_extent(t_inode, &t_et,
+ &ref_tree->rf_ci,
+ ref_root_bh,
+ tpos, p_cluster,
+ num_clusters,
+ ext_flags,
+ dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_refcount;
+ }
+
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+ brelse(ref_root_bh);
+ next_loop:
+ spos += num_clusters;
+ tpos += num_clusters;
+ }
+
+ out:
+ return ret;
+ out_unlock_refcount:
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+ brelse(ref_root_bh);
+ return ret;
+ }
+
+ /* Set up refcount tree and remap s_inode to t_inode. */
+ static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len)
+ {
+ struct ocfs2_cached_dealloc_ctxt dealloc;
+ struct ocfs2_super *osb;
+ struct ocfs2_dinode *dis;
+ struct ocfs2_dinode *dit;
+ int ret;
+
+ osb = OCFS2_SB(s_inode->i_sb);
+ dis = (struct ocfs2_dinode *)s_bh->b_data;
+ dit = (struct ocfs2_dinode *)t_bh->b_data;
+ ocfs2_init_dealloc_ctxt(&dealloc);
+
+ /*
+ * If we're reflinking the entire file and the source is inline
+ * data, just copy the contents.
+ */
+ if (pos_in == pos_out && pos_in == 0 && len == i_size_read(s_inode) &&
+ i_size_read(t_inode) <= len &&
+ (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) {
+ ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh);
+ if (ret)
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * If both inodes belong to two different refcount groups then
+ * forget it because we don't know how (or want) to go merging
+ * refcount trees.
+ */
+ ret = -EOPNOTSUPP;
+ if (ocfs2_is_refcount_inode(s_inode) &&
+ ocfs2_is_refcount_inode(t_inode) &&
+ le64_to_cpu(dis->i_refcount_loc) !=
+ le64_to_cpu(dit->i_refcount_loc))
+ goto out;
+
+ /* Neither inode has a refcount tree. Add one to s_inode. */
+ if (!ocfs2_is_refcount_inode(s_inode) &&
+ !ocfs2_is_refcount_inode(t_inode)) {
+ ret = ocfs2_create_refcount_tree(s_inode, s_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Ensure that both inodes end up with the same refcount tree. */
+ if (!ocfs2_is_refcount_inode(s_inode)) {
+ ret = ocfs2_set_refcount_tree(s_inode, s_bh,
+ le64_to_cpu(dit->i_refcount_loc));
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+ if (!ocfs2_is_refcount_inode(t_inode)) {
+ ret = ocfs2_set_refcount_tree(t_inode, t_bh,
+ le64_to_cpu(dis->i_refcount_loc));
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Turn off inline data in the dest file. */
+ if (OCFS2_I(t_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ ret = ocfs2_convert_inline_data_to_extents(t_inode, t_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Actually remap extents now. */
+ ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
+ pos_out, len, &dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ out:
+ if (ocfs2_dealloc_has_cluster(&dealloc)) {
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &dealloc);
+ }
+
+ return ret;
+ }
+
+ /* Lock an inode and grab a bh pointing to the inode. */
+ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+ struct buffer_head **bh1,
+ struct inode *t_inode,
+ struct buffer_head **bh2)
+ {
+ struct inode *inode1;
+ struct inode *inode2;
+ struct ocfs2_inode_info *oi1;
+ struct ocfs2_inode_info *oi2;
+ bool same_inode = (s_inode == t_inode);
+ int status;
+
+ /* First grab the VFS and rw locks. */
+ lock_two_nondirectories(s_inode, t_inode);
+ inode1 = s_inode;
+ inode2 = t_inode;
+ if (inode1->i_ino > inode2->i_ino)
+ swap(inode1, inode2);
+
+ status = ocfs2_rw_lock(inode1, 1);
+ if (status) {
+ mlog_errno(status);
+ goto out_i1;
+ }
+ if (!same_inode) {
+ status = ocfs2_rw_lock(inode2, 1);
+ if (status) {
+ mlog_errno(status);
+ goto out_i2;
+ }
+ }
+
+ /* Now go for the cluster locks */
+ oi1 = OCFS2_I(inode1);
+ oi2 = OCFS2_I(inode2);
+
+ trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
+ (unsigned long long)oi2->ip_blkno);
+
+ if (*bh1)
+ *bh1 = NULL;
+ if (*bh2)
+ *bh2 = NULL;
+
+ /* We always want to lock the one with the lower lockid first. */
+ if (oi1->ip_blkno > oi2->ip_blkno)
+ mlog_errno(-ENOLCK);
+
+ /* lock id1 */
+ status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ goto out_rw2;
+ }
+
+ /* lock id2 */
+ if (!same_inode) {
+ status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+ OI_LS_REFLINK_TARGET);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ goto out_cl1;
+ }
+ } else
+ *bh2 = *bh1;
+
+ trace_ocfs2_double_lock_end(
+ (unsigned long long)OCFS2_I(inode1)->ip_blkno,
+ (unsigned long long)OCFS2_I(inode2)->ip_blkno);
+
+ return 0;
+
+ out_cl1:
+ ocfs2_inode_unlock(inode1, 1);
+ brelse(*bh1);
+ *bh1 = NULL;
+ out_rw2:
+ ocfs2_rw_unlock(inode2, 1);
+ out_i2:
+ ocfs2_rw_unlock(inode1, 1);
+ out_i1:
+ unlock_two_nondirectories(s_inode, t_inode);
+ return status;
+ }
+
+ /* Unlock both inodes and release buffers. */
+ static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ struct inode *t_inode,
+ struct buffer_head *t_bh)
+ {
+ ocfs2_inode_unlock(s_inode, 1);
+ ocfs2_rw_unlock(s_inode, 1);
+ brelse(s_bh);
+ if (s_inode != t_inode) {
+ ocfs2_inode_unlock(t_inode, 1);
+ ocfs2_rw_unlock(t_inode, 1);
+ brelse(t_bh);
+ }
+ unlock_two_nondirectories(s_inode, t_inode);
+ }
+
+ /* Link a range of blocks from one file to another. */
+ int ocfs2_reflink_remap_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len,
+ bool is_dedupe)
+ {
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
+ struct buffer_head *in_bh = NULL, *out_bh = NULL;
+ bool same_inode = (inode_in == inode_out);
+ ssize_t ret;
+
+ if (!ocfs2_refcount_tree(osb))
+ return -EOPNOTSUPP;
+ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+ return -EROFS;
+
+ /* Lock both files against IO */
+ ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
+ if (ret)
+ return ret;
+
+ /* Check file eligibility and prepare for block sharing. */
+ ret = -EINVAL;
+ if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
+ (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
+ goto out_unlock;
+
+ ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+ &len, is_dedupe);
+ if (ret || len == 0)
+ goto out_unlock;
+
+ /* Lock out changes to the allocation maps and remap. */
+ down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+ if (!same_inode)
+ down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
+ SINGLE_DEPTH_NESTING);
+
+ ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
+ out_bh, pos_out, len);
+
+ /* Zap any page cache for the destination file's range. */
+ if (!ret)
+ truncate_inode_pages_range(&inode_out->i_data, pos_out,
+ PAGE_ALIGN(pos_out + len) - 1);
+
+ up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+ if (!same_inode)
+ up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+ /*
+ * Empty the extent map so that we may get the right extent
+ * record from the disk.
+ */
+ ocfs2_extent_map_trunc(inode_in, 0);
+ ocfs2_extent_map_trunc(inode_out, 0);
+
+ ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+ ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+ return 0;
+
+ out_unlock:
+ ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+ return ret;
+ }
if (len == 0)
return 0;
- ret = mnt_want_write_file(file_out);
- if (ret)
- return ret;
+ sb_start_write(inode_out->i_sb);
- ret = -EOPNOTSUPP;
- if (file_out->f_op->copy_file_range)
+ /*
+ * Try cloning first, this is supported by more file systems, and
+ * more efficient if both clone and copy are supported (e.g. NFS).
+ */
+ if (file_in->f_op->clone_file_range) {
+ ret = file_in->f_op->clone_file_range(file_in, pos_in,
+ file_out, pos_out, len);
+ if (ret == 0) {
+ ret = len;
+ goto done;
+ }
+ }
+
+ if (file_out->f_op->copy_file_range) {
ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
pos_out, len, flags);
- if (ret == -EOPNOTSUPP)
- ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
- len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
+ if (ret != -EOPNOTSUPP)
+ goto done;
+ }
+
+ ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+ len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
+ done:
if (ret > 0) {
fsnotify_access(file_in);
add_rchar(current, ret);
fsnotify_modify(file_out);
add_wchar(current, ret);
}
+
inc_syscr(current);
inc_syscw(current);
- mnt_drop_write_file(file_out);
+ sb_end_write(inode_out->i_sb);
return ret;
}
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}
+ /*
+ * Check that the two inodes are eligible for cloning, the ranges make
+ * sense, and then flush all dirty data. Caller must ensure that the
+ * inodes have been locked against any other modifications.
+ */
+ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
+ struct inode *inode_out, loff_t pos_out,
+ u64 *len, bool is_dedupe)
+ {
+ loff_t bs = inode_out->i_sb->s_blocksize;
+ loff_t blen;
+ loff_t isize;
+ bool same_inode = (inode_in == inode_out);
+ int ret;
+
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode_out))
+ return -EPERM;
+
+ if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ return -ETXTBSY;
+
+ /* Don't reflink dirs, pipes, sockets... */
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
+ /* Are we going all the way to the end? */
+ isize = i_size_read(inode_in);
+ if (isize == 0) {
+ *len = 0;
+ return 0;
+ }
+
+ /* Zero length dedupe exits immediately; reflink goes to EOF. */
+ if (*len == 0) {
+ if (is_dedupe) {
+ *len = 0;
+ return 0;
+ }
+ *len = isize - pos_in;
+ }
+
+ /* Ensure offsets don't wrap and the input is inside i_size */
+ if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
+ pos_in + *len > isize)
+ return -EINVAL;
+
+ /* Don't allow dedupe past EOF in the dest file */
+ if (is_dedupe) {
+ loff_t disize;
+
+ disize = i_size_read(inode_out);
+ if (pos_out >= disize || pos_out + *len > disize)
+ return -EINVAL;
+ }
+
+ /* If we're linking to EOF, continue to the block boundary. */
+ if (pos_in + *len == isize)
+ blen = ALIGN(isize, bs) - pos_in;
+ else
+ blen = *len;
+
+ /* Only reflink if we're aligned to block boundaries */
+ if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+ !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+ return -EINVAL;
+
+ /* Don't allow overlapped reflink within the same file */
+ if (same_inode) {
+ if (pos_out + blen > pos_in && pos_out < pos_in + blen)
+ return -EINVAL;
+ }
+
+ /* Wait for the completion of any pending IOs on both files */
+ inode_dio_wait(inode_in);
+ if (!same_inode)
+ inode_dio_wait(inode_out);
+
+ ret = filemap_write_and_wait_range(inode_in->i_mapping,
+ pos_in, pos_in + *len - 1);
+ if (ret)
+ return ret;
+
+ ret = filemap_write_and_wait_range(inode_out->i_mapping,
+ pos_out, pos_out + *len - 1);
+ if (ret)
+ return ret;
+
+ /*
+ * Check that the extents are the same.
+ */
+ if (is_dedupe) {
+ bool is_same = false;
+
+ ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
+ inode_out, pos_out, *len, &is_same);
+ if (ret)
+ return ret;
+ if (!is_same)
+ return -EBADE;
+ }
+
+ return 0;
+ }
+ EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+
int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len)
{
struct inode *inode_out = file_inode(file_out);
int ret;
- if (inode_in->i_sb != inode_out->i_sb ||
- file_in->f_path.mnt != file_out->f_path.mnt)
- return -EXDEV;
-
if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
return -EISDIR;
if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
return -EINVAL;
+ /*
+ * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
+ * the same mount. Practically, they only need to be on the same file
+ * system.
+ */
+ if (inode_in->i_sb != inode_out->i_sb)
+ return -EXDEV;
+
if (!(file_in->f_mode & FMODE_READ) ||
!(file_out->f_mode & FMODE_WRITE) ||
(file_out->f_flags & O_APPEND))
if (pos_in + len > i_size_read(inode_in))
return -EINVAL;
- ret = mnt_want_write_file(file_out);
- if (ret)
- return ret;
-
ret = file_in->f_op->clone_file_range(file_in, pos_in,
file_out, pos_out, len);
if (!ret) {
fsnotify_modify(file_out);
}
- mnt_drop_write_file(file_out);
return ret;
}
EXPORT_SYMBOL(vfs_clone_file_range);
+ /*
+ * Read a page's worth of file data into the page cache. Return the page
+ * locked.
+ */
+ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+ {
+ struct address_space *mapping;
+ struct page *page;
+ pgoff_t n;
+
+ n = offset >> PAGE_SHIFT;
+ mapping = inode->i_mapping;
+ page = read_mapping_page(mapping, n, NULL);
+ if (IS_ERR(page))
+ return page;
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ lock_page(page);
+ return page;
+ }
+
+ /*
+ * Compare extents of two files to see if they are the same.
+ * Caller must have locked both inodes to prevent write races.
+ */
+ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same)
+ {
+ loff_t src_poff;
+ loff_t dest_poff;
+ void *src_addr;
+ void *dest_addr;
+ struct page *src_page;
+ struct page *dest_page;
+ loff_t cmp_len;
+ bool same;
+ int error;
+
+ error = -EINVAL;
+ same = true;
+ while (len) {
+ src_poff = srcoff & (PAGE_SIZE - 1);
+ dest_poff = destoff & (PAGE_SIZE - 1);
+ cmp_len = min(PAGE_SIZE - src_poff,
+ PAGE_SIZE - dest_poff);
+ cmp_len = min(cmp_len, len);
+ if (cmp_len <= 0)
+ goto out_error;
+
+ src_page = vfs_dedupe_get_page(src, srcoff);
+ if (IS_ERR(src_page)) {
+ error = PTR_ERR(src_page);
+ goto out_error;
+ }
+ dest_page = vfs_dedupe_get_page(dest, destoff);
+ if (IS_ERR(dest_page)) {
+ error = PTR_ERR(dest_page);
+ unlock_page(src_page);
+ put_page(src_page);
+ goto out_error;
+ }
+ src_addr = kmap_atomic(src_page);
+ dest_addr = kmap_atomic(dest_page);
+
+ flush_dcache_page(src_page);
+ flush_dcache_page(dest_page);
+
+ if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+ same = false;
+
+ kunmap_atomic(dest_addr);
+ kunmap_atomic(src_addr);
+ unlock_page(dest_page);
+ unlock_page(src_page);
+ put_page(dest_page);
+ put_page(src_page);
+
+ if (!same)
+ break;
+
+ srcoff += cmp_len;
+ destoff += cmp_len;
+ len -= cmp_len;
+ }
+
+ *is_same = same;
+ return 0;
+
+ out_error:
+ return error;
+ }
+ EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
+
int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
{
struct file_dedupe_range_info *info;
static const struct vm_operations_struct xfs_file_vm_ops;
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
- struct xfs_inode *ip,
- int type)
-{
- if (type & XFS_IOLOCK_EXCL)
- inode_lock(VFS_I(ip));
- xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
- struct xfs_inode *ip,
- int type)
-{
- xfs_iunlock(ip, type);
- if (type & XFS_IOLOCK_EXCL)
- inode_unlock(VFS_I(ip));
-}
-
-static inline void
-xfs_rw_ilock_demote(
- struct xfs_inode *ip,
- int type)
-{
- xfs_ilock_demote(ip, type);
- if (type & XFS_IOLOCK_EXCL)
- inode_unlock(VFS_I(ip));
-}
-
/*
* Clear the specified ranges to zero through either the pagecache or DAX.
* Holes and unwritten extents will be left as-is as they already are zeroed.
xfs_iflags_clear(ip, XFS_ITRUNCATED);
- if (mp->m_flags & XFS_MOUNT_BARRIER) {
- /*
- * If we have an RT and/or log subvolume we need to make sure
- * to flush the write cache the device used for file data
- * first. This is to ensure newly written file data make
- * it to disk before logging the new inode size in case of
- * an extending write.
- */
- if (XFS_IS_REALTIME_INODE(ip))
- xfs_blkdev_issue_flush(mp->m_rtdev_targp);
- else if (mp->m_logdev_targp != mp->m_ddev_targp)
- xfs_blkdev_issue_flush(mp->m_ddev_targp);
- }
+ /*
+ * If we have an RT and/or log subvolume we need to make sure to flush
+ * the write cache the device used for file data first. This is to
+ * ensure newly written file data make it to disk before logging the new
+ * inode size in case of an extending write.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+ else if (mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
/*
* All metadata updates are logged, which means that we just have to
* an already allocated file and thus do not have any metadata to
* commit.
*/
- if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
- mp->m_logdev_targp == mp->m_ddev_targp &&
- !XFS_IS_REALTIME_INODE(ip) &&
- !log_flushed)
+ if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
+ mp->m_logdev_targp == mp->m_ddev_targp)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
return error;
struct kiocb *iocb,
struct iov_iter *to)
{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
- struct inode *inode = mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- loff_t isize = i_size_read(inode);
+ struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
size_t count = iov_iter_count(to);
- loff_t end = iocb->ki_pos + count - 1;
- struct iov_iter data;
- struct xfs_buftarg *target;
- ssize_t ret = 0;
+ ssize_t ret;
trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
if (!count)
return 0; /* skip atime */
- if (XFS_IS_REALTIME_INODE(ip))
- target = ip->i_mount->m_rtdev_targp;
- else
- target = ip->i_mount->m_ddev_targp;
-
- /* DIO must be aligned to device logical sector size */
- if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
- if (iocb->ki_pos == isize)
- return 0;
- return -EINVAL;
- }
-
file_accessed(iocb->ki_filp);
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- if (mapping->nrpages) {
- ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
- if (ret)
- goto out_unlock;
-
- /*
- * Invalidate whole pages. This can return an error if we fail
- * to invalidate a page, but this should never happen on XFS.
- * Warn if it does fail.
- */
- ret = invalidate_inode_pages2_range(mapping,
- iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
- ret = 0;
- }
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- data = *to;
- ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
- xfs_get_blocks_direct, NULL, NULL, 0);
- if (ret >= 0) {
- iocb->ki_pos += ret;
- iov_iter_advance(to, ret);
- }
-
-out_unlock:
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
if (!count)
return 0; /* skip atime */
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
file_accessed(iocb->ki_filp);
return ret;
trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
ret = generic_file_read_iter(iocb, to);
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
if (error <= 0)
return error;
- error = xfs_break_layouts(inode, iolock, true);
+ error = xfs_break_layouts(inode, iolock);
if (error)
return error;
- /* For changing security info in file_remove_privs() we need i_mutex */
+ /*
+ * For changing security info in file_remove_privs() we need i_rwsem
+ * exclusively.
+ */
if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
- xfs_rw_iunlock(ip, *iolock);
+ xfs_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
+ xfs_ilock(ip, *iolock);
goto restart;
}
/*
spin_unlock(&ip->i_flags_lock);
if (!drained_dio) {
if (*iolock == XFS_IOLOCK_SHARED) {
- xfs_rw_iunlock(ip, *iolock);
+ xfs_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
+ xfs_ilock(ip, *iolock);
iov_iter_reexpand(from, count);
}
/*
return 0;
}
+static int
+xfs_dio_write_end_io(
+ struct kiocb *iocb,
+ ssize_t size,
+ unsigned flags)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_inode *ip = XFS_I(inode);
+ loff_t offset = iocb->ki_pos;
+ bool update_size = false;
+ int error = 0;
+
+ trace_xfs_end_io_direct_write(ip, offset, size);
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
+
+ if (size <= 0)
+ return size;
+
+ /*
+ * We need to update the in-core inode size here so that we don't end up
+ * with the on-disk inode size being outside the in-core inode size. We
+ * have no other method of updating EOF for AIO, so always do it here
+ * if necessary.
+ *
+ * We need to lock the test/set EOF update as we can be racing with
+ * other IO completions here to update the EOF. Failing to serialise
+ * here can result in EOF moving backwards and Bad Things Happen when
+ * that occurs.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (offset + size > i_size_read(inode)) {
+ i_size_write(inode, offset + size);
+ update_size = true;
+ }
+ spin_unlock(&ip->i_flags_lock);
+
+ if (flags & IOMAP_DIO_COW) {
+ error = xfs_reflink_end_cow(ip, offset, size);
+ if (error)
+ return error;
+ }
+
+ if (flags & IOMAP_DIO_UNWRITTEN)
+ error = xfs_iomap_write_unwritten(ip, offset, size);
+ else if (update_size)
+ error = xfs_setfilesize(ip, offset, size);
+
+ return error;
+}
+
/*
* xfs_file_dio_aio_write - handle direct IO writes
*
int unaligned_io = 0;
int iolock;
size_t count = iov_iter_count(from);
- loff_t end;
- struct iov_iter data;
- struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
+ struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
iolock = XFS_IOLOCK_SHARED;
}
- xfs_rw_ilock(ip, iolock);
+ xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
count = iov_iter_count(from);
- end = iocb->ki_pos + count - 1;
-
- if (mapping->nrpages) {
- ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
- if (ret)
- goto out;
-
- /*
- * Invalidate whole pages. This can return an error if we fail
- * to invalidate a page, but this should never happen on XFS.
- * Warn if it does fail.
- */
- ret = invalidate_inode_pages2_range(mapping,
- iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
- ret = 0;
- }
/*
* If we are doing unaligned IO, wait for all other IO to drain,
if (unaligned_io)
inode_dio_wait(inode);
else if (iolock == XFS_IOLOCK_EXCL) {
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
goto out;
}
- data = *from;
- ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
- xfs_get_blocks_direct, xfs_end_io_direct_write,
- NULL, DIO_ASYNC_EXTEND);
-
- /* see generic_file_direct_write() for why this is necessary */
- if (mapping->nrpages) {
- invalidate_inode_pages2_range(mapping,
- iocb->ki_pos >> PAGE_SHIFT,
- end >> PAGE_SHIFT);
- }
-
- if (ret > 0) {
- iocb->ki_pos += ret;
- iov_iter_advance(from, ret);
- }
+ ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
- xfs_rw_iunlock(ip, iolock);
+ xfs_iunlock(ip, iolock);
/*
* No fallback to buffered IO on errors for XFS, direct IO will either
size_t count;
loff_t pos;
- xfs_rw_ilock(ip, iolock);
+ xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
count = iov_iter_count(from);
trace_xfs_file_dax_write(ip, count, pos);
-
- ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+ ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
i_size_write(inode, iocb->ki_pos);
error = xfs_setfilesize(ip, pos, ret);
}
-
out:
- xfs_rw_iunlock(ip, iolock);
+ xfs_iunlock(ip, iolock);
return error ? error : ret;
}
int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, iolock);
+ xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
current->backing_dev_info = NULL;
out:
- xfs_rw_iunlock(ip, iolock);
+ xfs_iunlock(ip, iolock);
return ret;
}
return -EOPNOTSUPP;
xfs_ilock(ip, iolock);
- error = xfs_break_layouts(inode, &iolock, false);
+ error = xfs_break_layouts(inode, &iolock);
if (error)
goto out_unlock;
return error;
}
- STATIC ssize_t
- xfs_file_copy_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- size_t len,
- unsigned int flags)
- {
- int error;
-
- error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, false);
- if (error)
- return error;
- return len;
- }
-
STATIC int
xfs_file_clone_range(
struct file *file_in,
len, false);
}
-#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
STATIC ssize_t
xfs_file_dedupe_range(
struct file *src_file,
{
int error;
- /*
- * Limit the total length we will dedupe for each operation.
- * This is intended to bound the total time spent in this
- * ioctl to something sane.
- */
- if (len > XFS_MAX_DEDUPE_LEN)
- len = XFS_MAX_DEDUPE_LEN;
-
error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
} else {
ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
ret = block_page_mkwrite_return(ret);
return xfs_filemap_page_mkwrite(vma, vmf);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- if (IS_DAX(inode)) {
- /*
- * we do not want to trigger unwritten extent conversion on read
- * faults - that is unnecessary overhead and would also require
- * changes to xfs_get_blocks_direct() to map unwritten extent
- * ioend for conversion on read-only mappings.
- */
- ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
- } else
+ if (IS_DAX(inode))
+ ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ else
ret = filemap_fault(vma, vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
}
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);
+ ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (flags & FAULT_FLAG_WRITE)
.fsync = xfs_file_fsync,
.get_unmapped_area = thp_get_unmapped_area,
.fallocate = xfs_file_fallocate,
- .copy_file_range = xfs_file_copy_range,
.clone_file_range = xfs_file_clone_range,
.dedupe_file_range = xfs_file_dedupe_range,
};
struct xfs_bmbt_irec *imap,
bool *shared)
{
- struct xfs_bmbt_irec got, prev;
- xfs_fileoff_t end_fsb, orig_end_fsb;
- int eof = 0, error = 0;
- bool trimmed;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_bmbt_irec got;
+ int error = 0;
+ bool eof = false, trimmed;
xfs_extnum_t idx;
- xfs_extlen_t align;
/*
* Search the COW fork extent list first. This serves two purposes:
* extent list is generally faster than going out to the shared extent
* tree.
*/
- xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
- &got, &prev);
+
+ if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
+ eof = true;
if (!eof && got.br_startoff <= imap->br_startoff) {
trace_xfs_reflink_cow_found(ip, imap);
xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
if (error)
return error;
- end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
-
- align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
- if (align)
- end_fsb = roundup_64(end_fsb, align);
-
-retry:
error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
- end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
- switch (error) {
- case 0:
- break;
- case -ENOSPC:
- case -EDQUOT:
- /* retry without any preallocation */
+ imap->br_blockcount, 0, &got, &idx, eof);
+ if (error == -ENOSPC || error == -EDQUOT)
trace_xfs_reflink_cow_enospc(ip, imap);
- if (end_fsb != orig_end_fsb) {
- end_fsb = orig_end_fsb;
- goto retry;
- }
- /*FALLTHRU*/
- default:
+ if (error)
return error;
- }
-
- if (end_fsb != orig_end_fsb)
- xfs_inode_set_cowblocks_tag(ip);
trace_xfs_reflink_cow_alloc(ip, &got);
return 0;
}
/*
- * Find the CoW reservation (and whether or not it needs block allocation)
- * for a given byte offset of a file.
+ * Find the CoW reservation for a given byte offset of a file.
*/
bool
xfs_reflink_find_cow_mapping(
struct xfs_inode *ip,
xfs_off_t offset,
- struct xfs_bmbt_irec *imap,
- bool *need_alloc)
+ struct xfs_bmbt_irec *imap)
{
- struct xfs_bmbt_irec irec;
- struct xfs_ifork *ifp;
- struct xfs_bmbt_rec_host *gotp;
- xfs_fileoff_t bno;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ xfs_fileoff_t offset_fsb;
+ struct xfs_bmbt_irec got;
xfs_extnum_t idx;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
ASSERT(xfs_is_reflink_inode(ip));
- /* Find the extent in the CoW fork. */
- ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- bno = XFS_B_TO_FSBT(ip->i_mount, offset);
- gotp = xfs_iext_bno_to_ext(ifp, bno, &idx);
- if (!gotp)
+ offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+ if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
return false;
-
- xfs_bmbt_get_all(gotp, &irec);
- if (bno >= irec.br_startoff + irec.br_blockcount ||
- bno < irec.br_startoff)
+ if (got.br_startoff > offset_fsb)
return false;
trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
- &irec);
-
- /* If it's still delalloc, we must allocate later. */
- *imap = irec;
- *need_alloc = !!(isnullstartblock(irec.br_startblock));
-
+ &got);
+ *imap = got;
return true;
}
/*
* Trim an extent to end at the next CoW reservation past offset_fsb.
*/
-int
+void
xfs_reflink_trim_irec_to_next_cow(
struct xfs_inode *ip,
xfs_fileoff_t offset_fsb,
struct xfs_bmbt_irec *imap)
{
- struct xfs_bmbt_irec irec;
- struct xfs_ifork *ifp;
- struct xfs_bmbt_rec_host *gotp;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_bmbt_irec got;
xfs_extnum_t idx;
if (!xfs_is_reflink_inode(ip))
- return 0;
+ return;
/* Find the extent in the CoW fork. */
- ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx);
- if (!gotp)
- return 0;
- xfs_bmbt_get_all(gotp, &irec);
+ if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+ return;
/* This is the extent before; try sliding up one. */
- if (irec.br_startoff < offset_fsb) {
- idx++;
- if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
- return 0;
- gotp = xfs_iext_get_ext(ifp, idx);
- xfs_bmbt_get_all(gotp, &irec);
+ if (got.br_startoff < offset_fsb) {
+ if (!xfs_iext_get_extent(ifp, idx + 1, &got))
+ return;
}
- if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount)
- return 0;
+ if (got.br_startoff >= imap->br_startoff + imap->br_blockcount)
+ return;
- imap->br_blockcount = irec.br_startoff - imap->br_startoff;
+ imap->br_blockcount = got.br_startoff - imap->br_startoff;
trace_xfs_reflink_trim_irec(ip, imap);
-
- return 0;
}
/*
xfs_fileoff_t end_fsb)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- struct xfs_bmbt_irec got, prev, del;
+ struct xfs_bmbt_irec got, del;
xfs_extnum_t idx;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
- int error = 0, eof = 0;
+ int error = 0;
if (!xfs_is_reflink_inode(ip))
return 0;
-
- xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
- &got, &prev);
- if (eof)
+ if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
return 0;
while (got.br_startoff < end_fsb) {
xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
}
- if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
+ if (!xfs_iext_get_extent(ifp, ++idx, &got))
break;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
/* clear tag if cow fork is emptied */
xfs_off_t count)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- struct xfs_bmbt_irec got, prev, del;
+ struct xfs_bmbt_irec got, del;
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t end_fsb;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
- int error, eof = 0;
+ int error;
unsigned int resblks;
xfs_filblks_t rlen;
xfs_extnum_t idx;
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
- &got, &prev);
-
/* If there is a hole at end_fsb - 1 go to the previous extent */
- if (eof || got.br_startoff > end_fsb) {
+ if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
+ got.br_startoff > end_fsb) {
ASSERT(idx > 0);
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
+ xfs_iext_get_extent(ifp, --idx, &got);
}
/* Walk backwards until we're out of the I/O range... */
error = xfs_defer_finish(&tp, &dfops, ip);
if (error)
goto out_defer;
-
next_extent:
- if (idx < 0)
+ if (!xfs_iext_get_extent(ifp, idx, &got))
break;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
error = xfs_trans_commit(tp);
return error;
}
- /*
- * Read a page's worth of file data into the page cache. Return the page
- * locked.
- */
- static struct page *
- xfs_get_page(
- struct inode *inode,
- xfs_off_t offset)
- {
- struct address_space *mapping;
- struct page *page;
- pgoff_t n;
-
- n = offset >> PAGE_SHIFT;
- mapping = inode->i_mapping;
- page = read_mapping_page(mapping, n, NULL);
- if (IS_ERR(page))
- return page;
- if (!PageUptodate(page)) {
- put_page(page);
- return ERR_PTR(-EIO);
- }
- lock_page(page);
- return page;
- }
-
- /*
- * Compare extents of two files to see if they are the same.
- */
- static int
- xfs_compare_extents(
- struct inode *src,
- xfs_off_t srcoff,
- struct inode *dest,
- xfs_off_t destoff,
- xfs_off_t len,
- bool *is_same)
- {
- xfs_off_t src_poff;
- xfs_off_t dest_poff;
- void *src_addr;
- void *dest_addr;
- struct page *src_page;
- struct page *dest_page;
- xfs_off_t cmp_len;
- bool same;
- int error;
-
- error = -EINVAL;
- same = true;
- while (len) {
- src_poff = srcoff & (PAGE_SIZE - 1);
- dest_poff = destoff & (PAGE_SIZE - 1);
- cmp_len = min(PAGE_SIZE - src_poff,
- PAGE_SIZE - dest_poff);
- cmp_len = min(cmp_len, len);
- ASSERT(cmp_len > 0);
-
- trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
- XFS_I(dest), destoff);
-
- src_page = xfs_get_page(src, srcoff);
- if (IS_ERR(src_page)) {
- error = PTR_ERR(src_page);
- goto out_error;
- }
- dest_page = xfs_get_page(dest, destoff);
- if (IS_ERR(dest_page)) {
- error = PTR_ERR(dest_page);
- unlock_page(src_page);
- put_page(src_page);
- goto out_error;
- }
- src_addr = kmap_atomic(src_page);
- dest_addr = kmap_atomic(dest_page);
-
- flush_dcache_page(src_page);
- flush_dcache_page(dest_page);
-
- if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
- same = false;
-
- kunmap_atomic(dest_addr);
- kunmap_atomic(src_addr);
- unlock_page(dest_page);
- unlock_page(src_page);
- put_page(dest_page);
- put_page(src_page);
-
- if (!same)
- break;
-
- srcoff += cmp_len;
- destoff += cmp_len;
- len -= cmp_len;
- }
-
- *is_same = same;
- return 0;
-
- out_error:
- trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
- return error;
- }
-
/*
* Link a range of blocks from one file to another.
*/
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
struct xfs_mount *mp = src->i_mount;
- loff_t bs = inode_out->i_sb->s_blocksize;
bool same_inode = (inode_in == inode_out);
xfs_fileoff_t sfsbno, dfsbno;
xfs_filblks_t fsblen;
xfs_extlen_t cowextsize;
- loff_t isize;
ssize_t ret;
- loff_t blen;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP;
return -EIO;
/* Lock both files against IO */
- if (same_inode) {
- xfs_ilock(src, XFS_IOLOCK_EXCL);
+ lock_two_nondirectories(inode_in, inode_out);
+ if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
- } else {
- xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL);
+ else
xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
- }
- /* Don't touch certain kinds of inodes */
- ret = -EPERM;
- if (IS_IMMUTABLE(inode_out))
- goto out_unlock;
-
- ret = -ETXTBSY;
- if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
- goto out_unlock;
-
-
- /* Don't reflink dirs, pipes, sockets... */
- ret = -EISDIR;
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- goto out_unlock;
+ /* Check file eligibility and prepare for block sharing. */
ret = -EINVAL;
- if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
- goto out_unlock;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- goto out_unlock;
-
/* Don't reflink realtime inodes */
if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
goto out_unlock;
if (IS_DAX(inode_in) || IS_DAX(inode_out))
goto out_unlock;
- /* Are we going all the way to the end? */
- isize = i_size_read(inode_in);
- if (isize == 0) {
- ret = 0;
- goto out_unlock;
- }
-
- /* Zero length dedupe exits immediately; reflink goes to EOF. */
- if (len == 0) {
- if (is_dedupe) {
- ret = 0;
- goto out_unlock;
- }
- len = isize - pos_in;
- }
-
- /* Ensure offsets don't wrap and the input is inside i_size */
- if (pos_in + len < pos_in || pos_out + len < pos_out ||
- pos_in + len > isize)
- goto out_unlock;
-
- /* Don't allow dedupe past EOF in the dest file */
- if (is_dedupe) {
- loff_t disize;
-
- disize = i_size_read(inode_out);
- if (pos_out >= disize || pos_out + len > disize)
- goto out_unlock;
- }
-
- /* If we're linking to EOF, continue to the block boundary. */
- if (pos_in + len == isize)
- blen = ALIGN(isize, bs) - pos_in;
- else
- blen = len;
-
- /* Only reflink if we're aligned to block boundaries */
- if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
- !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
- goto out_unlock;
-
- /* Don't allow overlapped reflink within the same file */
- if (same_inode) {
- if (pos_out + blen > pos_in && pos_out < pos_in + blen)
- goto out_unlock;
- }
-
- /* Wait for the completion of any pending IOs on both files */
- inode_dio_wait(inode_in);
- if (!same_inode)
- inode_dio_wait(inode_out);
-
- ret = filemap_write_and_wait_range(inode_in->i_mapping,
- pos_in, pos_in + len - 1);
- if (ret)
- goto out_unlock;
-
- ret = filemap_write_and_wait_range(inode_out->i_mapping,
- pos_out, pos_out + len - 1);
- if (ret)
+ ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+ &len, is_dedupe);
+ if (ret || len == 0)
goto out_unlock;
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
- /*
- * Check that the extents are the same.
- */
- if (is_dedupe) {
- bool is_same = false;
-
- ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
- len, &is_same);
- if (ret)
- goto out_unlock;
- if (!is_same) {
- ret = -EBADE;
- goto out_unlock;
- }
- }
-
+ /* Set flags and remap blocks. */
ret = xfs_reflink_set_inode_flag(src, dest);
if (ret)
goto out_unlock;
- /*
- * Invalidate the page cache so that we can clear any CoW mappings
- * in the destination file.
- */
- truncate_inode_pages_range(&inode_out->i_data, pos_out,
- PAGE_ALIGN(pos_out + len) - 1);
-
dfsbno = XFS_B_TO_FSBT(mp, pos_out);
sfsbno = XFS_B_TO_FSBT(mp, pos_in);
fsblen = XFS_B_TO_FSB(mp, len);
if (ret)
goto out_unlock;
+ /* Zap any page cache for the destination file's range. */
+ truncate_inode_pages_range(&inode_out->i_data, pos_out,
+ PAGE_ALIGN(pos_out + len) - 1);
+
/*
* Carry the cowextsize hint from src to dest if we're sharing the
* entire source file to the entire destination file, the source file
out_unlock:
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
- xfs_iunlock(src, XFS_IOLOCK_EXCL);
- if (src->i_ino != dest->i_ino) {
+ if (!same_inode)
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
- xfs_iunlock(dest, XFS_IOLOCK_EXCL);
- }
+ unlock_two_nondirectories(inode_in, inode_out);
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
return ret;
trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
return error;
}
-
-/*
- * Does this inode have any real CoW reservations?
- */
-bool
-xfs_reflink_has_real_cow_blocks(
- struct xfs_inode *ip)
-{
- struct xfs_bmbt_irec irec;
- struct xfs_ifork *ifp;
- struct xfs_bmbt_rec_host *gotp;
- xfs_extnum_t idx;
-
- if (!xfs_is_reflink_inode(ip))
- return false;
-
- /* Go find the old extent in the CoW fork. */
- ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- gotp = xfs_iext_bno_to_ext(ifp, 0, &idx);
- while (gotp) {
- xfs_bmbt_get_all(gotp, &irec);
-
- if (!isnullstartblock(irec.br_startblock))
- return true;
-
- /* Roll on... */
- idx++;
- if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
- break;
- gotp = xfs_iext_get_ext(ifp, idx);
- }
-
- return false;
-}
#include <linux/uidgid.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
-#include <linux/blk_types.h>
#include <linux/workqueue.h>
#include <linux/percpu-rwsem.h>
#include <linux/delayed_call.h>
struct backing_dev_info;
struct bdi_writeback;
+struct bio;
struct export_operations;
struct hd_geometry;
struct iovec;
*/
#define CHECK_IOVEC_ONLY -1
-/*
- * The below are the various read and write flags that we support. Some of
- * them include behavioral modifiers that send information down to the
- * block layer and IO scheduler. They should be used along with a req_op.
- * Terminology:
- *
- * The block layer uses device plugging to defer IO a little bit, in
- * the hope that we will see more IO very shortly. This increases
- * coalescing of adjacent IO and thus reduces the number of IOs we
- * have to send to the device. It also allows for better queuing,
- * if the IO isn't mergeable. If the caller is going to be waiting
- * for the IO, then he must ensure that the device is unplugged so
- * that the IO is dispatched to the driver.
- *
- * All IO is handled async in Linux. This is fine for background
- * writes, but for reads or writes that someone waits for completion
- * on, we want to notify the block layer and IO scheduler so that they
- * know about it. That allows them to make better scheduling
- * decisions. So when the below references 'sync' and 'async', it
- * is referencing this priority hint.
- *
- * With that in mind, the available types are:
- *
- * READ A normal read operation. Device will be plugged.
- * READ_SYNC A synchronous read. Device is not plugged, caller can
- * immediately wait on this read without caring about
- * unplugging.
- * WRITE A normal async write. Device will be plugged.
- * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down
- * the hint that someone will be waiting on this IO
- * shortly. The write equivalent of READ_SYNC.
- * WRITE_ODIRECT Special case write for O_DIRECT only.
- * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush.
- * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on
- * non-volatile media on completion.
- * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
- * by a cache flush and data is guaranteed to be on
- * non-volatile media on completion.
- *
- */
-#define RW_MASK REQ_OP_WRITE
-
-#define READ REQ_OP_READ
-#define WRITE REQ_OP_WRITE
-
-#define READ_SYNC REQ_SYNC
-#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE)
-#define WRITE_ODIRECT REQ_SYNC
-#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH)
-#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA)
-#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
-
/*
* Attribute flags. These should be or-ed together to figure out what
* has been changed!
unsigned long, loff_t *, int);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
+ extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
+ struct inode *inode_out, loff_t pos_out,
+ u64 *len, bool is_dedupe);
extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len);
+ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same);
extern int vfs_dedupe_file_range(struct file *file,
struct file_dedupe_range *same);
+static inline int do_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ u64 len)
+{
+ int ret;
+
+ sb_start_write(file_inode(file_out)->i_sb);
+ ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+ sb_end_write(file_inode(file_out)->i_sb);
+
+ return ret;
+}
+
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
extern int may_umount(struct vfsmount *);
extern long do_mount(const char *, const char __user *,
const char *, unsigned long, void *);
- extern struct vfsmount *collect_mounts(struct path *);
+ extern struct vfsmount *collect_mounts(const struct path *);
extern void drop_collected_mounts(struct vfsmount *);
extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
struct vfsmount *);
- extern int vfs_statfs(struct path *, struct kstatfs *);
+ extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
extern int vfs_ustat(dev_t, struct kstatfs *);
extern bool is_bad_inode(struct inode *);
#ifdef CONFIG_BLOCK
-static inline bool op_is_write(unsigned int op)
-{
- return op == REQ_OP_READ ? false : true;
-}
-
-/*
- * return data direction, READ or WRITE
- */
-static inline int bio_data_dir(struct bio *bio)
-{
- return op_is_write(bio_op(bio)) ? WRITE : READ;
-}
-
extern void check_disk_size_change(struct gendisk *disk,
struct block_device *bdev);
extern int revalidate_disk(struct gendisk *);
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
- extern bool path_is_under(struct path *, struct path *);
+ extern bool path_is_under(const struct path *, const struct path *);
extern char *file_path(struct file *, char *, int);
extern void inode_sb_list_add(struct inode *inode);
#ifdef CONFIG_BLOCK
-extern blk_qc_t submit_bio(struct bio *);
extern int bdev_read_only(struct block_device *);
#endif
extern int set_blocksize(struct block_device *, int);
* When set to zero, this means unlimited. */
static u32 audit_backlog_limit = 64;
#define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
-static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
/* The identity of the user shutting down the audit system. */
/* The netlink socket. */
static struct sock *audit_sock;
-static int audit_net_id;
+static unsigned int audit_net_id;
/* Hash for inode-based rules */
struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
static int audit_freelist_count;
static LIST_HEAD(audit_freelist);
-static struct sk_buff_head audit_skb_queue;
-/* queue of skbs to send to auditd when/if it comes back */
-static struct sk_buff_head audit_skb_hold_queue;
+/* queue msgs to send via kauditd_task */
+static struct sk_buff_head audit_queue;
+/* queue msgs due to temporary unicast send problems */
+static struct sk_buff_head audit_retry_queue;
+/* queue msgs waiting for new auditd connection */
+static struct sk_buff_head audit_hold_queue;
+
+/* queue servicing thread */
static struct task_struct *kauditd_task;
static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
+
+/* waitqueue for callers who are blocked on the audit backlog */
static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
static int audit_set_backlog_wait_time(u32 timeout)
{
return audit_do_config_change("audit_backlog_wait_time",
- &audit_backlog_wait_time_master, timeout);
+ &audit_backlog_wait_time, timeout);
}
static int audit_set_enabled(u32 state)
return audit_do_config_change("audit_failure", &audit_failure, state);
}
-/*
- * Queue skbs to be sent to auditd when/if it comes back. These skbs should
- * already have been sent via prink/syslog and so if these messages are dropped
- * it is not a huge concern since we already passed the audit_log_lost()
- * notification and stuff. This is just nice to get audit messages during
- * boot before auditd is running or messages generated while auditd is stopped.
- * This only holds messages is audit_default is set, aka booting with audit=1
- * or building your kernel that way.
- */
-static void audit_hold_skb(struct sk_buff *skb)
-{
- if (audit_default &&
- (!audit_backlog_limit ||
- skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit))
- skb_queue_tail(&audit_skb_hold_queue, skb);
- else
- kfree_skb(skb);
-}
-
/*
* For one reason or another this nlh isn't getting delivered to the userspace
* audit daemon, just send it to printk.
*/
-static void audit_printk_skb(struct sk_buff *skb)
+static void kauditd_printk_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
char *data = nlmsg_data(nlh);
else
audit_log_lost("printk limit exceeded");
}
+}
+
+/**
+ * kauditd_hold_skb - Queue an audit record, waiting for auditd
+ * @skb: audit record
+ *
+ * Description:
+ * Queue the audit record, waiting for an instance of auditd. When this
+ * function is called we haven't given up yet on sending the record, but things
+ * are not looking good. The first thing we want to do is try to write the
+ * record via printk and then see if we want to try and hold on to the record
+ * and queue it, if we have room. If we want to hold on to the record, but we
+ * don't have room, record a record lost message.
+ */
+static void kauditd_hold_skb(struct sk_buff *skb)
+{
+ /* at this point it is uncertain if we will ever send this to auditd so
+ * try to send the message via printk before we go any further */
+ kauditd_printk_skb(skb);
+
+ /* can we just silently drop the message? */
+ if (!audit_default) {
+ kfree_skb(skb);
+ return;
+ }
+
+ /* if we have room, queue the message */
+ if (!audit_backlog_limit ||
+ skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
+ skb_queue_tail(&audit_hold_queue, skb);
+ return;
+ }
- audit_hold_skb(skb);
+ /* we have no other options - drop the message */
+ audit_log_lost("kauditd hold queue overflow");
+ kfree_skb(skb);
}
-static void kauditd_send_skb(struct sk_buff *skb)
+/**
+ * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
+ * @skb: audit record
+ *
+ * Description:
+ * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
+ * but for some reason we are having problems sending it audit records so
+ * queue the given record and attempt to resend.
+ */
+static void kauditd_retry_skb(struct sk_buff *skb)
{
- int err;
- int attempts = 0;
-#define AUDITD_RETRIES 5
+ /* NOTE: because records should only live in the retry queue for a
+ * short period of time, before either being sent or moved to the hold
+ * queue, we don't currently enforce a limit on this queue */
+ skb_queue_tail(&audit_retry_queue, skb);
+}
+
+/**
+ * auditd_reset - Disconnect the auditd connection
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the records in the retry
+ * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex
+ * must be held when calling this function.
+ */
+static void auditd_reset(void)
+{
+ struct sk_buff *skb;
+
+ /* break the connection */
+ if (audit_sock) {
+ sock_put(audit_sock);
+ audit_sock = NULL;
+ }
+ audit_pid = 0;
+ audit_nlk_portid = 0;
+
+ /* flush all of the retry queue to the hold queue */
+ while ((skb = skb_dequeue(&audit_retry_queue)))
+ kauditd_hold_skb(skb);
+}
+
+/**
+ * kauditd_send_unicast_skb - Send a record via unicast to auditd
+ * @skb: audit record
+ */
+static int kauditd_send_unicast_skb(struct sk_buff *skb)
+{
+ int rc;
-restart:
- /* take a reference in case we can't send it and we want to hold it */
+ /* if we know nothing is connected, don't even try the netlink call */
+ if (!audit_pid)
+ return -ECONNREFUSED;
+
+ /* get an extra skb reference in case we fail to send */
skb_get(skb);
- err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
- if (err < 0) {
- pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n",
- audit_pid, err);
- if (audit_pid) {
- if (err == -ECONNREFUSED || err == -EPERM
- || ++attempts >= AUDITD_RETRIES) {
- char s[32];
-
- snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid);
- audit_log_lost(s);
- audit_pid = 0;
- audit_sock = NULL;
- } else {
- pr_warn("re-scheduling(#%d) write to audit_pid=%d\n",
- attempts, audit_pid);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- goto restart;
- }
- }
- /* we might get lucky and get this in the next auditd */
- audit_hold_skb(skb);
- } else
- /* drop the extra reference if sent ok */
+ rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
+ if (rc >= 0) {
consume_skb(skb);
+ rc = 0;
+ }
+
+ return rc;
}
/*
- * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
+ * kauditd_send_multicast_skb - Send a record to any multicast listeners
+ * @skb: audit record
*
+ * Description:
* This function doesn't consume an skb as might be expected since it has to
* copy it anyways.
*/
-static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
+static void kauditd_send_multicast_skb(struct sk_buff *skb)
{
- struct sk_buff *copy;
- struct audit_net *aunet = net_generic(&init_net, audit_net_id);
- struct sock *sock = aunet->nlsk;
+ struct sk_buff *copy;
+ struct audit_net *aunet = net_generic(&init_net, audit_net_id);
+ struct sock *sock = aunet->nlsk;
+ struct nlmsghdr *nlh;
if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
return;
* no reason for new multicast clients to continue with this
* non-compliance.
*/
- copy = skb_copy(skb, gfp_mask);
+ copy = skb_copy(skb, GFP_KERNEL);
if (!copy)
return;
+ nlh = nlmsg_hdr(copy);
+ nlh->nlmsg_len = skb->len;
- nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
+ nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
}
-/*
- * flush_hold_queue - empty the hold queue if auditd appears
- *
- * If auditd just started, drain the queue of messages already
- * sent to syslog/printk. Remember loss here is ok. We already
- * called audit_log_lost() if it didn't go out normally. so the
- * race between the skb_dequeue and the next check for audit_pid
- * doesn't matter.
+/**
+ * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
*
- * If you ever find kauditd to be too slow we can get a perf win
- * by doing our own locking and keeping better track if there
- * are messages in this queue. I don't see the need now, but
- * in 5 years when I want to play with this again I'll see this
- * note and still have no friggin idea what i'm thinking today.
+ * Description:
+ * This function is for use by the wait_event_freezable() call in
+ * kauditd_thread().
*/
-static void flush_hold_queue(void)
+static int kauditd_wake_condition(void)
{
- struct sk_buff *skb;
-
- if (!audit_default || !audit_pid)
- return;
-
- skb = skb_dequeue(&audit_skb_hold_queue);
- if (likely(!skb))
- return;
+ static int pid_last = 0;
+ int rc;
+ int pid = audit_pid;
- while (skb && audit_pid) {
- kauditd_send_skb(skb);
- skb = skb_dequeue(&audit_skb_hold_queue);
- }
+ /* wake on new messages or a change in the connected auditd */
+ rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
+ if (rc)
+ pid_last = pid;
- /*
- * if auditd just disappeared but we
- * dequeued an skb we need to drop ref
- */
- consume_skb(skb);
+ return rc;
}
static int kauditd_thread(void *dummy)
{
+ int rc;
+ int auditd = 0;
+ int reschedule = 0;
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+
+#define UNICAST_RETRIES 5
+#define AUDITD_BAD(x,y) \
+ ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
+
+ /* NOTE: we do invalidate the auditd connection flag on any sending
+ * errors, but we only "restore" the connection flag at specific places
+ * in the loop in order to help ensure proper ordering of audit
+ * records */
+
set_freezable();
while (!kthread_should_stop()) {
- struct sk_buff *skb;
-
- flush_hold_queue();
+ /* NOTE: possible area for future improvement is to look at
+ * the hold and retry queues, since only this thread
+ * has access to these queues we might be able to do
+ * our own queuing and skip some/all of the locking */
+
+ /* NOTE: it might be a fun experiment to split the hold and
+ * retry queue handling to another thread, but the
+ * synchronization issues and other overhead might kill
+ * any performance gains */
+
+ /* attempt to flush the hold queue */
+ while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ /* requeue to the same spot */
+ skb_queue_head(&audit_hold_queue, skb);
+
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ }
+ } else
+ /* we were able to send successfully */
+ reschedule = 0;
+ }
- skb = skb_dequeue(&audit_skb_queue);
+ /* attempt to flush the retry queue */
+ while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ kauditd_hold_skb(skb);
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ } else
+ /* temporary problem (we hope), queue
+ * to the same spot and retry */
+ skb_queue_head(&audit_retry_queue, skb);
+ } else
+ /* we were able to send successfully */
+ reschedule = 0;
+ }
+ /* standard queue processing, try to be as quick as possible */
+quick_loop:
+ skb = skb_dequeue(&audit_queue);
if (skb) {
- if (!audit_backlog_limit ||
- (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit))
- wake_up(&audit_backlog_wait);
- if (audit_pid)
- kauditd_send_skb(skb);
+ /* setup the netlink header, see the comments in
+ * kauditd_send_multicast_skb() for length quirks */
+ nlh = nlmsg_hdr(skb);
+ nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
+
+ /* attempt to send to any multicast listeners */
+ kauditd_send_multicast_skb(skb);
+
+ /* attempt to send to auditd, queue on failure */
+ if (auditd) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ }
+
+ /* move to the retry queue */
+ kauditd_retry_skb(skb);
+ } else
+ /* everything is working so go fast! */
+ goto quick_loop;
+ } else if (reschedule)
+ /* we are currently having problems, move to
+ * the retry queue */
+ kauditd_retry_skb(skb);
else
- audit_printk_skb(skb);
- continue;
- }
+ /* dump the message via printk and hold it */
+ kauditd_hold_skb(skb);
+ } else {
+ /* we have flushed the backlog so wake everyone */
+ wake_up(&audit_backlog_wait);
+
+ /* if everything is okay with auditd (if present), go
+ * to sleep until there is something new in the queue
+ * or we have a change in the connected auditd;
+ * otherwise simply reschedule to give things a chance
+ * to recover */
+ if (reschedule) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ } else
+ wait_event_freezable(kauditd_wait,
+ kauditd_wake_condition());
- wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue));
+ /* update the auditd connection status */
+ auditd = (audit_pid ? 1 : 0);
+ }
}
+
return 0;
}
kfree(reply);
return 0;
}
+
/**
* audit_send_reply - send an audit reply message via netlink
* @request_skb: skb of request we are replying to (used to target the reply)
if (err)
return err;
- /* As soon as there's any sign of userspace auditd,
- * start kauditd to talk to it */
- if (!kauditd_task) {
- kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
- if (IS_ERR(kauditd_task)) {
- err = PTR_ERR(kauditd_task);
- kauditd_task = NULL;
- return err;
- }
- }
seq = nlh->nlmsg_seq;
data = nlmsg_data(nlh);
s.rate_limit = audit_rate_limit;
s.backlog_limit = audit_backlog_limit;
s.lost = atomic_read(&audit_lost);
- s.backlog = skb_queue_len(&audit_skb_queue);
+ s.backlog = skb_queue_len(&audit_queue);
s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
- s.backlog_wait_time = audit_backlog_wait_time_master;
+ s.backlog_wait_time = audit_backlog_wait_time;
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
break;
}
}
if (audit_enabled != AUDIT_OFF)
audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
- audit_pid = new_pid;
- audit_nlk_portid = NETLINK_CB(skb).portid;
- audit_sock = skb->sk;
+ if (new_pid) {
+ if (audit_sock)
+ sock_put(audit_sock);
+ audit_pid = new_pid;
+ audit_nlk_portid = NETLINK_CB(skb).portid;
+ sock_hold(skb->sk);
+ audit_sock = skb->sk;
+ } else {
+ auditd_reset();
+ }
+ wake_up_interruptible(&kauditd_wait);
}
if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
err = audit_set_rate_limit(s.rate_limit);
{
struct audit_net *aunet = net_generic(net, audit_net_id);
struct sock *sock = aunet->nlsk;
- if (sock == audit_sock) {
- audit_pid = 0;
- audit_sock = NULL;
- }
+ mutex_lock(&audit_cmd_mutex);
+ if (sock == audit_sock)
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
- RCU_INIT_POINTER(aunet->nlsk, NULL);
- synchronize_net();
netlink_kernel_release(sock);
+ aunet->nlsk = NULL;
}
static struct pernet_operations audit_net_ops __net_initdata = {
audit_default ? "enabled" : "disabled");
register_pernet_subsys(&audit_net_ops);
- skb_queue_head_init(&audit_skb_queue);
- skb_queue_head_init(&audit_skb_hold_queue);
+ skb_queue_head_init(&audit_queue);
+ skb_queue_head_init(&audit_retry_queue);
+ skb_queue_head_init(&audit_hold_queue);
audit_initialized = AUDIT_INITIALIZED;
audit_enabled = audit_default;
audit_ever_enabled |= !!audit_default;
- audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
-
for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
INIT_LIST_HEAD(&audit_inode_hash[i]);
+ kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
+ if (IS_ERR(kauditd_task)) {
+ int err = PTR_ERR(kauditd_task);
+ panic("audit: failed to start the kauditd thread (%d)\n", err);
+ }
+
+ audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
+
return 0;
}
__initcall(audit_init);
}
}
-/*
- * Wait for auditd to drain the queue a little
- */
-static long wait_for_auditd(long sleep_time)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- if (audit_backlog_limit &&
- skb_queue_len(&audit_skb_queue) > audit_backlog_limit) {
- add_wait_queue_exclusive(&audit_backlog_wait, &wait);
- set_current_state(TASK_UNINTERRUPTIBLE);
- sleep_time = schedule_timeout(sleep_time);
- remove_wait_queue(&audit_backlog_wait, &wait);
- }
-
- return sleep_time;
-}
-
/**
* audit_log_start - obtain an audit buffer
* @ctx: audit_context (may be NULL)
struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
int type)
{
- struct audit_buffer *ab = NULL;
- struct timespec t;
- unsigned int uninitialized_var(serial);
- int reserve = 5; /* Allow atomic callers to go up to five
- entries over the normal backlog limit */
- unsigned long timeout_start = jiffies;
+ struct audit_buffer *ab;
+ struct timespec t;
+ unsigned int uninitialized_var(serial);
if (audit_initialized != AUDIT_INITIALIZED)
return NULL;
if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
return NULL;
- if (gfp_mask & __GFP_DIRECT_RECLAIM) {
- if (audit_pid && audit_pid == current->tgid)
- gfp_mask &= ~__GFP_DIRECT_RECLAIM;
- else
- reserve = 0;
- }
-
- while (audit_backlog_limit
- && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
- if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
- long sleep_time;
+ /* don't ever fail/sleep on these two conditions:
+ * 1. auditd generated record - since we need auditd to drain the
+ * queue; also, when we are checking for auditd, compare PIDs using
+ * task_tgid_vnr() since auditd_pid is set in audit_receive_msg()
+ * using a PID anchored in the caller's namespace
+ * 2. audit command message - record types 1000 through 1099 inclusive
+ * are command messages/records used to manage the kernel subsystem
+ * and the audit userspace, blocking on these messages could cause
+ * problems under load so don't do it (note: not all of these
+ * command types are valid as record types, but it is quicker to
+ * just check two ints than a series of ints in a if/switch stmt) */
+ if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
+ (type >= 1000 && type <= 1099))) {
+ long sleep_time = audit_backlog_wait_time;
+
+ while (audit_backlog_limit &&
+ (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
+ /* wake kauditd to try and flush the queue */
+ wake_up_interruptible(&kauditd_wait);
- sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
- if (sleep_time > 0) {
- sleep_time = wait_for_auditd(sleep_time);
- if (sleep_time > 0)
- continue;
+ /* sleep if we are allowed and we haven't exhausted our
+ * backlog wait limit */
+ if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
+ (sleep_time > 0)) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&audit_backlog_wait,
+ &wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ sleep_time = schedule_timeout(sleep_time);
+ remove_wait_queue(&audit_backlog_wait, &wait);
+ } else {
+ if (audit_rate_check() && printk_ratelimit())
+ pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
+ skb_queue_len(&audit_queue),
+ audit_backlog_limit);
+ audit_log_lost("backlog limit exceeded");
+ return NULL;
}
}
- if (audit_rate_check() && printk_ratelimit())
- pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
- skb_queue_len(&audit_skb_queue),
- audit_backlog_limit);
- audit_log_lost("backlog limit exceeded");
- audit_backlog_wait_time = 0;
- wake_up(&audit_backlog_wait);
- return NULL;
}
- if (!reserve && !audit_backlog_wait_time)
- audit_backlog_wait_time = audit_backlog_wait_time_master;
-
ab = audit_buffer_alloc(ctx, gfp_mask, type);
if (!ab) {
audit_log_lost("out of memory in audit_log_start");
}
audit_get_stamp(ab->ctx, &t, &serial);
-
audit_log_format(ab, "audit(%lu.%03lu:%u): ",
t.tv_sec, t.tv_nsec/1000000, serial);
+
return ab;
}
* @call_panic: optional pointer to int that will be updated if secid fails
*/
void audit_log_name(struct audit_context *context, struct audit_names *n,
- struct path *path, int record_num, int *call_panic)
+ const struct path *path, int record_num, int *call_panic)
{
struct audit_buffer *ab;
ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
* @operation: specific link operation
* @link: the path that triggered the restriction
*/
- void audit_log_link_denied(const char *operation, struct path *link)
+ void audit_log_link_denied(const char *operation, const struct path *link)
{
struct audit_buffer *ab;
struct audit_names *name;
* audit_log_end - end one audit record
* @ab: the audit_buffer
*
- * netlink_unicast() cannot be called inside an irq context because it blocks
- * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
- * on a queue and a tasklet is scheduled to remove them from the queue outside
- * the irq context. May be called in any context.
+ * We can not do a netlink send inside an irq context because it blocks (last
+ * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a
+ * queue and a tasklet is scheduled to remove them from the queue outside the
+ * irq context. May be called in any context.
*/
void audit_log_end(struct audit_buffer *ab)
{
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
- struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
-
- nlh->nlmsg_len = ab->skb->len;
- kauditd_send_multicast_skb(ab->skb, ab->gfp_mask);
-
- /*
- * The original kaudit unicast socket sends up messages with
- * nlmsg_len set to the payload length rather than the entire
- * message length. This breaks the standard set by netlink.
- * The existing auditd daemon assumes this breakage. Fixing
- * this would require co-ordinating a change in the established
- * protocol between the kaudit kernel subsystem and the auditd
- * userspace code.
- */
- nlh->nlmsg_len -= NLMSG_HDRLEN;
-
- if (audit_pid) {
- skb_queue_tail(&audit_skb_queue, ab->skb);
- wake_up_interruptible(&kauditd_wait);
- } else {
- audit_printk_skb(ab->skb);
- }
+ skb_queue_tail(&audit_queue, ab->skb);
+ wake_up_interruptible(&kauditd_wait);
ab->skb = NULL;
}
audit_buffer_free(ab);
}
static void audit_update_mark(struct audit_fsnotify_mark *audit_mark,
- struct inode *inode)
+ const struct inode *inode)
{
audit_mark->dev = inode ? inode->i_sb->s_dev : AUDIT_DEV_UNSET;
audit_mark->ino = inode ? inode->i_ino : AUDIT_INO_UNSET;
ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
- audit_log_format(ab, "auid=%u ses=%u op=",
+ audit_log_format(ab, "auid=%u ses=%u op=%s",
from_kuid(&init_user_ns, audit_get_loginuid(current)),
- audit_get_sessionid(current));
- audit_log_string(ab, op);
+ audit_get_sessionid(current), op);
audit_log_format(ab, " path=");
audit_log_untrustedstring(ab, audit_mark->path);
audit_log_key(ab, rule->filterkey);
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *dname, u32 cookie)
{
struct audit_fsnotify_mark *audit_mark;
- struct inode *inode = NULL;
+ const struct inode *inode = NULL;
audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark);
switch (data_type) {
case (FSNOTIFY_EVENT_PATH):
- inode = ((struct path *)data)->dentry->d_inode;
+ inode = ((const struct path *)data)->dentry->d_inode;
break;
case (FSNOTIFY_EVENT_INODE):
- inode = (struct inode *)data;
+ inode = (const struct inode *)data;
break;
default:
BUG();
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
- audit_log_format(ab, "op=");
- audit_log_string(ab, "remove_rule");
+ audit_log_format(ab, "op=remove_rule");
audit_log_format(ab, " dir=");
audit_log_untrustedstring(ab, rule->tree->pathname);
audit_log_key(ab, rule->filterkey);
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie)
{
return 0;
ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
- audit_log_format(ab, "auid=%u ses=%u op=",
+ audit_log_format(ab, "auid=%u ses=%u op=%s",
from_kuid(&init_user_ns, audit_get_loginuid(current)),
- audit_get_sessionid(current));
- audit_log_string(ab, op);
+ audit_get_sessionid(current), op);
audit_log_format(ab, " path=");
audit_log_untrustedstring(ab, w->path);
audit_log_key(ab, r->filterkey);
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *dname, u32 cookie)
{
- struct inode *inode;
+ const struct inode *inode;
struct audit_parent *parent;
parent = container_of(inode_mark, struct audit_parent, mark);
switch (data_type) {
case (FSNOTIFY_EVENT_PATH):
- inode = d_backing_inode(((struct path *)data)->dentry);
+ inode = d_backing_inode(((const struct path *)data)->dentry);
break;
case (FSNOTIFY_EVENT_INODE):
- inode = (struct inode *)data;
+ inode = (const struct inode *)data;
break;
default:
BUG();
exe_file = get_task_exe_file(tsk);
if (!exe_file)
return 0;
- ino = exe_file->f_inode->i_ino;
- dev = exe_file->f_inode->i_sb->s_dev;
+ ino = file_inode(exe_file)->i_ino;
+ dev = file_inode(exe_file)->i_sb->s_dev;
fput(exe_file);
return audit_mark_compare(mark, ino, dev);
}