about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Makefile 2
-rw-r--r-- fs/aio.c 33
-rw-r--r-- fs/binfmt_misc.c 7
-rw-r--r-- fs/btrfs/ctree.h 4
-rw-r--r-- fs/btrfs/disk-io.c 6
-rw-r--r-- fs/btrfs/extent-tree.c 23
-rw-r--r-- fs/btrfs/free-space-cache.c 12
-rw-r--r-- fs/btrfs/volumes.c 2
-rw-r--r-- fs/ceph/addr.c 273
-rw-r--r-- fs/ceph/caps.c 132
-rw-r--r-- fs/ceph/dir.c 27
-rw-r--r-- fs/ceph/file.c 97
-rw-r--r-- fs/ceph/inode.c 59
-rw-r--r-- fs/ceph/locks.c 64
-rw-r--r-- fs/ceph/mds_client.c 41
-rw-r--r-- fs/ceph/mds_client.h 10
-rw-r--r-- fs/ceph/snap.c 37
-rw-r--r-- fs/ceph/super.c 16
-rw-r--r-- fs/ceph/super.h 55
-rw-r--r-- fs/ceph/xattr.c 7
-rw-r--r-- fs/coda/dir.c 4
-rw-r--r-- fs/debugfs/file.c 54
-rw-r--r-- fs/ecryptfs/crypto.c 1
-rw-r--r-- fs/ecryptfs/file.c 12
-rw-r--r-- fs/ecryptfs/keystore.c 6
-rw-r--r-- fs/ecryptfs/main.c 16
-rw-r--r-- fs/ext4/move_extent.c 4
-rw-r--r-- fs/fuse/cuse.c 2
-rw-r--r-- fs/fuse/dev.c 29
-rw-r--r-- fs/fuse/dir.c 538
-rw-r--r-- fs/fuse/file.c 230
-rw-r--r-- fs/fuse/fuse_i.h 45
-rw-r--r-- fs/fuse/inode.c 39
-rw-r--r-- fs/hfsplus/catalog.c 89
-rw-r--r-- fs/hfsplus/dir.c 11
-rw-r--r-- fs/hfsplus/hfsplus_fs.h 4
-rw-r--r-- fs/hfsplus/super.c 4
-rw-r--r-- fs/inode.c 11
-rw-r--r-- fs/internal.h 5
-rw-r--r-- fs/ioctl.c 2
-rw-r--r-- fs/isofs/rock.c 9
-rw-r--r-- fs/jffs2/readinode.c 2
-rw-r--r-- fs/jffs2/summary.c 1
-rw-r--r-- fs/kernfs/file.c 95
-rw-r--r-- fs/lockd/mon.c 2
-rw-r--r-- fs/lockd/svc.c 2
-rw-r--r-- fs/mount.h 3
-rw-r--r-- fs/namei.c 98
-rw-r--r-- fs/namespace.c 71
-rw-r--r-- fs/nfsd/nfs4proc.c 57
-rw-r--r-- fs/nfsd/nfs4state.c 68
-rw-r--r-- fs/nfsd/nfs4xdr.c 34
-rw-r--r-- fs/nfsd/nfscache.c 4
-rw-r--r-- fs/nfsd/nfsctl.c 6
-rw-r--r-- fs/nfsd/nfsfh.c 2
-rw-r--r-- fs/nfsd/nfssvc.c 2
-rw-r--r-- fs/nfsd/state.h 19
-rw-r--r-- fs/nfsd/vfs.c 37
-rw-r--r-- fs/nfsd/vfs.h 2
-rw-r--r-- fs/nfsd/xdr4.h 9
-rw-r--r-- fs/nsfs.c 161
-rw-r--r-- fs/ocfs2/alloc.c 28
-rw-r--r-- fs/ocfs2/alloc.h 2
-rw-r--r-- fs/ocfs2/aops.c 16
-rw-r--r-- fs/ocfs2/dir.c 2
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c 12
-rw-r--r-- fs/ocfs2/file.c 2
-rw-r--r-- fs/open.c 5
-rw-r--r-- fs/pnode.c 1
-rw-r--r-- fs/proc/base.c 53
-rw-r--r-- fs/proc/inode.c 10
-rw-r--r-- fs/proc/internal.h 2
-rw-r--r-- fs/proc/meminfo.c 15
-rw-r--r-- fs/proc/namespaces.c 153
-rw-r--r-- fs/proc/stat.c 2
-rw-r--r-- fs/proc_namespace.c 16
-rw-r--r-- fs/pstore/ram.c 1
-rw-r--r-- fs/read_write.c 24
-rw-r--r-- fs/reiserfs/super.c 3
-rw-r--r-- fs/squashfs/Kconfig 15
-rw-r--r-- fs/squashfs/Makefile 1
-rw-r--r-- fs/squashfs/decompressor.c 7
-rw-r--r-- fs/squashfs/decompressor.h 4
-rw-r--r-- fs/squashfs/lz4_wrapper.c 142
-rw-r--r-- fs/squashfs/squashfs_fs.h 1
-rw-r--r-- fs/sysfs/file.c 59
-rw-r--r-- fs/udf/dir.c 31
-rw-r--r-- fs/udf/inode.c 14
-rw-r--r-- fs/udf/namei.c 17
-rw-r--r-- fs/udf/symlink.c 57
-rw-r--r-- fs/udf/udfdecl.h 3
-rw-r--r-- fs/udf/unicode.c 28
92 files changed, 2261 insertions, 1162 deletions
diff --git a/fs/Makefile b/fs/Makefile
index da0bbb456d3f..bedff48e8fdc 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o fs_pin.o
+ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/aio.c b/fs/aio.c
index 14b93159ef83..1b7893ecc296 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -286,12 +286,37 @@ static void aio_free_ring(struct kioctx *ctx)
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
{
+ vma->vm_flags |= VM_DONTEXPAND;
vma->vm_ops = &generic_file_vm_ops;
return 0;
}
+/*
+ * aio_ring_remap - refresh the cached ring address of an AIO context.
+ * @file: the AIO ring file whose mapping changed
+ * @vma:  the VMA now mapping @file; its vm_start is the new ring address
+ *
+ * Installed as the .mremap file operation of the AIO ring file.  Scans the
+ * kioctx table of @vma's mm for the context whose aio_ring_file is @file and
+ * sets both its user_id and mmap_base to vma->vm_start.  At most one context
+ * is updated; if none matches, nothing is changed.
+ *
+ * The scan runs under mm->ioctx_lock and inside an RCU read-side section.
+ */
+static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct kioctx_table *table;
+	int i;
+
+	spin_lock(&mm->ioctx_lock);
+	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
+	/*
+	 * The table pointer may be NULL when this mm owns no AIO contexts
+	 * (presumably e.g. a forked child that still maps the parent's ring
+	 * -- confirm).  Without this check table->nr would be a NULL
+	 * dereference.
+	 */
+	if (!table)
+		goto out_unlock;
+
+	for (i = 0; i < table->nr; i++) {
+		struct kioctx *ctx;
+
+		ctx = table->table[i];
+		if (ctx && ctx->aio_ring_file == file) {
+			ctx->user_id = ctx->mmap_base = vma->vm_start;
+			break;
+		}
+	}
+
+out_unlock:
+	rcu_read_unlock();
+	spin_unlock(&mm->ioctx_lock);
+}
+
static const struct file_operations aio_ring_fops = {
.mmap = aio_ring_mmap,
+ .mremap = aio_ring_remap,
};
#if IS_ENABLED(CONFIG_MIGRATION)
@@ -1228,8 +1253,12 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
* the ringbuffer empty. So in practice we should be ok, but it's
* something to be aware of when touching this code.
*/
- wait_event_interruptible_hrtimeout(ctx->wait,
- aio_read_events(ctx, min_nr, nr, event, &ret), until);
+ if (until.tv64 == 0)
+ aio_read_events(ctx, min_nr, nr, event, &ret);
+ else
+ wait_event_interruptible_hrtimeout(ctx->wait,
+ aio_read_events(ctx, min_nr, nr, event, &ret),
+ until);
if (!ret && signal_pending(current))
ret = -EINTR;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c04ef1d4f18a..97aff2879cda 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -254,6 +254,7 @@ static char *scanarg(char *s, char del)
return NULL;
}
}
+ s[-1] ='\0';
return s;
}
@@ -378,8 +379,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
p = scanarg(p, del);
if (!p)
goto einval;
- p[-1] = '\0';
- if (p == e->magic)
+ if (!e->magic[0])
goto einval;
if (USE_DEBUG)
print_hex_dump_bytes(
@@ -391,8 +391,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
p = scanarg(p, del);
if (!p)
goto einval;
- p[-1] = '\0';
- if (p == e->mask) {
+ if (!e->mask[0]) {
e->mask = NULL;
pr_debug("register: mask[raw]: none\n");
} else if (USE_DEBUG)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e6fbbd74b716..7e607416755a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3481,8 +3481,8 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
int btrfs_error_unpin_extent_range(struct btrfs_root *root,
u64 start, u64 end);
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes);
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u64 *actual_bytes);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 type);
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 30965120772b..8c63419a7f70 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4121,12 +4121,6 @@ again:
if (ret)
break;
- /* opt_discard */
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_error_discard_extent(root, start,
- end + 1 - start,
- NULL);
-
clear_extent_dirty(unpin, start, end, GFP_NOFS);
btrfs_error_unpin_extent_range(root, start, end);
cond_resched();
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 222d6aea4a8a..a80b97100d90 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1889,8 +1889,8 @@ static int btrfs_issue_discard(struct block_device *bdev,
return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
}
-static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes)
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u64 *actual_bytes)
{
int ret;
u64 discarded_bytes = 0;
@@ -5727,7 +5727,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
update_global_block_rsv(fs_info);
}
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+ const bool return_free_space)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache = NULL;
@@ -5751,7 +5752,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
if (start < cache->last_byte_to_unpin) {
len = min(len, cache->last_byte_to_unpin - start);
- btrfs_add_free_space(cache, start, len);
+ if (return_free_space)
+ btrfs_add_free_space(cache, start, len);
}
start += len;
@@ -5815,7 +5817,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
end + 1 - start, NULL);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
- unpin_extent_range(root, start, end);
+ unpin_extent_range(root, start, end, true);
cond_resched();
}
@@ -8872,6 +8874,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
cache_node);
rb_erase(&block_group->cache_node,
&info->block_group_cache_tree);
+ RB_CLEAR_NODE(&block_group->cache_node);
spin_unlock(&info->block_group_cache_lock);
down_write(&block_group->space_info->groups_sem);
@@ -9130,6 +9133,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
spin_lock(&info->block_group_cache_lock);
rb_erase(&cache->cache_node,
&info->block_group_cache_tree);
+ RB_CLEAR_NODE(&cache->cache_node);
spin_unlock(&info->block_group_cache_lock);
btrfs_put_block_group(cache);
goto error;
@@ -9271,6 +9275,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
spin_lock(&root->fs_info->block_group_cache_lock);
rb_erase(&cache->cache_node,
&root->fs_info->block_group_cache_tree);
+ RB_CLEAR_NODE(&cache->cache_node);
spin_unlock(&root->fs_info->block_group_cache_lock);
btrfs_put_block_group(cache);
return ret;
@@ -9690,13 +9695,7 @@ out:
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
- return unpin_extent_range(root, start, end);
-}
-
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes)
-{
- return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
+ return unpin_extent_range(root, start, end, false);
}
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 030847bf7cec..d6c03f7f136b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2966,8 +2966,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
- ret = btrfs_error_discard_extent(fs_info->extent_root,
- start, bytes, &trimmed);
+ ret = btrfs_discard_extent(fs_info->extent_root,
+ start, bytes, &trimmed);
if (!ret)
*total_trimmed += trimmed;
@@ -3185,16 +3185,18 @@ out:
spin_unlock(&block_group->lock);
+ lock_chunks(block_group->fs_info->chunk_root);
em_tree = &block_group->fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, block_group->key.objectid,
1);
BUG_ON(!em); /* logic error, can't happen */
+ /*
+ * remove_extent_mapping() will delete us from the pinned_chunks
+ * list, which is protected by the chunk mutex.
+ */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
-
- lock_chunks(block_group->fs_info->chunk_root);
- list_del_init(&em->list);
unlock_chunks(block_group->fs_info->chunk_root);
/* once for us and once for the tree */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0144790e296e..50c5a8762aed 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1485,7 +1485,7 @@ static void update_dev_time(char *path_name)
struct file *filp;
filp = filp_open(path_name, O_RDWR, 0);
- if (!filp)
+ if (IS_ERR(filp))
return;
file_update_time(filp);
filp_close(filp, NULL);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 18c06bbaf136..f5013d92a7e6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page)
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
int err = 0;
+ u64 off = page_offset(page);
u64 len = PAGE_CACHE_SIZE;
- err = ceph_readpage_from_fscache(inode, page);
+ if (off >= i_size_read(inode)) {
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ return 0;
+ }
+ /*
+ * Uptodate inline data should have been added into page cache
+ * while getting Fcr caps.
+ */
+ if (ci->i_inline_version != CEPH_INLINE_NONE)
+ return -EINVAL;
+
+ err = ceph_readpage_from_fscache(inode, page);
if (err == 0)
goto out;
dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index);
err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
- (u64) page_offset(page), &len,
+ off, &len,
ci->i_truncate_seq, ci->i_truncate_size,
&page, 1, 0);
if (err == -ENOENT)
@@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
off, len);
vino = ceph_vino(inode);
req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
- 1, CEPH_OSD_OP_READ,
+ 0, 1, CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ, NULL,
ci->i_truncate_seq, ci->i_truncate_size,
false);
@@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
int rc = 0;
int max = 0;
+ if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
+ return -EINVAL;
+
rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
&nr_pages);
@@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping,
int rc = 0;
unsigned wsize = 1 << inode->i_blkbits;
struct ceph_osd_request *req = NULL;
- int do_sync;
+ int do_sync = 0;
u64 truncate_size, snap_size;
u32 truncate_seq;
@@ -750,7 +766,6 @@ retry:
last_snapc = snapc;
while (!done && index <= end) {
- int num_ops = do_sync ? 2 : 1;
unsigned i;
int first;
pgoff_t next;
@@ -850,7 +865,8 @@ get_more_pages:
len = wsize;
req = ceph_osdc_new_request(&fsc->client->osdc,
&ci->i_layout, vino,
- offset, &len, num_ops,
+ offset, &len, 0,
+ do_sync ? 2 : 1,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
@@ -862,6 +878,9 @@ get_more_pages:
break;
}
+ if (do_sync)
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
req->r_callback = writepages_finish;
req->r_inode = inode;
@@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
+ struct page *pinned_page = NULL;
loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
int want, got, ret;
@@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
want = CEPH_CAP_FILE_CACHE;
while (1) {
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want,
+ -1, &got, &pinned_page);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
@@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
- ret = filemap_fault(vma, vmf);
+ if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
+ ci->i_inline_version == CEPH_INLINE_NONE)
+ ret = filemap_fault(vma, vmf);
+ else
+ ret = -EAGAIN;
dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
+ if (pinned_page)
+ page_cache_release(pinned_page);
ceph_put_cap_refs(ci, got);
+ if (ret != -EAGAIN)
+ return ret;
+
+ /* read inline data */
+ if (off >= PAGE_CACHE_SIZE) {
+ /* does not support inline data > PAGE_SIZE */
+ ret = VM_FAULT_SIGBUS;
+ } else {
+ int ret1;
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page = find_or_create_page(mapping, 0,
+ mapping_gfp_mask(mapping) &
+ ~__GFP_FS);
+ if (!page) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
+ ret1 = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (ret1 < 0 || off >= i_size_read(inode)) {
+ unlock_page(page);
+ page_cache_release(page);
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ if (ret1 < PAGE_CACHE_SIZE)
+ zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
+ else
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ vmf->page = page;
+ ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
+ }
+out:
+ dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
+ inode, off, (size_t)PAGE_CACHE_SIZE, ret);
return ret;
}
@@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
size_t len;
int want, got, ret;
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ struct page *locked_page = NULL;
+ if (off == 0) {
+ lock_page(page);
+ locked_page = page;
+ }
+ ret = ceph_uninline_data(vma->vm_file, locked_page);
+ if (locked_page)
+ unlock_page(locked_page);
+ if (ret < 0)
+ return VM_FAULT_SIGBUS;
+ }
+
if (off + PAGE_CACHE_SIZE <= size)
len = PAGE_CACHE_SIZE;
else
@@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
while (1) {
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
+ &got, NULL);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
@@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
}
out:
- if (ret != VM_FAULT_LOCKED) {
+ if (ret != VM_FAULT_LOCKED)
unlock_page(page);
- } else {
+ if (ret == VM_FAULT_LOCKED ||
+ ci->i_inline_version != CEPH_INLINE_NONE) {
int dirty;
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
@@ -1315,6 +1394,178 @@ out:
return ret;
}
+/*
+ * ceph_fill_inline_data - copy inline file data into page 0 of an inode.
+ * @inode:       inode whose mapping receives the data
+ * @locked_page: destination page supplied by the caller, or NULL
+ * @data:        source buffer holding the inline data
+ * @len:         number of bytes to copy from @data
+ *
+ * With @locked_page set, only the copy is performed: the page is not zeroed
+ * past @len, not marked uptodate, not unlocked and not released here, so all
+ * of that is left to the caller.
+ *
+ * With @locked_page NULL, page 0 is looked up or created in the inode's
+ * mapping.  The function returns without copying if the inode size is 0, if
+ * no page could be obtained, or if the page is already uptodate.  Otherwise
+ * the data is copied, the remainder of the page is zero-filled when @len is
+ * smaller than a page, and the page is marked uptodate, unlocked and
+ * released.
+ */
+void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
+			   char *data, size_t len)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+
+	if (locked_page) {
+		page = locked_page;
+	} else {
+		if (i_size_read(inode) == 0)
+			return;
+		page = find_or_create_page(mapping, 0,
+					   mapping_gfp_mask(mapping) & ~__GFP_FS);
+		if (!page)
+			return;
+		if (PageUptodate(page)) {
+			unlock_page(page);
+			page_cache_release(page);
+			return;
+		}
+	}
+
+	/* len is a size_t, so it must be printed with %zu, not %lu */
+	dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n",
+	     inode, ceph_vinop(inode), len, locked_page);
+
+	if (len > 0) {
+		void *kaddr = kmap_atomic(page);
+		memcpy(kaddr, data, len);
+		kunmap_atomic(kaddr);
+	}
+
+	/* only finish off a page that was obtained in this function */
+	if (page != locked_page) {
+		if (len < PAGE_CACHE_SIZE)
+			zero_user_segment(page, len, PAGE_CACHE_SIZE);
+		else
+			flush_dcache_page(page);
+
+		SetPageUptodate(page);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+}
+/*
+ * ceph_uninline_data - write an inode's inline data out through the OSD client.
+ * @filp:        open file; only used to reach the inode
+ * @locked_page: page 0 already locked by the caller, or NULL
+ *
+ * Returns 0 on success or when there is nothing to do, otherwise a negative
+ * errno.  Nothing is done when the sampled inline version is 1 or
+ * CEPH_INLINE_NONE.
+ *
+ * The data comes from, in order of preference: @locked_page, an uptodate
+ * page 0 found in the page cache (only tried when FILE_CACHE or FILE_LAZYIO
+ * caps are issued), or a freshly allocated page filled by a getattr request
+ * for CEPH_STAT_CAP_INLINE_DATA.
+ *
+ * Two OSD requests are then issued synchronously at file offset 0: a CREATE,
+ * followed by a three-op request (CMPXATTR on "inline_version", the data
+ * write, SETXATTR of "inline_version").
+ *
+ * This function does not itself change ci->i_inline_version, and it never
+ * unlocks or releases @locked_page.
+ */
+int ceph_uninline_data(struct file *filp, struct page *locked_page)
+{
+ struct inode *inode = file_inode(filp);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_request *req;
+ struct page *page = NULL;
+ u64 len, inline_version;
+ int err = 0;
+ bool from_pagecache = false;
+
+ spin_lock(&ci->i_ceph_lock); /* sample the version under the inode's ceph lock */
+ inline_version = ci->i_inline_version;
+ spin_unlock(&ci->i_ceph_lock);
+
+ dout("uninline_data %p %llx.%llx inline_version %llu\n",
+ inode, ceph_vinop(inode), inline_version);
+
+ if (inline_version == 1 || /* initial version, no data */
+ inline_version == CEPH_INLINE_NONE)
+ goto out;
+
+ if (locked_page) { /* source 1: page handed in by the caller */
+ page = locked_page;
+ WARN_ON(!PageUptodate(page));
+ } else if (ceph_caps_issued(ci) &
+ (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) { /* source 2: cached page 0 */
+ page = find_get_page(inode->i_mapping, 0);
+ if (page) {
+ if (PageUptodate(page)) {
+ from_pagecache = true; /* selects unlock+release cleanup at out: */
+ lock_page(page);
+ } else {
+ page_cache_release(page); /* not uptodate: fall back to getattr */
+ page = NULL;
+ }
+ }
+ }
+
+ if (page) {
+ len = i_size_read(inode);
+ if (len > PAGE_CACHE_SIZE) /* at most one page of data is written */
+ len = PAGE_CACHE_SIZE;
+ } else {
+ page = __page_cache_alloc(GFP_NOFS); /* source 3: private page, freed at out: */
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (err < 0) {
+ /* no inline data */
+ if (err == -ENODATA)
+ err = 0;
+ goto out;
+ }
+ len = err; /* a non-negative getattr result is used as the data length */
+ }
+
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ ceph_vino(inode), 0, &len, 0, 1,
+ CEPH_OSD_OP_CREATE, /* request 1: single CREATE op */
+ CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ ci->i_snap_realm->cached_context,
+ 0, 0, false);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
+ err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!err)
+ err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ ceph_osdc_put_request(req);
+ if (err < 0)
+ goto out;
+
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ ceph_vino(inode), 0, &len, 1, 3,
+ CEPH_OSD_OP_WRITE, /* request 2: three ops, WRITE is op index 1 */
+ CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ ci->i_snap_realm->cached_context,
+ ci->i_truncate_seq, ci->i_truncate_size,
+ false);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); /* op 1 payload */
+
+ err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, /* op 0: version compare */
+ "inline_version", &inline_version,
+ sizeof(inline_version),
+ CEPH_OSD_CMPXATTR_OP_GT,
+ CEPH_OSD_CMPXATTR_MODE_U64);
+ if (err)
+ goto out_put;
+
+ err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, /* op 2: record the version */
+ "inline_version", &inline_version,
+ sizeof(inline_version), 0, 0);
+ if (err)
+ goto out_put;
+
+ ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
+ err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!err)
+ err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+out_put:
+ ceph_osdc_put_request(req);
+ if (err == -ECANCELED) /* NOTE(review): presumably the CMPXATTR guard failing; treated as success -- confirm */
+ err = 0;
+out:
+ if (page && page != locked_page) { /* the caller's page is never touched here */
+ if (from_pagecache) {
+ unlock_page(page);
+ page_cache_release(page);
+ } else
+ __free_pages(page, 0);
+ }
+
+ dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
+ inode, ceph_vinop(inode), inline_version, err);
+ return err;
+}
+
static struct vm_operations_struct ceph_vmops = {
.fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cefca661464b..b93c631c6c87 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session,
kuid_t uid, kgid_t gid, umode_t mode,
u64 xattr_version,
struct ceph_buffer *xattrs_buf,
- u64 follows)
+ u64 follows, bool inline_data)
{
struct ceph_mds_caps *fc;
struct ceph_msg *msg;
+ void *p;
+ size_t extra_len;
dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
" seq %u/%u mseq %u follows %lld size %llu/%llu"
@@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session,
seq, issue_seq, mseq, follows, size, max_size,
xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
+ /* flock buffer size + inline version + inline data size */
+ extra_len = 4 + 8 + 4;
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len,
+ GFP_NOFS, false);
if (!msg)
return -ENOMEM;
@@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session,
fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
fc->mode = cpu_to_le32(mode);
+ p = fc + 1;
+ /* flock buffer size */
+ ceph_encode_32(&p, 0);
+ /* inline version */
+ ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE);
+ /* inline data size */
+ ceph_encode_32(&p, 0);
+
fc->xattr_version = cpu_to_le64(xattr_version);
if (xattrs_buf) {
msg->middle = ceph_buffer_get(xattrs_buf);
@@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
u64 flush_tid = 0;
int i;
int ret;
+ bool inline_data;
held = cap->issued | cap->implemented;
revoking = cap->implemented & ~cap->issued;
@@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
xattr_version = ci->i_xattrs.version;
}
+ inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
spin_unlock(&ci->i_ceph_lock);
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
size, max_size, &mtime, &atime, time_warp_seq,
uid, gid, mode, xattr_version, xattr_blob,
- follows);
+ follows, inline_data);
if (ret < 0) {
dout("error sending cap msg, must requeue %p\n", inode);
delayed = 1;
@@ -1336,7 +1352,7 @@ retry:
capsnap->time_warp_seq,
capsnap->uid, capsnap->gid, capsnap->mode,
capsnap->xattr_version, capsnap->xattr_blob,
- capsnap->follows);
+ capsnap->follows, capsnap->inline_data);
next_follows = capsnap->follows + 1;
ceph_put_cap_snap(capsnap);
@@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
* requested from the MDS.
*/
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
- int *got, loff_t endoff, int *check_max, int *err)
+ loff_t endoff, int *got, struct page **pinned_page,
+ int *check_max, int *err)
{
struct inode *inode = &ci->vfs_inode;
int ret = 0;
- int have, implemented;
+ int have, implemented, _got = 0;
int file_wanted;
dout("get_cap_refs %p need %s want %s\n", inode,
ceph_cap_string(need), ceph_cap_string(want));
+again:
spin_lock(&ci->i_ceph_lock);
/* make sure file is actually open */
@@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
ceph_cap_string(need), ceph_cap_string(file_wanted));
*err = -EBADF;
ret = 1;
- goto out;
+ goto out_unlock;
}
/* finish pending truncate */
@@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
*check_max = 1;
ret = 1;
}
- goto out;
+ goto out_unlock;
}
/*
* If a sync write is in progress, we must wait, so that we
@@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
*/
if (__ceph_have_pending_cap_snap(ci)) {
dout("get_cap_refs %p cap_snap_pending\n", inode);
- goto out;
+ goto out_unlock;
}
}
@@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
inode, ceph_cap_string(have), ceph_cap_string(not),
ceph_cap_string(revoking));
if ((revoking & not) == 0) {
- *got = need | (have & want);
- __take_cap_refs(ci, *got);
+ _got = need | (have & want);
+ __take_cap_refs(ci, _got);
ret = 1;
}
} else {
dout("get_cap_refs %p have %s needed %s\n", inode,
ceph_cap_string(have), ceph_cap_string(need));
}
-out:
+out_unlock:
spin_unlock(&ci->i_ceph_lock);
+
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+ i_size_read(inode) > 0) {
+ int ret1;
+ struct page *page = find_get_page(inode->i_mapping, 0);
+ if (page) {
+ if (PageUptodate(page)) {
+ *pinned_page = page;
+ goto out;
+ }
+ page_cache_release(page);
+ }
+ /*
+ * drop cap refs first because getattr while holding
+ * caps refs can cause deadlock.
+ */
+ ceph_put_cap_refs(ci, _got);
+ _got = 0;
+
+ /* getattr request will bring inline data into page cache */
+ ret1 = __ceph_do_getattr(inode, NULL,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (ret1 >= 0) {
+ ret = 0;
+ goto again;
+ }
+ *err = ret1;
+ ret = 1;
+ }
+out:
dout("get_cap_refs %p ret %d got %s\n", inode,
- ret, ceph_cap_string(*got));
+ ret, ceph_cap_string(_got));
+ *got = _got;
return ret;
}
@@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff)
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
-int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
- loff_t endoff)
+int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+ loff_t endoff, int *got, struct page **pinned_page)
{
int check_max, ret, err;
@@ -2179,8 +2229,8 @@ retry:
check_max = 0;
err = 0;
ret = wait_event_interruptible(ci->i_cap_wq,
- try_get_cap_refs(ci, need, want,
- got, endoff,
+ try_get_cap_refs(ci, need, want, endoff,
+ got, pinned_page,
&check_max, &err));
if (err)
ret = err;
@@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode)
static void handle_cap_grant(struct ceph_mds_client *mdsc,
struct inode *inode, struct ceph_mds_caps *grant,
void *snaptrace, int snaptrace_len,
+ u64 inline_version,
+ void *inline_data, int inline_len,
struct ceph_buffer *xattr_buf,
struct ceph_mds_session *session,
struct ceph_cap *cap, int issued)
@@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
bool queue_invalidate = false;
bool queue_revalidate = false;
bool deleted_inode = false;
+ bool fill_inline = false;
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
}
BUG_ON(cap->issued & ~cap->implemented);
+ if (inline_version > 0 && inline_version >= ci->i_inline_version) {
+ ci->i_inline_version = inline_version;
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
+ fill_inline = true;
+ }
+
spin_unlock(&ci->i_ceph_lock);
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
@@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
wake = true;
}
+ if (fill_inline)
+ ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
+
if (queue_trunc) {
ceph_queue_vmtruncate(inode);
ceph_queue_revalidate(inode);
@@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
u64 cap_id;
u64 size, max_size;
u64 tid;
+ u64 inline_version = 0;
+ void *inline_data = NULL;
+ u32 inline_len = 0;
void *snaptrace;
size_t snaptrace_len;
- void *flock;
- void *end;
- u32 flock_len;
+ void *p, *end;
dout("handle_caps from mds%d\n", mds);
@@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session,
snaptrace = h + 1;
snaptrace_len = le32_to_cpu(h->snap_trace_len);
+ p = snaptrace + snaptrace_len;
if (le16_to_cpu(msg->hdr.version) >= 2) {
- void *p = snaptrace + snaptrace_len;
+ u32 flock_len;
ceph_decode_32_safe(&p, end, flock_len, bad);
if (p + flock_len > end)
goto bad;
- flock = p;
- } else {
- flock = NULL;
- flock_len = 0;
+ p += flock_len;
}
if (le16_to_cpu(msg->hdr.version) >= 3) {
if (op == CEPH_CAP_OP_IMPORT) {
- void *p = flock + flock_len;
if (p + sizeof(*peer) > end)
goto bad;
peer = p;
+ p += sizeof(*peer);
} else if (op == CEPH_CAP_OP_EXPORT) {
/* recorded in unused fields */
peer = (void *)&h->size;
}
}
+ if (le16_to_cpu(msg->hdr.version) >= 4) {
+ ceph_decode_64_safe(&p, end, inline_version, bad);
+ ceph_decode_32_safe(&p, end, inline_len, bad);
+ if (p + inline_len > end)
+ goto bad;
+ inline_data = p;
+ p += inline_len;
+ }
+
/* lookup ino */
inode = ceph_find_inode(sb, vino);
ci = ceph_inode(inode);
@@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
handle_cap_import(mdsc, inode, h, peer, session,
&cap, &issued);
handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
+ inline_version, inline_data, inline_len,
msg->middle, session, cap, issued);
goto done_unlocked;
}
@@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_GRANT:
__ceph_caps_issued(ci, &issued);
issued |= __ceph_caps_dirty(ci);
- handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
- session, cap, issued);
+ handle_cap_grant(mdsc, inode, h, NULL, 0,
+ inline_version, inline_data, inline_len,
+ msg->middle, session, cap, issued);
goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
@@ -3137,8 +3210,7 @@ flush_cap_releases:
done:
mutex_unlock(&session->s_mutex);
done_unlocked:
- if (inode)
- iput(inode);
+ iput(inode);
return;
bad:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 681a8537b64f..c241603764fd 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -183,7 +183,7 @@ more:
spin_unlock(&parent->d_lock);
/* make sure a dentry wasn't dropped while we didn't have parent lock */
- if (!ceph_dir_is_complete(dir)) {
+ if (!ceph_dir_is_complete_ordered(dir)) {
dout(" lost dir complete on %p; falling back to mds\n", dir);
dput(dentry);
err = -EAGAIN;
@@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* always start with . and .. */
if (ctx->pos == 0) {
- /* note dir version at start of readdir so we can tell
- * if any dentries get dropped */
- fi->dir_release_count = atomic_read(&ci->i_release_count);
-
dout("readdir off 0 -> '.'\n");
if (!dir_emit(ctx, ".", 1,
ceph_translate_ino(inode->i_sb, inode->i_ino),
@@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
if ((ctx->pos == 2 || fi->dentry) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
- __ceph_dir_is_complete(ci) &&
+ __ceph_dir_is_complete_ordered(ci) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
u32 shared_gen = ci->i_shared_gen;
spin_unlock(&ci->i_ceph_lock);
@@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* proceed with a normal readdir */
+ if (ctx->pos == 2) {
+ /* note dir version at start of readdir so we can tell
+ * if any dentries get dropped */
+ fi->dir_release_count = atomic_read(&ci->i_release_count);
+ fi->dir_ordered_count = ci->i_ordered_count;
+ }
+
more:
/* do we have the correct frag content buffered? */
if (fi->frag != frag || fi->last_readdir == NULL) {
@@ -446,8 +449,12 @@ more:
*/
spin_lock(&ci->i_ceph_lock);
if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
- dout(" marking %p complete\n", inode);
- __ceph_dir_set_complete(ci, fi->dir_release_count);
+ if (ci->i_ordered_count == fi->dir_ordered_count)
+ dout(" marking %p complete and ordered\n", inode);
+ else
+ dout(" marking %p complete\n", inode);
+ __ceph_dir_set_complete(ci, fi->dir_release_count,
+ fi->dir_ordered_count);
}
spin_unlock(&ci->i_ceph_lock);
@@ -805,7 +812,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
acls.pagelist = NULL;
}
err = ceph_mdsc_do_request(mdsc, dir, req);
- if (!err && !req->r_reply_info.head->is_dentry)
+ if (!err &&
+ !req->r_reply_info.head->is_target &&
+ !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
ceph_mdsc_put_request(req);
out:
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9f8e3572040e..ce74b394b49d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -333,6 +333,11 @@ int ceph_release(struct inode *inode, struct file *file)
return 0;
}
+enum {
+ CHECK_EOF = 1,
+ READ_INLINE = 2,
+};
+
/*
* Read a range of bytes striped over one or more objects. Iterate over
* objects we stripe over. (That's not atomic, but good enough for now.)
@@ -412,7 +417,7 @@ more:
ret = read;
/* did we bounce off eof? */
if (pos + left > inode->i_size)
- *checkeof = 1;
+ *checkeof = CHECK_EOF;
}
dout("striped_read returns %d\n", ret);
@@ -598,7 +603,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
snapc = ci->i_snap_realm->cached_context;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len,
+ vino, pos, &len, 0,
2,/*include a 'startsync' command*/
CEPH_OSD_OP_WRITE, flags, snapc,
ci->i_truncate_seq,
@@ -609,6 +614,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
break;
}
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
n = iov_iter_get_pages_alloc(from, &pages, len, &start);
if (unlikely(n < 0)) {
ret = n;
@@ -713,7 +720,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
snapc = ci->i_snap_realm->cached_context;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len, 1,
+ vino, pos, &len, 0, 1,
CEPH_OSD_OP_WRITE, flags, snapc,
ci->i_truncate_seq,
ci->i_truncate_size,
@@ -803,9 +810,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
size_t len = iocb->ki_nbytes;
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct page *pinned_page = NULL;
ssize_t ret;
int want, got = 0;
- int checkeof = 0, read = 0;
+ int retry_op = 0, read = 0;
again:
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
@@ -815,7 +823,7 @@ again:
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
if (ret < 0)
return ret;
@@ -827,8 +835,12 @@ again:
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
- /* hmm, this isn't really async... */
- ret = ceph_sync_read(iocb, to, &checkeof);
+ if (ci->i_inline_version == CEPH_INLINE_NONE) {
+ /* hmm, this isn't really async... */
+ ret = ceph_sync_read(iocb, to, &retry_op);
+ } else {
+ retry_op = READ_INLINE;
+ }
} else {
dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
@@ -838,13 +850,55 @@ again:
}
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
+ if (pinned_page) {
+ page_cache_release(pinned_page);
+ pinned_page = NULL;
+ }
ceph_put_cap_refs(ci, got);
+ if (retry_op && ret >= 0) {
+ int statret;
+ struct page *page = NULL;
+ loff_t i_size;
+ if (retry_op == READ_INLINE) {
+ page = __page_cache_alloc(GFP_NOFS);
+ if (!page)
+ return -ENOMEM;
+ }
- if (checkeof && ret >= 0) {
- int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
+ statret = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, !!page);
+ if (statret < 0) {
+ __free_page(page);
+ if (statret == -ENODATA) {
+ BUG_ON(retry_op != READ_INLINE);
+ goto again;
+ }
+ return statret;
+ }
+
+ i_size = i_size_read(inode);
+ if (retry_op == READ_INLINE) {
+ /* does not support inline data > PAGE_SIZE */
+ if (i_size > PAGE_CACHE_SIZE) {
+ ret = -EIO;
+ } else if (iocb->ki_pos < i_size) {
+ loff_t end = min_t(loff_t, i_size,
+ iocb->ki_pos + len);
+ if (statret < end)
+ zero_user_segment(page, statret, end);
+ ret = copy_page_to_iter(page,
+ iocb->ki_pos & ~PAGE_MASK,
+ end - iocb->ki_pos, to);
+ iocb->ki_pos += ret;
+ } else {
+ ret = 0;
+ }
+ __free_pages(page, 0);
+ return ret;
+ }
/* hit EOF or hole? */
- if (statret == 0 && iocb->ki_pos < inode->i_size &&
+ if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
ret < len) {
dout("sync_read hit hole, ppos %lld < size %lld"
", reading more\n", iocb->ki_pos,
@@ -852,7 +906,7 @@ again:
read += ret;
len -= ret;
- checkeof = 0;
+ retry_op = 0;
goto again;
}
}
@@ -909,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err)
goto out;
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ err = ceph_uninline_data(file, NULL);
+ if (err < 0)
+ goto out;
+ }
+
retry_snap:
if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
err = -ENOSPC;
@@ -922,7 +982,8 @@ retry_snap:
else
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count);
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
+ &got, NULL);
if (err < 0)
goto out;
@@ -969,6 +1030,7 @@ retry_snap:
if (written >= 0) {
int dirty;
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
@@ -1111,7 +1173,7 @@ static int ceph_zero_partial_object(struct inode *inode,
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode),
offset, length,
- 1, op,
+ 0, 1, op,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
NULL, 0, 0, false);
@@ -1214,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock;
}
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ ret = ceph_uninline_data(file, NULL);
+ if (ret < 0)
+ goto unlock;
+ }
+
size = i_size_read(inode);
if (!(mode & FALLOC_FL_KEEP_SIZE))
endoff = offset + length;
@@ -1223,7 +1291,7 @@ static long ceph_fallocate(struct file *file, int mode,
else
want = CEPH_CAP_FILE_BUFFER;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
if (ret < 0)
goto unlock;
@@ -1240,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (!ret) {
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a5593d51d035..f61a74115beb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -387,8 +387,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
spin_lock_init(&ci->i_ceph_lock);
ci->i_version = 0;
+ ci->i_inline_version = 0;
ci->i_time_warp_seq = 0;
ci->i_ceph_flags = 0;
+ ci->i_ordered_count = 0;
atomic_set(&ci->i_release_count, 1);
atomic_set(&ci->i_complete_count, 0);
ci->i_symlink = NULL;
@@ -657,7 +659,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
* Populate an inode based on info from mds. May be called on new or
* existing inodes.
*/
-static int fill_inode(struct inode *inode,
+static int fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_reply_info_in *iinfo,
struct ceph_mds_reply_dirfrag *dirinfo,
struct ceph_mds_session *session,
@@ -675,6 +677,7 @@ static int fill_inode(struct inode *inode,
bool wake = false;
bool queue_trunc = false;
bool new_version = false;
+ bool fill_inline = false;
dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
inode, ceph_vinop(inode), le64_to_cpu(info->version),
@@ -845,7 +848,8 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
!__ceph_dir_is_complete(ci)) {
dout(" marking %p complete (empty)\n", inode);
- __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
+ __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count),
+ ci->i_ordered_count);
}
/* were we issued a capability? */
@@ -873,8 +877,23 @@ static int fill_inode(struct inode *inode,
ceph_vinop(inode));
__ceph_get_fmode(ci, cap_fmode);
}
+
+ if (iinfo->inline_version > 0 &&
+ iinfo->inline_version >= ci->i_inline_version) {
+ int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+ ci->i_inline_version = iinfo->inline_version;
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (locked_page ||
+ (le32_to_cpu(info->cap.caps) & cache_caps)))
+ fill_inline = true;
+ }
+
spin_unlock(&ci->i_ceph_lock);
+ if (fill_inline)
+ ceph_fill_inline_data(inode, locked_page,
+ iinfo->inline_data, iinfo->inline_len);
+
if (wake)
wake_up_all(&ci->i_cap_wq);
@@ -1062,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
struct inode *dir = req->r_locked_dir;
if (dir) {
- err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
+ err = fill_inode(dir, NULL,
+ &rinfo->diri, rinfo->dirfrag,
session, req->r_request_started, -1,
&req->r_caps_reservation);
if (err < 0)
@@ -1132,7 +1152,7 @@ retry_lookup:
}
req->r_target_inode = in;
- err = fill_inode(in, &rinfo->targeti, NULL,
+ err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
session, req->r_request_started,
(!req->r_aborted && rinfo->head->result == 0) ?
req->r_fmode : -1,
@@ -1204,8 +1224,8 @@ retry_lookup:
ceph_invalidate_dentry_lease(dn);
/* d_move screws up sibling dentries' offsets */
- ceph_dir_clear_complete(dir);
- ceph_dir_clear_complete(olddir);
+ ceph_dir_clear_ordered(dir);
+ ceph_dir_clear_ordered(olddir);
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);
@@ -1217,6 +1237,7 @@ retry_lookup:
if (!rinfo->head->is_target) {
dout("fill_trace null dentry\n");
if (dn->d_inode) {
+ ceph_dir_clear_ordered(dir);
dout("d_delete %p\n", dn);
d_delete(dn);
} else {
@@ -1233,7 +1254,7 @@ retry_lookup:
/* attach proper inode */
if (!dn->d_inode) {
- ceph_dir_clear_complete(dir);
+ ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, &have_lease);
if (IS_ERR(dn)) {
@@ -1263,7 +1284,7 @@ retry_lookup:
BUG_ON(!dir);
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
dout(" linking snapped dir %p to dn %p\n", in, dn);
- ceph_dir_clear_complete(dir);
+ ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, NULL);
if (IS_ERR(dn)) {
@@ -1300,7 +1321,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
dout("new_inode badness got %d\n", err);
continue;
}
- rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
+ rc = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation);
if (rc < 0) {
@@ -1416,7 +1437,7 @@ retry_lookup:
}
}
- if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
+ if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
@@ -1899,7 +1920,8 @@ out_put:
* Verify that we have a lease on the given mask. If not,
* do a getattr against an mds.
*/
-int ceph_do_getattr(struct inode *inode, int mask, bool force)
+int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
+ int mask, bool force)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1911,7 +1933,8 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
return 0;
}
- dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
+ dout("do_getattr inode %p mask %s mode 0%o\n",
+ inode, ceph_cap_string(mask), inode->i_mode);
if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
return 0;
@@ -1922,7 +1945,19 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
ihold(inode);
req->r_num_caps = 1;
req->r_args.getattr.mask = cpu_to_le32(mask);
+ req->r_locked_page = locked_page;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (locked_page && err == 0) {
+ u64 inline_version = req->r_reply_info.targeti.inline_version;
+ if (inline_version == 0) {
+ /* the reply is supposed to contain inline data */
+ err = -EINVAL;
+ } else if (inline_version == CEPH_INLINE_NONE) {
+ err = -ENODATA;
+ } else {
+ err = req->r_reply_info.targeti.inline_len;
+ }
+ }
ceph_mdsc_put_request(req);
dout("do_getattr result=%d\n", err);
return err;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index fbc39c47bacd..c35c5c614e38 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -9,6 +9,8 @@
#include <linux/ceph/pagelist.h>
static u64 lock_secret;
+static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
static inline u64 secure_addr(void *addr)
{
@@ -40,6 +42,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
u64 length = 0;
u64 owner;
+ if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
+ wait = 0;
+
req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -68,6 +73,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
req->r_args.filelock_change.length = cpu_to_le64(length);
req->r_args.filelock_change.wait = wait;
+ if (wait)
+ req->r_wait_for_completion = ceph_lock_wait_for_completion;
+
err = ceph_mdsc_do_request(mdsc, inode, req);
if (operation == CEPH_MDS_OP_GETFILELOCK) {
@@ -96,6 +104,52 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
return err;
}
+static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ struct ceph_mds_request *intr_req;
+ struct inode *inode = req->r_inode;
+ int err, lock_type;
+
+ BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
+ if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
+ lock_type = CEPH_LOCK_FCNTL_INTR;
+ else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
+ lock_type = CEPH_LOCK_FLOCK_INTR;
+ else
+ BUG_ON(1);
+ BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);
+
+ err = wait_for_completion_interruptible(&req->r_completion);
+ if (!err)
+ return 0;
+
+ dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
+ req->r_tid);
+
+ intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
+ USE_AUTH_MDS);
+ if (IS_ERR(intr_req))
+ return PTR_ERR(intr_req);
+
+ intr_req->r_inode = inode;
+ ihold(inode);
+ intr_req->r_num_caps = 1;
+
+ intr_req->r_args.filelock_change = req->r_args.filelock_change;
+ intr_req->r_args.filelock_change.rule = lock_type;
+ intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;
+
+ err = ceph_mdsc_do_request(mdsc, inode, intr_req);
+ ceph_mdsc_put_request(intr_req);
+
+ if (err && err != -ERESTARTSYS)
+ return err;
+
+ wait_for_completion(&req->r_completion);
+ return 0;
+}
+
/**
* Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome.
@@ -143,11 +197,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
err);
}
}
-
- } else if (err == -ERESTARTSYS) {
- dout("undoing lock\n");
- ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
@@ -186,11 +235,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
file, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on flock_lock_file_wait, undid lock", err);
}
- } else if (err == -ERESTARTSYS) {
- dout("undoing lock\n");
- ceph_lock_message(CEPH_LOCK_FLOCK,
- CEPH_MDS_OP_SETFILELOCK,
- file, CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a92d3f5c6c12..d2171f4a6980 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -89,6 +89,16 @@ static int parse_reply_info_in(void **p, void *end,
ceph_decode_need(p, end, info->xattr_len, bad);
info->xattr_data = *p;
*p += info->xattr_len;
+
+ if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
+ ceph_decode_64_safe(p, end, info->inline_version, bad);
+ ceph_decode_32_safe(p, end, info->inline_len, bad);
+ ceph_decode_need(p, end, info->inline_len, bad);
+ info->inline_data = *p;
+ *p += info->inline_len;
+ } else
+ info->inline_version = CEPH_INLINE_NONE;
+
return 0;
bad:
return err;
@@ -524,8 +534,7 @@ void ceph_mdsc_release_request(struct kref *kref)
}
if (req->r_locked_dir)
ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
- if (req->r_target_inode)
- iput(req->r_target_inode);
+ iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
if (req->r_old_dentry)
@@ -861,8 +870,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
/*
* Serialize client metadata into waiting buffer space, using
* the format that userspace expects for map<string, string>
+ *
+ * ClientSession messages with metadata are v2
*/
- msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */
+ msg->hdr.version = cpu_to_le16(2);
+ msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
p = msg->front.iov_base + sizeof(*h);
@@ -1066,8 +1078,7 @@ out:
session->s_cap_iterator = NULL;
spin_unlock(&session->s_cap_lock);
- if (last_inode)
- iput(last_inode);
+ iput(last_inode);
if (old_cap)
ceph_put_cap(session->s_mdsc, old_cap);
@@ -1874,7 +1885,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free2;
}
- msg->hdr.version = 2;
+ msg->hdr.version = cpu_to_le16(2);
msg->hdr.tid = cpu_to_le64(req->r_tid);
head = msg->front.iov_base;
@@ -2208,6 +2219,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
&req->r_completion, req->r_timeout);
if (err == 0)
err = -EIO;
+ } else if (req->r_wait_for_completion) {
+ err = req->r_wait_for_completion(mdsc, req);
} else {
err = wait_for_completion_killable(&req->r_completion);
}
@@ -3744,6 +3757,20 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
return msg;
}
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ return ceph_auth_sign_message(auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ return ceph_auth_check_message_signature(auth, msg);
+}
+
static const struct ceph_connection_operations mds_con_ops = {
.get = con_get,
.put = con_put,
@@ -3753,6 +3780,8 @@ static const struct ceph_connection_operations mds_con_ops = {
.invalidate_authorizer = invalidate_authorizer,
.peer_reset = peer_reset,
.alloc_msg = mds_alloc_msg,
+ .sign_message = sign_message,
+ .check_message_signature = check_message_signature,
};
/* eof */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3288359353e9..e2817d00f7d9 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -41,6 +41,9 @@ struct ceph_mds_reply_info_in {
char *symlink;
u32 xattr_len;
char *xattr_data;
+ u64 inline_version;
+ u32 inline_len;
+ char *inline_data;
};
/*
@@ -166,6 +169,11 @@ struct ceph_mds_client;
*/
typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
struct ceph_mds_request *req);
+/*
+ * wait for request completion callback
+ */
+typedef int (*ceph_mds_request_wait_callback_t) (struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
/*
* an in-flight mds request
@@ -215,6 +223,7 @@ struct ceph_mds_request {
int r_request_release_offset;
struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info;
+ struct page *r_locked_page;
int r_err;
bool r_aborted;
@@ -239,6 +248,7 @@ struct ceph_mds_request {
struct completion r_completion;
struct completion r_safe_completion;
ceph_mds_request_callback_t r_callback;
+ ceph_mds_request_wait_callback_t r_wait_for_completion;
struct list_head r_unsafe_item; /* per-session unsafe list item */
bool r_got_unsafe, r_got_safe, r_got_result;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f01645a27752..ce35fbd4ba5d 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b)
return 0;
}
+
+static struct ceph_snap_context *empty_snapc;
+
/*
* build the snap context for a given realm.
*/
@@ -328,6 +331,12 @@ static int build_snap_context(struct ceph_snap_realm *realm)
return 0;
}
+ if (num == 0 && realm->seq == empty_snapc->seq) {
+ ceph_get_snap_context(empty_snapc);
+ snapc = empty_snapc;
+ goto done;
+ }
+
/* alloc new snap context */
err = -ENOMEM;
if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
@@ -365,8 +374,8 @@ static int build_snap_context(struct ceph_snap_realm *realm)
realm->ino, realm, snapc, snapc->seq,
(unsigned int) snapc->num_snaps);
- if (realm->cached_context)
- ceph_put_snap_context(realm->cached_context);
+done:
+ ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
return 0;
@@ -466,6 +475,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap);
+ } else if (ci->i_snap_realm->cached_context == empty_snapc) {
+ dout("queue_cap_snap %p empty snapc\n", inode);
+ kfree(capsnap);
} else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
struct ceph_snap_context *snapc = ci->i_head_snapc;
@@ -504,6 +516,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
capsnap->xattr_version = 0;
}
+ capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
/* dirty page count moved from _head to this cap_snap;
all subsequent writes page dirties occur _after_ this
snapshot. */
@@ -590,15 +604,13 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
if (!inode)
continue;
spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
+ iput(lastinode);
lastinode = inode;
ceph_queue_cap_snap(ci);
spin_lock(&realm->inodes_with_caps_lock);
}
spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
+ iput(lastinode);
list_for_each_entry(child, &realm->children, child_item) {
dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
@@ -928,5 +940,16 @@ out:
return;
}
+int __init ceph_snap_init(void)
+{
+ empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
+ if (!empty_snapc)
+ return -ENOMEM;
+ empty_snapc->seq = 1;
+ return 0;
+}
-
+void ceph_snap_exit(void)
+{
+ ceph_put_snap_context(empty_snapc);
+}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f6e12377335c..50f06cddc94b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -515,7 +515,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
struct ceph_fs_client *fsc;
const u64 supported_features =
CEPH_FEATURE_FLOCK |
- CEPH_FEATURE_DIRLAYOUTHASH;
+ CEPH_FEATURE_DIRLAYOUTHASH |
+ CEPH_FEATURE_MDS_INLINE_DATA;
const u64 required_features = 0;
int page_count;
size_t size;
@@ -1017,9 +1018,6 @@ static struct file_system_type ceph_fs_type = {
};
MODULE_ALIAS_FS("ceph");
-#define _STRINGIFY(x) #x
-#define STRINGIFY(x) _STRINGIFY(x)
-
static int __init init_ceph(void)
{
int ret = init_caches();
@@ -1028,15 +1026,20 @@ static int __init init_ceph(void)
ceph_flock_init();
ceph_xattr_init();
+ ret = ceph_snap_init();
+ if (ret)
+ goto out_xattr;
ret = register_filesystem(&ceph_fs_type);
if (ret)
- goto out_icache;
+ goto out_snap;
pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
return 0;
-out_icache:
+out_snap:
+ ceph_snap_exit();
+out_xattr:
ceph_xattr_exit();
destroy_caches();
out:
@@ -1047,6 +1050,7 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
+ ceph_snap_exit();
ceph_xattr_exit();
destroy_caches();
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b82f507979b8..e1aa32d0759d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -161,6 +161,7 @@ struct ceph_cap_snap {
u64 time_warp_seq;
int writing; /* a sync write is still in progress */
int dirty_pages; /* dirty pages awaiting writeback */
+ bool inline_data;
};
static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
@@ -253,9 +254,11 @@ struct ceph_inode_info {
spinlock_t i_ceph_lock;
u64 i_version;
+ u64 i_inline_version;
u32 i_time_warp_seq;
unsigned i_ceph_flags;
+ int i_ordered_count;
atomic_t i_release_count;
atomic_t i_complete_count;
@@ -434,14 +437,19 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
/*
* Ceph inode.
*/
-#define CEPH_I_NODELAY 4 /* do not delay cap release */
-#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
+#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */
+#define CEPH_I_NODELAY 4 /* do not delay cap release */
+#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
+#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
- int release_count)
+ int release_count, int ordered_count)
{
atomic_set(&ci->i_complete_count, release_count);
+ if (ci->i_ordered_count == ordered_count)
+ ci->i_ceph_flags |= CEPH_I_DIR_ORDERED;
+ else
+ ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
}
static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
@@ -455,16 +463,35 @@ static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
atomic_read(&ci->i_release_count);
}
+static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci)
+{
+ return __ceph_dir_is_complete(ci) &&
+ (ci->i_ceph_flags & CEPH_I_DIR_ORDERED);
+}
+
static inline void ceph_dir_clear_complete(struct inode *inode)
{
__ceph_dir_clear_complete(ceph_inode(inode));
}
-static inline bool ceph_dir_is_complete(struct inode *inode)
+static inline void ceph_dir_clear_ordered(struct inode *inode)
{
- return __ceph_dir_is_complete(ceph_inode(inode));
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_ordered_count++;
+ ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
+ spin_unlock(&ci->i_ceph_lock);
}
+static inline bool ceph_dir_is_complete_ordered(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ bool ret;
+ spin_lock(&ci->i_ceph_lock);
+ ret = __ceph_dir_is_complete_ordered(ci);
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
+}
/* find a specific frag @f */
extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
@@ -580,6 +607,7 @@ struct ceph_file_info {
char *last_name; /* last entry in previous chunk */
struct dentry *dentry; /* next dentry (for dcache readdir) */
int dir_release_count;
+ int dir_ordered_count;
/* used for -o dirstat read() on directory thing */
char *dir_info;
@@ -673,6 +701,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap);
extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
+extern int ceph_snap_init(void);
+extern void ceph_snap_exit(void);
/*
* a cap_snap is "pending" if it is still awaiting an in-progress
@@ -715,7 +745,12 @@ extern void ceph_queue_vmtruncate(struct inode *inode);
extern void ceph_queue_invalidate(struct inode *inode);
extern void ceph_queue_writeback(struct inode *inode);
-extern int ceph_do_getattr(struct inode *inode, int mask, bool force);
+extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
+ int mask, bool force);
+static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
+{
+ return __ceph_do_getattr(inode, NULL, mask, force);
+}
extern int ceph_permission(struct inode *inode, int mask);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -830,7 +865,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
int mds, int drop, int unless);
extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
- int *got, loff_t endoff);
+ loff_t endoff, int *got, struct page **pinned_page);
/* for counting open files by mode */
static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
@@ -852,7 +887,9 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags, umode_t mode,
int *opened);
extern int ceph_release(struct inode *inode, struct file *filp);
-
+extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
+ char *data, size_t len);
+int ceph_uninline_data(struct file *filp, struct page *locked_page);
/* dir.c */
extern const struct file_operations ceph_dir_fops;
extern const struct inode_operations ceph_dir_iops;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 678b0d2bbbc4..5a492caf34cb 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -854,7 +854,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
struct ceph_pagelist *pagelist = NULL;
int err;
- if (value) {
+ if (size > 0) {
/* copy value into pagelist */
pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
if (!pagelist)
@@ -864,7 +864,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
err = ceph_pagelist_append(pagelist, value, size);
if (err)
goto out;
- } else {
+ } else if (!value) {
flags |= CEPH_XATTR_REMOVE;
}
@@ -1001,6 +1001,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_setxattr(dentry, name, value, size, flags);
+ if (size == 0)
+ value = ""; /* empty EA, do not remove */
+
return __ceph_setxattr(dentry, name, value, size, flags);
}
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 7ff025966e4f..86c893884eb9 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -426,7 +426,6 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx)
struct coda_file_info *cfi;
struct coda_inode_info *cii;
struct file *host_file;
- struct dentry *de;
struct venus_dirent *vdir;
unsigned long vdir_size = offsetof(struct venus_dirent, d_name);
unsigned int type;
@@ -438,8 +437,7 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- de = coda_file->f_path.dentry;
- cii = ITOC(de->d_inode);
+ cii = ITOC(file_inode(coda_file));
vdir = kmalloc(sizeof(*vdir), GFP_KERNEL);
if (!vdir) return -ENOMEM;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 8e0f2f410189..517e64938438 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -22,6 +22,7 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/atomic.h>
+#include <linux/device.h>
static ssize_t default_read_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -762,3 +763,56 @@ struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
EXPORT_SYMBOL_GPL(debugfs_create_regset32);
#endif /* CONFIG_HAS_IOMEM */
+
+struct debugfs_devm_entry { /* per-file state; read back from inode->i_private in debugfs_devm_entry_open() */
+ int (*read)(struct seq_file *seq, void *data); /* show callback handed to single_open() */
+ struct device *dev; /* device handed to single_open() as its data pointer */
+};
+
+static int debugfs_devm_entry_open(struct inode *inode, struct file *f)
+{ /*
+ * open() handler for debugfs_devm_entry_ops: take the
+ * debugfs_devm_entry stored in inode->i_private and open a
+ * single_open() seq_file with entry->read as the show callback and
+ * entry->dev as the data pointer. Returns single_open()'s result.
+ */
+ struct debugfs_devm_entry *entry = inode->i_private;
+
+ return single_open(f, entry->read, entry->dev);
+}
+
+static const struct file_operations debugfs_devm_entry_ops = { /* fops passed to debugfs_create_file() by debugfs_create_devm_seqfile() */
+ .owner = THIS_MODULE,
+ .open = debugfs_devm_entry_open, /* wires entry->read / entry->dev into single_open() */
+ .release = single_release, /* counterpart of the single_open() done in .open */
+ .read = seq_read,
+ .llseek = seq_lseek
+};
+
+/**
+ * debugfs_create_devm_seqfile - create a debugfs file that is bound to a device.
+ *
+ * @dev: device related to this debugfs file.
+ * @name: name of the debugfs file.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is %NULL, then the
+ * file will be created in the root of the debugfs filesystem.
+ * @read_fn: function pointer called to print the seq_file content.
+ *
+ * The bookkeeping entry that records @read_fn and @dev is allocated with
+ * devm_kzalloc() against @dev. When the file is opened, @read_fn is
+ * installed as the single_open() show callback and @dev is passed to
+ * single_open() as its data pointer. The file is created with mode S_IRUGO.
+ *
+ * Return: ERR_PTR(-ENOENT) if @parent is an error pointer, ERR_PTR(-ENOMEM)
+ * if the entry cannot be allocated, otherwise whatever debugfs_create_file()
+ * returns.
+ */
+struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name,
+ struct dentry *parent,
+ int (*read_fn)(struct seq_file *s,
+ void *data))
+{
+ struct debugfs_devm_entry *entry;
+
+ if (IS_ERR(parent)) /* reject an error-pointer parent up front */
+ return ERR_PTR(-ENOENT);
+
+ entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ entry->read = read_fn;
+ entry->dev = dev;
+
+ return debugfs_create_file(name, S_IRUGO, parent, entry,
+ &debugfs_devm_entry_ops);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_devm_seqfile);
+
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c2d6604667b0..719e1ce1c609 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1917,7 +1917,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
break;
case 2:
dst[dst_byte_offset++] |= (src_byte);
- dst[dst_byte_offset] = 0;
current_bit_offset = 0;
break;
}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 80154ec4f8c2..6f4e659f508f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -190,23 +190,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
{
int rc = 0;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
- struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct dentry *ecryptfs_dentry = file->f_path.dentry;
/* Private value of ecryptfs_dentry allocated in
* ecryptfs_lookup() */
struct ecryptfs_file_info *file_info;
- mount_crypt_stat = &ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
- if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
- && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
- || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
- || (file->f_flags & O_APPEND))) {
- printk(KERN_WARNING "Mount has encrypted view enabled; "
- "files may only be read\n");
- rc = -EPERM;
- goto out;
- }
/* Released in ecryptfs_release or end of function if failure */
file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
ecryptfs_set_file_private(file, file_info);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 635e8e16a5b7..917bd5c9776a 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -100,12 +100,12 @@ int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
(*size) = 0;
if (data[0] < 192) {
/* One-byte length */
- (*size) = (unsigned char)data[0];
+ (*size) = data[0];
(*length_size) = 1;
} else if (data[0] < 224) {
/* Two-byte length */
- (*size) = (((unsigned char)(data[0]) - 192) * 256);
- (*size) += ((unsigned char)(data[1]) + 192);
+ (*size) = (data[0] - 192) * 256;
+ (*size) += data[1] + 192;
(*length_size) = 2;
} else if (data[0] == 255) {
/* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c4cd1fd86cc2..d9eb84bda559 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -493,6 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
{
struct super_block *s;
struct ecryptfs_sb_info *sbi;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct ecryptfs_dentry_info *root_info;
const char *err = "Getting sb failed";
struct inode *inode;
@@ -511,6 +512,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
err = "Error parsing options";
goto out;
}
+ mount_crypt_stat = &sbi->mount_crypt_stat;
s = sget(fs_type, NULL, set_anon_super, flags, NULL);
if (IS_ERR(s)) {
@@ -557,11 +559,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
/**
* Set the POSIX ACL flag based on whether they're enabled in the lower
- * mount. Force a read-only eCryptfs mount if the lower mount is ro.
- * Allow a ro eCryptfs mount even when the lower mount is rw.
+ * mount.
*/
s->s_flags = flags & ~MS_POSIXACL;
- s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+ s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+ /**
+ * Force a read-only eCryptfs mount when:
+ * 1) The lower mount is ro
+ * 2) The ecryptfs_encrypted_view mount option is specified
+ */
+ if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+ mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ s->s_flags |= MS_RDONLY;
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 503ea15dc5db..370420bfae8d 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -267,7 +267,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
handle_t *handle;
ext4_lblk_t orig_blk_offset, donor_blk_offset;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
- unsigned int w_flags = 0;
unsigned int tmp_data_size, data_size, replaced_size;
int err2, jblocks, retries = 0;
int replaced_count = 0;
@@ -288,9 +287,6 @@ again:
return 0;
}
- if (segment_eq(get_fs(), KERNEL_DS))
- w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
orig_blk_offset = orig_page_offset * blocks_per_page +
data_offset_in_page;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 966ace8b243f..28d0c7abba1c 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -415,7 +415,7 @@ err_unlock:
err_region:
unregister_chrdev_region(devt, 1);
err:
- fuse_conn_kill(fc);
+ fuse_abort_conn(fc);
goto out;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ca887314aba9..ba1107977f2e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -511,6 +511,35 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
}
EXPORT_SYMBOL_GPL(fuse_request_send);
+ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
+{ /*
+ * Helper that hides the fuse_req handling from callers: obtain a
+ * request with fuse_get_req(), copy the opcode, nodeid and the in/out
+ * argument descriptors from @args into it, send it with
+ * fuse_request_send(), read the result and put the request again.
+ *
+ * Returns PTR_ERR() of a failed fuse_get_req(), otherwise the reply's
+ * out.h.error. When that error is zero and args->out.argvar is set,
+ * the size recorded in the request's out.args[0] is returned instead.
+ */
+ struct fuse_req *req;
+ ssize_t ret;
+
+ req = fuse_get_req(fc, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->in.h.opcode = args->in.h.opcode;
+ req->in.h.nodeid = args->in.h.nodeid;
+ req->in.numargs = args->in.numargs;
+ memcpy(req->in.args, args->in.args,
+ args->in.numargs * sizeof(struct fuse_in_arg));
+ req->out.argvar = args->out.argvar;
+ req->out.numargs = args->out.numargs;
+ memcpy(req->out.args, args->out.args,
+ args->out.numargs * sizeof(struct fuse_arg));
+ fuse_request_send(fc, req);
+ ret = req->out.h.error;
+ if (!ret && args->out.argvar) {
+ BUG_ON(args->out.numargs != 1); /* only out.args[0].size is reported back */
+ ret = req->out.args[0].size;
+ }
+ fuse_put_request(fc, req); /* ret was captured above, before the request is released */
+
+ return ret;
+}
+
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
struct fuse_req *req)
{
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index df562cc87763..252b8a5de8b5 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -145,22 +145,22 @@ static void fuse_invalidate_entry(struct dentry *entry)
fuse_invalidate_entry_cache(entry);
}
-static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg)
{
memset(outarg, 0, sizeof(struct fuse_entry_out));
- req->in.h.opcode = FUSE_LOOKUP;
- req->in.h.nodeid = nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = name->len + 1;
- req->in.args[0].value = name->name;
- req->out.numargs = 1;
+ args->in.h.opcode = FUSE_LOOKUP;
+ args->in.h.nodeid = nodeid;
+ args->in.numargs = 1;
+ args->in.args[0].size = name->len + 1;
+ args->in.args[0].value = name->name;
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(struct fuse_entry_out);
- req->out.args[0].value = outarg;
+ args->out.args[0].size = sizeof(struct fuse_entry_out);
+ args->out.args[0].value = outarg;
}
u64 fuse_get_attr_version(struct fuse_conn *fc)
@@ -200,9 +200,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto invalid;
else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
(flags & LOOKUP_REVAL)) {
- int err;
struct fuse_entry_out outarg;
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
@@ -215,31 +214,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto out;
fc = get_fuse_conn(inode);
- req = fuse_get_req_nopages(fc);
- ret = PTR_ERR(req);
- if (IS_ERR(req))
- goto out;
forget = fuse_alloc_forget();
- if (!forget) {
- fuse_put_request(fc, req);
- ret = -ENOMEM;
+ ret = -ENOMEM;
+ if (!forget)
goto out;
- }
attr_version = fuse_get_attr_version(fc);
parent = dget_parent(entry);
- fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+ fuse_lookup_init(fc, &args, get_node_id(parent->d_inode),
&entry->d_name, &outarg);
- fuse_request_send(fc, req);
+ ret = fuse_simple_request(fc, &args);
dput(parent);
- err = req->out.h.error;
- fuse_put_request(fc, req);
/* Zero nodeid is same as -ENOENT */
- if (!err && !outarg.nodeid)
- err = -ENOENT;
- if (!err) {
+ if (!ret && !outarg.nodeid)
+ ret = -ENOENT;
+ if (!ret) {
fi = get_fuse_inode(inode);
if (outarg.nodeid != get_node_id(inode)) {
fuse_queue_forget(fc, forget, outarg.nodeid, 1);
@@ -250,7 +241,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
spin_unlock(&fc->lock);
}
kfree(forget);
- if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ if (ret == -ENOMEM)
+ goto out;
+ if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
goto invalid;
fuse_change_attributes(inode, &outarg.attr,
@@ -296,7 +289,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
int err;
@@ -306,24 +299,16 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
if (name->len > FUSE_NAME_MAX)
goto out;
- req = fuse_get_req_nopages(fc);
- err = PTR_ERR(req);
- if (IS_ERR(req))
- goto out;
forget = fuse_alloc_forget();
err = -ENOMEM;
- if (!forget) {
- fuse_put_request(fc, req);
+ if (!forget)
goto out;
- }
attr_version = fuse_get_attr_version(fc);
- fuse_lookup_init(fc, req, nodeid, name, outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_lookup_init(fc, &args, nodeid, name, outarg);
+ err = fuse_simple_request(fc, &args);
/* Zero nodeid is same as -ENOENT, but with valid timeout */
if (err || !outarg->nodeid)
goto out_put_forget;
@@ -405,7 +390,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
int err;
struct inode *inode;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
struct fuse_create_in inarg;
struct fuse_open_out outopen;
@@ -420,15 +405,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!forget)
goto out_err;
- req = fuse_get_req_nopages(fc);
- err = PTR_ERR(req);
- if (IS_ERR(req))
- goto out_put_forget_req;
-
err = -ENOMEM;
ff = fuse_file_alloc(fc);
if (!ff)
- goto out_put_request;
+ goto out_put_forget_req;
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -439,24 +419,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.flags = flags;
inarg.mode = mode;
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_CREATE;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 2;
- req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+ args.in.h.opcode = FUSE_CREATE;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 2;
+ args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- req->out.numargs = 2;
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ args.out.numargs = 2;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outentry);
- req->out.args[0].value = &outentry;
- req->out.args[1].size = sizeof(outopen);
- req->out.args[1].value = &outopen;
- fuse_request_send(fc, req);
- err = req->out.h.error;
+ args.out.args[0].size = sizeof(outentry);
+ args.out.args[0].value = &outentry;
+ args.out.args[1].size = sizeof(outopen);
+ args.out.args[1].value = &outopen;
+ err = fuse_simple_request(fc, &args);
if (err)
goto out_free_ff;
@@ -464,7 +443,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
goto out_free_ff;
- fuse_put_request(fc, req);
ff->fh = outopen.fh;
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
@@ -492,8 +470,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
out_free_ff:
fuse_file_free(ff);
-out_put_request:
- fuse_put_request(fc, req);
out_put_forget_req:
kfree(forget);
out_err:
@@ -547,7 +523,7 @@ no_open:
/*
* Code shared between mknod, mkdir, symlink and link
*/
-static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
struct inode *dir, struct dentry *entry,
umode_t mode)
{
@@ -557,22 +533,18 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
struct fuse_forget_link *forget;
forget = fuse_alloc_forget();
- if (!forget) {
- fuse_put_request(fc, req);
+ if (!forget)
return -ENOMEM;
- }
memset(&outarg, 0, sizeof(outarg));
- req->in.h.nodeid = get_node_id(dir);
- req->out.numargs = 1;
+ args->in.h.nodeid = get_node_id(dir);
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args->out.args[0].size = sizeof(outarg);
+ args->out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, args);
if (err)
goto out_put_forget_req;
@@ -609,9 +581,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
{
struct fuse_mknod_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -620,14 +590,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
inarg.mode = mode;
inarg.rdev = new_encode_dev(rdev);
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_MKNOD;
- req->in.numargs = 2;
- req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+ args.in.h.opcode = FUSE_MKNOD;
+ args.in.numargs = 2;
+ args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- return create_new_entry(fc, req, dir, entry, mode);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, &args, dir, entry, mode);
}
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
@@ -640,9 +610,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{
struct fuse_mkdir_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -650,13 +618,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_MKDIR;
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- return create_new_entry(fc, req, dir, entry, S_IFDIR);
+ args.in.h.opcode = FUSE_MKDIR;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, &args, dir, entry, S_IFDIR);
}
static int fuse_symlink(struct inode *dir, struct dentry *entry,
@@ -664,17 +632,15 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
{
struct fuse_conn *fc = get_fuse_conn(dir);
unsigned len = strlen(link) + 1;
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
- req->in.h.opcode = FUSE_SYMLINK;
- req->in.numargs = 2;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- req->in.args[1].size = len;
- req->in.args[1].value = link;
- return create_new_entry(fc, req, dir, entry, S_IFLNK);
+ args.in.h.opcode = FUSE_SYMLINK;
+ args.in.numargs = 2;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ args.in.args[1].size = len;
+ args.in.args[1].value = link;
+ return create_new_entry(fc, &args, dir, entry, S_IFLNK);
}
static inline void fuse_update_ctime(struct inode *inode)
@@ -689,18 +655,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_UNLINK;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_UNLINK;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 1;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
struct inode *inode = entry->d_inode;
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -729,18 +691,14 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_RMDIR;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_RMDIR;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 1;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
clear_nlink(entry->d_inode);
fuse_invalidate_attr(dir);
@@ -757,27 +715,21 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
int err;
struct fuse_rename2_in inarg;
struct fuse_conn *fc = get_fuse_conn(olddir);
- struct fuse_req *req;
-
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
memset(&inarg, 0, argsize);
inarg.newdir = get_node_id(newdir);
inarg.flags = flags;
- req->in.h.opcode = opcode;
- req->in.h.nodeid = get_node_id(olddir);
- req->in.numargs = 3;
- req->in.args[0].size = argsize;
- req->in.args[0].value = &inarg;
- req->in.args[1].size = oldent->d_name.len + 1;
- req->in.args[1].value = oldent->d_name.name;
- req->in.args[2].size = newent->d_name.len + 1;
- req->in.args[2].value = newent->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = opcode;
+ args.in.h.nodeid = get_node_id(olddir);
+ args.in.numargs = 3;
+ args.in.args[0].size = argsize;
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = oldent->d_name.len + 1;
+ args.in.args[1].value = oldent->d_name.name;
+ args.in.args[2].size = newent->d_name.len + 1;
+ args.in.args[2].value = newent->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
/* ctime changes */
fuse_invalidate_attr(oldent->d_inode);
@@ -849,19 +801,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
struct fuse_link_in inarg;
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
memset(&inarg, 0, sizeof(inarg));
inarg.oldnodeid = get_node_id(inode);
- req->in.h.opcode = FUSE_LINK;
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = newent->d_name.len + 1;
- req->in.args[1].value = newent->d_name.name;
- err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
+ args.in.h.opcode = FUSE_LINK;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = newent->d_name.len + 1;
+ args.in.args[1].value = newent->d_name.name;
+ err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
/* Contrary to "normal" filesystems it can happen that link
makes two "logical" inodes point to the same "physical"
inode. We invalidate the attributes of the old one, so it
@@ -929,13 +879,9 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
struct fuse_getattr_in inarg;
struct fuse_attr_out outarg;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
u64 attr_version;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
attr_version = fuse_get_attr_version(fc);
memset(&inarg, 0, sizeof(inarg));
@@ -947,20 +893,18 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
inarg.getattr_flags |= FUSE_GETATTR_FH;
inarg.fh = ff->fh;
}
- req->in.h.opcode = FUSE_GETATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
+ args.in.h.opcode = FUSE_GETATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err) {
if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
@@ -1102,7 +1046,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
static int fuse_access(struct inode *inode, int mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_access_in inarg;
int err;
@@ -1111,20 +1055,14 @@ static int fuse_access(struct inode *inode, int mask)
if (fc->no_access)
return 0;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
- req->in.h.opcode = FUSE_ACCESS;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_ACCESS;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_access = 1;
err = 0;
@@ -1445,31 +1383,27 @@ static char *read_link(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req_nopages(fc);
+ FUSE_ARGS(args);
char *link;
-
- if (IS_ERR(req))
- return ERR_CAST(req);
+ ssize_t ret;
link = (char *) __get_free_page(GFP_KERNEL);
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- goto out;
- }
- req->in.h.opcode = FUSE_READLINK;
- req->in.h.nodeid = get_node_id(inode);
- req->out.argvar = 1;
- req->out.numargs = 1;
- req->out.args[0].size = PAGE_SIZE - 1;
- req->out.args[0].value = link;
- fuse_request_send(fc, req);
- if (req->out.h.error) {
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+
+ args.in.h.opcode = FUSE_READLINK;
+ args.in.h.nodeid = get_node_id(inode);
+ args.out.argvar = 1;
+ args.out.numargs = 1;
+ args.out.args[0].size = PAGE_SIZE - 1;
+ args.out.args[0].value = link;
+ ret = fuse_simple_request(fc, &args);
+ if (ret < 0) {
free_page((unsigned long) link);
- link = ERR_PTR(req->out.h.error);
- } else
- link[req->out.args[0].size] = '\0';
- out:
- fuse_put_request(fc, req);
+ link = ERR_PTR(ret);
+ } else {
+ link[ret] = '\0';
+ }
fuse_invalidate_atime(inode);
return link;
}
@@ -1629,22 +1563,22 @@ void fuse_release_nowrite(struct inode *inode)
spin_unlock(&fc->lock);
}
-static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
+static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
struct inode *inode,
struct fuse_setattr_in *inarg_p,
struct fuse_attr_out *outarg_p)
{
- req->in.h.opcode = FUSE_SETATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(*inarg_p);
- req->in.args[0].value = inarg_p;
- req->out.numargs = 1;
+ args->in.h.opcode = FUSE_SETATTR;
+ args->in.h.nodeid = get_node_id(inode);
+ args->in.numargs = 1;
+ args->in.args[0].size = sizeof(*inarg_p);
+ args->in.args[0].value = inarg_p;
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
else
- req->out.args[0].size = sizeof(*outarg_p);
- req->out.args[0].value = outarg_p;
+ args->out.args[0].size = sizeof(*outarg_p);
+ args->out.args[0].value = outarg_p;
}
/*
@@ -1653,14 +1587,9 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
- int err;
-
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
@@ -1677,12 +1606,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
inarg.valid |= FATTR_FH;
inarg.fh = ff->fh;
}
- fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
- return err;
+ return fuse_simple_request(fc, &args);
}
/*
@@ -1698,7 +1624,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
bool is_truncate = false;
@@ -1723,10 +1649,6 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
if (is_truncate) {
fuse_set_nowrite(inode);
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
@@ -1747,10 +1669,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
inarg.valid |= FATTR_LOCKOWNER;
inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
}
- fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
+ err = fuse_simple_request(fc, &args);
if (err) {
if (err == -EINTR)
fuse_invalidate_attr(inode);
@@ -1837,32 +1757,26 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setxattr_in inarg;
int err;
if (fc->no_setxattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
inarg.flags = flags;
- req->in.h.opcode = FUSE_SETXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 3;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = strlen(name) + 1;
- req->in.args[1].value = name;
- req->in.args[2].size = size;
- req->in.args[2].value = value;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_SETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 3;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
+ args.in.args[2].size = size;
+ args.in.args[2].value = value;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_setxattr = 1;
err = -EOPNOTSUPP;
@@ -1879,7 +1793,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
@@ -1887,40 +1801,32 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
if (fc->no_getxattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
- req->in.h.opcode = FUSE_GETXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = strlen(name) + 1;
- req->in.args[1].value = name;
+ args.in.h.opcode = FUSE_GETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
/* This is really two different operations rolled into one */
- req->out.numargs = 1;
+ args.out.numargs = 1;
if (size) {
- req->out.argvar = 1;
- req->out.args[0].size = size;
- req->out.args[0].value = value;
+ args.out.argvar = 1;
+ args.out.args[0].size = size;
+ args.out.args[0].value = value;
} else {
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
}
- fuse_request_send(fc, req);
- ret = req->out.h.error;
- if (!ret)
- ret = size ? req->out.args[0].size : outarg.size;
- else {
- if (ret == -ENOSYS) {
- fc->no_getxattr = 1;
- ret = -EOPNOTSUPP;
- }
+ ret = fuse_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = outarg.size;
+ if (ret == -ENOSYS) {
+ fc->no_getxattr = 1;
+ ret = -EOPNOTSUPP;
}
- fuse_put_request(fc, req);
return ret;
}
@@ -1928,7 +1834,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
@@ -1939,38 +1845,30 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
if (fc->no_listxattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
- req->in.h.opcode = FUSE_LISTXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
+ args.in.h.opcode = FUSE_LISTXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
/* This is really two different operations rolled into one */
- req->out.numargs = 1;
+ args.out.numargs = 1;
if (size) {
- req->out.argvar = 1;
- req->out.args[0].size = size;
- req->out.args[0].value = list;
+ args.out.argvar = 1;
+ args.out.args[0].size = size;
+ args.out.args[0].value = list;
} else {
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
}
- fuse_request_send(fc, req);
- ret = req->out.h.error;
- if (!ret)
- ret = size ? req->out.args[0].size : outarg.size;
- else {
- if (ret == -ENOSYS) {
- fc->no_listxattr = 1;
- ret = -EOPNOTSUPP;
- }
+ ret = fuse_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = outarg.size;
+ if (ret == -ENOSYS) {
+ fc->no_listxattr = 1;
+ ret = -EOPNOTSUPP;
}
- fuse_put_request(fc, req);
return ret;
}
@@ -1978,24 +1876,18 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
int err;
if (fc->no_removexattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_REMOVEXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = strlen(name) + 1;
- req->in.args[0].value = name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_REMOVEXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = strlen(name) + 1;
+ args.in.args[0].value = name;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_removexattr = 1;
err = -EOPNOTSUPP;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index bf50259012ab..760b2c552197 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -24,30 +24,22 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
int opcode, struct fuse_open_out *outargp)
{
struct fuse_open_in inarg;
- struct fuse_req *req;
- int err;
-
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
memset(&inarg, 0, sizeof(inarg));
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
if (!fc->atomic_o_trunc)
inarg.flags &= ~O_TRUNC;
- req->in.h.opcode = opcode;
- req->in.h.nodeid = nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
- req->out.args[0].size = sizeof(*outargp);
- req->out.args[0].value = outargp;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = opcode;
+ args.in.h.nodeid = nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(*outargp);
+ args.out.args[0].value = outargp;
- return err;
+ return fuse_simple_request(fc, &args);
}
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
@@ -89,37 +81,9 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
-static void fuse_release_async(struct work_struct *work)
-{
- struct fuse_req *req;
- struct fuse_conn *fc;
- struct path path;
-
- req = container_of(work, struct fuse_req, misc.release.work);
- path = req->misc.release.path;
- fc = get_fuse_conn(path.dentry->d_inode);
-
- fuse_put_request(fc, req);
- path_put(&path);
-}
-
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- if (fc->destroy_req) {
- /*
- * If this is a fuseblk mount, then it's possible that
- * releasing the path will result in releasing the
- * super block and sending the DESTROY request. If
- * the server is single threaded, this would hang.
- * For this reason do the path_put() in a separate
- * thread.
- */
- atomic_inc(&req->count);
- INIT_WORK(&req->misc.release.work, fuse_release_async);
- schedule_work(&req->misc.release.work);
- } else {
- path_put(&req->misc.release.path);
- }
+ iput(req->misc.release.inode);
}
static void fuse_file_put(struct fuse_file *ff, bool sync)
@@ -133,12 +97,12 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
* implement 'open'
*/
req->background = 0;
- path_put(&req->misc.release.path);
+ iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
} else if (sync) {
req->background = 0;
fuse_request_send(ff->fc, req);
- path_put(&req->misc.release.path);
+ iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
} else {
req->end = fuse_release_end;
@@ -297,9 +261,8 @@ void fuse_release_common(struct file *file, int opcode)
inarg->lock_owner = fuse_lock_owner_id(ff->fc,
(fl_owner_t) file);
}
- /* Hold vfsmount and dentry until release is finished */
- path_get(&file->f_path);
- req->misc.release.path = file->f_path;
+ /* Hold inode until release is finished */
+ req->misc.release.inode = igrab(file_inode(file));
/*
* Normally this will send the RELEASE request, however if
@@ -480,7 +443,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_fsync_in inarg;
int err;
@@ -506,23 +469,15 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
goto out;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req)) {
- err = PTR_ERR(req);
- goto out;
- }
-
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.fsync_flags = datasync ? 1 : 0;
- req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
if (isdir)
fc->no_fsyncdir = 1;
@@ -2156,49 +2111,44 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
return 0;
}
-static void fuse_lk_fill(struct fuse_req *req, struct file *file,
+static void fuse_lk_fill(struct fuse_args *args, struct file *file,
const struct file_lock *fl, int opcode, pid_t pid,
- int flock)
+ int flock, struct fuse_lk_in *inarg)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
- struct fuse_lk_in *arg = &req->misc.lk_in;
-
- arg->fh = ff->fh;
- arg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
- arg->lk.start = fl->fl_start;
- arg->lk.end = fl->fl_end;
- arg->lk.type = fl->fl_type;
- arg->lk.pid = pid;
+
+ memset(inarg, 0, sizeof(*inarg));
+ inarg->fh = ff->fh;
+ inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
+ inarg->lk.start = fl->fl_start;
+ inarg->lk.end = fl->fl_end;
+ inarg->lk.type = fl->fl_type;
+ inarg->lk.pid = pid;
if (flock)
- arg->lk_flags |= FUSE_LK_FLOCK;
- req->in.h.opcode = opcode;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(*arg);
- req->in.args[0].value = arg;
+ inarg->lk_flags |= FUSE_LK_FLOCK;
+ args->in.h.opcode = opcode;
+ args->in.h.nodeid = get_node_id(inode);
+ args->in.numargs = 1;
+ args->in.args[0].size = sizeof(*inarg);
+ args->in.args[0].value = inarg;
}
static int fuse_getlk(struct file *file, struct file_lock *fl)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
+ struct fuse_lk_in inarg;
struct fuse_lk_out outarg;
int err;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
- req->out.numargs = 1;
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err)
err = convert_fuse_file_lock(&outarg.lk, fl);
@@ -2209,7 +2159,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
+ struct fuse_lk_in inarg;
int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
int err;
@@ -2223,17 +2174,13 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
if (fl->fl_flags & FL_CLOSE)
return 0;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg);
+ err = fuse_simple_request(fc, &args);
- fuse_lk_fill(req, file, fl, opcode, pid, flock);
- fuse_request_send(fc, req);
- err = req->out.h.error;
/* locking is restartable */
if (err == -EINTR)
err = -ERESTARTSYS;
- fuse_put_request(fc, req);
+
return err;
}
@@ -2283,7 +2230,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_bmap_in inarg;
struct fuse_bmap_out outarg;
int err;
@@ -2291,24 +2238,18 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
if (!inode->i_sb->s_bdev || fc->no_bmap)
return 0;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return 0;
-
memset(&inarg, 0, sizeof(inarg));
inarg.block = block;
inarg.blocksize = inode->i_sb->s_blocksize;
- req->in.h.opcode = FUSE_BMAP;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_BMAP;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS)
fc->no_bmap = 1;
@@ -2776,7 +2717,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
struct fuse_conn *fc = ff->fc;
struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
struct fuse_poll_out outarg;
- struct fuse_req *req;
+ FUSE_ARGS(args);
int err;
if (fc->no_poll)
@@ -2794,21 +2735,15 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
fuse_register_polled_file(fc, ff);
}
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return POLLERR;
-
- req->in.h.opcode = FUSE_POLL;
- req->in.h.nodeid = ff->nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_POLL;
+ args.in.h.nodeid = ff->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err)
return outarg.revents;
@@ -2949,10 +2884,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
loff_t length)
{
struct fuse_file *ff = file->private_data;
- struct inode *inode = file->f_inode;
+ struct inode *inode = file_inode(file);
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = ff->fc;
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_fallocate_in inarg = {
.fh = ff->fh,
.offset = offset,
@@ -2985,25 +2920,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE))
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req)) {
- err = PTR_ERR(req);
- goto out;
- }
-
- req->in.h.opcode = FUSE_FALLOCATE;
- req->in.h.nodeid = ff->nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
+ args.in.h.opcode = FUSE_FALLOCATE;
+ args.in.h.nodeid = ff->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_fallocate = 1;
err = -EOPNOTSUPP;
}
- fuse_put_request(fc, req);
-
if (err)
goto out;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e8e47a6ab518..e0fc6725d1d0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -213,7 +213,7 @@ struct fuse_out {
unsigned numargs;
/** Array of arguments */
- struct fuse_arg args[3];
+ struct fuse_arg args[2];
};
/** FUSE page descriptor */
@@ -222,6 +222,25 @@ struct fuse_page_desc {
unsigned int offset;
};
+struct fuse_args {
+ struct {
+ struct {
+ uint32_t opcode;
+ uint64_t nodeid;
+ } h;
+ unsigned numargs;
+ struct fuse_in_arg args[3];
+
+ } in;
+ struct {
+ unsigned argvar:1;
+ unsigned numargs;
+ struct fuse_arg args[2];
+ } out;
+};
+
+#define FUSE_ARGS(args) struct fuse_args args = {}
+
/** The request state */
enum fuse_req_state {
FUSE_REQ_INIT = 0,
@@ -305,11 +324,8 @@ struct fuse_req {
/** Data for asynchronous requests */
union {
struct {
- union {
- struct fuse_release_in in;
- struct work_struct work;
- };
- struct path path;
+ struct fuse_release_in in;
+ struct inode *inode;
} release;
struct fuse_init_in init_in;
struct fuse_init_out init_out;
@@ -324,7 +340,6 @@ struct fuse_req {
struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
- struct fuse_lk_in lk_in;
} misc;
/** page vector */
@@ -754,15 +769,6 @@ struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
void __fuse_get_request(struct fuse_req *req);
/**
- * Get a request, may fail with -ENOMEM,
- * useful for callers who doesn't use req->pages[]
- */
-static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
-{
- return fuse_get_req(fc, 0);
-}
-
-/**
* Gets a requests for a file operation, always succeeds
*/
struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
@@ -780,6 +786,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
/**
+ * Simple request sending that does request allocation and freeing
+ */
+ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
+
+/**
* Send a request in the background
*/
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
@@ -804,8 +815,6 @@ void fuse_invalidate_atime(struct inode *inode);
*/
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
-void fuse_conn_kill(struct fuse_conn *fc);
-
/**
* Initialize fuse_conn
*/
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 03246cd9d47a..6749109f255d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -376,28 +376,13 @@ static void fuse_bdi_destroy(struct fuse_conn *fc)
bdi_destroy(&fc->bdi);
}
-void fuse_conn_kill(struct fuse_conn *fc)
-{
- spin_lock(&fc->lock);
- fc->connected = 0;
- fc->blocked = 0;
- fc->initialized = 1;
- spin_unlock(&fc->lock);
- /* Flush all readers on this fs */
- kill_fasync(&fc->fasync, SIGIO, POLL_IN);
- wake_up_all(&fc->waitq);
- wake_up_all(&fc->blocked_waitq);
- wake_up_all(&fc->reserved_req_waitq);
-}
-EXPORT_SYMBOL_GPL(fuse_conn_kill);
-
static void fuse_put_super(struct super_block *sb)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
fuse_send_destroy(fc);
- fuse_conn_kill(fc);
+ fuse_abort_conn(fc);
mutex_lock(&fuse_mutex);
list_del(&fc->entry);
fuse_ctl_remove_conn(fc);
@@ -425,7 +410,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_statfs_out outarg;
int err;
@@ -434,23 +419,17 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&outarg, 0, sizeof(outarg));
- req->in.numargs = 0;
- req->in.h.opcode = FUSE_STATFS;
- req->in.h.nodeid = get_node_id(dentry->d_inode);
- req->out.numargs = 1;
- req->out.args[0].size =
+ args.in.numargs = 0;
+ args.in.h.opcode = FUSE_STATFS;
+ args.in.h.nodeid = get_node_id(dentry->d_inode);
+ args.out.numargs = 1;
+ args.out.args[0].size =
fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err)
convert_fuse_statfs(buf, &outarg.st);
- fuse_put_request(fc, req);
return err;
}
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c667b4a..7892e6fddb66 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
}
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
- u32 parent, struct qstr *str)
+/* Generates key for catalog file/folders record. */
+int hfsplus_cat_build_key(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent, struct qstr *str)
{
- int len;
+ int len, err;
key->cat.parent = cpu_to_be32(parent);
- if (str) {
- hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
- str->name, str->len);
- len = be16_to_cpu(key->cat.name.length);
- } else {
- key->cat.name.length = 0;
- len = 0;
- }
+ err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+ str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
+ len = be16_to_cpu(key->cat.name.length);
key->key_len = cpu_to_be16(6 + 2 * len);
+ return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent)
+{
+ key->cat.parent = cpu_to_be32(parent);
+ key->cat.name.length = 0;
+ key->key_len = cpu_to_be16(6);
}
static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
hfsplus_cat_entry *entry, int type,
u32 parentid, struct qstr *str)
{
+ int err;
+
entry->type = cpu_to_be16(type);
entry->thread.reserved = 0;
entry->thread.parentID = cpu_to_be32(parentid);
- hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+ err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
}
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
int err;
u16 type;
- hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
if (err)
return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
return err;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry,
S_ISDIR(inode->i_mode) ?
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
dir->i_ino, str);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto err2;
+ }
+
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
goto err2;
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto err1;
+
entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
return 0;
err1:
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
hfs_brec_remove(&fd);
err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (!str) {
int len;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
off + 2, len);
fd.search_key->key_len = cpu_to_be16(6 + len);
} else
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto out;
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (err)
goto out;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_fd = src_fd;
/* find the old dir entry and read the data */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
type = be16_to_cpu(entry.type);
/* create new dir entry with the data from the old entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+ err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+ dst_dir->i_ino, dst_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
/* finally remove the old entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
/* remove old thread entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
/* create new thread entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
dst_dir->i_ino, dst_name);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto out;
+ }
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 610a3260bef1..435bea231cc6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (err)
return ERR_PTR(err);
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+ &dentry->d_name);
+ if (unlikely(err < 0))
+ goto fail;
again:
err = hfs_brec_read(&fd, &entry, sizeof(entry));
if (err) {
@@ -97,9 +100,11 @@ again:
be32_to_cpu(entry.file.permissions.dev);
str.len = sprintf(name, "iNode%d", linkid);
str.name = name;
- hfsplus_cat_build_key(sb, fd.search_key,
+ err = hfsplus_cat_build_key(sb, fd.search_key,
HFSPLUS_SB(sb)->hidden_dir->i_ino,
&str);
+ if (unlikely(err < 0))
+ goto fail;
goto again;
}
} else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
err = -ENOMEM;
goto out;
}
- hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index eb5e059f481a..b0441d65fa54 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
+int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
u32 parent, struct qstr *str);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent);
void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
int hfsplus_find_cat(struct super_block *sb, u32 cnid,
struct hfs_find_data *fd);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4cf2024b87da..593af2fdcc2d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = hfs_find_init(sbi->cat_tree, &fd);
if (err)
goto out_put_root;
- hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ if (unlikely(err < 0))
+ goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/inode.c b/fs/inode.c
index ad60555b4768..aa149e7262ac 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -114,6 +114,11 @@ int proc_nr_inodes(struct ctl_table *table, int write,
}
#endif
+static int no_open(struct inode *inode, struct file *file)
+{
+ return -ENXIO;
+}
+
/**
* inode_init_always - perform inode structure intialisation
* @sb: superblock inode belongs to
@@ -125,7 +130,7 @@ int proc_nr_inodes(struct ctl_table *table, int write,
int inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct inode_operations empty_iops;
- static const struct file_operations empty_fops;
+ static const struct file_operations no_open_fops = {.open = no_open};
struct address_space *const mapping = &inode->i_data;
inode->i_sb = sb;
@@ -133,7 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_flags = 0;
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
- inode->i_fop = &empty_fops;
+ inode->i_fop = &no_open_fops;
inode->__i_nlink = 1;
inode->i_opflags = 0;
i_uid_write(inode, 0);
@@ -1798,7 +1803,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
} else if (S_ISFIFO(mode))
inode->i_fop = &pipefifo_fops;
else if (S_ISSOCK(mode))
- inode->i_fop = &bad_sock_fops;
+ ; /* leave it no_open_fops */
else
printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
" inode %s:%lu\n", mode, inode->i_sb->s_id,
diff --git a/fs/internal.h b/fs/internal.h
index 757ba2abf21e..e9a61fe67575 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -147,3 +147,8 @@ extern const struct file_operations pipefifo_fops;
*/
extern void sb_pin_kill(struct super_block *sb);
extern void mnt_pin_kill(struct mount *m);
+
+/*
+ * fs/nsfs.c
+ */
+extern struct dentry_operations ns_dentry_operations;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 77c9a7812542..214c3c11fbc2 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -443,7 +443,7 @@ int ioctl_preallocate(struct file *filp, void __user *argp)
return -EINVAL;
}
- return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
+ return vfs_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
}
static int file_ioctl(struct file *filp, unsigned int cmd,
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index f488bbae541a..735d7522a3a9 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -30,6 +30,7 @@ struct rock_state {
int cont_size;
int cont_extent;
int cont_offset;
+ int cont_loops;
struct inode *inode;
};
@@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode)
rs->inode = inode;
}
+/* Maximum number of Rock Ridge continuation entries */
+#define RR_MAX_CE_ENTRIES 32
+
/*
* Returns 0 if the caller should continue scanning, 1 if the scan must end
* and -ve on error.
@@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs)
goto out;
}
ret = -EIO;
+ if (++rs->cont_loops >= RR_MAX_CE_ENTRIES)
+ goto out;
bh = sb_bread(rs->inode->i_sb, rs->cont_extent);
if (bh) {
memcpy(rs->buffer, bh->b_data + rs->cont_offset,
@@ -356,6 +362,9 @@ repeat:
rs.cont_size = isonum_733(rr->u.CE.size);
break;
case SIG('E', 'R'):
+ /* Invalid length of ER tag id? */
+ if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len)
+ goto out;
ISOFS_SB(inode->i_sb)->s_rock = 1;
printk(KERN_DEBUG "ISO 9660 Extensions: ");
{
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 386303dca382..dddbde4f56f4 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -224,7 +224,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw));
- /* If a node has zero dsize, we only have to keep if it if it might be the
+ /* If a node has zero dsize, we only have to keep it if it might be the
node with highest version -- i.e. the one which will end up as f->metadata.
Note that such nodes won't be REF_UNCHECKED since there are no data to
check anyway. */
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index c522d098bb4f..bc5385471a6e 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -844,6 +844,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
/* Write out summary information - called from jffs2_do_reserve_space */
int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
+ __must_hold(&c->erase_completion_block)
{
int datasize, infosize, padsize;
struct jffs2_eraseblock *jeb;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 4429d6d9217f..ddc9f9612f16 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -106,7 +106,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
const struct kernfs_ops *ops;
/*
- * @of->mutex nests outside active ref and is just to ensure that
+ * @of->mutex nests outside active ref and is primarily to ensure that
* the ops aren't called concurrently for the same open file.
*/
mutex_lock(&of->mutex);
@@ -189,13 +189,16 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
const struct kernfs_ops *ops;
char *buf;
- buf = kmalloc(len, GFP_KERNEL);
+ buf = of->prealloc_buf;
+ if (!buf)
+ buf = kmalloc(len, GFP_KERNEL);
if (!buf)
return -ENOMEM;
/*
- * @of->mutex nests outside active ref and is just to ensure that
- * the ops aren't called concurrently for the same open file.
+ * @of->mutex nests outside active ref and is used both to ensure that
+ * the ops aren't called concurrently for the same open file, and
+ * to provide exclusive access to ->prealloc_buf (when that exists).
*/
mutex_lock(&of->mutex);
if (!kernfs_get_active(of->kn)) {
@@ -210,21 +213,22 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
else
len = -EINVAL;
- kernfs_put_active(of->kn);
- mutex_unlock(&of->mutex);
-
if (len < 0)
- goto out_free;
+ goto out_unlock;
if (copy_to_user(user_buf, buf, len)) {
len = -EFAULT;
- goto out_free;
+ goto out_unlock;
}
*ppos += len;
+ out_unlock:
+ kernfs_put_active(of->kn);
+ mutex_unlock(&of->mutex);
out_free:
- kfree(buf);
+ if (buf != of->prealloc_buf)
+ kfree(buf);
return len;
}
@@ -278,19 +282,16 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
len = min_t(size_t, count, PAGE_SIZE);
}
- buf = kmalloc(len + 1, GFP_KERNEL);
+ buf = of->prealloc_buf;
+ if (!buf)
+ buf = kmalloc(len + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
- if (copy_from_user(buf, user_buf, len)) {
- len = -EFAULT;
- goto out_free;
- }
- buf[len] = '\0'; /* guarantee string termination */
-
/*
- * @of->mutex nests outside active ref and is just to ensure that
- * the ops aren't called concurrently for the same open file.
+ * @of->mutex nests outside active ref and is used both to ensure that
+ * the ops aren't called concurrently for the same open file, and
+ * to provide exclusive access to ->prealloc_buf (when that exists).
*/
mutex_lock(&of->mutex);
if (!kernfs_get_active(of->kn)) {
@@ -299,19 +300,27 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
goto out_free;
}
+ if (copy_from_user(buf, user_buf, len)) {
+ len = -EFAULT;
+ goto out_unlock;
+ }
+ buf[len] = '\0'; /* guarantee string termination */
+
ops = kernfs_ops(of->kn);
if (ops->write)
len = ops->write(of, buf, len, *ppos);
else
len = -EINVAL;
- kernfs_put_active(of->kn);
- mutex_unlock(&of->mutex);
-
if (len > 0)
*ppos += len;
+
+out_unlock:
+ kernfs_put_active(of->kn);
+ mutex_unlock(&of->mutex);
out_free:
- kfree(buf);
+ if (buf != of->prealloc_buf)
+ kfree(buf);
return len;
}
@@ -439,27 +448,6 @@ static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
return pol;
}
-static int kernfs_vma_migrate(struct vm_area_struct *vma,
- const nodemask_t *from, const nodemask_t *to,
- unsigned long flags)
-{
- struct file *file = vma->vm_file;
- struct kernfs_open_file *of = kernfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return 0;
-
- if (!kernfs_get_active(of->kn))
- return 0;
-
- ret = 0;
- if (of->vm_ops->migrate)
- ret = of->vm_ops->migrate(vma, from, to, flags);
-
- kernfs_put_active(of->kn);
- return ret;
-}
#endif
static const struct vm_operations_struct kernfs_vm_ops = {
@@ -470,7 +458,6 @@ static const struct vm_operations_struct kernfs_vm_ops = {
#ifdef CONFIG_NUMA
.set_policy = kernfs_vma_set_policy,
.get_policy = kernfs_vma_get_policy,
- .migrate = kernfs_vma_migrate,
#endif
};
@@ -685,6 +672,22 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
*/
of->atomic_write_len = ops->atomic_write_len;
+ error = -EINVAL;
+ /*
+ * ->seq_show is incompatible with ->prealloc,
+ * as seq_read does its own allocation.
+ * ->read must be used instead.
+ */
+ if (ops->prealloc && ops->seq_show)
+ goto err_free;
+ if (ops->prealloc) {
+ int len = of->atomic_write_len ?: PAGE_SIZE;
+ of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
+ error = -ENOMEM;
+ if (!of->prealloc_buf)
+ goto err_free;
+ }
+
/*
* Always instantiate seq_file even if read access doesn't use
* seq_file or is not requested. This unifies private data access
@@ -715,6 +718,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
err_close:
seq_release(inode, file);
err_free:
+ kfree(of->prealloc_buf);
kfree(of);
err_out:
kernfs_put_active(kn);
@@ -728,6 +732,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
kernfs_put_open_node(kn, of);
seq_release(inode, filp);
+ kfree(of->prealloc_buf);
kfree(of);
return 0;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 9106f42c472c..1cc6ec51e6b1 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -214,7 +214,7 @@ int nsm_monitor(const struct nlm_host *host)
if (unlikely(res.status != 0))
status = -EIO;
if (unlikely(status < 0)) {
- printk(KERN_NOTICE "lockd: cannot monitor %s\n", nsm->sm_name);
+ pr_notice_ratelimited("lockd: cannot monitor %s\n", nsm->sm_name);
return status;
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index d1bb7ecfd201..e94c887da2d7 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -350,7 +350,7 @@ static struct svc_serv *lockd_create_svc(void)
printk(KERN_WARNING
"lockd_up: no pid, %d users??\n", nlmsvc_users);
- serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
+ serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
return ERR_PTR(-ENOMEM);
diff --git a/fs/mount.h b/fs/mount.h
index f82c62840905..0ad6f760ce52 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -1,10 +1,11 @@
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
+#include <linux/ns_common.h>
struct mnt_namespace {
atomic_t count;
- unsigned int proc_inum;
+ struct ns_common ns;
struct mount * root;
struct list_head list;
struct user_namespace *user_ns;
diff --git a/fs/namei.c b/fs/namei.c
index ca814165d84c..bc35b02883bb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -487,6 +487,19 @@ void path_put(const struct path *path)
}
EXPORT_SYMBOL(path_put);
+struct nameidata {
+ struct path path;
+ struct qstr last;
+ struct path root;
+ struct inode *inode; /* path.dentry.d_inode */
+ unsigned int flags;
+ unsigned seq, m_seq;
+ int last_type;
+ unsigned depth;
+ struct file *base;
+ char *saved_names[MAX_NESTED_LINKS + 1];
+};
+
/*
* Path walking has 2 modes, rcu-walk and ref-walk (see
* Documentation/filesystems/path-lookup.txt). In situations when we can't
@@ -695,6 +708,18 @@ void nd_jump_link(struct nameidata *nd, struct path *path)
nd->flags |= LOOKUP_JUMPED;
}
+void nd_set_link(struct nameidata *nd, char *path)
+{
+ nd->saved_names[nd->depth] = path;
+}
+EXPORT_SYMBOL(nd_set_link);
+
+char *nd_get_link(struct nameidata *nd)
+{
+ return nd->saved_names[nd->depth];
+}
+EXPORT_SYMBOL(nd_get_link);
+
static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
{
struct inode *inode = link->dentry->d_inode;
@@ -1821,13 +1846,14 @@ static int link_path_walk(const char *name, struct nameidata *nd)
}
static int path_init(int dfd, const char *name, unsigned int flags,
- struct nameidata *nd, struct file **fp)
+ struct nameidata *nd)
{
int retval = 0;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags | LOOKUP_JUMPED;
+ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
nd->depth = 0;
+ nd->base = NULL;
if (flags & LOOKUP_ROOT) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
@@ -1847,7 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
} else {
path_get(&nd->path);
}
- return 0;
+ goto done;
}
nd->root.mnt = NULL;
@@ -1897,7 +1923,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
if (f.flags & FDPUT_FPUT)
- *fp = f.file;
+ nd->base = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock();
} else {
@@ -1908,13 +1934,26 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->inode = nd->path.dentry->d_inode;
if (!(flags & LOOKUP_RCU))
- return 0;
+ goto done;
if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
- return 0;
+ goto done;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
rcu_read_unlock();
return -ECHILD;
+done:
+ current->total_link_count = 0;
+ return link_path_walk(name, nd);
+}
+
+static void path_cleanup(struct nameidata *nd)
+{
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
+ if (unlikely(nd->base))
+ fput(nd->base);
}
static inline int lookup_last(struct nameidata *nd, struct path *path)
@@ -1930,7 +1969,6 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- struct file *base = NULL;
struct path path;
int err;
@@ -1948,14 +1986,7 @@ static int path_lookupat(int dfd, const char *name,
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
- err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
-
- if (unlikely(err))
- goto out;
-
- current->total_link_count = 0;
- err = link_path_walk(name, nd);
-
+ err = path_init(dfd, name, flags, nd);
if (!err && !(flags & LOOKUP_PARENT)) {
err = lookup_last(nd, &path);
while (err > 0) {
@@ -1983,14 +2014,7 @@ static int path_lookupat(int dfd, const char *name,
}
}
-out:
- if (base)
- fput(base);
-
- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
- }
+ path_cleanup(nd);
return err;
}
@@ -2297,19 +2321,13 @@ out:
static int
path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
{
- struct file *base = NULL;
struct nameidata nd;
int err;
- err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base);
+ err = path_init(dfd, name, flags, &nd);
if (unlikely(err))
goto out;
- current->total_link_count = 0;
- err = link_path_walk(name, &nd);
- if (err)
- goto out;
-
err = mountpoint_last(&nd, path);
while (err > 0) {
void *cookie;
@@ -2325,12 +2343,7 @@ path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags
put_link(&nd, &link, cookie);
}
out:
- if (base)
- fput(base);
-
- if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT))
- path_put(&nd.root);
-
+ path_cleanup(&nd);
return err;
}
@@ -3181,7 +3194,6 @@ out:
static struct file *path_openat(int dfd, struct filename *pathname,
struct nameidata *nd, const struct open_flags *op, int flags)
{
- struct file *base = NULL;
struct file *file;
struct path path;
int opened = 0;
@@ -3198,12 +3210,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
goto out;
}
- error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base);
- if (unlikely(error))
- goto out;
-
- current->total_link_count = 0;
- error = link_path_walk(pathname->name, nd);
+ error = path_init(dfd, pathname->name, flags, nd);
if (unlikely(error))
goto out;
@@ -3229,10 +3236,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
put_link(nd, &link, cookie);
}
out:
- if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
- path_put(&nd->root);
- if (base)
- fput(base);
+ path_cleanup(nd);
if (!(opened & FILE_OPENED)) {
BUG_ON(!error);
put_filp(file);
diff --git a/fs/namespace.c b/fs/namespace.c
index 5b66b2b3624d..cd1e9681a0cf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
}
/* Don't allow unprivileged users to reveal what is under a mount */
- if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
+ if ((flag & CL_UNPRIVILEGED) &&
+ (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
mnt->mnt.mnt_flags |= MNT_LOCKED;
atomic_inc(&sb->s_active);
@@ -1369,6 +1370,8 @@ void umount_tree(struct mount *mnt, int how)
}
if (last) {
last->mnt_hash.next = unmounted.first;
+ if (unmounted.first)
+ unmounted.first->pprev = &last->mnt_hash.next;
unmounted.first = tmp_list.first;
unmounted.first->pprev = &unmounted.first;
}
@@ -1544,6 +1547,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (mnt->mnt.mnt_flags & MNT_LOCKED)
goto dput_and_out;
+ retval = -EPERM;
+ if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+ goto dput_and_out;
retval = do_umount(mnt, flags);
dput_and_out:
@@ -1569,17 +1575,13 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
static bool is_mnt_ns_file(struct dentry *dentry)
{
/* Is this a proxy for a mount namespace? */
- struct inode *inode = dentry->d_inode;
- struct proc_ns *ei;
-
- if (!proc_ns_inode(inode))
- return false;
-
- ei = get_proc_ns(inode);
- if (ei->ns_ops != &mntns_operations)
- return false;
+ return dentry->d_op == &ns_dentry_operations &&
+ dentry->d_fsdata == &mntns_operations;
+}
- return true;
+struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct mnt_namespace, ns);
}
static bool mnt_ns_loop(struct dentry *dentry)
@@ -1591,7 +1593,7 @@ static bool mnt_ns_loop(struct dentry *dentry)
if (!is_mnt_ns_file(dentry))
return false;
- mnt_ns = get_proc_ns(dentry->d_inode)->ns;
+ mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
@@ -1610,7 +1612,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
if (IS_ERR(q))
return q;
- q->mnt.mnt_flags &= ~MNT_LOCKED;
q->mnt_mountpoint = mnt->mnt_mountpoint;
p = mnt;
@@ -2020,7 +2021,10 @@ static int do_loopback(struct path *path, const char *old_name,
if (IS_MNT_UNBINDABLE(old))
goto out2;
- if (!check_mnt(parent) || !check_mnt(old))
+ if (!check_mnt(parent))
+ goto out2;
+
+ if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
goto out2;
if (!recurse && has_locked_children(old, old_path.dentry))
@@ -2098,7 +2102,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
!(mnt_flags & MNT_NODEV)) {
- return -EPERM;
+ /* Was the nodev implicitly added in mount? */
+ if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
+ !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+ mnt_flags |= MNT_NODEV;
+ } else {
+ return -EPERM;
+ }
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
!(mnt_flags & MNT_NOSUID)) {
@@ -2640,7 +2650,7 @@ dput_out:
static void free_mnt_ns(struct mnt_namespace *ns)
{
- proc_free_inum(ns->proc_inum);
+ ns_free_inum(&ns->ns);
put_user_ns(ns->user_ns);
kfree(ns);
}
@@ -2662,11 +2672,12 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
if (!new_ns)
return ERR_PTR(-ENOMEM);
- ret = proc_alloc_inum(&new_ns->proc_inum);
+ ret = ns_alloc_inum(&new_ns->ns);
if (ret) {
kfree(new_ns);
return ERR_PTR(ret);
}
+ new_ns->ns.ops = &mntns_operations;
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
atomic_set(&new_ns->count, 1);
new_ns->root = NULL;
@@ -2958,6 +2969,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* mount new_root on / */
attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
touch_mnt_namespace(current->nsproxy->mnt_ns);
+ /* A moved mount should not expire automatically */
+ list_del_init(&new_mnt->mnt_expire);
unlock_mount_hash();
chroot_fs_refs(&root, &new);
put_mountpoint(root_mp);
@@ -3002,6 +3015,7 @@ static void __init init_mount_tree(void)
root.mnt = mnt;
root.dentry = mnt->mnt_root;
+ mnt->mnt_flags |= MNT_LOCKED;
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
@@ -3144,31 +3158,31 @@ found:
return visible;
}
-static void *mntns_get(struct task_struct *task)
+static struct ns_common *mntns_get(struct task_struct *task)
{
- struct mnt_namespace *ns = NULL;
+ struct ns_common *ns = NULL;
struct nsproxy *nsproxy;
task_lock(task);
nsproxy = task->nsproxy;
if (nsproxy) {
- ns = nsproxy->mnt_ns;
- get_mnt_ns(ns);
+ ns = &nsproxy->mnt_ns->ns;
+ get_mnt_ns(to_mnt_ns(ns));
}
task_unlock(task);
return ns;
}
-static void mntns_put(void *ns)
+static void mntns_put(struct ns_common *ns)
{
- put_mnt_ns(ns);
+ put_mnt_ns(to_mnt_ns(ns));
}
-static int mntns_install(struct nsproxy *nsproxy, void *ns)
+static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
struct fs_struct *fs = current->fs;
- struct mnt_namespace *mnt_ns = ns;
+ struct mnt_namespace *mnt_ns = to_mnt_ns(ns);
struct path root;
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
@@ -3198,17 +3212,10 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
return 0;
}
-static unsigned int mntns_inum(void *ns)
-{
- struct mnt_namespace *mnt_ns = ns;
- return mnt_ns->proc_inum;
-}
-
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
.type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,
- .inum = mntns_inum,
};
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0beb023f25ac..ac71d13c69ef 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -33,6 +33,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/file.h>
+#include <linux/falloc.h>
#include <linux/slab.h>
#include "idmap.h"
@@ -772,7 +773,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* the client wants us to do more in this compound:
*/
if (!nfsd4_last_compound_op(rqstp))
- rqstp->rq_splice_ok = false;
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
@@ -1014,6 +1015,44 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
static __be32
+nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_fallocate *fallocate, int flags)
+{
+ __be32 status = nfserr_notsupp;
+ struct file *file;
+
+ status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
+ &fallocate->falloc_stateid,
+ WR_STATE, &file);
+ if (status != nfs_ok) {
+ dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
+ return status;
+ }
+
+ status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+ fallocate->falloc_offset,
+ fallocate->falloc_length,
+ flags);
+ fput(file);
+ return status;
+}
+
+static __be32
+nfsd4_allocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_fallocate *fallocate)
+{
+ return nfsd4_fallocate(rqstp, cstate, fallocate, 0);
+}
+
+static __be32
+nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_fallocate *fallocate)
+{
+ return nfsd4_fallocate(rqstp, cstate, fallocate,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
+}
+
+static __be32
nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_seek *seek)
{
@@ -1331,7 +1370,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
- rqstp->rq_usedeferral = false;
+ clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
/*
* According to RFC3010, this takes precedence over all other errors.
@@ -1447,7 +1486,7 @@ encode_op:
BUG_ON(cstate->replay_owner);
out:
/* Reset deferral mechanism for RPC deferrals */
- rqstp->rq_usedeferral = true;
+ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
dprintk("nfsv4 compound returned %d\n", ntohl(status));
return status;
}
@@ -1929,6 +1968,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
/* NFSv4.2 operations */
+ [OP_ALLOCATE] = {
+ .op_func = (nfsd4op_func)nfsd4_allocate,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_ALLOCATE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+ },
+ [OP_DEALLOCATE] = {
+ .op_func = (nfsd4op_func)nfsd4_deallocate,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_DEALLOCATE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+ },
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4e1d7268b004..3550a9c87616 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -275,9 +275,11 @@ opaque_hashval(const void *ptr, int nbytes)
return x;
}
-static void nfsd4_free_file(struct nfs4_file *f)
+static void nfsd4_free_file_rcu(struct rcu_head *rcu)
{
- kmem_cache_free(file_slab, f);
+ struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
+
+ kmem_cache_free(file_slab, fp);
}
static inline void
@@ -286,9 +288,10 @@ put_nfs4_file(struct nfs4_file *fi)
might_lock(&state_lock);
if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
- hlist_del(&fi->fi_hash);
+ hlist_del_rcu(&fi->fi_hash);
spin_unlock(&state_lock);
- nfsd4_free_file(fi);
+ WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
+ call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
}
}
@@ -1440,7 +1443,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&clp->cl_lock);
- if (cses->flags & SESSION4_BACK_CHAN) {
+ {
struct sockaddr *sa = svc_addr(rqstp);
/*
* This is a little silly; with sessions there's no real
@@ -1711,15 +1714,14 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
return 0;
}
-static long long
+static int
compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
{
- long long res;
-
- res = o1->len - o2->len;
- if (res)
- return res;
- return (long long)memcmp(o1->data, o2->data, o1->len);
+ if (o1->len < o2->len)
+ return -1;
+ if (o1->len > o2->len)
+ return 1;
+ return memcmp(o1->data, o2->data, o1->len);
}
static int same_name(const char *n1, const char *n2)
@@ -1907,7 +1909,7 @@ add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
static struct nfs4_client *
find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
{
- long long cmp;
+ int cmp;
struct rb_node *node = root->rb_node;
struct nfs4_client *clp;
@@ -3057,10 +3059,9 @@ static struct nfs4_file *nfsd4_alloc_file(void)
}
/* OPEN Share state helper functions */
-static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
+static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
+ struct nfs4_file *fp)
{
- unsigned int hashval = file_hashval(fh);
-
lockdep_assert_held(&state_lock);
atomic_set(&fp->fi_ref, 1);
@@ -3073,7 +3074,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
fp->fi_share_deny = 0;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
- hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
+ hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
}
void
@@ -3294,17 +3295,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
/* search file_hashtbl[] for file */
static struct nfs4_file *
-find_file_locked(struct knfsd_fh *fh)
+find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
{
- unsigned int hashval = file_hashval(fh);
struct nfs4_file *fp;
- lockdep_assert_held(&state_lock);
-
- hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
+ hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
- get_nfs4_file(fp);
- return fp;
+ if (atomic_inc_not_zero(&fp->fi_ref))
+ return fp;
}
}
return NULL;
@@ -3314,10 +3312,11 @@ static struct nfs4_file *
find_file(struct knfsd_fh *fh)
{
struct nfs4_file *fp;
+ unsigned int hashval = file_hashval(fh);
- spin_lock(&state_lock);
- fp = find_file_locked(fh);
- spin_unlock(&state_lock);
+ rcu_read_lock();
+ fp = find_file_locked(fh, hashval);
+ rcu_read_unlock();
return fp;
}
@@ -3325,11 +3324,18 @@ static struct nfs4_file *
find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
{
struct nfs4_file *fp;
+ unsigned int hashval = file_hashval(fh);
+
+ rcu_read_lock();
+ fp = find_file_locked(fh, hashval);
+ rcu_read_unlock();
+ if (fp)
+ return fp;
spin_lock(&state_lock);
- fp = find_file_locked(fh);
- if (fp == NULL) {
- nfsd4_init_file(new, fh);
+ fp = find_file_locked(fh, hashval);
+ if (likely(fp == NULL)) {
+ nfsd4_init_file(fh, hashval, new);
fp = new;
}
spin_unlock(&state_lock);
@@ -4127,7 +4133,7 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
nfs4_put_stateowner(so);
}
if (open->op_file)
- nfsd4_free_file(open->op_file);
+ kmem_cache_free(file_slab, open->op_file);
if (open->op_stp)
nfs4_put_stid(&open->op_stp->st_stid);
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b1eed4dd2eab..15f7b73e0c0f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1514,6 +1514,23 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
}
static __be32
+nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
+ struct nfsd4_fallocate *fallocate)
+{
+ DECODE_HEAD;
+
+ status = nfsd4_decode_stateid(argp, &fallocate->falloc_stateid);
+ if (status)
+ return status;
+
+ READ_BUF(16);
+ p = xdr_decode_hyper(p, &fallocate->falloc_offset);
+ xdr_decode_hyper(p, &fallocate->falloc_length);
+
+ DECODE_TAIL;
+}
+
+static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
@@ -1604,10 +1621,10 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
/* new operations for NFSv4.2 */
- [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -1714,7 +1731,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
- argp->rqstp->rq_splice_ok = false;
+ clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
DECODE_TAIL;
}
@@ -1795,9 +1812,12 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
}
else
end++;
+ if (found_esc)
+ end = next;
+
str = end;
}
- pathlen = htonl(xdr->buf->len - pathlen_offset);
+ pathlen = htonl(count);
write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4);
return 0;
}
@@ -3236,10 +3256,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
- WARN_ON_ONCE(resp->rqstp->rq_splice_ok);
+ WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
return nfserr_resource;
}
- if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) {
+ if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
WARN_ON_ONCE(1);
return nfserr_resource;
}
@@ -3256,7 +3276,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
goto err_truncate;
}
- if (file->f_op->splice_read && resp->rqstp->rq_splice_ok)
+ if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
err = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
err = nfsd4_encode_readv(resp, read, file, maxcount);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 122f69185ef5..83a9694ec485 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -490,7 +490,7 @@ found_entry:
/* From the hall of fame of impractical attacks:
* Is this a user who tries to snoop on the cache? */
rtn = RC_DOIT;
- if (!rqstp->rq_secure && rp->c_secure)
+ if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
goto out;
/* Compose RPC reply header */
@@ -579,7 +579,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
spin_lock(&b->cache_lock);
drc_mem_usage += bufsize;
lru_put_end(b, rp);
- rp->c_secure = rqstp->rq_secure;
+ rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype;
rp->c_state = RC_DONE;
spin_unlock(&b->cache_lock);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 9506ea565610..19ace74d35f6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -608,7 +608,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
num);
sep = " ";
- if (len > remaining)
+ if (len >= remaining)
break;
remaining -= len;
buf += len;
@@ -623,7 +623,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
'+' : '-',
minor);
- if (len > remaining)
+ if (len >= remaining)
break;
remaining -= len;
buf += len;
@@ -631,7 +631,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
}
len = snprintf(buf, remaining, "\n");
- if (len > remaining)
+ if (len >= remaining)
return -EINVAL;
return tlen + len;
}
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 88026fc6a981..965b478d50fc 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -86,7 +86,7 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
int flags = nfsexp_flags(rqstp, exp);
/* Check if the request originated from a secure port. */
- if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
+ if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && !(flags & NFSEXP_INSECURE_PORT)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf)));
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 752d56bbe0ba..314f5c8f8f1a 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -692,7 +692,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
/* Now call the procedure handler, and encode NFS status. */
nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
nfserr = map_new_errors(rqstp->rq_vers, nfserr);
- if (nfserr == nfserr_dropit || rqstp->rq_dropme) {
+ if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
dprintk("nfsd: Dropping request; may be revisited later\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2712042a66b1..9d3be371240a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -463,17 +463,24 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
/*
* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
*
- * These objects are global. nfsd only keeps one instance of a nfs4_file per
- * inode (though it may keep multiple file descriptors open per inode). These
- * are tracked in the file_hashtbl which is protected by the state_lock
- * spinlock.
+ * These objects are global. nfsd keeps one instance of a nfs4_file per
+ * filehandle (though it may keep multiple file descriptors for each). Each
+ * inode can have multiple filehandles associated with it, so there is
+ * (potentially) a many to one relationship between this struct and struct
+ * inode.
+ *
+ * These are hashed by filehandle in the file_hashtbl, which is protected by
+ * the global state_lock spinlock.
*/
struct nfs4_file {
atomic_t fi_ref;
spinlock_t fi_lock;
- struct hlist_node fi_hash; /* hash by "struct inode *" */
+ struct hlist_node fi_hash; /* hash on fi_fhandle */
struct list_head fi_stateids;
- struct list_head fi_delegations;
+ union {
+ struct list_head fi_delegations;
+ struct rcu_head fi_rcu;
+ };
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
struct file * fi_fds[3];
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0a82e3c033ee..5685c679dd93 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/splice.h>
+#include <linux/falloc.h>
#include <linux/fcntl.h>
#include <linux/namei.h>
#include <linux/delay.h>
@@ -533,6 +534,26 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
+__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset, loff_t len,
+ int flags)
+{
+ __be32 err;
+ int error;
+
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return nfserr_inval;
+
+ err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE);
+ if (err)
+ return err;
+
+ error = vfs_fallocate(file, flags, offset, len);
+ if (!error)
+ error = commit_metadata(fhp);
+
+ return nfserrno(error);
+}
#endif /* defined(CONFIG_NFSD_V4) */
#ifdef CONFIG_NFSD_V3
@@ -881,7 +902,7 @@ static __be32
nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
- if (file->f_op->splice_read && rqstp->rq_splice_ok)
+ if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
return nfsd_splice_read(rqstp, file, offset, count);
else
return nfsd_readv(file, offset, vec, vlen, count);
@@ -937,9 +958,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int stable = *stablep;
int use_wgather;
loff_t pos = offset;
+ loff_t end = LLONG_MAX;
unsigned int pflags = current->flags;
- if (rqstp->rq_local)
+ if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
/*
* We want less throttling in balance_dirty_pages()
* and shrink_inactive_list() so that nfs to
@@ -967,10 +989,13 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
fsnotify_modify(file);
if (stable) {
- if (use_wgather)
+ if (use_wgather) {
host_err = wait_for_concurrent_writes(file);
- else
- host_err = vfs_fsync_range(file, offset, offset+*cnt, 0);
+ } else {
+ if (*cnt)
+ end = offset + *cnt - 1;
+ host_err = vfs_fsync_range(file, offset, end, 0);
+ }
}
out_nfserr:
@@ -979,7 +1004,7 @@ out_nfserr:
err = 0;
else
err = nfserrno(host_err);
- if (rqstp->rq_local)
+ if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
tsk_restore_flags(current, pflags, PF_LESS_THROTTLE);
return err;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index b1796d6ee538..2050cb016998 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -54,6 +54,8 @@ int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
+__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
+ struct file *, loff_t, loff_t, int);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 5720e9457f33..90a5925bd6ab 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -428,6 +428,13 @@ struct nfsd4_reclaim_complete {
u32 rca_one_fs;
};
+struct nfsd4_fallocate {
+ /* request */
+ stateid_t falloc_stateid;
+ loff_t falloc_offset;
+ u64 falloc_length;
+};
+
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
@@ -486,6 +493,8 @@ struct nfsd4_op {
struct nfsd4_free_stateid free_stateid;
/* NFSv4.2 */
+ struct nfsd4_fallocate allocate;
+ struct nfsd4_fallocate deallocate;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
diff --git a/fs/nsfs.c b/fs/nsfs.c
new file mode 100644
index 000000000000..af1b24fa899d
--- /dev/null
+++ b/fs/nsfs.c
@@ -0,0 +1,161 @@
+#include <linux/mount.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/proc_ns.h>
+#include <linux/magic.h>
+#include <linux/ktime.h>
+
+static struct vfsmount *nsfs_mnt;
+
+static const struct file_operations ns_file_operations = {
+ .llseek = no_llseek,
+};
+
+static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ struct inode *inode = dentry->d_inode;
+ const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
+
+ return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
+ ns_ops->name, inode->i_ino);
+}
+
+static void ns_prune_dentry(struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ if (inode) {
+ struct ns_common *ns = inode->i_private;
+ atomic_long_set(&ns->stashed, 0);
+ }
+}
+
+const struct dentry_operations ns_dentry_operations =
+{
+ .d_prune = ns_prune_dentry,
+ .d_delete = always_delete_dentry,
+ .d_dname = ns_dname,
+};
+
+static void nsfs_evict(struct inode *inode)
+{
+ struct ns_common *ns = inode->i_private;
+ clear_inode(inode);
+ ns->ops->put(ns);
+}
+
+void *ns_get_path(struct path *path, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops)
+{
+ struct vfsmount *mnt = mntget(nsfs_mnt);
+ struct qstr qname = { .name = "", };
+ struct dentry *dentry;
+ struct inode *inode;
+ struct ns_common *ns;
+ unsigned long d;
+
+again:
+ ns = ns_ops->get(task);
+ if (!ns) {
+ mntput(mnt);
+ return ERR_PTR(-ENOENT);
+ }
+ rcu_read_lock();
+ d = atomic_long_read(&ns->stashed);
+ if (!d)
+ goto slow;
+ dentry = (struct dentry *)d;
+ if (!lockref_get_not_dead(&dentry->d_lockref))
+ goto slow;
+ rcu_read_unlock();
+ ns_ops->put(ns);
+got_it:
+ path->mnt = mnt;
+ path->dentry = dentry;
+ return NULL;
+slow:
+ rcu_read_unlock();
+ inode = new_inode_pseudo(mnt->mnt_sb);
+ if (!inode) {
+ ns_ops->put(ns);
+ mntput(mnt);
+ return ERR_PTR(-ENOMEM);
+ }
+ inode->i_ino = ns->inum;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_flags |= S_IMMUTABLE;
+ inode->i_mode = S_IFREG | S_IRUGO;
+ inode->i_fop = &ns_file_operations;
+ inode->i_private = ns;
+
+ dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
+ if (!dentry) {
+ iput(inode);
+ mntput(mnt);
+ return ERR_PTR(-ENOMEM);
+ }
+ d_instantiate(dentry, inode);
+ dentry->d_fsdata = (void *)ns_ops;
+ d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
+ if (d) {
+ d_delete(dentry); /* make sure ->d_prune() does nothing */
+ dput(dentry);
+ cpu_relax();
+ goto again;
+ }
+ goto got_it;
+}
+
+int ns_get_name(char *buf, size_t size, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops)
+{
+ struct ns_common *ns;
+ int res = -ENOENT;
+ ns = ns_ops->get(task);
+ if (ns) {
+ res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum);
+ ns_ops->put(ns);
+ }
+ return res;
+}
+
+struct file *proc_ns_fget(int fd)
+{
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ return ERR_PTR(-EBADF);
+
+ if (file->f_op != &ns_file_operations)
+ goto out_invalid;
+
+ return file;
+
+out_invalid:
+ fput(file);
+ return ERR_PTR(-EINVAL);
+}
+
+static const struct super_operations nsfs_ops = {
+ .statfs = simple_statfs,
+ .evict_inode = nsfs_evict,
+};
+static struct dentry *nsfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ return mount_pseudo(fs_type, "nsfs:", &nsfs_ops,
+ &ns_dentry_operations, NSFS_MAGIC);
+}
+static struct file_system_type nsfs = {
+ .name = "nsfs",
+ .mount = nsfs_mount,
+ .kill_sb = kill_anon_super,
+};
+
+void __init nsfs_init(void)
+{
+ nsfs_mnt = kern_mount(&nsfs);
+ if (IS_ERR(nsfs_mnt))
+ panic("can't set nsfs up\n");
+ nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER;
+}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a93bf9892256..fcae9ef1a328 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5662,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len, int flags,
struct ocfs2_cached_dealloc_ctxt *dealloc,
- u64 refcount_loc)
+ u64 refcount_loc, bool refcount_tree_locked)
{
int ret, credits = 0, extra_blocks = 0;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
@@ -5676,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode,
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
OCFS2_HAS_REFCOUNT_FL));
- ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
- &ref_tree, NULL);
- if (ret) {
- mlog_errno(ret);
- goto bail;
+ if (!refcount_tree_locked) {
+ ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+ &ref_tree, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto bail;
+ }
}
ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -7021,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
struct ocfs2_extent_tree et;
struct ocfs2_cached_dealloc_ctxt dealloc;
+ struct ocfs2_refcount_tree *ref_tree = NULL;
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
ocfs2_init_dealloc_ctxt(&dealloc);
@@ -7130,9 +7133,18 @@ start:
phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
+ if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) {
+ status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+ &ref_tree, NULL);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
phys_cpos, trunc_len, flags, &dealloc,
- refcount_loc);
+ refcount_loc, true);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -7147,6 +7159,8 @@ start:
goto start;
bail:
+ if (ref_tree)
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
ocfs2_schedule_truncate_log_flush(osb, 1);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index ca381c584127..fb09b97db162 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len, int flags,
struct ocfs2_cached_dealloc_ctxt *dealloc,
- u64 refcount_loc);
+ u64 refcount_loc, bool refcount_tree_locked);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct ocfs2_extent_tree *et);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d9f222987f24..46d93e941f3d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -894,7 +894,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
}
}
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
{
int i;
@@ -915,7 +915,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
page_cache_release(wc->w_target_page);
}
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+ ocfs2_unlock_pages(wc);
brelse(wc->w_di_bh);
kfree(wc);
}
@@ -2042,11 +2046,19 @@ out_write_size:
ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, wc->w_di_bh);
+ /* unlock pages before dealloc since it needs acquiring j_trans_barrier
+ * lock, or it will cause a deadlock since journal commit threads holds
+ * this lock and will ask for the page lock when flushing the data.
+ * put it here to preserve the unlock order.
+ */
+ ocfs2_unlock_pages(wc);
+
ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
- ocfs2_free_write_ctxt(wc);
+ brelse(wc->w_di_bh);
+ kfree(wc);
return copied;
}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 79d56dc981bc..319e786175af 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4479,7 +4479,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
- &dealloc, 0);
+ &dealloc, 0, false);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3689b3592042..a6944b25fd5b 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -695,14 +695,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
res->inflight_assert_workers);
}
-static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
- struct dlm_lock_resource *res)
-{
- spin_lock(&res->spinlock);
- __dlm_lockres_grab_inflight_worker(dlm, res);
- spin_unlock(&res->spinlock);
-}
-
static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
@@ -1646,6 +1638,7 @@ send_response:
}
mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
dlm->node_num, res->lockname.len, res->lockname.name);
+ spin_lock(&res->spinlock);
ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
DLM_ASSERT_MASTER_MLE_CLEANUP);
if (ret < 0) {
@@ -1653,7 +1646,8 @@ send_response:
response = DLM_MASTER_RESP_ERROR;
dlm_lockres_put(res);
} else
- dlm_lockres_grab_inflight_worker(dlm, res);
+ __dlm_lockres_grab_inflight_worker(dlm, res);
+ spin_unlock(&res->spinlock);
} else {
if (res)
dlm_lockres_put(res);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 69fb9f75b082..3950693dd0f6 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1803,7 +1803,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
phys_cpos, trunc_len, flags,
- &dealloc, refcount_loc);
+ &dealloc, refcount_loc, false);
if (ret < 0) {
mlog_errno(ret);
goto out;
diff --git a/fs/open.c b/fs/open.c
index d45bd905d418..813be037b412 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -222,7 +222,7 @@ SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
#endif /* BITS_PER_LONG == 32 */
-int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
long ret;
@@ -309,6 +309,7 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
sb_end_write(inode->i_sb);
return ret;
}
+EXPORT_SYMBOL_GPL(vfs_fallocate);
SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
{
@@ -316,7 +317,7 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
int error = -EBADF;
if (f.file) {
- error = do_fallocate(f.file, mode, offset, len);
+ error = vfs_fallocate(f.file, mode, offset, len);
fdput(f);
}
return error;
diff --git a/fs/pnode.c b/fs/pnode.c
index aae331a5d03b..260ac8f898a4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -242,6 +242,7 @@ static int propagate_one(struct mount *m)
child = copy_tree(last_source, last_source->mnt.mnt_root, type);
if (IS_ERR(child))
return PTR_ERR(child);
+ child->mnt.mnt_flags &= ~MNT_LOCKED;
mnt_set_mountpoint(m, mp, child);
last_dest = m;
last_source = child;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 590aeda5af12..3f3d7aeb0712 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
.llseek = seq_lseek,
.release = proc_id_map_release,
};
+
+static int proc_setgroups_open(struct inode *inode, struct file *file)
+{
+ struct user_namespace *ns = NULL;
+ struct task_struct *task;
+ int ret;
+
+ ret = -ESRCH;
+ task = get_proc_task(inode);
+ if (task) {
+ rcu_read_lock();
+ ns = get_user_ns(task_cred_xxx(task, user_ns));
+ rcu_read_unlock();
+ put_task_struct(task);
+ }
+ if (!ns)
+ goto err;
+
+ if (file->f_mode & FMODE_WRITE) {
+ ret = -EACCES;
+ if (!ns_capable(ns, CAP_SYS_ADMIN))
+ goto err_put_ns;
+ }
+
+ ret = single_open(file, &proc_setgroups_show, ns);
+ if (ret)
+ goto err_put_ns;
+
+ return 0;
+err_put_ns:
+ put_user_ns(ns);
+err:
+ return ret;
+}
+
+static int proc_setgroups_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct user_namespace *ns = seq->private;
+ int ret = single_release(inode, file);
+ put_user_ns(ns);
+ return ret;
+}
+
+static const struct file_operations proc_setgroups_operations = {
+ .open = proc_setgroups_open,
+ .write = proc_setgroups_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = proc_setgroups_release,
+};
#endif /* CONFIG_USER_NS */
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
@@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
};
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 333080d7a671..8420a2f80811 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -32,8 +32,6 @@ static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
struct ctl_table_header *head;
- const struct proc_ns_operations *ns_ops;
- void *ns;
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
@@ -50,11 +48,6 @@ static void proc_evict_inode(struct inode *inode)
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
sysctl_head_put(head);
}
- /* Release any associated namespace */
- ns_ops = PROC_I(inode)->ns.ns_ops;
- ns = PROC_I(inode)->ns.ns;
- if (ns_ops && ns)
- ns_ops->put(ns);
}
static struct kmem_cache * proc_inode_cachep;
@@ -73,8 +66,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->pde = NULL;
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
- ei->ns.ns = NULL;
- ei->ns.ns_ops = NULL;
+ ei->ns_ops = NULL;
inode = &ei->vfs_inode;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 7fb1a4869fd0..6fcdba573e0f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -65,7 +65,7 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
- struct proc_ns ns;
+ const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
};
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index aa1eee06420f..d3ebf2e61853 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -12,6 +12,9 @@
#include <linux/vmstat.h>
#include <linux/atomic.h>
#include <linux/vmalloc.h>
+#ifdef CONFIG_CMA
+#include <linux/cma.h>
+#endif
#include <asm/page.h>
#include <asm/pgtable.h>
#include "internal.h"
@@ -138,6 +141,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
"AnonHugePages: %8lu kB\n"
#endif
+#ifdef CONFIG_CMA
+ "CmaTotal: %8lu kB\n"
+ "CmaFree: %8lu kB\n"
+#endif
,
K(i.totalram),
K(i.freeram),
@@ -187,12 +194,16 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
vmi.used >> 10,
vmi.largest_chunk >> 10
#ifdef CONFIG_MEMORY_FAILURE
- ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
+ , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
+ , K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
HPAGE_PMD_NR)
#endif
+#ifdef CONFIG_CMA
+ , K(totalcma_pages)
+ , K(global_page_state(NR_FREE_CMA_PAGES))
+#endif
);
hugetlb_report_meminfo(m);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 89026095f2b5..c9eac4563fa8 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -1,10 +1,6 @@
#include <linux/proc_fs.h>
#include <linux/nsproxy.h>
-#include <linux/sched.h>
#include <linux/ptrace.h>
-#include <linux/fs_struct.h>
-#include <linux/mount.h>
-#include <linux/path.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/utsname.h>
@@ -34,138 +30,45 @@ static const struct proc_ns_operations *ns_entries[] = {
&mntns_operations,
};
-static const struct file_operations ns_file_operations = {
- .llseek = no_llseek,
-};
-
-static const struct inode_operations ns_inode_operations = {
- .setattr = proc_setattr,
-};
-
-static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
-{
- struct inode *inode = dentry->d_inode;
- const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
-
- return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
- ns_ops->name, inode->i_ino);
-}
-
-const struct dentry_operations ns_dentry_operations =
-{
- .d_delete = always_delete_dentry,
- .d_dname = ns_dname,
-};
-
-static struct dentry *proc_ns_get_dentry(struct super_block *sb,
- struct task_struct *task, const struct proc_ns_operations *ns_ops)
-{
- struct dentry *dentry, *result;
- struct inode *inode;
- struct proc_inode *ei;
- struct qstr qname = { .name = "", };
- void *ns;
-
- ns = ns_ops->get(task);
- if (!ns)
- return ERR_PTR(-ENOENT);
-
- dentry = d_alloc_pseudo(sb, &qname);
- if (!dentry) {
- ns_ops->put(ns);
- return ERR_PTR(-ENOMEM);
- }
-
- inode = iget_locked(sb, ns_ops->inum(ns));
- if (!inode) {
- dput(dentry);
- ns_ops->put(ns);
- return ERR_PTR(-ENOMEM);
- }
-
- ei = PROC_I(inode);
- if (inode->i_state & I_NEW) {
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- inode->i_op = &ns_inode_operations;
- inode->i_mode = S_IFREG | S_IRUGO;
- inode->i_fop = &ns_file_operations;
- ei->ns.ns_ops = ns_ops;
- ei->ns.ns = ns;
- unlock_new_inode(inode);
- } else {
- ns_ops->put(ns);
- }
-
- d_set_d_op(dentry, &ns_dentry_operations);
- result = d_instantiate_unique(dentry, inode);
- if (result) {
- dput(dentry);
- dentry = result;
- }
-
- return dentry;
-}
-
static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
- struct super_block *sb = inode->i_sb;
- struct proc_inode *ei = PROC_I(inode);
+ const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
struct task_struct *task;
struct path ns_path;
void *error = ERR_PTR(-EACCES);
task = get_proc_task(inode);
if (!task)
- goto out;
+ return error;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- goto out_put_task;
-
- ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops);
- if (IS_ERR(ns_path.dentry)) {
- error = ERR_CAST(ns_path.dentry);
- goto out_put_task;
+ if (ptrace_may_access(task, PTRACE_MODE_READ)) {
+ error = ns_get_path(&ns_path, task, ns_ops);
+ if (!error)
+ nd_jump_link(nd, &ns_path);
}
-
- ns_path.mnt = mntget(nd->path.mnt);
- nd_jump_link(nd, &ns_path);
- error = NULL;
-
-out_put_task:
put_task_struct(task);
-out:
return error;
}
static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
- struct proc_inode *ei = PROC_I(inode);
- const struct proc_ns_operations *ns_ops = ei->ns.ns_ops;
+ const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
struct task_struct *task;
- void *ns;
char name[50];
int res = -EACCES;
task = get_proc_task(inode);
if (!task)
- goto out;
-
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- goto out_put_task;
+ return res;
- res = -ENOENT;
- ns = ns_ops->get(task);
- if (!ns)
- goto out_put_task;
-
- snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
- res = readlink_copy(buffer, buflen, name);
- ns_ops->put(ns);
-out_put_task:
+ if (ptrace_may_access(task, PTRACE_MODE_READ)) {
+ res = ns_get_name(name, sizeof(name), task, ns_ops);
+ if (res >= 0)
+ res = readlink_copy(buffer, buflen, name);
+ }
put_task_struct(task);
-out:
return res;
}
@@ -189,7 +92,7 @@ static int proc_ns_instantiate(struct inode *dir,
ei = PROC_I(inode);
inode->i_mode = S_IFLNK|S_IRWXUGO;
inode->i_op = &proc_ns_link_inode_operations;
- ei->ns.ns_ops = ns_ops;
+ ei->ns_ops = ns_ops;
d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
@@ -267,31 +170,3 @@ const struct inode_operations proc_ns_dir_inode_operations = {
.getattr = pid_getattr,
.setattr = proc_setattr,
};
-
-struct file *proc_ns_fget(int fd)
-{
- struct file *file;
-
- file = fget(fd);
- if (!file)
- return ERR_PTR(-EBADF);
-
- if (file->f_op != &ns_file_operations)
- goto out_invalid;
-
- return file;
-
-out_invalid:
- fput(file);
- return ERR_PTR(-EINVAL);
-}
-
-struct proc_ns *get_proc_ns(struct inode *inode)
-{
- return &PROC_I(inode)->ns;
-}
-
-bool proc_ns_inode(struct inode *inode)
-{
- return inode->i_fop == &ns_file_operations;
-}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf2d03f8fd3e..510413eb25b8 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -159,7 +159,7 @@ static int show_stat(struct seq_file *p, void *v)
/* sum again ? it could be updated? */
for_each_irq_nr(j)
- seq_put_decimal_ull(p, ' ', kstat_irqs(j));
+ seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
seq_printf(p,
"\nctxt %llu\n"
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 73ca1740d839..0f96f71ab32b 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -91,6 +91,7 @@ static void show_type(struct seq_file *m, struct super_block *sb)
static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
{
+ struct proc_mounts *p = proc_mounts(m);
struct mount *r = real_mount(mnt);
int err = 0;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -104,7 +105,10 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
mangle(m, r->mnt_devname ? r->mnt_devname : "none");
}
seq_putc(m, ' ');
- seq_path(m, &mnt_path, " \t\n\\");
+ /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+ if (err)
+ goto out;
seq_putc(m, ' ');
show_type(m, sb);
seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
@@ -125,7 +129,6 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
struct mount *r = real_mount(mnt);
struct super_block *sb = mnt->mnt_sb;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
- struct path root = p->root;
int err = 0;
seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
@@ -139,7 +142,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
seq_putc(m, ' ');
/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
- err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
if (err)
goto out;
@@ -182,6 +185,7 @@ out:
static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
{
+ struct proc_mounts *p = proc_mounts(m);
struct mount *r = real_mount(mnt);
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
struct super_block *sb = mnt_path.dentry->d_sb;
@@ -201,7 +205,10 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
/* mount point */
seq_puts(m, " mounted on ");
- seq_path(m, &mnt_path, " \t\n\\");
+ /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+ if (err)
+ goto out;
seq_putc(m, ' ');
/* file system type */
@@ -216,6 +223,7 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
}
seq_putc(m, '\n');
+out:
return err;
}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 2f389ce5023c..8613e5b35c22 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -561,7 +561,6 @@ static struct platform_driver ramoops_driver = {
.remove = __exit_p(ramoops_remove),
.driver = {
.name = "ramoops",
- .owner = THIS_MODULE,
},
};
diff --git a/fs/read_write.c b/fs/read_write.c
index 7d9318c3d43c..c0805c93b6fa 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -412,6 +412,23 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
EXPORT_SYMBOL(new_sync_read);
+ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ ssize_t ret;
+
+ if (file->f_op->read)
+ ret = file->f_op->read(file, buf, count, pos);
+ else if (file->f_op->aio_read)
+ ret = do_sync_read(file, buf, count, pos);
+ else if (file->f_op->read_iter)
+ ret = new_sync_read(file, buf, count, pos);
+ else
+ ret = -EINVAL;
+
+ return ret;
+}
+
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
@@ -426,12 +443,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
ret = rw_verify_area(READ, file, pos, count);
if (ret >= 0) {
count = ret;
- if (file->f_op->read)
- ret = file->f_op->read(file, buf, count, pos);
- else if (file->f_op->aio_read)
- ret = do_sync_read(file, buf, count, pos);
- else
- ret = new_sync_read(file, buf, count, pos);
+ ret = __vfs_read(file, buf, count, pos);
if (ret > 0) {
fsnotify_access(file);
add_rchar(current, ret);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index ea63ab13ef92..71fbbe3e2dab 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2172,6 +2172,9 @@ error_unlocked:
reiserfs_write_unlock(s);
}
+ if (sbi->commit_wq)
+ destroy_workqueue(sbi->commit_wq);
+
cancel_delayed_work_sync(&REISERFS_SB(s)->old_work);
reiserfs_free_bitmap_cache(s);
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index b6fa8657dcbc..ffb093e72b6c 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -120,6 +120,21 @@ config SQUASHFS_ZLIB
If unsure, say Y.
+config SQUASHFS_LZ4
+ bool "Include support for LZ4 compressed file systems"
+ depends on SQUASHFS
+ select LZ4_DECOMPRESS
+ help
+ Saying Y here includes support for reading Squashfs file systems
+ compressed with LZ4 compression. LZ4 compression is mainly
+ aimed at embedded systems with slower CPUs where the overheads
+ of zlib are too high.
+
+ LZ4 is not the standard compression used in Squashfs and so most
+ file systems will be readable without selecting this option.
+
+ If unsure, say N.
+
config SQUASHFS_LZO
bool "Include support for LZO compressed file systems"
depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 4132520b4ff2..246a6f329d89 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -11,6 +11,7 @@ squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
+squashfs-$(CONFIG_SQUASHFS_LZ4) += lz4_wrapper.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index ac22fe73b0ad..e9034bf6e5ae 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -41,6 +41,12 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
};
+#ifndef CONFIG_SQUASHFS_LZ4
+static const struct squashfs_decompressor squashfs_lz4_comp_ops = {
+ NULL, NULL, NULL, NULL, LZ4_COMPRESSION, "lz4", 0
+};
+#endif
+
#ifndef CONFIG_SQUASHFS_LZO
static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
@@ -65,6 +71,7 @@ static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
static const struct squashfs_decompressor *decompressor[] = {
&squashfs_zlib_comp_ops,
+ &squashfs_lz4_comp_ops,
&squashfs_lzo_comp_ops,
&squashfs_xz_comp_ops,
&squashfs_lzma_unsupported_comp_ops,
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index af0985321808..a25713c031a5 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -46,6 +46,10 @@ static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk,
extern const struct squashfs_decompressor squashfs_xz_comp_ops;
#endif
+#ifdef CONFIG_SQUASHFS_LZ4
+extern const struct squashfs_decompressor squashfs_lz4_comp_ops;
+#endif
+
#ifdef CONFIG_SQUASHFS_LZO
extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
#endif
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
new file mode 100644
index 000000000000..c31e2bc9c081
--- /dev/null
+++ b/fs/squashfs/lz4_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2013, 2014
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/buffer_head.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/lz4.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs.h"
+#include "decompressor.h"
+#include "page_actor.h"
+
+#define LZ4_LEGACY 1 /* the only comp_opts->version value lz4_comp_opts() accepts */
+/* Layout of the LZ4 compression-options block; both fields are little-endian 32-bit. */
+struct lz4_comp_opts {
+ __le32 version; /* must equal LZ4_LEGACY */
+ __le32 flags; /* not examined anywhere in this file */
+};
+/* Per-stream state allocated by lz4_init() and released by lz4_free(). */
+struct squashfs_lz4 {
+ void *input; /* vmalloc()ed buffer the compressed bytes are gathered into */
+ void *output; /* vmalloc()ed buffer that receives the decompressed bytes */
+};
+
+/*
+ * Validate the compression-options block of an LZ4-compressed filesystem.
+ *
+ * buff/len describe the options data handed in by the caller; buff is
+ * interpreted as a struct lz4_comp_opts. msblk is not used, and the flags
+ * field is not examined.
+ *
+ * Returns NULL when the options are acceptable, ERR_PTR(-EIO) when buff is
+ * NULL or len is smaller than struct lz4_comp_opts, and ERR_PTR(-EINVAL)
+ * (after logging an error) when the version field is not LZ4_LEGACY.
+ *
+ * NOTE(review): len is an int compared against a size_t, so a negative len
+ * would not be caught by the size check - presumably callers never pass
+ * one; confirm against the caller.
+ */
+static void *lz4_comp_opts(struct squashfs_sb_info *msblk,
+ void *buff, int len)
+{
+ struct lz4_comp_opts *comp_opts = buff;
+
+ /* LZ4 compressed filesystems always have compression options */
+ if (comp_opts == NULL || len < sizeof(*comp_opts))
+ return ERR_PTR(-EIO);
+
+ if (le32_to_cpu(comp_opts->version) != LZ4_LEGACY) {
+ /* LZ4 format currently used by the kernel is the 'legacy'
+ * format */
+ ERROR("Unknown LZ4 version\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return NULL; /* nothing is allocated or kept from the options block */
+}
+
+/*
+ * Allocate the per-stream state used by lz4_uncompress().
+ *
+ * A zeroed struct squashfs_lz4 is allocated together with two vmalloc()ed
+ * buffers ("input" and "output"), each sized to the larger of
+ * msblk->block_size and SQUASHFS_METADATA_SIZE. buff is not used.
+ *
+ * Returns the new stream on success. On any allocation failure the parts
+ * already allocated are released, an error is logged and ERR_PTR(-ENOMEM)
+ * is returned.
+ */
+static void *lz4_init(struct squashfs_sb_info *msblk, void *buff)
+{
+ int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
+ struct squashfs_lz4 *stream;
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (stream == NULL)
+ goto failed;
+ stream->input = vmalloc(block_size);
+ if (stream->input == NULL)
+ goto failed2;
+ stream->output = vmalloc(block_size);
+ if (stream->output == NULL)
+ goto failed3;
+
+ return stream;
+
+failed3: /* output allocation failed: undo the input buffer */
+ vfree(stream->input);
+failed2: /* input allocation failed: undo the stream itself */
+ kfree(stream);
+failed:
+ ERROR("Failed to initialise LZ4 decompressor\n");
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Release a stream created by lz4_init(): both buffers, then the stream.
+ * The buffers are only touched when strm is non-NULL.
+ */
+static void lz4_free(void *strm)
+{
+ struct squashfs_lz4 *stream = strm;
+
+ if (stream) {
+ vfree(stream->input);
+ vfree(stream->output);
+ }
+ kfree(stream); /* reached for a NULL stream too */
+}
+
+/*
+ * Decompress one LZ4 block.
+ *
+ * The compressed data (length bytes) is gathered from the b buffer heads in
+ * bh into stream->input, starting at offset within the first buffer head
+ * and at 0 within the rest; each buffer head is released with put_bh() once
+ * copied. The gathered data is decompressed into stream->output and then
+ * copied out, PAGE_CACHE_SIZE bytes at a time, through the pages supplied
+ * by the page actor in output.
+ *
+ * Returns the value left in dest_len after decompression (treated here as
+ * the decompressed size), or -EIO if lz4_decompress_unknownoutputsize()
+ * returns non-zero. All buffer heads have been released by then.
+ */
+static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
+{
+ struct squashfs_lz4 *stream = strm;
+ void *buff = stream->input, *data;
+ int avail, i, bytes = length, res;
+ size_t dest_len = output->length; /* passed by address to the decompressor below */
+
+ for (i = 0; i < b; i++) {
+ avail = min(bytes, msblk->devblksize - offset); /* bytes to take from this buffer head */
+ memcpy(buff, bh[i]->b_data + offset, avail);
+ buff += avail;
+ bytes -= avail;
+ offset = 0; /* only the first buffer head is read from part-way in */
+ put_bh(bh[i]);
+ }
+
+ res = lz4_decompress_unknownoutputsize(stream->input, length,
+ stream->output, &dest_len);
+ if (res)
+ return -EIO;
+
+ bytes = dest_len;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) { /* remaining data fits in this page: copy it and stop */
+ memcpy(data, buff, bytes);
+ break;
+ }
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
+ squashfs_finish_page(output);
+
+ return dest_len;
+}
+/* LZ4 decompressor operations table; declared extern in decompressor.h under CONFIG_SQUASHFS_LZ4. */
+const struct squashfs_decompressor squashfs_lz4_comp_ops = {
+ .init = lz4_init,
+ .comp_opts = lz4_comp_opts,
+ .free = lz4_free,
+ .decompress = lz4_uncompress,
+ .id = LZ4_COMPRESSION,
+ .name = "lz4",
+ .supported = 1 /* the placeholder entry in decompressor.c ends in 0 instead */
+};
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 4b2beda49498..506f4ba5b983 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -240,6 +240,7 @@ struct meta_index {
#define LZMA_COMPRESSION 2
#define LZO_COMPRESSION 3
#define XZ_COMPRESSION 4
+#define LZ4_COMPRESSION 5
struct squashfs_super_block {
__le32 s_magic;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e9ef59b3abb1..dfe928a9540f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -102,6 +102,22 @@ static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf,
return battr->read(of->file, kobj, battr, buf, pos, count);
}
+/*
+ * kernfs read callback for regular sysfs files with pre-alloc.
+ *
+ * Hands buf to the attribute's ->show() method and returns its result.
+ * Returns 0 without calling ->show() when pos is non-zero, or (with a
+ * one-time warning) when buf is not of->prealloc_buf. count is not used.
+ */
+static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
+ size_t count, loff_t pos)
+{
+ const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
+ struct kobject *kobj = of->kn->parent->priv;
+
+ /*
+ * If buf != of->prealloc_buf, we don't know how
+ * large it is, so cannot safely pass it to ->show
+ */
+ if (pos || WARN_ON_ONCE(buf != of->prealloc_buf))
+ return 0;
+ return ops->show(kobj, of->kn->priv, buf);
+}
+
/* kernfs write callback for regular sysfs files */
static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf,
size_t count, loff_t pos)
@@ -125,7 +141,7 @@ static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf,
if (size) {
if (size <= pos)
- return 0;
+ return -EFBIG;
count = min_t(ssize_t, count, size - pos);
}
if (!count)
@@ -184,6 +200,22 @@ static const struct kernfs_ops sysfs_file_kfops_rw = {
.write = sysfs_kf_write,
};
+static const struct kernfs_ops sysfs_prealloc_kfops_ro = { /* ->show only, SYSFS_PREALLOC set in mode */
+ .read = sysfs_kf_read,
+ .prealloc = true,
+};
+/* ->store only, SYSFS_PREALLOC set in mode. */
+static const struct kernfs_ops sysfs_prealloc_kfops_wo = {
+ .write = sysfs_kf_write,
+ .prealloc = true,
+};
+/* Both ->show and ->store, SYSFS_PREALLOC set in mode. */
+static const struct kernfs_ops sysfs_prealloc_kfops_rw = {
+ .read = sysfs_kf_read,
+ .write = sysfs_kf_write,
+ .prealloc = true,
+};
+
static const struct kernfs_ops sysfs_bin_kfops_ro = {
.read = sysfs_kf_bin_read,
};
@@ -222,13 +254,22 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
kobject_name(kobj)))
return -EINVAL;
- if (sysfs_ops->show && sysfs_ops->store)
- ops = &sysfs_file_kfops_rw;
- else if (sysfs_ops->show)
- ops = &sysfs_file_kfops_ro;
- else if (sysfs_ops->store)
- ops = &sysfs_file_kfops_wo;
- else
+ if (sysfs_ops->show && sysfs_ops->store) {
+ if (mode & SYSFS_PREALLOC)
+ ops = &sysfs_prealloc_kfops_rw;
+ else
+ ops = &sysfs_file_kfops_rw;
+ } else if (sysfs_ops->show) {
+ if (mode & SYSFS_PREALLOC)
+ ops = &sysfs_prealloc_kfops_ro;
+ else
+ ops = &sysfs_file_kfops_ro;
+ } else if (sysfs_ops->store) {
+ if (mode & SYSFS_PREALLOC)
+ ops = &sysfs_prealloc_kfops_wo;
+ else
+ ops = &sysfs_file_kfops_wo;
+ } else
ops = &sysfs_file_kfops_empty;
size = PAGE_SIZE;
@@ -253,7 +294,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
if (!attr->ignore_lockdep)
key = attr->key ?: (struct lock_class_key *)&attr->skey;
#endif
- kn = __kernfs_create_file(parent, attr->name, mode, size, ops,
+ kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
(void *)attr, ns, true, key);
if (IS_ERR(kn)) {
if (PTR_ERR(kn) == -EEXIST)
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index a012c51caffd..05e90edd1992 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -57,6 +57,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
sector_t offset;
int i, num, ret = 0;
struct extent_position epos = { NULL, 0, {0, 0} };
+ struct super_block *sb = dir->i_sb;
if (ctx->pos == 0) {
if (!dir_emit_dot(file, ctx))
@@ -76,16 +77,16 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
if (nf_pos == 0)
nf_pos = udf_ext0_offset(dir);
- fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1);
+ fibh.soffset = fibh.eoffset = nf_pos & (sb->s_blocksize - 1);
if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
- if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits,
+ if (inode_bmap(dir, nf_pos >> sb->s_blocksize_bits,
&epos, &eloc, &elen, &offset)
!= (EXT_RECORDED_ALLOCATED >> 30)) {
ret = -ENOENT;
goto out;
}
- block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
- if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
+ block = udf_get_lb_pblock(sb, &eloc, offset);
+ if ((++offset << sb->s_blocksize_bits) < elen) {
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
epos.offset -= sizeof(struct short_ad);
else if (iinfo->i_alloc_type ==
@@ -95,18 +96,18 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
offset = 0;
}
- if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) {
+ if (!(fibh.sbh = fibh.ebh = udf_tread(sb, block))) {
ret = -EIO;
goto out;
}
- if (!(offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9)) - 1))) {
- i = 16 >> (dir->i_sb->s_blocksize_bits - 9);
- if (i + offset > (elen >> dir->i_sb->s_blocksize_bits))
- i = (elen >> dir->i_sb->s_blocksize_bits) - offset;
+ if (!(offset & ((16 >> (sb->s_blocksize_bits - 9)) - 1))) {
+ i = 16 >> (sb->s_blocksize_bits - 9);
+ if (i + offset > (elen >> sb->s_blocksize_bits))
+ i = (elen >> sb->s_blocksize_bits) - offset;
for (num = 0; i > 0; i--) {
- block = udf_get_lb_pblock(dir->i_sb, &eloc, offset + i);
- tmp = udf_tgetblk(dir->i_sb, block);
+ block = udf_get_lb_pblock(sb, &eloc, offset + i);
+ tmp = udf_tgetblk(sb, block);
if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp))
bha[num++] = tmp;
else
@@ -152,12 +153,12 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
}
if ((cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
- if (!UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNDELETE))
+ if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNDELETE))
continue;
}
if ((cfi.fileCharacteristics & FID_FILE_CHAR_HIDDEN) != 0) {
- if (!UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNHIDE))
+ if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE))
continue;
}
@@ -167,12 +168,12 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
continue;
}
- flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
+ flen = udf_get_filename(sb, nameptr, lfi, fname, UDF_NAME_LEN);
if (!flen)
continue;
tloc = lelb_to_cpu(cfi.icb.extLocation);
- iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0);
+ iblock = udf_get_lb_pblock(sb, &tloc, 0);
if (!dir_emit(ctx, fname, flen, iblock, DT_UNKNOWN))
goto out;
} /* end while */
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c9b4df5810d5..5bc71d9a674a 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1489,6 +1489,20 @@ reread:
}
inode->i_generation = iinfo->i_unique;
+ /* Sanity checks for files in ICB so that we don't get confused later */
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ /*
+ * For file in ICB data is stored in allocation descriptor
+ * so sizes should match
+ */
+ if (iinfo->i_lenAlloc != inode->i_size)
+ goto out;
+ /* File in ICB has to fit in there... */
+ if (inode->i_size > inode->i_sb->s_blocksize -
+ udf_file_entry_alloc_offset(inode))
+ goto out;
+ }
+
switch (fe->icbTag.fileType) {
case ICBTAG_FILE_TYPE_DIRECTORY:
inode->i_op = &udf_dir_inode_operations;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c12e260fd6c4..33b246b82c98 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -159,18 +159,19 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
struct udf_inode_info *dinfo = UDF_I(dir);
int isdotdot = child->len == 2 &&
child->name[0] == '.' && child->name[1] == '.';
+ struct super_block *sb = dir->i_sb;
size = udf_ext0_offset(dir) + dir->i_size;
f_pos = udf_ext0_offset(dir);
fibh->sbh = fibh->ebh = NULL;
- fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1);
+ fibh->soffset = fibh->eoffset = f_pos & (sb->s_blocksize - 1);
if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
- if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos,
+ if (inode_bmap(dir, f_pos >> sb->s_blocksize_bits, &epos,
&eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30))
goto out_err;
- block = udf_get_lb_pblock(dir->i_sb, &eloc, offset);
- if ((++offset << dir->i_sb->s_blocksize_bits) < elen) {
+ block = udf_get_lb_pblock(sb, &eloc, offset);
+ if ((++offset << sb->s_blocksize_bits) < elen) {
if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
epos.offset -= sizeof(struct short_ad);
else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
@@ -178,7 +179,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
} else
offset = 0;
- fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block);
+ fibh->sbh = fibh->ebh = udf_tread(sb, block);
if (!fibh->sbh)
goto out_err;
}
@@ -217,12 +218,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
}
if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
- if (!UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNDELETE))
+ if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNDELETE))
continue;
}
if ((cfi->fileCharacteristics & FID_FILE_CHAR_HIDDEN) != 0) {
- if (!UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNHIDE))
+ if (!UDF_QUERY_FLAG(sb, UDF_FLAG_UNHIDE))
continue;
}
@@ -233,7 +234,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
if (!lfi)
continue;
- flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
+ flen = udf_get_filename(sb, nameptr, lfi, fname, UDF_NAME_LEN);
if (flen && udf_match(flen, fname, child->len, child->name))
goto out_ok;
}
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 6fb7945c1e6e..ac10ca939f26 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -30,49 +30,73 @@
#include <linux/buffer_head.h>
#include "udf_i.h"
-static void udf_pc_to_char(struct super_block *sb, unsigned char *from,
- int fromlen, unsigned char *to)
+static int udf_pc_to_char(struct super_block *sb, unsigned char *from,
+ int fromlen, unsigned char *to, int tolen)
{
struct pathComponent *pc;
int elen = 0;
+ int comp_len;
unsigned char *p = to;
+ /* Reserve one byte for terminating \0 */
+ tolen--;
while (elen < fromlen) {
pc = (struct pathComponent *)(from + elen);
+ elen += sizeof(struct pathComponent);
switch (pc->componentType) {
case 1:
/*
* Symlink points to some place which should be agreed
* upon between originator and receiver of the media. Ignore.
*/
- if (pc->lengthComponentIdent > 0)
+ if (pc->lengthComponentIdent > 0) {
+ elen += pc->lengthComponentIdent;
break;
+ }
/* Fall through */
case 2:
+ if (tolen == 0)
+ return -ENAMETOOLONG;
p = to;
*p++ = '/';
+ tolen--;
break;
case 3:
+ if (tolen < 3)
+ return -ENAMETOOLONG;
memcpy(p, "../", 3);
p += 3;
+ tolen -= 3;
break;
case 4:
+ if (tolen < 2)
+ return -ENAMETOOLONG;
memcpy(p, "./", 2);
p += 2;
+ tolen -= 2;
/* that would be . - just ignore */
break;
case 5:
- p += udf_get_filename(sb, pc->componentIdent, p,
- pc->lengthComponentIdent);
+ elen += pc->lengthComponentIdent;
+ if (elen > fromlen)
+ return -EIO;
+ comp_len = udf_get_filename(sb, pc->componentIdent,
+ pc->lengthComponentIdent,
+ p, tolen);
+ p += comp_len;
+ tolen -= comp_len;
+ if (tolen == 0)
+ return -ENAMETOOLONG;
*p++ = '/';
+ tolen--;
break;
}
- elen += sizeof(struct pathComponent) + pc->lengthComponentIdent;
}
if (p > to + 1)
p[-1] = '\0';
else
p[0] = '\0';
+ return 0;
}
static int udf_symlink_filler(struct file *file, struct page *page)
@@ -80,11 +104,17 @@ static int udf_symlink_filler(struct file *file, struct page *page)
struct inode *inode = page->mapping->host;
struct buffer_head *bh = NULL;
unsigned char *symlink;
- int err = -EIO;
+ int err;
unsigned char *p = kmap(page);
struct udf_inode_info *iinfo;
uint32_t pos;
+ /* We don't support symlinks longer than one block */
+ if (inode->i_size > inode->i_sb->s_blocksize) {
+ err = -ENAMETOOLONG;
+ goto out_unmap;
+ }
+
iinfo = UDF_I(inode);
pos = udf_block_map(inode, 0);
@@ -94,14 +124,18 @@ static int udf_symlink_filler(struct file *file, struct page *page)
} else {
bh = sb_bread(inode->i_sb, pos);
- if (!bh)
- goto out;
+ if (!bh) {
+ err = -EIO;
+ goto out_unlock_inode;
+ }
symlink = bh->b_data;
}
- udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p);
+ err = udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p, PAGE_SIZE);
brelse(bh);
+ if (err)
+ goto out_unlock_inode;
up_read(&iinfo->i_data_sem);
SetPageUptodate(page);
@@ -109,9 +143,10 @@ static int udf_symlink_filler(struct file *file, struct page *page)
unlock_page(page);
return 0;
-out:
+out_unlock_inode:
up_read(&iinfo->i_data_sem);
SetPageError(page);
+out_unmap:
kunmap(page);
unlock_page(page);
return err;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 1cc3c993ebd0..47bb3f5ca360 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -211,7 +211,8 @@ udf_get_lb_pblock(struct super_block *sb, struct kernel_lb_addr *loc,
}
/* unicode.c */
-extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int);
+extern int udf_get_filename(struct super_block *, uint8_t *, int, uint8_t *,
+ int);
extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *,
int);
extern int udf_build_ustr(struct ustr *, dstring *, int);
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index afd470e588ff..b84fee372734 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,7 +28,8 @@
#include "udf_sb.h"
-static int udf_translate_to_linux(uint8_t *, uint8_t *, int, uint8_t *, int);
+static int udf_translate_to_linux(uint8_t *, int, uint8_t *, int, uint8_t *,
+ int);
static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
{
@@ -333,8 +334,8 @@ try_again:
return u_len + 1;
}
-int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname,
- int flen)
+int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
+ uint8_t *dname, int dlen)
{
struct ustr *filename, *unifilename;
int len = 0;
@@ -347,7 +348,7 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname,
if (!unifilename)
goto out1;
- if (udf_build_ustr_exact(unifilename, sname, flen))
+ if (udf_build_ustr_exact(unifilename, sname, slen))
goto out2;
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
@@ -366,7 +367,8 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname,
} else
goto out2;
- len = udf_translate_to_linux(dname, filename->u_name, filename->u_len,
+ len = udf_translate_to_linux(dname, dlen,
+ filename->u_name, filename->u_len,
unifilename->u_name, unifilename->u_len);
out2:
kfree(unifilename);
@@ -403,10 +405,12 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname,
#define EXT_MARK '.'
#define CRC_MARK '#'
#define EXT_SIZE 5
+/* Number of chars we need to store generated CRC to make filename unique */
+#define CRC_LEN 5
-static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName,
- int udfLen, uint8_t *fidName,
- int fidNameLen)
+static int udf_translate_to_linux(uint8_t *newName, int newLen,
+ uint8_t *udfName, int udfLen,
+ uint8_t *fidName, int fidNameLen)
{
int index, newIndex = 0, needsCRC = 0;
int extIndex = 0, newExtIndex = 0, hasExt = 0;
@@ -439,7 +443,7 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName,
newExtIndex = newIndex;
}
}
- if (newIndex < 256)
+ if (newIndex < newLen)
newName[newIndex++] = curr;
else
needsCRC = 1;
@@ -467,13 +471,13 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName,
}
ext[localExtIndex++] = curr;
}
- maxFilenameLen = 250 - localExtIndex;
+ maxFilenameLen = newLen - CRC_LEN - localExtIndex;
if (newIndex > maxFilenameLen)
newIndex = maxFilenameLen;
else
newIndex = newExtIndex;
- } else if (newIndex > 250)
- newIndex = 250;
+ } else if (newIndex > newLen - CRC_LEN)
+ newIndex = newLen - CRC_LEN;
newName[newIndex++] = CRC_MARK;
valueCRC = crc_itu_t(0, fidName, fidNameLen);
newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8);

Privacy Policy