aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/9p/vfs_inode_dotl.c4
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile2
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/amigaffs.c30
-rw-r--r--fs/affs/file.c76
-rw-r--r--fs/affs/inode.c4
-rw-r--r--fs/affs/namei.c40
-rw-r--r--fs/afs/dir.c80
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/afs/mntpt.c22
-rw-r--r--fs/afs/rxrpc.c14
-rw-r--r--fs/afs/write.c9
-rw-r--r--fs/aio.c54
-rw-r--r--fs/autofs4/expire.c42
-rw-r--r--fs/autofs4/root.c25
-rw-r--r--fs/befs/linuxvfs.c20
-rw-r--r--fs/binfmt_aout.c8
-rw-r--r--fs/binfmt_elf.c278
-rw-r--r--fs/binfmt_em86.c4
-rw-r--r--fs/binfmt_misc.c398
-rw-r--r--fs/binfmt_script.c10
-rw-r--r--fs/block_dev.c10
-rw-r--r--fs/btrfs/compression.c33
-rw-r--r--fs/btrfs/compression.h4
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/lzo.c15
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/zlib.c20
-rw-r--r--fs/cachefiles/namei.c21
-rw-r--r--fs/cachefiles/xattr.c15
-rw-r--r--fs/ceph/addr.c273
-rw-r--r--fs/ceph/caps.c132
-rw-r--r--fs/ceph/debugfs.c14
-rw-r--r--fs/ceph/dir.c83
-rw-r--r--fs/ceph/file.c103
-rw-r--r--fs/ceph/inode.c77
-rw-r--r--fs/ceph/locks.c64
-rw-r--r--fs/ceph/mds_client.c41
-rw-r--r--fs/ceph/mds_client.h10
-rw-r--r--fs/ceph/snap.c37
-rw-r--r--fs/ceph/super.c16
-rw-r--r--fs/ceph/super.h55
-rw-r--r--fs/ceph/xattr.c7
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/cifs/cifs_debug.c77
-rw-r--r--fs/cifs/cifs_debug.h7
-rw-r--r--fs/cifs/cifsacl.c2
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h12
-rw-r--r--fs/cifs/cifssmb.c20
-rw-r--r--fs/cifs/connect.c51
-rw-r--r--fs/cifs/file.c16
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/misc.c32
-rw-r--r--fs/cifs/netmisc.c12
-rw-r--r--fs/cifs/readdir.c24
-rw-r--r--fs/cifs/sess.c7
-rw-r--r--fs/cifs/smb2file.c4
-rw-r--r--fs/cifs/smb2misc.c50
-rw-r--r--fs/cifs/smb2ops.c68
-rw-r--r--fs/cifs/smb2pdu.c5
-rw-r--r--fs/cifs/smb2pdu.h49
-rw-r--r--fs/cifs/smb2transport.c2
-rw-r--r--fs/cifs/transport.c4
-rw-r--r--fs/coda/cache.c2
-rw-r--r--fs/coda/coda_linux.c6
-rw-r--r--fs/coda/coda_linux.h1
-rw-r--r--fs/coda/dir.c16
-rw-r--r--fs/compat.c21
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/dcache.c271
-rw-r--r--fs/debugfs/file.c69
-rw-r--r--fs/debugfs/inode.c2
-rw-r--r--fs/dlm/debug_fs.c263
-rw-r--r--fs/dlm/lock.c76
-rw-r--r--fs/dlm/lock.h3
-rw-r--r--fs/dlm/user.c13
-rw-r--r--fs/drop_caches.c11
-rw-r--r--fs/ecryptfs/crypto.c3
-rw-r--r--fs/ecryptfs/file.c18
-rw-r--r--fs/ecryptfs/keystore.c6
-rw-r--r--fs/ecryptfs/main.c16
-rw-r--r--fs/ecryptfs/mmap.c2
-rw-r--r--fs/efivarfs/file.c4
-rw-r--r--fs/efivarfs/super.c11
-rw-r--r--fs/eventfd.c9
-rw-r--r--fs/eventpoll.c13
-rw-r--r--fs/exec.c114
-rw-r--r--fs/exportfs/expfs.c7
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/super.c10
-rw-r--r--fs/ext3/ext3.h4
-rw-r--r--fs/ext3/super.c10
-rw-r--r--fs/ext4/ext4.h45
-rw-r--r--fs/ext4/extents.c219
-rw-r--r--fs/ext4/extents_status.c321
-rw-r--r--fs/ext4/extents_status.h82
-rw-r--r--fs/ext4/inline.c35
-rw-r--r--fs/ext4/inode.c37
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/mballoc.c15
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c12
-rw-r--r--fs/ext4/namei.c1
-rw-r--r--fs/ext4/resize.c30
-rw-r--r--fs/ext4/super.c61
-rw-r--r--fs/f2fs/acl.c148
-rw-r--r--fs/f2fs/acl.h5
-rw-r--r--fs/f2fs/checkpoint.c186
-rw-r--r--fs/f2fs/data.c166
-rw-r--r--fs/f2fs/debug.c15
-rw-r--r--fs/f2fs/dir.c308
-rw-r--r--fs/f2fs/f2fs.h176
-rw-r--r--fs/f2fs/file.c212
-rw-r--r--fs/f2fs/gc.c89
-rw-r--r--fs/f2fs/gc.h5
-rw-r--r--fs/f2fs/inline.c482
-rw-r--r--fs/f2fs/inode.c44
-rw-r--r--fs/f2fs/namei.c58
-rw-r--r--fs/f2fs/node.c163
-rw-r--r--fs/f2fs/node.h8
-rw-r--r--fs/f2fs/recovery.c14
-rw-r--r--fs/f2fs/segment.c122
-rw-r--r--fs/f2fs/segment.h8
-rw-r--r--fs/f2fs/super.c29
-rw-r--r--fs/f2fs/xattr.c6
-rw-r--r--fs/f2fs/xattr.h6
-rw-r--r--fs/fat/dir.c5
-rw-r--r--fs/fat/fat.h1
-rw-r--r--fs/fat/file.c3
-rw-r--r--fs/fat/inode.c12
-rw-r--r--fs/fat/namei_vfat.c20
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/file.c2
-rw-r--r--fs/fs-writeback.c29
-rw-r--r--fs/fuse/cuse.c2
-rw-r--r--fs/fuse/dev.c29
-rw-r--r--fs/fuse/dir.c542
-rw-r--r--fs/fuse/file.c232
-rw-r--r--fs/fuse/fuse_i.h45
-rw-r--r--fs/fuse/inode.c39
-rw-r--r--fs/gfs2/dir.c39
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/file.c83
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/glops.c26
-rw-r--r--fs/gfs2/glops.h2
-rw-r--r--fs/gfs2/incore.h19
-rw-r--r--fs/gfs2/inode.c72
-rw-r--r--fs/gfs2/log.c42
-rw-r--r--fs/gfs2/main.c11
-rw-r--r--fs/gfs2/ops_fstype.c19
-rw-r--r--fs/gfs2/quota.c9
-rw-r--r--fs/gfs2/rgrp.c69
-rw-r--r--fs/gfs2/rgrp.h1
-rw-r--r--fs/gfs2/super.c112
-rw-r--r--fs/gfs2/super.h1
-rw-r--r--fs/gfs2/trans.c17
-rw-r--r--fs/hfs/catalog.c14
-rw-r--r--fs/hfsplus/catalog.c89
-rw-r--r--fs/hfsplus/dir.c11
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hppfs/hppfs.c5
-rw-r--r--fs/hugetlbfs/inode.c14
-rw-r--r--fs/inode.c16
-rw-r--r--fs/internal.h5
-rw-r--r--fs/ioctl.c8
-rw-r--r--fs/isofs/rock.c9
-rw-r--r--fs/jbd/journal.c3
-rw-r--r--fs/jbd2/journal.c8
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/jffs2/summary.c1
-rw-r--r--fs/jfs/jfs_incore.h3
-rw-r--r--fs/jfs/namei.c18
-rw-r--r--fs/jfs/super.c9
-rw-r--r--fs/kernfs/dir.c2
-rw-r--r--fs/kernfs/file.c95
-rw-r--r--fs/libfs.c12
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namei.c100
-rw-r--r--fs/namespace.c71
-rw-r--r--fs/ncpfs/dir.c12
-rw-r--r--fs/ncpfs/file.c14
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/ncpfs/mmap.c4
-rw-r--r--fs/ncpfs/ncplib_kernel.h4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c2
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/dir.c6
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c3
-rw-r--r--fs/nfs/fscache.c24
-rw-r--r--fs/nfs/getroot.c4
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/iostat.h5
-rw-r--r--fs/nfs/nfs42.h2
-rw-r--r--fs/nfs/nfs42proc.c77
-rw-r--r--fs/nfs/nfs42xdr.c139
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4client.c46
-rw-r--r--fs/nfs/nfs4file.c31
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/nfs/nfs4xdr.c12
-rw-r--r--fs/nfs/pagelist.c11
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/write.c19
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfs4proc.c57
-rw-r--r--fs/nfsd/nfs4recover.c7
-rw-r--r--fs/nfsd/nfs4state.c74
-rw-r--r--fs/nfsd/nfs4xdr.c36
-rw-r--r--fs/nfsd/nfscache.c4
-rw-r--r--fs/nfsd/nfsctl.c43
-rw-r--r--fs/nfsd/nfsd.h9
-rw-r--r--fs/nfsd/nfsfh.c2
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/state.h19
-rw-r--r--fs/nfsd/vfs.c53
-rw-r--r--fs/nfsd/vfs.h6
-rw-r--r--fs/nfsd/xdr4.h9
-rw-r--r--fs/nilfs2/file.c10
-rw-r--r--fs/nilfs2/inode.c32
-rw-r--r--fs/nilfs2/namei.c15
-rw-r--r--fs/nilfs2/the_nilfs.c3
-rw-r--r--fs/notify/dnotify/dnotify.c4
-rw-r--r--fs/notify/fdinfo.c84
-rw-r--r--fs/notify/fdinfo.h4
-rw-r--r--fs/notify/fsnotify.c8
-rw-r--r--fs/notify/fsnotify.h12
-rw-r--r--fs/notify/inode_mark.c113
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c19
-rw-r--r--fs/notify/mark.c97
-rw-r--r--fs/notify/vfsmount_mark.c109
-rw-r--r--fs/nsfs.c161
-rw-r--r--fs/ntfs/namei.c4
-rw-r--r--fs/ocfs2/alloc.c28
-rw-r--r--fs/ocfs2/alloc.h2
-rw-r--r--fs/ocfs2/aops.c18
-rw-r--r--fs/ocfs2/cluster/heartbeat.c4
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/dcache.c20
-rw-r--r--fs/ocfs2/dir.c12
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c24
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c23
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/dlmglue.c40
-rw-r--r--fs/ocfs2/file.c6
-rw-r--r--fs/ocfs2/inode.c3
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/move_extents.c3
-rw-r--r--fs/ocfs2/namei.c43
-rw-r--r--fs/ocfs2/ocfs2.h6
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/open.c20
-rw-r--r--fs/overlayfs/readdir.c8
-rw-r--r--fs/pnode.c1
-rw-r--r--fs/proc/array.c47
-rw-r--r--fs/proc/base.c60
-rw-r--r--fs/proc/fd.c3
-rw-r--r--fs/proc/generic.c163
-rw-r--r--fs/proc/inode.c10
-rw-r--r--fs/proc/internal.h13
-rw-r--r--fs/proc/meminfo.c15
-rw-r--r--fs/proc/namespaces.c153
-rw-r--r--fs/proc/proc_net.c1
-rw-r--r--fs/proc/root.c1
-rw-r--r--fs/proc/stat.c2
-rw-r--r--fs/proc/task_mmu.c107
-rw-r--r--fs/proc_namespace.c16
-rw-r--r--fs/pstore/inode.c22
-rw-r--r--fs/pstore/ram.c36
-rw-r--r--fs/pstore/ram_core.c31
-rw-r--r--fs/quota/dquot.c59
-rw-r--r--fs/quota/quota.c13
-rw-r--r--fs/read_write.c24
-rw-r--r--fs/readdir.c21
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/reiserfs.h4
-rw-r--r--fs/reiserfs/super.c14
-rw-r--r--fs/reiserfs/xattr.c21
-rw-r--r--fs/seq_file.c21
-rw-r--r--fs/signalfd.c4
-rw-r--r--fs/squashfs/Kconfig15
-rw-r--r--fs/squashfs/Makefile1
-rw-r--r--fs/squashfs/decompressor.c7
-rw-r--r--fs/squashfs/decompressor.h4
-rw-r--r--fs/squashfs/lz4_wrapper.c142
-rw-r--r--fs/squashfs/squashfs_fs.h1
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/file.c59
-rw-r--r--fs/timerfd.c27
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ubifs/journal.c7
-rw-r--r--fs/udf/dir.c31
-rw-r--r--fs/udf/inode.c14
-rw-r--r--fs/udf/namei.c17
-rw-r--r--fs/udf/super.c11
-rw-r--r--fs/udf/symlink.c57
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/udf/unicode.c28
-rw-r--r--fs/xattr.c16
-rw-r--r--fs/xfs/libxfs/xfs_ag.h281
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c1
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h3
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_attr.c3
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c77
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c3
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c2
-rw-r--r--fs/xfs/libxfs/xfs_dinode.h243
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c20
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h140
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c12
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c14
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h140
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c13
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c2
-rw-r--r--fs/xfs/libxfs/xfs_format.h1107
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c43
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h4
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c3
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c4
-rw-r--r--fs/xfs/libxfs/xfs_inum.h60
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c2
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c3
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/libxfs/xfs_sb.h584
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c2
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_acl.h36
-rw-r--r--fs/xfs/xfs_aops.c3
-rw-r--r--fs/xfs/xfs_attr_inactive.c3
-rw-r--r--fs/xfs/xfs_attr_list.c3
-rw-r--r--fs/xfs/xfs_bmap_util.c3
-rw-r--r--fs/xfs/xfs_buf.c27
-rw-r--r--fs/xfs/xfs_buf.h3
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_dir2_readdir.c21
-rw-r--r--fs/xfs/xfs_discard.c1
-rw-r--r--fs/xfs/xfs_dquot.c2
-rw-r--r--fs/xfs/xfs_dquot_item.c2
-rw-r--r--fs/xfs/xfs_error.c2
-rw-r--r--fs/xfs/xfs_export.c3
-rw-r--r--fs/xfs/xfs_extent_busy.c1
-rw-r--r--fs/xfs/xfs_extfree_item.c3
-rw-r--r--fs/xfs/xfs_file.c9
-rw-r--r--fs/xfs/xfs_filestream.c3
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_icache.c4
-rw-r--r--fs/xfs/xfs_icache.h8
-rw-r--r--fs/xfs/xfs_icreate_item.c3
-rw-r--r--fs/xfs/xfs_inode.c29
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.c3
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c18
-rw-r--r--fs/xfs/xfs_iops.c5
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_linux.h6
-rw-r--r--fs/xfs/xfs_log.c8
-rw-r--r--fs/xfs/xfs_log_cil.c3
-rw-r--r--fs/xfs/xfs_log_recover.c4
-rw-r--r--fs/xfs/xfs_message.c3
-rw-r--r--fs/xfs/xfs_mount.c33
-rw-r--r--fs/xfs/xfs_mount.h8
-rw-r--r--fs/xfs/xfs_qm.c14
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c27
-rw-r--r--fs/xfs/xfs_quotaops.c2
-rw-r--r--fs/xfs/xfs_rtalloc.c3
-rw-r--r--fs/xfs/xfs_super.c20
-rw-r--r--fs/xfs/xfs_symlink.c3
-rw-r--r--fs/xfs/xfs_trace.c2
-rw-r--r--fs/xfs/xfs_trans.c2
-rw-r--r--fs/xfs/xfs_trans_ail.c3
-rw-r--r--fs/xfs/xfs_trans_buf.c137
-rw-r--r--fs/xfs/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/xfs_trans_extfree.c3
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_xattr.c2
404 files changed, 8834 insertions, 6247 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 296482fc77a9..9ee5343d4884 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -832,7 +832,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
* moved b under k and client parallely did a lookup for
* k/b.
*/
- res = d_materialise_unique(dentry, inode);
+ res = d_splice_alias(inode, dentry);
if (!res)
v9fs_fid_add(dentry, fid);
else if (!IS_ERR(res))
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 02b64f4e576a..6054c16b8fae 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -826,8 +826,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
struct dentry *dir_dentry;
struct posix_acl *dacl = NULL, *pacl = NULL;
- p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n",
- dir->i_ino, dentry->d_name.name, omode,
+ p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n",
+ dir->i_ino, dentry, omode,
MAJOR(rdev), MINOR(rdev));
if (!new_valid_dev(rdev))
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 370b24cee4d8..c055d56ec63d 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -30,6 +30,9 @@ config COMPAT_BINFMT_ELF
config ARCH_BINFMT_ELF_RANDOMIZE_PIE
bool
+config ARCH_BINFMT_ELF_STATE
+ bool
+
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y
diff --git a/fs/Makefile b/fs/Makefile
index da0bbb456d3f..bedff48e8fdc 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o fs_pin.o
+ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 9bca88159725..ff44ff3ff015 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -135,8 +135,10 @@ extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
extern umode_t prot_to_mode(u32 prot);
extern void mode_to_prot(struct inode *inode);
+__printf(3, 4)
extern void affs_error(struct super_block *sb, const char *function,
const char *fmt, ...);
+__printf(3, 4)
extern void affs_warning(struct super_block *sb, const char *function,
const char *fmt, ...);
extern bool affs_nofilenametruncate(const struct dentry *dentry);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index abc853968fed..c852f2fa1710 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -10,8 +10,6 @@
#include "affs.h"
-static char ErrorBuffer[256];
-
/*
* Functions for accessing Amiga-FFS structures.
*/
@@ -125,7 +123,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
{
struct dentry *dentry;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
if (entry_ino == (u32)(long)dentry->d_fsdata) {
dentry->d_fsdata = (void *)inode->i_ino;
break;
@@ -444,30 +442,30 @@ mode_to_prot(struct inode *inode)
void
affs_error(struct super_block *sb, const char *function, const char *fmt, ...)
{
- va_list args;
-
- va_start(args,fmt);
- vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
- va_end(args);
+ struct va_format vaf;
+ va_list args;
- pr_crit("error (device %s): %s(): %s\n", sb->s_id,
- function,ErrorBuffer);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_crit("error (device %s): %s(): %pV\n", sb->s_id, function, &vaf);
if (!(sb->s_flags & MS_RDONLY))
pr_warn("Remounting filesystem read-only\n");
sb->s_flags |= MS_RDONLY;
+ va_end(args);
}
void
affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
{
- va_list args;
+ struct va_format vaf;
+ va_list args;
- va_start(args,fmt);
- vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_warn("(device %s): %s(): %pV\n", sb->s_id, function, &vaf);
va_end(args);
-
- pr_warn("(device %s): %s(): %s\n", sb->s_id,
- function,ErrorBuffer);
}
bool
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 1ed590aafecf..8faa6593ca6d 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,35 +12,10 @@
* affs regular file handling primitives
*/
+#include <linux/aio.h>
#include "affs.h"
-#if PAGE_SIZE < 4096
-#error PAGE_SIZE must be at least 4096
-#endif
-
-static int affs_grow_extcache(struct inode *inode, u32 lc_idx);
-static struct buffer_head *affs_alloc_extblock(struct inode *inode, struct buffer_head *bh, u32 ext);
-static inline struct buffer_head *affs_get_extblock(struct inode *inode, u32 ext);
static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
-static int affs_file_open(struct inode *inode, struct file *filp);
-static int affs_file_release(struct inode *inode, struct file *filp);
-
-const struct file_operations affs_file_operations = {
- .llseek = generic_file_llseek,
- .read = new_sync_read,
- .read_iter = generic_file_read_iter,
- .write = new_sync_write,
- .write_iter = generic_file_write_iter,
- .mmap = generic_file_mmap,
- .open = affs_file_open,
- .release = affs_file_release,
- .fsync = affs_file_fsync,
- .splice_read = generic_file_splice_read,
-};
-
-const struct inode_operations affs_file_inode_operations = {
- .setattr = affs_notify_change,
-};
static int
affs_file_open(struct inode *inode, struct file *filp)
@@ -355,7 +330,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
/* store new block */
if (bh_result->b_blocknr)
- affs_warning(sb, "get_block", "block already set (%x)", bh_result->b_blocknr);
+ affs_warning(sb, "get_block", "block already set (%lx)",
+ (unsigned long)bh_result->b_blocknr);
AFFS_BLOCK(sb, ext_bh, block) = cpu_to_be32(blocknr);
AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(block + 1);
affs_adjust_checksum(ext_bh, blocknr - bh_result->b_blocknr + 1);
@@ -377,7 +353,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
return 0;
err_big:
- affs_error(inode->i_sb,"get_block","strange block request %d", block);
+ affs_error(inode->i_sb, "get_block", "strange block request %d",
+ (int)block);
return -EIO;
err_ext:
// unlock cache
@@ -412,6 +389,22 @@ static void affs_write_failed(struct address_space *mapping, loff_t to)
}
}
+static ssize_t
+affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
+ ssize_t ret;
+
+ ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block);
+ if (ret < 0 && (rw & WRITE))
+ affs_write_failed(mapping, offset + count);
+ return ret;
+}
+
static int affs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -438,6 +431,7 @@ const struct address_space_operations affs_aops = {
.writepage = affs_writepage,
.write_begin = affs_write_begin,
.write_end = generic_write_end,
+ .direct_IO = affs_direct_IO,
.bmap = _affs_bmap
};
@@ -867,8 +861,9 @@ affs_truncate(struct inode *inode)
// lock cache
ext_bh = affs_get_extblock(inode, ext);
if (IS_ERR(ext_bh)) {
- affs_warning(sb, "truncate", "unexpected read error for ext block %u (%d)",
- ext, PTR_ERR(ext_bh));
+ affs_warning(sb, "truncate",
+ "unexpected read error for ext block %u (%ld)",
+ (unsigned int)ext, PTR_ERR(ext_bh));
return;
}
if (AFFS_I(inode)->i_lc) {
@@ -914,8 +909,9 @@ affs_truncate(struct inode *inode)
struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
u32 tmp;
if (IS_ERR(bh)) {
- affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)",
- ext, PTR_ERR(bh));
+ affs_warning(sb, "truncate",
+ "unexpected read error for last block %u (%ld)",
+ (unsigned int)ext, PTR_ERR(bh));
return;
}
tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next);
@@ -961,3 +957,19 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
mutex_unlock(&inode->i_mutex);
return ret;
}
+const struct file_operations affs_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = new_sync_read,
+ .read_iter = generic_file_read_iter,
+ .write = new_sync_write,
+ .write_iter = generic_file_write_iter,
+ .mmap = generic_file_mmap,
+ .open = affs_file_open,
+ .release = affs_file_release,
+ .fsync = affs_file_fsync,
+ .splice_read = generic_file_splice_read,
+};
+
+const struct inode_operations affs_file_inode_operations = {
+ .setattr = affs_notify_change,
+};
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index e217c511459b..d0609a282e1d 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -348,9 +348,9 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
u32 block = 0;
int retval;
- pr_debug("%s(dir=%u, inode=%u, \"%*s\", type=%d)\n",
+ pr_debug("%s(dir=%u, inode=%u, \"%pd\", type=%d)\n",
__func__, (u32)dir->i_ino,
- (u32)inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name, type);
+ (u32)inode->i_ino, dentry, type);
retval = -EIO;
bh = affs_bread(sb, inode->i_ino);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 035bd31556fc..bbc38530e924 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -190,8 +190,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
toupper_t toupper = affs_get_toupper(sb);
u32 key;
- pr_debug("%s(\"%.*s\")\n",
- __func__, (int)dentry->d_name.len, dentry->d_name.name);
+ pr_debug("%s(\"%pd\")\n", __func__, dentry);
bh = affs_bread(sb, dir->i_ino);
if (!bh)
@@ -219,8 +218,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
struct buffer_head *bh;
struct inode *inode = NULL;
- pr_debug("%s(\"%.*s\")\n",
- __func__, (int)dentry->d_name.len, dentry->d_name.name);
+ pr_debug("%s(\"%pd\")\n", __func__, dentry);
affs_lock_dir(dir);
bh = affs_find_entry(dir, dentry);
@@ -250,9 +248,9 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
int
affs_unlink(struct inode *dir, struct dentry *dentry)
{
- pr_debug("%s(dir=%d, %lu \"%.*s\")\n",
+ pr_debug("%s(dir=%d, %lu \"%pd\")\n",
__func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
- (int)dentry->d_name.len, dentry->d_name.name);
+ dentry);
return affs_remove_header(dentry);
}
@@ -264,9 +262,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
struct inode *inode;
int error;
- pr_debug("%s(%lu,\"%.*s\",0%ho)\n",
- __func__, dir->i_ino, (int)dentry->d_name.len,
- dentry->d_name.name,mode);
+ pr_debug("%s(%lu,\"%pd\",0%ho)\n",
+ __func__, dir->i_ino, dentry, mode);
inode = affs_new_inode(dir);
if (!inode)
@@ -294,9 +291,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int error;
- pr_debug("%s(%lu,\"%.*s\",0%ho)\n",
- __func__, dir->i_ino, (int)dentry->d_name.len,
- dentry->d_name.name, mode);
+ pr_debug("%s(%lu,\"%pd\",0%ho)\n",
+ __func__, dir->i_ino, dentry, mode);
inode = affs_new_inode(dir);
if (!inode)
@@ -321,9 +317,9 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
int
affs_rmdir(struct inode *dir, struct dentry *dentry)
{
- pr_debug("%s(dir=%u, %lu \"%.*s\")\n",
+ pr_debug("%s(dir=%u, %lu \"%pd\")\n",
__func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
- (int)dentry->d_name.len, dentry->d_name.name);
+ dentry);
return affs_remove_header(dentry);
}
@@ -338,9 +334,8 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
int i, maxlen, error;
char c, lc;
- pr_debug("%s(%lu,\"%.*s\" -> \"%s\")\n",
- __func__, dir->i_ino, (int)dentry->d_name.len,
- dentry->d_name.name, symname);
+ pr_debug("%s(%lu,\"%pd\" -> \"%s\")\n",
+ __func__, dir->i_ino, dentry, symname);
maxlen = AFFS_SB(sb)->s_hashsize * sizeof(u32) - 1;
inode = affs_new_inode(dir);
@@ -409,9 +404,9 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
struct inode *inode = old_dentry->d_inode;
- pr_debug("%s(%u, %u, \"%.*s\")\n",
+ pr_debug("%s(%u, %u, \"%pd\")\n",
__func__, (u32)inode->i_ino, (u32)dir->i_ino,
- (int)dentry->d_name.len,dentry->d_name.name);
+ dentry);
return affs_add_entry(dir, inode, dentry, ST_LINKFILE);
}
@@ -424,10 +419,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct buffer_head *bh = NULL;
int retval;
- pr_debug("%s(old=%u,\"%*s\" to new=%u,\"%*s\")\n",
- __func__, (u32)old_dir->i_ino, (int)old_dentry->d_name.len,
- old_dentry->d_name.name, (u32)new_dir->i_ino,
- (int)new_dentry->d_name.len, new_dentry->d_name.name);
+ pr_debug("%s(old=%u,\"%pd\" to new=%u,\"%pd\")\n",
+ __func__, (u32)old_dir->i_ino, old_dentry,
+ (u32)new_dir->i_ino, new_dentry);
retval = affs_check_name(new_dentry->d_name.name,
new_dentry->d_name.len,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index a1645b88fe8a..4ec35e9130e1 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -26,7 +26,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx);
static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
static int afs_d_delete(const struct dentry *dentry);
static void afs_d_release(struct dentry *dentry);
-static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
+static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl);
@@ -391,10 +391,11 @@ static int afs_readdir(struct file *file, struct dir_context *ctx)
* - if afs_dir_iterate_block() spots this function, it'll pass the FID
* uniquifier through dtype
*/
-static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
- loff_t fpos, u64 ino, unsigned dtype)
+static int afs_lookup_filldir(struct dir_context *ctx, const char *name,
+ int nlen, loff_t fpos, u64 ino, unsigned dtype)
{
- struct afs_lookup_cookie *cookie = _cookie;
+ struct afs_lookup_cookie *cookie =
+ container_of(ctx, struct afs_lookup_cookie, ctx);
_enter("{%s,%u},%s,%u,,%llu,%u",
cookie->name.name, cookie->name.len, name, nlen,
@@ -433,7 +434,7 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
};
int ret;
- _enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name);
+ _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
/* search the directory */
ret = afs_dir_iterate(dir, &cookie.ctx, key);
@@ -465,8 +466,8 @@ static struct inode *afs_try_auto_mntpt(
struct afs_vnode *vnode = AFS_FS_I(dir);
struct inode *inode;
- _enter("%d, %p{%s}, {%x:%u}, %p",
- ret, dentry, devname, vnode->fid.vid, vnode->fid.vnode, key);
+ _enter("%d, %p{%pd}, {%x:%u}, %p",
+ ret, dentry, dentry, vnode->fid.vid, vnode->fid.vnode, key);
if (ret != -ENOENT ||
!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
@@ -501,8 +502,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
vnode = AFS_FS_I(dir);
- _enter("{%x:%u},%p{%s},",
- vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name);
+ _enter("{%x:%u},%p{%pd},",
+ vnode->fid.vid, vnode->fid.vnode, dentry, dentry);
ASSERTCMP(dentry->d_inode, ==, NULL);
@@ -588,11 +589,11 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
vnode = AFS_FS_I(dentry->d_inode);
if (dentry->d_inode)
- _enter("{v={%x:%u} n=%s fl=%lx},",
- vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+ _enter("{v={%x:%u} n=%pd fl=%lx},",
+ vnode->fid.vid, vnode->fid.vnode, dentry,
vnode->flags);
else
- _enter("{neg n=%s}", dentry->d_name.name);
+ _enter("{neg n=%pd}", dentry);
key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
if (IS_ERR(key))
@@ -607,7 +608,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
afs_validate(dir, key);
if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
- _debug("%s: parent dir deleted", dentry->d_name.name);
+ _debug("%pd: parent dir deleted", dentry);
goto out_bad;
}
@@ -625,16 +626,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (!dentry->d_inode)
goto out_bad;
if (is_bad_inode(dentry->d_inode)) {
- printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
- parent->d_name.name, dentry->d_name.name);
+ printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
+ dentry);
goto out_bad;
}
/* if the vnode ID has changed, then the dirent points to a
* different file */
if (fid.vnode != vnode->fid.vnode) {
- _debug("%s: dirent changed [%u != %u]",
- dentry->d_name.name, fid.vnode,
+ _debug("%pd: dirent changed [%u != %u]",
+ dentry, fid.vnode,
vnode->fid.vnode);
goto not_found;
}
@@ -643,8 +644,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
* been deleted and replaced, and the original vnode ID has
* been reused */
if (fid.unique != vnode->fid.unique) {
- _debug("%s: file deleted (uq %u -> %u I:%u)",
- dentry->d_name.name, fid.unique,
+ _debug("%pd: file deleted (uq %u -> %u I:%u)",
+ dentry, fid.unique,
vnode->fid.unique,
dentry->d_inode->i_generation);
spin_lock(&vnode->lock);
@@ -656,14 +657,14 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
case -ENOENT:
/* the filename is unknown */
- _debug("%s: dirent not found", dentry->d_name.name);
+ _debug("%pd: dirent not found", dentry);
if (dentry->d_inode)
goto not_found;
goto out_valid;
default:
- _debug("failed to iterate dir %s: %d",
- parent->d_name.name, ret);
+ _debug("failed to iterate dir %pd: %d",
+ parent, ret);
goto out_bad;
}
@@ -681,8 +682,7 @@ not_found:
spin_unlock(&dentry->d_lock);
out_bad:
- _debug("dropping dentry %s/%s",
- parent->d_name.name, dentry->d_name.name);
+ _debug("dropping dentry %pd2", dentry);
dput(parent);
key_put(key);
@@ -698,7 +698,7 @@ out_bad:
*/
static int afs_d_delete(const struct dentry *dentry)
{
- _enter("%s", dentry->d_name.name);
+ _enter("%pd", dentry);
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto zap;
@@ -721,7 +721,7 @@ zap:
*/
static void afs_d_release(struct dentry *dentry)
{
- _enter("%s", dentry->d_name.name);
+ _enter("%pd", dentry);
}
/*
@@ -740,8 +740,8 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
dvnode = AFS_FS_I(dir);
- _enter("{%x:%u},{%s},%ho",
- dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+ _enter("{%x:%u},{%pd},%ho",
+ dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
@@ -801,8 +801,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
dvnode = AFS_FS_I(dir);
- _enter("{%x:%u},{%s}",
- dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+ _enter("{%x:%u},{%pd}",
+ dvnode->fid.vid, dvnode->fid.vnode, dentry);
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
@@ -843,8 +843,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
dvnode = AFS_FS_I(dir);
- _enter("{%x:%u},{%s}",
- dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+ _enter("{%x:%u},{%pd}",
+ dvnode->fid.vid, dvnode->fid.vnode, dentry);
ret = -ENAMETOOLONG;
if (dentry->d_name.len >= AFSNAMEMAX)
@@ -917,8 +917,8 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
dvnode = AFS_FS_I(dir);
- _enter("{%x:%u},{%s},%ho,",
- dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+ _enter("{%x:%u},{%pd},%ho,",
+ dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
@@ -980,10 +980,10 @@ static int afs_link(struct dentry *from, struct inode *dir,
vnode = AFS_FS_I(from->d_inode);
dvnode = AFS_FS_I(dir);
- _enter("{%x:%u},{%x:%u},{%s}",
+ _enter("{%x:%u},{%x:%u},{%pd}",
vnode->fid.vid, vnode->fid.vnode,
dvnode->fid.vid, dvnode->fid.vnode,
- dentry->d_name.name);
+ dentry);
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
@@ -1025,8 +1025,8 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
dvnode = AFS_FS_I(dir);
- _enter("{%x:%u},{%s},%s",
- dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
+ _enter("{%x:%u},{%pd},%s",
+ dvnode->fid.vid, dvnode->fid.vnode, dentry,
content);
ret = -EINVAL;
@@ -1093,11 +1093,11 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
orig_dvnode = AFS_FS_I(old_dir);
new_dvnode = AFS_FS_I(new_dir);
- _enter("{%x:%u},{%x:%u},{%x:%u},{%s}",
+ _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}",
orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
vnode->fid.vid, vnode->fid.vnode,
new_dvnode->fid.vid, new_dvnode->fid.vnode,
- new_dentry->d_name.name);
+ new_dentry);
key = afs_request_key(orig_dvnode->volume->cell);
if (IS_ERR(key)) {
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 294671288449..8a1d38ef0fc2 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -462,8 +462,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
struct key *key;
int ret;
- _enter("{%x:%u},{n=%s},%x",
- vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+ _enter("{%x:%u},{n=%pd},%x",
+ vnode->fid.vid, vnode->fid.vnode, dentry,
attr->ia_valid);
if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 9682c33d5daf..938c5ab06d5a 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -106,14 +106,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
struct dentry *dentry,
unsigned int flags)
{
- _enter("%p,%p{%p{%s},%s}",
- dir,
- dentry,
- dentry->d_parent,
- dentry->d_parent ?
- dentry->d_parent->d_name.name : (const unsigned char *) "",
- dentry->d_name.name);
-
+ _enter("%p,%p{%pd2}", dir, dentry, dentry);
return ERR_PTR(-EREMOTE);
}
@@ -122,14 +115,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
*/
static int afs_mntpt_open(struct inode *inode, struct file *file)
{
- _enter("%p,%p{%p{%s},%s}",
- inode, file,
- file->f_path.dentry->d_parent,
- file->f_path.dentry->d_parent ?
- file->f_path.dentry->d_parent->d_name.name :
- (const unsigned char *) "",
- file->f_path.dentry->d_name.name);
-
+ _enter("%p,%p{%pD2}", inode, file, file);
return -EREMOTE;
}
@@ -146,7 +132,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
bool rwpath = false;
int ret;
- _enter("{%s}", mntpt->d_name.name);
+ _enter("{%pd}", mntpt);
BUG_ON(!mntpt->d_inode);
@@ -242,7 +228,7 @@ struct vfsmount *afs_d_automount(struct path *path)
{
struct vfsmount *newmnt;
- _enter("{%s}", path->dentry->d_name.name);
+ _enter("{%pd}", path->dentry);
newmnt = afs_mntpt_do_automount(path->dentry);
if (IS_ERR(newmnt))
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 03a3beb17004..06e14bfb3496 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -306,8 +306,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
_debug("- range %u-%u%s",
offset, to, msg->msg_flags ? " [more]" : "");
- msg->msg_iov = (struct iovec *) iov;
- msg->msg_iovlen = 1;
+ iov_iter_init(&msg->msg_iter, WRITE,
+ (struct iovec *) iov, 1, to - offset);
/* have to change the state *before* sending the last
* packet as RxRPC might give us the reply before it
@@ -384,8 +384,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
msg.msg_name = NULL;
msg.msg_namelen = 0;
- msg.msg_iov = (struct iovec *) iov;
- msg.msg_iovlen = 1;
+ iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1,
+ call->request_size);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
@@ -778,8 +778,7 @@ void afs_send_empty_reply(struct afs_call *call)
iov[0].iov_len = 0;
msg.msg_name = NULL;
msg.msg_namelen = 0;
- msg.msg_iov = iov;
- msg.msg_iovlen = 0;
+ iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0); /* WTF? */
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -815,8 +814,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
iov[0].iov_len = len;
msg.msg_name = NULL;
msg.msg_namelen = 0;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
+ iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index ab6adfd52516..c13cb08964ed 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -682,14 +682,13 @@ int afs_writeback_all(struct afs_vnode *vnode)
*/
int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct dentry *dentry = file->f_path.dentry;
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = file_inode(file);
struct afs_writeback *wb, *xwb;
- struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
int ret;
- _enter("{%x:%u},{n=%s},%d",
- vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+ _enter("{%x:%u},{n=%pD},%d",
+ vnode->fid.vid, vnode->fid.vnode, file,
datasync);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
diff --git a/fs/aio.c b/fs/aio.c
index 84a751005f5b..1b7893ecc296 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -165,6 +165,15 @@ static struct vfsmount *aio_mnt;
static const struct file_operations aio_ring_fops;
static const struct address_space_operations aio_ctx_aops;
+/* Backing dev info for aio fs.
+ * -no dirty page accounting or writeback happens
+ */
+static struct backing_dev_info aio_fs_backing_dev_info = {
+ .name = "aiofs",
+ .state = 0,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY,
+};
+
static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
{
struct qstr this = QSTR_INIT("[aio]", 5);
@@ -176,6 +185,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
inode->i_mapping->a_ops = &aio_ctx_aops;
inode->i_mapping->private_data = ctx;
+ inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info;
inode->i_size = PAGE_SIZE * nr_pages;
path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
@@ -220,6 +230,9 @@ static int __init aio_setup(void)
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
+ if (bdi_init(&aio_fs_backing_dev_info))
+ panic("Failed to init aio fs backing dev info.");
+
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
@@ -273,19 +286,39 @@ static void aio_free_ring(struct kioctx *ctx)
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
{
+ vma->vm_flags |= VM_DONTEXPAND;
vma->vm_ops = &generic_file_vm_ops;
return 0;
}
+/*
+ * aio_ring_remap() - mremap hook for the aio ring file.
+ * @file: the aio ring file backing the mapping
+ * @vma: the vma describing the mapping whose start address is recorded
+ *
+ * Walks the ioctx table of the vma's mm with mm->ioctx_lock held, looks for
+ * the kioctx whose aio_ring_file is @file and stores vma->vm_start in both
+ * its user_id and mmap_base. Nothing is changed if no context matches.
+ */
+static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct kioctx_table *table;
+	int i;
+
+	spin_lock(&mm->ioctx_lock);
+	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
+	/*
+	 * NOTE(review): the table pointer is presumably NULL for an mm that
+	 * never set up an aio context of its own; bail out instead of
+	 * dereferencing it.
+	 */
+	if (!table)
+		goto out_unlock;
+
+	for (i = 0; i < table->nr; i++) {
+		struct kioctx *ctx;
+
+		ctx = table->table[i];
+		if (ctx && ctx->aio_ring_file == file) {
+			ctx->user_id = ctx->mmap_base = vma->vm_start;
+			break;
+		}
+	}
+
+out_unlock:
+	rcu_read_unlock();
+	spin_unlock(&mm->ioctx_lock);
+}
+
static const struct file_operations aio_ring_fops = {
.mmap = aio_ring_mmap,
+ .mremap = aio_ring_remap,
};
-static int aio_set_page_dirty(struct page *page)
-{
- return 0;
-}
-
#if IS_ENABLED(CONFIG_MIGRATION)
static int aio_migratepage(struct address_space *mapping, struct page *new,
struct page *old, enum migrate_mode mode)
@@ -357,7 +390,7 @@ out:
#endif
static const struct address_space_operations aio_ctx_aops = {
- .set_page_dirty = aio_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_no_writeback,
#if IS_ENABLED(CONFIG_MIGRATION)
.migratepage = aio_migratepage,
#endif
@@ -412,7 +445,6 @@ static int aio_setup_ring(struct kioctx *ctx)
pr_debug("pid(%d) page[%d]->count=%d\n",
current->pid, i, page_count(page));
SetPageUptodate(page);
- SetPageDirty(page);
unlock_page(page);
ctx->ring_pages[i] = page;
@@ -1221,8 +1253,12 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
* the ringbuffer empty. So in practice we should be ok, but it's
* something to be aware of when touching this code.
*/
- wait_event_interruptible_hrtimeout(ctx->wait,
- aio_read_events(ctx, min_nr, nr, event, &ret), until);
+ if (until.tv64 == 0)
+ aio_read_events(ctx, min_nr, nr, event, &ret);
+ else
+ wait_event_interruptible_hrtimeout(ctx->wait,
+ aio_read_events(ctx, min_nr, nr, event, &ret),
+ until);
if (!ret && signal_pending(current))
ret = -EINTR;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 683a5b9ce22a..bfdbaba9c2ba 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -41,8 +41,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
struct path path = {.mnt = mnt, .dentry = dentry};
int status = 1;
- DPRINTK("dentry %p %.*s",
- dentry, (int)dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("dentry %p %pd", dentry, dentry);
path_get(&path);
@@ -85,7 +84,7 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev,
spin_lock(&root->d_lock);
if (prev)
- next = prev->d_u.d_child.next;
+ next = prev->d_child.next;
else {
prev = dget_dlock(root);
next = prev->d_subdirs.next;
@@ -99,13 +98,13 @@ cont:
return NULL;
}
- q = list_entry(next, struct dentry, d_u.d_child);
+ q = list_entry(next, struct dentry, d_child);
spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
/* Already gone or negative dentry (under construction) - try next */
if (!d_count(q) || !simple_positive(q)) {
spin_unlock(&q->d_lock);
- next = q->d_u.d_child.next;
+ next = q->d_child.next;
goto cont;
}
dget_dlock(q);
@@ -155,13 +154,13 @@ again:
goto relock;
}
spin_unlock(&p->d_lock);
- next = p->d_u.d_child.next;
+ next = p->d_child.next;
p = parent;
if (next != &parent->d_subdirs)
break;
}
}
- ret = list_entry(next, struct dentry, d_u.d_child);
+ ret = list_entry(next, struct dentry, d_child);
spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
/* Negative dentry - try next */
@@ -192,8 +191,7 @@ static int autofs4_direct_busy(struct vfsmount *mnt,
unsigned long timeout,
int do_now)
{
- DPRINTK("top %p %.*s",
- top, (int) top->d_name.len, top->d_name.name);
+ DPRINTK("top %p %pd", top, top);
/* If it's busy update the expiry counters */
if (!may_umount_tree(mnt)) {
@@ -221,8 +219,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
struct autofs_info *top_ino = autofs4_dentry_ino(top);
struct dentry *p;
- DPRINTK("top %p %.*s",
- top, (int)top->d_name.len, top->d_name.name);
+ DPRINTK("top %p %pd", top, top);
/* Negative dentry - give up */
if (!simple_positive(top))
@@ -230,8 +227,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
p = NULL;
while ((p = get_next_positive_dentry(p, top))) {
- DPRINTK("dentry %p %.*s",
- p, (int) p->d_name.len, p->d_name.name);
+ DPRINTK("dentry %p %pd", p, p);
/*
* Is someone visiting anywhere in the subtree ?
@@ -277,13 +273,11 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
{
struct dentry *p;
- DPRINTK("parent %p %.*s",
- parent, (int)parent->d_name.len, parent->d_name.name);
+ DPRINTK("parent %p %pd", parent, parent);
p = NULL;
while ((p = get_next_positive_dentry(p, parent))) {
- DPRINTK("dentry %p %.*s",
- p, (int) p->d_name.len, p->d_name.name);
+ DPRINTK("dentry %p %pd", p, p);
if (d_mountpoint(p)) {
/* Can we umount this guy */
@@ -368,8 +362,7 @@ static struct dentry *should_expire(struct dentry *dentry,
* offset (autofs-5.0+).
*/
if (d_mountpoint(dentry)) {
- DPRINTK("checking mountpoint %p %.*s",
- dentry, (int)dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("checking mountpoint %p %pd", dentry, dentry);
/* Can we umount this guy */
if (autofs4_mount_busy(mnt, dentry))
@@ -382,8 +375,7 @@ static struct dentry *should_expire(struct dentry *dentry,
}
if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) {
- DPRINTK("checking symlink %p %.*s",
- dentry, (int)dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("checking symlink %p %pd", dentry, dentry);
/*
* A symlink can't be "busy" in the usual sense so
* just check last used for expire timeout.
@@ -479,8 +471,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
return NULL;
found:
- DPRINTK("returning %p %.*s",
- expired, (int)expired->d_name.len, expired->d_name.name);
+ DPRINTK("returning %p %pd", expired, expired);
ino->flags |= AUTOFS_INF_EXPIRING;
smp_mb();
ino->flags &= ~AUTOFS_INF_NO_RCU;
@@ -489,7 +480,7 @@ found:
spin_lock(&sbi->lookup_lock);
spin_lock(&expired->d_parent->d_lock);
spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
+ list_move(&expired->d_parent->d_subdirs, &expired->d_child);
spin_unlock(&expired->d_lock);
spin_unlock(&expired->d_parent->d_lock);
spin_unlock(&sbi->lookup_lock);
@@ -512,8 +503,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
if (ino->flags & AUTOFS_INF_EXPIRING) {
spin_unlock(&sbi->fs_lock);
- DPRINTK("waiting for expire %p name=%.*s",
- dentry, dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("waiting for expire %p name=%pd", dentry, dentry);
status = autofs4_wait(sbi, dentry, NFY_NONE);
wait_for_completion(&ino->expire_complete);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d76d083f2f06..dbb5b7212ce1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -108,8 +108,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
struct dentry *dentry = file->f_path.dentry;
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- DPRINTK("file=%p dentry=%p %.*s",
- file, dentry, dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("file=%p dentry=%p %pd", file, dentry, dentry);
if (autofs4_oz_mode(sbi))
goto out;
@@ -279,8 +278,7 @@ static int autofs4_mount_wait(struct dentry *dentry, bool rcu_walk)
if (ino->flags & AUTOFS_INF_PENDING) {
if (rcu_walk)
return -ECHILD;
- DPRINTK("waiting for mount name=%.*s",
- dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("waiting for mount name=%pd", dentry);
status = autofs4_wait(sbi, dentry, NFY_MOUNT);
DPRINTK("mount wait done status=%d", status);
}
@@ -340,8 +338,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
- DPRINTK("dentry=%p %.*s",
- dentry, dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("dentry=%p %pd", dentry, dentry);
/* The daemon never triggers a mount. */
if (autofs4_oz_mode(sbi))
@@ -428,8 +425,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
- DPRINTK("dentry=%p %.*s",
- dentry, dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("dentry=%p %pd", dentry, dentry);
/* The daemon never waits. */
if (autofs4_oz_mode(sbi)) {
@@ -504,7 +500,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u
struct autofs_info *ino;
struct dentry *active;
- DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("name = %pd", dentry);
/* File name too long to exist */
if (dentry->d_name.len > NAME_MAX)
@@ -558,8 +554,7 @@ static int autofs4_dir_symlink(struct inode *dir,
size_t size = strlen(symname);
char *cp;
- DPRINTK("%s <- %.*s", symname,
- dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("%s <- %pd", symname, dentry);
if (!autofs4_oz_mode(sbi))
return -EACCES;
@@ -687,7 +682,7 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
/* only consider parents below dentrys in the root */
if (IS_ROOT(parent->d_parent))
return;
- d_child = &dentry->d_u.d_child;
+ d_child = &dentry->d_child;
/* Set parent managed if it's becoming empty */
if (d_child->next == &parent->d_subdirs &&
d_child->prev == &parent->d_subdirs)
@@ -701,8 +696,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
struct autofs_info *ino = autofs4_dentry_ino(dentry);
struct autofs_info *p_ino;
- DPRINTK("dentry %p, removing %.*s",
- dentry, dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("dentry %p, removing %pd", dentry, dentry);
if (!autofs4_oz_mode(sbi))
return -EACCES;
@@ -744,8 +738,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m
if (!autofs4_oz_mode(sbi))
return -EACCES;
- DPRINTK("dentry %p, creating %.*s",
- dentry, dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("dentry %p, creating %pd", dentry, dentry);
BUG_ON(!ino);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 4cf61ec6b7a8..edf47774b03d 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -172,8 +172,8 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
char *utfname;
const char *name = dentry->d_name.name;
- befs_debug(sb, "---> %s name %s inode %ld", __func__,
- dentry->d_name.name, dir->i_ino);
+ befs_debug(sb, "---> %s name %pd inode %ld", __func__,
+ dentry, dir->i_ino);
/* Convert to UTF-8 */
if (BEFS_SB(sb)->nls) {
@@ -191,8 +191,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
}
if (ret == BEFS_BT_NOT_FOUND) {
- befs_debug(sb, "<--- %s %s not found", __func__,
- dentry->d_name.name);
+ befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
return ERR_PTR(-ENOENT);
} else if (ret != BEFS_OK || offset == 0) {
@@ -222,10 +221,9 @@ befs_readdir(struct file *file, struct dir_context *ctx)
size_t keysize;
unsigned char d_type;
char keybuf[BEFS_NAME_LEN + 1];
- const char *dirname = file->f_path.dentry->d_name.name;
- befs_debug(sb, "---> %s name %s, inode %ld, ctx->pos %lld",
- __func__, dirname, inode->i_ino, ctx->pos);
+ befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld",
+ __func__, file, inode->i_ino, ctx->pos);
more:
result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1,
@@ -233,8 +231,8 @@ more:
if (result == BEFS_ERR) {
befs_debug(sb, "<--- %s ERROR", __func__);
- befs_error(sb, "IO error reading %s (inode %lu)",
- dirname, inode->i_ino);
+ befs_error(sb, "IO error reading %pD (inode %lu)",
+ file, inode->i_ino);
return -EIO;
} else if (result == BEFS_BT_END) {
@@ -271,10 +269,6 @@ more:
}
ctx->pos++;
goto more;
-
- befs_debug(sb, "<--- %s pos %lld", __func__, ctx->pos);
-
- return 0;
}
static struct inode *
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 929dec08c348..4c556680fa74 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -292,8 +292,8 @@ static int load_aout_binary(struct linux_binprm * bprm)
if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
{
printk(KERN_WARNING
- "fd_offset is not page aligned. Please convert program: %s\n",
- bprm->file->f_path.dentry->d_name.name);
+ "fd_offset is not page aligned. Please convert program: %pD\n",
+ bprm->file);
}
if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
@@ -375,8 +375,8 @@ static int load_aout_library(struct file *file)
if (printk_ratelimit())
{
printk(KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert library: %s\n",
- file->f_path.dentry->d_name.name);
+ "N_TXTOFF is not page aligned. Please convert library: %pD\n",
+ file);
}
vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d8fc0605b9d2..02b16910f4c9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -386,6 +386,127 @@ static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
+/**
+ * load_elf_phdrs() - load ELF program headers
+ * @elf_ex: ELF header of the binary whose program headers should be loaded
+ * @elf_file: the opened ELF binary file
+ *
+ * Loads ELF program headers from the binary file elf_file, which has the ELF
+ * header pointed to by elf_ex, into a newly allocated array. The caller is
+ * responsible for freeing the allocated data. Returns NULL upon failure (the
+ * error code computed internally is not passed back to the caller).
+ */
+static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
+ struct file *elf_file)
+{
+ struct elf_phdr *elf_phdata = NULL;
+ int retval, size, err = -1;
+
+ /*
+ * If the size of this structure has changed, then punt, since
+ * we will be doing the wrong thing.
+ */
+ if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
+ goto out;
+
+ /* Sanity check the number of program headers... */
+ if (elf_ex->e_phnum < 1 ||
+ elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
+ goto out;
+
+ /* ...and their total size. */
+ size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
+ if (size > ELF_MIN_ALIGN)
+ goto out;
+
+ elf_phdata = kmalloc(size, GFP_KERNEL);
+ if (!elf_phdata)
+ goto out;
+
+ /* Read in the program headers */
+ retval = kernel_read(elf_file, elf_ex->e_phoff,
+ (char *)elf_phdata, size);
+ if (retval != size) {
+ err = (retval < 0) ? retval : -EIO;
+ goto out;
+ }
+
+ /* Success! */
+ err = 0;
+out:
+ if (err) {
+ kfree(elf_phdata);
+ elf_phdata = NULL;
+ }
+ return elf_phdata;
+}
+
+#ifndef CONFIG_ARCH_BINFMT_ELF_STATE
+
+/**
+ * struct arch_elf_state - arch-specific ELF loading state
+ *
+ * This structure is used to preserve architecture specific data during
+ * the loading of an ELF file, throughout the checking of architecture
+ * specific ELF headers & through to the point where the ELF load is
+ * known to be proceeding (ie. SET_PERSONALITY).
+ *
+ * This implementation is a dummy for architectures which require no
+ * specific state.
+ */
+struct arch_elf_state {
+};
+
+#define INIT_ARCH_ELF_STATE {}
+
+/**
+ * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
+ * @ehdr: The main ELF header
+ * @phdr: The program header to check
+ * @elf: The open ELF file
+ * @is_interp: True if the phdr is from the interpreter of the ELF being
+ * loaded, else false.
+ * @state: Architecture-specific state preserved throughout the process
+ * of loading the ELF.
+ *
+ * Inspects the program header phdr to validate its correctness and/or
+ * suitability for the system. Called once per ELF program header in the
+ * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
+ * interpreter.
+ *
+ * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
+ * with that return code.
+ */
+static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
+ struct elf_phdr *phdr,
+ struct file *elf, bool is_interp,
+ struct arch_elf_state *state)
+{
+ /* Dummy implementation, always proceed */
+ return 0;
+}
+
+/**
+ * arch_check_elf() - give arch code a final chance to reject an ELF
+ * @ehdr: The main ELF header
+ * @has_interp: True if the ELF has an interpreter, else false.
+ * @state: Architecture-specific state preserved throughout the process
+ * of loading the ELF.
+ *
+ * Provides a final opportunity for architecture code to reject the loading
+ * of the ELF & cause an exec syscall to return an error. This is called after
+ * all program headers to be checked by arch_elf_pt_proc have been.
+ *
+ * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
+ * with that return code.
+ */
+static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
+ struct arch_elf_state *state)
+{
+ /* Dummy implementation, always proceed */
+ return 0;
+}
+
+#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
/* This is much more generalized than the library routine read function,
so we keep this separate. Technically the library read function
@@ -394,16 +515,15 @@ static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
struct file *interpreter, unsigned long *interp_map_addr,
- unsigned long no_base)
+ unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
- struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
unsigned long load_addr = 0;
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
unsigned long error = ~0UL;
unsigned long total_size;
- int retval, i, size;
+ int i;
/* First of all, some simple consistency checks */
if (interp_elf_ex->e_type != ET_EXEC &&
@@ -414,40 +534,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
if (!interpreter->f_op->mmap)
goto out;
- /*
- * If the size of this structure has changed, then punt, since
- * we will be doing the wrong thing.
- */
- if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
- goto out;
- if (interp_elf_ex->e_phnum < 1 ||
- interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
- goto out;
-
- /* Now read in all of the header information */
- size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
- if (size > ELF_MIN_ALIGN)
- goto out;
- elf_phdata = kmalloc(size, GFP_KERNEL);
- if (!elf_phdata)
- goto out;
-
- retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
- (char *)elf_phdata, size);
- error = -EIO;
- if (retval != size) {
- if (retval < 0)
- error = retval;
- goto out_close;
- }
-
- total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+ total_size = total_mapping_size(interp_elf_phdata,
+ interp_elf_ex->e_phnum);
if (!total_size) {
error = -EINVAL;
- goto out_close;
+ goto out;
}
- eppnt = elf_phdata;
+ eppnt = interp_elf_phdata;
for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
if (eppnt->p_type == PT_LOAD) {
int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
@@ -474,7 +568,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
*interp_map_addr = map_addr;
error = map_addr;
if (BAD_ADDR(map_addr))
- goto out_close;
+ goto out;
if (!load_addr_set &&
interp_elf_ex->e_type == ET_DYN) {
@@ -493,7 +587,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
eppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - eppnt->p_memsz < k) {
error = -ENOMEM;
- goto out_close;
+ goto out;
}
/*
@@ -523,7 +617,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
*/
if (padzero(elf_bss)) {
error = -EFAULT;
- goto out_close;
+ goto out;
}
/* What we have mapped so far */
@@ -532,13 +626,10 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
/* Map the last of the bss segment */
error = vm_brk(elf_bss, last_bss - elf_bss);
if (BAD_ADDR(error))
- goto out_close;
+ goto out;
}
error = load_addr;
-
-out_close:
- kfree(elf_phdata);
out:
return error;
}
@@ -575,10 +666,9 @@ static int load_elf_binary(struct linux_binprm *bprm)
int load_addr_set = 0;
char * elf_interpreter = NULL;
unsigned long error;
- struct elf_phdr *elf_ppnt, *elf_phdata;
+ struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
unsigned long elf_bss, elf_brk;
int retval, i;
- unsigned int size;
unsigned long elf_entry;
unsigned long interp_load_addr = 0;
unsigned long start_code, end_code, start_data, end_data;
@@ -589,6 +679,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
} *loc;
+ struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
loc = kmalloc(sizeof(*loc), GFP_KERNEL);
if (!loc) {
@@ -611,26 +702,10 @@ static int load_elf_binary(struct linux_binprm *bprm)
if (!bprm->file->f_op->mmap)
goto out;
- /* Now read in all of the header information */
- if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
- goto out;
- if (loc->elf_ex.e_phnum < 1 ||
- loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
- goto out;
- size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
- retval = -ENOMEM;
- elf_phdata = kmalloc(size, GFP_KERNEL);
+ elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
if (!elf_phdata)
goto out;
- retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
- (char *)elf_phdata, size);
- if (retval != size) {
- if (retval >= 0)
- retval = -EIO;
- goto out_free_ph;
- }
-
elf_ppnt = elf_phdata;
elf_bss = 0;
elf_brk = 0;
@@ -699,12 +774,21 @@ static int load_elf_binary(struct linux_binprm *bprm)
elf_ppnt = elf_phdata;
for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
- if (elf_ppnt->p_type == PT_GNU_STACK) {
+ switch (elf_ppnt->p_type) {
+ case PT_GNU_STACK:
if (elf_ppnt->p_flags & PF_X)
executable_stack = EXSTACK_ENABLE_X;
else
executable_stack = EXSTACK_DISABLE_X;
break;
+
+ case PT_LOPROC ... PT_HIPROC:
+ retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
+ bprm->file, false,
+ &arch_state);
+ if (retval)
+ goto out_free_dentry;
+ break;
}
/* Some simple consistency checks for the interpreter */
@@ -716,8 +800,36 @@ static int load_elf_binary(struct linux_binprm *bprm)
/* Verify the interpreter has a valid arch */
if (!elf_check_arch(&loc->interp_elf_ex))
goto out_free_dentry;
+
+ /* Load the interpreter program headers */
+ interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
+ interpreter);
+ if (!interp_elf_phdata)
+ goto out_free_dentry;
+
+ /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
+ elf_ppnt = interp_elf_phdata;
+ for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
+ switch (elf_ppnt->p_type) {
+ case PT_LOPROC ... PT_HIPROC:
+ retval = arch_elf_pt_proc(&loc->interp_elf_ex,
+ elf_ppnt, interpreter,
+ true, &arch_state);
+ if (retval)
+ goto out_free_dentry;
+ break;
+ }
}
+ /*
+ * Allow arch code to reject the ELF at this point, whilst it's
+ * still possible to return an error to the code that invoked
+ * the exec syscall.
+ */
+ retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
+ if (retval)
+ goto out_free_dentry;
+
/* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);
if (retval)
@@ -725,7 +837,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
- SET_PERSONALITY(loc->elf_ex);
+ SET_PERSONALITY2(loc->elf_ex, &arch_state);
if (elf_read_implies_exec(loc->elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
@@ -890,7 +1002,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
&interp_map_addr,
- load_bias);
+ load_bias, interp_elf_phdata);
if (!IS_ERR((void *)elf_entry)) {
/*
* load_elf_interp() returns relocation
@@ -917,6 +1029,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
}
}
+ kfree(interp_elf_phdata);
kfree(elf_phdata);
set_binfmt(&elf_format);
@@ -981,6 +1094,7 @@ out_ret:
/* error cleanup */
out_free_dentry:
+ kfree(interp_elf_phdata);
allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
@@ -1994,18 +2108,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
shdr4extnum->sh_info = segs;
}
-static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
- unsigned long mm_flags)
-{
- struct vm_area_struct *vma;
- size_t size = 0;
-
- for (vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma))
- size += vma_dump_size(vma, mm_flags);
- return size;
-}
-
/*
* Actual dumper
*
@@ -2017,7 +2119,8 @@ static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
mm_segment_t fs;
- int segs;
+ int segs, i;
+ size_t vma_data_size = 0;
struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff;
@@ -2026,6 +2129,7 @@ static int elf_core_dump(struct coredump_params *cprm)
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
elf_addr_t e_shoff;
+ elf_addr_t *vma_filesz = NULL;
/*
* We no longer stop all VM operations.
@@ -2093,7 +2197,20 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
- offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
+ vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
+ if (!vma_filesz)
+ goto end_coredump;
+
+ for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
+ vma = next_vma(vma, gate_vma)) {
+ unsigned long dump_size;
+
+ dump_size = vma_dump_size(vma, cprm->mm_flags);
+ vma_filesz[i++] = dump_size;
+ vma_data_size += dump_size;
+ }
+
+ offset += vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@@ -2113,7 +2230,7 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* Write program headers for segments dump */
- for (vma = first_vma(current, gate_vma); vma != NULL;
+ for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma)) {
struct elf_phdr phdr;
@@ -2121,7 +2238,7 @@ static int elf_core_dump(struct coredump_params *cprm)
phdr.p_offset = offset;
phdr.p_vaddr = vma->vm_start;
phdr.p_paddr = 0;
- phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
+ phdr.p_filesz = vma_filesz[i++];
phdr.p_memsz = vma->vm_end - vma->vm_start;
offset += phdr.p_filesz;
phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
@@ -2149,12 +2266,12 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!dump_skip(cprm, dataoff - cprm->written))
goto end_coredump;
- for (vma = first_vma(current, gate_vma); vma != NULL;
+ for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma)) {
unsigned long addr;
unsigned long end;
- end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
+ end = vma->vm_start + vma_filesz[i++];
for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
struct page *page;
@@ -2187,6 +2304,7 @@ end_coredump:
cleanup:
free_note_info(&info);
kfree(shdr4extnum);
+ kfree(vma_filesz);
kfree(phdr4note);
kfree(elf);
out:
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f37b08cea1f7..490538536cb4 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -42,6 +42,10 @@ static int load_em86(struct linux_binprm *bprm)
return -ENOEXEC;
}
+ /* Need to be able to load the file after exec */
+ if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
+ return -ENOENT;
+
allow_write_access(bprm->file);
fput(bprm->file);
bprm->file = NULL;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index fd8beb9657a2..97aff2879cda 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -1,21 +1,14 @@
/*
- * binfmt_misc.c
+ * binfmt_misc.c
*
- * Copyright (C) 1997 Richard Günther
+ * Copyright (C) 1997 Richard Günther
*
- * binfmt_misc detects binaries via a magic or filename extension and invokes
- * a specified wrapper. This should obsolete binfmt_java, binfmt_em86 and
- * binfmt_mz.
- *
- * 1997-04-25 first version
- * [...]
- * 1997-05-19 cleanup
- * 1997-06-26 hpa: pass the real filename rather than argv[0]
- * 1997-06-30 minor cleanup
- * 1997-08-09 removed extension stripping, locking cleanup
- * 2001-02-28 AV: rewritten into something that resembles C. Original didn't.
+ * binfmt_misc detects binaries via a magic or filename extension and invokes
+ * a specified wrapper. See Documentation/binfmt_misc.txt for more details.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -30,8 +23,13 @@
#include <linux/mount.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
+#ifdef DEBUG
+# define USE_DEBUG 1
+#else
+# define USE_DEBUG 0
+#endif
enum {
VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
@@ -41,9 +39,9 @@ static LIST_HEAD(entries);
static int enabled = 1;
enum {Enabled, Magic};
-#define MISC_FMT_PRESERVE_ARGV0 (1<<31)
-#define MISC_FMT_OPEN_BINARY (1<<30)
-#define MISC_FMT_CREDENTIALS (1<<29)
+#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
+#define MISC_FMT_OPEN_BINARY (1 << 30)
+#define MISC_FMT_CREDENTIALS (1 << 29)
typedef struct {
struct list_head list;
@@ -87,20 +85,24 @@ static Node *check_file(struct linux_binprm *bprm)
char *p = strrchr(bprm->interp, '.');
struct list_head *l;
+ /* Walk all the registered handlers. */
list_for_each(l, &entries) {
Node *e = list_entry(l, Node, list);
char *s;
int j;
+ /* Make sure this one is currently enabled. */
if (!test_bit(Enabled, &e->flags))
continue;
+ /* Do matching based on extension if applicable. */
if (!test_bit(Magic, &e->flags)) {
if (p && !strcmp(e->magic, p + 1))
return e;
continue;
}
+ /* Do matching based on magic & mask. */
s = bprm->buf + e->offset;
if (e->mask) {
for (j = 0; j < e->size; j++)
@@ -123,7 +125,7 @@ static Node *check_file(struct linux_binprm *bprm)
static int load_misc_binary(struct linux_binprm *bprm)
{
Node *fmt;
- struct file * interp_file = NULL;
+ struct file *interp_file = NULL;
char iname[BINPRM_BUF_SIZE];
const char *iname_addr = iname;
int retval;
@@ -131,7 +133,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
retval = -ENOEXEC;
if (!enabled)
- goto _ret;
+ goto ret;
/* to keep locking time low, we copy the interpreter string */
read_lock(&entries_lock);
@@ -140,25 +142,30 @@ static int load_misc_binary(struct linux_binprm *bprm)
strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
read_unlock(&entries_lock);
if (!fmt)
- goto _ret;
+ goto ret;
+
+ /* Need to be able to load the file after exec */
+ if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
+ return -ENOENT;
if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
retval = remove_arg_zero(bprm);
if (retval)
- goto _ret;
+ goto ret;
}
if (fmt->flags & MISC_FMT_OPEN_BINARY) {
/* if the binary should be opened on behalf of the
* interpreter than keep it open and assign descriptor
- * to it */
- fd_binary = get_unused_fd();
- if (fd_binary < 0) {
- retval = fd_binary;
- goto _ret;
- }
- fd_install(fd_binary, bprm->file);
+ * to it
+ */
+ fd_binary = get_unused_fd_flags(0);
+ if (fd_binary < 0) {
+ retval = fd_binary;
+ goto ret;
+ }
+ fd_install(fd_binary, bprm->file);
/* if the binary is not readable than enforce mm->dumpable=0
regardless of the interpreter's permissions */
@@ -171,32 +178,32 @@ static int load_misc_binary(struct linux_binprm *bprm)
bprm->interp_flags |= BINPRM_FLAGS_EXECFD;
bprm->interp_data = fd_binary;
- } else {
- allow_write_access(bprm->file);
- fput(bprm->file);
- bprm->file = NULL;
- }
+ } else {
+ allow_write_access(bprm->file);
+ fput(bprm->file);
+ bprm->file = NULL;
+ }
/* make argv[1] be the path to the binary */
- retval = copy_strings_kernel (1, &bprm->interp, bprm);
+ retval = copy_strings_kernel(1, &bprm->interp, bprm);
if (retval < 0)
- goto _error;
+ goto error;
bprm->argc++;
/* add the interp as argv[0] */
- retval = copy_strings_kernel (1, &iname_addr, bprm);
+ retval = copy_strings_kernel(1, &iname_addr, bprm);
if (retval < 0)
- goto _error;
- bprm->argc ++;
+ goto error;
+ bprm->argc++;
/* Update interp in case binfmt_script needs it. */
retval = bprm_change_interp(iname, bprm);
if (retval < 0)
- goto _error;
+ goto error;
- interp_file = open_exec (iname);
- retval = PTR_ERR (interp_file);
- if (IS_ERR (interp_file))
- goto _error;
+ interp_file = open_exec(iname);
+ retval = PTR_ERR(interp_file);
+ if (IS_ERR(interp_file))
+ goto error;
bprm->file = interp_file;
if (fmt->flags & MISC_FMT_CREDENTIALS) {
@@ -207,23 +214,23 @@ static int load_misc_binary(struct linux_binprm *bprm)
memset(bprm->buf, 0, BINPRM_BUF_SIZE);
retval = kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
} else
- retval = prepare_binprm (bprm);
+ retval = prepare_binprm(bprm);
if (retval < 0)
- goto _error;
+ goto error;
retval = search_binary_handler(bprm);
if (retval < 0)
- goto _error;
+ goto error;
-_ret:
+ret:
return retval;
-_error:
+error:
if (fd_binary > 0)
sys_close(fd_binary);
bprm->interp_flags = 0;
bprm->interp_data = 0;
- goto _ret;
+ goto ret;
}
/* Command parsers */
@@ -247,39 +254,44 @@ static char *scanarg(char *s, char del)
return NULL;
}
}
+ s[-1] ='\0';
return s;
}
-static char * check_special_flags (char * sfs, Node * e)
+static char *check_special_flags(char *sfs, Node *e)
{
- char * p = sfs;
+ char *p = sfs;
int cont = 1;
/* special flags */
while (cont) {
switch (*p) {
- case 'P':
- p++;
- e->flags |= MISC_FMT_PRESERVE_ARGV0;
- break;
- case 'O':
- p++;
- e->flags |= MISC_FMT_OPEN_BINARY;
- break;
- case 'C':
- p++;
- /* this flags also implies the
- open-binary flag */
- e->flags |= (MISC_FMT_CREDENTIALS |
- MISC_FMT_OPEN_BINARY);
- break;
- default:
- cont = 0;
+ case 'P':
+ pr_debug("register: flag: P (preserve argv0)\n");
+ p++;
+ e->flags |= MISC_FMT_PRESERVE_ARGV0;
+ break;
+ case 'O':
+ pr_debug("register: flag: O (open binary)\n");
+ p++;
+ e->flags |= MISC_FMT_OPEN_BINARY;
+ break;
+ case 'C':
+ pr_debug("register: flag: C (preserve creds)\n");
+ p++;
+ /* this flags also implies the
+ open-binary flag */
+ e->flags |= (MISC_FMT_CREDENTIALS |
+ MISC_FMT_OPEN_BINARY);
+ break;
+ default:
+ cont = 0;
}
}
return p;
}
+
/*
* This registers a new binary format, it recognises the syntax
* ':name:type:offset:magic:mask:interpreter:flags'
@@ -292,6 +304,8 @@ static Node *create_entry(const char __user *buffer, size_t count)
char *buf, *p;
char del;
+ pr_debug("register: received %zu bytes\n", count);
+
/* some sanity checks */
err = -EINVAL;
if ((count < 11) || (count > MAX_REGISTER_LENGTH))
@@ -299,7 +313,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
err = -ENOMEM;
memsize = sizeof(Node) + count + 8;
- e = kmalloc(memsize, GFP_USER);
+ e = kmalloc(memsize, GFP_KERNEL);
if (!e)
goto out;
@@ -307,98 +321,173 @@ static Node *create_entry(const char __user *buffer, size_t count)
memset(e, 0, sizeof(Node));
if (copy_from_user(buf, buffer, count))
- goto Efault;
+ goto efault;
del = *p++; /* delimeter */
- memset(buf+count, del, 8);
+ pr_debug("register: delim: %#x {%c}\n", del, del);
+
+ /* Pad the buffer with the delim to simplify parsing below. */
+ memset(buf + count, del, 8);
+ /* Parse the 'name' field. */
e->name = p;
p = strchr(p, del);
if (!p)
- goto Einval;
+ goto einval;
*p++ = '\0';
if (!e->name[0] ||
!strcmp(e->name, ".") ||
!strcmp(e->name, "..") ||
strchr(e->name, '/'))
- goto Einval;
+ goto einval;
+
+ pr_debug("register: name: {%s}\n", e->name);
+
+ /* Parse the 'type' field. */
switch (*p++) {
- case 'E': e->flags = 1<<Enabled; break;
- case 'M': e->flags = (1<<Enabled) | (1<<Magic); break;
- default: goto Einval;
+ case 'E':
+ pr_debug("register: type: E (extension)\n");
+ e->flags = 1 << Enabled;
+ break;
+ case 'M':
+ pr_debug("register: type: M (magic)\n");
+ e->flags = (1 << Enabled) | (1 << Magic);
+ break;
+ default:
+ goto einval;
}
if (*p++ != del)
- goto Einval;
+ goto einval;
+
if (test_bit(Magic, &e->flags)) {
- char *s = strchr(p, del);
+ /* Handle the 'M' (magic) format. */
+ char *s;
+
+ /* Parse the 'offset' field. */
+ s = strchr(p, del);
if (!s)
- goto Einval;
+ goto einval;
*s++ = '\0';
e->offset = simple_strtoul(p, &p, 10);
if (*p++)
- goto Einval;
+ goto einval;
+ pr_debug("register: offset: %#x\n", e->offset);
+
+ /* Parse the 'magic' field. */
e->magic = p;
p = scanarg(p, del);
if (!p)
- goto Einval;
- p[-1] = '\0';
+ goto einval;
if (!e->magic[0])
- goto Einval;
+ goto einval;
+ if (USE_DEBUG)
+ print_hex_dump_bytes(
+ KBUILD_MODNAME ": register: magic[raw]: ",
+ DUMP_PREFIX_NONE, e->magic, p - e->magic);
+
+ /* Parse the 'mask' field. */
e->mask = p;
p = scanarg(p, del);
if (!p)
- goto Einval;
- p[-1] = '\0';
- if (!e->mask[0])
+ goto einval;
+ if (!e->mask[0]) {
e->mask = NULL;
+ pr_debug("register: mask[raw]: none\n");
+ } else if (USE_DEBUG)
+ print_hex_dump_bytes(
+ KBUILD_MODNAME ": register: mask[raw]: ",
+ DUMP_PREFIX_NONE, e->mask, p - e->mask);
+
+ /*
+ * Decode the magic & mask fields.
+ * Note: while we might have accepted embedded NUL bytes from
+ * above, the unescape helpers here will stop at the first one
+ * it encounters.
+ */
e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
if (e->mask &&
string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
- goto Einval;
+ goto einval;
if (e->size + e->offset > BINPRM_BUF_SIZE)
- goto Einval;
+ goto einval;
+ pr_debug("register: magic/mask length: %i\n", e->size);
+ if (USE_DEBUG) {
+ print_hex_dump_bytes(
+ KBUILD_MODNAME ": register: magic[decoded]: ",
+ DUMP_PREFIX_NONE, e->magic, e->size);
+
+ if (e->mask) {
+ int i;
+ char *masked = kmalloc(e->size, GFP_KERNEL);
+
+ print_hex_dump_bytes(
+ KBUILD_MODNAME ": register: mask[decoded]: ",
+ DUMP_PREFIX_NONE, e->mask, e->size);
+
+ if (masked) {
+ for (i = 0; i < e->size; ++i)
+ masked[i] = e->magic[i] & e->mask[i];
+ print_hex_dump_bytes(
+ KBUILD_MODNAME ": register: magic[masked]: ",
+ DUMP_PREFIX_NONE, masked, e->size);
+
+ kfree(masked);
+ }
+ }
+ }
} else {
+ /* Handle the 'E' (extension) format. */
+
+ /* Skip the 'offset' field. */
p = strchr(p, del);
if (!p)
- goto Einval;
+ goto einval;
*p++ = '\0';
+
+ /* Parse the 'magic' field. */
e->magic = p;
p = strchr(p, del);
if (!p)
- goto Einval;
+ goto einval;
*p++ = '\0';
if (!e->magic[0] || strchr(e->magic, '/'))
- goto Einval;
+ goto einval;
+ pr_debug("register: extension: {%s}\n", e->magic);
+
+ /* Skip the 'mask' field. */
p = strchr(p, del);
if (!p)
- goto Einval;
+ goto einval;
*p++ = '\0';
}
+
+ /* Parse the 'interpreter' field. */
e->interpreter = p;
p = strchr(p, del);
if (!p)
- goto Einval;
+ goto einval;
*p++ = '\0';
if (!e->interpreter[0])
- goto Einval;
-
-
- p = check_special_flags (p, e);
+ goto einval;
+ pr_debug("register: interpreter: {%s}\n", e->interpreter);
+ /* Parse the 'flags' field. */
+ p = check_special_flags(p, e);
if (*p == '\n')
p++;
if (p != buf + count)
- goto Einval;
+ goto einval;
+
return e;
out:
return ERR_PTR(err);
-Efault:
+efault:
kfree(e);
return ERR_PTR(-EFAULT);
-Einval:
+einval:
kfree(e);
return ERR_PTR(-EINVAL);
}
@@ -417,7 +506,7 @@ static int parse_command(const char __user *buffer, size_t count)
return -EFAULT;
if (!count)
return 0;
- if (s[count-1] == '\n')
+ if (s[count - 1] == '\n')
count--;
if (count == 1 && s[0] == '0')
return 1;
@@ -434,7 +523,7 @@ static void entry_status(Node *e, char *page)
{
char *dp;
char *status = "disabled";
- const char * flags = "flags: ";
+ const char *flags = "flags: ";
if (test_bit(Enabled, &e->flags))
status = "enabled";
@@ -448,19 +537,15 @@ static void entry_status(Node *e, char *page)
dp = page + strlen(page);
/* print the special flags */
- sprintf (dp, "%s", flags);
- dp += strlen (flags);
- if (e->flags & MISC_FMT_PRESERVE_ARGV0) {
- *dp ++ = 'P';
- }
- if (e->flags & MISC_FMT_OPEN_BINARY) {
- *dp ++ = 'O';
- }
- if (e->flags & MISC_FMT_CREDENTIALS) {
- *dp ++ = 'C';
- }
- *dp ++ = '\n';
-
+ sprintf(dp, "%s", flags);
+ dp += strlen(flags);
+ if (e->flags & MISC_FMT_PRESERVE_ARGV0)
+ *dp++ = 'P';
+ if (e->flags & MISC_FMT_OPEN_BINARY)
+ *dp++ = 'O';
+ if (e->flags & MISC_FMT_CREDENTIALS)
+ *dp++ = 'C';
+ *dp++ = '\n';
if (!test_bit(Magic, &e->flags)) {
sprintf(dp, "extension .%s\n", e->magic);
@@ -488,7 +573,7 @@ static void entry_status(Node *e, char *page)
static struct inode *bm_get_inode(struct super_block *sb, int mode)
{
- struct inode * inode = new_inode(sb);
+ struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
@@ -528,13 +613,14 @@ static void kill_node(Node *e)
/* /<entry> */
static ssize_t
-bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
+bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
{
Node *e = file_inode(file)->i_private;
ssize_t res;
char *page;
- if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+ page = (char *) __get_free_page(GFP_KERNEL);
+ if (!page)
return -ENOMEM;
entry_status(e, page);
@@ -553,20 +639,28 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
int res = parse_command(buffer, count);
switch (res) {
- case 1: clear_bit(Enabled, &e->flags);
- break;
- case 2: set_bit(Enabled, &e->flags);
- break;
- case 3: root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&root->d_inode->i_mutex);
-
- kill_node(e);
-
- mutex_unlock(&root->d_inode->i_mutex);
- dput(root);
- break;
- default: return res;
+ case 1:
+ /* Disable this handler. */
+ clear_bit(Enabled, &e->flags);
+ break;
+ case 2:
+ /* Enable this handler. */
+ set_bit(Enabled, &e->flags);
+ break;
+ case 3:
+ /* Delete this handler. */
+ root = dget(file->f_path.dentry->d_sb->s_root);
+ mutex_lock(&root->d_inode->i_mutex);
+
+ kill_node(e);
+
+ mutex_unlock(&root->d_inode->i_mutex);
+ dput(root);
+ break;
+ default:
+ return res;
}
+
return count;
}
@@ -654,26 +748,36 @@ bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
}
-static ssize_t bm_status_write(struct file * file, const char __user * buffer,
+static ssize_t bm_status_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
int res = parse_command(buffer, count);
struct dentry *root;
switch (res) {
- case 1: enabled = 0; break;
- case 2: enabled = 1; break;
- case 3: root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&root->d_inode->i_mutex);
-
- while (!list_empty(&entries))
- kill_node(list_entry(entries.next, Node, list));
-
- mutex_unlock(&root->d_inode->i_mutex);
- dput(root);
- break;
- default: return res;
+ case 1:
+ /* Disable all handlers. */
+ enabled = 0;
+ break;
+ case 2:
+ /* Enable all handlers. */
+ enabled = 1;
+ break;
+ case 3:
+ /* Delete all handlers. */
+ root = dget(file->f_path.dentry->d_sb->s_root);
+ mutex_lock(&root->d_inode->i_mutex);
+
+ while (!list_empty(&entries))
+ kill_node(list_entry(entries.next, Node, list));
+
+ mutex_unlock(&root->d_inode->i_mutex);
+ dput(root);
+ break;
+ default:
+ return res;
}
+
return count;
}
@@ -690,14 +794,16 @@ static const struct super_operations s_ops = {
.evict_inode = bm_evict_inode,
};
-static int bm_fill_super(struct super_block * sb, void * data, int silent)
+static int bm_fill_super(struct super_block *sb, void *data, int silent)
{
+ int err;
static struct tree_descr bm_files[] = {
[2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
[3] = {"register", &bm_register_operations, S_IWUSR},
/* last one */ {""}
};
- int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
+
+ err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
if (!err)
sb->s_op = &s_ops;
return err;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 5027a3e14922..afdf4e3cafc2 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -24,6 +24,16 @@ static int load_script(struct linux_binprm *bprm)
if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
return -ENOEXEC;
+
+ /*
+ * If the script filename will be inaccessible after exec, typically
+ * because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give
+ * up now (on the assumption that the interpreter will want to load
+ * this file).
+ */
+ if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
+ return -ENOENT;
+
/*
* This section does the #! interpretation.
* Sorta complicated, but hopefully it will work. -TYT
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1d9c9f3754f8..b48c41bf0f86 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -235,7 +235,10 @@ struct super_block *freeze_bdev(struct block_device *bdev)
sb = get_active_super(bdev);
if (!sb)
goto out;
- error = freeze_super(sb);
+ if (sb->s_op->freeze_super)
+ error = sb->s_op->freeze_super(sb);
+ else
+ error = freeze_super(sb);
if (error) {
deactivate_super(sb);
bdev->bd_fsfreeze_count--;
@@ -272,7 +275,10 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
if (!sb)
goto out;
- error = thaw_super(sb);
+ if (sb->s_op->thaw_super)
+ error = sb->s_op->thaw_super(sb);
+ else
+ error = thaw_super(sb);
if (error) {
bdev->bd_fsfreeze_count++;
mutex_unlock(&bdev->bd_fsfreeze_mutex);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1bf411bc28fd..e9df8862012c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1017,8 +1017,6 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
- if (*pg_index == (vcnt - 1) && *pg_offset == 0)
- memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
kunmap_atomic(kaddr);
flush_dcache_page(page_out);
@@ -1060,3 +1058,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
return 1;
}
+
+/*
+ * When uncompressing data, we need to make sure and zero any parts of
+ * the biovec that were not filled in by the decompression code. pg_index
+ * and pg_offset indicate the last page and the last offset of that page
+ * that have been filled in. This will zero everything remaining in the
+ * biovec.
+ */
+void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
+ unsigned long pg_index,
+ unsigned long pg_offset)
+{
+ while (pg_index < vcnt) {
+ struct page *page = bvec[pg_index].bv_page;
+ unsigned long off = bvec[pg_index].bv_offset;
+ unsigned long len = bvec[pg_index].bv_len;
+
+ if (pg_offset < off)
+ pg_offset = off;
+ if (pg_offset < off + len) {
+ unsigned long bytes = off + len - pg_offset;
+ char *kaddr;
+
+ kaddr = kmap_atomic(page);
+ memset(kaddr + pg_offset, 0, bytes);
+ kunmap_atomic(kaddr);
+ }
+ pg_index++;
+ pg_offset = 0;
+ }
+}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 0c803b4fbf93..d181f70caae0 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -45,7 +45,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long nr_pages);
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
-
+void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
+ unsigned long pg_index,
+ unsigned long pg_offset);
struct btrfs_compress_op {
struct list_head *(*alloc_workspace)(void);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8a036ed234ad..8bf326affb94 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5355,7 +5355,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_CAST(inode);
}
- return d_materialise_unique(dentry, inode);
+ return d_splice_alias(inode, dentry);
}
unsigned char btrfs_filetype_table[] = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b590e23fa03e..d49fe8a0f6b5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5268,7 +5268,7 @@ long btrfs_ioctl(struct file *file, unsigned int
ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1);
if (ret)
return ret;
- ret = btrfs_sync_fs(file->f_dentry->d_sb, 1);
+ ret = btrfs_sync_fs(file_inode(file)->i_sb, 1);
/*
* The transaction thread may want to do more work,
* namely it pokes the cleaner ktread that will start
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 78285f30909e..617553cdb7d3 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -373,6 +373,8 @@ cont:
}
done:
kunmap(pages_in[page_in_index]);
+ if (!ret)
+ btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset);
return ret;
}
@@ -410,10 +412,23 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
goto out;
}
+ /*
+ * the caller is already checking against PAGE_SIZE, but lets
+ * move this check closer to the memcpy/memset
+ */
+ destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes = min_t(unsigned long, destlen, out_len - start_byte);
kaddr = kmap_atomic(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
+
+ /*
+ * btrfs_getblock is doing a zero on the tail of the page too,
+ * but this will cover anything missing from the decompressed
+ * data.
+ */
+ if (bytes < destlen)
+ memset(kaddr+bytes, 0, destlen-bytes);
kunmap_atomic(kaddr);
out:
return ret;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0144790e296e..50c5a8762aed 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1485,7 +1485,7 @@ static void update_dev_time(char *path_name)
struct file *filp;
filp = filp_open(path_name, O_RDWR, 0);
- if (!filp)
+ if (IS_ERR(filp))
return;
file_update_time(filp);
filp_close(filp, NULL);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 759fa4e2de8f..fb22fd8d8fb8 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -299,6 +299,8 @@ done:
zlib_inflateEnd(&workspace->strm);
if (data_in)
kunmap(pages_in[page_in_index]);
+ if (!ret)
+ btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset);
return ret;
}
@@ -310,10 +312,14 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
int wbits = MAX_WBITS;
- unsigned long bytes_left = destlen;
+ unsigned long bytes_left;
unsigned long total_out = 0;
+ unsigned long pg_offset = 0;
char *kaddr;
+ destlen = min_t(unsigned long, destlen, PAGE_SIZE);
+ bytes_left = destlen;
+
workspace->strm.next_in = data_in;
workspace->strm.avail_in = srclen;
workspace->strm.total_in = 0;
@@ -341,7 +347,6 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
unsigned long buf_start;
unsigned long buf_offset;
unsigned long bytes;
- unsigned long pg_offset = 0;
ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
if (ret != Z_OK && ret != Z_STREAM_END)
@@ -384,6 +389,17 @@ next:
ret = 0;
zlib_inflateEnd(&workspace->strm);
+
+ /*
+ * this should only happen if zlib returned fewer bytes than we
+ * expected. btrfs_get_block is responsible for zeroing from the
+ * end of the inline extent (destlen) to the end of the page
+ */
+ if (pg_offset < destlen) {
+ kaddr = kmap_atomic(dest_page);
+ memset(kaddr + pg_offset, 0, destlen - pg_offset);
+ kunmap_atomic(kaddr);
+ }
return ret;
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index e12f189d539b..7f8e83f9d74e 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -102,8 +102,7 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
struct cachefiles_object *object;
struct rb_node *p;
- _enter(",'%*.*s'",
- dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
+ _enter(",'%pd'", dentry);
write_lock(&cache->active_lock);
@@ -273,9 +272,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
char nbuffer[8 + 8 + 1];
int ret;
- _enter(",'%*.*s','%*.*s'",
- dir->d_name.len, dir->d_name.len, dir->d_name.name,
- rep->d_name.len, rep->d_name.len, rep->d_name.name);
+ _enter(",'%pd','%pd'", dir, rep);
_debug("remove %p from %p", rep, dir);
@@ -597,8 +594,7 @@ lookup_again:
/* if we've found that the terminal object exists, then we need to
* check its attributes and delete it if it's out of date */
if (!object->new) {
- _debug("validate '%*.*s'",
- next->d_name.len, next->d_name.len, next->d_name.name);
+ _debug("validate '%pd'", next);
ret = cachefiles_check_object_xattr(object, auxdata);
if (ret == -ESTALE) {
@@ -827,8 +823,8 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
unsigned long start;
int ret;
- //_enter(",%*.*s/,%s",
- // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+ //_enter(",%pd/,%s",
+ // dir, filename);
/* look up the victim */
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
@@ -910,8 +906,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
struct dentry *victim;
int ret;
- _enter(",%*.*s/,%s",
- dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+ _enter(",%pd/,%s", dir, filename);
victim = cachefiles_check_active(cache, dir, filename);
if (IS_ERR(victim))
@@ -969,8 +964,8 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
{
struct dentry *victim;
- //_enter(",%*.*s/,%s",
- // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+ //_enter(",%pd/,%s",
+ // dir, filename);
victim = cachefiles_check_active(cache, dir, filename);
if (IS_ERR(victim))
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index acbc1f094fb1..a8a68745e11d 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -51,9 +51,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
}
if (ret != -EEXIST) {
- pr_err("Can't set xattr on %*.*s [%lu] (err %d)\n",
- dentry->d_name.len, dentry->d_name.len,
- dentry->d_name.name, dentry->d_inode->i_ino,
+ pr_err("Can't set xattr on %pd [%lu] (err %d)\n",
+ dentry, dentry->d_inode->i_ino,
-ret);
goto error;
}
@@ -64,9 +63,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
if (ret == -ERANGE)
goto bad_type_length;
- pr_err("Can't read xattr on %*.*s [%lu] (err %d)\n",
- dentry->d_name.len, dentry->d_name.len,
- dentry->d_name.name, dentry->d_inode->i_ino,
+ pr_err("Can't read xattr on %pd [%lu] (err %d)\n",
+ dentry, dentry->d_inode->i_ino,
-ret);
goto error;
}
@@ -92,9 +90,8 @@ bad_type_length:
bad_type:
xtype[2] = 0;
- pr_err("Cache object %*.*s [%lu] type %s not %s\n",
- dentry->d_name.len, dentry->d_name.len,
- dentry->d_name.name, dentry->d_inode->i_ino,
+ pr_err("Cache object %pd [%lu] type %s not %s\n",
+ dentry, dentry->d_inode->i_ino,
xtype, type);
ret = -EIO;
goto error;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 18c06bbaf136..f5013d92a7e6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page)
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
int err = 0;
+ u64 off = page_offset(page);
u64 len = PAGE_CACHE_SIZE;
- err = ceph_readpage_from_fscache(inode, page);
+ if (off >= i_size_read(inode)) {
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ return 0;
+ }
+ /*
+ * Uptodate inline data should have been added into page cache
+ * while getting Fcr caps.
+ */
+ if (ci->i_inline_version != CEPH_INLINE_NONE)
+ return -EINVAL;
+
+ err = ceph_readpage_from_fscache(inode, page);
if (err == 0)
goto out;
dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index);
err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
- (u64) page_offset(page), &len,
+ off, &len,
ci->i_truncate_seq, ci->i_truncate_size,
&page, 1, 0);
if (err == -ENOENT)
@@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
off, len);
vino = ceph_vino(inode);
req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
- 1, CEPH_OSD_OP_READ,
+ 0, 1, CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ, NULL,
ci->i_truncate_seq, ci->i_truncate_size,
false);
@@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
int rc = 0;
int max = 0;
+ if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
+ return -EINVAL;
+
rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
&nr_pages);
@@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping,
int rc = 0;
unsigned wsize = 1 << inode->i_blkbits;
struct ceph_osd_request *req = NULL;
- int do_sync;
+ int do_sync = 0;
u64 truncate_size, snap_size;
u32 truncate_seq;
@@ -750,7 +766,6 @@ retry:
last_snapc = snapc;
while (!done && index <= end) {
- int num_ops = do_sync ? 2 : 1;
unsigned i;
int first;
pgoff_t next;
@@ -850,7 +865,8 @@ get_more_pages:
len = wsize;
req = ceph_osdc_new_request(&fsc->client->osdc,
&ci->i_layout, vino,
- offset, &len, num_ops,
+ offset, &len, 0,
+ do_sync ? 2 : 1,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
@@ -862,6 +878,9 @@ get_more_pages:
break;
}
+ if (do_sync)
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
req->r_callback = writepages_finish;
req->r_inode = inode;
@@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
+ struct page *pinned_page = NULL;
loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
int want, got, ret;
@@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
want = CEPH_CAP_FILE_CACHE;
while (1) {
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want,
+ -1, &got, &pinned_page);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
@@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
- ret = filemap_fault(vma, vmf);
+ if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
+ ci->i_inline_version == CEPH_INLINE_NONE)
+ ret = filemap_fault(vma, vmf);
+ else
+ ret = -EAGAIN;
dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
+ if (pinned_page)
+ page_cache_release(pinned_page);
ceph_put_cap_refs(ci, got);
+ if (ret != -EAGAIN)
+ return ret;
+
+ /* read inline data */
+ if (off >= PAGE_CACHE_SIZE) {
+ /* does not support inline data > PAGE_SIZE */
+ ret = VM_FAULT_SIGBUS;
+ } else {
+ int ret1;
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page = find_or_create_page(mapping, 0,
+ mapping_gfp_mask(mapping) &
+ ~__GFP_FS);
+ if (!page) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
+ ret1 = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (ret1 < 0 || off >= i_size_read(inode)) {
+ unlock_page(page);
+ page_cache_release(page);
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ if (ret1 < PAGE_CACHE_SIZE)
+ zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
+ else
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ vmf->page = page;
+ ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
+ }
+out:
+ dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
+ inode, off, (size_t)PAGE_CACHE_SIZE, ret);
return ret;
}
@@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
size_t len;
int want, got, ret;
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ struct page *locked_page = NULL;
+ if (off == 0) {
+ lock_page(page);
+ locked_page = page;
+ }
+ ret = ceph_uninline_data(vma->vm_file, locked_page);
+ if (locked_page)
+ unlock_page(locked_page);
+ if (ret < 0)
+ return VM_FAULT_SIGBUS;
+ }
+
if (off + PAGE_CACHE_SIZE <= size)
len = PAGE_CACHE_SIZE;
else
@@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
while (1) {
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
+ &got, NULL);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
@@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
}
out:
- if (ret != VM_FAULT_LOCKED) {
+ if (ret != VM_FAULT_LOCKED)
unlock_page(page);
- } else {
+ if (ret == VM_FAULT_LOCKED ||
+ ci->i_inline_version != CEPH_INLINE_NONE) {
int dirty;
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
@@ -1315,6 +1394,178 @@ out:
return ret;
}
+/*
+ * Copy @len bytes of inline file data from @data into page 0 of @inode.
+ *
+ * @inode:       inode whose inline data is being installed
+ * @locked_page: optional page supplied by the caller; when non-NULL the
+ *               data is copied into it and nothing else is done to the
+ *               page (no tail zeroing, no SetPageUptodate, no unlock)
+ * @data:        source buffer holding the inline data
+ * @len:         number of bytes to copy from @data
+ *
+ * When @locked_page is NULL, page 0 of the inode's mapping is looked up or
+ * created.  The function returns without copying if the file size is zero,
+ * if no page could be obtained, or if the page is already uptodate.  A page
+ * obtained here is zero-filled from @len to the end of the page (or has its
+ * dcache flushed when @len fills the whole page), marked uptodate, unlocked
+ * and released.
+ *
+ * NOTE(review): @len is not checked against PAGE_CACHE_SIZE in this
+ * function; callers are presumably expected to bound it -- confirm.
+ */
+void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
+			   char *data, size_t len)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+
+	if (locked_page) {
+		page = locked_page;
+	} else {
+		if (i_size_read(inode) == 0)
+			return;
+		page = find_or_create_page(mapping, 0,
+					   mapping_gfp_mask(mapping) & ~__GFP_FS);
+		if (!page)
+			return;
+		/* already populated: keep the cached copy */
+		if (PageUptodate(page)) {
+			unlock_page(page);
+			page_cache_release(page);
+			return;
+		}
+	}
+
+	/* len is a size_t, so it is printed with %zu rather than %lu */
+	dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n",
+	     inode, ceph_vinop(inode), len, locked_page);
+
+	if (len > 0) {
+		void *kaddr = kmap_atomic(page);
+		memcpy(kaddr, data, len);
+		kunmap_atomic(kaddr);
+	}
+
+	/* only finish off a page this function obtained itself */
+	if (page != locked_page) {
+		if (len < PAGE_CACHE_SIZE)
+			zero_user_segment(page, len, PAGE_CACHE_SIZE);
+		else
+			flush_dcache_page(page);
+
+		SetPageUptodate(page);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+}
+
+int ceph_uninline_data(struct file *filp, struct page *locked_page)
+{ /*
+  * Write the inode's inline data out to the OSDs as regular object data.
+  *
+  * @filp:        open file; only used to reach the inode
+  * @locked_page: optional page holding the inline data; when non-NULL it
+  *               is used as the data source and is neither unlocked nor
+  *               released here
+  *
+  * Returns 0 on success (including the cases where there is nothing to
+  * write) or a negative errno.  This function does not itself change
+  * ci->i_inline_version.
+  */
+ struct inode *inode = file_inode(filp);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_request *req;
+ struct page *page = NULL;
+ u64 len, inline_version;
+ int err = 0;
+ bool from_pagecache = false; /* true when page came from find_get_page() */
+ /* take a snapshot of the inline version under i_ceph_lock */
+ spin_lock(&ci->i_ceph_lock);
+ inline_version = ci->i_inline_version;
+ spin_unlock(&ci->i_ceph_lock);
+
+ dout("uninline_data %p %llx.%llx inline_version %llu\n",
+ inode, ceph_vinop(inode), inline_version);
+ /* nothing to write back in either of these states; err is still 0 */
+ if (inline_version == 1 || /* initial version, no data */
+ inline_version == CEPH_INLINE_NONE)
+ goto out;
+ /*
+  * Pick the data source: the caller's page, else an uptodate page 0
+  * from the page cache when FILE_CACHE or FILE_LAZYIO caps are issued.
+  * If neither applies, page stays NULL and the data is fetched below.
+  */
+ if (locked_page) {
+ page = locked_page;
+ WARN_ON(!PageUptodate(page));
+ } else if (ceph_caps_issued(ci) &
+ (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
+ page = find_get_page(inode->i_mapping, 0);
+ if (page) {
+ if (PageUptodate(page)) {
+ from_pagecache = true;
+ lock_page(page);
+ } else {
+ page_cache_release(page);
+ page = NULL;
+ }
+ }
+ }
+ /*
+  * With a cached page the length is the file size capped at one page;
+  * otherwise a temporary page is allocated and filled by a getattr
+  * whose non-negative return value is used as the length.
+  */
+ if (page) {
+ len = i_size_read(inode);
+ if (len > PAGE_CACHE_SIZE)
+ len = PAGE_CACHE_SIZE;
+ } else {
+ page = __page_cache_alloc(GFP_NOFS);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (err < 0) {
+ /* no inline data */
+ if (err == -ENODATA)
+ err = 0;
+ goto out;
+ }
+ len = err;
+ }
+ /*
+  * First OSD request: a single CREATE op.
+  * NOTE(review): presumably this makes sure the object exists before
+  * the guarded write below -- confirm.
+  */
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ ceph_vino(inode), 0, &len, 0, 1,
+ CEPH_OSD_OP_CREATE,
+ CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ ci->i_snap_realm->cached_context,
+ 0, 0, false);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
+ err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!err)
+ err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ ceph_osdc_put_request(req);
+ if (err < 0)
+ goto out;
+ /*
+  * Second OSD request, three ops: op 0 is a CMPXATTR on
+  * "inline_version", op 1 is the WRITE carrying the page data, and
+  * op 2 is a SETXATTR recording inline_version on the object.
+  */
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ ceph_vino(inode), 0, &len, 1, 3,
+ CEPH_OSD_OP_WRITE,
+ CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ ci->i_snap_realm->cached_context,
+ ci->i_truncate_seq, ci->i_truncate_size,
+ false);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
+
+ err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
+ "inline_version", &inline_version,
+ sizeof(inline_version),
+ CEPH_OSD_CMPXATTR_OP_GT,
+ CEPH_OSD_CMPXATTR_MODE_U64);
+ if (err)
+ goto out_put;
+
+ err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
+ "inline_version", &inline_version,
+ sizeof(inline_version), 0, 0);
+ if (err)
+ goto out_put;
+
+ ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
+ err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!err)
+ err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+out_put:
+ ceph_osdc_put_request(req);
+ if (err == -ECANCELED)
+ err = 0; /*
+  * NOTE(review): -ECANCELED is treated as success; presumably it means
+  * the CMPXATTR guard failed because the object already carries a newer
+  * inline_version -- confirm.
+  */
+out: /* release only pages acquired here; locked_page belongs to the caller */
+ if (page && page != locked_page) {
+ if (from_pagecache) {
+ unlock_page(page);
+ page_cache_release(page);
+ } else
+ __free_pages(page, 0); /* temporary page from __page_cache_alloc() */
+ }
+
+ dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
+ inode, ceph_vinop(inode), inline_version, err);
+ return err;
+}
+
static struct vm_operations_struct ceph_vmops = {
.fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cefca661464b..b93c631c6c87 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session,
kuid_t uid, kgid_t gid, umode_t mode,
u64 xattr_version,
struct ceph_buffer *xattrs_buf,
- u64 follows)
+ u64 follows, bool inline_data)
{
struct ceph_mds_caps *fc;
struct ceph_msg *msg;
+ void *p;
+ size_t extra_len;
dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
" seq %u/%u mseq %u follows %lld size %llu/%llu"
@@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session,
seq, issue_seq, mseq, follows, size, max_size,
xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
+ /* flock buffer size + inline version + inline data size */
+ extra_len = 4 + 8 + 4;
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len,
+ GFP_NOFS, false);
if (!msg)
return -ENOMEM;
@@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session,
fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
fc->mode = cpu_to_le32(mode);
+ p = fc + 1;
+ /* flock buffer size */
+ ceph_encode_32(&p, 0);
+ /* inline version */
+ ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE);
+ /* inline data size */
+ ceph_encode_32(&p, 0);
+
fc->xattr_version = cpu_to_le64(xattr_version);
if (xattrs_buf) {
msg->middle = ceph_buffer_get(xattrs_buf);
@@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
u64 flush_tid = 0;
int i;
int ret;
+ bool inline_data;
held = cap->issued | cap->implemented;
revoking = cap->implemented & ~cap->issued;
@@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
xattr_version = ci->i_xattrs.version;
}
+ inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
spin_unlock(&ci->i_ceph_lock);
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
size, max_size, &mtime, &atime, time_warp_seq,
uid, gid, mode, xattr_version, xattr_blob,
- follows);
+ follows, inline_data);
if (ret < 0) {
dout("error sending cap msg, must requeue %p\n", inode);
delayed = 1;
@@ -1336,7 +1352,7 @@ retry:
capsnap->time_warp_seq,
capsnap->uid, capsnap->gid, capsnap->mode,
capsnap->xattr_version, capsnap->xattr_blob,
- capsnap->follows);
+ capsnap->follows, capsnap->inline_data);
next_follows = capsnap->follows + 1;
ceph_put_cap_snap(capsnap);
@@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
* requested from the MDS.
*/
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
- int *got, loff_t endoff, int *check_max, int *err)
+ loff_t endoff, int *got, struct page **pinned_page,
+ int *check_max, int *err)
{
struct inode *inode = &ci->vfs_inode;
int ret = 0;
- int have, implemented;
+ int have, implemented, _got = 0;
int file_wanted;
dout("get_cap_refs %p need %s want %s\n", inode,
ceph_cap_string(need), ceph_cap_string(want));
+again:
spin_lock(&ci->i_ceph_lock);
/* make sure file is actually open */
@@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
ceph_cap_string(need), ceph_cap_string(file_wanted));
*err = -EBADF;
ret = 1;
- goto out;
+ goto out_unlock;
}
/* finish pending truncate */
@@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
*check_max = 1;
ret = 1;
}
- goto out;
+ goto out_unlock;
}
/*
* If a sync write is in progress, we must wait, so that we
@@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
*/
if (__ceph_have_pending_cap_snap(ci)) {
dout("get_cap_refs %p cap_snap_pending\n", inode);
- goto out;
+ goto out_unlock;
}
}
@@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
inode, ceph_cap_string(have), ceph_cap_string(not),
ceph_cap_string(revoking));
if ((revoking & not) == 0) {
- *got = need | (have & want);
- __take_cap_refs(ci, *got);
+ _got = need | (have & want);
+ __take_cap_refs(ci, _got);
ret = 1;
}
} else {
dout("get_cap_refs %p have %s needed %s\n", inode,
ceph_cap_string(have), ceph_cap_string(need));
}
-out:
+out_unlock:
spin_unlock(&ci->i_ceph_lock);
+
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+ i_size_read(inode) > 0) {
+ int ret1;
+ struct page *page = find_get_page(inode->i_mapping, 0);
+ if (page) {
+ if (PageUptodate(page)) {
+ *pinned_page = page;
+ goto out;
+ }
+ page_cache_release(page);
+ }
+ /*
+ * drop cap refs first because getattr while holding
+ * caps refs can cause deadlock.
+ */
+ ceph_put_cap_refs(ci, _got);
+ _got = 0;
+
+ /* getattr request will bring inline data into page cache */
+ ret1 = __ceph_do_getattr(inode, NULL,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (ret1 >= 0) {
+ ret = 0;
+ goto again;
+ }
+ *err = ret1;
+ ret = 1;
+ }
+out:
dout("get_cap_refs %p ret %d got %s\n", inode,
- ret, ceph_cap_string(*got));
+ ret, ceph_cap_string(_got));
+ *got = _got;
return ret;
}
@@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff)
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
-int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
- loff_t endoff)
+int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+ loff_t endoff, int *got, struct page **pinned_page)
{
int check_max, ret, err;
@@ -2179,8 +2229,8 @@ retry:
check_max = 0;
err = 0;
ret = wait_event_interruptible(ci->i_cap_wq,
- try_get_cap_refs(ci, need, want,
- got, endoff,
+ try_get_cap_refs(ci, need, want, endoff,
+ got, pinned_page,
&check_max, &err));
if (err)
ret = err;
@@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode)
static void handle_cap_grant(struct ceph_mds_client *mdsc,
struct inode *inode, struct ceph_mds_caps *grant,
void *snaptrace, int snaptrace_len,
+ u64 inline_version,
+ void *inline_data, int inline_len,
struct ceph_buffer *xattr_buf,
struct ceph_mds_session *session,
struct ceph_cap *cap, int issued)
@@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
bool queue_invalidate = false;
bool queue_revalidate = false;
bool deleted_inode = false;
+ bool fill_inline = false;
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
}
BUG_ON(cap->issued & ~cap->implemented);
+ if (inline_version > 0 && inline_version >= ci->i_inline_version) {
+ ci->i_inline_version = inline_version;
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
+ fill_inline = true;
+ }
+
spin_unlock(&ci->i_ceph_lock);
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
@@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
wake = true;
}
+ if (fill_inline)
+ ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
+
if (queue_trunc) {
ceph_queue_vmtruncate(inode);
ceph_queue_revalidate(inode);
@@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
u64 cap_id;
u64 size, max_size;
u64 tid;
+ u64 inline_version = 0;
+ void *inline_data = NULL;
+ u32 inline_len = 0;
void *snaptrace;
size_t snaptrace_len;
- void *flock;
- void *end;
- u32 flock_len;
+ void *p, *end;
dout("handle_caps from mds%d\n", mds);
@@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session,
snaptrace = h + 1;
snaptrace_len = le32_to_cpu(h->snap_trace_len);
+ p = snaptrace + snaptrace_len;
if (le16_to_cpu(msg->hdr.version) >= 2) {
- void *p = snaptrace + snaptrace_len;
+ u32 flock_len;
ceph_decode_32_safe(&p, end, flock_len, bad);
if (p + flock_len > end)
goto bad;
- flock = p;
- } else {
- flock = NULL;
- flock_len = 0;
+ p += flock_len;
}
if (le16_to_cpu(msg->hdr.version) >= 3) {
if (op == CEPH_CAP_OP_IMPORT) {
- void *p = flock + flock_len;
if (p + sizeof(*peer) > end)
goto bad;
peer = p;
+ p += sizeof(*peer);
} else if (op == CEPH_CAP_OP_EXPORT) {
/* recorded in unused fields */
peer = (void *)&h->size;
}
}
+ if (le16_to_cpu(msg->hdr.version) >= 4) {
+ ceph_decode_64_safe(&p, end, inline_version, bad);
+ ceph_decode_32_safe(&p, end, inline_len, bad);
+ if (p + inline_len > end)
+ goto bad;
+ inline_data = p;
+ p += inline_len;
+ }
+
/* lookup ino */
inode = ceph_find_inode(sb, vino);
ci = ceph_inode(inode);
@@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
handle_cap_import(mdsc, inode, h, peer, session,
&cap, &issued);
handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
+ inline_version, inline_data, inline_len,
msg->middle, session, cap, issued);
goto done_unlocked;
}
@@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_GRANT:
__ceph_caps_issued(ci, &issued);
issued |= __ceph_caps_dirty(ci);
- handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
- session, cap, issued);
+ handle_cap_grant(mdsc, inode, h, NULL, 0,
+ inline_version, inline_data, inline_len,
+ msg->middle, session, cap, issued);
goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
@@ -3137,8 +3210,7 @@ flush_cap_releases:
done:
mutex_unlock(&session->s_mutex);
done_unlocked:
- if (inode)
- iput(inode);
+ iput(inode);
return;
bad:
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 5d5a4c8c8496..1b2355109b9f 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -83,10 +83,9 @@ static int mdsc_show(struct seq_file *s, void *p)
if (IS_ERR(path))
path = NULL;
spin_lock(&req->r_dentry->d_lock);
- seq_printf(s, " #%llx/%.*s (%s)",
+ seq_printf(s, " #%llx/%pd (%s)",
ceph_ino(req->r_dentry->d_parent->d_inode),
- req->r_dentry->d_name.len,
- req->r_dentry->d_name.name,
+ req->r_dentry,
path ? path : "");
spin_unlock(&req->r_dentry->d_lock);
kfree(path);
@@ -103,11 +102,10 @@ static int mdsc_show(struct seq_file *s, void *p)
if (IS_ERR(path))
path = NULL;
spin_lock(&req->r_old_dentry->d_lock);
- seq_printf(s, " #%llx/%.*s (%s)",
+ seq_printf(s, " #%llx/%pd (%s)",
req->r_old_dentry_dir ?
ceph_ino(req->r_old_dentry_dir) : 0,
- req->r_old_dentry->d_name.len,
- req->r_old_dentry->d_name.name,
+ req->r_old_dentry,
path ? path : "");
spin_unlock(&req->r_old_dentry->d_lock);
kfree(path);
@@ -150,8 +148,8 @@ static int dentry_lru_show(struct seq_file *s, void *ptr)
spin_lock(&mdsc->dentry_lru_lock);
list_for_each_entry(di, &mdsc->dentry_lru, lru) {
struct dentry *dentry = di->dentry;
- seq_printf(s, "%p %p\t%.*s\n",
- di, dentry, dentry->d_name.len, dentry->d_name.name);
+ seq_printf(s, "%p %p\t%pd\n",
+ di, dentry, dentry);
}
spin_unlock(&mdsc->dentry_lru_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e6d63f8f98c0..c241603764fd 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -111,7 +111,7 @@ static int fpos_cmp(loff_t l, loff_t r)
/*
* When possible, we try to satisfy a readdir by peeking at the
* dcache. We make this work by carefully ordering dentries on
- * d_u.d_child when we initially get results back from the MDS, and
+ * d_child when we initially get results back from the MDS, and
* falling back to a "normal" sync readdir if any dentries in the dir
* are dropped.
*
@@ -123,7 +123,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
u32 shared_gen)
{
struct ceph_file_info *fi = file->private_data;
- struct dentry *parent = file->f_dentry;
+ struct dentry *parent = file->f_path.dentry;
struct inode *dir = parent->d_inode;
struct list_head *p;
struct dentry *dentry, *last;
@@ -147,11 +147,11 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
p = parent->d_subdirs.prev;
dout(" initial p %p/%p\n", p->prev, p->next);
} else {
- p = last->d_u.d_child.prev;
+ p = last->d_child.prev;
}
more:
- dentry = list_entry(p, struct dentry, d_u.d_child);
+ dentry = list_entry(p, struct dentry, d_child);
di = ceph_dentry(dentry);
while (1) {
dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
@@ -168,13 +168,13 @@ more:
ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
fpos_cmp(ctx->pos, di->offset) <= 0)
break;
- dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
- dentry->d_name.len, dentry->d_name.name, di->offset,
+ dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry,
+ dentry, di->offset,
ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
!dentry->d_inode ? " null" : "");
spin_unlock(&dentry->d_lock);
p = p->prev;
- dentry = list_entry(p, struct dentry, d_u.d_child);
+ dentry = list_entry(p, struct dentry, d_child);
di = ceph_dentry(dentry);
}
@@ -183,15 +183,15 @@ more:
spin_unlock(&parent->d_lock);
/* make sure a dentry wasn't dropped while we didn't have parent lock */
- if (!ceph_dir_is_complete(dir)) {
+ if (!ceph_dir_is_complete_ordered(dir)) {
dout(" lost dir complete on %p; falling back to mds\n", dir);
dput(dentry);
err = -EAGAIN;
goto out;
}
- dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
- dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+ dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos,
+ dentry, dentry, dentry->d_inode);
if (!dir_emit(ctx, dentry->d_name.name,
dentry->d_name.len,
ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
@@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* always start with . and .. */
if (ctx->pos == 0) {
- /* note dir version at start of readdir so we can tell
- * if any dentries get dropped */
- fi->dir_release_count = atomic_read(&ci->i_release_count);
-
dout("readdir off 0 -> '.'\n");
if (!dir_emit(ctx, ".", 1,
ceph_translate_ino(inode->i_sb, inode->i_ino),
@@ -274,7 +270,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
off = 1;
}
if (ctx->pos == 1) {
- ino_t ino = parent_ino(file->f_dentry);
+ ino_t ino = parent_ino(file->f_path.dentry);
dout("readdir off 1 -> '..'\n");
if (!dir_emit(ctx, "..", 2,
ceph_translate_ino(inode->i_sb, ino),
@@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
if ((ctx->pos == 2 || fi->dentry) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
- __ceph_dir_is_complete(ci) &&
+ __ceph_dir_is_complete_ordered(ci) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
u32 shared_gen = ci->i_shared_gen;
spin_unlock(&ci->i_ceph_lock);
@@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* proceed with a normal readdir */
+ if (ctx->pos == 2) {
+ /* note dir version at start of readdir so we can tell
+ * if any dentries get dropped */
+ fi->dir_release_count = atomic_read(&ci->i_release_count);
+ fi->dir_ordered_count = ci->i_ordered_count;
+ }
+
more:
/* do we have the correct frag content buffered? */
if (fi->frag != frag || fi->last_readdir == NULL) {
@@ -337,7 +340,7 @@ more:
}
req->r_inode = inode;
ihold(inode);
- req->r_dentry = dget(file->f_dentry);
+ req->r_dentry = dget(file->f_path.dentry);
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
@@ -446,8 +449,12 @@ more:
*/
spin_lock(&ci->i_ceph_lock);
if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
- dout(" marking %p complete\n", inode);
- __ceph_dir_set_complete(ci, fi->dir_release_count);
+ if (ci->i_ordered_count == fi->dir_ordered_count)
+ dout(" marking %p complete and ordered\n", inode);
+ else
+ dout(" marking %p complete\n", inode);
+ __ceph_dir_set_complete(ci, fi->dir_release_count,
+ fi->dir_ordered_count);
}
spin_unlock(&ci->i_ceph_lock);
@@ -538,8 +545,8 @@ int ceph_handle_snapdir(struct ceph_mds_request *req,
strcmp(dentry->d_name.name,
fsc->mount_options->snapdir_name) == 0) {
struct inode *inode = ceph_get_snapdir(parent);
- dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
- dentry, dentry->d_name.len, dentry->d_name.name, inode);
+ dout("ENOENT on snapdir %p '%pd', linking to snapdir %p\n",
+ dentry, dentry, inode);
BUG_ON(!d_unhashed(dentry));
d_add(dentry, inode);
err = 0;
@@ -603,8 +610,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
int op;
int err;
- dout("lookup %p dentry %p '%.*s'\n",
- dir, dentry, dentry->d_name.len, dentry->d_name.name);
+ dout("lookup %p dentry %p '%pd'\n",
+ dir, dentry, dentry);
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
@@ -774,8 +781,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (ceph_snap(dir) == CEPH_SNAPDIR) {
/* mkdir .snap/foo is a MKSNAP */
op = CEPH_MDS_OP_MKSNAP;
- dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
- dentry->d_name.len, dentry->d_name.name, dentry);
+ dout("mksnap dir %p snap '%pd' dn %p\n", dir,
+ dentry, dentry);
} else if (ceph_snap(dir) == CEPH_NOSNAP) {
dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
op = CEPH_MDS_OP_MKDIR;
@@ -805,7 +812,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
acls.pagelist = NULL;
}
err = ceph_mdsc_do_request(mdsc, dir, req);
- if (!err && !req->r_reply_info.head->is_dentry)
+ if (!err &&
+ !req->r_reply_info.head->is_target &&
+ !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
ceph_mdsc_put_request(req);
out:
@@ -888,8 +897,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
if (ceph_snap(dir) == CEPH_SNAPDIR) {
/* rmdir .snap/foo is RMSNAP */
- dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len,
- dentry->d_name.name, dentry);
+ dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry);
op = CEPH_MDS_OP_RMSNAP;
} else if (ceph_snap(dir) == CEPH_NOSNAP) {
dout("unlink/rmdir dir %p dn %p inode %p\n",
@@ -1063,16 +1071,15 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
- dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
- dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
- ceph_dentry(dentry)->offset);
+ dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
+ dentry, dentry->d_inode, ceph_dentry(dentry)->offset);
dir = ceph_get_dentry_parent_inode(dentry);
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
- dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
- dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+ dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
+ dentry, dentry->d_inode);
valid = 1;
} else if (dentry->d_inode &&
ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
@@ -1265,8 +1272,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
struct ceph_dentry_info *di = ceph_dentry(dn);
struct ceph_mds_client *mdsc;
- dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
- dn->d_name.len, dn->d_name.name);
+ dout("dentry_lru_add %p %p '%pd'\n", di, dn, dn);
mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_add_tail(&di->lru, &mdsc->dentry_lru);
@@ -1279,8 +1285,8 @@ void ceph_dentry_lru_touch(struct dentry *dn)
struct ceph_dentry_info *di = ceph_dentry(dn);
struct ceph_mds_client *mdsc;
- dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
- dn->d_name.len, dn->d_name.name, di->offset);
+ dout("dentry_lru_touch %p %p '%pd' (offset %lld)\n", di, dn, dn,
+ di->offset);
mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_move_tail(&di->lru, &mdsc->dentry_lru);
@@ -1292,8 +1298,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
struct ceph_dentry_info *di = ceph_dentry(dn);
struct ceph_mds_client *mdsc;
- dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
- dn->d_name.len, dn->d_name.name);
+ dout("dentry_lru_del %p %p '%pd'\n", di, dn, dn);
mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_del_init(&di->lru);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d7e0da8366e6..ce74b394b49d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -211,7 +211,7 @@ int ceph_open(struct inode *inode, struct file *file)
req->r_num_caps = 1;
if (flags & O_CREAT)
- parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
+ parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
iput(parent_inode);
if (!err)
@@ -238,8 +238,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct ceph_acls_info acls = {};
int err;
- dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n",
- dir, dentry, dentry->d_name.len, dentry->d_name.name,
+ dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
+ dir, dentry, dentry,
d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
if (dentry->d_name.len > NAME_MAX)
@@ -333,6 +333,11 @@ int ceph_release(struct inode *inode, struct file *file)
return 0;
}
+enum { /* reasons the read path may need a follow-up getattr after a sync read */
+ CHECK_EOF = 1, /* set by striped_read when the read bounced off EOF */
+ READ_INLINE = 2, /* set by ceph_read_iter when the inode still has inline data */
+};
+
/*
* Read a range of bytes striped over one or more objects. Iterate over
* objects we stripe over. (That's not atomic, but good enough for now.)
@@ -412,7 +417,7 @@ more:
ret = read;
/* did we bounce off eof? */
if (pos + left > inode->i_size)
- *checkeof = 1;
+ *checkeof = CHECK_EOF;
}
dout("striped_read returns %d\n", ret);
@@ -598,7 +603,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
snapc = ci->i_snap_realm->cached_context;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len,
+ vino, pos, &len, 0,
2,/*include a 'startsync' command*/
CEPH_OSD_OP_WRITE, flags, snapc,
ci->i_truncate_seq,
@@ -609,6 +614,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
break;
}
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
n = iov_iter_get_pages_alloc(from, &pages, len, &start);
if (unlikely(n < 0)) {
ret = n;
@@ -713,7 +720,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
snapc = ci->i_snap_realm->cached_context;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len, 1,
+ vino, pos, &len, 0, 1,
CEPH_OSD_OP_WRITE, flags, snapc,
ci->i_truncate_seq,
ci->i_truncate_size,
@@ -803,9 +810,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
size_t len = iocb->ki_nbytes;
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct page *pinned_page = NULL;
ssize_t ret;
int want, got = 0;
- int checkeof = 0, read = 0;
+ int retry_op = 0, read = 0;
again:
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
@@ -815,7 +823,7 @@ again:
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
if (ret < 0)
return ret;
@@ -827,8 +835,12 @@ again:
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
- /* hmm, this isn't really async... */
- ret = ceph_sync_read(iocb, to, &checkeof);
+ if (ci->i_inline_version == CEPH_INLINE_NONE) {
+ /* hmm, this isn't really async... */
+ ret = ceph_sync_read(iocb, to, &retry_op);
+ } else {
+ retry_op = READ_INLINE;
+ }
} else {
dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
@@ -838,13 +850,55 @@ again:
}
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
+ if (pinned_page) {
+ page_cache_release(pinned_page);
+ pinned_page = NULL;
+ }
ceph_put_cap_refs(ci, got);
+ if (retry_op && ret >= 0) {
+ int statret;
+ struct page *page = NULL;
+ loff_t i_size;
+ if (retry_op == READ_INLINE) {
+ page = __page_cache_alloc(GFP_NOFS);
+ if (!page)
+ return -ENOMEM;
+ }
- if (checkeof && ret >= 0) {
- int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
+ statret = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, !!page);
+ if (statret < 0) {
+ __free_page(page);
+ if (statret == -ENODATA) {
+ BUG_ON(retry_op != READ_INLINE);
+ goto again;
+ }
+ return statret;
+ }
+
+ i_size = i_size_read(inode);
+ if (retry_op == READ_INLINE) {
+ /* does not support inline data > PAGE_SIZE */
+ if (i_size > PAGE_CACHE_SIZE) {
+ ret = -EIO;
+ } else if (iocb->ki_pos < i_size) {
+ loff_t end = min_t(loff_t, i_size,
+ iocb->ki_pos + len);
+ if (statret < end)
+ zero_user_segment(page, statret, end);
+ ret = copy_page_to_iter(page,
+ iocb->ki_pos & ~PAGE_MASK,
+ end - iocb->ki_pos, to);
+ iocb->ki_pos += ret;
+ } else {
+ ret = 0;
+ }
+ __free_pages(page, 0);
+ return ret;
+ }
/* hit EOF or hole? */
- if (statret == 0 && iocb->ki_pos < inode->i_size &&
+ if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
ret < len) {
dout("sync_read hit hole, ppos %lld < size %lld"
", reading more\n", iocb->ki_pos,
@@ -852,7 +906,7 @@ again:
read += ret;
len -= ret;
- checkeof = 0;
+ retry_op = 0;
goto again;
}
}
@@ -909,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err)
goto out;
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ err = ceph_uninline_data(file, NULL);
+ if (err < 0)
+ goto out;
+ }
+
retry_snap:
if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
err = -ENOSPC;
@@ -922,7 +982,8 @@ retry_snap:
else
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count);
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
+ &got, NULL);
if (err < 0)
goto out;
@@ -969,6 +1030,7 @@ retry_snap:
if (written >= 0) {
int dirty;
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
@@ -1111,7 +1173,7 @@ static int ceph_zero_partial_object(struct inode *inode,
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode),
offset, length,
- 1, op,
+ 0, 1, op,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
NULL, 0, 0, false);
@@ -1214,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock;
}
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ ret = ceph_uninline_data(file, NULL);
+ if (ret < 0)
+ goto unlock;
+ }
+
size = i_size_read(inode);
if (!(mode & FALLOC_FL_KEEP_SIZE))
endoff = offset + length;
@@ -1223,7 +1291,7 @@ static long ceph_fallocate(struct file *file, int mode,
else
want = CEPH_CAP_FILE_BUFFER;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
if (ret < 0)
goto unlock;
@@ -1240,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (!ret) {
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7b6139004401..f61a74115beb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -387,8 +387,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
spin_lock_init(&ci->i_ceph_lock);
ci->i_version = 0;
+ ci->i_inline_version = 0;
ci->i_time_warp_seq = 0;
ci->i_ceph_flags = 0;
+ ci->i_ordered_count = 0;
atomic_set(&ci->i_release_count, 1);
atomic_set(&ci->i_complete_count, 0);
ci->i_symlink = NULL;
@@ -657,7 +659,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
* Populate an inode based on info from mds. May be called on new or
* existing inodes.
*/
-static int fill_inode(struct inode *inode,
+static int fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_reply_info_in *iinfo,
struct ceph_mds_reply_dirfrag *dirinfo,
struct ceph_mds_session *session,
@@ -675,6 +677,7 @@ static int fill_inode(struct inode *inode,
bool wake = false;
bool queue_trunc = false;
bool new_version = false;
+ bool fill_inline = false;
dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
inode, ceph_vinop(inode), le64_to_cpu(info->version),
@@ -845,7 +848,8 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
!__ceph_dir_is_complete(ci)) {
dout(" marking %p complete (empty)\n", inode);
- __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
+ __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count),
+ ci->i_ordered_count);
}
/* were we issued a capability? */
@@ -873,8 +877,23 @@ static int fill_inode(struct inode *inode,
ceph_vinop(inode));
__ceph_get_fmode(ci, cap_fmode);
}
+
+ if (iinfo->inline_version > 0 &&
+ iinfo->inline_version >= ci->i_inline_version) {
+ int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+ ci->i_inline_version = iinfo->inline_version;
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (locked_page ||
+ (le32_to_cpu(info->cap.caps) & cache_caps)))
+ fill_inline = true;
+ }
+
spin_unlock(&ci->i_ceph_lock);
+ if (fill_inline)
+ ceph_fill_inline_data(inode, locked_page,
+ iinfo->inline_data, iinfo->inline_len);
+
if (wake)
wake_up_all(&ci->i_cap_wq);
@@ -967,7 +986,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
/* dn must be unhashed */
if (!d_unhashed(dn))
d_drop(dn);
- realdn = d_materialise_unique(dn, in);
+ realdn = d_splice_alias(in, dn);
if (IS_ERR(realdn)) {
pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
PTR_ERR(realdn), dn, in, ceph_vinop(in));
@@ -1062,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
struct inode *dir = req->r_locked_dir;
if (dir) {
- err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
+ err = fill_inode(dir, NULL,
+ &rinfo->diri, rinfo->dirfrag,
session, req->r_request_started, -1,
&req->r_caps_reservation);
if (err < 0)
@@ -1132,7 +1152,7 @@ retry_lookup:
}
req->r_target_inode = in;
- err = fill_inode(in, &rinfo->targeti, NULL,
+ err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
session, req->r_request_started,
(!req->r_aborted && rinfo->head->result == 0) ?
req->r_fmode : -1,
@@ -1186,28 +1206,26 @@ retry_lookup:
struct inode *olddir = req->r_old_dentry_dir;
BUG_ON(!olddir);
- dout(" src %p '%.*s' dst %p '%.*s'\n",
+ dout(" src %p '%pd' dst %p '%pd'\n",
+ req->r_old_dentry,
req->r_old_dentry,
- req->r_old_dentry->d_name.len,
- req->r_old_dentry->d_name.name,
- dn, dn->d_name.len, dn->d_name.name);
+ dn, dn);
dout("fill_trace doing d_move %p -> %p\n",
req->r_old_dentry, dn);
d_move(req->r_old_dentry, dn);
- dout(" src %p '%.*s' dst %p '%.*s'\n",
+ dout(" src %p '%pd' dst %p '%pd'\n",
req->r_old_dentry,
- req->r_old_dentry->d_name.len,
- req->r_old_dentry->d_name.name,
- dn, dn->d_name.len, dn->d_name.name);
+ req->r_old_dentry,
+ dn, dn);
/* ensure target dentry is invalidated, despite
rehashing bug in vfs_rename_dir */
ceph_invalidate_dentry_lease(dn);
/* d_move screws up sibling dentries' offsets */
- ceph_dir_clear_complete(dir);
- ceph_dir_clear_complete(olddir);
+ ceph_dir_clear_ordered(dir);
+ ceph_dir_clear_ordered(olddir);
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);
@@ -1219,6 +1237,7 @@ retry_lookup:
if (!rinfo->head->is_target) {
dout("fill_trace null dentry\n");
if (dn->d_inode) {
+ ceph_dir_clear_ordered(dir);
dout("d_delete %p\n", dn);
d_delete(dn);
} else {
@@ -1235,7 +1254,7 @@ retry_lookup:
/* attach proper inode */
if (!dn->d_inode) {
- ceph_dir_clear_complete(dir);
+ ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, &have_lease);
if (IS_ERR(dn)) {
@@ -1265,7 +1284,7 @@ retry_lookup:
BUG_ON(!dir);
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
dout(" linking snapped dir %p to dn %p\n", in, dn);
- ceph_dir_clear_complete(dir);
+ ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, NULL);
if (IS_ERR(dn)) {
@@ -1302,7 +1321,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
dout("new_inode badness got %d\n", err);
continue;
}
- rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
+ rc = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation);
if (rc < 0) {
@@ -1399,7 +1418,7 @@ retry_lookup:
/* reorder parent's d_subdirs */
spin_lock(&parent->d_lock);
spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&dn->d_u.d_child, &parent->d_subdirs);
+ list_move(&dn->d_child, &parent->d_subdirs);
spin_unlock(&dn->d_lock);
spin_unlock(&parent->d_lock);
}
@@ -1418,7 +1437,7 @@ retry_lookup:
}
}
- if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
+ if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
@@ -1901,7 +1920,8 @@ out_put:
* Verify that we have a lease on the given mask. If not,
* do a getattr against an mds.
*/
-int ceph_do_getattr(struct inode *inode, int mask, bool force)
+int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
+ int mask, bool force)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1913,7 +1933,8 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
return 0;
}
- dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
+ dout("do_getattr inode %p mask %s mode 0%o\n",
+ inode, ceph_cap_string(mask), inode->i_mode);
if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
return 0;
@@ -1924,7 +1945,19 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
ihold(inode);
req->r_num_caps = 1;
req->r_args.getattr.mask = cpu_to_le32(mask);
+ req->r_locked_page = locked_page;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (locked_page && err == 0) {
+ u64 inline_version = req->r_reply_info.targeti.inline_version;
+ if (inline_version == 0) {
+ /* the reply is supposed to contain inline data */
+ err = -EINVAL;
+ } else if (inline_version == CEPH_INLINE_NONE) {
+ err = -ENODATA;
+ } else {
+ err = req->r_reply_info.targeti.inline_len;
+ }
+ }
ceph_mdsc_put_request(req);
dout("do_getattr result=%d\n", err);
return err;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index fbc39c47bacd..c35c5c614e38 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -9,6 +9,8 @@
#include <linux/ceph/pagelist.h>
static u64 lock_secret;
+static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
static inline u64 secure_addr(void *addr)
{
@@ -40,6 +42,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
u64 length = 0;
u64 owner;
+ if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
+ wait = 0;
+
req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -68,6 +73,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
req->r_args.filelock_change.length = cpu_to_le64(length);
req->r_args.filelock_change.wait = wait;
+ if (wait)
+ req->r_wait_for_completion = ceph_lock_wait_for_completion;
+
err = ceph_mdsc_do_request(mdsc, inode, req);
if (operation == CEPH_MDS_OP_GETFILELOCK) {
@@ -96,6 +104,52 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
return err;
}
+static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ struct ceph_mds_request *intr_req;
+ struct inode *inode = req->r_inode;
+ int err, lock_type;
+
+ BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
+ if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
+ lock_type = CEPH_LOCK_FCNTL_INTR;
+ else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
+ lock_type = CEPH_LOCK_FLOCK_INTR;
+ else
+ BUG_ON(1);
+ BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);
+
+ err = wait_for_completion_interruptible(&req->r_completion);
+ if (!err)
+ return 0;
+
+ dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
+ req->r_tid);
+
+ intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
+ USE_AUTH_MDS);
+ if (IS_ERR(intr_req))
+ return PTR_ERR(intr_req);
+
+ intr_req->r_inode = inode;
+ ihold(inode);
+ intr_req->r_num_caps = 1;
+
+ intr_req->r_args.filelock_change = req->r_args.filelock_change;
+ intr_req->r_args.filelock_change.rule = lock_type;
+ intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;
+
+ err = ceph_mdsc_do_request(mdsc, inode, intr_req);
+ ceph_mdsc_put_request(intr_req);
+
+ if (err && err != -ERESTARTSYS)
+ return err;
+
+ wait_for_completion(&req->r_completion);
+ return 0;
+}
+
/**
* Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome.
@@ -143,11 +197,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
err);
}
}
-
- } else if (err == -ERESTARTSYS) {
- dout("undoing lock\n");
- ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
@@ -186,11 +235,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
file, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on flock_lock_file_wait, undid lock", err);
}
- } else if (err == -ERESTARTSYS) {
- dout("undoing lock\n");
- ceph_lock_message(CEPH_LOCK_FLOCK,
- CEPH_MDS_OP_SETFILELOCK,
- file, CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a92d3f5c6c12..d2171f4a6980 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -89,6 +89,16 @@ static int parse_reply_info_in(void **p, void *end,
ceph_decode_need(p, end, info->xattr_len, bad);
info->xattr_data = *p;
*p += info->xattr_len;
+
+ if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
+ ceph_decode_64_safe(p, end, info->inline_version, bad);
+ ceph_decode_32_safe(p, end, info->inline_len, bad);
+ ceph_decode_need(p, end, info->inline_len, bad);
+ info->inline_data = *p;
+ *p += info->inline_len;
+ } else
+ info->inline_version = CEPH_INLINE_NONE;
+
return 0;
bad:
return err;
@@ -524,8 +534,7 @@ void ceph_mdsc_release_request(struct kref *kref)
}
if (req->r_locked_dir)
ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
- if (req->r_target_inode)
- iput(req->r_target_inode);
+ iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
if (req->r_old_dentry)
@@ -861,8 +870,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
/*
* Serialize client metadata into waiting buffer space, using
* the format that userspace expects for map<string, string>
+ *
+ * ClientSession messages with metadata are v2
*/
- msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */
+ msg->hdr.version = cpu_to_le16(2);
+ msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
p = msg->front.iov_base + sizeof(*h);
@@ -1066,8 +1078,7 @@ out:
session->s_cap_iterator = NULL;
spin_unlock(&session->s_cap_lock);
- if (last_inode)
- iput(last_inode);
+ iput(last_inode);
if (old_cap)
ceph_put_cap(session->s_mdsc, old_cap);
@@ -1874,7 +1885,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free2;
}
- msg->hdr.version = 2;
+ msg->hdr.version = cpu_to_le16(2);
msg->hdr.tid = cpu_to_le64(req->r_tid);
head = msg->front.iov_base;
@@ -2208,6 +2219,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
&req->r_completion, req->r_timeout);
if (err == 0)
err = -EIO;
+ } else if (req->r_wait_for_completion) {
+ err = req->r_wait_for_completion(mdsc, req);
} else {
err = wait_for_completion_killable(&req->r_completion);
}
@@ -3744,6 +3757,20 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
return msg;
}
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ return ceph_auth_sign_message(auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ return ceph_auth_check_message_signature(auth, msg);
+}
+
static const struct ceph_connection_operations mds_con_ops = {
.get = con_get,
.put = con_put,
@@ -3753,6 +3780,8 @@ static const struct ceph_connection_operations mds_con_ops = {
.invalidate_authorizer = invalidate_authorizer,
.peer_reset = peer_reset,
.alloc_msg = mds_alloc_msg,
+ .sign_message = sign_message,
+ .check_message_signature = check_message_signature,
};
/* eof */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3288359353e9..e2817d00f7d9 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -41,6 +41,9 @@ struct ceph_mds_reply_info_in {
char *symlink;
u32 xattr_len;
char *xattr_data;
+ u64 inline_version;
+ u32 inline_len;
+ char *inline_data;
};
/*
@@ -166,6 +169,11 @@ struct ceph_mds_client;
*/
typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
struct ceph_mds_request *req);
+/*
+ * wait for request completion callback
+ */
+typedef int (*ceph_mds_request_wait_callback_t) (struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
/*
* an in-flight mds request
@@ -215,6 +223,7 @@ struct ceph_mds_request {
int r_request_release_offset;
struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info;
+ struct page *r_locked_page;
int r_err;
bool r_aborted;
@@ -239,6 +248,7 @@ struct ceph_mds_request {
struct completion r_completion;
struct completion r_safe_completion;
ceph_mds_request_callback_t r_callback;
+ ceph_mds_request_wait_callback_t r_wait_for_completion;
struct list_head r_unsafe_item; /* per-session unsafe list item */
bool r_got_unsafe, r_got_safe, r_got_result;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f01645a27752..ce35fbd4ba5d 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b)
return 0;
}
+
+static struct ceph_snap_context *empty_snapc;
+
/*
* build the snap context for a given realm.
*/
@@ -328,6 +331,12 @@ static int build_snap_context(struct ceph_snap_realm *realm)
return 0;
}
+ if (num == 0 && realm->seq == empty_snapc->seq) {
+ ceph_get_snap_context(empty_snapc);
+ snapc = empty_snapc;
+ goto done;
+ }
+
/* alloc new snap context */
err = -ENOMEM;
if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
@@ -365,8 +374,8 @@ static int build_snap_context(struct ceph_snap_realm *realm)
realm->ino, realm, snapc, snapc->seq,
(unsigned int) snapc->num_snaps);
- if (realm->cached_context)
- ceph_put_snap_context(realm->cached_context);
+done:
+ ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
return 0;
@@ -466,6 +475,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap);
+ } else if (ci->i_snap_realm->cached_context == empty_snapc) {
+ dout("queue_cap_snap %p empty snapc\n", inode);
+ kfree(capsnap);
} else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
struct ceph_snap_context *snapc = ci->i_head_snapc;
@@ -504,6 +516,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
capsnap->xattr_version = 0;
}
+ capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
/* dirty page count moved from _head to this cap_snap;
all subsequent writes page dirties occur _after_ this
snapshot. */
@@ -590,15 +604,13 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
if (!inode)
continue;
spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
+ iput(lastinode);
lastinode = inode;
ceph_queue_cap_snap(ci);
spin_lock(&realm->inodes_with_caps_lock);
}
spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
+ iput(lastinode);
list_for_each_entry(child, &realm->children, child_item) {
dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
@@ -928,5 +940,16 @@ out:
return;
}
+int __init ceph_snap_init(void)
+{
+ empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
+ if (!empty_snapc)
+ return -ENOMEM;
+ empty_snapc->seq = 1;
+ return 0;
+}
-
+void ceph_snap_exit(void)
+{
+ ceph_put_snap_context(empty_snapc);
+}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f6e12377335c..50f06cddc94b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -515,7 +515,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
struct ceph_fs_client *fsc;
const u64 supported_features =
CEPH_FEATURE_FLOCK |
- CEPH_FEATURE_DIRLAYOUTHASH;
+ CEPH_FEATURE_DIRLAYOUTHASH |
+ CEPH_FEATURE_MDS_INLINE_DATA;
const u64 required_features = 0;
int page_count;
size_t size;
@@ -1017,9 +1018,6 @@ static struct file_system_type ceph_fs_type = {
};
MODULE_ALIAS_FS("ceph");
-#define _STRINGIFY(x) #x
-#define STRINGIFY(x) _STRINGIFY(x)
-
static int __init init_ceph(void)
{
int ret = init_caches();
@@ -1028,15 +1026,20 @@ static int __init init_ceph(void)
ceph_flock_init();
ceph_xattr_init();
+ ret = ceph_snap_init();
+ if (ret)
+ goto out_xattr;
ret = register_filesystem(&ceph_fs_type);
if (ret)
- goto out_icache;
+ goto out_snap;
pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
return 0;
-out_icache:
+out_snap:
+ ceph_snap_exit();
+out_xattr:
ceph_xattr_exit();
destroy_caches();
out:
@@ -1047,6 +1050,7 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
+ ceph_snap_exit();
ceph_xattr_exit();
destroy_caches();
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b82f507979b8..e1aa32d0759d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -161,6 +161,7 @@ struct ceph_cap_snap {
u64 time_warp_seq;
int writing; /* a sync write is still in progress */
int dirty_pages; /* dirty pages awaiting writeback */
+ bool inline_data;
};
static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
@@ -253,9 +254,11 @@ struct ceph_inode_info {
spinlock_t i_ceph_lock;
u64 i_version;
+ u64 i_inline_version;
u32 i_time_warp_seq;
unsigned i_ceph_flags;
+ int i_ordered_count;
atomic_t i_release_count;
atomic_t i_complete_count;
@@ -434,14 +437,19 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
/*
* Ceph inode.
*/
-#define CEPH_I_NODELAY 4 /* do not delay cap release */
-#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
+#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */
+#define CEPH_I_NODELAY 4 /* do not delay cap release */
+#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
+#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
- int release_count)
+ int release_count, int ordered_count)
{
atomic_set(&ci->i_complete_count, release_count);
+ if (ci->i_ordered_count == ordered_count)
+ ci->i_ceph_flags |= CEPH_I_DIR_ORDERED;
+ else
+ ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
}
static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
@@ -455,16 +463,35 @@ static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
atomic_read(&ci->i_release_count);
}
+static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci)
+{
+ return __ceph_dir_is_complete(ci) &&
+ (ci->i_ceph_flags & CEPH_I_DIR_ORDERED);
+}
+
static inline void ceph_dir_clear_complete(struct inode *inode)
{
__ceph_dir_clear_complete(ceph_inode(inode));
}
-static inline bool ceph_dir_is_complete(struct inode *inode)
+static inline void ceph_dir_clear_ordered(struct inode *inode)
{
- return __ceph_dir_is_complete(ceph_inode(inode));
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_ordered_count++;
+ ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
+ spin_unlock(&ci->i_ceph_lock);
}
+static inline bool ceph_dir_is_complete_ordered(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ bool ret;
+ spin_lock(&ci->i_ceph_lock);
+ ret = __ceph_dir_is_complete_ordered(ci);
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
+}
/* find a specific frag @f */
extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
@@ -580,6 +607,7 @@ struct ceph_file_info {
char *last_name; /* last entry in previous chunk */
struct dentry *dentry; /* next dentry (for dcache readdir) */
int dir_release_count;
+ int dir_ordered_count;
/* used for -o dirstat read() on directory thing */
char *dir_info;
@@ -673,6 +701,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap);
extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
+extern int ceph_snap_init(void);
+extern void ceph_snap_exit(void);
/*
* a cap_snap is "pending" if it is still awaiting an in-progress
@@ -715,7 +745,12 @@ extern void ceph_queue_vmtruncate(struct inode *inode);
extern void ceph_queue_invalidate(struct inode *inode);
extern void ceph_queue_writeback(struct inode *inode);
-extern int ceph_do_getattr(struct inode *inode, int mask, bool force);
+extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
+ int mask, bool force);
+static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
+{
+ return __ceph_do_getattr(inode, NULL, mask, force);
+}
extern int ceph_permission(struct inode *inode, int mask);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -830,7 +865,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
int mds, int drop, int unless);
extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
- int *got, loff_t endoff);
+ loff_t endoff, int *got, struct page **pinned_page);
/* for counting open files by mode */
static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
@@ -852,7 +887,9 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags, umode_t mode,
int *opened);
extern int ceph_release(struct inode *inode, struct file *filp);
-
+extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
+ char *data, size_t len);
+int ceph_uninline_data(struct file *filp, struct page *locked_page);
/* dir.c */
extern const struct file_operations ceph_dir_fops;
extern const struct inode_operations ceph_dir_iops;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 678b0d2bbbc4..5a492caf34cb 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -854,7 +854,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
struct ceph_pagelist *pagelist = NULL;
int err;
- if (value) {
+ if (size > 0) {
/* copy value into pagelist */
pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
if (!pagelist)
@@ -864,7 +864,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
err = ceph_pagelist_append(pagelist, value, size);
if (err)
goto out;
- } else {
+ } else if (!value) {
flags |= CEPH_XATTR_REMOVE;
}
@@ -1001,6 +1001,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_setxattr(dentry, name, value, size, flags);
+ if (size == 0)
+ value = ""; /* empty EA, do not remove */
+
return __ceph_setxattr(dentry, name, value, size, flags);
}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f77f7702fabe..67b2007f10fe 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -117,7 +117,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
goto out;
}
major = i;
- ret = major;
}
cd->major = major;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 44ec72684df5..9c56ef776407 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -34,27 +34,9 @@
void
cifs_dump_mem(char *label, void *data, int length)
{
- int i, j;
- int *intptr = data;
- char *charptr = data;
- char buf[10], line[80];
-
- printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n",
- label, length, data);
- for (i = 0; i < length; i += 16) {
- line[0] = 0;
- for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
- sprintf(buf, " %08x", intptr[i / 4 + j]);
- strcat(line, buf);
- }
- buf[0] = ' ';
- buf[2] = 0;
- for (j = 0; (j < 16) && (i + j < length); j++) {
- buf[1] = isprint(charptr[i + j]) ? charptr[i + j] : '.';
- strcat(line, buf);
- }
- printk(KERN_DEBUG "%s\n", line);
- }
+ pr_debug("%s: dump of %d bytes of data at 0x%p\n", label, length, data);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 16, 4,
+ data, length, true);
}
#ifdef CONFIG_CIFS_DEBUG
@@ -68,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_ERR "CIFS VFS: %pV", &vaf);
+ pr_err("CIFS VFS: %pV", &vaf);
va_end(args);
}
@@ -274,6 +256,7 @@ static ssize_t cifs_stats_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
struct list_head *tmp1, *tmp2, *tmp3;
struct TCP_Server_Info *server;
@@ -284,7 +267,7 @@ static ssize_t cifs_stats_proc_write(struct file *file,
if (rc)
return rc;
- if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
+ if (strtobool(&c, &bv) == 0) {
#ifdef CONFIG_CIFS_STATS2
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
@@ -451,15 +434,14 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
rc = get_user(c, buffer);
if (rc)
return rc;
- if (c == '0' || c == 'n' || c == 'N')
- cifsFYI = 0;
- else if (c == '1' || c == 'y' || c == 'Y')
- cifsFYI = 1;
+ if (strtobool(&c, &bv) == 0)
+ cifsFYI = bv;
else if ((c > '1') && (c <= '9'))
cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
@@ -490,15 +472,18 @@ static ssize_t cifs_linux_ext_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
rc = get_user(c, buffer);
if (rc)
return rc;
- if (c == '0' || c == 'n' || c == 'N')
- linuxExtEnabled = 0;
- else if (c == '1' || c == 'y' || c == 'Y')
- linuxExtEnabled = 1;
+
+ rc = strtobool(&c, &bv);
+ if (rc)
+ return rc;
+
+ linuxExtEnabled = bv;
return count;
}
@@ -527,15 +512,18 @@ static ssize_t cifs_lookup_cache_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
rc = get_user(c, buffer);
if (rc)
return rc;
- if (c == '0' || c == 'n' || c == 'N')
- lookupCacheEnabled = 0;
- else if (c == '1' || c == 'y' || c == 'Y')
- lookupCacheEnabled = 1;
+
+ rc = strtobool(&c, &bv);
+ if (rc)
+ return rc;
+
+ lookupCacheEnabled = bv;
return count;
}
@@ -564,15 +552,18 @@ static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
rc = get_user(c, buffer);
if (rc)
return rc;
- if (c == '0' || c == 'n' || c == 'N')
- traceSMB = 0;
- else if (c == '1' || c == 'y' || c == 'Y')
- traceSMB = 1;
+
+ rc = strtobool(&c, &bv);
+ if (rc)
+ return rc;
+
+ traceSMB = bv;
return count;
}
@@ -630,6 +621,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
unsigned int flags;
char flags_string[12];
char c;
+ bool bv;
if ((count < 1) || (count > 11))
return -EINVAL;
@@ -642,11 +634,8 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
if (count < 3) {
/* single char or single char followed by null */
c = flags_string[0];
- if (c == '0' || c == 'n' || c == 'N') {
- global_secflags = CIFSSEC_DEF; /* default */
- return count;
- } else if (c == '1' || c == 'y' || c == 'Y') {
- global_secflags = CIFSSEC_MAX;
+ if (strtobool(&c, &bv) == 0) {
+ global_secflags = bv ? CIFSSEC_MAX : CIFSSEC_DEF;
return count;
} else if (!isdigit(c)) {
cifs_dbg(VFS, "Invalid SecurityFlags: %s\n",
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index c99b40fb609b..f40fbaca1b2a 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -53,13 +53,12 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...);
do { \
if (type == FYI) { \
if (cifsFYI & CIFS_INFO) { \
- printk(KERN_DEBUG "%s: " fmt, \
- __FILE__, ##__VA_ARGS__); \
+ pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \
} \
} else if (type == VFS) { \
cifs_vfs_err(fmt, ##__VA_ARGS__); \
} else if (type == NOISY && type != 0) { \
- printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+ pr_debug(fmt, ##__VA_ARGS__); \
} \
} while (0)
@@ -71,7 +70,7 @@ do { \
#define cifs_dbg(type, fmt, ...) \
do { \
if (0) \
- printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+ pr_debug(fmt, ##__VA_ARGS__); \
} while (0)
#endif
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 6d00c419cbae..1ea780bc6376 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
/* security id for Authenticated Users system group */
static const struct cifs_sid sid_authusers = {
- 1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
+ 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
/* group users */
static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9d7996e8e793..d72fe37f5420 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -209,8 +209,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
{
- struct super_block *sb = file->f_path.dentry->d_sb;
- struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct TCP_Server_Info *server = tcon->ses->server;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 002e0c173939..252f5c15806b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.05"
+#define CIFS_VERSION "2.06"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 02a33e529904..22b289a3b1c4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -661,16 +661,16 @@ set_credits(struct TCP_Server_Info *server, const int val)
server->ops->set_credits(server, val);
}
-static inline __u64
+static inline __le64
get_next_mid64(struct TCP_Server_Info *server)
{
- return server->ops->get_next_mid(server);
+ return cpu_to_le64(server->ops->get_next_mid(server));
}
static inline __le16
get_next_mid(struct TCP_Server_Info *server)
{
- __u16 mid = get_next_mid64(server);
+ __u16 mid = server->ops->get_next_mid(server);
/*
* The value in the SMB header should be little endian for easy
* on-the-wire decoding.
@@ -1168,6 +1168,12 @@ CIFS_SB(struct super_block *sb)
return sb->s_fs_info;
}
+static inline struct cifs_sb_info *
+CIFS_FILE_SB(struct file *file)
+{
+ return CIFS_SB(file_inode(file)->i_sb);
+}
+
static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb)
{
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 61d00a6e398f..fa13d5e79f64 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2477,14 +2477,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
}
parm_data = (struct cifs_posix_lock *)
((char *)&pSMBr->hdr.Protocol + data_offset);
- if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
+ if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
pLockData->fl_type = F_UNLCK;
else {
if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_RDLCK))
+ cpu_to_le16(CIFS_RDLCK))
pLockData->fl_type = F_RDLCK;
else if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_WRLCK))
+ cpu_to_le16(CIFS_WRLCK))
pLockData->fl_type = F_WRLCK;
pLockData->fl_start = le64_to_cpu(parm_data->start);
@@ -3276,25 +3276,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
pSMB->TotalParameterCount = 0;
- pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+ pSMB->TotalDataCount = cpu_to_le32(2);
pSMB->MaxParameterCount = 0;
pSMB->MaxDataCount = 0;
pSMB->MaxSetupCount = 4;
pSMB->Reserved = 0;
pSMB->ParameterOffset = 0;
- pSMB->DataCount = __constant_cpu_to_le32(2);
+ pSMB->DataCount = cpu_to_le32(2);
pSMB->DataOffset =
cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
compression_state) - 4); /* 84 */
pSMB->SetupCount = 4;
- pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+ pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
pSMB->ParameterCount = 0;
- pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+ pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION);
pSMB->IsFsctl = 1; /* FSCTL */
pSMB->IsRootFlag = 0;
pSMB->Fid = fid; /* file handle always le */
/* 3 byte pad, followed by 2 byte compress state */
- pSMB->ByteCount = __constant_cpu_to_le16(5);
+ pSMB->ByteCount = cpu_to_le16(5);
inc_rfc1001_len(pSMB, 5);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3430,10 +3430,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
cifs_acl->version = cpu_to_le16(1);
if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->default_entry_count = cpu_to_le16(0xFFFF);
} else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->access_entry_count = cpu_to_le16(0xFFFF);
} else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 24fa08d261fb..2a772da16b83 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1466,9 +1466,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->seal = 1;
break;
case Opt_noac:
- printk(KERN_WARNING "CIFS: Mount option noac not "
- "supported. Instead set "
- "/proc/fs/cifs/LookupCacheEnabled to 0\n");
+ pr_warn("CIFS: Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n");
break;
case Opt_fsc:
#ifndef CONFIG_CIFS_FSCACHE
@@ -1598,7 +1596,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (strnlen(string, CIFS_MAX_USERNAME_LEN) >
CIFS_MAX_USERNAME_LEN) {
- printk(KERN_WARNING "CIFS: username too long\n");
+ pr_warn("CIFS: username too long\n");
goto cifs_parse_mount_err;
}
vol->username = kstrdup(string, GFP_KERNEL);
@@ -1662,8 +1660,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
temp_len = strlen(value);
vol->password = kzalloc(temp_len+1, GFP_KERNEL);
if (vol->password == NULL) {
- printk(KERN_WARNING "CIFS: no memory "
- "for password\n");
+ pr_warn("CIFS: no memory for password\n");
goto cifs_parse_mount_err;
}
@@ -1687,8 +1684,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (!cifs_convert_address(dstaddr, string,
strlen(string))) {
- printk(KERN_ERR "CIFS: bad ip= option (%s).\n",
- string);
+ pr_err("CIFS: bad ip= option (%s).\n", string);
goto cifs_parse_mount_err;
}
got_ip = true;
@@ -1700,15 +1696,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
== CIFS_MAX_DOMAINNAME_LEN) {
- printk(KERN_WARNING "CIFS: domain name too"
- " long\n");
+ pr_warn("CIFS: domain name too long\n");
goto cifs_parse_mount_err;
}
vol->domainname = kstrdup(string, GFP_KERNEL);
if (!vol->domainname) {
- printk(KERN_WARNING "CIFS: no memory "
- "for domainname\n");
+ pr_warn("CIFS: no memory for domainname\n");
goto cifs_parse_mount_err;
}
cifs_dbg(FYI, "Domain name set\n");
@@ -1721,8 +1715,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (!cifs_convert_address(
(struct sockaddr *)&vol->srcaddr,
string, strlen(string))) {
- printk(KERN_WARNING "CIFS: Could not parse"
- " srcaddr: %s\n", string);
+ pr_warn("CIFS: Could not parse srcaddr: %s\n",
+ string);
goto cifs_parse_mount_err;
}
break;
@@ -1732,8 +1726,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto out_nomem;
if (strnlen(string, 1024) >= 65) {
- printk(KERN_WARNING "CIFS: iocharset name "
- "too long.\n");
+ pr_warn("CIFS: iocharset name too long.\n");
goto cifs_parse_mount_err;
}
@@ -1741,8 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->iocharset = kstrdup(string,
GFP_KERNEL);
if (!vol->iocharset) {
- printk(KERN_WARNING "CIFS: no memory"
- "for charset\n");
+ pr_warn("CIFS: no memory for charset\n");
goto cifs_parse_mount_err;
}
}
@@ -1773,9 +1765,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
* set at top of the function
*/
if (i == RFC1001_NAME_LEN && string[i] != 0)
- printk(KERN_WARNING "CIFS: netbiosname"
- " longer than 15 truncated.\n");
-
+ pr_warn("CIFS: netbiosname longer than 15 truncated.\n");
break;
case Opt_servern:
/* servernetbiosname specified override *SMBSERVER */
@@ -1801,8 +1791,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
/* The string has 16th byte zero still from
set at top of the function */
if (i == RFC1001_NAME_LEN && string[i] != 0)
- printk(KERN_WARNING "CIFS: server net"
- "biosname longer than 15 truncated.\n");
+ pr_warn("CIFS: server netbiosname longer than 15 truncated.\n");
break;
case Opt_ver:
string = match_strdup(args);
@@ -1814,8 +1803,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
break;
}
/* For all other value, error */
- printk(KERN_WARNING "CIFS: Invalid version"
- " specified\n");
+ pr_warn("CIFS: Invalid version specified\n");
goto cifs_parse_mount_err;
case Opt_vers:
string = match_strdup(args);
@@ -1856,7 +1844,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
}
if (!sloppy && invalid) {
- printk(KERN_ERR "CIFS: Unknown mount option \"%s\"\n", invalid);
+ pr_err("CIFS: Unknown mount option \"%s\"\n", invalid);
goto cifs_parse_mount_err;
}
@@ -1882,8 +1870,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
/* No ip= option specified? Try to get it from UNC */
if (!cifs_convert_address(dstaddr, &vol->UNC[2],
strlen(&vol->UNC[2]))) {
- printk(KERN_ERR "Unable to determine destination "
- "address.\n");
+ pr_err("Unable to determine destination address.\n");
goto cifs_parse_mount_err;
}
}
@@ -1894,20 +1881,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (uid_specified)
vol->override_uid = override_uid;
else if (override_uid == 1)
- printk(KERN_NOTICE "CIFS: ignoring forceuid mount option "
- "specified with no uid= option.\n");
+ pr_notice("CIFS: ignoring forceuid mount option specified with no uid= option.\n");
if (gid_specified)
vol->override_gid = override_gid;
else if (override_gid == 1)
- printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
- "specified with no gid= option.\n");
+ pr_notice("CIFS: ignoring forcegid mount option specified with no gid= option.\n");
kfree(mountdata_copy);
return 0;
out_nomem:
- printk(KERN_WARNING "Could not allocate temporary buffer\n");
+ pr_warn("Could not allocate temporary buffer\n");
cifs_parse_mount_err:
kfree(string);
kfree(mountdata_copy);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3e4d00a06c44..96b7e9b7706d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1066,7 +1066,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
- buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
if (!buf) {
free_xid(xid);
return -ENOMEM;
@@ -1401,7 +1401,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
- buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -1586,7 +1586,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
tcon->ses->server);
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ cifs_sb = CIFS_FILE_SB(file);
netfid = cfile->fid.netfid;
cinode = CIFS_I(file_inode(file));
@@ -2305,7 +2305,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
struct cifsFileInfo *smbfile = file->private_data;
- struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
struct inode *inode = file->f_mapping->host;
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2585,7 +2585,7 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
iov_iter_truncate(from, len);
INIT_LIST_HEAD(&wdata_list);
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ cifs_sb = CIFS_FILE_SB(file);
open_file = file->private_data;
tcon = tlink_tcon(open_file->tlink);
@@ -3010,7 +3010,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
return 0;
INIT_LIST_HEAD(&rdata_list);
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ cifs_sb = CIFS_FILE_SB(file);
open_file = file->private_data;
tcon = tlink_tcon(open_file->tlink);
@@ -3155,7 +3155,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
__u32 pid;
xid = get_xid();
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ cifs_sb = CIFS_FILE_SB(file);
/* FIXME: set up handlers for larger reads and/or convert to async */
rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
@@ -3462,7 +3462,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
int rc;
struct list_head tmplist;
struct cifsFileInfo *open_file = file->private_data;
- struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
struct TCP_Server_Info *server;
pid_t pid;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 197cb503d528..0c3ce464cae4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -895,7 +895,7 @@ inode_has_hashed_dentries(struct inode *inode)
struct dentry *dentry;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
spin_unlock(&inode->i_lock);
return true;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b7415d596dbd..337946355b29 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -513,39 +513,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
void
dump_smb(void *buf, int smb_buf_length)
{
- int i, j;
- char debug_line[17];
- unsigned char *buffer = buf;
-
if (traceSMB == 0)
return;
- for (i = 0, j = 0; i < smb_buf_length; i++, j++) {
- if (i % 8 == 0) {
- /* have reached the beginning of line */
- printk(KERN_DEBUG "| ");
- j = 0;
- }
- printk("%0#4x ", buffer[i]);
- debug_line[2 * j] = ' ';
- if (isprint(buffer[i]))
- debug_line[1 + (2 * j)] = buffer[i];
- else
- debug_line[1 + (2 * j)] = '_';
-
- if (i % 8 == 7) {
- /* reached end of line, time to print ascii */
- debug_line[16] = 0;
- printk(" | %s\n", debug_line);
- }
- }
- for (; j < 8; j++) {
- printk(" ");
- debug_line[2 * j] = ' ';
- debug_line[1 + (2 * j)] = ' ';
- }
- printk(" | %s\n", debug_line);
- return;
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 8, 2, buf,
+ smb_buf_length, true);
}
void
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index b333ff60781d..abae6dd2c6b9 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -926,6 +926,7 @@ cifs_NTtimeToUnix(__le64 ntutc)
/* Subtract the NTFS time offset, then convert to 1s intervals. */
s64 t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;
+ u64 abs_t;
/*
* Unfortunately can not use normal 64 bit division on 32 bit arch, but
@@ -933,13 +934,14 @@ cifs_NTtimeToUnix(__le64 ntutc)
* to special case them
*/
if (t < 0) {
- t = -t;
- ts.tv_nsec = (long)(do_div(t, 10000000) * 100);
+ abs_t = -t;
+ ts.tv_nsec = (long)(do_div(abs_t, 10000000) * 100);
ts.tv_nsec = -ts.tv_nsec;
- ts.tv_sec = -t;
+ ts.tv_sec = -abs_t;
} else {
- ts.tv_nsec = (long)do_div(t, 10000000) * 100;
- ts.tv_sec = t;
+ abs_t = t;
+ ts.tv_nsec = (long)do_div(abs_t, 10000000) * 100;
+ ts.tv_sec = abs_t;
}
return ts;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 8fd2a95860ba..c295338e0a98 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -69,7 +69,8 @@ static inline void dump_cifs_file_struct(struct file *file, char *label)
* Attempt to preload the dcache with the results from the FIND_FIRST/NEXT
*
* Find the dentry that matches "name". If there isn't one, create one. If it's
- * a negative dentry or the uniqueid changed, then drop it and recreate it.
+ * a negative dentry or the uniqueid or filetype(mode) changed,
+ * then drop it and recreate it.
*/
static void
cifs_prime_dcache(struct dentry *parent, struct qstr *name,
@@ -97,8 +98,11 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM))
fattr->cf_uniqueid = CIFS_I(inode)->uniqueid;
- /* update inode in place if i_ino didn't change */
- if (CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
+ /* update inode in place
+ * if both i_ino and i_mode didn't change */
+ if (CIFS_I(inode)->uniqueid == fattr->cf_uniqueid &&
+ (inode->i_mode & S_IFMT) ==
+ (fattr->cf_mode & S_IFMT)) {
cifs_fattr_to_inode(inode, fattr);
goto out;
}
@@ -123,7 +127,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
if (!inode)
goto out;
- alias = d_materialise_unique(dentry, inode);
+ alias = d_splice_alias(inode, dentry);
if (alias && !IS_ERR(alias))
dput(alias);
out:
@@ -261,7 +265,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file)
int rc = 0;
char *full_path = NULL;
struct cifsFileInfo *cifsFile;
- struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
struct tcon_link *tlink = NULL;
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
@@ -561,7 +565,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
loff_t first_entry_in_buffer;
loff_t index_to_find = pos;
struct cifsFileInfo *cfile = file->private_data;
- struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
struct TCP_Server_Info *server = tcon->ses->server;
/* check if index in the buffer */
@@ -679,7 +683,7 @@ static int cifs_filldir(char *find_entry, struct file *file,
char *scratch_buf, unsigned int max_len)
{
struct cifsFileInfo *file_info = file->private_data;
- struct super_block *sb = file->f_path.dentry->d_sb;
+ struct super_block *sb = file_inode(file)->i_sb;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifs_dirent de = { NULL, };
struct cifs_fattr fattr;
@@ -753,7 +757,7 @@ static int cifs_filldir(char *find_entry, struct file *file,
*/
fattr.cf_flags |= CIFS_FATTR_NEED_REVAL;
- cifs_prime_dcache(file->f_dentry, &name, &fattr);
+ cifs_prime_dcache(file->f_path.dentry, &name, &fattr);
ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
return !dir_emit(ctx, name.name, name.len, ino, fattr.cf_dtype);
@@ -794,10 +798,6 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
if it before then restart search
if after then keep searching till find it */
- if (file->private_data == NULL) {
- rc = -EINVAL;
- goto rddir2_exit;
- }
cifsFile = file->private_data;
if (cifsFile->srch_inf.endOfSearch) {
if (cifsFile->srch_inf.emptyDir) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 57db63ff88da..bce6fdcd5d48 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
USHRT_MAX));
pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
- pSMB->req.VcNumber = __constant_cpu_to_le16(1);
+ pSMB->req.VcNumber = cpu_to_le16(1);
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
@@ -1303,6 +1303,11 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
+ if (ses->Suid != smb_buf->Uid) {
+ ses->Suid = smb_buf->Uid;
+ cifs_dbg(FYI, "UID changed! new UID = %llu\n", ses->Suid);
+ }
+
bytes_remaining = get_bcc(smb_buf);
bcc_ptr = pByteArea(smb_buf);
blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 45992944e238..7198eac5dddd 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -111,7 +111,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
return -EINVAL;
max_num = max_buf / sizeof(struct smb2_lock_element);
- buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -247,7 +247,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
}
max_num = max_buf / sizeof(struct smb2_lock_element);
- buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
free_xid(xid);
return -ENOMEM;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 1a08a34838fc..689f035915cf 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -32,12 +32,14 @@
static int
check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
{
+ __u64 wire_mid = le64_to_cpu(hdr->MessageId);
+
/*
* Make sure that this really is an SMB, that it is a response,
* and that the message ids match.
*/
if ((*(__le32 *)hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
- (mid == hdr->MessageId)) {
+ (mid == wire_mid)) {
if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
return 0;
else {
@@ -51,11 +53,11 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER)
cifs_dbg(VFS, "Bad protocol string signature header %x\n",
*(unsigned int *) hdr->ProtocolId);
- if (mid != hdr->MessageId)
+ if (mid != wire_mid)
cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
- mid, hdr->MessageId);
+ mid, wire_mid);
}
- cifs_dbg(VFS, "Bad SMB detected. The Mid=%llu\n", hdr->MessageId);
+ cifs_dbg(VFS, "Bad SMB detected. The Mid=%llu\n", wire_mid);
return 1;
}
@@ -67,27 +69,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
* indexed by command in host byte order
*/
static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
- /* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65),
- /* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9),
- /* SMB2_LOGOFF */ __constant_cpu_to_le16(4),
- /* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16),
- /* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4),
- /* SMB2_CREATE */ __constant_cpu_to_le16(89),
- /* SMB2_CLOSE */ __constant_cpu_to_le16(60),
- /* SMB2_FLUSH */ __constant_cpu_to_le16(4),
- /* SMB2_READ */ __constant_cpu_to_le16(17),
- /* SMB2_WRITE */ __constant_cpu_to_le16(17),
- /* SMB2_LOCK */ __constant_cpu_to_le16(4),
- /* SMB2_IOCTL */ __constant_cpu_to_le16(49),
+ /* SMB2_NEGOTIATE */ cpu_to_le16(65),
+ /* SMB2_SESSION_SETUP */ cpu_to_le16(9),
+ /* SMB2_LOGOFF */ cpu_to_le16(4),
+ /* SMB2_TREE_CONNECT */ cpu_to_le16(16),
+ /* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+ /* SMB2_CREATE */ cpu_to_le16(89),
+ /* SMB2_CLOSE */ cpu_to_le16(60),
+ /* SMB2_FLUSH */ cpu_to_le16(4),
+ /* SMB2_READ */ cpu_to_le16(17),
+ /* SMB2_WRITE */ cpu_to_le16(17),
+ /* SMB2_LOCK */ cpu_to_le16(4),
+ /* SMB2_IOCTL */ cpu_to_le16(49),
/* BB CHECK this ... not listed in documentation */
- /* SMB2_CANCEL */ __constant_cpu_to_le16(0),
- /* SMB2_ECHO */ __constant_cpu_to_le16(4),
- /* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9),
- /* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9),
- /* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9),
- /* SMB2_SET_INFO */ __constant_cpu_to_le16(2),
+ /* SMB2_CANCEL */ cpu_to_le16(0),
+ /* SMB2_ECHO */ cpu_to_le16(4),
+ /* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9),
+ /* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9),
+ /* SMB2_QUERY_INFO */ cpu_to_le16(9),
+ /* SMB2_SET_INFO */ cpu_to_le16(2),
/* BB FIXME can also be 44 for lease break */
- /* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24)
+ /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
};
int
@@ -95,7 +97,7 @@ smb2_check_message(char *buf, unsigned int length)
{
struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
- __u64 mid = hdr->MessageId;
+ __u64 mid = le64_to_cpu(hdr->MessageId);
__u32 len = get_rfc1002_length(buf);
__u32 clc_len; /* calculated length */
int command;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c5f521bcdee2..96b5d40a2ece 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -176,10 +176,11 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
{
struct mid_q_entry *mid;
struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
+ __u64 wire_mid = le64_to_cpu(hdr->MessageId);
spin_lock(&GlobalMid_Lock);
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
- if ((mid->mid == hdr->MessageId) &&
+ if ((mid->mid == wire_mid) &&
(mid->mid_state == MID_REQUEST_SUBMITTED) &&
(mid->command == hdr->Command)) {
spin_unlock(&GlobalMid_Lock);
@@ -600,7 +601,7 @@ smb2_clone_range(const unsigned int xid,
goto cchunk_out;
/* For now array only one chunk long, will make more flexible later */
- pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+ pcchunk->ChunkCount = cpu_to_le32(1);
pcchunk->Reserved = 0;
pcchunk->Reserved2 = 0;
@@ -1102,6 +1103,64 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+/*
+ * Handle fallocate for mode 0 or FALLOC_FL_KEEP_SIZE.  Returns 0 when a
+ * non-sparse file needs no work, the smb2_set_sparse() result when that is
+ * called, else -EOPNOTSUPP.  All paths exit via "out" to release the xid.
+ */
+static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
+			    loff_t off, loff_t len, bool keep_size)
+{
+	struct inode *inode;
+	struct cifsInodeInfo *cifsi;
+	struct cifsFileInfo *cfile = file->private_data;
+	long rc = -EOPNOTSUPP;
+	unsigned int xid;
+
+	xid = get_xid();
+
+	inode = cfile->dentry->d_inode;
+	cifsi = CIFS_I(inode);
+
+	/* if file not oplocked can't be sure whether asking to extend size */
+	if (!CIFS_CACHE_READ(cifsi) && !keep_size)
+		goto out;
+
+	/*
+	 * Files are non-sparse by default so falloc may be a no-op
+	 * Must check if file sparse. If not sparse, and not extending
+	 * then no need to do anything since file already allocated
+	 */
+	if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) {
+		/* not extending file and already not sparse */
+		if (keep_size || i_size_read(inode) >= off + len)
+			rc = 0;
+		/* BB: in future add else clause to extend file */
+		goto out;
+	}
+
+	if (keep_size || (i_size_read(inode) >= off + len)) {
+		/*
+		 * Check if falloc starts within first few pages of file
+		 * and ends within a few pages of the end of file to
+		 * ensure that most of file is being forced to be
+		 * fallocated now. If so then setting whole file sparse
+		 * ie potentially making a few extra pages at the beginning
+		 * or end of the file non-sparse via set_sparse is harmless.
+		 */
+		if ((off > 8192) || (off + len + 8192 < i_size_read(inode)))
+			goto out;
+
+		rc = smb2_set_sparse(xid, tcon, cfile, inode, false);
+	}
+	/* BB: else ... in future add code to extend file and set sparse */
+
+out:
+	/* single exit so the xid taken above is always released */
+	free_xid(xid);
+	return rc;
+}
+
static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
loff_t off, loff_t len)
{
@@ -1112,7 +1171,10 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
if (mode & FALLOC_FL_KEEP_SIZE)
return smb3_zero_range(file, tcon, off, len, true);
return smb3_zero_range(file, tcon, off, len, false);
- }
+ } else if (mode == FALLOC_FL_KEEP_SIZE)
+ return smb3_simple_falloc(file, tcon, off, len, true);
+ else if (mode == 0)
+ return smb3_simple_falloc(file, tcon, off, len, false);
return -EOPNOTSUPP;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8f1672bb82d5..3417340bf89e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -431,8 +431,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
if (rc)
goto neg_exit;
if (blob_length)
- rc = decode_neg_token_init(security_blob, blob_length,
- &server->sec_type);
+ rc = decode_negTokenInit(security_blob, blob_length, server);
if (rc == 1)
rc = 0;
else if (rc == 0) {
@@ -1359,7 +1358,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
char *ret_data = NULL;
fsctl_input.CompressionState =
- __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+ cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index e3188abdafd0..70867d54fb8b 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -85,7 +85,7 @@
/* BB FIXME - analyze following length BB */
#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
-#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
/*
* SMB2 Header Definition
@@ -96,7 +96,7 @@
*
*/
-#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
struct smb2_hdr {
__be32 smb2_buf_length; /* big endian on wire */
@@ -110,7 +110,7 @@ struct smb2_hdr {
__le16 CreditRequest; /* CreditResponse */
__le32 Flags;
__le32 NextCommand;
- __u64 MessageId; /* opaque - so can stay little endian */
+ __le64 MessageId;
__le32 ProcessId;
__u32 TreeId; /* opaque - so do not make little endian */
__u64 SessionId; /* opaque - so do not make little endian */
@@ -137,16 +137,16 @@ struct smb2_transform_hdr {
} __packed;
/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
/*
* SMB2 flag definitions
*/
-#define SMB2_FLAGS_SERVER_TO_REDIR __constant_cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND __constant_cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS __constant_cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED __constant_cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS __constant_cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000)
/*
* Definitions for SMB2 Protocol Data Units (network frames)
@@ -157,7 +157,7 @@ struct smb2_transform_hdr {
*
*/
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
struct smb2_err_rsp {
struct smb2_hdr hdr;
@@ -502,12 +502,12 @@ struct create_context {
#define SMB2_LEASE_HANDLE_CACHING_HE 0x02
#define SMB2_LEASE_WRITE_CACHING_HE 0x04
-#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00)
-#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01)
-#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02)
-#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04)
+#define SMB2_LEASE_NONE cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04)
-#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
#define SMB2_LEASE_KEY_SIZE 16
@@ -836,6 +836,25 @@ struct smb2_query_directory_rsp {
#define SMB2_O_INFO_SECURITY 0x03
#define SMB2_O_INFO_QUOTA 0x04
+/* Security info type additionalinfo flags. See MS-SMB2 (2.2.37) or MS-DTYP */
+#define OWNER_SECINFO 0x00000001
+#define GROUP_SECINFO 0x00000002
+#define DACL_SECINFO 0x00000004
+#define SACL_SECINFO 0x00000008
+#define LABEL_SECINFO 0x00000010
+#define ATTRIBUTE_SECINFO 0x00000020
+#define SCOPE_SECINFO 0x00000040
+#define BACKUP_SECINFO 0x00010000
+#define UNPROTECTED_SACL_SECINFO 0x10000000
+#define UNPROTECTED_DACL_SECINFO 0x20000000
+#define PROTECTED_SACL_SECINFO 0x40000000
+#define PROTECTED_DACL_SECINFO 0x80000000
+
+/* Flags used for FileFullEAinfo */
+#define SL_RESTART_SCAN 0x00000001
+#define SL_RETURN_SINGLE_ENTRY 0x00000002
+#define SL_INDEX_SPECIFIED 0x00000004
+
struct smb2_query_info_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 41 */
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 5111e7272db6..d4c5b6f109a7 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -490,7 +490,7 @@ smb2_mid_entry_alloc(const struct smb2_hdr *smb_buffer,
return temp;
else {
memset(temp, 0, sizeof(struct mid_q_entry));
- temp->mid = smb_buffer->MessageId; /* always LE */
+ temp->mid = le64_to_cpu(smb_buffer->MessageId);
temp->pid = current->pid;
temp->command = smb_buffer->Command; /* Always LE */
temp->when_alloc = jiffies;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 9d087f4e7d4e..126f46b887cc 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -99,9 +99,9 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
something is wrong, unless it is quite a slow link or server */
if ((now - midEntry->when_alloc) > HZ) {
if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) {
- printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %llu",
+ pr_debug(" CIFS slow rsp: cmd %d mid %llu",
midEntry->command, midEntry->mid);
- printk(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
+ pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
now - midEntry->when_alloc,
now - midEntry->when_sent,
now - midEntry->when_received);
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 278f8fdeb9ef..46ee6f238985 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -92,7 +92,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
struct dentry *de;
spin_lock(&parent->d_lock);
- list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) {
+ list_for_each_entry(de, &parent->d_subdirs, d_child) {
/* don't know what to do with negative dentries */
if (de->d_inode )
coda_flag_inode(de->d_inode, flag);
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 1326d38960db..f1714cfb589c 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -40,12 +40,6 @@ int coda_iscontrol(const char *name, size_t length)
(strncmp(name, CODA_CONTROL, CODA_CONTROLLEN) == 0));
}
-/* recognize /coda inode */
-int coda_isroot(struct inode *i)
-{
- return ( i->i_sb->s_root->d_inode == i );
-}
-
unsigned short coda_flags_to_cflags(unsigned short flags)
{
unsigned short coda_flags = 0;
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index d42b725b1d21..d6f7a76a1f5b 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -52,7 +52,6 @@ int coda_setattr(struct dentry *, struct iattr *);
/* this file: heloers */
char *coda_f2s(struct CodaFid *f);
-int coda_isroot(struct inode *i);
int coda_iscontrol(const char *name, size_t length);
void coda_vattr_to_iattr(struct inode *, struct coda_vattr *);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9c3dedc000d1..86c893884eb9 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -107,7 +107,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig
}
/* control object, create inode on the fly */
- if (coda_isroot(dir) && coda_iscontrol(name, length)) {
+ if (is_root_inode(dir) && coda_iscontrol(name, length)) {
inode = coda_cnode_makectl(sb);
type = CODA_NOCACHE;
} else {
@@ -195,7 +195,7 @@ static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool
struct CodaFid newfid;
struct coda_vattr attrs;
- if (coda_isroot(dir) && coda_iscontrol(name, length))
+ if (is_root_inode(dir) && coda_iscontrol(name, length))
return -EPERM;
error = venus_create(dir->i_sb, coda_i2f(dir), name, length,
@@ -227,7 +227,7 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode)
int error;
struct CodaFid newfid;
- if (coda_isroot(dir) && coda_iscontrol(name, len))
+ if (is_root_inode(dir) && coda_iscontrol(name, len))
return -EPERM;
attrs.va_mode = mode;
@@ -261,7 +261,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
int len = de->d_name.len;
int error;
- if (coda_isroot(dir_inode) && coda_iscontrol(name, len))
+ if (is_root_inode(dir_inode) && coda_iscontrol(name, len))
return -EPERM;
error = venus_link(dir_inode->i_sb, coda_i2f(inode),
@@ -287,7 +287,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de,
int symlen;
int error;
- if (coda_isroot(dir_inode) && coda_iscontrol(name, len))
+ if (is_root_inode(dir_inode) && coda_iscontrol(name, len))
return -EPERM;
symlen = strlen(symname);
@@ -426,7 +426,6 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx)
struct coda_file_info *cfi;
struct coda_inode_info *cii;
struct file *host_file;
- struct dentry *de;
struct venus_dirent *vdir;
unsigned long vdir_size = offsetof(struct venus_dirent, d_name);
unsigned int type;
@@ -438,8 +437,7 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- de = coda_file->f_path.dentry;
- cii = ITOC(de->d_inode);
+ cii = ITOC(file_inode(coda_file));
vdir = kmalloc(sizeof(*vdir), GFP_KERNEL);
if (!vdir) return -ENOMEM;
@@ -507,7 +505,7 @@ static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
return -ECHILD;
inode = de->d_inode;
- if (!inode || coda_isroot(inode))
+ if (!inode || is_root_inode(inode))
goto out;
if (is_bad_inode(inode))
goto bad;
diff --git a/fs/compat.c b/fs/compat.c
index b13df99f3534..6fd272d455e4 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -847,10 +847,12 @@ struct compat_readdir_callback {
int result;
};
-static int compat_fillonedir(void *__buf, const char *name, int namlen,
- loff_t offset, u64 ino, unsigned int d_type)
+static int compat_fillonedir(struct dir_context *ctx, const char *name,
+ int namlen, loff_t offset, u64 ino,
+ unsigned int d_type)
{
- struct compat_readdir_callback *buf = __buf;
+ struct compat_readdir_callback *buf =
+ container_of(ctx, struct compat_readdir_callback, ctx);
struct compat_old_linux_dirent __user *dirent;
compat_ulong_t d_ino;
@@ -915,11 +917,12 @@ struct compat_getdents_callback {
int error;
};
-static int compat_filldir(void *__buf, const char *name, int namlen,
+static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct compat_linux_dirent __user * dirent;
- struct compat_getdents_callback *buf = __buf;
+ struct compat_getdents_callback *buf =
+ container_of(ctx, struct compat_getdents_callback, ctx);
compat_ulong_t d_ino;
int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
namlen + 2, sizeof(compat_long_t));
@@ -1001,11 +1004,13 @@ struct compat_getdents_callback64 {
int error;
};
-static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset,
- u64 ino, unsigned int d_type)
+static int compat_filldir64(struct dir_context *ctx, const char *name,
+ int namlen, loff_t offset, u64 ino,
+ unsigned int d_type)
{
struct linux_dirent64 __user *dirent;
- struct compat_getdents_callback64 *buf = __buf;
+ struct compat_getdents_callback64 *buf =
+ container_of(ctx, struct compat_getdents_callback64, ctx);
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
sizeof(u64));
u64 off;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 668dcabc5695..c9c298bd3058 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -386,7 +386,7 @@ static void remove_dir(struct dentry * d)
if (d->d_inode)
simple_rmdir(parent->d_inode,d);
- pr_debug(" o %s removing done (%d)\n",d->d_name.name, d_count(d));
+ pr_debug(" o %pd removing done (%d)\n", d, d_count(d));
dput(parent);
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 5bc72b07fde2..e368d4f412f9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -44,7 +44,7 @@
/*
* Usage:
* dcache->d_inode->i_lock protects:
- * - i_dentry, d_alias, d_inode of aliases
+ * - i_dentry, d_u.d_alias, d_inode of aliases
* dcache_hash_bucket lock protects:
* - the dcache hash table
* s_anon bl list spinlock protects:
@@ -59,7 +59,7 @@
* - d_unhashed()
* - d_parent and d_subdirs
* - childrens' d_child and d_parent
- * - d_alias, d_inode
+ * - d_u.d_alias, d_inode
*
* Ordering:
* dentry->d_inode->i_lock
@@ -252,14 +252,12 @@ static void __d_free(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
- WARN_ON(!hlist_unhashed(&dentry->d_alias));
kmem_cache_free(dentry_cache, dentry);
}
static void __d_free_external(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
- WARN_ON(!hlist_unhashed(&dentry->d_alias));
kfree(external_name(dentry));
kmem_cache_free(dentry_cache, dentry);
}
@@ -271,6 +269,7 @@ static inline int dname_external(const struct dentry *dentry)
static void dentry_free(struct dentry *dentry)
{
+ WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
if (unlikely(dname_external(dentry))) {
struct external_name *p = external_name(dentry);
if (likely(atomic_dec_and_test(&p->u.count))) {
@@ -311,7 +310,7 @@ static void dentry_iput(struct dentry * dentry)
struct inode *inode = dentry->d_inode;
if (inode) {
dentry->d_inode = NULL;
- hlist_del_init(&dentry->d_alias);
+ hlist_del_init(&dentry->d_u.d_alias);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
@@ -336,7 +335,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
struct inode *inode = dentry->d_inode;
__d_clear_type(dentry);
dentry->d_inode = NULL;
- hlist_del_init(&dentry->d_alias);
+ hlist_del_init(&dentry->d_u.d_alias);
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
@@ -496,7 +495,7 @@ static void __dentry_kill(struct dentry *dentry)
}
/* if it was on the hash then remove it */
__d_drop(dentry);
- list_del(&dentry->d_u.d_child);
+ __list_del_entry(&dentry->d_child);
/*
* Inform d_walk() that we are no longer attached to the
* dentry tree
@@ -722,7 +721,7 @@ static struct dentry *__d_find_alias(struct inode *inode)
again:
discon_alias = NULL;
- hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
spin_lock(&alias->d_lock);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (IS_ROOT(alias) &&
@@ -772,7 +771,7 @@ void d_prune_aliases(struct inode *inode)
struct dentry *dentry;
restart:
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
spin_lock(&dentry->d_lock);
if (!dentry->d_lockref.count) {
struct dentry *parent = lock_parent(dentry);
@@ -1051,7 +1050,7 @@ repeat:
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1083,33 +1082,31 @@ resume:
/*
* All done at this level ... ascend and resume the search.
*/
+ rcu_read_lock();
+ascend:
if (this_parent != parent) {
struct dentry *child = this_parent;
this_parent = child->d_parent;
- rcu_read_lock();
spin_unlock(&child->d_lock);
spin_lock(&this_parent->d_lock);
- /*
- * might go back up the wrong parent if we have had a rename
- * or deletion
- */
- if (this_parent != child->d_parent ||
- (child->d_flags & DCACHE_DENTRY_KILLED) ||
- need_seqretry(&rename_lock, seq)) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ /* might go back up the wrong parent if we have had a rename. */
+ if (need_seqretry(&rename_lock, seq))
goto rename_retry;
+ next = child->d_child.next;
+ while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ next = next->next;
}
rcu_read_unlock();
- next = child->d_u.d_child.next;
goto resume;
}
- if (need_seqretry(&rename_lock, seq)) {
- spin_unlock(&this_parent->d_lock);
+ if (need_seqretry(&rename_lock, seq))
goto rename_retry;
- }
+ rcu_read_unlock();
if (finish)
finish(data);
@@ -1119,6 +1116,9 @@ out_unlock:
return;
rename_retry:
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ BUG_ON(seq & 1);
if (!retry)
return;
seq = 1;
@@ -1455,8 +1455,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
INIT_HLIST_BL_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
- INIT_HLIST_NODE(&dentry->d_alias);
- INIT_LIST_HEAD(&dentry->d_u.d_child);
+ INIT_HLIST_NODE(&dentry->d_u.d_alias);
+ INIT_LIST_HEAD(&dentry->d_child);
d_set_d_op(dentry, dentry->d_sb->s_d_op);
this_cpu_inc(nr_dentry);
@@ -1486,7 +1486,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
*/
__dget_dlock(parent);
dentry->d_parent = parent;
- list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+ list_add(&dentry->d_child, &parent->d_subdirs);
spin_unlock(&parent->d_lock);
return dentry;
@@ -1579,7 +1579,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
spin_lock(&dentry->d_lock);
__d_set_type(dentry, add_flags);
if (inode)
- hlist_add_head(&dentry->d_alias, &inode->i_dentry);
+ hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
dentry->d_inode = inode;
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
@@ -1603,7 +1603,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
void d_instantiate(struct dentry *entry, struct inode * inode)
{
- BUG_ON(!hlist_unhashed(&entry->d_alias));
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
if (inode)
spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
@@ -1642,7 +1642,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
return NULL;
}
- hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
/*
* Don't need alias->d_lock here, because aliases with
* d_parent == entry->d_parent are not subject to name or
@@ -1668,7 +1668,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
struct dentry *result;
- BUG_ON(!hlist_unhashed(&entry->d_alias));
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
if (inode)
spin_lock(&inode->i_lock);
@@ -1699,7 +1699,7 @@ EXPORT_SYMBOL(d_instantiate_unique);
*/
int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
{
- BUG_ON(!hlist_unhashed(&entry->d_alias));
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
spin_lock(&inode->i_lock);
if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
@@ -1738,7 +1738,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
if (hlist_empty(&inode->i_dentry))
return NULL;
- alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+ alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
__dget(alias);
return alias;
}
@@ -1800,7 +1800,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
spin_lock(&tmp->d_lock);
tmp->d_inode = inode;
tmp->d_flags |= add_flags;
- hlist_add_head(&tmp->d_alias, &inode->i_dentry);
+ hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
hlist_bl_lock(&tmp->d_sb->s_anon);
hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
hlist_bl_unlock(&tmp->d_sb->s_anon);
@@ -1889,51 +1889,19 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
* if not go ahead and create it now.
*/
found = d_hash_and_lookup(dentry->d_parent, name);
- if (unlikely(IS_ERR(found)))
- goto err_out;
if (!found) {
new = d_alloc(dentry->d_parent, name);
if (!new) {
found = ERR_PTR(-ENOMEM);
- goto err_out;
- }
-
- found = d_splice_alias(inode, new);
- if (found) {
- dput(new);
- return found;
- }
- return new;
- }
-
- /*
- * If a matching dentry exists, and it's not negative use it.
- *
- * Decrement the reference count to balance the iget() done
- * earlier on.
- */
- if (found->d_inode) {
- if (unlikely(found->d_inode != inode)) {
- /* This can't happen because bad inodes are unhashed. */
- BUG_ON(!is_bad_inode(inode));
- BUG_ON(!is_bad_inode(found->d_inode));
+ } else {
+ found = d_splice_alias(inode, new);
+ if (found) {
+ dput(new);
+ return found;
+ }
+ return new;
}
- iput(inode);
- return found;
}
-
- /*
- * Negative dentry: instantiate it unless the inode is a directory and
- * already has a dentry.
- */
- new = d_splice_alias(inode, found);
- if (new) {
- dput(found);
- found = new;
- }
- return found;
-
-err_out:
iput(inode);
return found;
}
@@ -2235,7 +2203,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
struct dentry *child;
spin_lock(&dparent->d_lock);
- list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
+ list_for_each_entry(child, &dparent->d_subdirs, d_child) {
if (dentry == child) {
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
__dget_dlock(dentry);
@@ -2393,6 +2361,8 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
*/
unsigned int i;
BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
+ kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN);
+ kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN);
for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
swap(((long *) &dentry->d_iname)[i],
((long *) &target->d_iname)[i]);
@@ -2526,13 +2496,13 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
/* splicing a tree */
dentry->d_parent = target->d_parent;
target->d_parent = target;
- list_del_init(&target->d_u.d_child);
- list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ list_del_init(&target->d_child);
+ list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
} else {
/* swapping two dentries */
swap(dentry->d_parent, target->d_parent);
- list_move(&target->d_u.d_child, &target->d_parent->d_subdirs);
- list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ list_move(&target->d_child, &target->d_parent->d_subdirs);
+ list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
if (exchange)
fsnotify_d_move(target);
fsnotify_d_move(dentry);
@@ -2608,11 +2578,11 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
*/
-static struct dentry *__d_unalias(struct inode *inode,
+static int __d_unalias(struct inode *inode,
struct dentry *dentry, struct dentry *alias)
{
struct mutex *m1 = NULL, *m2 = NULL;
- struct dentry *ret = ERR_PTR(-EBUSY);
+ int ret = -EBUSY;
/* If alias and dentry share a parent, then no extra locks required */
if (alias->d_parent == dentry->d_parent)
@@ -2627,7 +2597,7 @@ static struct dentry *__d_unalias(struct inode *inode,
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
__d_move(alias, dentry, false);
- ret = alias;
+ ret = 0;
out_err:
spin_unlock(&inode->i_lock);
if (m2)
@@ -2662,130 +2632,57 @@ out_err:
*/
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
- struct dentry *new = NULL;
-
if (IS_ERR(inode))
return ERR_CAST(inode);
- if (inode && S_ISDIR(inode->i_mode)) {
- spin_lock(&inode->i_lock);
- new = __d_find_any_alias(inode);
- if (new) {
- if (!IS_ROOT(new)) {
- spin_unlock(&inode->i_lock);
- dput(new);
- iput(inode);
- return ERR_PTR(-EIO);
- }
- if (d_ancestor(new, dentry)) {
- spin_unlock(&inode->i_lock);
- dput(new);
- iput(inode);
- return ERR_PTR(-EIO);
- }
- write_seqlock(&rename_lock);
- __d_move(new, dentry, false);
- write_sequnlock(&rename_lock);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(new, inode);
- iput(inode);
- } else {
- /* already taking inode->i_lock, so d_add() by hand */
- __d_instantiate(dentry, inode);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(dentry, inode);
- d_rehash(dentry);
- }
- } else {
- d_instantiate(dentry, inode);
- if (d_unhashed(dentry))
- d_rehash(dentry);
- }
- return new;
-}
-EXPORT_SYMBOL(d_splice_alias);
-
-/**
- * d_materialise_unique - introduce an inode into the tree
- * @dentry: candidate dentry
- * @inode: inode to bind to the dentry, to which aliases may be attached
- *
- * Introduces an dentry into the tree, substituting an extant disconnected
- * root directory alias in its place if there is one. Caller must hold the
- * i_mutex of the parent directory.
- */
-struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
-{
- struct dentry *actual;
-
BUG_ON(!d_unhashed(dentry));
if (!inode) {
- actual = dentry;
__d_instantiate(dentry, NULL);
- d_rehash(actual);
- goto out_nolock;
+ goto out;
}
-
spin_lock(&inode->i_lock);
-
if (S_ISDIR(inode->i_mode)) {
- struct dentry *alias;
-
- /* Does an aliased dentry already exist? */
- alias = __d_find_alias(inode);
- if (alias) {
- actual = alias;
+ struct dentry *new = __d_find_any_alias(inode);
+ if (unlikely(new)) {
write_seqlock(&rename_lock);
-
- if (d_ancestor(alias, dentry)) {
- /* Check for loops */
- actual = ERR_PTR(-ELOOP);
+ if (unlikely(d_ancestor(new, dentry))) {
+ write_sequnlock(&rename_lock);
spin_unlock(&inode->i_lock);
- } else if (IS_ROOT(alias)) {
- /* Is this an anonymous mountpoint that we
- * could splice into our tree? */
- __d_move(alias, dentry, false);
+ dput(new);
+ new = ERR_PTR(-ELOOP);
+ pr_warn_ratelimited(
+ "VFS: Lookup of '%s' in %s %s"
+ " would have caused loop\n",
+ dentry->d_name.name,
+ inode->i_sb->s_type->name,
+ inode->i_sb->s_id);
+ } else if (!IS_ROOT(new)) {
+ int err = __d_unalias(inode, dentry, new);
write_sequnlock(&rename_lock);
- goto found;
+ if (err) {
+ dput(new);
+ new = ERR_PTR(err);
+ }
} else {
- /* Nope, but we must(!) avoid directory
- * aliasing. This drops inode->i_lock */
- actual = __d_unalias(inode, dentry, alias);
- }
- write_sequnlock(&rename_lock);
- if (IS_ERR(actual)) {
- if (PTR_ERR(actual) == -ELOOP)
- pr_warn_ratelimited(
- "VFS: Lookup of '%s' in %s %s"
- " would have caused loop\n",
- dentry->d_name.name,
- inode->i_sb->s_type->name,
- inode->i_sb->s_id);
- dput(alias);
+ __d_move(new, dentry, false);
+ write_sequnlock(&rename_lock);
+ spin_unlock(&inode->i_lock);
+ security_d_instantiate(new, inode);
}
- goto out_nolock;
+ iput(inode);
+ return new;
}
}
-
- /* Add a unique reference */
- actual = __d_instantiate_unique(dentry, inode);
- if (!actual)
- actual = dentry;
-
- d_rehash(actual);
-found:
+ /* already taking inode->i_lock, so d_add() by hand */
+ __d_instantiate(dentry, inode);
spin_unlock(&inode->i_lock);
-out_nolock:
- if (actual == dentry) {
- security_d_instantiate(dentry, inode);
- return NULL;
- }
-
- iput(inode);
- return actual;
+out:
+ security_d_instantiate(dentry, inode);
+ d_rehash(dentry);
+ return NULL;
}
-EXPORT_SYMBOL_GPL(d_materialise_unique);
+EXPORT_SYMBOL(d_splice_alias);
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
@@ -3321,7 +3218,7 @@ void d_tmpfile(struct dentry *dentry, struct inode *inode)
{
inode_dec_link_count(inode);
BUG_ON(dentry->d_name.name != dentry->d_iname ||
- !hlist_unhashed(&dentry->d_alias) ||
+ !hlist_unhashed(&dentry->d_u.d_alias) ||
!d_unlinked(dentry));
spin_lock(&dentry->d_parent->d_lock);
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 76c08c2beb2f..517e64938438 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -22,6 +22,7 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/atomic.h>
+#include <linux/device.h>
static ssize_t default_read_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -692,18 +693,19 @@ EXPORT_SYMBOL_GPL(debugfs_create_u32_array);
* because some peripherals have several blocks of identical registers,
* for example configuration of dma channels
*/
-int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
- int nregs, void __iomem *base, char *prefix)
+void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
+ int nregs, void __iomem *base, char *prefix)
{
- int i, ret = 0;
+ int i;
for (i = 0; i < nregs; i++, regs++) {
if (prefix)
- ret += seq_printf(s, "%s", prefix);
- ret += seq_printf(s, "%s = 0x%08x\n", regs->name,
- readl(base + regs->offset));
+ seq_printf(s, "%s", prefix);
+ seq_printf(s, "%s = 0x%08x\n", regs->name,
+ readl(base + regs->offset));
+ if (seq_has_overflowed(s))
+ break;
}
- return ret;
}
EXPORT_SYMBOL_GPL(debugfs_print_regs32);
@@ -761,3 +763,56 @@ struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
EXPORT_SYMBOL_GPL(debugfs_create_regset32);
#endif /* CONFIG_HAS_IOMEM */
+
+struct debugfs_devm_entry {
+ int (*read)(struct seq_file *seq, void *data);
+ struct device *dev;
+};
+
+static int debugfs_devm_entry_open(struct inode *inode, struct file *f)
+{
+ struct debugfs_devm_entry *entry = inode->i_private;
+
+ return single_open(f, entry->read, entry->dev);
+}
+
+static const struct file_operations debugfs_devm_entry_ops = {
+ .owner = THIS_MODULE,
+ .open = debugfs_devm_entry_open,
+ .release = single_release,
+ .read = seq_read,
+ .llseek = seq_lseek
+};
+
+/**
+ * debugfs_create_devm_seqfile - create a debugfs file that is bound to device.
+ *
+ * @dev: device related to this debugfs file.
+ * @name: name of the debugfs file.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is %NULL, then the
+ * file will be created in the root of the debugfs filesystem.
+ * @read_fn: function pointer called to print the seq_file content.
+ */
+struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name,
+ struct dentry *parent,
+ int (*read_fn)(struct seq_file *s,
+ void *data))
+{
+ struct debugfs_devm_entry *entry;
+
+ if (IS_ERR(parent))
+ return ERR_PTR(-ENOENT);
+
+ entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ entry->read = read_fn;
+ entry->dev = dev;
+
+ return debugfs_create_file(name, S_IRUGO, parent, entry,
+ &debugfs_devm_entry_ops);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_devm_seqfile);
+
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 1e3b99d3db0d..05f2960ed7c3 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -553,7 +553,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
* use the d_u.d_child as the rcu head and corrupt this list.
*/
spin_lock(&parent->d_lock);
- list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) {
+ list_for_each_entry(child, &parent->d_subdirs, d_child) {
if (!debugfs_positive(child))
continue;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 1323c568e362..eea64912c9c0 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -48,8 +48,8 @@ static char *print_lockmode(int mode)
}
}
-static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
- struct dlm_rsb *res)
+static void print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
+ struct dlm_rsb *res)
{
seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
@@ -68,21 +68,17 @@ static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
if (lkb->lkb_wait_type)
seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
- return seq_puts(s, "\n");
+ seq_puts(s, "\n");
}
-static int print_format1(struct dlm_rsb *res, struct seq_file *s)
+static void print_format1(struct dlm_rsb *res, struct seq_file *s)
{
struct dlm_lkb *lkb;
int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
- int rv;
lock_rsb(res);
- rv = seq_printf(s, "\nResource %p Name (len=%d) \"",
- res, res->res_length);
- if (rv)
- goto out;
+ seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
for (i = 0; i < res->res_length; i++) {
if (isprint(res->res_name[i]))
@@ -92,17 +88,16 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
}
if (res->res_nodeid > 0)
- rv = seq_printf(s, "\"\nLocal Copy, Master is node %d\n",
- res->res_nodeid);
+ seq_printf(s, "\"\nLocal Copy, Master is node %d\n",
+ res->res_nodeid);
else if (res->res_nodeid == 0)
- rv = seq_puts(s, "\"\nMaster Copy\n");
+ seq_puts(s, "\"\nMaster Copy\n");
else if (res->res_nodeid == -1)
- rv = seq_printf(s, "\"\nLooking up master (lkid %x)\n",
- res->res_first_lkid);
+ seq_printf(s, "\"\nLooking up master (lkid %x)\n",
+ res->res_first_lkid);
else
- rv = seq_printf(s, "\"\nInvalid master %d\n",
- res->res_nodeid);
- if (rv)
+ seq_printf(s, "\"\nInvalid master %d\n", res->res_nodeid);
+ if (seq_has_overflowed(s))
goto out;
/* Print the LVB: */
@@ -116,8 +111,8 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
}
if (rsb_flag(res, RSB_VALNOTVALID))
seq_puts(s, " (INVALID)");
- rv = seq_puts(s, "\n");
- if (rv)
+ seq_puts(s, "\n");
+ if (seq_has_overflowed(s))
goto out;
}
@@ -125,32 +120,30 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
recover_list = !list_empty(&res->res_recover_list);
if (root_list || recover_list) {
- rv = seq_printf(s, "Recovery: root %d recover %d flags %lx "
- "count %d\n", root_list, recover_list,
- res->res_flags, res->res_recover_locks_count);
- if (rv)
- goto out;
+ seq_printf(s, "Recovery: root %d recover %d flags %lx count %d\n",
+ root_list, recover_list,
+ res->res_flags, res->res_recover_locks_count);
}
/* Print the locks attached to this resource */
seq_puts(s, "Granted Queue\n");
list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) {
- rv = print_format1_lock(s, lkb, res);
- if (rv)
+ print_format1_lock(s, lkb, res);
+ if (seq_has_overflowed(s))
goto out;
}
seq_puts(s, "Conversion Queue\n");
list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) {
- rv = print_format1_lock(s, lkb, res);
- if (rv)
+ print_format1_lock(s, lkb, res);
+ if (seq_has_overflowed(s))
goto out;
}
seq_puts(s, "Waiting Queue\n");
list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) {
- rv = print_format1_lock(s, lkb, res);
- if (rv)
+ print_format1_lock(s, lkb, res);
+ if (seq_has_overflowed(s))
goto out;
}
@@ -159,23 +152,23 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
seq_puts(s, "Lookup Queue\n");
list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) {
- rv = seq_printf(s, "%08x %s", lkb->lkb_id,
- print_lockmode(lkb->lkb_rqmode));
+ seq_printf(s, "%08x %s",
+ lkb->lkb_id, print_lockmode(lkb->lkb_rqmode));
if (lkb->lkb_wait_type)
seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
- rv = seq_puts(s, "\n");
+ seq_puts(s, "\n");
+ if (seq_has_overflowed(s))
+ goto out;
}
out:
unlock_rsb(res);
- return rv;
}
-static int print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
- struct dlm_rsb *r)
+static void print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
+ struct dlm_rsb *r)
{
u64 xid = 0;
u64 us;
- int rv;
if (lkb->lkb_flags & DLM_IFL_USER) {
if (lkb->lkb_ua)
@@ -188,103 +181,97 @@ static int print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
/* id nodeid remid pid xid exflags flags sts grmode rqmode time_us
r_nodeid r_len r_name */
- rv = seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n",
- lkb->lkb_id,
- lkb->lkb_nodeid,
- lkb->lkb_remid,
- lkb->lkb_ownpid,
- (unsigned long long)xid,
- lkb->lkb_exflags,
- lkb->lkb_flags,
- lkb->lkb_status,
- lkb->lkb_grmode,
- lkb->lkb_rqmode,
- (unsigned long long)us,
- r->res_nodeid,
- r->res_length,
- r->res_name);
- return rv;
+ seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n",
+ lkb->lkb_id,
+ lkb->lkb_nodeid,
+ lkb->lkb_remid,
+ lkb->lkb_ownpid,
+ (unsigned long long)xid,
+ lkb->lkb_exflags,
+ lkb->lkb_flags,
+ lkb->lkb_status,
+ lkb->lkb_grmode,
+ lkb->lkb_rqmode,
+ (unsigned long long)us,
+ r->res_nodeid,
+ r->res_length,
+ r->res_name);
}
-static int print_format2(struct dlm_rsb *r, struct seq_file *s)
+static void print_format2(struct dlm_rsb *r, struct seq_file *s)
{
struct dlm_lkb *lkb;
- int rv = 0;
lock_rsb(r);
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
- rv = print_format2_lock(s, lkb, r);
- if (rv)
+ print_format2_lock(s, lkb, r);
+ if (seq_has_overflowed(s))
goto out;
}
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
- rv = print_format2_lock(s, lkb, r);
- if (rv)
+ print_format2_lock(s, lkb, r);
+ if (seq_has_overflowed(s))
goto out;
}
list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
- rv = print_format2_lock(s, lkb, r);
- if (rv)
+ print_format2_lock(s, lkb, r);
+ if (seq_has_overflowed(s))
goto out;
}
out:
unlock_rsb(r);
- return rv;
}
-static int print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
+static void print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
int rsb_lookup)
{
u64 xid = 0;
- int rv;
if (lkb->lkb_flags & DLM_IFL_USER) {
if (lkb->lkb_ua)
xid = lkb->lkb_ua->xid;
}
- rv = seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n",
- lkb->lkb_id,
- lkb->lkb_nodeid,
- lkb->lkb_remid,
- lkb->lkb_ownpid,
- (unsigned long long)xid,
- lkb->lkb_exflags,
- lkb->lkb_flags,
- lkb->lkb_status,
- lkb->lkb_grmode,
- lkb->lkb_rqmode,
- lkb->lkb_last_bast.mode,
- rsb_lookup,
- lkb->lkb_wait_type,
- lkb->lkb_lvbseq,
- (unsigned long long)ktime_to_ns(lkb->lkb_timestamp),
- (unsigned long long)ktime_to_ns(lkb->lkb_last_bast_time));
- return rv;
+ seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n",
+ lkb->lkb_id,
+ lkb->lkb_nodeid,
+ lkb->lkb_remid,
+ lkb->lkb_ownpid,
+ (unsigned long long)xid,
+ lkb->lkb_exflags,
+ lkb->lkb_flags,
+ lkb->lkb_status,
+ lkb->lkb_grmode,
+ lkb->lkb_rqmode,
+ lkb->lkb_last_bast.mode,
+ rsb_lookup,
+ lkb->lkb_wait_type,
+ lkb->lkb_lvbseq,
+ (unsigned long long)ktime_to_ns(lkb->lkb_timestamp),
+ (unsigned long long)ktime_to_ns(lkb->lkb_last_bast_time));
}
-static int print_format3(struct dlm_rsb *r, struct seq_file *s)
+static void print_format3(struct dlm_rsb *r, struct seq_file *s)
{
struct dlm_lkb *lkb;
int i, lvblen = r->res_ls->ls_lvblen;
int print_name = 1;
- int rv;
lock_rsb(r);
- rv = seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ",
- r,
- r->res_nodeid,
- r->res_first_lkid,
- r->res_flags,
- !list_empty(&r->res_root_list),
- !list_empty(&r->res_recover_list),
- r->res_recover_locks_count,
- r->res_length);
- if (rv)
+ seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ",
+ r,
+ r->res_nodeid,
+ r->res_first_lkid,
+ r->res_flags,
+ !list_empty(&r->res_root_list),
+ !list_empty(&r->res_recover_list),
+ r->res_recover_locks_count,
+ r->res_length);
+ if (seq_has_overflowed(s))
goto out;
for (i = 0; i < r->res_length; i++) {
@@ -292,7 +279,7 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
print_name = 0;
}
- seq_printf(s, "%s", print_name ? "str " : "hex");
+ seq_puts(s, print_name ? "str " : "hex");
for (i = 0; i < r->res_length; i++) {
if (print_name)
@@ -300,8 +287,8 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
else
seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
}
- rv = seq_puts(s, "\n");
- if (rv)
+ seq_puts(s, "\n");
+ if (seq_has_overflowed(s))
goto out;
if (!r->res_lvbptr)
@@ -311,65 +298,62 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
for (i = 0; i < lvblen; i++)
seq_printf(s, " %02x", (unsigned char)r->res_lvbptr[i]);
- rv = seq_puts(s, "\n");
- if (rv)
+ seq_puts(s, "\n");
+ if (seq_has_overflowed(s))
goto out;
do_locks:
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
- rv = print_format3_lock(s, lkb, 0);
- if (rv)
+ print_format3_lock(s, lkb, 0);
+ if (seq_has_overflowed(s))
goto out;
}
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
- rv = print_format3_lock(s, lkb, 0);
- if (rv)
+ print_format3_lock(s, lkb, 0);
+ if (seq_has_overflowed(s))
goto out;
}
list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
- rv = print_format3_lock(s, lkb, 0);
- if (rv)
+ print_format3_lock(s, lkb, 0);
+ if (seq_has_overflowed(s))
goto out;
}
list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup) {
- rv = print_format3_lock(s, lkb, 1);
- if (rv)
+ print_format3_lock(s, lkb, 1);
+ if (seq_has_overflowed(s))
goto out;
}
out:
unlock_rsb(r);
- return rv;
}
-static int print_format4(struct dlm_rsb *r, struct seq_file *s)
+static void print_format4(struct dlm_rsb *r, struct seq_file *s)
{
int our_nodeid = dlm_our_nodeid();
int print_name = 1;
- int i, rv;
+ int i;
lock_rsb(r);
- rv = seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ",
- r,
- r->res_nodeid,
- r->res_master_nodeid,
- r->res_dir_nodeid,
- our_nodeid,
- r->res_toss_time,
- r->res_flags,
- r->res_length);
- if (rv)
- goto out;
+ seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ",
+ r,
+ r->res_nodeid,
+ r->res_master_nodeid,
+ r->res_dir_nodeid,
+ our_nodeid,
+ r->res_toss_time,
+ r->res_flags,
+ r->res_length);
for (i = 0; i < r->res_length; i++) {
if (!isascii(r->res_name[i]) || !isprint(r->res_name[i]))
print_name = 0;
}
- seq_printf(s, "%s", print_name ? "str " : "hex");
+ seq_puts(s, print_name ? "str " : "hex");
for (i = 0; i < r->res_length; i++) {
if (print_name)
@@ -377,10 +361,9 @@ static int print_format4(struct dlm_rsb *r, struct seq_file *s)
else
seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
}
- rv = seq_puts(s, "\n");
- out:
+ seq_puts(s, "\n");
+
unlock_rsb(r);
- return rv;
}
struct rsbtbl_iter {
@@ -390,47 +373,45 @@ struct rsbtbl_iter {
int header;
};
-/* seq_printf returns -1 if the buffer is full, and 0 otherwise.
- If the buffer is full, seq_printf can be called again, but it
- does nothing and just returns -1. So, the these printing routines
- periodically check the return value to avoid wasting too much time
- trying to print to a full buffer. */
+/*
+ * If the buffer is full, seq_printf can be called again, but it
+ * does nothing. So, these printing routines periodically check
+ * seq_has_overflowed to avoid wasting too much time trying to print to
+ * a full buffer.
+ */
static int table_seq_show(struct seq_file *seq, void *iter_ptr)
{
struct rsbtbl_iter *ri = iter_ptr;
- int rv = 0;
switch (ri->format) {
case 1:
- rv = print_format1(ri->rsb, seq);
+ print_format1(ri->rsb, seq);
break;
case 2:
if (ri->header) {
- seq_printf(seq, "id nodeid remid pid xid exflags "
- "flags sts grmode rqmode time_ms "
- "r_nodeid r_len r_name\n");
+ seq_puts(seq, "id nodeid remid pid xid exflags flags sts grmode rqmode time_ms r_nodeid r_len r_name\n");
ri->header = 0;
}
- rv = print_format2(ri->rsb, seq);
+ print_format2(ri->rsb, seq);
break;
case 3:
if (ri->header) {
- seq_printf(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n");
+ seq_puts(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n");
ri->header = 0;
}
- rv = print_format3(ri->rsb, seq);
+ print_format3(ri->rsb, seq);
break;
case 4:
if (ri->header) {
- seq_printf(seq, "version 4 rsb 2\n");
+ seq_puts(seq, "version 4 rsb 2\n");
ri->header = 0;
}
- rv = print_format4(ri->rsb, seq);
+ print_format4(ri->rsb, seq);
break;
}
- return rv;
+ return 0;
}
static const struct seq_operations format1_seq_ops;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 83f3d5520307..35502d4046f5 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -5886,6 +5886,78 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
return error;
}
+/*
+ * The caller asks for an orphan lock on a given resource with a given mode.
+ * If a matching lock exists, it's moved to the owner's list of locks and
+ * the lkid is returned.
+ */
+
+int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, void *name, unsigned int namelen,
+ unsigned long timeout_cs, uint32_t *lkid)
+{
+ struct dlm_lkb *lkb;
+ struct dlm_user_args *ua;
+ int found_other_mode = 0;
+ int found = 0;
+ int rv = 0;
+
+ mutex_lock(&ls->ls_orphans_mutex);
+ list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) {
+ if (lkb->lkb_resource->res_length != namelen)
+ continue;
+ if (memcmp(lkb->lkb_resource->res_name, name, namelen))
+ continue;
+ if (lkb->lkb_grmode != mode) {
+ found_other_mode = 1;
+ continue;
+ }
+
+ found = 1;
+ list_del_init(&lkb->lkb_ownqueue);
+ lkb->lkb_flags &= ~DLM_IFL_ORPHAN;
+ *lkid = lkb->lkb_id;
+ break;
+ }
+ mutex_unlock(&ls->ls_orphans_mutex);
+
+ if (!found && found_other_mode) {
+ rv = -EAGAIN;
+ goto out;
+ }
+
+ if (!found) {
+ rv = -ENOENT;
+ goto out;
+ }
+
+ lkb->lkb_exflags = flags;
+ lkb->lkb_ownpid = (int) current->pid;
+
+ ua = lkb->lkb_ua;
+
+ ua->proc = ua_tmp->proc;
+ ua->xid = ua_tmp->xid;
+ ua->castparam = ua_tmp->castparam;
+ ua->castaddr = ua_tmp->castaddr;
+ ua->bastparam = ua_tmp->bastparam;
+ ua->bastaddr = ua_tmp->bastaddr;
+ ua->user_lksb = ua_tmp->user_lksb;
+
+ /*
+ * The lkb reference from the ls_orphans list was not
+ * removed above, and is now considered the reference
+ * for the proc locks list.
+ */
+
+ spin_lock(&ua->proc->locks_spin);
+ list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
+ spin_unlock(&ua->proc->locks_spin);
+ out:
+ kfree(ua_tmp);
+ return rv;
+}
+
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in)
{
@@ -6029,7 +6101,7 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
struct dlm_args args;
int error;
- hold_lkb(lkb);
+ hold_lkb(lkb); /* reference for the ls_orphans list */
mutex_lock(&ls->ls_orphans_mutex);
list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans);
mutex_unlock(&ls->ls_orphans_mutex);
@@ -6217,7 +6289,7 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
{
int error = 0;
- if (nodeid != dlm_our_nodeid()) {
+ if (nodeid && (nodeid != dlm_our_nodeid())) {
error = send_purge(ls, nodeid, pid);
} else {
dlm_lock_recovery(ls);
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 5e0c72e36a9b..ed8ebd3a8593 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -49,6 +49,9 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs);
+int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, void *name, unsigned int namelen,
+ unsigned long timeout_cs, uint32_t *lkid);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 142e21655eed..fb85f32e9eca 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -238,6 +238,7 @@ static int device_user_lock(struct dlm_user_proc *proc,
{
struct dlm_ls *ls;
struct dlm_user_args *ua;
+ uint32_t lkid;
int error = -ENOMEM;
ls = dlm_find_lockspace_local(proc->lockspace);
@@ -260,12 +261,20 @@ static int device_user_lock(struct dlm_user_proc *proc,
ua->bastaddr = params->bastaddr;
ua->xid = params->xid;
- if (params->flags & DLM_LKF_CONVERT)
+ if (params->flags & DLM_LKF_CONVERT) {
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb,
(unsigned long) params->timeout);
- else {
+ } else if (params->flags & DLM_LKF_ORPHAN) {
+ error = dlm_user_adopt_orphan(ls, ua,
+ params->mode, params->flags,
+ params->name, params->namelen,
+ (unsigned long) params->timeout,
+ &lkid);
+ if (!error)
+ error = lkid;
+ } else {
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 1de7294aad20..2bc2c87f35e7 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,13 +40,14 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
static void drop_slab(void)
{
int nr_objects;
- struct shrink_control shrink = {
- .gfp_mask = GFP_KERNEL,
- };
- nodes_setall(shrink.nodes_to_scan);
do {
- nr_objects = shrink_slab(&shrink, 1000, 1000);
+ int nid;
+
+ nr_objects = 0;
+ for_each_online_node(nid)
+ nr_objects += shrink_node_slabs(GFP_KERNEL, nid,
+ 1000, 1000);
} while (nr_objects > 10);
}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 2f6735dbf1a9..719e1ce1c609 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1373,7 +1373,7 @@ out:
int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode)
{
struct dentry *lower_dentry =
- ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
+ ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_path.dentry;
ssize_t size;
int rc = 0;
@@ -1917,7 +1917,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
break;
case 2:
dst[dst_byte_offset++] |= (src_byte);
- dst[dst_byte_offset] = 0;
current_bit_offset = 0;
break;
}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index f5bce9096555..6f4e659f508f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -75,11 +75,11 @@ struct ecryptfs_getdents_callback {
/* Inspired by generic filldir in fs/readdir.c */
static int
-ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen,
- loff_t offset, u64 ino, unsigned int d_type)
+ecryptfs_filldir(struct dir_context *ctx, const char *lower_name,
+ int lower_namelen, loff_t offset, u64 ino, unsigned int d_type)
{
struct ecryptfs_getdents_callback *buf =
- (struct ecryptfs_getdents_callback *)dirent;
+ container_of(ctx, struct ecryptfs_getdents_callback, ctx);
size_t name_size;
char *name;
int rc;
@@ -190,23 +190,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
{
int rc = 0;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
- struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct dentry *ecryptfs_dentry = file->f_path.dentry;
/* Private value of ecryptfs_dentry allocated in
* ecryptfs_lookup() */
struct ecryptfs_file_info *file_info;
- mount_crypt_stat = &ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
- if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
- && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
- || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
- || (file->f_flags & O_APPEND))) {
- printk(KERN_WARNING "Mount has encrypted view enabled; "
- "files may only be read\n");
- rc = -EPERM;
- goto out;
- }
/* Released in ecryptfs_release or end of function if failure */
file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
ecryptfs_set_file_private(file, file_info);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 635e8e16a5b7..917bd5c9776a 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -100,12 +100,12 @@ int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
(*size) = 0;
if (data[0] < 192) {
/* One-byte length */
- (*size) = (unsigned char)data[0];
+ (*size) = data[0];
(*length_size) = 1;
} else if (data[0] < 224) {
/* Two-byte length */
- (*size) = (((unsigned char)(data[0]) - 192) * 256);
- (*size) += ((unsigned char)(data[1]) + 192);
+ (*size) = (data[0] - 192) * 256;
+ (*size) += data[1] + 192;
(*length_size) = 2;
} else if (data[0] == 255) {
/* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c4cd1fd86cc2..d9eb84bda559 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -493,6 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
{
struct super_block *s;
struct ecryptfs_sb_info *sbi;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct ecryptfs_dentry_info *root_info;
const char *err = "Getting sb failed";
struct inode *inode;
@@ -511,6 +512,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
err = "Error parsing options";
goto out;
}
+ mount_crypt_stat = &sbi->mount_crypt_stat;
s = sget(fs_type, NULL, set_anon_super, flags, NULL);
if (IS_ERR(s)) {
@@ -557,11 +559,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
/**
* Set the POSIX ACL flag based on whether they're enabled in the lower
- * mount. Force a read-only eCryptfs mount if the lower mount is ro.
- * Allow a ro eCryptfs mount even when the lower mount is rw.
+ * mount.
*/
s->s_flags = flags & ~MS_POSIXACL;
- s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+ s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+ /**
+ * Force a read-only eCryptfs mount when:
+ * 1) The lower mount is ro
+ * 2) The ecryptfs_encrypted_view mount option is specified
+ */
+ if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+ mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ s->s_flags |= MS_RDONLY;
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 564a1fa34b99..4626976794e7 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -419,7 +419,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
ssize_t size;
void *xattr_virt;
struct dentry *lower_dentry =
- ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
+ ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_path.dentry;
struct inode *lower_inode = lower_dentry->d_inode;
int rc;
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index cdb2971192a5..90001da9abfd 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -47,8 +47,8 @@ static ssize_t efivarfs_file_write(struct file *file,
if (bytes == -ENOENT) {
drop_nlink(inode);
- d_delete(file->f_dentry);
- dput(file->f_dentry);
+ d_delete(file->f_path.dentry);
+ dput(file->f_path.dentry);
} else {
mutex_lock(&inode->i_mutex);
i_size_write(inode, datasize + sizeof(attributes));
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 0a48886e069c..6dad1176ec52 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -236,6 +236,7 @@ static void efivarfs_kill_sb(struct super_block *sb)
}
static struct file_system_type efivarfs_type = {
+ .owner = THIS_MODULE,
.name = "efivarfs",
.mount = efivarfs_mount,
.kill_sb = efivarfs_kill_sb,
@@ -244,17 +245,23 @@ static struct file_system_type efivarfs_type = {
static __init int efivarfs_init(void)
{
if (!efi_enabled(EFI_RUNTIME_SERVICES))
- return 0;
+ return -ENODEV;
if (!efivars_kobject())
- return 0;
+ return -ENODEV;
return register_filesystem(&efivarfs_type);
}
+static __exit void efivarfs_exit(void)
+{
+ unregister_filesystem(&efivarfs_type);
+}
+
MODULE_AUTHOR("Matthew Garrett, Jeremy Kerr");
MODULE_DESCRIPTION("EFI Variable Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS("efivarfs");
module_init(efivarfs_init);
+module_exit(efivarfs_exit);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d6a88e7812f3..4b0a226024fa 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -287,17 +287,14 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
}
#ifdef CONFIG_PROC_FS
-static int eventfd_show_fdinfo(struct seq_file *m, struct file *f)
+static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
{
struct eventfd_ctx *ctx = f->private_data;
- int ret;
spin_lock_irq(&ctx->wqh.lock);
- ret = seq_printf(m, "eventfd-count: %16llx\n",
- (unsigned long long)ctx->count);
+ seq_printf(m, "eventfd-count: %16llx\n",
+ (unsigned long long)ctx->count);
spin_unlock_irq(&ctx->wqh.lock);
-
- return ret;
}
#endif
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 7bcfff900f05..d77f94491352 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -870,25 +870,22 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
}
#ifdef CONFIG_PROC_FS
-static int ep_show_fdinfo(struct seq_file *m, struct file *f)
+static void ep_show_fdinfo(struct seq_file *m, struct file *f)
{
struct eventpoll *ep = f->private_data;
struct rb_node *rbp;
- int ret = 0;
mutex_lock(&ep->mtx);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
- ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
- epi->ffd.fd, epi->event.events,
- (long long)epi->event.data);
- if (ret)
+ seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+ epi->ffd.fd, epi->event.events,
+ (long long)epi->event.data);
+ if (seq_has_overflowed(m))
break;
}
mutex_unlock(&ep->mtx);
-
- return ret;
}
#endif
diff --git a/fs/exec.c b/fs/exec.c
index 7302b75a9820..ad8798e26be9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -277,6 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
goto err;
mm->stack_vm = mm->total_vm = 1;
+ arch_bprm_mm_init(mm, vma);
up_write(&mm->mmap_sem);
bprm->p = vma->vm_end - sizeof(void *);
return 0;
@@ -747,18 +748,25 @@ EXPORT_SYMBOL(setup_arg_pages);
#endif /* CONFIG_MMU */
-static struct file *do_open_exec(struct filename *name)
+static struct file *do_open_execat(int fd, struct filename *name, int flags)
{
struct file *file;
int err;
- static const struct open_flags open_exec_flags = {
+ struct open_flags open_exec_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
.acc_mode = MAY_EXEC | MAY_OPEN,
.intent = LOOKUP_OPEN,
.lookup_flags = LOOKUP_FOLLOW,
};
- file = do_filp_open(AT_FDCWD, name, &open_exec_flags);
+ if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
+ return ERR_PTR(-EINVAL);
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
+
+ file = do_filp_open(fd, name, &open_exec_flags);
if (IS_ERR(file))
goto out;
@@ -769,12 +777,13 @@ static struct file *do_open_exec(struct filename *name)
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
goto exit;
- fsnotify_open(file);
-
err = deny_write_access(file);
if (err)
goto exit;
+ if (name->name[0] != '\0')
+ fsnotify_open(file);
+
out:
return file;
@@ -786,7 +795,7 @@ exit:
struct file *open_exec(const char *name)
{
struct filename tmp = { .name = name };
- return do_open_exec(&tmp);
+ return do_open_execat(AT_FDCWD, &tmp, 0);
}
EXPORT_SYMBOL(open_exec);
@@ -1427,10 +1436,12 @@ static int exec_binprm(struct linux_binprm *bprm)
/*
* sys_execve() executes a new program.
*/
-static int do_execve_common(struct filename *filename,
- struct user_arg_ptr argv,
- struct user_arg_ptr envp)
+static int do_execveat_common(int fd, struct filename *filename,
+ struct user_arg_ptr argv,
+ struct user_arg_ptr envp,
+ int flags)
{
+ char *pathbuf = NULL;
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
@@ -1471,7 +1482,7 @@ static int do_execve_common(struct filename *filename,
check_unsafe_exec(bprm);
current->in_execve = 1;
- file = do_open_exec(filename);
+ file = do_open_execat(fd, filename, flags);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;
@@ -1479,7 +1490,28 @@ static int do_execve_common(struct filename *filename,
sched_exec();
bprm->file = file;
- bprm->filename = bprm->interp = filename->name;
+ if (fd == AT_FDCWD || filename->name[0] == '/') {
+ bprm->filename = filename->name;
+ } else {
+ if (filename->name[0] == '\0')
+ pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd);
+ else
+ pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s",
+ fd, filename->name);
+ if (!pathbuf) {
+ retval = -ENOMEM;
+ goto out_unmark;
+ }
+ /*
+ * Record that a name derived from an O_CLOEXEC fd will be
+ * inaccessible after exec. Relies on having exclusive access to
+ * current->files (due to unshare_files above).
+ */
+ if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
+ bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
+ bprm->filename = pathbuf;
+ }
+ bprm->interp = bprm->filename;
retval = bprm_mm_init(bprm);
if (retval)
@@ -1520,6 +1552,7 @@ static int do_execve_common(struct filename *filename,
acct_update_integrals(current);
task_numa_free(current);
free_bprm(bprm);
+ kfree(pathbuf);
putname(filename);
if (displaced)
put_files_struct(displaced);
@@ -1537,6 +1570,7 @@ out_unmark:
out_free:
free_bprm(bprm);
+ kfree(pathbuf);
out_files:
if (displaced)
@@ -1552,7 +1586,18 @@ int do_execve(struct filename *filename,
{
struct user_arg_ptr argv = { .ptr.native = __argv };
struct user_arg_ptr envp = { .ptr.native = __envp };
- return do_execve_common(filename, argv, envp);
+ return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
+}
+
+int do_execveat(int fd, struct filename *filename,
+ const char __user *const __user *__argv,
+ const char __user *const __user *__envp,
+ int flags)
+{
+ struct user_arg_ptr argv = { .ptr.native = __argv };
+ struct user_arg_ptr envp = { .ptr.native = __envp };
+
+ return do_execveat_common(fd, filename, argv, envp, flags);
}
#ifdef CONFIG_COMPAT
@@ -1568,7 +1613,23 @@ static int compat_do_execve(struct filename *filename,
.is_compat = true,
.ptr.compat = __envp,
};
- return do_execve_common(filename, argv, envp);
+ return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
+}
+
+static int compat_do_execveat(int fd, struct filename *filename,
+ const compat_uptr_t __user *__argv,
+ const compat_uptr_t __user *__envp,
+ int flags)
+{
+ struct user_arg_ptr argv = {
+ .is_compat = true,
+ .ptr.compat = __argv,
+ };
+ struct user_arg_ptr envp = {
+ .is_compat = true,
+ .ptr.compat = __envp,
+ };
+ return do_execveat_common(fd, filename, argv, envp, flags);
}
#endif
@@ -1608,6 +1669,20 @@ SYSCALL_DEFINE3(execve,
{
return do_execve(getname(filename), argv, envp);
}
+
+SYSCALL_DEFINE5(execveat,
+ int, fd, const char __user *, filename,
+ const char __user *const __user *, argv,
+ const char __user *const __user *, envp,
+ int, flags)
+{
+ int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+
+ return do_execveat(fd,
+ getname_flags(filename, lookup_flags, NULL),
+ argv, envp, flags);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
const compat_uptr_t __user *, argv,
@@ -1615,4 +1690,17 @@ COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
{
return compat_do_execve(getname(filename), argv, envp);
}
+
+COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
+ const char __user *, filename,
+ const compat_uptr_t __user *, argv,
+ const compat_uptr_t __user *, envp,
+ int, flags)
+{
+ int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+
+ return compat_do_execveat(fd,
+ getname_flags(filename, lookup_flags, NULL),
+ argv, envp, flags);
+}
#endif
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b01fbfb51f43..fdfd206c737a 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -50,7 +50,7 @@ find_acceptable_alias(struct dentry *result,
inode = result->d_inode;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
dget(dentry);
spin_unlock(&inode->i_lock);
if (toput)
@@ -241,10 +241,11 @@ struct getdents_callback {
* A rather strange filldir function to capture
* the name matching the specified inode number.
*/
-static int filldir_one(void * __buf, const char * name, int len,
+static int filldir_one(struct dir_context *ctx, const char *name, int len,
loff_t pos, u64 ino, unsigned int d_type)
{
- struct getdents_callback *buf = __buf;
+ struct getdents_callback *buf =
+ container_of(ctx, struct getdents_callback, ctx);
int result = 0;
buf->sequence++;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index d9a17d0b124d..e4279ead4a05 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -689,6 +689,9 @@ struct ext2_inode_info {
struct mutex truncate_mutex;
struct inode vfs_inode;
struct list_head i_orphan; /* unlinked but open inodes */
+#ifdef CONFIG_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+#endif
};
/*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 170dc41e8bf4..ae55fddc26a9 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -166,6 +166,10 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
return NULL;
ei->i_block_alloc_info = NULL;
ei->vfs_inode.i_version = 1;
+#ifdef CONFIG_QUOTA
+ memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
+#endif
+
return &ei->vfs_inode;
}
@@ -303,6 +307,10 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
#ifdef CONFIG_QUOTA
static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
+static struct dquot **ext2_get_dquots(struct inode *inode)
+{
+ return EXT2_I(inode)->i_dquot;
+}
#endif
static const struct super_operations ext2_sops = {
@@ -320,6 +328,7 @@ static const struct super_operations ext2_sops = {
#ifdef CONFIG_QUOTA
.quota_read = ext2_quota_read,
.quota_write = ext2_quota_write,
+ .get_dquots = ext2_get_dquots,
#endif
};
@@ -1090,6 +1099,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_QUOTA
sb->dq_op = &dquot_operations;
sb->s_qcop = &dquot_quotactl_ops;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
#endif
root = ext2_iget(sb, EXT2_ROOT_INO);
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h
index fc3cdcf24aed..f483a80b3fe7 100644
--- a/fs/ext3/ext3.h
+++ b/fs/ext3/ext3.h
@@ -615,6 +615,10 @@ struct ext3_inode_info {
atomic_t i_sync_tid;
atomic_t i_datasync_tid;
+#ifdef CONFIG_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+#endif
+
struct inode vfs_inode;
};
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index eb742d0e67ff..9b4e7d750d4f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -485,6 +485,10 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
ei->vfs_inode.i_version = 1;
atomic_set(&ei->i_datasync_tid, 0);
atomic_set(&ei->i_sync_tid, 0);
+#ifdef CONFIG_QUOTA
+ memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
+#endif
+
return &ei->vfs_inode;
}
@@ -764,6 +768,10 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
static ssize_t ext3_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
+static struct dquot **ext3_get_dquots(struct inode *inode)
+{
+ return EXT3_I(inode)->i_dquot;
+}
static const struct dquot_operations ext3_quota_operations = {
.write_dquot = ext3_write_dquot,
@@ -803,6 +811,7 @@ static const struct super_operations ext3_sops = {
#ifdef CONFIG_QUOTA
.quota_read = ext3_quota_read,
.quota_write = ext3_quota_write,
+ .get_dquots = ext3_get_dquots,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
};
@@ -2001,6 +2010,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
#ifdef CONFIG_QUOTA
sb->s_qcop = &ext3_qctl_operations;
sb->dq_op = &ext3_quota_operations;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c55a1faaed58..a75fba67bb1f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -158,17 +158,8 @@ struct ext4_allocation_request {
#define EXT4_MAP_MAPPED (1 << BH_Mapped)
#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
-/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
- * ext4_map_blocks wants to know whether or not the underlying cluster has
- * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
- * the requested mapping was from previously mapped (or delayed allocated)
- * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
- * should never appear on buffer_head's state flags.
- */
-#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
- EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
- EXT4_MAP_FROM_CLUSTER)
+ EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
struct ext4_map_blocks {
ext4_fsblk_t m_pblk;
@@ -565,10 +556,8 @@ enum {
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
- /* Do not put hole in extent cache */
-#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
/* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200
/*
* The bit position of these flags must not overlap with any of the
@@ -889,10 +878,12 @@ struct ext4_inode_info {
/* extents status tree */
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
- struct list_head i_es_lru;
+ struct list_head i_es_list;
unsigned int i_es_all_nr; /* protected by i_es_lock */
- unsigned int i_es_lru_nr; /* protected by i_es_lock */
- unsigned long i_touch_when; /* jiffies of last accessing */
+ unsigned int i_es_shk_nr; /* protected by i_es_lock */
+ ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for
+ extents to shrink. Protected by
+ i_es_lock */
/* ialloc */
ext4_group_t i_last_alloc_group;
@@ -941,6 +932,10 @@ struct ext4_inode_info {
tid_t i_sync_tid;
tid_t i_datasync_tid;
+#ifdef CONFIG_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+#endif
+
/* Precomputed uuid+inum+igen checksum for seeding inode checksums */
__u32 i_csum_seed;
};
@@ -1333,10 +1328,11 @@ struct ext4_sb_info {
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
- struct list_head s_es_lru;
+ struct list_head s_es_list; /* List of inodes with reclaimable extents */
+ long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
struct mb_cache *s_mb_cache;
- spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
+ spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
struct ratelimit_state s_err_ratelimit_state;
@@ -2192,7 +2188,6 @@ extern int ext4_calculate_overhead(struct super_block *sb);
extern void ext4_superblock_csum_set(struct super_block *sb);
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
-extern void ext4_kvfree(void *ptr);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
extern const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -2643,7 +2638,7 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
int *retval);
extern int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
- int *has_inline);
+ int *has_inline, __u64 start, __u64 len);
extern int ext4_try_to_evict_inline_data(handle_t *handle,
struct inode *inode,
int needed);
@@ -2791,16 +2786,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
/*
- * Note that these flags will never ever appear in a buffer_head's state flag.
- * See EXT4_MAP_... to see where this is used.
- */
-enum ext4_state_bits {
- BH_AllocFromCluster /* allocated blocks were part of already
- * allocated cluster. */
- = BH_JBDPrivateStart
-};
-
-/*
* Add new method to test whether block and inode bitmaps are properly
* initialized. With uninit_bg reading the block from disk is not enough
* to mark the bitmap uptodate. We need to also zero-out the bitmap
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0b16fb4c06d3..bed43081720f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2306,16 +2306,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
ext4_lblk_t block)
{
int depth = ext_depth(inode);
- unsigned long len = 0;
- ext4_lblk_t lblock = 0;
+ ext4_lblk_t len;
+ ext4_lblk_t lblock;
struct ext4_extent *ex;
+ struct extent_status es;
ex = path[depth].p_ext;
if (ex == NULL) {
- /*
- * there is no extent yet, so gap is [0;-] and we
- * don't cache it
- */
+ /* there is no extent yet, so gap is [0;-] */
+ lblock = 0;
+ len = EXT_MAX_BLOCKS;
ext_debug("cache gap(whole file):");
} else if (block < le32_to_cpu(ex->ee_block)) {
lblock = block;
@@ -2324,9 +2324,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
block,
le32_to_cpu(ex->ee_block),
ext4_ext_get_actual_len(ex));
- if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
- ext4_es_insert_extent(inode, lblock, len, ~0,
- EXTENT_STATUS_HOLE);
} else if (block >= le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex)) {
ext4_lblk_t next;
@@ -2340,14 +2337,19 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
block);
BUG_ON(next == lblock);
len = next - lblock;
- if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
- ext4_es_insert_extent(inode, lblock, len, ~0,
- EXTENT_STATUS_HOLE);
} else {
BUG();
}
- ext_debug(" -> %u:%lu\n", lblock, len);
+ ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es);
+ if (es.es_len) {
+ /* There's delayed extent containing lblock? */
+ if (es.es_lblk <= lblock)
+ return;
+ len = min(es.es_lblk - lblock, len);
+ }
+ ext_debug(" -> %u:%u\n", lblock, len);
+ ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE);
}
/*
@@ -2481,7 +2483,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t from, ext4_lblk_t to)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned short ee_len = ext4_ext_get_actual_len(ex);
+ unsigned short ee_len = ext4_ext_get_actual_len(ex);
ext4_fsblk_t pblk;
int flags = get_default_free_blocks_flags(inode);
@@ -2490,7 +2492,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* at the beginning of the extent. Instead, we make a note
* that we tried freeing the cluster, and check to see if we
* need to free it on a subsequent call to ext4_remove_blocks,
- * or at the end of the ext4_truncate() operation.
+ * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
*/
flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
@@ -2501,8 +2503,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* partial cluster here.
*/
pblk = ext4_ext_pblock(ex) + ee_len - 1;
- if ((*partial_cluster > 0) &&
- (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
+ if (*partial_cluster > 0 &&
+ *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
ext4_free_blocks(handle, inode, NULL,
EXT4_C2B(sbi, *partial_cluster),
sbi->s_cluster_ratio, flags);
@@ -2528,7 +2530,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
&& to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
/* tail removal */
ext4_lblk_t num;
- unsigned int unaligned;
+ long long first_cluster;
num = le32_to_cpu(ex->ee_block) + ee_len - from;
pblk = ext4_ext_pblock(ex) + ee_len - num;
@@ -2538,7 +2540,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* used by any other extent (partial_cluster is negative).
*/
if (*partial_cluster < 0 &&
- -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
+ *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1))
flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
ext_debug("free last %u blocks starting %llu partial %lld\n",
@@ -2549,21 +2551,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* beginning of a cluster, and we removed the entire
* extent and the cluster is not used by any other extent,
* save the partial cluster here, since we might need to
- * delete if we determine that the truncate operation has
- * removed all of the blocks in the cluster.
+ * delete if we determine that the truncate or punch hole
+ * operation has removed all of the blocks in the cluster.
+ * If that cluster is used by another extent, preserve its
+ * negative value so it isn't freed later on.
*
- * On the other hand, if we did not manage to free the whole
- * extent, we have to mark the cluster as used (store negative
- * cluster number in partial_cluster).
+ * If the whole extent wasn't freed, we've reached the
+ * start of the truncated/punched region and have finished
+ * removing blocks. If there's a partial cluster here it's
+ * shared with the remainder of the extent and is no longer
+ * a candidate for removal.
*/
- unaligned = EXT4_PBLK_COFF(sbi, pblk);
- if (unaligned && (ee_len == num) &&
- (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
- *partial_cluster = EXT4_B2C(sbi, pblk);
- else if (unaligned)
- *partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
- else if (*partial_cluster > 0)
+ if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) {
+ first_cluster = (long long) EXT4_B2C(sbi, pblk);
+ if (first_cluster != -*partial_cluster)
+ *partial_cluster = first_cluster;
+ } else {
*partial_cluster = 0;
+ }
} else
ext4_error(sbi->s_sb, "strange request: removal(2) "
"%u-%u from %u:%u\n",
@@ -2574,15 +2579,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
/*
* ext4_ext_rm_leaf() Removes the extents associated with the
- * blocks appearing between "start" and "end", and splits the extents
- * if "start" and "end" appear in the same extent
+ * blocks appearing between "start" and "end". Both "start"
+ * and "end" must appear in the same extent or EIO is returned.
*
* @handle: The journal handle
* @inode: The files inode
* @path: The path to the leaf
* @partial_cluster: The cluster which we'll have to free if all extents
- * has been released from it. It gets negative in case
- * that the cluster is still used.
+ * has been released from it. However, if this value is
+ * negative, it's a cluster just to the right of the
+ * punched region and it must not be freed.
* @start: The first block to remove
* @end: The last block to remove
*/
@@ -2621,27 +2627,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
- /*
- * If we're starting with an extent other than the last one in the
- * node, we need to see if it shares a cluster with the extent to
- * the right (towards the end of the file). If its leftmost cluster
- * is this extent's rightmost cluster and it is not cluster aligned,
- * we'll mark it as a partial that is not to be deallocated.
- */
-
- if (ex != EXT_LAST_EXTENT(eh)) {
- ext4_fsblk_t current_pblk, right_pblk;
- long long current_cluster, right_cluster;
-
- current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
- current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
- right_pblk = ext4_ext_pblock(ex + 1);
- right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
- if (current_cluster == right_cluster &&
- EXT4_PBLK_COFF(sbi, right_pblk))
- *partial_cluster = -right_cluster;
- }
-
trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
while (ex >= EXT_FIRST_EXTENT(eh) &&
@@ -2666,14 +2651,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (end < ex_ee_block) {
/*
* We're going to skip this extent and move to another,
- * so if this extent is not cluster aligned we have
- * to mark the current cluster as used to avoid
- * accidentally freeing it later on
+ * so note that its first cluster is in use to avoid
+ * freeing it when removing blocks. Eventually, the
+ * right edge of the truncated/punched region will
+ * be just to the left.
*/
- pblk = ext4_ext_pblock(ex);
- if (EXT4_PBLK_COFF(sbi, pblk))
+ if (sbi->s_cluster_ratio > 1) {
+ pblk = ext4_ext_pblock(ex);
*partial_cluster =
- -((long long)EXT4_B2C(sbi, pblk));
+ -(long long) EXT4_B2C(sbi, pblk);
+ }
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2749,8 +2736,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
sizeof(struct ext4_extent));
}
le16_add_cpu(&eh->eh_entries, -1);
- } else if (*partial_cluster > 0)
- *partial_cluster = 0;
+ }
err = ext4_ext_dirty(handle, inode, path + depth);
if (err)
@@ -2769,20 +2755,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/*
* If there's a partial cluster and at least one extent remains in
* the leaf, free the partial cluster if it isn't shared with the
- * current extent. If there's a partial cluster and no extents
- * remain in the leaf, it can't be freed here. It can only be
- * freed when it's possible to determine if it's not shared with
- * any other extent - when the next leaf is processed or when space
- * removal is complete.
+ * current extent. If it is shared with the current extent
+ * we zero partial_cluster because we've reached the start of the
+ * truncated/punched region and we're done removing blocks.
*/
- if (*partial_cluster > 0 && eh->eh_entries &&
- (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
- *partial_cluster)) {
- int flags = get_default_free_blocks_flags(inode);
-
- ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(sbi, *partial_cluster),
- sbi->s_cluster_ratio, flags);
+ if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) {
+ pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
+ if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
+ ext4_free_blocks(handle, inode, NULL,
+ EXT4_C2B(sbi, *partial_cluster),
+ sbi->s_cluster_ratio,
+ get_default_free_blocks_flags(inode));
+ }
*partial_cluster = 0;
}
@@ -2819,7 +2803,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end)
{
- struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int depth = ext_depth(inode);
struct ext4_ext_path *path = NULL;
long long partial_cluster = 0;
@@ -2845,9 +2829,10 @@ again:
*/
if (end < EXT_MAX_BLOCKS - 1) {
struct ext4_extent *ex;
- ext4_lblk_t ee_block;
+ ext4_lblk_t ee_block, ex_end, lblk;
+ ext4_fsblk_t pblk;
- /* find extent for this block */
+ /* find extent for or closest extent to this block */
path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path)) {
ext4_journal_stop(handle);
@@ -2867,6 +2852,7 @@ again:
}
ee_block = le32_to_cpu(ex->ee_block);
+ ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
/*
* See if the last block is inside the extent, if so split
@@ -2874,8 +2860,19 @@ again:
* tail of the first part of the split extent in
* ext4_ext_rm_leaf().
*/
- if (end >= ee_block &&
- end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
+ if (end >= ee_block && end < ex_end) {
+
+ /*
+ * If we're going to split the extent, note that
+ * the cluster containing the block after 'end' is
+ * in use to avoid freeing it when removing blocks.
+ */
+ if (sbi->s_cluster_ratio > 1) {
+ pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
+ partial_cluster =
+ -(long long) EXT4_B2C(sbi, pblk);
+ }
+
/*
* Split the extent in two so that 'end' is the last
* block in the first new extent. Also we should not
@@ -2886,6 +2883,24 @@ again:
end + 1, 1);
if (err < 0)
goto out;
+
+ } else if (sbi->s_cluster_ratio > 1 && end >= ex_end) {
+ /*
+ * If there's an extent to the right its first cluster
+ * contains the immediate right boundary of the
+ * truncated/punched region. Set partial_cluster to
+ * its negative value so it won't be freed if shared
+ * with the current extent. The end < ee_block case
+ * is handled in ext4_ext_rm_leaf().
+ */
+ lblk = ex_end + 1;
+ err = ext4_ext_search_right(inode, path, &lblk, &pblk,
+ &ex);
+ if (err)
+ goto out;
+ if (pblk)
+ partial_cluster =
+ -(long long) EXT4_B2C(sbi, pblk);
}
}
/*
@@ -2996,16 +3011,18 @@ again:
trace_ext4_ext_remove_space_done(inode, start, end, depth,
partial_cluster, path->p_hdr->eh_entries);
- /* If we still have something in the partial cluster and we have removed
+ /*
+ * If we still have something in the partial cluster and we have removed
* even the first extent, then we should free the blocks in the partial
- * cluster as well. */
- if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) {
- int flags = get_default_free_blocks_flags(inode);
-
+ * cluster as well. (This code will only run when there are no leaves
+ * to the immediate left of the truncated/punched region.)
+ */
+ if (partial_cluster > 0 && err == 0) {
+ /* don't zero partial_cluster since it's not used afterwards */
ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(EXT4_SB(sb), partial_cluster),
- EXT4_SB(sb)->s_cluster_ratio, flags);
- partial_cluster = 0;
+ EXT4_C2B(sbi, partial_cluster),
+ sbi->s_cluster_ratio,
+ get_default_free_blocks_flags(inode));
}
/* TODO: flexible tree reduction should be here */
@@ -4267,6 +4284,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ext4_io_end_t *io = ext4_inode_aio(inode);
ext4_lblk_t cluster_offset;
int set_unwritten = 0;
+ bool map_from_cluster = false;
ext_debug("blocks %u/%u requested for inode %lu\n",
map->m_lblk, map->m_len, inode->i_ino);
@@ -4343,10 +4361,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
}
}
- if ((sbi->s_cluster_ratio > 1) &&
- ext4_find_delalloc_cluster(inode, map->m_lblk))
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
-
/*
* requested block isn't allocated yet;
* we couldn't try to create block if create flag is zero
@@ -4356,15 +4370,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* put just found gap into cache to speed up
* subsequent requests
*/
- if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
- ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+ ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
goto out2;
}
/*
* Okay, we need to do block allocation.
*/
- map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk);
cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
@@ -4376,7 +4388,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
ar.len = allocated = map->m_len;
newblock = map->m_pblk;
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+ map_from_cluster = true;
goto got_allocated_blocks;
}
@@ -4397,7 +4409,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
ar.len = allocated = map->m_len;
newblock = map->m_pblk;
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+ map_from_cluster = true;
goto got_allocated_blocks;
}
@@ -4523,7 +4535,7 @@ got_allocated_blocks:
*/
reserved_clusters = get_reserved_cluster_alloc(inode,
map->m_lblk, allocated);
- if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+ if (map_from_cluster) {
if (reserved_clusters) {
/*
* We have clusters reserved for this range.
@@ -4620,7 +4632,6 @@ out2:
trace_ext4_ext_map_blocks_exit(inode, flags, map,
err ? err : allocated);
- ext4_es_lru_add(inode);
return err ? err : allocated;
}
@@ -5140,7 +5151,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ext4_has_inline_data(inode)) {
int has_inline = 1;
- error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline);
+ error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline,
+ start, len);
if (has_inline)
return error;
@@ -5179,7 +5191,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
error = ext4_fill_fiemap_extents(inode, start_blk,
len_blks, fieinfo);
}
- ext4_es_lru_add(inode);
return error;
}
@@ -5239,8 +5250,6 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
return -EIO;
ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
- if (!ex_last)
- return -EIO;
err = ext4_access_path(handle, inode, path + depth);
if (err)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 94e7855ae71b..e04d45733976 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -147,10 +147,9 @@ static struct kmem_cache *ext4_es_cachep;
static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end);
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
- int nr_to_scan);
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
- struct ext4_inode_info *locked_ei);
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei);
int __init ext4_init_es(void)
{
@@ -298,6 +297,36 @@ out:
trace_ext4_es_find_delayed_extent_range_exit(inode, es);
}
+static void ext4_es_list_add(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (!list_empty(&ei->i_es_list))
+ return;
+
+ spin_lock(&sbi->s_es_lock);
+ if (list_empty(&ei->i_es_list)) {
+ list_add_tail(&ei->i_es_list, &sbi->s_es_list);
+ sbi->s_es_nr_inode++;
+ }
+ spin_unlock(&sbi->s_es_lock);
+}
+
+static void ext4_es_list_del(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ spin_lock(&sbi->s_es_lock);
+ if (!list_empty(&ei->i_es_list)) {
+ list_del_init(&ei->i_es_list);
+ sbi->s_es_nr_inode--;
+ WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
+ }
+ spin_unlock(&sbi->s_es_lock);
+}
+
static struct extent_status *
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
ext4_fsblk_t pblk)
@@ -314,9 +343,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
* We don't count delayed extent because we never try to reclaim them
*/
if (!ext4_es_is_delayed(es)) {
- EXT4_I(inode)->i_es_lru_nr++;
+ if (!EXT4_I(inode)->i_es_shk_nr++)
+ ext4_es_list_add(inode);
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
- s_es_stats.es_stats_lru_cnt);
+ s_es_stats.es_stats_shk_cnt);
}
EXT4_I(inode)->i_es_all_nr++;
@@ -330,12 +360,13 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
EXT4_I(inode)->i_es_all_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
- /* Decrease the lru counter when this es is not delayed */
+ /* Decrease the shrink counter when this es is not delayed */
if (!ext4_es_is_delayed(es)) {
- BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
- EXT4_I(inode)->i_es_lru_nr--;
+ BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
+ if (!--EXT4_I(inode)->i_es_shk_nr)
+ ext4_es_list_del(inode);
percpu_counter_dec(&EXT4_SB(inode->i_sb)->
- s_es_stats.es_stats_lru_cnt);
+ s_es_stats.es_stats_shk_cnt);
}
kmem_cache_free(ext4_es_cachep, es);
@@ -351,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
static int ext4_es_can_be_merged(struct extent_status *es1,
struct extent_status *es2)
{
- if (ext4_es_status(es1) != ext4_es_status(es2))
+ if (ext4_es_type(es1) != ext4_es_type(es2))
return 0;
if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
@@ -394,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
es1 = rb_entry(node, struct extent_status, rb_node);
if (ext4_es_can_be_merged(es1, es)) {
es1->es_len += es->es_len;
+ if (ext4_es_is_referenced(es))
+ ext4_es_set_referenced(es1);
rb_erase(&es->rb_node, &tree->root);
ext4_es_free_extent(inode, es);
es = es1;
@@ -416,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
es1 = rb_entry(node, struct extent_status, rb_node);
if (ext4_es_can_be_merged(es, es1)) {
es->es_len += es1->es_len;
+ if (ext4_es_is_referenced(es1))
+ ext4_es_set_referenced(es);
rb_erase(node, &tree->root);
ext4_es_free_extent(inode, es1);
}
@@ -683,8 +718,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
goto error;
retry:
err = __es_insert_extent(inode, &newes);
- if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
- EXT4_I(inode)))
+ if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
+ 128, EXT4_I(inode)))
goto retry;
if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
err = 0;
@@ -782,6 +817,8 @@ out:
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
+ if (!ext4_es_is_referenced(es))
+ ext4_es_set_referenced(es);
stats->es_stats_cache_hits++;
} else {
stats->es_stats_cache_misses++;
@@ -841,8 +878,8 @@ retry:
es->es_lblk = orig_es.es_lblk;
es->es_len = orig_es.es_len;
if ((err == -ENOMEM) &&
- __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
- EXT4_I(inode)))
+ __es_shrink(EXT4_SB(inode->i_sb),
+ 128, EXT4_I(inode)))
goto retry;
goto out;
}
@@ -914,6 +951,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
end = lblk + len - 1;
BUG_ON(end < lblk);
+ /*
+ * ext4_clear_inode() depends on us taking i_es_lock unconditionally
+ * so that we are sure __es_shrink() is done with the inode before it
+ * is reclaimed.
+ */
write_lock(&EXT4_I(inode)->i_es_lock);
err = __es_remove_extent(inode, lblk, end);
write_unlock(&EXT4_I(inode)->i_es_lock);
@@ -921,114 +963,75 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
return err;
}
-static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
- struct list_head *b)
-{
- struct ext4_inode_info *eia, *eib;
- eia = list_entry(a, struct ext4_inode_info, i_es_lru);
- eib = list_entry(b, struct ext4_inode_info, i_es_lru);
-
- if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
- !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
- return 1;
- if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
- ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
- return -1;
- if (eia->i_touch_when == eib->i_touch_when)
- return 0;
- if (time_after(eia->i_touch_when, eib->i_touch_when))
- return 1;
- else
- return -1;
-}
-
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
- struct ext4_inode_info *locked_ei)
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei)
{
struct ext4_inode_info *ei;
struct ext4_es_stats *es_stats;
- struct list_head *cur, *tmp;
- LIST_HEAD(skipped);
ktime_t start_time;
u64 scan_time;
+ int nr_to_walk;
int nr_shrunk = 0;
- int retried = 0, skip_precached = 1, nr_skipped = 0;
+ int retried = 0, nr_skipped = 0;
es_stats = &sbi->s_es_stats;
start_time = ktime_get();
- spin_lock(&sbi->s_es_lru_lock);
retry:
- list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
- int shrunk;
-
- /*
- * If we have already reclaimed all extents from extent
- * status tree, just stop the loop immediately.
- */
- if (percpu_counter_read_positive(
- &es_stats->es_stats_lru_cnt) == 0)
- break;
-
- ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+ spin_lock(&sbi->s_es_lock);
+ nr_to_walk = sbi->s_es_nr_inode;
+ while (nr_to_walk-- > 0) {
+ if (list_empty(&sbi->s_es_list)) {
+ spin_unlock(&sbi->s_es_lock);
+ goto out;
+ }
+ ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
+ i_es_list);
+ /* Move the inode to the tail */
+ list_move_tail(&ei->i_es_list, &sbi->s_es_list);
/*
- * Skip the inode that is newer than the last_sorted
- * time. Normally we try hard to avoid shrinking
- * precached inodes, but we will as a last resort.
+ * Normally we try hard to avoid shrinking precached inodes,
+ * but we will as a last resort.
*/
- if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
- (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
- EXT4_STATE_EXT_PRECACHED))) {
+ if (!retried && ext4_test_inode_state(&ei->vfs_inode,
+ EXT4_STATE_EXT_PRECACHED)) {
nr_skipped++;
- list_move_tail(cur, &skipped);
continue;
}
- if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
- !write_trylock(&ei->i_es_lock))
+ if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
+ nr_skipped++;
continue;
+ }
+ /*
+ * Now we hold i_es_lock which protects us from inode reclaim
+ * freeing inode under us
+ */
+ spin_unlock(&sbi->s_es_lock);
- shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
- if (ei->i_es_lru_nr == 0)
- list_del_init(&ei->i_es_lru);
+ nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
write_unlock(&ei->i_es_lock);
- nr_shrunk += shrunk;
- nr_to_scan -= shrunk;
- if (nr_to_scan == 0)
- break;
+ if (nr_to_scan <= 0)
+ goto out;
+ spin_lock(&sbi->s_es_lock);
}
-
- /* Move the newer inodes into the tail of the LRU list. */
- list_splice_tail(&skipped, &sbi->s_es_lru);
- INIT_LIST_HEAD(&skipped);
+ spin_unlock(&sbi->s_es_lock);
/*
* If we skipped any inodes, and we weren't able to make any
- * forward progress, sort the list and try again.
+ * forward progress, try again to scan precached inodes.
*/
if ((nr_shrunk == 0) && nr_skipped && !retried) {
retried++;
- list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
- es_stats->es_stats_last_sorted = jiffies;
- ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
- i_es_lru);
- /*
- * If there are no non-precached inodes left on the
- * list, start releasing precached extents.
- */
- if (ext4_test_inode_state(&ei->vfs_inode,
- EXT4_STATE_EXT_PRECACHED))
- skip_precached = 0;
goto retry;
}
- spin_unlock(&sbi->s_es_lru_lock);
-
if (locked_ei && nr_shrunk == 0)
- nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+ nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
+out:
scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
if (likely(es_stats->es_stats_scan_time))
es_stats->es_stats_scan_time = (scan_time +
@@ -1043,7 +1046,7 @@ retry:
else
es_stats->es_stats_shrunk = nr_shrunk;
- trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+ trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
nr_skipped, retried);
return nr_shrunk;
}
@@ -1055,7 +1058,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
struct ext4_sb_info *sbi;
sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
- nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+ nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
return nr;
}
@@ -1068,13 +1071,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
int nr_to_scan = sc->nr_to_scan;
int ret, nr_shrunk;
- ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
if (!nr_to_scan)
return ret;
- nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
+ nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
return nr_shrunk;
@@ -1102,28 +1105,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
return 0;
/* here we just find an inode that has the max nr. of objects */
- spin_lock(&sbi->s_es_lru_lock);
- list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+ spin_lock(&sbi->s_es_lock);
+ list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
inode_cnt++;
if (max && max->i_es_all_nr < ei->i_es_all_nr)
max = ei;
else if (!max)
max = ei;
}
- spin_unlock(&sbi->s_es_lru_lock);
+ spin_unlock(&sbi->s_es_lock);
seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
- percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+ percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
seq_printf(seq, " %lu/%lu cache hits/misses\n",
es_stats->es_stats_cache_hits,
es_stats->es_stats_cache_misses);
- if (es_stats->es_stats_last_sorted != 0)
- seq_printf(seq, " %u ms last sorted interval\n",
- jiffies_to_msecs(jiffies -
- es_stats->es_stats_last_sorted));
if (inode_cnt)
- seq_printf(seq, " %d inodes on lru list\n", inode_cnt);
+ seq_printf(seq, " %d inodes on list\n", inode_cnt);
seq_printf(seq, "average:\n %llu us scan time\n",
div_u64(es_stats->es_stats_scan_time, 1000));
@@ -1132,7 +1131,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
seq_printf(seq,
"maximum:\n %lu inode (%u objects, %u reclaimable)\n"
" %llu us max scan time\n",
- max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+ max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
div_u64(es_stats->es_stats_max_scan_time, 1000));
return 0;
@@ -1181,9 +1180,11 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
{
int err;
- INIT_LIST_HEAD(&sbi->s_es_lru);
- spin_lock_init(&sbi->s_es_lru_lock);
- sbi->s_es_stats.es_stats_last_sorted = 0;
+ /* Make sure we have enough bits for physical block number */
+ BUILD_BUG_ON(ES_SHIFT < 48);
+ INIT_LIST_HEAD(&sbi->s_es_list);
+ sbi->s_es_nr_inode = 0;
+ spin_lock_init(&sbi->s_es_lock);
sbi->s_es_stats.es_stats_shrunk = 0;
sbi->s_es_stats.es_stats_cache_hits = 0;
sbi->s_es_stats.es_stats_cache_misses = 0;
@@ -1192,7 +1193,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
if (err)
return err;
- err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+ err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
if (err)
goto err1;
@@ -1210,7 +1211,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
return 0;
err2:
- percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
err1:
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
return err;
@@ -1221,71 +1222,83 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
if (sbi->s_proc)
remove_proc_entry("es_shrinker_info", sbi->s_proc);
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
- percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
unregister_shrinker(&sbi->s_es_shrinker);
}
-void ext4_es_lru_add(struct inode *inode)
+/*
+ * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
+ * most *nr_to_scan extents, update *nr_to_scan accordingly.
+ *
+ * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
+ * Increment *nr_shrunk by the number of reclaimed extents. Also update
+ * ei->i_es_shrink_lblk to where we should continue scanning.
+ */
+static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
+ int *nr_to_scan, int *nr_shrunk)
{
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
- ei->i_touch_when = jiffies;
-
- if (!list_empty(&ei->i_es_lru))
- return;
+ struct inode *inode = &ei->vfs_inode;
+ struct ext4_es_tree *tree = &ei->i_es_tree;
+ struct extent_status *es;
+ struct rb_node *node;
- spin_lock(&sbi->s_es_lru_lock);
- if (list_empty(&ei->i_es_lru))
- list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
- spin_unlock(&sbi->s_es_lru_lock);
-}
+ es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
+ if (!es)
+ goto out_wrap;
+ node = &es->rb_node;
+ while (*nr_to_scan > 0) {
+ if (es->es_lblk > end) {
+ ei->i_es_shrink_lblk = end + 1;
+ return 0;
+ }
-void ext4_es_lru_del(struct inode *inode)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ (*nr_to_scan)--;
+ node = rb_next(&es->rb_node);
+ /*
+ * We can't reclaim delayed extent from status tree because
+ * fiemap, bigalloc, and seek_data/hole need to use it.
+ */
+ if (ext4_es_is_delayed(es))
+ goto next;
+ if (ext4_es_is_referenced(es)) {
+ ext4_es_clear_referenced(es);
+ goto next;
+ }
- spin_lock(&sbi->s_es_lru_lock);
- if (!list_empty(&ei->i_es_lru))
- list_del_init(&ei->i_es_lru);
- spin_unlock(&sbi->s_es_lru_lock);
+ rb_erase(&es->rb_node, &tree->root);
+ ext4_es_free_extent(inode, es);
+ (*nr_shrunk)++;
+next:
+ if (!node)
+ goto out_wrap;
+ es = rb_entry(node, struct extent_status, rb_node);
+ }
+ ei->i_es_shrink_lblk = es->es_lblk;
+ return 1;
+out_wrap:
+ ei->i_es_shrink_lblk = 0;
+ return 0;
}
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
- int nr_to_scan)
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
{
struct inode *inode = &ei->vfs_inode;
- struct ext4_es_tree *tree = &ei->i_es_tree;
- struct rb_node *node;
- struct extent_status *es;
- unsigned long nr_shrunk = 0;
+ int nr_shrunk = 0;
+ ext4_lblk_t start = ei->i_es_shrink_lblk;
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- if (ei->i_es_lru_nr == 0)
+ if (ei->i_es_shk_nr == 0)
return 0;
if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
__ratelimit(&_rs))
ext4_warning(inode->i_sb, "forced shrink of precached extents");
- node = rb_first(&tree->root);
- while (node != NULL) {
- es = rb_entry(node, struct extent_status, rb_node);
- node = rb_next(&es->rb_node);
- /*
- * We can't reclaim delayed extent from status tree because
- * fiemap, bigallic, and seek_data/hole need to use it.
- */
- if (!ext4_es_is_delayed(es)) {
- rb_erase(&es->rb_node, &tree->root);
- ext4_es_free_extent(inode, es);
- nr_shrunk++;
- if (--nr_to_scan == 0)
- break;
- }
- }
- tree->cache_es = NULL;
+ if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
+ start != 0)
+ es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
+
+ ei->i_es_tree.cache_es = NULL;
return nr_shrunk;
}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index efd5f970b501..691b52613ce4 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -29,25 +29,28 @@
/*
* These flags live in the high bits of extent_status.es_pblk
*/
-#define ES_SHIFT 60
-
-#define EXTENT_STATUS_WRITTEN (1 << 3)
-#define EXTENT_STATUS_UNWRITTEN (1 << 2)
-#define EXTENT_STATUS_DELAYED (1 << 1)
-#define EXTENT_STATUS_HOLE (1 << 0)
+enum {
+ ES_WRITTEN_B,
+ ES_UNWRITTEN_B,
+ ES_DELAYED_B,
+ ES_HOLE_B,
+ ES_REFERENCED_B,
+ ES_FLAGS
+};
-#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
- EXTENT_STATUS_UNWRITTEN | \
- EXTENT_STATUS_DELAYED | \
- EXTENT_STATUS_HOLE)
+#define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS)
+#define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT)
-#define ES_WRITTEN (1ULL << 63)
-#define ES_UNWRITTEN (1ULL << 62)
-#define ES_DELAYED (1ULL << 61)
-#define ES_HOLE (1ULL << 60)
+#define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B)
+#define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B)
+#define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B)
+#define EXTENT_STATUS_HOLE (1 << ES_HOLE_B)
+#define EXTENT_STATUS_REFERENCED (1 << ES_REFERENCED_B)
-#define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \
- ES_DELAYED | ES_HOLE)
+#define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \
+ EXTENT_STATUS_UNWRITTEN | \
+ EXTENT_STATUS_DELAYED | \
+ EXTENT_STATUS_HOLE) << ES_SHIFT)
struct ext4_sb_info;
struct ext4_extent;
@@ -65,14 +68,13 @@ struct ext4_es_tree {
};
struct ext4_es_stats {
- unsigned long es_stats_last_sorted;
unsigned long es_stats_shrunk;
unsigned long es_stats_cache_hits;
unsigned long es_stats_cache_misses;
u64 es_stats_scan_time;
u64 es_stats_max_scan_time;
struct percpu_counter es_stats_all_cnt;
- struct percpu_counter es_stats_lru_cnt;
+ struct percpu_counter es_stats_shk_cnt;
};
extern int __init ext4_init_es(void);
@@ -93,29 +95,49 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode,
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
+/*
+ * Return every status flag stored in the high bits of es->es_pblk, i.e.
+ * es_pblk shifted down by ES_SHIFT. The value is not masked with
+ * ES_TYPE_MASK, so it also carries EXTENT_STATUS_REFERENCED.
+ */
+static inline unsigned int ext4_es_status(struct extent_status *es)
+{
+ return es->es_pblk >> ES_SHIFT;
+}
+
+/*
+ * Return only the extent type flags of @es (written, unwritten, delayed,
+ * hole): es_pblk is masked with ES_TYPE_MASK before being shifted down by
+ * ES_SHIFT, which drops EXTENT_STATUS_REFERENCED from the result.
+ */
+static inline unsigned int ext4_es_type(struct extent_status *es)
+{
+ return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT;
+}
+
static inline int ext4_es_is_written(struct extent_status *es)
{
- return (es->es_pblk & ES_WRITTEN) != 0;
+ return (ext4_es_type(es) & EXTENT_STATUS_WRITTEN) != 0;
}
static inline int ext4_es_is_unwritten(struct extent_status *es)
{
- return (es->es_pblk & ES_UNWRITTEN) != 0;
+ return (ext4_es_type(es) & EXTENT_STATUS_UNWRITTEN) != 0;
}
static inline int ext4_es_is_delayed(struct extent_status *es)
{
- return (es->es_pblk & ES_DELAYED) != 0;
+ return (ext4_es_type(es) & EXTENT_STATUS_DELAYED) != 0;
}
static inline int ext4_es_is_hole(struct extent_status *es)
{
- return (es->es_pblk & ES_HOLE) != 0;
+ return (ext4_es_type(es) & EXTENT_STATUS_HOLE) != 0;
}
-static inline unsigned int ext4_es_status(struct extent_status *es)
+static inline void ext4_es_set_referenced(struct extent_status *es)
{
- return es->es_pblk >> ES_SHIFT;
+ es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT;
+}
+
+static inline void ext4_es_clear_referenced(struct extent_status *es)
+{
+ es->es_pblk &= ~(((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT);
+}
+
+static inline int ext4_es_is_referenced(struct extent_status *es)
+{
+ return (ext4_es_status(es) & EXTENT_STATUS_REFERENCED) != 0;
}
static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
@@ -135,23 +157,19 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
static inline void ext4_es_store_status(struct extent_status *es,
unsigned int status)
{
- es->es_pblk = (((ext4_fsblk_t)
- (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
- (es->es_pblk & ~ES_MASK));
+ es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
+ (es->es_pblk & ~ES_MASK);
}
static inline void ext4_es_store_pblock_status(struct extent_status *es,
ext4_fsblk_t pb,
unsigned int status)
{
- es->es_pblk = (((ext4_fsblk_t)
- (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
- (pb & ~ES_MASK));
+ es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
+ (pb & ~ES_MASK);
}
extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
-extern void ext4_es_lru_add(struct inode *inode);
-extern void ext4_es_lru_del(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 3ea62695abce..4b143febf21f 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -811,8 +811,11 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
ret = __block_write_begin(page, 0, inline_size,
ext4_da_get_block_prep);
if (ret) {
+ up_read(&EXT4_I(inode)->xattr_sem);
+ unlock_page(page);
+ page_cache_release(page);
ext4_truncate_failed_write(inode);
- goto out;
+ return ret;
}
SetPageDirty(page);
@@ -870,6 +873,12 @@ retry_journal:
goto out_journal;
}
+ /*
+ * We cannot recurse into the filesystem as the transaction
+ * is already started.
+ */
+ flags |= AOP_FLAG_NOFS;
+
if (ret == -ENOSPC) {
ret = ext4_da_convert_inline_data_to_extent(mapping,
inode,
@@ -882,11 +891,6 @@ retry_journal:
goto out;
}
- /*
- * We cannot recurse into the filesystem as the transaction
- * is already started.
- */
- flags |= AOP_FLAG_NOFS;
page = grab_cache_page_write_begin(mapping, 0, flags);
if (!page) {
@@ -1807,11 +1811,12 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
- int *has_inline)
+ int *has_inline, __u64 start, __u64 len)
{
__u64 physical = 0;
- __u64 length;
- __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST;
+ __u64 inline_len;
+ __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
+ FIEMAP_EXTENT_LAST;
int error = 0;
struct ext4_iloc iloc;
@@ -1820,6 +1825,13 @@ int ext4_inline_data_fiemap(struct inode *inode,
*has_inline = 0;
goto out;
}
+ inline_len = min_t(size_t, ext4_get_inline_size(inode),
+ i_size_read(inode));
+ if (start >= inline_len)
+ goto out;
+ if (start + len < inline_len)
+ inline_len = start + len;
+ inline_len -= start;
error = ext4_get_inode_loc(inode, &iloc);
if (error)
@@ -1828,11 +1840,10 @@ int ext4_inline_data_fiemap(struct inode *inode,
physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
physical += offsetof(struct ext4_inode, i_block);
- length = i_size_read(inode);
if (physical)
- error = fiemap_fill_next_extent(fieinfo, 0, physical,
- length, flags);
+ error = fiemap_fill_next_extent(fieinfo, start, physical,
+ inline_len, flags);
brelse(iloc.bh);
out:
up_read(&EXT4_I(inode)->xattr_sem);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3356ab5395f4..5653fa42930b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
}
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
up_read((&EXT4_I(inode)->i_data_sem));
- /*
- * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
- * because it shouldn't be marked in es_map->m_flags.
- */
- map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
/*
* We don't check m_len because extent will be collpased in status
@@ -491,7 +486,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
- ext4_es_lru_add(inode);
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
@@ -1393,7 +1387,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, &es)) {
- ext4_es_lru_add(inode);
if (ext4_es_is_hole(&es)) {
retval = 0;
down_read(&EXT4_I(inode)->i_data_sem);
@@ -1434,24 +1427,12 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* file system block.
*/
down_read(&EXT4_I(inode)->i_data_sem);
- if (ext4_has_inline_data(inode)) {
- /*
- * We will soon create blocks for this page, and let
- * us pretend as if the blocks aren't allocated yet.
- * In case of clusters, we have to handle the work
- * of mapping from cluster so that the reserved space
- * is calculated properly.
- */
- if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
- ext4_find_delalloc_cluster(inode, map->m_lblk))
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+ if (ext4_has_inline_data(inode))
retval = 0;
- } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- retval = ext4_ext_map_blocks(NULL, inode, map,
- EXT4_GET_BLOCKS_NO_PUT_HOLE);
+ else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+ retval = ext4_ext_map_blocks(NULL, inode, map, 0);
else
- retval = ext4_ind_map_blocks(NULL, inode, map,
- EXT4_GET_BLOCKS_NO_PUT_HOLE);
+ retval = ext4_ind_map_blocks(NULL, inode, map, 0);
add_delayed:
if (retval == 0) {
@@ -1465,7 +1446,8 @@ add_delayed:
* then we don't need to reserve it again. However we still need
* to reserve metadata for every block we're going to write.
*/
- if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
+ if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
+ !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
ret = ext4_da_reserve_space(inode, iblock);
if (ret) {
/* not enough space to reserve */
@@ -1481,11 +1463,6 @@ add_delayed:
goto out_unlock;
}
- /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
- * and it should not appear on the bh->b_state.
- */
- map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
-
map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh);
set_buffer_delay(bh);
@@ -3643,7 +3620,7 @@ out_stop:
* If this was a simple ftruncate() and the file will remain alive,
* then we need to clear up the orphan record which we created above.
* However, if this was a real unlink then we were called by
- * ext4_delete_inode(), and we allow that function to clean up the
+ * ext4_evict_inode(), and we allow that function to clean up the
* orphan info for us.
*/
if (inode->i_nlink)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bfda18a15592..f58a0d106726 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -78,8 +78,6 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
- ext4_es_lru_del(inode1);
- ext4_es_lru_del(inode2);
isize = i_size_read(inode1);
i_size_write(inode1, i_size_read(inode2));
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dbfe15c2533c..8d1e60214ef0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2358,7 +2358,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
if (sbi->s_group_info) {
memcpy(new_groupinfo, sbi->s_group_info,
sbi->s_group_info_size * sizeof(*sbi->s_group_info));
- ext4_kvfree(sbi->s_group_info);
+ kvfree(sbi->s_group_info);
}
sbi->s_group_info = new_groupinfo;
sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
@@ -2385,7 +2385,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
metalen = sizeof(*meta_group_info) <<
EXT4_DESC_PER_BLOCK_BITS(sb);
- meta_group_info = kmalloc(metalen, GFP_KERNEL);
+ meta_group_info = kmalloc(metalen, GFP_NOFS);
if (meta_group_info == NULL) {
ext4_msg(sb, KERN_ERR, "can't allocate mem "
"for a buddy group");
@@ -2399,7 +2399,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
+ meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
if (meta_group_info[i] == NULL) {
ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
goto exit_group_info;
@@ -2428,7 +2428,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
{
struct buffer_head *bh;
meta_group_info[i]->bb_bitmap =
- kmalloc(sb->s_blocksize, GFP_KERNEL);
+ kmalloc(sb->s_blocksize, GFP_NOFS);
BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
bh = ext4_read_block_bitmap(sb, group);
BUG_ON(bh == NULL);
@@ -2495,7 +2495,7 @@ err_freebuddy:
kfree(sbi->s_group_info[i]);
iput(sbi->s_buddy_cache);
err_freesgi:
- ext4_kvfree(sbi->s_group_info);
+ kvfree(sbi->s_group_info);
return -ENOMEM;
}
@@ -2708,12 +2708,11 @@ int ext4_mb_release(struct super_block *sb)
EXT4_DESC_PER_BLOCK_BITS(sb);
for (i = 0; i < num_meta_group_infos; i++)
kfree(sbi->s_group_info[i]);
- ext4_kvfree(sbi->s_group_info);
+ kvfree(sbi->s_group_info);
}
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
- if (sbi->s_buddy_cache)
- iput(sbi->s_buddy_cache);
+ iput(sbi->s_buddy_cache);
if (sbi->s_mb_stats) {
ext4_msg(sb, KERN_INFO,
"mballoc: %u blocks %u reqs (%u success)",
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a432634f2e6a..3cb267aee802 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -592,7 +592,7 @@ err_out:
/*
* set the i_blocks count to zero
- * so that the ext4_delete_inode does the
+ * so that the ext4_evict_inode() does the
* right job
*
* We don't need to take the i_lock because
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 9f2311bc9c4f..370420bfae8d 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -267,12 +267,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
handle_t *handle;
ext4_lblk_t orig_blk_offset, donor_blk_offset;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
- unsigned int w_flags = 0;
unsigned int tmp_data_size, data_size, replaced_size;
int err2, jblocks, retries = 0;
int replaced_count = 0;
int from = data_offset_in_page << orig_inode->i_blkbits;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+ struct super_block *sb = orig_inode->i_sb;
/*
* It needs twice the amount of ordinary journal buffers because
@@ -287,9 +287,6 @@ again:
return 0;
}
- if (segment_eq(get_fs(), KERNEL_DS))
- w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
orig_blk_offset = orig_page_offset * blocks_per_page +
data_offset_in_page;
@@ -405,10 +402,13 @@ unlock_pages:
page_cache_release(pagep[1]);
stop_journal:
ext4_journal_stop(handle);
+ if (*err == -ENOSPC &&
+ ext4_should_retry_alloc(sb, &retries))
+ goto again;
/* Buffer was busy because probably is pinned to journal transaction,
* force transaction commit may help to free it. */
- if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
- &retries))
+ if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
+ jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
goto again;
return replaced_count;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 426211882f72..2291923dae4e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2814,7 +2814,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
ext4_orphan_add(handle, inode);
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
- retval = 0;
end_unlink:
brelse(bh);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ca4588388fc3..8a8ec6293b19 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -24,6 +24,18 @@ int ext4_resize_begin(struct super_block *sb)
return -EPERM;
/*
+ * If we are not using the primary superblock/GDT copy don't resize,
+ * because the user tools have no way of handling this. Probably a
+ * bad time to do it anyways.
+ */
+ if (EXT4_SB(sb)->s_sbh->b_blocknr !=
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+ ext4_warning(sb, "won't resize using backup superblock at %llu",
+ (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
+ return -EPERM;
+ }
+
+ /*
* We are not allowed to do online-resizing on a filesystem mounted
* with error, because it can destroy the filesystem easily.
*/
@@ -758,18 +770,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
"EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
gdb_num);
- /*
- * If we are not using the primary superblock/GDT copy don't resize,
- * because the user tools have no way of handling this. Probably a
- * bad time to do it anyways.
- */
- if (EXT4_SB(sb)->s_sbh->b_blocknr !=
- le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
- ext4_warning(sb, "won't resize using backup superblock at %llu",
- (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
- return -EPERM;
- }
-
gdb_bh = sb_bread(sb, gdblock);
if (!gdb_bh)
return -EIO;
@@ -856,7 +856,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
EXT4_SB(sb)->s_gdb_count++;
- ext4_kvfree(o_group_desc);
+ kvfree(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_super(handle, sb);
@@ -866,7 +866,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
return err;
exit_inode:
- ext4_kvfree(n_group_desc);
+ kvfree(n_group_desc);
brelse(iloc.bh);
exit_dind:
brelse(dind);
@@ -909,7 +909,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
EXT4_SB(sb)->s_gdb_count++;
- ext4_kvfree(o_group_desc);
+ kvfree(o_group_desc);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
if (unlikely(err))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2c9e6864abd9..74c5f53595fb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -176,15 +176,6 @@ void *ext4_kvzalloc(size_t size, gfp_t flags)
return ret;
}
-void ext4_kvfree(void *ptr)
-{
- if (is_vmalloc_addr(ptr))
- vfree(ptr);
- else
- kfree(ptr);
-
-}
-
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg)
{
@@ -811,8 +802,8 @@ static void ext4_put_super(struct super_block *sb)
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
- ext4_kvfree(sbi->s_group_desc);
- ext4_kvfree(sbi->s_flex_groups);
+ kvfree(sbi->s_group_desc);
+ kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -880,10 +871,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
- INIT_LIST_HEAD(&ei->i_es_lru);
+ INIT_LIST_HEAD(&ei->i_es_list);
ei->i_es_all_nr = 0;
- ei->i_es_lru_nr = 0;
- ei->i_touch_when = 0;
+ ei->i_es_shk_nr = 0;
+ ei->i_es_shrink_lblk = 0;
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
@@ -892,6 +883,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
+ memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
#endif
ei->jinode = NULL;
INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
@@ -972,7 +964,6 @@ void ext4_clear_inode(struct inode *inode)
dquot_drop(inode);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
- ext4_es_lru_del(inode);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
@@ -1068,6 +1059,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);
+/*
+ * Return the dquot pointer array kept in the ext4 in-memory inode
+ * (EXT4_I(inode)->i_dquot). Installed as the .get_dquots super operation.
+ */
+static struct dquot **ext4_get_dquots(struct inode *inode)
+{
+ return EXT4_I(inode)->i_dquot;
+}
+
static const struct dquot_operations ext4_quota_operations = {
.get_reserved_space = ext4_get_reserved_space,
.write_dquot = ext4_write_dquot,
@@ -1117,6 +1113,7 @@ static const struct super_operations ext4_sops = {
#ifdef CONFIG_QUOTA
.quota_read = ext4_quota_read,
.quota_write = ext4_quota_write,
+ .get_dquots = ext4_get_dquots,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
};
@@ -1146,7 +1143,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb,
+ Opt_max_dir_size_kb, Opt_nojournal_checksum,
};
static const match_table_t tokens = {
@@ -1180,6 +1177,7 @@ static const match_table_t tokens = {
{Opt_journal_dev, "journal_dev=%u"},
{Opt_journal_path, "journal_path=%s"},
{Opt_journal_checksum, "journal_checksum"},
+ {Opt_nojournal_checksum, "nojournal_checksum"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_abort, "abort"},
{Opt_data_journal, "data=journal"},
@@ -1361,6 +1359,8 @@ static const struct mount_opts {
MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
MOPT_EXT4_ONLY | MOPT_CLEAR},
+ {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
+ MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
MOPT_EXT4_ONLY | MOPT_SET},
{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
@@ -1702,6 +1702,12 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
}
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
+ test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
+ "in data=ordered mode");
+ return 0;
+ }
return 1;
}
@@ -1939,7 +1945,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
memcpy(new_groups, sbi->s_flex_groups,
(sbi->s_flex_groups_allocated *
sizeof(struct flex_groups)));
- ext4_kvfree(sbi->s_flex_groups);
+ kvfree(sbi->s_flex_groups);
}
sbi->s_flex_groups = new_groups;
sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
@@ -3310,7 +3316,7 @@ int ext4_calculate_overhead(struct super_block *sb)
struct ext4_super_block *es = sbi->s_es;
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
ext4_fsblk_t overhead = 0;
- char *buf = (char *) get_zeroed_page(GFP_KERNEL);
+ char *buf = (char *) get_zeroed_page(GFP_NOFS);
if (!buf)
return -ENOMEM;
@@ -3338,8 +3344,8 @@ int ext4_calculate_overhead(struct super_block *sb)
memset(buf, 0, PAGE_SIZE);
cond_resched();
}
- /* Add the journal blocks as well */
- if (sbi->s_journal)
+ /* Add the internal journal blocks as well */
+ if (sbi->s_journal && !sbi->journal_bdev)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
sbi->s_overhead = overhead;
@@ -3476,7 +3482,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
- ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
+ ext4_warning(sb, "metadata_csum and uninit_bg are "
"redundant flags; please run fsck.");
/* Check for a known checksum algorithm */
@@ -3932,6 +3938,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext4_qctl_sysfile_operations;
else
sb->s_qcop = &ext4_qctl_operations;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
@@ -4224,7 +4231,7 @@ failed_mount7:
failed_mount6:
ext4_mb_release(sb);
if (sbi->s_flex_groups)
- ext4_kvfree(sbi->s_flex_groups);
+ kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -4253,7 +4260,7 @@ failed_mount3:
failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
- ext4_kvfree(sbi->s_group_desc);
+ kvfree(sbi->s_group_desc);
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
@@ -4854,6 +4861,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
+ test_opt(sb, JOURNAL_CHECKSUM)) {
+ ext4_msg(sb, KERN_ERR, "changing journal_checksum "
+ "during remount not supported");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
if (test_opt2(sb, EXPLICIT_DELALLOC)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 83b9b5a8d112..1ccb26bc2a0b 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -162,7 +162,8 @@ fail:
return ERR_PTR(-EINVAL);
}
-struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
+ struct page *dpage)
{
int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
void *value = NULL;
@@ -172,12 +173,13 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
if (type == ACL_TYPE_ACCESS)
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
- retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
+ retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage);
if (retval > 0) {
value = kmalloc(retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
- retval = f2fs_getxattr(inode, name_index, "", value, retval);
+ retval = f2fs_getxattr(inode, name_index, "", value,
+ retval, dpage);
}
if (retval > 0)
@@ -194,6 +196,11 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
return acl;
}
+/*
+ * Exported ACL getter: forwards to __f2fs_get_acl() with a NULL dpage and
+ * returns its result unchanged.
+ */
+struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+{
+ return __f2fs_get_acl(inode, type, NULL);
+}
+
static int __f2fs_set_acl(struct inode *inode, int type,
struct posix_acl *acl, struct page *ipage)
{
@@ -229,7 +236,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
if (acl) {
value = f2fs_acl_to_disk(acl, &size);
if (IS_ERR(value)) {
- cond_clear_inode_flag(fi, FI_ACL_MODE);
+ clear_inode_flag(fi, FI_ACL_MODE);
return (int)PTR_ERR(value);
}
}
@@ -240,7 +247,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
if (!error)
set_cached_acl(inode, type, acl);
- cond_clear_inode_flag(fi, FI_ACL_MODE);
+ clear_inode_flag(fi, FI_ACL_MODE);
return error;
}
@@ -249,12 +256,137 @@ int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return __f2fs_set_acl(inode, type, acl, NULL);
}
-int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
+/*
+ * Most of f2fs_acl_clone, f2fs_acl_create_masq and f2fs_acl_create
+ * is copied from posix_acl.c.
+ */
+/*
+ * Duplicate @acl, allocating with @flags.
+ *
+ * The copy covers the posix_acl header plus acl->a_count entries and its
+ * a_refcount is set to 1. Returns NULL when @acl is NULL or when kmemdup()
+ * fails.
+ */
+static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl,
+ gfp_t flags)
+{
+ struct posix_acl *clone = NULL;
+
+ if (acl) {
+ /* header followed by a_count trailing entries */
+ int size = sizeof(struct posix_acl) + acl->a_count *
+ sizeof(struct posix_acl_entry);
+ clone = kmemdup(acl, size, flags);
+ if (clone)
+ atomic_set(&clone->a_refcount, 1);
+ }
+ return clone;
+}
+
+/*
+ * Reconcile @acl with the creation mode in *mode_p.
+ *
+ * Each relevant entry's e_perm is reduced to what the matching rwx triplet
+ * of the mode allows, and the mode's permission bits are reduced to what
+ * the entries allow. @acl is modified in place and the low S_IRWXUGO bits
+ * of *mode_p are rewritten on success.
+ *
+ * Returns 1 if the ACL contains ACL_USER, ACL_GROUP or ACL_MASK entries,
+ * 0 if it does not, and -EIO for an unknown tag or when neither an
+ * ACL_MASK nor an ACL_GROUP_OBJ entry is present.
+ */
+static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
+{
+ struct posix_acl_entry *pa, *pe;
+ struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
+ umode_t mode = *mode_p;
+ int not_equiv = 0;
+
+ /* assert(atomic_read(acl->a_refcount) == 1); */
+
+ FOREACH_ACL_ENTRY(pa, acl, pe) {
+ switch(pa->e_tag) {
+ case ACL_USER_OBJ:
+ /* owner entry <-> S_IRWXU bits */
+ pa->e_perm &= (mode >> 6) | ~S_IRWXO;
+ mode &= (pa->e_perm << 6) | ~S_IRWXU;
+ break;
+
+ case ACL_USER:
+ case ACL_GROUP:
+ not_equiv = 1;
+ break;
+
+ case ACL_GROUP_OBJ:
+ /* handled after the loop, once we know whether a mask exists */
+ group_obj = pa;
+ break;
+
+ case ACL_OTHER:
+ /* other entry <-> S_IRWXO bits */
+ pa->e_perm &= mode | ~S_IRWXO;
+ mode &= pa->e_perm | ~S_IRWXO;
+ break;
+
+ case ACL_MASK:
+ mask_obj = pa;
+ not_equiv = 1;
+ break;
+
+ default:
+ return -EIO;
+ }
+ }
+
+ /*
+ * The S_IRWXG bits are paired with ACL_MASK when one exists, otherwise
+ * with ACL_GROUP_OBJ.
+ */
+ if (mask_obj) {
+ mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO;
+ mode &= (mask_obj->e_perm << 3) | ~S_IRWXG;
+ } else {
+ if (!group_obj)
+ return -EIO;
+ group_obj->e_perm &= (mode >> 3) | ~S_IRWXO;
+ mode &= (group_obj->e_perm << 3) | ~S_IRWXG;
+ }
+
+ *mode_p = (*mode_p & ~S_IRWXUGO) | mode;
+ return not_equiv;
+}
+
+/*
+ * Work out the ACLs a new inode created in @dir should start with.
+ *
+ * @dir:         parent directory whose default ACL is consulted
+ * @mode:        in/out creation mode; reduced by the default ACL, or by the
+ *               current umask when @dir has no usable default ACL
+ * @default_acl: out; the default ACL to store on a new directory, else NULL
+ * @acl:         out; the access ACL to store, or NULL when
+ *               f2fs_acl_create_masq() reports that the mode bits alone
+ *               suffice
+ * @dpage:       passed through to __f2fs_get_acl()
+ *
+ * Returns 0 on success or a negative errno. On failure both output
+ * pointers are NULL and no ACL reference is held; in particular the
+ * reference on the directory's default ACL is dropped on every error
+ * path, and the error from f2fs_acl_create_masq() is returned as-is
+ * rather than being rewritten to -ENOMEM.
+ */
+static int f2fs_acl_create(struct inode *dir, umode_t *mode,
+		struct posix_acl **default_acl, struct posix_acl **acl,
+		struct page *dpage)
+{
+	struct posix_acl *p;
+	struct posix_acl *clone;
+	int ret;
+
+	/* Never hand a stale or released pointer back to the caller. */
+	*default_acl = NULL;
+	*acl = NULL;
+
+	if (S_ISLNK(*mode) || !IS_POSIXACL(dir))
+		return 0;
+
+	p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage);
+	if (!p || p == ERR_PTR(-EOPNOTSUPP)) {
+		/* No default ACL to inherit: fall back to the umask. */
+		*mode &= ~current_umask();
+		return 0;
+	}
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	/* From here on we own a reference on p. */
+	clone = f2fs_acl_clone(p, GFP_NOFS);
+	if (!clone) {
+		ret = -ENOMEM;
+		goto release_acl;
+	}
+
+	ret = f2fs_acl_create_masq(clone, mode);
+	if (ret < 0)
+		goto release_clone;
+
+	/* ret == 0: no separate access ACL is stored for the new inode. */
+	if (ret == 0)
+		posix_acl_release(clone);
+	else
+		*acl = clone;
+
+	/* Only directories keep the parent's default ACL. */
+	if (!S_ISDIR(*mode))
+		posix_acl_release(p);
+	else
+		*default_acl = p;
+
+	return 0;
+
+release_clone:
+	posix_acl_release(clone);
+release_acl:
+	posix_acl_release(p);
+	return ret;
+}
+
+int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
+ struct page *dpage)
{
- struct posix_acl *default_acl, *acl;
+ struct posix_acl *default_acl = NULL, *acl = NULL;
int error = 0;
- error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+ error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage);
if (error)
return error;
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index e0864651cdc1..997ca8edb6cb 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -38,14 +38,15 @@ struct f2fs_acl_header {
extern struct posix_acl *f2fs_get_acl(struct inode *, int);
extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);
+extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
+ struct page *);
#else
#define f2fs_check_acl NULL
#define f2fs_get_acl NULL
#define f2fs_set_acl NULL
static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
- struct page *page)
+ struct page *ipage, struct page *dpage)
{
return 0;
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index dd10a031c052..e6c271fefaca 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,36 +72,36 @@ out:
return page;
}
-struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index)
-{
- bool readahead = false;
- struct page *page;
-
- page = find_get_page(META_MAPPING(sbi), index);
- if (!page || (page && !PageUptodate(page)))
- readahead = true;
- f2fs_put_page(page, 0);
-
- if (readahead)
- ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
- return get_meta_page(sbi, index);
-}
-
-static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
{
switch (type) {
case META_NAT:
- return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
+ break;
case META_SIT:
- return SIT_BLK_CNT(sbi);
+ if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
+ return false;
+ break;
case META_SSA:
+ if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
+ blkaddr < SM_I(sbi)->ssa_blkaddr))
+ return false;
+ break;
case META_CP:
- return 0;
+ if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
+ blkaddr < __start_cp_addr(sbi)))
+ return false;
+ break;
case META_POR:
- return MAX_BLKADDR(sbi);
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+ blkaddr < MAIN_BLKADDR(sbi)))
+ return false;
+ break;
default:
BUG();
}
+
+ return true;
}
/*
@@ -112,7 +112,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
block_t prev_blk_addr = 0;
struct page *page;
block_t blkno = start;
- block_t max_blks = get_max_meta_blks(sbi, type);
struct f2fs_io_info fio = {
.type = META,
@@ -122,18 +121,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
for (; nrpages-- > 0; blkno++) {
block_t blk_addr;
+ if (!is_valid_blkaddr(sbi, blkno, type))
+ goto out;
+
switch (type) {
case META_NAT:
- /* get nat block addr */
- if (unlikely(blkno >= max_blks))
+ if (unlikely(blkno >=
+ NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
blkno = 0;
+ /* get nat block addr */
blk_addr = current_nat_addr(sbi,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
/* get sit block addr */
- if (unlikely(blkno >= max_blks))
- goto out;
blk_addr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
if (blkno != start && prev_blk_addr + 1 != blk_addr)
@@ -143,10 +144,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
case META_SSA:
case META_CP:
case META_POR:
- if (unlikely(blkno >= max_blks))
- goto out;
- if (unlikely(blkno < SEG0_BLKADDR(sbi)))
- goto out;
blk_addr = blkno;
break;
default:
@@ -169,6 +166,20 @@ out:
return blkno - start;
}
+/*
+ * Start META_POR readahead at @index, but only when the meta page at @index
+ * is missing from the meta mapping or is not yet up to date.
+ */
+void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	struct page *cached = find_get_page(META_MAPPING(sbi), index);
+	bool need_ra = !cached || !PageUptodate(cached);
+
+	/* done inspecting the page; @cached may be NULL here */
+	f2fs_put_page(cached, 0);
+
+	if (!need_ra)
+		return;
+
+	ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
+}
+
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
{
@@ -178,7 +189,7 @@ static int f2fs_write_meta_page(struct page *page,
if (unlikely(sbi->por_doing))
goto redirty_out;
- if (wbc->for_reclaim)
+ if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
goto redirty_out;
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
@@ -187,6 +198,9 @@ static int f2fs_write_meta_page(struct page *page,
write_meta_page(sbi, page);
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, META, WRITE);
return 0;
redirty_out:
@@ -298,46 +312,57 @@ const struct address_space_operations f2fs_meta_aops = {
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
+ struct inode_management *im = &sbi->im[type];
struct ino_entry *e;
retry:
- spin_lock(&sbi->ino_lock[type]);
+ if (radix_tree_preload(GFP_NOFS)) {
+ cond_resched();
+ goto retry;
+ }
+
+ spin_lock(&im->ino_lock);
- e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ e = radix_tree_lookup(&im->ino_root, ino);
if (!e) {
e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
if (!e) {
- spin_unlock(&sbi->ino_lock[type]);
+ spin_unlock(&im->ino_lock);
+ radix_tree_preload_end();
goto retry;
}
- if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
- spin_unlock(&sbi->ino_lock[type]);
+ if (radix_tree_insert(&im->ino_root, ino, e)) {
+ spin_unlock(&im->ino_lock);
kmem_cache_free(ino_entry_slab, e);
+ radix_tree_preload_end();
goto retry;
}
memset(e, 0, sizeof(struct ino_entry));
e->ino = ino;
- list_add_tail(&e->list, &sbi->ino_list[type]);
+ list_add_tail(&e->list, &im->ino_list);
+ if (type != ORPHAN_INO)
+ im->ino_num++;
}
- spin_unlock(&sbi->ino_lock[type]);
+ spin_unlock(&im->ino_lock);
+ radix_tree_preload_end();
}
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
+ struct inode_management *im = &sbi->im[type];
struct ino_entry *e;
- spin_lock(&sbi->ino_lock[type]);
- e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
if (e) {
list_del(&e->list);
- radix_tree_delete(&sbi->ino_root[type], ino);
- if (type == ORPHAN_INO)
- sbi->n_orphans--;
- spin_unlock(&sbi->ino_lock[type]);
+ radix_tree_delete(&im->ino_root, ino);
+ im->ino_num--;
+ spin_unlock(&im->ino_lock);
kmem_cache_free(ino_entry_slab, e);
return;
}
- spin_unlock(&sbi->ino_lock[type]);
+ spin_unlock(&im->ino_lock);
}
void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -355,10 +380,12 @@ void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
/* mode should be APPEND_INO or UPDATE_INO */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
+ struct inode_management *im = &sbi->im[mode];
struct ino_entry *e;
- spin_lock(&sbi->ino_lock[mode]);
- e = radix_tree_lookup(&sbi->ino_root[mode], ino);
- spin_unlock(&sbi->ino_lock[mode]);
+
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
+ spin_unlock(&im->ino_lock);
return e ? true : false;
}
@@ -368,36 +395,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi)
int i;
for (i = APPEND_INO; i <= UPDATE_INO; i++) {
- spin_lock(&sbi->ino_lock[i]);
- list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
+ struct inode_management *im = &sbi->im[i];
+
+ spin_lock(&im->ino_lock);
+ list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
list_del(&e->list);
- radix_tree_delete(&sbi->ino_root[i], e->ino);
+ radix_tree_delete(&im->ino_root, e->ino);
kmem_cache_free(ino_entry_slab, e);
+ im->ino_num--;
}
- spin_unlock(&sbi->ino_lock[i]);
+ spin_unlock(&im->ino_lock);
}
}
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
int err = 0;
- spin_lock(&sbi->ino_lock[ORPHAN_INO]);
- if (unlikely(sbi->n_orphans >= sbi->max_orphans))
+ spin_lock(&im->ino_lock);
+ if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
- sbi->n_orphans++;
- spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+ im->ino_num++;
+ spin_unlock(&im->ino_lock);
return err;
}
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
- spin_lock(&sbi->ino_lock[ORPHAN_INO]);
- f2fs_bug_on(sbi, sbi->n_orphans == 0);
- sbi->n_orphans--;
- spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
+
+ spin_lock(&im->ino_lock);
+ f2fs_bug_on(sbi, im->ino_num == 0);
+ im->ino_num--;
+ spin_unlock(&im->ino_lock);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -460,17 +493,19 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
struct f2fs_orphan_block *orphan_blk = NULL;
unsigned int nentries = 0;
unsigned short index;
- unsigned short orphan_blocks =
- (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
+ unsigned short orphan_blocks;
struct page *page = NULL;
struct ino_entry *orphan = NULL;
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
+
+ orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);
index = 1;
- spin_lock(&sbi->ino_lock[ORPHAN_INO]);
- head = &sbi->ino_list[ORPHAN_INO];
+ spin_lock(&im->ino_lock);
+ head = &im->ino_list;
/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) {
@@ -510,7 +545,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
f2fs_put_page(page, 1);
}
- spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+ spin_unlock(&im->ino_lock);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -731,6 +766,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
struct dir_inode_entry *entry;
struct inode *inode;
retry:
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
spin_lock(&sbi->dir_inode_lock);
head = &sbi->dir_inode_list;
@@ -830,6 +868,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
block_t start_blk;
struct page *cp_page;
@@ -889,7 +928,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
- orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
+ orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
orphan_blocks);
@@ -905,7 +944,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
orphan_blocks);
}
- if (sbi->n_orphans)
+ if (orphan_num)
set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
else
clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
@@ -940,7 +979,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_put_page(cp_page, 1);
}
- if (sbi->n_orphans) {
+ if (orphan_num) {
write_orphan_inodes(sbi, start_blk);
start_blk += orphan_blocks;
}
@@ -975,6 +1014,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
+ /* wait for previous submitted meta pages writeback */
+ wait_on_all_pages_writeback(sbi);
+
release_dirty_inode(sbi);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1036,9 +1078,12 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
int i;
for (i = 0; i < MAX_INO_ENTRY; i++) {
- INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
- spin_lock_init(&sbi->ino_lock[i]);
- INIT_LIST_HEAD(&sbi->ino_list[i]);
+ struct inode_management *im = &sbi->im[i];
+
+ INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
+ spin_lock_init(&im->ino_lock);
+ INIT_LIST_HEAD(&im->ino_list);
+ im->ino_num = 0;
}
/*
@@ -1047,7 +1092,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*504 orphan entries
*/
- sbi->n_orphans = 0;
sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8e58c4cc2cb9..7ec697b37f19 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -61,11 +61,6 @@ static void f2fs_write_end_io(struct bio *bio, int err)
dec_page_count(sbi, F2FS_WRITEBACK);
}
- if (sbi->wait_io) {
- complete(sbi->wait_io);
- sbi->wait_io = NULL;
- }
-
if (!get_pages(sbi, F2FS_WRITEBACK) &&
!list_empty(&sbi->cp_wait.task_list))
wake_up(&sbi->cp_wait);
@@ -95,34 +90,18 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
- int rw;
if (!io->bio)
return;
- rw = fio->rw;
-
- if (is_read_io(rw)) {
- trace_f2fs_submit_read_bio(io->sbi->sb, rw,
- fio->type, io->bio);
- submit_bio(rw, io->bio);
- } else {
- trace_f2fs_submit_write_bio(io->sbi->sb, rw,
- fio->type, io->bio);
- /*
- * META_FLUSH is only from the checkpoint procedure, and we
- * should wait this metadata bio for FS consistency.
- */
- if (fio->type == META_FLUSH) {
- DECLARE_COMPLETION_ONSTACK(wait);
- io->sbi->wait_io = &wait;
- submit_bio(rw, io->bio);
- wait_for_completion(&wait);
- } else {
- submit_bio(rw, io->bio);
- }
- }
+ if (is_read_io(fio->rw))
+ trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw,
+ fio->type, io->bio);
+ else
+ trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw,
+ fio->type, io->bio);
+ submit_bio(fio->rw, io->bio);
io->bio = NULL;
}
@@ -257,9 +236,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
bool need_put = dn->inode_page ? false : true;
int err;
- /* if inode_page exists, index should be zero */
- f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index);
-
err = get_dnode_of_data(dn, index, ALLOC_NODE);
if (err)
return err;
@@ -740,14 +716,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
static int f2fs_read_data_page(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
- int ret;
+ int ret = -EAGAIN;
trace_f2fs_readpage(page, DATA);
/* If the file has inline data, try to read it directly */
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
- else
+ if (ret == -EAGAIN)
ret = mpage_readpage(page, get_data_block);
return ret;
@@ -859,10 +835,11 @@ write:
else if (has_not_enough_free_secs(sbi, 0))
goto redirty_out;
+ err = -EAGAIN;
f2fs_lock_op(sbi);
- if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
- err = f2fs_write_inline_data(inode, page, offset);
- else
+ if (f2fs_has_inline_data(inode))
+ err = f2fs_write_inline_data(inode, page);
+ if (err == -EAGAIN)
err = do_write_data_page(page, &fio);
f2fs_unlock_op(sbi);
done:
@@ -951,7 +928,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct page *page;
+ struct page *page, *ipage;
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
struct dnode_of_data dn;
int err = 0;
@@ -959,45 +936,60 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
trace_f2fs_write_begin(inode, pos, len, flags);
f2fs_balance_fs(sbi);
-repeat:
- err = f2fs_convert_inline_data(inode, pos + len, NULL);
- if (err)
- goto fail;
+ /*
+ * We should check this at this moment to avoid deadlock on inode page
+ * and #0 page. The locking rule for inline_data conversion should be:
+ * lock_page(page #0) -> lock_page(inode_page)
+ */
+ if (index != 0) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ goto fail;
+ }
+repeat:
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
err = -ENOMEM;
goto fail;
}
- /* to avoid latency during memory pressure */
- unlock_page(page);
-
*pagep = page;
- if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
- goto inline_data;
-
f2fs_lock_op(sbi);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_reserve_block(&dn, index);
- f2fs_unlock_op(sbi);
- if (err) {
- f2fs_put_page(page, 0);
- goto fail;
- }
-inline_data:
- lock_page(page);
- if (unlikely(page->mapping != mapping)) {
- f2fs_put_page(page, 1);
- goto repeat;
+
+ /* check inline_data */
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto unlock_fail;
}
- f2fs_wait_on_page_writeback(page, DATA);
+ set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+ if (f2fs_has_inline_data(inode)) {
+ if (pos + len <= MAX_INLINE_DATA) {
+ read_inline_data(page, ipage);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+ sync_inode_page(&dn);
+ goto put_next;
+ }
+ err = f2fs_convert_inline_page(&dn, page);
+ if (err)
+ goto put_fail;
+ }
+ err = f2fs_reserve_block(&dn, index);
+ if (err)
+ goto put_fail;
+put_next:
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
return 0;
+ f2fs_wait_on_page_writeback(page, DATA);
+
if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
unsigned start = pos & (PAGE_CACHE_SIZE - 1);
unsigned end = start + len;
@@ -1010,18 +1002,10 @@ inline_data:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
- if (f2fs_has_inline_data(inode)) {
- err = f2fs_read_inline_data(inode, page);
- if (err) {
- page_cache_release(page);
- goto fail;
- }
- } else {
- err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
- READ_SYNC);
- if (err)
- goto fail;
- }
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+ READ_SYNC);
+ if (err)
+ goto fail;
lock_page(page);
if (unlikely(!PageUptodate(page))) {
@@ -1038,6 +1022,12 @@ out:
SetPageUptodate(page);
clear_cold_data(page);
return 0;
+
+put_fail:
+ f2fs_put_dnode(&dn);
+unlock_fail:
+ f2fs_unlock_op(sbi);
+ f2fs_put_page(page, 1);
fail:
f2fs_write_failed(mapping, pos + len);
return err;
@@ -1052,10 +1042,7 @@ static int f2fs_write_end(struct file *file,
trace_f2fs_write_end(inode, pos, len, copied);
- if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
- register_inmem_page(inode, page);
- else
- set_page_dirty(page);
+ set_page_dirty(page);
if (pos + copied > i_size_read(inode)) {
i_size_write(inode, pos + copied);
@@ -1093,9 +1080,12 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
int err;
- /* Let buffer I/O handle the inline data case. */
- if (f2fs_has_inline_data(inode))
- return 0;
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
if (check_direct_IO(inode, rw, iter, offset))
return 0;
@@ -1119,6 +1109,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
return;
+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+ invalidate_inmem_page(inode, page);
+
if (PageDirty(page))
inode_dec_dirty_pages(inode);
ClearPagePrivate(page);
@@ -1138,6 +1131,12 @@ static int f2fs_set_data_page_dirty(struct page *page)
trace_f2fs_set_page_dirty(page, DATA);
SetPageUptodate(page);
+
+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) {
+ register_inmem_page(inode, page);
+ return 1;
+ }
+
mark_inode_dirty(inode);
if (!PageDirty(page)) {
@@ -1152,9 +1151,12 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
- if (f2fs_has_inline_data(inode))
- return 0;
-
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ int err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
return generic_block_bmap(mapping, block, get_data_block);
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0a91ab813a9e..91e8f699ab30 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -39,13 +39,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_dirs = sbi->n_dirty_dirs;
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
+ si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
si->valid_count = valid_user_blocks(sbi);
si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi);
- si->inline_inode = sbi->inline_inode;
+ si->inline_inode = atomic_read(&sbi->inline_inode);
+ si->inline_dir = atomic_read(&sbi->inline_dir);
si->utilization = utilization(sbi);
si->free_segs = free_segments(sbi);
@@ -118,6 +120,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned npages;
+ int i;
if (si->base_mem)
goto get_cache;
@@ -167,8 +170,9 @@ get_cache:
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
+ for (i = 0; i <= UPDATE_INO; i++)
+ si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
}
static int stat_show(struct seq_file *s, void *v)
@@ -200,6 +204,8 @@ static int stat_show(struct seq_file *s, void *v)
si->valid_count - si->valid_node_count);
seq_printf(s, " - Inline_data Inode: %u\n",
si->inline_inode);
+ seq_printf(s, " - Inline_dentry Inode: %u\n",
+ si->inline_dir);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@@ -244,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
si->hit_ext, si->total_ext);
seq_puts(s, "\nBalancing F2FS Async:\n");
+ seq_printf(s, " - inmem: %4d\n",
+ si->inmem_pages);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d\n",
@@ -321,6 +329,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->sbi = sbi;
sbi->stat_info = si;
+ atomic_set(&sbi->inline_inode, 0);
+ atomic_set(&sbi->inline_dir, 0);
+
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b54f87149c09..b1a7d5737cd0 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}
-static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
+unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
[F2FS_FT_UNKNOWN] = DT_UNKNOWN,
[F2FS_FT_REG_FILE] = DT_REG,
[F2FS_FT_DIR] = DT_DIR,
@@ -59,7 +59,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
};
-static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
+void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
{
umode_t mode = inode->i_mode;
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
@@ -90,51 +90,70 @@ static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
- struct qstr *name, int *max_slots,
- f2fs_hash_t namehash, struct page **res_page)
+ struct qstr *name, int *max_slots,
+ struct page **res_page)
+{
+ struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dir_entry *de;
+ struct f2fs_dentry_ptr d;
+
+ dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ de = find_target_dentry(name, max_slots, &d);
+
+ if (de)
+ *res_page = dentry_page;
+ else
+ kunmap(dentry_page);
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs has occurred.
+ */
+ f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0);
+ return de;
+}
+
+struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
+ struct f2fs_dentry_ptr *d)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
- struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
- const void *dentry_bits = &dentry_blk->dentry_bitmap;
+ f2fs_hash_t namehash = f2fs_dentry_hash(name);
int max_len = 0;
- while (bit_pos < NR_DENTRY_IN_BLOCK) {
- if (!test_bit_le(bit_pos, dentry_bits)) {
+ if (max_slots)
+ *max_slots = 0;
+ while (bit_pos < d->max) {
+ if (!test_bit_le(bit_pos, d->bitmap)) {
if (bit_pos == 0)
max_len = 1;
- else if (!test_bit_le(bit_pos - 1, dentry_bits))
+ else if (!test_bit_le(bit_pos - 1, d->bitmap))
max_len++;
bit_pos++;
continue;
}
- de = &dentry_blk->dentry[bit_pos];
- if (early_match_name(name->len, namehash, de)) {
- if (!memcmp(dentry_blk->filename[bit_pos],
- name->name,
- name->len)) {
- *res_page = dentry_page;
- goto found;
- }
- }
- if (max_len > *max_slots) {
+ de = &d->dentry[bit_pos];
+ if (early_match_name(name->len, namehash, de) &&
+ !memcmp(d->filename[bit_pos], name->name, name->len))
+ goto found;
+
+ if (max_slots && *max_slots >= 0 && max_len > *max_slots) {
*max_slots = max_len;
max_len = 0;
}
- /*
- * For the most part, it should be a bug when name_len is zero.
- * We stop here for figuring out where the bugs has occurred.
- */
- f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len);
+ /* remain bug on condition */
+ if (unlikely(!de->name_len))
+ d->max = -1;
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
de = NULL;
- kunmap(dentry_page);
found:
- if (max_len > *max_slots)
+ if (max_slots && max_len > *max_slots)
*max_slots = max_len;
return de;
}
@@ -149,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
bool room = false;
- int max_slots = 0;
+ int max_slots;
f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
@@ -168,8 +187,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
continue;
}
- de = find_in_block(dentry_page, name, &max_slots,
- namehash, res_page);
+ de = find_in_block(dentry_page, name, &max_slots, res_page);
if (de)
break;
@@ -201,6 +219,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
unsigned int max_depth;
unsigned int level;
+ if (f2fs_has_inline_dentry(dir))
+ return find_in_inline_dir(dir, child, res_page);
+
if (npages == 0)
return NULL;
@@ -227,6 +248,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
struct f2fs_dir_entry *de;
struct f2fs_dentry_block *dentry_blk;
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_parent_inline_dir(dir, p);
+
page = get_lock_data_page(dir, 0);
if (IS_ERR(page))
return NULL;
@@ -247,7 +271,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
de = f2fs_find_entry(dir, qstr, &page);
if (de) {
res = le32_to_cpu(de->ino);
- kunmap(page);
+ f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
}
@@ -257,11 +281,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode)
{
+ enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, type);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
- kunmap(page);
+ if (!f2fs_has_inline_dentry(dir))
+ kunmap(page);
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
@@ -296,36 +322,48 @@ int update_dent_inode(struct inode *inode, const struct qstr *name)
return 0;
}
-static int make_empty_dir(struct inode *inode,
- struct inode *parent, struct page *page)
+void do_make_empty_dir(struct inode *inode, struct inode *parent,
+ struct f2fs_dentry_ptr *d)
{
- struct page *dentry_page;
- struct f2fs_dentry_block *dentry_blk;
struct f2fs_dir_entry *de;
- dentry_page = get_new_data_page(inode, page, 0, true);
- if (IS_ERR(dentry_page))
- return PTR_ERR(dentry_page);
-
-
- dentry_blk = kmap_atomic(dentry_page);
-
- de = &dentry_blk->dentry[0];
+ de = &d->dentry[0];
de->name_len = cpu_to_le16(1);
de->hash_code = 0;
de->ino = cpu_to_le32(inode->i_ino);
- memcpy(dentry_blk->filename[0], ".", 1);
+ memcpy(d->filename[0], ".", 1);
set_de_type(de, inode);
- de = &dentry_blk->dentry[1];
+ de = &d->dentry[1];
de->hash_code = 0;
de->name_len = cpu_to_le16(2);
de->ino = cpu_to_le32(parent->i_ino);
- memcpy(dentry_blk->filename[1], "..", 2);
+ memcpy(d->filename[1], "..", 2);
set_de_type(de, inode);
- test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
- test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
+ test_and_set_bit_le(0, (void *)d->bitmap);
+ test_and_set_bit_le(1, (void *)d->bitmap);
+}
+
+static int make_empty_dir(struct inode *inode,
+ struct inode *parent, struct page *page)
+{
+ struct page *dentry_page;
+ struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dentry_ptr d;
+
+ if (f2fs_has_inline_dentry(inode))
+ return make_empty_inline_dir(inode, parent, page);
+
+ dentry_page = get_new_data_page(inode, page, 0, true);
+ if (IS_ERR(dentry_page))
+ return PTR_ERR(dentry_page);
+
+ dentry_blk = kmap_atomic(dentry_page);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ do_make_empty_dir(inode, parent, &d);
+
kunmap_atomic(dentry_blk);
set_page_dirty(dentry_page);
@@ -333,8 +371,8 @@ static int make_empty_dir(struct inode *inode,
return 0;
}
-static struct page *init_inode_metadata(struct inode *inode,
- struct inode *dir, const struct qstr *name)
+struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+ const struct qstr *name, struct page *dpage)
{
struct page *page;
int err;
@@ -350,7 +388,7 @@ static struct page *init_inode_metadata(struct inode *inode,
goto error;
}
- err = f2fs_init_acl(inode, dir, page);
+ err = f2fs_init_acl(inode, dir, page, dpage);
if (err)
goto put_error;
@@ -395,7 +433,7 @@ error:
return ERR_PTR(err);
}
-static void update_parent_metadata(struct inode *dir, struct inode *inode,
+void update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth)
{
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
@@ -417,27 +455,23 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
}
-static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots)
+int room_for_filename(const void *bitmap, int slots, int max_slots)
{
int bit_start = 0;
int zero_start, zero_end;
next:
- zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK,
- bit_start);
- if (zero_start >= NR_DENTRY_IN_BLOCK)
- return NR_DENTRY_IN_BLOCK;
+ zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start);
+ if (zero_start >= max_slots)
+ return max_slots;
- zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK,
- zero_start);
+ zero_end = find_next_bit_le(bitmap, max_slots, zero_start);
if (zero_end - zero_start >= slots)
return zero_start;
bit_start = zero_end + 1;
- if (zero_end + 1 >= NR_DENTRY_IN_BLOCK)
- return NR_DENTRY_IN_BLOCK;
+ if (zero_end + 1 >= max_slots)
+ return max_slots;
goto next;
}
@@ -463,6 +497,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
int err = 0;
int i;
+ if (f2fs_has_inline_dentry(dir)) {
+ err = f2fs_add_inline_entry(dir, name, inode);
+ if (!err || err != -EAGAIN)
+ return err;
+ else
+ err = 0;
+ }
+
dentry_hash = f2fs_dentry_hash(name);
level = 0;
current_depth = F2FS_I(dir)->i_current_depth;
@@ -491,7 +533,8 @@ start:
return PTR_ERR(dentry_page);
dentry_blk = kmap(dentry_page);
- bit_pos = room_for_filename(dentry_blk, slots);
+ bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ slots, NR_DENTRY_IN_BLOCK);
if (bit_pos < NR_DENTRY_IN_BLOCK)
goto add_dentry;
@@ -506,7 +549,7 @@ add_dentry:
f2fs_wait_on_page_writeback(dentry_page, DATA);
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, name);
+ page = init_inode_metadata(inode, dir, name, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -545,7 +588,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, NULL);
+ page = init_inode_metadata(inode, dir, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -560,26 +603,57 @@ fail:
return err;
}
+void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+
+ down_write(&F2FS_I(inode)->i_sem);
+
+ if (S_ISDIR(inode->i_mode)) {
+ drop_nlink(dir);
+ if (page)
+ update_inode(dir, page);
+ else
+ update_inode_page(dir);
+ }
+ inode->i_ctime = CURRENT_TIME;
+
+ drop_nlink(inode);
+ if (S_ISDIR(inode->i_mode)) {
+ drop_nlink(inode);
+ i_size_write(inode, 0);
+ }
+ up_write(&F2FS_I(inode)->i_sem);
+ update_inode_page(inode);
+
+ if (inode->i_nlink == 0)
+ add_orphan_inode(sbi, inode->i_ino);
+ else
+ release_orphan_inode(sbi);
+}
+
/*
* It only removes the dentry from the dentry page, corresponding name
* entry in name page does not need to be touched during deletion.
*/
void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
- struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
- struct inode *dir = page->mapping->host;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
int i;
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_delete_inline_entry(dentry, page, dir, inode);
+
lock_page(page);
f2fs_wait_on_page_writeback(page, DATA);
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
- test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+ clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
/* Let's check and deallocate this dentry page */
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
@@ -590,29 +664,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- if (inode) {
- struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
-
- down_write(&F2FS_I(inode)->i_sem);
-
- if (S_ISDIR(inode->i_mode)) {
- drop_nlink(dir);
- update_inode_page(dir);
- }
- inode->i_ctime = CURRENT_TIME;
- drop_nlink(inode);
- if (S_ISDIR(inode->i_mode)) {
- drop_nlink(inode);
- i_size_write(inode, 0);
- }
- up_write(&F2FS_I(inode)->i_sem);
- update_inode_page(inode);
-
- if (inode->i_nlink == 0)
- add_orphan_inode(sbi, inode->i_ino);
- else
- release_orphan_inode(sbi);
- }
+ if (inode)
+ f2fs_drop_nlink(dir, inode, NULL);
if (bit_pos == NR_DENTRY_IN_BLOCK) {
truncate_hole(dir, page->index, page->index + 1);
@@ -628,9 +681,12 @@ bool f2fs_empty_dir(struct inode *dir)
unsigned long bidx;
struct page *dentry_page;
unsigned int bit_pos;
- struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dentry_block *dentry_blk;
unsigned long nblock = dir_blocks(dir);
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_empty_inline_dir(dir);
+
for (bidx = 0; bidx < nblock; bidx++) {
dentry_page = get_lock_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
@@ -640,7 +696,6 @@ bool f2fs_empty_dir(struct inode *dir)
return false;
}
-
dentry_blk = kmap_atomic(dentry_page);
if (bidx == 0)
bit_pos = 2;
@@ -659,19 +714,48 @@ bool f2fs_empty_dir(struct inode *dir)
return true;
}
+bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
+ unsigned int start_pos)
+{
+ unsigned char d_type = DT_UNKNOWN;
+ unsigned int bit_pos;
+ struct f2fs_dir_entry *de = NULL;
+
+ bit_pos = ((unsigned long)ctx->pos % d->max);
+
+ while (bit_pos < d->max) {
+ bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
+ if (bit_pos >= d->max)
+ break;
+
+ de = &d->dentry[bit_pos];
+ if (de->file_type < F2FS_FT_MAX)
+ d_type = f2fs_filetype_table[de->file_type];
+ else
+ d_type = DT_UNKNOWN;
+ if (!dir_emit(ctx, d->filename[bit_pos],
+ le16_to_cpu(de->name_len),
+ le32_to_cpu(de->ino), d_type))
+ return true;
+
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ ctx->pos = start_pos + bit_pos;
+ }
+ return false;
+}
+
static int f2fs_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
unsigned long npages = dir_blocks(inode);
- unsigned int bit_pos = 0;
struct f2fs_dentry_block *dentry_blk = NULL;
- struct f2fs_dir_entry *de = NULL;
struct page *dentry_page = NULL;
struct file_ra_state *ra = &file->f_ra;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
- unsigned char d_type = DT_UNKNOWN;
+ struct f2fs_dentry_ptr d;
- bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
+ if (f2fs_has_inline_dentry(inode))
+ return f2fs_read_inline_dir(file, ctx);
/* readahead for multi pages of dir */
if (npages - n > 1 && !ra_has_index(ra, n))
@@ -684,28 +768,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
continue;
dentry_blk = kmap(dentry_page);
- while (bit_pos < NR_DENTRY_IN_BLOCK) {
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK,
- bit_pos);
- if (bit_pos >= NR_DENTRY_IN_BLOCK)
- break;
-
- de = &dentry_blk->dentry[bit_pos];
- if (de->file_type < F2FS_FT_MAX)
- d_type = f2fs_filetype_table[de->file_type];
- else
- d_type = DT_UNKNOWN;
- if (!dir_emit(ctx,
- dentry_blk->filename[bit_pos],
- le16_to_cpu(de->name_len),
- le32_to_cpu(de->ino), d_type))
- goto stop;
- bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
- ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos;
- }
- bit_pos = 0;
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+
+ if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK))
+ goto stop;
+
ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8171e80b2ee9..ec58bb2373fc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -46,8 +46,10 @@
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
-#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
-#define F2FS_MOUNT_NOBARRIER 0x00000400
+#define F2FS_MOUNT_INLINE_DENTRY 0x00000200
+#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
+#define F2FS_MOUNT_NOBARRIER 0x00000800
+#define F2FS_MOUNT_FASTBOOT 0x00001000
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -211,6 +213,32 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
/*
* For INODE and NODE manager
*/
+/* for directory operations */
+struct f2fs_dentry_ptr {
+ const void *bitmap;
+ struct f2fs_dir_entry *dentry;
+ __u8 (*filename)[F2FS_SLOT_LEN];
+ int max;
+};
+
+static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d,
+ void *src, int type)
+{
+ if (type == 1) {
+ struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
+ d->max = NR_DENTRY_IN_BLOCK;
+ d->bitmap = &t->dentry_bitmap;
+ d->dentry = t->dentry;
+ d->filename = t->filename;
+ } else {
+ struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
+ d->max = NR_INLINE_DENTRY;
+ d->bitmap = &t->dentry_bitmap;
+ d->dentry = t->dentry;
+ d->filename = t->filename;
+ }
+}
+
/*
* XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1
* as its node offset to distinguish from index node blocks.
@@ -269,6 +297,7 @@ struct f2fs_inode_info {
struct extent_info ext; /* in-memory extent cache entry */
struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
+ struct radix_tree_root inmem_root; /* radix tree for inmem pages */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
};
@@ -303,7 +332,7 @@ struct f2fs_nm_info {
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
- rwlock_t nat_tree_lock; /* protect nat_tree_lock */
+ struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
@@ -433,6 +462,7 @@ enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
+ F2FS_INMEM_PAGES,
NR_COUNT_TYPE,
};
@@ -470,6 +500,14 @@ struct f2fs_bio_info {
struct rw_semaphore io_rwsem; /* blocking op for bio */
};
+/* for inner inode cache management */
+struct inode_management {
+ struct radix_tree_root ino_root; /* ino entry array */
+ spinlock_t ino_lock; /* for ino entry lock */
+ struct list_head ino_list; /* inode list head */
+ unsigned long ino_num; /* number of entries */
+};
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
@@ -488,7 +526,6 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info read_io; /* for read bios */
struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
- struct completion *wait_io; /* for completion bios */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -500,13 +537,9 @@ struct f2fs_sb_info {
bool por_doing; /* recovery is doing or not */
wait_queue_head_t cp_wait;
- /* for inode management */
- struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
- spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
- struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
+ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
/* for orphan inode, use 0'th array */
- unsigned int n_orphans; /* # of orphan inodes */
unsigned int max_orphans; /* max orphan inodes */
/* for directory inode management */
@@ -557,7 +590,8 @@ struct f2fs_sb_info {
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
- int inline_inode; /* # of inline_data inodes */
+ atomic_t inline_inode; /* # of inline_data inodes */
+ atomic_t inline_dir; /* # of inline_dentry inodes */
int bg_gc; /* background gc calls */
unsigned int n_dirty_dirs; /* # of dir inodes */
#endif
@@ -988,6 +1022,13 @@ retry:
return entry;
}
+static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
+ unsigned long index, void *item)
+{
+ while (radix_tree_insert(root, index, item))
+ cond_resched();
+}
+
#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
static inline bool IS_INODE(struct page *page)
@@ -1020,7 +1061,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr)
return mask & *addr;
}
-static inline int f2fs_set_bit(unsigned int nr, char *addr)
+static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr)
{
int mask;
int ret;
@@ -1032,7 +1073,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr)
return ret;
}
-static inline int f2fs_clear_bit(unsigned int nr, char *addr)
+static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr)
{
int mask;
int ret;
@@ -1044,6 +1085,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)
return ret;
}
+static inline void f2fs_change_bit(unsigned int nr, char *addr)
+{
+ int mask;
+
+ addr += (nr >> 3);
+ mask = 1 << (7 - (nr & 0x07));
+ *addr ^= mask;
+}
+
/* used for f2fs_inode_info->flags */
enum {
FI_NEW_INODE, /* indicate newly allocated inode */
@@ -1057,11 +1107,13 @@ enum {
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
+ FI_INLINE_DENTRY, /* used for inline dentry */
FI_APPEND_WRITE, /* inode has appended data */
FI_UPDATE_WRITE, /* inode has in-place-update data */
FI_NEED_IPU, /* used for ipu per file */
FI_ATOMIC_FILE, /* indicate atomic file */
FI_VOLATILE_FILE, /* indicate volatile file */
+ FI_DATA_EXIST, /* indicate data exists */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1087,15 +1139,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
set_inode_flag(fi, FI_ACL_MODE);
}
-static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag)
-{
- if (is_inode_flag_set(fi, FI_ACL_MODE)) {
- clear_inode_flag(fi, FI_ACL_MODE);
- return 1;
- }
- return 0;
-}
-
static inline void get_inline_info(struct f2fs_inode_info *fi,
struct f2fs_inode *ri)
{
@@ -1103,6 +1146,10 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
set_inode_flag(fi, FI_INLINE_XATTR);
if (ri->i_inline & F2FS_INLINE_DATA)
set_inode_flag(fi, FI_INLINE_DATA);
+ if (ri->i_inline & F2FS_INLINE_DENTRY)
+ set_inode_flag(fi, FI_INLINE_DENTRY);
+ if (ri->i_inline & F2FS_DATA_EXIST)
+ set_inode_flag(fi, FI_DATA_EXIST);
}
static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -1114,6 +1161,10 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
ri->i_inline |= F2FS_INLINE_XATTR;
if (is_inode_flag_set(fi, FI_INLINE_DATA))
ri->i_inline |= F2FS_INLINE_DATA;
+ if (is_inode_flag_set(fi, FI_INLINE_DENTRY))
+ ri->i_inline |= F2FS_INLINE_DENTRY;
+ if (is_inode_flag_set(fi, FI_DATA_EXIST))
+ ri->i_inline |= F2FS_DATA_EXIST;
}
static inline int f2fs_has_inline_xattr(struct inode *inode)
@@ -1148,6 +1199,17 @@ static inline int f2fs_has_inline_data(struct inode *inode)
return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
}
+static inline void f2fs_clear_inline_inode(struct inode *inode)
+{
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+}
+
+static inline int f2fs_exist_data(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST);
+}
+
static inline bool f2fs_is_atomic_file(struct inode *inode)
{
return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
@@ -1164,6 +1226,23 @@ static inline void *inline_data_addr(struct page *page)
return (void *)&(ri->i_addr[1]);
}
+static inline int f2fs_has_inline_dentry(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY);
+}
+
+static inline void *inline_dentry_addr(struct page *page)
+{
+ struct f2fs_inode *ri = F2FS_INODE(page);
+ return (void *)&(ri->i_addr[1]);
+}
+
+static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
+{
+ if (!f2fs_has_inline_dentry(dir))
+ kunmap(page);
+}
+
static inline int f2fs_readonly(struct super_block *sb)
{
return sb->s_flags & MS_RDONLY;
@@ -1224,6 +1303,19 @@ struct dentry *f2fs_get_parent(struct dentry *child);
/*
* dir.c
*/
+extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
+void set_de_type(struct f2fs_dir_entry *, struct inode *);
+struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
+ struct f2fs_dentry_ptr *);
+bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
+ unsigned int);
+void do_make_empty_dir(struct inode *, struct inode *,
+ struct f2fs_dentry_ptr *);
+struct page *init_inode_metadata(struct inode *, struct inode *,
+ const struct qstr *, struct page *);
+void update_parent_metadata(struct inode *, struct inode *, unsigned int);
+int room_for_filename(const void *, int, int);
+void f2fs_drop_nlink(struct inode *, struct inode *, struct page *);
struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *,
struct page **);
struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
@@ -1232,7 +1324,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
struct page *, struct inode *);
int update_dent_inode(struct inode *, const struct qstr *);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
-void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
+void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
+ struct inode *);
int f2fs_do_tmpfile(struct inode *, struct inode *);
int f2fs_make_empty(struct inode *, struct inode *);
bool f2fs_empty_dir(struct inode *);
@@ -1296,6 +1389,7 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void register_inmem_page(struct inode *, struct page *);
+void invalidate_inmem_page(struct inode *, struct page *);
void commit_inmem_pages(struct inode *, bool);
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
@@ -1337,8 +1431,8 @@ void destroy_segment_manager_caches(void);
*/
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
+void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
@@ -1405,7 +1499,7 @@ struct f2fs_stat_info {
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, sits, fnids;
int total_count, utilization;
- int bg_gc, inline_inode;
+ int bg_gc, inline_inode, inline_dir, inmem_pages;
unsigned int valid_count, valid_node_count, valid_inode_count;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
@@ -1438,14 +1532,23 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_inc_inline_inode(inode) \
do { \
if (f2fs_has_inline_data(inode)) \
- ((F2FS_I_SB(inode))->inline_inode++); \
+ (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \
} while (0)
#define stat_dec_inline_inode(inode) \
do { \
if (f2fs_has_inline_data(inode)) \
- ((F2FS_I_SB(inode))->inline_inode--); \
+ (atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \
+ } while (0)
+#define stat_inc_inline_dir(inode) \
+ do { \
+ if (f2fs_has_inline_dentry(inode)) \
+ (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \
+ } while (0)
+#define stat_dec_inline_dir(inode) \
+ do { \
+ if (f2fs_has_inline_dentry(inode)) \
+ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \
} while (0)
-
#define stat_inc_seg_type(sbi, curseg) \
((sbi)->segment_count[(curseg)->alloc_type]++)
#define stat_inc_block_count(sbi, curseg) \
@@ -1492,6 +1595,8 @@ void f2fs_destroy_root_stats(void);
#define stat_inc_read_hit(sb)
#define stat_inc_inline_inode(inode)
#define stat_dec_inline_inode(inode)
+#define stat_inc_inline_dir(inode)
+#define stat_dec_inline_dir(inode)
#define stat_inc_seg_type(sbi, curseg)
#define stat_inc_block_count(sbi, curseg)
#define stat_inc_seg_count(si, type)
@@ -1519,9 +1624,20 @@ extern const struct inode_operations f2fs_special_inode_operations;
* inline.c
*/
bool f2fs_may_inline(struct inode *);
+void read_inline_data(struct page *, struct page *);
int f2fs_read_inline_data(struct inode *, struct page *);
-int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *);
-int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
-void truncate_inline_data(struct inode *, u64);
+int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
+int f2fs_convert_inline_inode(struct inode *);
+int f2fs_write_inline_data(struct inode *, struct page *);
+void truncate_inline_data(struct page *, u64);
bool recover_inline_data(struct inode *, struct page *);
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
+ struct page **);
+struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
+int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
+int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *);
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
+ struct inode *, struct inode *);
+bool f2fs_empty_inline_dir(struct inode *);
+int f2fs_read_inline_dir(struct file *, struct dir_context *);
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8e68bb64f835..3c27e0ecb3bc 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -41,18 +41,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
sb_start_pagefault(inode->i_sb);
- /* force to convert with normal data indices */
- err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
- if (err)
- goto out;
+ f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
/* block allocation */
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_reserve_block(&dn, page->index);
- f2fs_unlock_op(sbi);
- if (err)
+ if (err) {
+ f2fs_unlock_op(sbi);
goto out;
+ }
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
file_update_time(vma->vm_file);
lock_page(page);
@@ -130,10 +130,45 @@ static inline bool need_do_checkpoint(struct inode *inode)
need_cp = true;
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
+ else if (test_opt(sbi, FASTBOOT))
+ need_cp = true;
+ else if (sbi->active_logs == 2)
+ need_cp = true;
return need_cp;
}
+static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
+ bool ret = false;
+ /* But we need to avoid that there are some inode updates */
+ if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino))
+ ret = true;
+ f2fs_put_page(i, 0);
+ return ret;
+}
+
+static void try_to_fix_pino(struct inode *inode)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ nid_t pino;
+
+ down_write(&fi->i_sem);
+ fi->xattr_ver = 0;
+ if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
+ get_parent_ino(inode, &pino)) {
+ fi->i_pino = pino;
+ file_got_pino(inode);
+ up_write(&fi->i_sem);
+
+ mark_inode_dirty_sync(inode);
+ f2fs_write_inode(inode, NULL);
+ } else {
+ up_write(&fi->i_sem);
+ }
+}
+
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
@@ -164,19 +199,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return ret;
}
+ /* if the inode is dirty, let's recover all the time */
+ if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) {
+ update_inode_page(inode);
+ goto go_write;
+ }
+
/*
* if there is no written data, don't waste time to write recovery info.
*/
if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
!exist_written_data(sbi, ino, APPEND_INO)) {
- struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
- /* But we need to avoid that there are some inode updates */
- if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) {
- f2fs_put_page(i, 0);
+ /* it may call write_inode just prior to fsync */
+ if (need_inode_page_update(sbi, ino))
goto go_write;
- }
- f2fs_put_page(i, 0);
if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
exist_written_data(sbi, ino, UPDATE_INO))
@@ -196,49 +233,36 @@ go_write:
up_read(&fi->i_sem);
if (need_cp) {
- nid_t pino;
-
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
- down_write(&fi->i_sem);
- F2FS_I(inode)->xattr_ver = 0;
- if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
- get_parent_ino(inode, &pino)) {
- F2FS_I(inode)->i_pino = pino;
- file_got_pino(inode);
- up_write(&fi->i_sem);
- mark_inode_dirty_sync(inode);
- ret = f2fs_write_inode(inode, NULL);
- if (ret)
- goto out;
- } else {
- up_write(&fi->i_sem);
- }
- } else {
+ /*
+ * We've secured consistency through sync_fs. Following pino
+ * will be used only for fsynced inodes after checkpoint.
+ */
+ try_to_fix_pino(inode);
+ goto out;
+ }
sync_nodes:
- sync_node_pages(sbi, ino, &wbc);
-
- if (need_inode_block_update(sbi, ino)) {
- mark_inode_dirty_sync(inode);
- ret = f2fs_write_inode(inode, NULL);
- if (ret)
- goto out;
- goto sync_nodes;
- }
+ sync_node_pages(sbi, ino, &wbc);
- ret = wait_on_node_pages_writeback(sbi, ino);
- if (ret)
- goto out;
+ if (need_inode_block_update(sbi, ino)) {
+ mark_inode_dirty_sync(inode);
+ f2fs_write_inode(inode, NULL);
+ goto sync_nodes;
+ }
+
+ ret = wait_on_node_pages_writeback(sbi, ino);
+ if (ret)
+ goto out;
- /* once recovery info is written, don't need to tack this */
- remove_dirty_inode(sbi, ino, APPEND_INO);
- clear_inode_flag(fi, FI_APPEND_WRITE);
+ /* once recovery info is written, don't need to track this */
+ remove_dirty_inode(sbi, ino, APPEND_INO);
+ clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
- remove_dirty_inode(sbi, ino, UPDATE_INO);
- clear_inode_flag(fi, FI_UPDATE_WRITE);
- ret = f2fs_issue_flush(F2FS_I_SB(inode));
- }
+ remove_dirty_inode(sbi, ino, UPDATE_INO);
+ clear_inode_flag(fi, FI_UPDATE_WRITE);
+ ret = f2fs_issue_flush(sbi);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
@@ -296,7 +320,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto fail;
/* handle inline data case */
- if (f2fs_has_inline_data(inode)) {
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
if (whence == SEEK_HOLE)
data_ofs = isize;
goto found;
@@ -374,6 +398,15 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct inode *inode = file_inode(file);
+
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ int err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
+
file_accessed(file);
vma->vm_ops = &f2fs_file_vm_ops;
return 0;
@@ -415,20 +448,17 @@ void truncate_data_blocks(struct dnode_of_data *dn)
truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
}
-static void truncate_partial_data_page(struct inode *inode, u64 from)
+static int truncate_partial_data_page(struct inode *inode, u64 from)
{
unsigned offset = from & (PAGE_CACHE_SIZE - 1);
struct page *page;
- if (f2fs_has_inline_data(inode))
- return truncate_inline_data(inode, from);
-
if (!offset)
- return;
+ return 0;
page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false);
if (IS_ERR(page))
- return;
+ return 0;
lock_page(page);
if (unlikely(!PageUptodate(page) ||
@@ -438,9 +468,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
-
out:
f2fs_put_page(page, 1);
+ return 0;
}
int truncate_blocks(struct inode *inode, u64 from, bool lock)
@@ -450,27 +480,33 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
struct dnode_of_data dn;
pgoff_t free_from;
int count = 0, err = 0;
+ struct page *ipage;
trace_f2fs_truncate_blocks_enter(inode, from);
- if (f2fs_has_inline_data(inode))
- goto done;
-
free_from = (pgoff_t)
- ((from + blocksize - 1) >> (sbi->log_blocksize));
+ ((from + blocksize - 1) >> (sbi->log_blocksize));
if (lock)
f2fs_lock_op(sbi);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
+ }
+
+ if (f2fs_has_inline_data(inode)) {
+ f2fs_put_page(ipage, 1);
+ goto out;
+ }
+
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
if (err) {
if (err == -ENOENT)
goto free_next;
- if (lock)
- f2fs_unlock_op(sbi);
- trace_f2fs_truncate_blocks_exit(inode, err);
- return err;
+ goto out;
}
count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -486,11 +522,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
f2fs_put_dnode(&dn);
free_next:
err = truncate_inode_blocks(inode, free_from);
+out:
if (lock)
f2fs_unlock_op(sbi);
-done:
+
/* lastly zero out the first data page */
- truncate_partial_data_page(inode, from);
+ if (!err)
+ err = truncate_partial_data_page(inode, from);
trace_f2fs_truncate_blocks_exit(inode, err);
return err;
@@ -504,6 +542,12 @@ void f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
+ /* we should check inline_data size */
+ if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) {
+ if (f2fs_convert_inline_inode(inode))
+ return;
+ }
+
if (!truncate_blocks(inode, i_size_read(inode), true)) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
@@ -561,10 +605,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (attr->ia_valid & ATTR_SIZE) {
- err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
- if (err)
- return err;
-
if (attr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attr->ia_size);
f2fs_truncate(inode);
@@ -665,9 +705,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (offset >= inode->i_size)
return ret;
- ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
- if (ret)
- return ret;
+ if (f2fs_has_inline_data(inode)) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -721,9 +763,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (ret)
return ret;
- ret = f2fs_convert_inline_data(inode, offset + len, NULL);
- if (ret)
- return ret;
+ if (f2fs_has_inline_data(inode)) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -874,7 +918,15 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
- return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
+ return f2fs_convert_inline_inode(inode);
+}
+
+static int f2fs_release_file(struct inode *inode, struct file *filp)
+{
+ /* any remaining atomic pages should be discarded */
+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+ commit_inmem_pages(inode, true);
+ return 0;
}
static int f2fs_ioc_commit_atomic_write(struct file *filp)
@@ -908,7 +960,8 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
return -EACCES;
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
- return 0;
+
+ return f2fs_convert_inline_inode(inode);
}
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
@@ -985,6 +1038,7 @@ const struct file_operations f2fs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.open = generic_file_open,
+ .release = f2fs_release_file,
.mmap = f2fs_file_mmap,
.fsync = f2fs_sync_file,
.fallocate = f2fs_fallocate,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2a8f4acdb86b..eec0933a4819 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -96,8 +96,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
dev_t dev = sbi->sb->s_bdev->bd_dev;
int err = 0;
- if (!test_opt(sbi, BG_GC))
- goto out;
gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
if (!gc_th) {
err = -ENOMEM;
@@ -340,34 +338,39 @@ static const struct victim_selection default_v_ops = {
.get_victim = get_victim_by_default,
};
-static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
+static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
struct inode_entry *ie;
- list_for_each_entry(ie, ilist, list)
- if (ie->inode->i_ino == ino)
- return ie->inode;
+ ie = radix_tree_lookup(&gc_list->iroot, ino);
+ if (ie)
+ return ie->inode;
return NULL;
}
-static void add_gc_inode(struct inode *inode, struct list_head *ilist)
+static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
{
struct inode_entry *new_ie;
- if (inode == find_gc_inode(inode->i_ino, ilist)) {
+ if (inode == find_gc_inode(gc_list, inode->i_ino)) {
iput(inode);
return;
}
-
new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
new_ie->inode = inode;
- list_add_tail(&new_ie->list, ilist);
+retry:
+ if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) {
+ cond_resched();
+ goto retry;
+ }
+ list_add_tail(&new_ie->list, &gc_list->ilist);
}
-static void put_gc_inode(struct list_head *ilist)
+static void put_gc_inode(struct gc_inode_list *gc_list)
{
struct inode_entry *ie, *next_ie;
- list_for_each_entry_safe(ie, next_ie, ilist, list) {
+ list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
+ radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
iput(ie->inode);
list_del(&ie->list);
kmem_cache_free(winode_slab, ie);
@@ -553,7 +556,7 @@ out:
* the victim data block is ignored.
*/
static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
- struct list_head *ilist, unsigned int segno, int gc_type)
+ struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
struct super_block *sb = sbi->sb;
struct f2fs_summary *entry;
@@ -605,27 +608,27 @@ next_step:
data_page = find_data_page(inode,
start_bidx + ofs_in_node, false);
- if (IS_ERR(data_page))
- goto next_iput;
+ if (IS_ERR(data_page)) {
+ iput(inode);
+ continue;
+ }
f2fs_put_page(data_page, 0);
- add_gc_inode(inode, ilist);
- } else {
- inode = find_gc_inode(dni.ino, ilist);
- if (inode) {
- start_bidx = start_bidx_of_node(nofs,
- F2FS_I(inode));
- data_page = get_lock_data_page(inode,
+ add_gc_inode(gc_list, inode);
+ continue;
+ }
+
+ /* phase 3 */
+ inode = find_gc_inode(gc_list, dni.ino);
+ if (inode) {
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+ data_page = get_lock_data_page(inode,
start_bidx + ofs_in_node);
- if (IS_ERR(data_page))
- continue;
- move_data_page(inode, data_page, gc_type);
- stat_inc_data_blk_count(sbi, 1);
- }
+ if (IS_ERR(data_page))
+ continue;
+ move_data_page(inode, data_page, gc_type);
+ stat_inc_data_blk_count(sbi, 1);
}
- continue;
-next_iput:
- iput(inode);
}
if (++phase < 4)
@@ -646,18 +649,20 @@ next_iput:
}
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
- int gc_type, int type)
+ int gc_type)
{
struct sit_info *sit_i = SIT_I(sbi);
int ret;
+
mutex_lock(&sit_i->sentry_lock);
- ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS);
+ ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
+ NO_CHECK_TYPE, LFS);
mutex_unlock(&sit_i->sentry_lock);
return ret;
}
static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
- struct list_head *ilist, int gc_type)
+ struct gc_inode_list *gc_list, int gc_type)
{
struct page *sum_page;
struct f2fs_summary_block *sum;
@@ -675,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
gc_node_segment(sbi, sum->entries, segno, gc_type);
break;
case SUM_TYPE_DATA:
- gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
+ gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type);
break;
}
blk_finish_plug(&plug);
@@ -688,16 +693,18 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
int f2fs_gc(struct f2fs_sb_info *sbi)
{
- struct list_head ilist;
unsigned int segno, i;
int gc_type = BG_GC;
int nfree = 0;
int ret = -1;
- struct cp_control cpc = {
- .reason = CP_SYNC,
+ struct cp_control cpc;
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(GFP_NOFS),
};
- INIT_LIST_HEAD(&ilist);
+ cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC;
+
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
@@ -709,7 +716,7 @@ gc_more:
write_checkpoint(sbi, &cpc);
}
- if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
+ if (!__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
@@ -719,7 +726,7 @@ gc_more:
META_SSA);
for (i = 0; i < sbi->segs_per_sec; i++)
- do_garbage_collect(sbi, segno + i, &ilist, gc_type);
+ do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
if (gc_type == FG_GC) {
sbi->cur_victim_sec = NULL_SEGNO;
@@ -735,7 +742,7 @@ gc_more:
stop:
mutex_unlock(&sbi->gc_mutex);
- put_gc_inode(&ilist);
+ put_gc_inode(&gc_list);
return ret;
}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 16f0b2b22999..6ff7ad38463e 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -40,6 +40,11 @@ struct inode_entry {
struct inode *inode;
};
+struct gc_inode_list {
+ struct list_head ilist;
+ struct radix_tree_root iroot;
+};
+
/*
* inline functions
*/
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 88036fd75797..f2d3c581e776 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -15,35 +15,44 @@
bool f2fs_may_inline(struct inode *inode)
{
- block_t nr_blocks;
- loff_t i_size;
-
if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
return false;
if (f2fs_is_atomic_file(inode))
return false;
- nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
- if (inode->i_blocks > nr_blocks)
+ if (!S_ISREG(inode->i_mode))
return false;
- i_size = i_size_read(inode);
- if (i_size > MAX_INLINE_DATA)
+ if (i_size_read(inode) > MAX_INLINE_DATA)
return false;
return true;
}
-int f2fs_read_inline_data(struct inode *inode, struct page *page)
+void read_inline_data(struct page *page, struct page *ipage)
{
- struct page *ipage;
void *src_addr, *dst_addr;
- if (page->index) {
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
- goto out;
- }
+ if (PageUptodate(page))
+ return;
+
+ f2fs_bug_on(F2FS_P_SB(page), page->index);
+
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+ /* Copy the whole inline data block */
+ src_addr = inline_data_addr(ipage);
+ dst_addr = kmap_atomic(page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ flush_dcache_page(page);
+ kunmap_atomic(dst_addr);
+ SetPageUptodate(page);
+}
+
+int f2fs_read_inline_data(struct inode *inode, struct page *page)
+{
+ struct page *ipage;
ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage)) {
@@ -51,112 +60,116 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
return PTR_ERR(ipage);
}
- zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+ if (!f2fs_has_inline_data(inode)) {
+ f2fs_put_page(ipage, 1);
+ return -EAGAIN;
+ }
- /* Copy the whole inline data block */
- src_addr = inline_data_addr(ipage);
- dst_addr = kmap(page);
- memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
- kunmap(page);
- f2fs_put_page(ipage, 1);
+ if (page->index)
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ else
+ read_inline_data(page, ipage);
-out:
SetPageUptodate(page);
+ f2fs_put_page(ipage, 1);
unlock_page(page);
-
return 0;
}
-static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
+int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
- int err = 0;
- struct page *ipage;
- struct dnode_of_data dn;
void *src_addr, *dst_addr;
block_t new_blk_addr;
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_io_info fio = {
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
};
+ int dirty, err;
- f2fs_lock_op(sbi);
- ipage = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(ipage)) {
- err = PTR_ERR(ipage);
- goto out;
- }
+ f2fs_bug_on(F2FS_I_SB(dn->inode), page->index);
- /* someone else converted inline_data already */
- if (!f2fs_has_inline_data(inode))
- goto out;
+ if (!f2fs_exist_data(dn->inode))
+ goto clear_out;
- /*
- * i_addr[0] is not used for inline data,
- * so reserving new block will not destroy inline data
- */
- set_new_dnode(&dn, inode, ipage, NULL, 0);
- err = f2fs_reserve_block(&dn, 0);
+ err = f2fs_reserve_block(dn, 0);
if (err)
- goto out;
+ return err;
f2fs_wait_on_page_writeback(page, DATA);
+
+ if (PageUptodate(page))
+ goto no_update;
+
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
/* Copy the whole inline data block */
- src_addr = inline_data_addr(ipage);
- dst_addr = kmap(page);
+ src_addr = inline_data_addr(dn->inode_page);
+ dst_addr = kmap_atomic(page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
- kunmap(page);
+ flush_dcache_page(page);
+ kunmap_atomic(dst_addr);
SetPageUptodate(page);
+no_update:
+ /* clear dirty state */
+ dirty = clear_page_dirty_for_io(page);
/* write data page to try to make data consistent */
set_page_writeback(page);
- write_data_page(page, &dn, &new_blk_addr, &fio);
- update_extent_cache(new_blk_addr, &dn);
+
+ write_data_page(page, dn, &new_blk_addr, &fio);
+ update_extent_cache(new_blk_addr, dn);
f2fs_wait_on_page_writeback(page, DATA);
+ if (dirty)
+ inode_dec_dirty_pages(dn->inode);
- /* clear inline data and flag after data writeback */
- zero_user_segment(ipage, INLINE_DATA_OFFSET,
- INLINE_DATA_OFFSET + MAX_INLINE_DATA);
- clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
- stat_dec_inline_inode(inode);
+ /* this converted inline_data should be recovered. */
+ set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
- sync_inode_page(&dn);
- f2fs_put_dnode(&dn);
-out:
- f2fs_unlock_op(sbi);
- return err;
+ /* clear inline data and flag after data writeback */
+ truncate_inline_data(dn->inode_page, 0);
+clear_out:
+ stat_dec_inline_inode(dn->inode);
+ f2fs_clear_inline_inode(dn->inode);
+ sync_inode_page(dn);
+ f2fs_put_dnode(dn);
+ return 0;
}
-int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size,
- struct page *page)
+int f2fs_convert_inline_inode(struct inode *inode)
{
- struct page *new_page = page;
- int err;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct dnode_of_data dn;
+ struct page *ipage, *page;
+ int err = 0;
- if (!f2fs_has_inline_data(inode))
- return 0;
- else if (to_size <= MAX_INLINE_DATA)
- return 0;
+ page = grab_cache_page(inode->i_mapping, 0);
+ if (!page)
+ return -ENOMEM;
+
+ f2fs_lock_op(sbi);
- if (!page || page->index != 0) {
- new_page = grab_cache_page(inode->i_mapping, 0);
- if (!new_page)
- return -ENOMEM;
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
}
- err = __f2fs_convert_inline_data(inode, new_page);
- if (!page || page->index != 0)
- f2fs_put_page(new_page, 1);
+ set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+ if (f2fs_has_inline_data(inode))
+ err = f2fs_convert_inline_page(&dn, page);
+
+ f2fs_put_dnode(&dn);
+out:
+ f2fs_unlock_op(sbi);
+
+ f2fs_put_page(page, 1);
return err;
}
-int f2fs_write_inline_data(struct inode *inode,
- struct page *page, unsigned size)
+int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
- struct page *ipage;
struct dnode_of_data dn;
int err;
@@ -164,47 +177,39 @@ int f2fs_write_inline_data(struct inode *inode,
err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
- ipage = dn.inode_page;
- f2fs_wait_on_page_writeback(ipage, NODE);
- zero_user_segment(ipage, INLINE_DATA_OFFSET,
- INLINE_DATA_OFFSET + MAX_INLINE_DATA);
- src_addr = kmap(page);
- dst_addr = inline_data_addr(ipage);
- memcpy(dst_addr, src_addr, size);
- kunmap(page);
-
- /* Release the first data block if it is allocated */
if (!f2fs_has_inline_data(inode)) {
- truncate_data_blocks_range(&dn, 1);
- set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
- stat_inc_inline_inode(inode);
+ f2fs_put_dnode(&dn);
+ return -EAGAIN;
}
+ f2fs_bug_on(F2FS_I_SB(inode), page->index);
+
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE);
+ src_addr = kmap_atomic(page);
+ dst_addr = inline_data_addr(dn.inode_page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ kunmap_atomic(src_addr);
+
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
-
return 0;
}
-void truncate_inline_data(struct inode *inode, u64 from)
+void truncate_inline_data(struct page *ipage, u64 from)
{
- struct page *ipage;
+ void *addr;
if (from >= MAX_INLINE_DATA)
return;
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
- if (IS_ERR(ipage))
- return;
-
f2fs_wait_on_page_writeback(ipage, NODE);
- zero_user_segment(ipage, INLINE_DATA_OFFSET + from,
- INLINE_DATA_OFFSET + MAX_INLINE_DATA);
- set_page_dirty(ipage);
- f2fs_put_page(ipage, 1);
+ addr = inline_data_addr(ipage);
+ memset(addr + from, 0, MAX_INLINE_DATA - from);
}
bool recover_inline_data(struct inode *inode, struct page *npage)
@@ -236,6 +241,10 @@ process_inline:
src_addr = inline_data_addr(npage);
dst_addr = inline_data_addr(ipage);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
return true;
@@ -244,16 +253,279 @@ process_inline:
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- f2fs_wait_on_page_writeback(ipage, NODE);
- zero_user_segment(ipage, INLINE_DATA_OFFSET,
- INLINE_DATA_OFFSET + MAX_INLINE_DATA);
- clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ truncate_inline_data(ipage, 0);
+ f2fs_clear_inline_inode(inode);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
truncate_blocks(inode, 0, false);
- set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
goto process_inline;
}
return false;
}
+
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+ struct qstr *name, struct page **res_page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct f2fs_inline_dentry *inline_dentry;
+ struct f2fs_dir_entry *de;
+ struct f2fs_dentry_ptr d;
+ struct page *ipage;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return NULL;
+
+ inline_dentry = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)inline_dentry, 2);
+ de = find_target_dentry(name, NULL, &d);
+
+ unlock_page(ipage);
+ if (de)
+ *res_page = ipage;
+ else
+ f2fs_put_page(ipage, 0);
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs has occurred.
+ */
+ f2fs_bug_on(sbi, d.max < 0);
+ return de;
+}
+
+struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir,
+ struct page **p)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ struct f2fs_dir_entry *de;
+ struct f2fs_inline_dentry *dentry_blk;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return NULL;
+
+ dentry_blk = inline_data_addr(ipage);
+ de = &dentry_blk->dentry[1];
+ *p = ipage;
+ unlock_page(ipage);
+ return de;
+}
+
+int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+ struct page *ipage)
+{
+ struct f2fs_inline_dentry *dentry_blk;
+ struct f2fs_dentry_ptr d;
+
+ dentry_blk = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 2);
+ do_make_empty_dir(inode, parent, &d);
+
+ set_page_dirty(ipage);
+
+ /* update i_size to MAX_INLINE_DATA */
+ if (i_size_read(inode) < MAX_INLINE_DATA) {
+ i_size_write(inode, MAX_INLINE_DATA);
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
+ }
+ return 0;
+}
+
+static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
+ struct f2fs_inline_dentry *inline_dentry)
+{
+ struct page *page;
+ struct dnode_of_data dn;
+ struct f2fs_dentry_block *dentry_blk;
+ int err;
+
+ page = grab_cache_page(dir->i_mapping, 0);
+ if (!page)
+ return -ENOMEM;
+
+ set_new_dnode(&dn, dir, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, 0);
+ if (err)
+ goto out;
+
+ f2fs_wait_on_page_writeback(page, DATA);
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+
+ dentry_blk = kmap_atomic(page);
+
+ /* copy data from inline dentry block to new dentry block */
+ memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
+ INLINE_DENTRY_BITMAP_SIZE);
+ memcpy(dentry_blk->dentry, inline_dentry->dentry,
+ sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
+ memcpy(dentry_blk->filename, inline_dentry->filename,
+ NR_INLINE_DENTRY * F2FS_SLOT_LEN);
+
+ kunmap_atomic(dentry_blk);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ /* clear inline dir and flag after data writeback */
+ truncate_inline_data(ipage, 0);
+
+ stat_dec_inline_dir(dir);
+ clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
+
+ if (i_size_read(dir) < PAGE_CACHE_SIZE) {
+ i_size_write(dir, PAGE_CACHE_SIZE);
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+
+ sync_inode_page(&dn);
+out:
+ f2fs_put_page(page, 1);
+ return err;
+}
+
+int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
+ struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ unsigned int bit_pos;
+ f2fs_hash_t name_hash;
+ struct f2fs_dir_entry *de;
+ size_t namelen = name->len;
+ struct f2fs_inline_dentry *dentry_blk = NULL;
+ int slots = GET_DENTRY_SLOTS(namelen);
+ struct page *page;
+ int err = 0;
+ int i;
+
+ name_hash = f2fs_dentry_hash(name);
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
+
+ dentry_blk = inline_data_addr(ipage);
+ bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ slots, NR_INLINE_DENTRY);
+ if (bit_pos >= NR_INLINE_DENTRY) {
+ err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
+ if (!err)
+ err = -EAGAIN;
+ goto out;
+ }
+
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, name, ipage);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ de = &dentry_blk->dentry[bit_pos];
+ de->hash_code = name_hash;
+ de->name_len = cpu_to_le16(namelen);
+ memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
+ de->ino = cpu_to_le32(inode->i_ino);
+ set_de_type(de, inode);
+ for (i = 0; i < slots; i++)
+ test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+ set_page_dirty(ipage);
+
+ /* we don't need to mark_inode_dirty now */
+ F2FS_I(inode)->i_pino = dir->i_ino;
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ update_parent_metadata(dir, inode, 0);
+fail:
+ up_write(&F2FS_I(inode)->i_sem);
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
+ update_inode(dir, ipage);
+ clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+out:
+ f2fs_put_page(ipage, 1);
+ return err;
+}
+
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
+ struct inode *dir, struct inode *inode)
+{
+ struct f2fs_inline_dentry *inline_dentry;
+ int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
+ unsigned int bit_pos;
+ int i;
+
+ lock_page(page);
+ f2fs_wait_on_page_writeback(page, NODE);
+
+ inline_dentry = inline_data_addr(page);
+ bit_pos = dentry - inline_dentry->dentry;
+ for (i = 0; i < slots; i++)
+ test_and_clear_bit_le(bit_pos + i,
+ &inline_dentry->dentry_bitmap);
+
+ set_page_dirty(page);
+
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+
+ if (inode)
+ f2fs_drop_nlink(dir, inode, page);
+
+ f2fs_put_page(page, 1);
+}
+
+bool f2fs_empty_inline_dir(struct inode *dir)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ unsigned int bit_pos = 2;
+ struct f2fs_inline_dentry *dentry_blk;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return false;
+
+ dentry_blk = inline_data_addr(ipage);
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+ NR_INLINE_DENTRY,
+ bit_pos);
+
+ f2fs_put_page(ipage, 1);
+
+ if (bit_pos < NR_INLINE_DENTRY)
+ return false;
+
+ return true;
+}
+
+int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx)
+{
+ struct inode *inode = file_inode(file);
+ struct f2fs_inline_dentry *inline_dentry = NULL;
+ struct page *ipage = NULL;
+ struct f2fs_dentry_ptr d;
+
+ if (ctx->pos == NR_INLINE_DENTRY)
+ return 0;
+
+ ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
+
+ inline_dentry = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)inline_dentry, 2);
+
+ if (!f2fs_fill_dentries(ctx, &d, 0))
+ ctx->pos = NR_INLINE_DENTRY;
+
+ f2fs_put_page(ipage, 1);
+ return 0;
+}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0deead4505e7..196cc7843aaf 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -67,12 +67,38 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
+static int __recover_inline_status(struct inode *inode, struct page *ipage)
+{
+ void *inline_data = inline_data_addr(ipage);
+ struct f2fs_inode *ri;
+ void *zbuf;
+
+ zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS);
+ if (!zbuf)
+ return -ENOMEM;
+
+ if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) {
+ kfree(zbuf);
+ return 0;
+ }
+ kfree(zbuf);
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
+ ri = F2FS_INODE(ipage);
+ set_raw_inline(F2FS_I(inode), ri);
+ set_page_dirty(ipage);
+ return 0;
+}
+
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
struct f2fs_inode *ri;
+ int err = 0;
/* Check if ino is within scope */
if (check_nid_range(sbi, inode->i_ino)) {
@@ -114,11 +140,19 @@ static int do_read_inode(struct inode *inode)
get_extent_info(&fi->ext, ri->i_ext);
get_inline_info(fi, ri);
+ /* check data exist */
+ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
+ err = __recover_inline_status(inode, node_page);
+
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
f2fs_put_page(node_page, 1);
- return 0;
+
+ stat_inc_inline_inode(inode);
+ stat_inc_inline_dir(inode);
+
+ return err;
}
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
@@ -156,7 +190,7 @@ make_now:
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
@@ -295,11 +329,12 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_lock_op(sbi);
remove_inode_page(inode);
- stat_dec_inline_inode(inode);
f2fs_unlock_op(sbi);
sb_end_intwrite(inode->i_sb);
no_delete:
+ stat_dec_inline_dir(inode);
+ stat_dec_inline_inode(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
@@ -325,8 +360,9 @@ void handle_failed_inode(struct inode *inode)
f2fs_truncate(inode);
remove_inode_page(inode);
- stat_dec_inline_inode(inode);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
alloc_nid_failed(sbi, inode->i_ino);
f2fs_unlock_op(sbi);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 0d2526e5aa11..547a2deeb1ac 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -54,6 +54,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_free = true;
goto out;
}
+
+ if (f2fs_may_inline(inode))
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode))
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
+
trace_f2fs_new_inode(inode, 0);
mark_inode_dirty(inode);
return inode;
@@ -129,8 +135,12 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
alloc_nid_done(sbi, ino);
+ stat_inc_inline_inode(inode);
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out:
handle_failed_inode(inode);
@@ -157,6 +167,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
f2fs_unlock_op(sbi);
d_instantiate(dentry, inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out:
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
@@ -187,14 +200,12 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (de) {
nid_t ino = le32_to_cpu(de->ino);
- kunmap(page);
+ f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
inode = f2fs_iget(dir->i_sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
-
- stat_inc_inline_inode(inode);
}
return d_splice_alias(inode, dentry);
@@ -219,15 +230,18 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
err = acquire_orphan_inode(sbi);
if (err) {
f2fs_unlock_op(sbi);
- kunmap(page);
+ f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
goto fail;
}
- f2fs_delete_entry(de, page, inode);
+ f2fs_delete_entry(de, page, dir, inode);
f2fs_unlock_op(sbi);
/* In order to evict this inode, we set it dirty */
mark_inode_dirty(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
fail:
trace_f2fs_unlink_exit(inode, err);
return err;
@@ -261,6 +275,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return err;
out:
handle_failed_inode(inode);
@@ -291,11 +308,14 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
f2fs_unlock_op(sbi);
+ stat_inc_inline_dir(inode);
alloc_nid_done(sbi, inode->i_ino);
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out_fail:
@@ -338,8 +358,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
f2fs_unlock_op(sbi);
alloc_nid_done(sbi, inode->i_ino);
+
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out:
handle_failed_inode(inode);
@@ -435,7 +459,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
- f2fs_delete_entry(old_entry, old_page, NULL);
+ f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
if (old_dir_entry) {
if (old_dir != new_dir) {
@@ -443,7 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dir_page, new_dir);
update_inode_page(old_inode);
} else {
- kunmap(old_dir_page);
+ f2fs_dentry_kunmap(old_inode, old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
drop_nlink(old_dir);
@@ -452,19 +476,22 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
f2fs_unlock_op(sbi);
+
+ if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
put_out_dir:
f2fs_unlock_op(sbi);
- kunmap(new_page);
+ f2fs_dentry_kunmap(new_dir, new_page);
f2fs_put_page(new_page, 0);
out_dir:
if (old_dir_entry) {
- kunmap(old_dir_page);
+ f2fs_dentry_kunmap(old_inode, old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
out_old:
- kunmap(old_page);
+ f2fs_dentry_kunmap(old_dir, old_page);
f2fs_put_page(old_page, 0);
out:
return err;
@@ -588,6 +615,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
update_inode_page(new_dir);
f2fs_unlock_op(sbi);
+
+ if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out_undo:
/* Still we may fail to recover name info of f2fs_inode here */
@@ -596,19 +626,19 @@ out_unlock:
f2fs_unlock_op(sbi);
out_new_dir:
if (new_dir_entry) {
- kunmap(new_dir_page);
+ f2fs_dentry_kunmap(new_inode, new_dir_page);
f2fs_put_page(new_dir_page, 0);
}
out_old_dir:
if (old_dir_entry) {
- kunmap(old_dir_page);
+ f2fs_dentry_kunmap(old_inode, old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
out_new:
- kunmap(new_page);
+ f2fs_dentry_kunmap(new_dir, new_page);
f2fs_put_page(new_page, 0);
out_old:
- kunmap(old_page);
+ f2fs_dentry_kunmap(old_dir, old_page);
f2fs_put_page(old_page, 0);
out:
return err;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 44b8afef43d9..f83326ca32ef 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct sysinfo val;
+ unsigned long avail_ram;
unsigned long mem_size = 0;
bool res = false;
si_meminfo(&val);
- /* give 25%, 25%, 50% memory for each components respectively */
+
+ /* only uses low memory */
+ avail_ram = val.totalram - val.totalhigh;
+
+ /* give 25%, 25%, 50%, 50% memory for each components respectively */
if (type == FREE_NIDS) {
- mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
+ PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
- mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
+ PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DIRTY_DENTS) {
if (sbi->sb->s_bdi->dirty_exceeded)
return false;
mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+ } else if (type == INO_ENTRIES) {
+ int i;
+
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
+ for (i = 0; i <= UPDATE_INO; i++)
+ mem_size += (sbi->im[i].ino_num *
+ sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
}
return res;
}
@@ -131,7 +147,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
if (get_nat_flag(ne, IS_DIRTY))
return;
-retry:
+
head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (!head) {
head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
@@ -140,11 +156,7 @@ retry:
INIT_LIST_HEAD(&head->set_list);
head->set = set;
head->entry_cnt = 0;
-
- if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
- cond_resched();
- goto retry;
- }
+ f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
}
list_move_tail(&ne->list, &head->entry_list);
nm_i->dirty_nat_cnt++;
@@ -155,7 +167,7 @@ retry:
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry *ne)
{
- nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
+ nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
struct nat_entry_set *head;
head = radix_tree_lookup(&nm_i->nat_set_root, set);
@@ -180,11 +192,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
struct nat_entry *e;
bool is_cp = true;
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = false;
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return is_cp;
}
@@ -194,11 +206,11 @@ bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
struct nat_entry *e;
bool fsynced = false;
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino);
if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
fsynced = true;
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return fsynced;
}
@@ -208,13 +220,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
struct nat_entry *e;
bool need_update = true;
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
(get_nat_flag(e, IS_CHECKPOINTED) ||
get_nat_flag(e, HAS_FSYNCED_INODE)))
need_update = false;
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return need_update;
}
@@ -222,13 +234,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
- new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
- if (!new)
- return NULL;
- if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
- kmem_cache_free(nat_entry_slab, new);
- return NULL;
- }
+ new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
+ f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
nat_reset_flag(new);
@@ -241,18 +248,14 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
struct f2fs_nat_entry *ne)
{
struct nat_entry *e;
-retry:
- write_lock(&nm_i->nat_tree_lock);
+
+ down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
e = grab_nat_entry(nm_i, nid);
- if (!e) {
- write_unlock(&nm_i->nat_tree_lock);
- goto retry;
- }
node_info_from_raw_nat(&e->ni, ne);
}
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -260,15 +263,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
-retry:
- write_lock(&nm_i->nat_tree_lock);
+
+ down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
e = grab_nat_entry(nm_i, ni->nid);
- if (!e) {
- write_unlock(&nm_i->nat_tree_lock);
- goto retry;
- }
e->ni = *ni;
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
@@ -310,7 +309,7 @@ retry:
set_nat_flag(e, HAS_FSYNCED_INODE, true);
set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
}
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -320,7 +319,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
if (available_free_memory(sbi, NAT_ENTRIES))
return 0;
- write_lock(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
struct nat_entry *ne;
ne = list_first_entry(&nm_i->nat_entries,
@@ -328,7 +327,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
}
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
return nr_shrink;
}
@@ -351,14 +350,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
ni->nid = nid;
/* Check nat cache */
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
}
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
if (e)
return;
@@ -1298,16 +1297,22 @@ static int f2fs_write_node_page(struct page *page,
return 0;
}
- if (wbc->for_reclaim)
- goto redirty_out;
-
- down_read(&sbi->node_write);
+ if (wbc->for_reclaim) {
+ if (!down_read_trylock(&sbi->node_write))
+ goto redirty_out;
+ } else {
+ down_read(&sbi->node_write);
+ }
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
unlock_page(page);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+
return 0;
redirty_out:
@@ -1410,13 +1415,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
if (build) {
/* do not add allocated nids */
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
if (ne &&
(!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
allocated = true;
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
if (allocated)
return 0;
}
@@ -1425,15 +1430,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
i->nid = nid;
i->state = NID_NEW;
+ if (radix_tree_preload(GFP_NOFS)) {
+ kmem_cache_free(free_nid_slab, i);
+ return 0;
+ }
+
spin_lock(&nm_i->free_nid_list_lock);
if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
spin_unlock(&nm_i->free_nid_list_lock);
+ radix_tree_preload_end();
kmem_cache_free(free_nid_slab, i);
return 0;
}
list_add_tail(&i->list, &nm_i->free_nid_list);
nm_i->fcnt++;
spin_unlock(&nm_i->free_nid_list_lock);
+ radix_tree_preload_end();
return 1;
}
@@ -1804,21 +1816,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
raw_ne = nat_in_journal(sum, i);
-retry:
- write_lock(&nm_i->nat_tree_lock);
- ne = __lookup_nat_cache(nm_i, nid);
- if (ne)
- goto found;
- ne = grab_nat_entry(nm_i, nid);
+ down_write(&nm_i->nat_tree_lock);
+ ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
- write_unlock(&nm_i->nat_tree_lock);
- goto retry;
+ ne = grab_nat_entry(nm_i, nid);
+ node_info_from_raw_nat(&ne->ni, &raw_ne);
}
- node_info_from_raw_nat(&ne->ni, &raw_ne);
-found:
__set_nat_cache_dirty(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
@@ -1889,10 +1895,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
raw_nat_from_node_info(raw_ne, &ne->ni);
- write_lock(&NM_I(sbi)->nat_tree_lock);
+ down_write(&NM_I(sbi)->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
- write_unlock(&NM_I(sbi)->nat_tree_lock);
+ up_write(&NM_I(sbi)->nat_tree_lock);
if (nat_get_blkaddr(ne) == NULL_ADDR)
add_free_nid(sbi, nid, false);
@@ -1903,10 +1909,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
else
f2fs_put_page(page, 1);
- if (!set->entry_cnt) {
- radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
- kmem_cache_free(nat_entry_set_slab, set);
- }
+ f2fs_bug_on(sbi, set->entry_cnt);
+
+ radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
+ kmem_cache_free(nat_entry_set_slab, set);
}
/*
@@ -1923,6 +1929,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
nid_t set_idx = 0;
LIST_HEAD(sets);
+ if (!nm_i->dirty_nat_cnt)
+ return;
/*
* if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them
@@ -1931,9 +1939,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
- if (!nm_i->dirty_nat_cnt)
- return;
-
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, NATVEC_SIZE, setvec))) {
unsigned idx;
@@ -1973,13 +1978,13 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
- INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
- INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
+ INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
+ INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
- rwlock_init(&nm_i->nat_tree_lock);
+ init_rwsem(&nm_i->nat_tree_lock);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2035,7 +2040,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
spin_unlock(&nm_i->free_nid_list_lock);
/* destroy nat cache */
- write_lock(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
@@ -2044,7 +2049,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
__del_from_nat_cache(nm_i, natvec[idx]);
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
sbi->nm_info = NULL;
@@ -2061,17 +2066,17 @@ int __init create_node_manager_caches(void)
free_nid_slab = f2fs_kmem_cache_create("free_nid",
sizeof(struct free_nid));
if (!free_nid_slab)
- goto destory_nat_entry;
+ goto destroy_nat_entry;
nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
- goto destory_free_nid;
+ goto destroy_free_nid;
return 0;
-destory_free_nid:
+destroy_free_nid:
kmem_cache_destroy(free_nid_slab);
-destory_nat_entry:
+destroy_nat_entry:
kmem_cache_destroy(nat_entry_slab);
fail:
return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 8d5e6e0dd840..d10b6448a671 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
- DIRTY_DENTS /* indicates dirty dentry pages */
+ DIRTY_DENTS, /* indicates dirty dentry pages */
+ INO_ENTRIES, /* indicates inode entries */
};
struct nat_entry_set {
@@ -192,10 +193,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
{
unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
- if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
- f2fs_clear_bit(block_off, nm_i->nat_bitmap);
- else
- f2fs_set_bit(block_off, nm_i->nat_bitmap);
+ f2fs_change_bit(block_off, nm_i->nat_bitmap);
}
static inline void fill_node_footer(struct page *page, nid_t nid,
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index ebd013225788..9160a37e1c7a 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -111,7 +111,7 @@ retry:
iput(einode);
goto out_unmap_put;
}
- f2fs_delete_entry(de, page, einode);
+ f2fs_delete_entry(de, page, dir, einode);
iput(einode);
goto retry;
}
@@ -129,7 +129,7 @@ retry:
goto out;
out_unmap_put:
- kunmap(page);
+ f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
out_err:
iput(dir);
@@ -170,13 +170,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ ra_meta_pages(sbi, blkaddr, 1, META_POR);
+
while (1) {
struct fsync_inode_entry *entry;
if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
return 0;
- page = get_meta_page_ra(sbi, blkaddr);
+ page = get_meta_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page))
break;
@@ -227,6 +229,8 @@ next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
+
+ ra_meta_pages_cond(sbi, blkaddr);
}
f2fs_put_page(page, 1);
return err;
@@ -436,7 +440,9 @@ static int recover_data(struct f2fs_sb_info *sbi,
if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
break;
- page = get_meta_page_ra(sbi, blkaddr);
+ ra_meta_pages_cond(sbi, blkaddr);
+
+ page = get_meta_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page)) {
f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 923cb76fdc46..42607a679923 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -178,17 +178,47 @@ void register_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
+ int err;
+
+ SetPagePrivate(page);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
/* add atomic page indices to the list */
new->page = page;
INIT_LIST_HEAD(&new->list);
-
+retry:
/* increase reference count with clean state */
mutex_lock(&fi->inmem_lock);
+ err = radix_tree_insert(&fi->inmem_root, page->index, new);
+ if (err == -EEXIST) {
+ mutex_unlock(&fi->inmem_lock);
+ kmem_cache_free(inmem_entry_slab, new);
+ return;
+ } else if (err) {
+ mutex_unlock(&fi->inmem_lock);
+ goto retry;
+ }
get_page(page);
list_add_tail(&new->list, &fi->inmem_pages);
+ inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ mutex_unlock(&fi->inmem_lock);
+}
+
+void invalidate_inmem_page(struct inode *inode, struct page *page)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct inmem_pages *cur;
+
+ mutex_lock(&fi->inmem_lock);
+ cur = radix_tree_lookup(&fi->inmem_root, page->index);
+ if (cur) {
+ radix_tree_delete(&fi->inmem_root, cur->page->index);
+ f2fs_put_page(cur->page, 0);
+ list_del(&cur->list);
+ kmem_cache_free(inmem_entry_slab, cur);
+ dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ }
mutex_unlock(&fi->inmem_lock);
}
@@ -203,7 +233,16 @@ void commit_inmem_pages(struct inode *inode, bool abort)
.rw = WRITE_SYNC,
};
- f2fs_balance_fs(sbi);
+ /*
+ * The abort is true only when f2fs_evict_inode is called.
+ * Basically, the f2fs_evict_inode doesn't produce any data writes, so
+ * that we don't need to call f2fs_balance_fs.
+ * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
+ * inode becomes free by iget_locked in f2fs_iget.
+ */
+ if (!abort)
+ f2fs_balance_fs(sbi);
+
f2fs_lock_op(sbi);
mutex_lock(&fi->inmem_lock);
@@ -216,9 +255,11 @@ void commit_inmem_pages(struct inode *inode, bool abort)
do_write_data_page(cur->page, &fio);
submit_bio = true;
}
+ radix_tree_delete(&fi->inmem_root, cur->page->index);
f2fs_put_page(cur->page, 1);
list_del(&cur->list);
kmem_cache_free(inmem_entry_slab, cur);
+ dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
}
if (submit_bio)
f2fs_submit_merged_bio(sbi, DATA, WRITE);
@@ -248,7 +289,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
/* check the # of cached NAT entries and prefree segments */
if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
- excess_prefree_segs(sbi))
+ excess_prefree_segs(sbi) ||
+ available_free_memory(sbi, INO_ENTRIES))
f2fs_sync_fs(sbi->sb, true);
}
@@ -441,10 +483,33 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
}
}
-static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static void __add_discard_entry(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc, unsigned int start, unsigned int end)
{
struct list_head *head = &SM_I(sbi)->discard_list;
- struct discard_entry *new;
+ struct discard_entry *new, *last;
+
+ if (!list_empty(head)) {
+ last = list_last_entry(head, struct discard_entry, list);
+ if (START_BLOCK(sbi, cpc->trim_start) + start ==
+ last->blkaddr + last->len) {
+ last->len += end - start;
+ goto done;
+ }
+ }
+
+ new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+ INIT_LIST_HEAD(&new->list);
+ new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
+ new->len = end - start;
+ list_add_tail(&new->list, head);
+done:
+ SM_I(sbi)->nr_discards += end - start;
+ cpc->trimmed += end - start;
+}
+
+static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
int max_blocks = sbi->blocks_per_seg;
struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
@@ -473,13 +538,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
mutex_unlock(&dirty_i->seglist_lock);
- new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
- INIT_LIST_HEAD(&new->list);
- new->blkaddr = START_BLOCK(sbi, cpc->trim_start);
- new->len = sbi->blocks_per_seg;
- list_add_tail(&new->list, head);
- SM_I(sbi)->nr_discards += sbi->blocks_per_seg;
- cpc->trimmed += sbi->blocks_per_seg;
+ __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
return;
}
@@ -489,7 +548,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
for (i = 0; i < entries; i++)
- dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
+ dmap[i] = ~(cur_map[i] | ckpt_map[i]);
while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
start = __find_rev_next_bit(dmap, max_blocks, end + 1);
@@ -501,14 +560,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (end - start < cpc->trim_minlen)
continue;
- new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
- INIT_LIST_HEAD(&new->list);
- new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
- new->len = end - start;
- cpc->trimmed += end - start;
-
- list_add_tail(&new->list, head);
- SM_I(sbi)->nr_discards += end - start;
+ __add_discard_entry(sbi, cpc, start, end);
}
}
@@ -620,10 +672,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
/* Update valid block bitmap */
if (del > 0) {
- if (f2fs_set_bit(offset, se->cur_valid_map))
+ if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
f2fs_bug_on(sbi, 1);
} else {
- if (!f2fs_clear_bit(offset, se->cur_valid_map))
+ if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
f2fs_bug_on(sbi, 1);
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -1004,6 +1056,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
range->len < sbi->blocksize)
return -EINVAL;
+ cpc.trimmed = 0;
if (end <= MAIN_BLKADDR(sbi))
goto out;
@@ -1015,10 +1068,11 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
cpc.trim_start = start_segno;
cpc.trim_end = end_segno;
cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
- cpc.trimmed = 0;
/* do checkpoint to issue discard commands safely */
+ mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
+ mutex_unlock(&sbi->gc_mutex);
out:
range->len = cpc.trimmed << sbi->log_blocksize;
return 0;
@@ -1050,8 +1104,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type)
else
return CURSEG_COLD_DATA;
} else {
- if (IS_DNODE(page) && !is_cold_node(page))
- return CURSEG_HOT_NODE;
+ if (IS_DNODE(page) && is_cold_node(page))
+ return CURSEG_WARM_NODE;
else
return CURSEG_COLD_NODE;
}
@@ -1524,17 +1578,7 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- struct sit_info *sit_i = SIT_I(sbi);
- unsigned int offset = SIT_BLOCK_OFFSET(segno);
- block_t blk_addr = sit_i->sit_base_addr + offset;
-
- check_seg_range(sbi, segno);
-
- /* calculate sit block address */
- if (f2fs_test_bit(offset, sit_i->sit_bitmap))
- blk_addr += sit_i->sit_blocks;
-
- return get_meta_page(sbi, blk_addr);
+ return get_meta_page(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -1687,7 +1731,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* #2, flush sit entries to sit page.
*/
list_for_each_entry_safe(ses, tmp, head, set_list) {
- struct page *page;
+ struct page *page = NULL;
struct f2fs_sit_block *raw_sit = NULL;
unsigned int start_segno = ses->start_segno;
unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
@@ -2200,7 +2244,7 @@ int __init create_segment_manager_caches(void)
goto fail;
sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
- sizeof(struct nat_entry_set));
+ sizeof(struct sit_entry_set));
if (!sit_entry_set_slab)
goto destory_discard_entry;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 2495bec1c621..7f327c0ba4e3 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -657,10 +657,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
{
unsigned int block_off = SIT_BLOCK_OFFSET(start);
- if (f2fs_test_bit(block_off, sit_i->sit_bitmap))
- f2fs_clear_bit(block_off, sit_i->sit_bitmap);
- else
- f2fs_set_bit(block_off, sit_i->sit_bitmap);
+ f2fs_change_bit(block_off, sit_i->sit_bitmap);
}
static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
@@ -714,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
*/
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return 0;
+
if (type == DATA)
return sbi->blocks_per_seg;
else if (type == NODE)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 41d6f700f4ee..f71421d70475 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,8 +51,10 @@ enum {
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_inline_data,
+ Opt_inline_dentry,
Opt_flush_merge,
Opt_nobarrier,
+ Opt_fastboot,
Opt_err,
};
@@ -69,8 +71,10 @@ static match_table_t f2fs_tokens = {
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
+ {Opt_inline_dentry, "inline_dentry"},
{Opt_flush_merge, "flush_merge"},
{Opt_nobarrier, "nobarrier"},
+ {Opt_fastboot, "fastboot"},
{Opt_err, NULL},
};
@@ -340,12 +344,18 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_inline_data:
set_opt(sbi, INLINE_DATA);
break;
+ case Opt_inline_dentry:
+ set_opt(sbi, INLINE_DENTRY);
+ break;
case Opt_flush_merge:
set_opt(sbi, FLUSH_MERGE);
break;
case Opt_nobarrier:
set_opt(sbi, NOBARRIER);
break;
+ case Opt_fastboot:
+ set_opt(sbi, FASTBOOT);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -373,6 +383,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
fi->i_advise = 0;
rwlock_init(&fi->ext.ext_lock);
init_rwsem(&fi->i_sem);
+ INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
@@ -473,9 +484,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
trace_f2fs_sync_fs(sb, sync);
if (sync) {
- struct cp_control cpc = {
- .reason = CP_SYNC,
- };
+ struct cp_control cpc;
+
+ cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC;
mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
@@ -562,10 +573,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",disable_ext_identify");
if (test_opt(sbi, INLINE_DATA))
seq_puts(seq, ",inline_data");
+ if (test_opt(sbi, INLINE_DENTRY))
+ seq_puts(seq, ",inline_dentry");
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
if (test_opt(sbi, NOBARRIER))
seq_puts(seq, ",nobarrier");
+ if (test_opt(sbi, FASTBOOT))
+ seq_puts(seq, ",fastboot");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -654,7 +669,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
f2fs_sync_fs(sb, 1);
need_restart_gc = true;
}
- } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
+ } else if (!sbi->gc_thread) {
err = start_gc_thread(sbi);
if (err)
goto restore_opts;
@@ -667,7 +682,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
destroy_flush_cmd_control(sbi);
- } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
+ } else if (!SM_I(sbi)->cmd_control_info) {
err = create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
@@ -922,7 +937,7 @@ retry:
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
- struct f2fs_super_block *raw_super;
+ struct f2fs_super_block *raw_super = NULL;
struct buffer_head *raw_super_buf;
struct inode *root;
long err = -EINVAL;
@@ -1123,7 +1138,7 @@ try_onemore:
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
- if (!f2fs_readonly(sb)) {
+ if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index deca8728117b..5072bf9ae0ef 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
}
if (strcmp(name, "") == 0)
return -EINVAL;
- return f2fs_getxattr(dentry->d_inode, type, name, buffer, size);
+ return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL);
}
static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
@@ -398,7 +398,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
- void *buffer, size_t buffer_size)
+ void *buffer, size_t buffer_size, struct page *ipage)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
@@ -412,7 +412,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
- base_addr = read_all_xattrs(inode, NULL);
+ base_addr = read_all_xattrs(inode, ipage);
if (!base_addr)
return -ENOMEM;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 34ab7dbcf5e3..969d792ca362 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -115,7 +115,8 @@ extern const struct xattr_handler *f2fs_xattr_handlers[];
extern int f2fs_setxattr(struct inode *, int, const char *,
const void *, size_t, struct page *, int);
-extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);
+extern int f2fs_getxattr(struct inode *, int, const char *, void *,
+ size_t, struct page *);
extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
#else
@@ -126,7 +127,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index,
return -EOPNOTSUPP;
}
static inline int f2fs_getxattr(struct inode *inode, int index,
- const char *name, void *buffer, size_t buffer_size)
+ const char *name, void *buffer,
+ size_t buffer_size, struct page *dpage)
{
return -EOPNOTSUPP;
}
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 3963ede84eb0..c5d6bb939d19 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -702,10 +702,11 @@ static int fat_readdir(struct file *file, struct dir_context *ctx)
}
#define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \
-static int func(void *__buf, const char *name, int name_len, \
+static int func(struct dir_context *ctx, const char *name, int name_len, \
loff_t offset, u64 ino, unsigned int d_type) \
{ \
- struct fat_ioctl_filldir_callback *buf = __buf; \
+ struct fat_ioctl_filldir_callback *buf = \
+ container_of(ctx, struct fat_ioctl_filldir_callback, ctx); \
struct dirent_type __user *d1 = buf->dirent; \
struct dirent_type __user *d2 = d1 + 1; \
\
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e0c4ba39a377..64e295e8ff38 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -370,6 +370,7 @@ extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
/* fat/inode.c */
+extern int fat_block_truncate_page(struct inode *inode, loff_t from);
extern void fat_attach(struct inode *inode, loff_t i_pos);
extern void fat_detach(struct inode *inode);
extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 85f79a89e747..8429c68e3057 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -443,6 +443,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
+ error = fat_block_truncate_page(inode, attr->ia_size);
+ if (error)
+ goto out;
down_write(&MSDOS_I(inode)->truncate_lock);
truncate_setsize(inode, attr->ia_size);
fat_truncate_blocks(inode, attr->ia_size);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 756aead10d96..7b41a2dcdd76 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -294,6 +294,18 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
return blocknr;
}
+/*
+ * fat_block_truncate_page() zeroes out a mapping from file offset `from'
+ * up to the end of the block which corresponds to `from'.
+ * This is required during truncate to physically zeroout the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ * Also, avoid causing failure from fsx for cases of "data past EOF"
+ */
+int fat_block_truncate_page(struct inode *inode, loff_t from)
+{
+ return block_truncate_page(inode->i_mapping, from, fat_get_block);
+}
+
static const struct address_space_operations fat_aops = {
.readpage = fat_readpage,
.readpages = fat_readpages,
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6df8d3d885e5..b8b92c2f9683 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -736,7 +736,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
}
alias = d_find_alias(inode);
- if (alias && !vfat_d_anon_disconn(alias)) {
+ /*
+ * Checking "alias->d_parent == dentry->d_parent" to make sure
+ * FS is not corrupted (especially double linked dir).
+ */
+ if (alias && alias->d_parent == dentry->d_parent &&
+ !vfat_d_anon_disconn(alias)) {
/*
* This inode has non anonymous-DCACHE_DISCONNECTED
* dentry. This means, the user did ->lookup() by an
@@ -755,12 +760,9 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
- dentry->d_time = dentry->d_parent->d_inode->i_version;
- dentry = d_splice_alias(inode, dentry);
- if (dentry)
- dentry->d_time = dentry->d_parent->d_inode->i_version;
- return dentry;
-
+ if (!inode)
+ dentry->d_time = dir->i_version;
+ return d_splice_alias(inode, dentry);
error:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return ERR_PTR(err);
@@ -793,7 +795,6 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
/* timestamp is already written, so mark_inode_dirty() is unneeded. */
- dentry->d_time = dentry->d_parent->d_inode->i_version;
d_instantiate(dentry, inode);
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -824,6 +825,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
clear_nlink(inode);
inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
fat_detach(inode);
+ dentry->d_time = dir->i_version;
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -849,6 +851,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry)
clear_nlink(inode);
inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
fat_detach(inode);
+ dentry->d_time = dir->i_version;
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -889,7 +892,6 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
/* timestamp is already written, so mark_inode_dirty() is unneeded. */
- dentry->d_time = dentry->d_parent->d_inode->i_version;
d_instantiate(dentry, inode);
mutex_unlock(&MSDOS_SB(sb)->s_lock);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 99d440a4a6ba..ee85cd4e136a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -740,14 +740,15 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- __FMODE_EXEC | O_PATH | __O_TMPFILE
+ __FMODE_EXEC | O_PATH | __O_TMPFILE |
+ __FMODE_NONOTIFY
));
fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/file.c b/fs/file.c
index ab3eb6a88239..ee738ea028fa 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -869,7 +869,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
struct file *file = fget_raw(fildes);
if (file) {
- ret = get_unused_fd();
+ ret = get_unused_fd_flags(0);
if (ret >= 0)
fd_install(ret, file);
else
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ef9bef118342..2d609a5fbfea 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -479,12 +479,28 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* write_inode()
*/
spin_lock(&inode->i_lock);
- /* Clear I_DIRTY_PAGES if we've written out all dirty pages */
- if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- inode->i_state &= ~I_DIRTY_PAGES;
+
dirty = inode->i_state & I_DIRTY;
- inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ inode->i_state &= ~I_DIRTY;
+
+ /*
+ * Paired with smp_mb() in __mark_inode_dirty(). This allows
+ * __mark_inode_dirty() to test i_state without grabbing i_lock -
+ * either they see the I_DIRTY bits cleared or we see the dirtied
+ * inode.
+ *
+ * I_DIRTY_PAGES is always cleared together above even if @mapping
+ * still has dirty pages. The flag is reinstated after smp_mb() if
+ * necessary. This guarantees that either __mark_inode_dirty()
+ * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
+ */
+ smp_mb();
+
+ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ inode->i_state |= I_DIRTY_PAGES;
+
spin_unlock(&inode->i_lock);
+
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
int err = write_inode(inode, wbc);
@@ -1148,12 +1164,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
/*
- * make sure that changes are seen by all cpus before we test i_state
- * -- mikulas
+ * Paired with smp_mb() in __writeback_single_inode() for the
+ * following lockless i_state test. See there for details.
*/
smp_mb();
- /* avoid the locking if we can */
if ((inode->i_state & flags) == flags)
return;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 966ace8b243f..28d0c7abba1c 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -415,7 +415,7 @@ err_unlock:
err_region:
unregister_chrdev_region(devt, 1);
err:
- fuse_conn_kill(fc);
+ fuse_abort_conn(fc);
goto out;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ca887314aba9..ba1107977f2e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -511,6 +511,35 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
}
EXPORT_SYMBOL_GPL(fuse_request_send);
+ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
+{
+ struct fuse_req *req;
+ ssize_t ret;
+
+ req = fuse_get_req(fc, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->in.h.opcode = args->in.h.opcode;
+ req->in.h.nodeid = args->in.h.nodeid;
+ req->in.numargs = args->in.numargs;
+ memcpy(req->in.args, args->in.args,
+ args->in.numargs * sizeof(struct fuse_in_arg));
+ req->out.argvar = args->out.argvar;
+ req->out.numargs = args->out.numargs;
+ memcpy(req->out.args, args->out.args,
+ args->out.numargs * sizeof(struct fuse_arg));
+ fuse_request_send(fc, req);
+ ret = req->out.h.error;
+ if (!ret && args->out.argvar) {
+ BUG_ON(args->out.numargs != 1);
+ ret = req->out.args[0].size;
+ }
+ fuse_put_request(fc, req);
+
+ return ret;
+}
+
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
struct fuse_req *req)
{
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index dbab798f5caf..252b8a5de8b5 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -145,22 +145,22 @@ static void fuse_invalidate_entry(struct dentry *entry)
fuse_invalidate_entry_cache(entry);
}
-static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg)
{
memset(outarg, 0, sizeof(struct fuse_entry_out));
- req->in.h.opcode = FUSE_LOOKUP;
- req->in.h.nodeid = nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = name->len + 1;
- req->in.args[0].value = name->name;
- req->out.numargs = 1;
+ args->in.h.opcode = FUSE_LOOKUP;
+ args->in.h.nodeid = nodeid;
+ args->in.numargs = 1;
+ args->in.args[0].size = name->len + 1;
+ args->in.args[0].value = name->name;
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(struct fuse_entry_out);
- req->out.args[0].value = outarg;
+ args->out.args[0].size = sizeof(struct fuse_entry_out);
+ args->out.args[0].value = outarg;
}
u64 fuse_get_attr_version(struct fuse_conn *fc)
@@ -200,9 +200,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto invalid;
else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
(flags & LOOKUP_REVAL)) {
- int err;
struct fuse_entry_out outarg;
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
@@ -215,31 +214,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto out;
fc = get_fuse_conn(inode);
- req = fuse_get_req_nopages(fc);
- ret = PTR_ERR(req);
- if (IS_ERR(req))
- goto out;
forget = fuse_alloc_forget();
- if (!forget) {
- fuse_put_request(fc, req);
- ret = -ENOMEM;
+ ret = -ENOMEM;
+ if (!forget)
goto out;
- }
attr_version = fuse_get_attr_version(fc);
parent = dget_parent(entry);
- fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+ fuse_lookup_init(fc, &args, get_node_id(parent->d_inode),
&entry->d_name, &outarg);
- fuse_request_send(fc, req);
+ ret = fuse_simple_request(fc, &args);
dput(parent);
- err = req->out.h.error;
- fuse_put_request(fc, req);
/* Zero nodeid is same as -ENOENT */
- if (!err && !outarg.nodeid)
- err = -ENOENT;
- if (!err) {
+ if (!ret && !outarg.nodeid)
+ ret = -ENOENT;
+ if (!ret) {
fi = get_fuse_inode(inode);
if (outarg.nodeid != get_node_id(inode)) {
fuse_queue_forget(fc, forget, outarg.nodeid, 1);
@@ -250,7 +241,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
spin_unlock(&fc->lock);
}
kfree(forget);
- if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ if (ret == -ENOMEM)
+ goto out;
+ if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
goto invalid;
fuse_change_attributes(inode, &outarg.attr,
@@ -296,7 +289,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
int err;
@@ -306,24 +299,16 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
if (name->len > FUSE_NAME_MAX)
goto out;
- req = fuse_get_req_nopages(fc);
- err = PTR_ERR(req);
- if (IS_ERR(req))
- goto out;
forget = fuse_alloc_forget();
err = -ENOMEM;
- if (!forget) {
- fuse_put_request(fc, req);
+ if (!forget)
goto out;
- }
attr_version = fuse_get_attr_version(fc);
- fuse_lookup_init(fc, req, nodeid, name, outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_lookup_init(fc, &args, nodeid, name, outarg);
+ err = fuse_simple_request(fc, &args);
/* Zero nodeid is same as -ENOENT, but with valid timeout */
if (err || !outarg->nodeid)
goto out_put_forget;
@@ -372,7 +357,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
if (inode && get_node_id(inode) == FUSE_ROOT_ID)
goto out_iput;
- newent = d_materialise_unique(entry, inode);
+ newent = d_splice_alias(inode, entry);
err = PTR_ERR(newent);
if (IS_ERR(newent))
goto out_err;
@@ -405,7 +390,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
int err;
struct inode *inode;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
struct fuse_create_in inarg;
struct fuse_open_out outopen;
@@ -420,15 +405,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!forget)
goto out_err;
- req = fuse_get_req_nopages(fc);
- err = PTR_ERR(req);
- if (IS_ERR(req))
- goto out_put_forget_req;
-
err = -ENOMEM;
ff = fuse_file_alloc(fc);
if (!ff)
- goto out_put_request;
+ goto out_put_forget_req;
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -439,24 +419,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.flags = flags;
inarg.mode = mode;
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_CREATE;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 2;
- req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+ args.in.h.opcode = FUSE_CREATE;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 2;
+ args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- req->out.numargs = 2;
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ args.out.numargs = 2;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outentry);
- req->out.args[0].value = &outentry;
- req->out.args[1].size = sizeof(outopen);
- req->out.args[1].value = &outopen;
- fuse_request_send(fc, req);
- err = req->out.h.error;
+ args.out.args[0].size = sizeof(outentry);
+ args.out.args[0].value = &outentry;
+ args.out.args[1].size = sizeof(outopen);
+ args.out.args[1].value = &outopen;
+ err = fuse_simple_request(fc, &args);
if (err)
goto out_free_ff;
@@ -464,7 +443,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
goto out_free_ff;
- fuse_put_request(fc, req);
ff->fh = outopen.fh;
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
@@ -492,8 +470,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
out_free_ff:
fuse_file_free(ff);
-out_put_request:
- fuse_put_request(fc, req);
out_put_forget_req:
kfree(forget);
out_err:
@@ -547,7 +523,7 @@ no_open:
/*
* Code shared between mknod, mkdir, symlink and link
*/
-static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
struct inode *dir, struct dentry *entry,
umode_t mode)
{
@@ -557,22 +533,18 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
struct fuse_forget_link *forget;
forget = fuse_alloc_forget();
- if (!forget) {
- fuse_put_request(fc, req);
+ if (!forget)
return -ENOMEM;
- }
memset(&outarg, 0, sizeof(outarg));
- req->in.h.nodeid = get_node_id(dir);
- req->out.numargs = 1;
+ args->in.h.nodeid = get_node_id(dir);
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args->out.args[0].size = sizeof(outarg);
+ args->out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, args);
if (err)
goto out_put_forget_req;
@@ -609,9 +581,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
{
struct fuse_mknod_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -620,14 +590,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
inarg.mode = mode;
inarg.rdev = new_encode_dev(rdev);
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_MKNOD;
- req->in.numargs = 2;
- req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+ args.in.h.opcode = FUSE_MKNOD;
+ args.in.numargs = 2;
+ args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- return create_new_entry(fc, req, dir, entry, mode);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, &args, dir, entry, mode);
}
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
@@ -640,9 +610,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{
struct fuse_mkdir_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -650,13 +618,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_MKDIR;
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- return create_new_entry(fc, req, dir, entry, S_IFDIR);
+ args.in.h.opcode = FUSE_MKDIR;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, &args, dir, entry, S_IFDIR);
}
static int fuse_symlink(struct inode *dir, struct dentry *entry,
@@ -664,17 +632,15 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
{
struct fuse_conn *fc = get_fuse_conn(dir);
unsigned len = strlen(link) + 1;
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
- req->in.h.opcode = FUSE_SYMLINK;
- req->in.numargs = 2;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- req->in.args[1].size = len;
- req->in.args[1].value = link;
- return create_new_entry(fc, req, dir, entry, S_IFLNK);
+ args.in.h.opcode = FUSE_SYMLINK;
+ args.in.numargs = 2;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ args.in.args[1].size = len;
+ args.in.args[1].value = link;
+ return create_new_entry(fc, &args, dir, entry, S_IFLNK);
}
static inline void fuse_update_ctime(struct inode *inode)
@@ -689,18 +655,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_UNLINK;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_UNLINK;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 1;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
struct inode *inode = entry->d_inode;
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -729,18 +691,14 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_RMDIR;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_RMDIR;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 1;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
clear_nlink(entry->d_inode);
fuse_invalidate_attr(dir);
@@ -757,27 +715,21 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
int err;
struct fuse_rename2_in inarg;
struct fuse_conn *fc = get_fuse_conn(olddir);
- struct fuse_req *req;
-
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
memset(&inarg, 0, argsize);
inarg.newdir = get_node_id(newdir);
inarg.flags = flags;
- req->in.h.opcode = opcode;
- req->in.h.nodeid = get_node_id(olddir);
- req->in.numargs = 3;
- req->in.args[0].size = argsize;
- req->in.args[0].value = &inarg;
- req->in.args[1].size = oldent->d_name.len + 1;
- req->in.args[1].value = oldent->d_name.name;
- req->in.args[2].size = newent->d_name.len + 1;
- req->in.args[2].value = newent->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = opcode;
+ args.in.h.nodeid = get_node_id(olddir);
+ args.in.numargs = 3;
+ args.in.args[0].size = argsize;
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = oldent->d_name.len + 1;
+ args.in.args[1].value = oldent->d_name.name;
+ args.in.args[2].size = newent->d_name.len + 1;
+ args.in.args[2].value = newent->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
/* ctime changes */
fuse_invalidate_attr(oldent->d_inode);
@@ -849,19 +801,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
struct fuse_link_in inarg;
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
memset(&inarg, 0, sizeof(inarg));
inarg.oldnodeid = get_node_id(inode);
- req->in.h.opcode = FUSE_LINK;
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = newent->d_name.len + 1;
- req->in.args[1].value = newent->d_name.name;
- err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
+ args.in.h.opcode = FUSE_LINK;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = newent->d_name.len + 1;
+ args.in.args[1].value = newent->d_name.name;
+ err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
/* Contrary to "normal" filesystems it can happen that link
makes two "logical" inodes point to the same "physical"
inode. We invalidate the attributes of the old one, so it
@@ -929,13 +879,9 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
struct fuse_getattr_in inarg;
struct fuse_attr_out outarg;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
u64 attr_version;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
attr_version = fuse_get_attr_version(fc);
memset(&inarg, 0, sizeof(inarg));
@@ -947,20 +893,18 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
inarg.getattr_flags |= FUSE_GETATTR_FH;
inarg.fh = ff->fh;
}
- req->in.h.opcode = FUSE_GETATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
+ args.in.h.opcode = FUSE_GETATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err) {
if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
@@ -1102,7 +1046,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
static int fuse_access(struct inode *inode, int mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_access_in inarg;
int err;
@@ -1111,20 +1055,14 @@ static int fuse_access(struct inode *inode, int mask)
if (fc->no_access)
return 0;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
- req->in.h.opcode = FUSE_ACCESS;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_ACCESS;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_access = 1;
err = 0;
@@ -1320,7 +1258,7 @@ static int fuse_direntplus_link(struct file *file,
if (!inode)
goto out;
- alias = d_materialise_unique(dentry, inode);
+ alias = d_splice_alias(inode, dentry);
err = PTR_ERR(alias);
if (IS_ERR(alias))
goto out;
@@ -1445,31 +1383,27 @@ static char *read_link(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req_nopages(fc);
+ FUSE_ARGS(args);
char *link;
-
- if (IS_ERR(req))
- return ERR_CAST(req);
+ ssize_t ret;
link = (char *) __get_free_page(GFP_KERNEL);
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- goto out;
- }
- req->in.h.opcode = FUSE_READLINK;
- req->in.h.nodeid = get_node_id(inode);
- req->out.argvar = 1;
- req->out.numargs = 1;
- req->out.args[0].size = PAGE_SIZE - 1;
- req->out.args[0].value = link;
- fuse_request_send(fc, req);
- if (req->out.h.error) {
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+
+ args.in.h.opcode = FUSE_READLINK;
+ args.in.h.nodeid = get_node_id(inode);
+ args.out.argvar = 1;
+ args.out.numargs = 1;
+ args.out.args[0].size = PAGE_SIZE - 1;
+ args.out.args[0].value = link;
+ ret = fuse_simple_request(fc, &args);
+ if (ret < 0) {
free_page((unsigned long) link);
- link = ERR_PTR(req->out.h.error);
- } else
- link[req->out.args[0].size] = '\0';
- out:
- fuse_put_request(fc, req);
+ link = ERR_PTR(ret);
+ } else {
+ link[ret] = '\0';
+ }
fuse_invalidate_atime(inode);
return link;
}
@@ -1629,22 +1563,22 @@ void fuse_release_nowrite(struct inode *inode)
spin_unlock(&fc->lock);
}
-static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
+static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
struct inode *inode,
struct fuse_setattr_in *inarg_p,
struct fuse_attr_out *outarg_p)
{
- req->in.h.opcode = FUSE_SETATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(*inarg_p);
- req->in.args[0].value = inarg_p;
- req->out.numargs = 1;
+ args->in.h.opcode = FUSE_SETATTR;
+ args->in.h.nodeid = get_node_id(inode);
+ args->in.numargs = 1;
+ args->in.args[0].size = sizeof(*inarg_p);
+ args->in.args[0].value = inarg_p;
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
else
- req->out.args[0].size = sizeof(*outarg_p);
- req->out.args[0].value = outarg_p;
+ args->out.args[0].size = sizeof(*outarg_p);
+ args->out.args[0].value = outarg_p;
}
/*
@@ -1653,14 +1587,9 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
- int err;
-
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
@@ -1677,12 +1606,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
inarg.valid |= FATTR_FH;
inarg.fh = ff->fh;
}
- fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
- return err;
+ return fuse_simple_request(fc, &args);
}
/*
@@ -1698,7 +1624,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
bool is_truncate = false;
@@ -1723,10 +1649,6 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
if (is_truncate) {
fuse_set_nowrite(inode);
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
@@ -1747,10 +1669,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
inarg.valid |= FATTR_LOCKOWNER;
inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
}
- fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
+ err = fuse_simple_request(fc, &args);
if (err) {
if (err == -EINTR)
fuse_invalidate_attr(inode);
@@ -1837,32 +1757,26 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setxattr_in inarg;
int err;
if (fc->no_setxattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
inarg.flags = flags;
- req->in.h.opcode = FUSE_SETXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 3;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = strlen(name) + 1;
- req->in.args[1].value = name;
- req->in.args[2].size = size;
- req->in.args[2].value = value;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_SETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 3;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
+ args.in.args[2].size = size;
+ args.in.args[2].value = value;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_setxattr = 1;
err = -EOPNOTSUPP;
@@ -1879,7 +1793,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
@@ -1887,40 +1801,32 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
if (fc->no_getxattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
- req->in.h.opcode = FUSE_GETXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = strlen(name) + 1;
- req->in.args[1].value = name;
+ args.in.h.opcode = FUSE_GETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
/* Th