Home Home > GIT Browse > vanilla
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKernel Build Daemon <kbuild@suse.de>2019-07-11 12:03:02 +0200
committerKernel Build Daemon <kbuild@suse.de>2019-07-11 12:03:02 +0200
commit86d7c15b8b34c44b3fd7daaa8b9a0738ccb9cbfa (patch)
tree647d2f51d33d7be4a4efe38602b82f545bdc7d3f
parent67fb0d73ef2e373ba7c70622753b3064190a2ac5 (diff)
Automatically updated to 5.2-3311-g5450e8a316a6
-rw-r--r--Documentation/filesystems/Locking14
-rw-r--r--Documentation/filesystems/ext2.txt8
-rw-r--r--Documentation/filesystems/fscrypt.rst43
-rw-r--r--Documentation/security/keys/core.rst128
-rw-r--r--Documentation/security/keys/request-key.rst9
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/c6x/Kconfig1
-rw-r--r--arch/c6x/include/asm/flat.h7
-rw-r--r--arch/h8300/Kconfig3
-rw-r--r--arch/h8300/include/asm/flat.h7
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/Kconfig4
-rw-r--r--arch/m68k/include/asm/flat.h30
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/include/asm/flat.h7
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/riscv/Kconfig1
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/sh/include/asm/flat.h7
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/xtensa/Kconfig1
-rw-r--r--arch/xtensa/include/asm/flat.h7
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--certs/blacklist.c7
-rw-r--r--certs/system_keyring.c12
-rw-r--r--drivers/md/dm-crypt.c2
-rw-r--r--drivers/nvdimm/security.c2
-rw-r--r--fs/Kconfig.binfmt18
-rw-r--r--fs/afs/callback.c20
-rw-r--r--fs/afs/cmservice.c5
-rw-r--r--fs/afs/dir.c21
-rw-r--r--fs/afs/dir_silly.c5
-rw-r--r--fs/afs/file.c6
-rw-r--r--fs/afs/fsclient.c2
-rw-r--r--fs/afs/inode.c17
-rw-r--r--fs/afs/internal.h18
-rw-r--r--fs/afs/misc.c48
-rw-r--r--fs/afs/protocol_uae.h132
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/afs/security.c2
-rw-r--r--fs/afs/server.c39
-rw-r--r--fs/afs/server_list.c6
-rw-r--r--fs/afs/write.c3
-rw-r--r--fs/binfmt_flat.c99
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/buffer.c62
-rw-r--r--fs/ceph/file.c23
-rw-r--r--fs/cifs/cifs_spnego.c25
-rw-r--r--fs/cifs/cifsacl.c28
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/configfs/dir.c3
-rw-r--r--fs/crypto/Kconfig1
-rw-r--r--fs/crypto/bio.c73
-rw-r--r--fs/crypto/crypto.c299
-rw-r--r--fs/crypto/fname.c1
-rw-r--r--fs/crypto/fscrypt_private.h15
-rw-r--r--fs/crypto/hooks.c1
-rw-r--r--fs/crypto/keyinfo.c3
-rw-r--r--fs/crypto/policy.c2
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/debugfs/inode.c21
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/keystore.c2
-rw-r--r--fs/ext2/balloc.c3
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c7
-rw-r--r--fs/ext2/super.c17
-rw-r--r--fs/ext2/xattr.c164
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/dir.c27
-rw-r--r--fs/ext4/ext4.h65
-rw-r--r--fs/ext4/ext4_jbd2.h12
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/extents_status.c1
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/indirect.c22
-rw-r--r--fs/ext4/inline.c21
-rw-r--r--fs/ext4/inode.c130
-rw-r--r--fs/ext4/ioctl.c48
-rw-r--r--fs/ext4/mballoc.c5
-rw-r--r--fs/ext4/move_extent.c15
-rw-r--r--fs/ext4/namei.c213
-rw-r--r--fs/ext4/page-io.c44
-rw-r--r--fs/ext4/sysfs.c6
-rw-r--r--fs/f2fs/data.c17
-rw-r--r--fs/fscache/object-list.c2
-rw-r--r--fs/fuse/file.c29
-rw-r--r--fs/gfs2/aops.c110
-rw-r--r--fs/gfs2/aops.h4
-rw-r--r--fs/gfs2/bmap.c16
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/file.c37
-rw-r--r--fs/gfs2/glock.c42
-rw-r--r--fs/gfs2/glock.h11
-rw-r--r--fs/gfs2/glops.c12
-rw-r--r--fs/gfs2/incore.h6
-rw-r--r--fs/gfs2/inode.c2
-rw-r--r--fs/gfs2/log.c3
-rw-r--r--fs/gfs2/lops.c22
-rw-r--r--fs/gfs2/meta_io.c6
-rw-r--r--fs/gfs2/ops_fstype.c27
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/gfs2/recovery.c3
-rw-r--r--fs/gfs2/rgrp.c48
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/super.c43
-rw-r--r--fs/gfs2/super.h2
-rw-r--r--fs/gfs2/sys.c5
-rw-r--r--fs/gfs2/trans.c6
-rw-r--r--fs/gfs2/util.c8
-rw-r--r--fs/inode.c20
-rw-r--r--fs/internal.h2
-rw-r--r--fs/iomap.c17
-rw-r--r--fs/jbd2/commit.c25
-rw-r--r--fs/jbd2/journal.c25
-rw-r--r--fs/jbd2/transaction.c49
-rw-r--r--fs/lockd/clntproc.c21
-rw-r--r--fs/lockd/svc4proc.c14
-rw-r--r--fs/lockd/svclock.c118
-rw-r--r--fs/lockd/svcproc.c14
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/lockd/xdr.c3
-rw-r--r--fs/lockd/xdr4.c3
-rw-r--r--fs/locks.c67
-rw-r--r--fs/namei.c2
-rw-r--r--fs/nfs/nfs4file.c23
-rw-r--r--fs/nfs/nfs4idmap.c30
-rw-r--r--fs/nfs/unlink.c6
-rw-r--r--fs/nfsd/blocklayout.c8
-rw-r--r--fs/nfsd/cache.h5
-rw-r--r--fs/nfsd/netns.h44
-rw-r--r--fs/nfsd/nfs4idmap.c2
-rw-r--r--fs/nfsd/nfs4state.c453
-rw-r--r--fs/nfsd/nfs4xdr.c38
-rw-r--r--fs/nfsd/nfscache.c236
-rw-r--r--fs/nfsd/nfsctl.c233
-rw-r--r--fs/nfsd/nfsd.h11
-rw-r--r--fs/nfsd/state.h11
-rw-r--r--fs/nfsd/vfs.c2
-rw-r--r--fs/nfsd/xdr4.h5
-rw-r--r--fs/notify/fanotify/fanotify_user.c22
-rw-r--r--fs/notify/fsnotify.c41
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/quota/dquot.c11
-rw-r--r--fs/quota/quota.c38
-rw-r--r--fs/read_write.c124
-rw-r--r--fs/seq_file.c11
-rw-r--r--fs/tracefs/inode.c3
-rw-r--r--fs/ubifs/auth.c2
-rw-r--r--fs/ubifs/crypto.c19
-rw-r--r--fs/udf/inode.c93
-rw-r--r--fs/unicode/utf8-core.c28
-rw-r--r--fs/xfs/xfs_file.c15
-rw-r--r--include/asm-generic/flat.h (renamed from arch/arm/include/asm/flat.h)19
-rw-r--r--include/linux/flat.h58
-rw-r--r--include/linux/fs.h18
-rw-r--r--include/linux/fscrypt.h96
-rw-r--r--include/linux/fsnotify.h26
-rw-r--r--include/linux/fsnotify_backend.h4
-rw-r--r--include/linux/iomap.h1
-rw-r--r--include/linux/jbd2.h23
-rw-r--r--include/linux/key.h121
-rw-r--r--include/linux/lockd/lockd.h2
-rw-r--r--include/linux/pid.h3
-rw-r--r--include/linux/seq_file.h1
-rw-r--r--include/linux/string_helpers.h3
-rw-r--r--include/linux/sunrpc/xdr.h7
-rw-r--r--include/linux/syscalls.h1
-rw-r--r--include/linux/unicode.h3
-rw-r--r--include/trace/events/afs.h132
-rw-r--r--include/trace/events/filelock.h35
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/linux/flat.h59
-rw-r--r--include/uapi/linux/keyctl.h65
-rw-r--r--kernel/fork.c26
-rw-r--r--kernel/pid.c71
-rw-r--r--kernel/signal.c11
-rw-r--r--lib/digsig.c2
-rw-r--r--lib/string_helpers.c19
-rw-r--r--mm/filemap.c132
-rw-r--r--net/ceph/ceph_common.c2
-rw-r--r--net/dns_resolver/dns_key.c12
-rw-r--r--net/dns_resolver/dns_query.c15
-rw-r--r--net/rxrpc/key.c19
-rw-r--r--net/sunrpc/cache.c1
-rw-r--r--net/sunrpc/rpc_pipe.c4
-rw-r--r--net/sunrpc/svc_xprt.c2
-rw-r--r--net/wireless/reg.c6
-rw-r--r--security/integrity/digsig.c31
-rw-r--r--security/integrity/digsig_asymmetric.c2
-rw-r--r--security/integrity/evm/evm_crypto.c2
-rw-r--r--security/integrity/ima/ima_mok.c13
-rw-r--r--security/integrity/integrity.h6
-rw-r--r--security/integrity/platform_certs/platform_keyring.c14
-rw-r--r--security/keys/compat.c2
-rw-r--r--security/keys/encrypted-keys/encrypted.c2
-rw-r--r--security/keys/encrypted-keys/masterkey_trusted.c2
-rw-r--r--security/keys/gc.c2
-rw-r--r--security/keys/internal.h16
-rw-r--r--security/keys/key.c29
-rw-r--r--security/keys/keyctl.c104
-rw-r--r--security/keys/keyring.c27
-rw-r--r--security/keys/permission.c361
-rw-r--r--security/keys/persistent.c27
-rw-r--r--security/keys/proc.c22
-rw-r--r--security/keys/process_keys.c86
-rw-r--r--security/keys/request_key.c34
-rw-r--r--security/keys/request_key_auth.c15
-rw-r--r--security/selinux/hooks.c16
-rw-r--r--security/smack/smack_lsm.c3
-rw-r--r--tools/testing/selftests/pidfd/.gitignore1
-rw-r--r--tools/testing/selftests/pidfd/Makefile4
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h57
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c169
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c248
233 files changed, 4135 insertions, 2693 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index dac435575384..204dd3ea36bb 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -361,8 +361,6 @@ so fl_release_private called on a lease should not block.
----------------------- lock_manager_operations ---------------------------
prototypes:
- int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
- unsigned long (*lm_owner_key)(struct file_lock *);
void (*lm_notify)(struct file_lock *); /* unblock callback */
int (*lm_grant)(struct file_lock *, struct file_lock *, int);
void (*lm_break)(struct file_lock *); /* break_lease callback */
@@ -371,23 +369,11 @@ prototypes:
locking rules:
inode->i_lock blocked_lock_lock may block
-lm_compare_owner: yes[1] maybe no
-lm_owner_key yes[1] yes no
lm_notify: yes yes no
lm_grant: no no no
lm_break: yes no no
lm_change yes no no
-[1]: ->lm_compare_owner and ->lm_owner_key are generally called with
-*an* inode->i_lock held. It may not be the i_lock of the inode
-associated with either file_lock argument! This is the case with deadlock
-detection, since the code has to chase down the owners of locks that may
-be entirely unrelated to the one on which the lock is being acquired.
-For deadlock detection however, the blocked_lock_lock is also held. The
-fact that these locks are held ensures that the file_locks do not
-disappear out from under you while doing the comparison or generating an
-owner key.
-
--------------------------- buffer_head -----------------------------------
prototypes:
void (*b_end_io)(struct buffer_head *bh, int uptodate);
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt
index a19973a4dd1e..94c2cf0292f5 100644
--- a/Documentation/filesystems/ext2.txt
+++ b/Documentation/filesystems/ext2.txt
@@ -57,7 +57,13 @@ noacl Don't support POSIX ACLs.
nobh Do not attach buffer_heads to file pagecache.
-grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
+quota, usrquota Enable user disk quota support
+ (requires CONFIG_QUOTA).
+
+grpquota Enable group disk quota support
+ (requires CONFIG_QUOTA).
+
+noquota option is silently ignored by ext2.
Specification
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 08c23b60e016..82efa41b0e6c 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -191,7 +191,9 @@ Currently, the following pairs of encryption modes are supported:
If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
AES-128-CBC was added only for low-powered embedded devices with
-crypto accelerators such as CAAM or CESA that do not support XTS.
+crypto accelerators such as CAAM or CESA that do not support XTS. To
+use AES-128-CBC, CONFIG_CRYPTO_SHA256 (or another SHA-256
+implementation) must be enabled so that ESSIV can be used.
Adiantum is a (primarily) stream cipher-based mode that is fast even
on CPUs without dedicated crypto instructions. It's also a true
@@ -647,3 +649,42 @@ Note that the precise way that filenames are presented to userspace
without the key is subject to change in the future. It is only meant
as a way to temporarily present valid filenames so that commands like
``rm -r`` work as expected on encrypted directories.
+
+Tests
+=====
+
+To test fscrypt, use xfstests, which is Linux's de facto standard
+filesystem test suite. First, run all the tests in the "encrypt"
+group on the relevant filesystem(s). For example, to test ext4 and
+f2fs encryption using `kvm-xfstests
+<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
+
+ kvm-xfstests -c ext4,f2fs -g encrypt
+
+UBIFS encryption can also be tested this way, but it should be done in
+a separate command, and it takes some time for kvm-xfstests to set up
+emulated UBI volumes::
+
+ kvm-xfstests -c ubifs -g encrypt
+
+No tests should fail. However, tests that use non-default encryption
+modes (e.g. generic/549 and generic/550) will be skipped if the needed
+algorithms were not built into the kernel's crypto API. Also, tests
+that access the raw block device (e.g. generic/399, generic/548,
+generic/549, generic/550) will be skipped on UBIFS.
+
+Besides running the "encrypt" group tests, for ext4 and f2fs it's also
+possible to run most xfstests with the "test_dummy_encryption" mount
+option. This option causes all new files to be automatically
+encrypted with a dummy key, without having to make any API calls.
+This tests the encrypted I/O paths more thoroughly. To do this with
+kvm-xfstests, use the "encrypt" filesystem configuration::
+
+ kvm-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
+
+Because this runs many more tests than "-g encrypt" does, it takes
+much longer to run; so also consider using `gce-xfstests
+<https://github.com/tytso/xfstests-bld/blob/master/Documentation/gce-xfstests.md>`_
+instead of kvm-xfstests::
+
+ gce-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index bc561ca95c86..d6d8b0b756b6 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -57,9 +57,9 @@ Each key has a number of attributes:
type provides an operation to perform a match between the description on a
key and a criterion string.
- * Each key has an owner user ID, a group ID and an ACL. These are used to
- control what a process may do to a key from userspace, and whether a
- kernel service will be able to find the key.
+ * Each key has an owner user ID, a group ID and a permissions mask. These
+ are used to control what a process may do to a key from userspace, and
+ whether a kernel service will be able to find the key.
* Each key can be set to expire at a specific time by the key type's
instantiation function. Keys can also be immortal.
@@ -198,110 +198,43 @@ The key service provides a number of features besides keys:
Key Access Permissions
======================
-Keys have an owner user ID, a group ID and an ACL. The ACL is made up of a
-sequence of ACEs that each contain three elements:
+Keys have an owner user ID, a group access ID, and a permissions mask. The mask
+has up to eight bits each for possessor, user, group and other access. Only
+six of each set of eight bits are defined. The permissions granted are:
- * The type of subject.
- * The subject.
+ * View
- These two together indicate the subject to whom the permits are granted.
- The type can be one of:
+ This permits a key or keyring's attributes to be viewed - including key
+ type and description.
- * ``KEY_ACE_SUBJ_STANDARD``
+ * Read
- The subject is a standard 'macro' type. The subject can be one of:
-
- * ``KEY_ACE_EVERYONE``
-
- The permits are granted to everyone. It replaces the old 'other'
- type on the assumption that you wouldn't grant a permission to other
- that you you wouldn't grant to everyone else.
-
- * ``KEY_ACE_OWNER``
-
- The permits are granted to the owner of the key (key->uid).
-
- * ``KEY_ACE_GROUP``
-
- The permits are granted to the key's group (key->gid).
-
- * ``KEY_ACE_POSSESSOR``
-
- The permits are granted to anyone who possesses the key.
-
- * The set of permits granted to the subject. These include:
-
- * ``KEY_ACE_VIEW``
-
- This permits a key or keyring's attributes to be viewed - including the
- key type and description.
-
- * ``KEY_ACE_READ``
-
- This permits a key's payload to be viewed or a keyring's list of linked
- keys.
-
- * ``KEY_ACE_WRITE``
-
- This permits a key's payload to be instantiated or updated, or it allows
- a link to be added to or removed from a keyring.
-
- * ``KEY_ACE_SEARCH``
-
- This permits keyrings to be searched and keys to be found. Searches can
- only recurse into nested keyrings that have search permission set.
-
- * ``KEY_ACE_LINK``
-
- This permits a key or keyring to be linked to. To create a link from a
- keyring to a key, a process must have Write permission on the keyring
- and Link permission on the key.
-
- * ``KEY_ACE_SET_SECURITY``
-
- This permits a key's UID, GID and permissions mask to be changed.
+ This permits a key's payload to be viewed or a keyring's list of linked
+ keys.
- * ``KEY_ACE_INVAL``
+ * Write
- This permits a key to be invalidated with KEYCTL_INVALIDATE.
+ This permits a key's payload to be instantiated or updated, or it allows a
+ link to be added to or removed from a keyring.
- * ``KEY_ACE_REVOKE``
+ * Search
- This permits a key to be revoked with KEYCTL_REVOKE.
+ This permits keyrings to be searched and keys to be found. Searches can
+ only recurse into nested keyrings that have search permission set.
- * ``KEY_ACE_JOIN``
+ * Link
- This permits a keyring to be joined as a session by
- KEYCTL_JOIN_SESSION_KEYRING or KEYCTL_SESSION_TO_PARENT.
+ This permits a key or keyring to be linked to. To create a link from a
+ keyring to a key, a process must have Write permission on the keyring and
+ Link permission on the key.
- * ``KEY_ACE_CLEAR``
+ * Set Attribute
- This permits a keyring to be cleared.
+ This permits a key's UID, GID and permissions mask to be changed.
For changing the ownership, group ID or permissions mask, being the owner of
the key or having the sysadmin capability is sufficient.
-The legacy KEYCTL_SETPERM and KEYCTL_DESCRIBE functions can only see/generate
-View, Read, Write, Search, Link and SetAttr permits, and do this for each of
-possessor, user, group and other permission sets as a 32-bit flag mask. These
-will be approximated/inferred:
-
- SETPERM Permit Implied ACE Permit
- =============== =======================
- Search Inval, Join
- Write Revoke, Clear
- Setattr Set Security, Revoke
-
- ACE Permit Described as
- =============== =======================
- Inval Search
- Join Search
- Revoke Write (unless Setattr)
- Clear write
- Set Security Setattr
-
-'Other' will be approximated as/inferred from the 'Everyone' subject.
-
SELinux Support
===============
@@ -1151,8 +1084,7 @@ payload contents" for more information.
struct key *request_key(const struct key_type *type,
const char *description,
- const char *callout_info,
- struct key_acl *acl);
+ const char *callout_info);
This is used to request a key or keyring with a description that matches
the description specified according to the key type's match_preparse()
@@ -1167,8 +1099,6 @@ payload contents" for more information.
If successful, the key will have been attached to the default keyring for
implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING.
- If a key is created, it will be given the specified ACL.
-
See also Documentation/security/keys/request-key.rst.
@@ -1177,8 +1107,7 @@ payload contents" for more information.
struct key *request_key_tag(const struct key_type *type,
const char *description,
struct key_tag *domain_tag,
- const char *callout_info,
- struct key_acl *acl);
+ const char *callout_info);
This is identical to request_key(), except that a domain tag may be
specifies that causes search algorithm to only match keys matching that
@@ -1193,8 +1122,7 @@ payload contents" for more information.
struct key_tag *domain_tag,
const void *callout_info,
size_t callout_len,
- void *aux,
- struct key_acl *acl);
+ void *aux);
This is identical to request_key_tag(), except that the auxiliary data is
passed to the key_type->request_key() op if it exists, and the
@@ -1267,7 +1195,7 @@ payload contents" for more information.
struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
const struct cred *cred,
- struct key_acl *acl,
+ key_perm_t perm,
struct key_restriction *restrict_link,
unsigned long flags,
struct key *dest);
diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index f356fd06c8d5..35f2296b704a 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -11,16 +11,14 @@ The process starts by either the kernel requesting a service by calling
struct key *request_key(const struct key_type *type,
const char *description,
- const char *callout_info,
- struct key_acl *acl);
+ const char *callout_info);
or::
struct key *request_key_tag(const struct key_type *type,
const char *description,
const struct key_tag *domain_tag,
- const char *callout_info,
- struct key_acl *acl);
+ const char *callout_info);
or::
@@ -29,8 +27,7 @@ or::
const struct key_tag *domain_tag,
const char *callout_info,
size_t callout_len,
- void *aux,
- struct key_acl *acl);
+ void *aux);
or::
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 9e7704e44f6d..1db9bbcfb84e 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -473,3 +473,4 @@
541 common fsconfig sys_fsconfig
542 common fsmount sys_fsmount
543 common fspick sys_fspick
+544 common pidfd_open sys_pidfd_open
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ad00e17d6988..d850feb5cc0a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -4,6 +4,7 @@ config ARM
default y
select ARCH_32BIT_OFF_T
select ARCH_CLOCKSOURCE_DATA
+ select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
@@ -30,6 +31,7 @@ config ARM
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_IPC_PARSE_VERSION
+ select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
select BUILDTIME_EXTABLE_SORT if MMU
select CLONE_BACKWARDS
select CPU_PM if SUSPEND || CPU_IDLE
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index a8f149ab45b8..6b2dc15b6dff 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += early_ioremap.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += extable.h
+generic-y += flat.h
generic-y += irq_regs.h
generic-y += kdebug.h
generic-y += local.h
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index aaf479a9e92d..81e6e1817c45 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -447,3 +447,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2a23614198f1..ede7b88d4f15 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 434
+#define __NR_compat_syscalls 435
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index aa995920bd34..52415923e08f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -875,6 +875,8 @@ __SYSCALL(__NR_fsconfig, sys_fsconfig)
__SYSCALL(__NR_fsmount, sys_fsmount)
#define __NR_fspick 433
__SYSCALL(__NR_fspick, sys_fspick)
+#define __NR_pidfd_open 434
+__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index c5e6b70e1510..b4fb61c83494 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -7,6 +7,7 @@
config C6X
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select CLKDEV_LOOKUP
diff --git a/arch/c6x/include/asm/flat.h b/arch/c6x/include/asm/flat.h
index 76fd0bb962a3..9e6544b51386 100644
--- a/arch/c6x/include/asm/flat.h
+++ b/arch/c6x/include/asm/flat.h
@@ -4,11 +4,8 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 0
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
*addr = get_unaligned((__force u32 *)rp);
return 0;
@@ -18,7 +15,5 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
put_unaligned(addr, (__force u32 *)rp);
return 0;
}
-#define flat_get_relocate_addr(rel) (rel)
-#define flat_set_persistent(relval, p) 0
#endif /* __ASM_C6X_FLAT_H */
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index ecfc4b4b6373..ec800e9d5aad 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -2,6 +2,9 @@
config H8300
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT
+ select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
+ select BINFMT_FLAT_OLD_ALWAYS_RAM
select GENERIC_ATOMIC64
select HAVE_UID16
select VIRT_TO_BUS
diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h
index f4cdfcbdd2ba..78070f924177 100644
--- a/arch/h8300/include/asm/flat.h
+++ b/arch/h8300/include/asm/flat.h
@@ -8,11 +8,6 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 1
-#define flat_old_ram_flag(flags) 1
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-#define flat_set_persistent(relval, p) 0
-
/*
* on the H8 a couple of the relocations have an instruction in the
* top byte. As there can only be 24bits of address space, we just
@@ -22,7 +17,7 @@
#define flat_get_relocate_addr(rel) (rel & ~0x00000001)
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
u32 val = get_unaligned((__force u32 *)rp);
if (!(flags & FLAT_FLAG_GOTPIC))
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index e01df3f2f80d..ecc44926737b 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -354,3 +354,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 00f5c98a5e05..c518d695c376 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -3,12 +3,14 @@ config M68K
bool
default y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
- select ARCH_HAS_DMA_PREP_COHERENT
+ select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_NO_PREEMPT if !COLDFIRE
+ select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
select HAVE_IDE
select HAVE_AOUT if MMU
diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h
index 4f1d1e373420..46379e08cdd6 100644
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -6,35 +6,7 @@
#ifndef __M68KNOMMU_FLAT_H__
#define __M68KNOMMU_FLAT_H__
-#include <linux/uaccess.h>
-
-#define flat_argvp_envp_on_stack() 1
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
-{
-#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
- return copy_from_user(addr, rp, 4) ? -EFAULT : 0;
-#else
- return get_user(*addr, rp);
-#endif
-}
-
-static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
-{
-#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
- return copy_to_user(rp, &addr, 4) ? -EFAULT : 0;
-#else
- return put_user(addr, rp);
-#endif
-}
-#define flat_get_relocate_addr(rel) (rel)
-
-static inline int flat_set_persistent(u32 relval, u32 *persistent)
-{
- return 0;
-}
+#include <asm-generic/flat.h>
#define FLAT_PLAT_INIT(regs) \
do { \
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 7e3d0734b2f3..9a3eb2558568 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -433,3 +433,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index f11433daab4a..d411de05b628 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -3,6 +3,7 @@ config MICROBLAZE
def_bool y
select ARCH_32BIT_OFF_T
select ARCH_NO_SWAP
+ select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SYNC_DMA_FOR_CPU
diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index 3d2747d4c967..1ab86770eaee 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -13,11 +13,6 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 0
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-#define flat_set_persistent(relval, p) 0
-
/*
* Microblaze works a little differently from other arches, because
* of the MICROBLAZE_64 reloc type. Here, a 32 bit address is split
@@ -33,7 +28,7 @@
*/
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
u32 *p = (__force u32 *)rp;
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 26339e417695..ad706f83c755 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -439,3 +439,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 0e2dd68ade57..97035e19ad03 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -372,3 +372,4 @@
431 n32 fsconfig sys_fsconfig
432 n32 fsmount sys_fsmount
433 n32 fspick sys_fspick
+434 n32 pidfd_open sys_pidfd_open
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 5eebfa0d155c..d7292722d3b0 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -348,3 +348,4 @@
431 n64 fsconfig sys_fsconfig
432 n64 fsmount sys_fsmount
433 n64 fspick sys_fspick
+434 n64 pidfd_open sys_pidfd_open
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 3cc1374e02d0..dba084c92f14 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -421,3 +421,4 @@
431 o32 fsconfig sys_fsconfig
432 o32 fsmount sys_fsmount
433 o32 fspick sys_fspick
+434 o32 pidfd_open sys_pidfd_open
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index c9e377d59232..5022b9e179c2 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -430,3 +430,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 103655d84b4b..f2c3bda2d39f 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -515,3 +515,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 376bc759b9ab..13a1c0d04e9e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -17,6 +17,7 @@ config RISCV
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
+ select ARCH_HAS_BINFMT_FLAT
select ARCH_WANT_FRAME_POINTERS
select CLONE_BACKWARDS
select COMMON_CLK
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 5ee646619cc3..1efaeddf1e4b 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += compat.h
generic-y += device.h
generic-y += div64.h
generic-y += extable.h
+generic-y += flat.h
generic-y += dma.h
generic-y += dma-contiguous.h
generic-y += dma-mapping.h
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index e822b2964a83..6ebacfeaf853 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -436,3 +436,4 @@
431 common fsconfig sys_fsconfig sys_fsconfig
432 common fsmount sys_fsmount sys_fsmount
433 common fspick sys_fspick sys_fspick
+434 common pidfd_open sys_pidfd_open sys_pidfd_open
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index ce1a28654507..c7c99e18d5ff 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
config SUPERH
def_bool y
+ select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h
index 843d458b8329..fee4f25555cb 100644
--- a/arch/sh/include/asm/flat.h
+++ b/arch/sh/include/asm/flat.h
@@ -11,11 +11,8 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 0
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
*addr = get_unaligned((__force u32 *)rp);
return 0;
@@ -25,8 +22,6 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
put_unaligned(addr, (__force u32 *)rp);
return 0;
}
-#define flat_get_relocate_addr(rel) (rel)
-#define flat_set_persistent(relval, p) ({ (void)p; 0; })
#define FLAT_PLAT_INIT(_r) \
do { _r->regs[0]=0; _r->regs[1]=0; _r->regs[2]=0; _r->regs[3]=0; \
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 016a727d4357..834c9c7d79fa 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -436,3 +436,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index e047480b1605..c58e71f21129 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -479,3 +479,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ad968b7bac72..43e4429a5272 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -438,3 +438,4 @@
431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
432 i386 fsmount sys_fsmount __ia32_sys_fsmount
433 i386 fspick sys_fspick __ia32_sys_fspick
+434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index b4e6f9e6204a..1bee0a77fdd3 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -355,6 +355,7 @@
431 common fsconfig __x64_sys_fsconfig
432 common fsmount __x64_sys_fsmount
433 common fspick __x64_sys_fspick
+434 common pidfd_open __x64_sys_pidfd_open
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 6ec1b75eabc5..ebc135bda921 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -2,6 +2,7 @@
config XTENSA
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h
index b8532d7877b3..ed5870c779f9 100644
--- a/arch/xtensa/include/asm/flat.h
+++ b/arch/xtensa/include/asm/flat.h
@@ -4,11 +4,8 @@
#include <asm/unaligned.h>
-#define flat_argvp_envp_on_stack() 0
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
*addr = get_unaligned((__force u32 *)rp);
return 0;
@@ -18,7 +15,5 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
put_unaligned(addr, (__force u32 *)rp);
return 0;
}
-#define flat_get_relocate_addr(rel) (rel)
-#define flat_set_persistent(relval, p) 0
#endif /* __ASM_XTENSA_FLAT_H */
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 5fa0ee1c8e00..782b81945ccc 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -404,3 +404,4 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
diff --git a/certs/blacklist.c b/certs/blacklist.c
index 93d70b885f8e..ec00bf337eb6 100644
--- a/certs/blacklist.c
+++ b/certs/blacklist.c
@@ -89,7 +89,8 @@ int mark_hash_blacklisted(const char *hash)
hash,
NULL,
0,
- &internal_key_acl,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW),
KEY_ALLOC_NOT_IN_QUOTA |
KEY_ALLOC_BUILT_IN);
if (IS_ERR(key)) {
@@ -148,7 +149,9 @@ static int __init blacklist_init(void)
keyring_alloc(".blacklist",
KUIDT_INIT(0), KGIDT_INIT(0),
current_cred(),
- &internal_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ |
+ KEY_USR_SEARCH,
KEY_ALLOC_NOT_IN_QUOTA |
KEY_FLAG_KEEP,
NULL, NULL);
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 57be78b5fdfc..1eba08a1af82 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -99,7 +99,9 @@ static __init int system_trusted_keyring_init(void)
builtin_trusted_keys =
keyring_alloc(".builtin_trusted_keys",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- &internal_key_acl, KEY_ALLOC_NOT_IN_QUOTA,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+ KEY_ALLOC_NOT_IN_QUOTA,
NULL, NULL);
if (IS_ERR(builtin_trusted_keys))
panic("Can't allocate builtin trusted keyring\n");
@@ -108,7 +110,10 @@ static __init int system_trusted_keyring_init(void)
secondary_trusted_keys =
keyring_alloc(".secondary_trusted_keys",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- &internal_writable_keyring_acl, KEY_ALLOC_NOT_IN_QUOTA,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH |
+ KEY_USR_WRITE),
+ KEY_ALLOC_NOT_IN_QUOTA,
get_builtin_and_secondary_restriction(),
NULL);
if (IS_ERR(secondary_trusted_keys))
@@ -158,7 +163,8 @@ static __init int load_system_certificate_list(void)
NULL,
p,
plen,
- &internal_key_acl,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
KEY_ALLOC_NOT_IN_QUOTA |
KEY_ALLOC_BUILT_IN |
KEY_ALLOC_BYPASS_RESTRICTION);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 0fd3ca9bfe54..1b16d34bb785 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2035,7 +2035,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
return -ENOMEM;
key = request_key(key_string[0] == 'l' ? &key_type_logon : &key_type_user,
- key_desc + 1, NULL, NULL);
+ key_desc + 1, NULL);
if (IS_ERR(key)) {
kzfree(new_key_string);
return PTR_ERR(key);
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index 99a5708b37e3..a570f2263a42 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -55,7 +55,7 @@ static struct key *nvdimm_request_key(struct nvdimm *nvdimm)
struct device *dev = &nvdimm->dev;
sprintf(desc, "%s%s", NVDIMM_PREFIX, nvdimm->dimm_id);
- key = request_key(&key_type_encrypted, desc, "", NULL);
+ key = request_key(&key_type_encrypted, desc, "");
if (IS_ERR(key)) {
if (PTR_ERR(key) == -ENOKEY)
dev_dbg(dev, "request_key() found no key\n");
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index f87ddd1b6d72..62dc4f577ba1 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -91,12 +91,28 @@ config BINFMT_SCRIPT
Most systems will not boot if you say M or N here. If unsure, say Y.
+config ARCH_HAS_BINFMT_FLAT
+ bool
+
config BINFMT_FLAT
bool "Kernel support for flat binaries"
- depends on !MMU || ARM || M68K
+ depends on ARCH_HAS_BINFMT_FLAT
help
Support uClinux FLAT format binaries.
+config BINFMT_FLAT_ARGVP_ENVP_ON_STACK
+ bool
+
+config BINFMT_FLAT_OLD_ALWAYS_RAM
+ bool
+
+config BINFMT_FLAT_OLD
+ bool "Enable support for very old legacy flat binaries"
+ depends on BINFMT_FLAT
+ help
+ Support decade old uClinux FLAT format binaries. Unless you know
+ you have some of those say N here.
+
config BINFMT_ZFLAT
bool "Enable ZFLAT support"
depends on BINFMT_FLAT
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 915010464572..6cdd7047c809 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -48,7 +48,7 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
refcount_set(&new->usage, 1);
new->sb = vnode->vfs_inode.i_sb;
new->vid = vnode->volume->vid;
- new->server = afs_get_server(server);
+ new->server = afs_get_server(server, afs_server_trace_get_new_cbi);
INIT_HLIST_NODE(&new->cb_vlink);
write_lock(&server->cb_break_lock);
@@ -195,7 +195,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
write_unlock(&cbi->server->cb_break_lock);
if (vi)
kfree_rcu(vi, rcu);
- afs_put_server(net, cbi->server);
+ afs_put_server(net, cbi->server, afs_server_trace_put_cbi);
}
kfree_rcu(cbi, rcu);
}
@@ -212,7 +212,7 @@ void afs_init_callback_state(struct afs_server *server)
/*
* actually break a callback
*/
-void __afs_break_callback(struct afs_vnode *vnode)
+void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
{
_enter("");
@@ -223,13 +223,17 @@ void __afs_break_callback(struct afs_vnode *vnode)
if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
afs_lock_may_be_available(vnode);
+
+ trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
+ } else {
+ trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false);
}
}
-void afs_break_callback(struct afs_vnode *vnode)
+void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
{
write_seqlock(&vnode->cb_lock);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, reason);
write_sequnlock(&vnode->cb_lock);
}
@@ -277,6 +281,8 @@ static void afs_break_one_callback(struct afs_server *server,
write_lock(&volume->cb_v_break_lock);
volume->cb_v_break++;
+ trace_afs_cb_break(fid, volume->cb_v_break,
+ afs_cb_break_for_volume_callback, false);
write_unlock(&volume->cb_v_break_lock);
} else {
data.volume = NULL;
@@ -285,8 +291,10 @@ static void afs_break_one_callback(struct afs_server *server,
afs_iget5_test, &data);
if (inode) {
vnode = AFS_FS_I(inode);
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_callback);
iput(inode);
+ } else {
+ trace_afs_cb_miss(fid, afs_cb_break_for_callback);
}
}
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3451be03667f..602d75bf9bb2 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -256,8 +256,11 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
* server holds up change visibility till it receives our reply so as
* to maintain cache coherency.
*/
- if (call->server)
+ if (call->server) {
+ trace_afs_server(call->server, atomic_read(&call->server->usage),
+ afs_server_trace_callback);
afs_break_callbacks(call->server, call->count, call->request);
+ }
afs_send_empty_reply(call);
afs_put_call(call);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index da9563d62b32..e640d67274be 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -238,8 +238,7 @@ retry:
if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
nr_inline = 0;
- req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline,
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);
@@ -1363,12 +1362,12 @@ static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry,
drop_nlink(&vnode->vfs_inode);
if (vnode->vfs_inode.i_nlink == 0) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, afs_cb_break_for_unlink);
}
write_sequnlock(&vnode->cb_lock);
ret = 0;
} else {
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_unlink);
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
kdebug("AFS_VNODE_DELETED");
@@ -1390,7 +1389,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
{
struct afs_fs_cursor fc;
struct afs_status_cb *scb;
- struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
bool need_rehash = false;
int ret;
@@ -1413,15 +1413,12 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
}
/* Try to make sure we have a callback promise on the victim. */
- if (d_really_is_positive(dentry)) {
- vnode = AFS_FS_I(d_inode(dentry));
- ret = afs_validate(vnode, key);
- if (ret < 0)
- goto error_key;
- }
+ ret = afs_validate(vnode, key);
+ if (ret < 0)
+ goto error_key;
spin_lock(&dentry->d_lock);
- if (vnode && d_count(dentry) > 1) {
+ if (d_count(dentry) > 1) {
spin_unlock(&dentry->d_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(d_inode(dentry), 0);
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index 057b8d322422..361088a5edb9 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -60,11 +60,6 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_edit_dir_add(dvnode, &new->d_name,
&vnode->fid, afs_edit_dir_for_silly_1);
-
- /* vfs_unlink and the like do not issue this when a file is
- * sillyrenamed, so do it here.
- */
- fsnotify_nameremove(old, 0);
}
kfree(scb);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 8fd7d3b9a1b1..56b69576274d 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -310,8 +310,7 @@ int afs_page_filler(void *data, struct page *page)
/* fall through */
default:
go_on:
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
if (!req)
goto enomem;
@@ -461,8 +460,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
n++;
}
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *) * n,
- GFP_NOFS);
+ req = kzalloc(struct_size(req, array, n), GFP_NOFS);
if (!req)
return -ENOMEM;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index a1ef0266422a..1ce73e014139 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1911,7 +1911,7 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net,
return ERR_PTR(-ENOMEM);
call->key = key;
- call->server = afs_get_server(server);
+ call->server = afs_get_server(server, afs_server_trace_get_caps);
call->server_index = server_index;
call->upgrade = true;
call->async = true;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 18a50d4febcf..7b1c18c32f48 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -283,7 +283,7 @@ void afs_vnode_commit_status(struct afs_fs_cursor *fc,
if (scb->status.abort_code == VNOVNODE) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
clear_nlink(&vnode->vfs_inode);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, afs_cb_break_for_deleted);
}
} else {
if (scb->have_status)
@@ -594,8 +594,9 @@ bool afs_check_validity(struct afs_vnode *vnode)
struct afs_cb_interest *cbi;
struct afs_server *server;
struct afs_volume *volume = vnode->volume;
+ enum afs_cb_break_reason need_clear = afs_cb_break_no_break;
time64_t now = ktime_get_real_seconds();
- bool valid, need_clear = false;
+ bool valid;
unsigned int cb_break, cb_s_break, cb_v_break;
int seq = 0;
@@ -613,13 +614,13 @@ bool afs_check_validity(struct afs_vnode *vnode)
vnode->cb_v_break != cb_v_break) {
vnode->cb_s_break = cb_s_break;
vnode->cb_v_break = cb_v_break;
- need_clear = true;
+ need_clear = afs_cb_break_for_vsbreak;
valid = false;
} else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- need_clear = true;
+ need_clear = afs_cb_break_for_zap;
valid = false;
} else if (vnode->cb_expires_at - 10 <= now) {
- need_clear = true;
+ need_clear = afs_cb_break_for_lapsed;
valid = false;
} else {
valid = true;
@@ -635,10 +636,12 @@ bool afs_check_validity(struct afs_vnode *vnode)
done_seqretry(&vnode->cb_lock, seq);
- if (need_clear) {
+ if (need_clear != afs_cb_break_no_break) {
write_seqlock(&vnode->cb_lock);
if (cb_break == vnode->cb_break)
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, need_clear);
+ else
+ trace_afs_cb_miss(&vnode->fid, need_clear);
write_sequnlock(&vnode->cb_lock);
valid = false;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7ee63526c6a2..be37fafbaeb5 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -514,6 +514,7 @@ struct afs_server {
atomic_t usage;
u32 addr_version; /* Address list version */
u32 cm_epoch; /* Server RxRPC epoch */
+ unsigned int debug_id; /* Debugging ID for traces */
/* file service access */
rwlock_t fs_lock; /* access lock */
@@ -844,9 +845,9 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
* callback.c
*/
extern void afs_init_callback_state(struct afs_server *);
-extern void __afs_break_callback(struct afs_vnode *);
-extern void afs_break_callback(struct afs_vnode *);
-extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
+extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *);
extern int afs_register_server_cb_interest(struct afs_vnode *,
struct afs_server_list *, unsigned int);
@@ -1240,17 +1241,12 @@ extern void __exit afs_clean_up_permit_cache(void);
*/
extern spinlock_t afs_server_peer_lock;
-static inline struct afs_server *afs_get_server(struct afs_server *server)
-{
- atomic_inc(&server->usage);
- return server;
-}
-
extern struct afs_server *afs_find_server(struct afs_net *,
const struct sockaddr_rxrpc *);
extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *);
-extern void afs_put_server(struct afs_net *, struct afs_server *);
+extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
+extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
extern void afs_manage_servers(struct work_struct *);
extern void afs_servers_timer(struct timer_list *);
extern void __net_exit afs_purge_servers(struct afs_net *);
@@ -1434,7 +1430,7 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
{
if (fc->ac.error == -ENOENT) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_deleted);
}
}
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 5497ab38f585..52b19e9c1535 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include "internal.h"
#include "afs_fs.h"
+#include "protocol_uae.h"
/*
* convert an AFS abort code to a Linux error number
@@ -65,34 +66,25 @@ int afs_abort_to_error(u32 abort_code)
case AFSVL_PERM: return -EACCES;
case AFSVL_NOMEM: return -EREMOTEIO;
- /* Unified AFS error table; ET "uae" == 0x2f6df00 */
- case 0x2f6df00: return -EPERM;
- case 0x2f6df01: return -ENOENT;
- case 0x2f6df04: return -EIO;
- case 0x2f6df0a: return -EAGAIN;
- case 0x2f6df0b: return -ENOMEM;
- case 0x2f6df0c: return -EACCES;
- case 0x2f6df0f: return -EBUSY;
- case 0x2f6df10: return -EEXIST;
- case 0x2f6df11: return -EXDEV;
- case 0x2f6df12: return -ENODEV;
- case 0x2f6df13: return -ENOTDIR;
- case 0x2f6df14: return -EISDIR;
- case 0x2f6df15: return -EINVAL;
- case 0x2f6df1a: return -EFBIG;
- case 0x2f6df1b: return -ENOSPC;
- case 0x2f6df1d: return -EROFS;
- case 0x2f6df1e: return -EMLINK;
- case 0x2f6df20: return -EDOM;
- case 0x2f6df21: return -ERANGE;
- case 0x2f6df22: return -EDEADLK;
- case 0x2f6df23: return -ENAMETOOLONG;
- case 0x2f6df24: return -ENOLCK;
- case 0x2f6df26: return -ENOTEMPTY;
- case 0x2f6df28: return -EWOULDBLOCK;
- case 0x2f6df69: return -ENOTCONN;
- case 0x2f6df6c: return -ETIMEDOUT;
- case 0x2f6df78: return -EDQUOT;
+ /* Unified AFS error table */
+ case UAEPERM: return -EPERM;
+ case UAENOENT: return -ENOENT;
+ case UAEACCES: return -EACCES;
+ case UAEBUSY: return -EBUSY;
+ case UAEEXIST: return -EEXIST;
+ case UAENOTDIR: return -ENOTDIR;
+ case UAEISDIR: return -EISDIR;
+ case UAEFBIG: return -EFBIG;
+ case UAENOSPC: return -ENOSPC;
+ case UAEROFS: return -EROFS;
+ case UAEMLINK: return -EMLINK;
+ case UAEDEADLK: return -EDEADLK;
+ case UAENAMETOOLONG: return -ENAMETOOLONG;
+ case UAENOLCK: return -ENOLCK;
+ case UAENOTEMPTY: return -ENOTEMPTY;
+ case UAELOOP: return -ELOOP;
+ case UAENOMEDIUM: return -ENOMEDIUM;
+ case UAEDQUOT: return -EDQUOT;
/* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
case RXKADINCONSISTENCY: return -EPROTO;
diff --git a/fs/afs/protocol_uae.h b/fs/afs/protocol_uae.h
new file mode 100644
index 000000000000..1b3d1060bd34
--- /dev/null
+++ b/fs/afs/protocol_uae.h
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Universal AFS Error codes (UAE).
+ *
+ * Copyright (C) 2003, Daria Phoebe Brashear
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ */
+
+enum {
+ UAEPERM = 0x2f6df00, /* Operation not permitted */
+ UAENOENT = 0x2f6df01, /* No such file or directory */
+ UAESRCH = 0x2f6df02, /* No such process */
+ UAEINTR = 0x2f6df03, /* Interrupted system call */
+ UAEIO = 0x2f6df04, /* I/O error */
+ UAENXIO = 0x2f6df05, /* No such device or address */
+ UAE2BIG = 0x2f6df06, /* Arg list too long */
+ UAENOEXEC = 0x2f6df07, /* Exec format error */
+ UAEBADF = 0x2f6df08, /* Bad file number */
+ UAECHILD = 0x2f6df09, /* No child processes */
+ UAEAGAIN = 0x2f6df0a, /* Try again */
+ UAENOMEM = 0x2f6df0b, /* Out of memory */
+ UAEACCES = 0x2f6df0c, /* Permission denied */
+ UAEFAULT = 0x2f6df0d, /* Bad address */
+ UAENOTBLK = 0x2f6df0e, /* Block device required */
+ UAEBUSY = 0x2f6df0f, /* Device or resource busy */
+ UAEEXIST = 0x2f6df10, /* File exists */
+ UAEXDEV = 0x2f6df11, /* Cross-device link */
+ UAENODEV = 0x2f6df12, /* No such device */
+ UAENOTDIR = 0x2f6df13, /* Not a directory */
+ UAEISDIR = 0x2f6df14, /* Is a directory */
+ UAEINVAL = 0x2f6df15, /* Invalid argument */
+ UAENFILE = 0x2f6df16, /* File table overflow */
+ UAEMFILE = 0x2f6df17, /* Too many open files */
+ UAENOTTY = 0x2f6df18, /* Not a typewriter */
+ UAETXTBSY = 0x2f6df19, /* Text file busy */
+ UAEFBIG = 0x2f6df1a, /* File too large */
+ UAENOSPC = 0x2f6df1b, /* No space left on device */
+ UAESPIPE = 0x2f6df1c, /* Illegal seek */
+ UAEROFS = 0x2f6df1d, /* Read-only file system */
+ UAEMLINK = 0x2f6df1e, /* Too many links */
+ UAEPIPE = 0x2f6df1f, /* Broken pipe */
+ UAEDOM = 0x2f6df20, /* Math argument out of domain of func */
+ UAERANGE = 0x2f6df21, /* Math result not representable */
+ UAEDEADLK = 0x2f6df22, /* Resource deadlock would occur */
+ UAENAMETOOLONG = 0x2f6df23, /* File name too long */
+ UAENOLCK = 0x2f6df24, /* No record locks available */
+ UAENOSYS = 0x2f6df25, /* Function not implemented */
+ UAENOTEMPTY = 0x2f6df26, /* Directory not empty */
+ UAELOOP = 0x2f6df27, /* Too many symbolic links encountered */
+ UAEWOULDBLOCK = 0x2f6df28, /* Operation would block */
+ UAENOMSG = 0x2f6df29, /* No message of desired type */
+ UAEIDRM = 0x2f6df2a, /* Identifier removed */
+ UAECHRNG = 0x2f6df2b, /* Channel number out of range */
+ UAEL2NSYNC = 0x2f6df2c, /* Level 2 not synchronized */
+ UAEL3HLT = 0x2f6df2d, /* Level 3 halted */
+ UAEL3RST = 0x2f6df2e, /* Level 3 reset */
+ UAELNRNG = 0x2f6df2f, /* Link number out of range */
+ UAEUNATCH = 0x2f6df30, /* Protocol driver not attached */
+ UAENOCSI = 0x2f6df31, /* No CSI structure available */
+ UAEL2HLT = 0x2f6df32, /* Level 2 halted */
+ UAEBADE = 0x2f6df33, /* Invalid exchange */
+ UAEBADR = 0x2f6df34, /* Invalid request descriptor */
+ UAEXFULL = 0x2f6df35, /* Exchange full */
+ UAENOANO = 0x2f6df36, /* No anode */
+ UAEBADRQC = 0x2f6df37, /* Invalid request code */
+ UAEBADSLT = 0x2f6df38, /* Invalid slot */
+ UAEBFONT = 0x2f6df39, /* Bad font file format */
+ UAENOSTR = 0x2f6df3a, /* Device not a stream */
+ UAENODATA = 0x2f6df3b, /* No data available */
+ UAETIME = 0x2f6df3c, /* Timer expired */
+ UAENOSR = 0x2f6df3d, /* Out of streams resources */
+ UAENONET = 0x2f6df3e, /* Machine is not on the network */
+ UAENOPKG = 0x2f6df3f, /* Package not installed */
+ UAEREMOTE = 0x2f6df40, /* Object is remote */
+ UAENOLINK = 0x2f6df41, /* Link has been severed */
+ UAEADV = 0x2f6df42, /* Advertise error */
+ UAESRMNT = 0x2f6df43, /* Srmount error */
+ UAECOMM = 0x2f6df44, /* Communication error on send */
+ UAEPROTO = 0x2f6df45, /* Protocol error */
+ UAEMULTIHOP = 0x2f6df46, /* Multihop attempted */
+ UAEDOTDOT = 0x2f6df47, /* RFS specific error */
+ UAEBADMSG = 0x2f6df48, /* Not a data message */
+ UAEOVERFLOW = 0x2f6df49, /* Value too large for defined data type */
+ UAENOTUNIQ = 0x2f6df4a, /* Name not unique on network */
+ UAEBADFD = 0x2f6df4b, /* File descriptor in bad state */
+ UAEREMCHG = 0x2f6df4c, /* Remote address changed */
+ UAELIBACC = 0x2f6df4d, /* Can not access a needed shared library */
+ UAELIBBAD = 0x2f6df4e, /* Accessing a corrupted shared library */
+ UAELIBSCN = 0x2f6df4f, /* .lib section in a.out corrupted */
+ UAELIBMAX = 0x2f6df50, /* Attempting to link in too many shared libraries */
+ UAELIBEXEC = 0x2f6df51, /* Cannot exec a shared library directly */
+ UAEILSEQ = 0x2f6df52, /* Illegal byte sequence */
+ UAERESTART = 0x2f6df53, /* Interrupted system call should be restarted */
+ UAESTRPIPE = 0x2f6df54, /* Streams pipe error */
+ UAEUSERS = 0x2f6df55, /* Too many users */
+ UAENOTSOCK = 0x2f6df56, /* Socket operation on non-socket */
+ UAEDESTADDRREQ = 0x2f6df57, /* Destination address required */
+ UAEMSGSIZE = 0x2f6df58, /* Message too long */
+ UAEPROTOTYPE = 0x2f6df59, /* Protocol wrong type for socket */
+ UAENOPROTOOPT = 0x2f6df5a, /* Protocol not available */
+ UAEPROTONOSUPPORT = 0x2f6df5b, /* Protocol not supported */
+ UAESOCKTNOSUPPORT = 0x2f6df5c, /* Socket type not supported */
+ UAEOPNOTSUPP = 0x2f6df5d, /* Operation not supported on transport endpoint */
+ UAEPFNOSUPPORT = 0x2f6df5e, /* Protocol family not supported */
+ UAEAFNOSUPPORT = 0x2f6df5f, /* Address family not supported by protocol */
+ UAEADDRINUSE = 0x2f6df60, /* Address already in use */
+ UAEADDRNOTAVAIL = 0x2f6df61, /* Cannot assign requested address */
+ UAENETDOWN = 0x2f6df62, /* Network is down */
+ UAENETUNREACH = 0x2f6df63, /* Network is unreachable */
+ UAENETRESET = 0x2f6df64, /* Network dropped connection because of reset */
+ UAECONNABORTED = 0x2f6df65, /* Software caused connection abort */
+ UAECONNRESET = 0x2f6df66, /* Connection reset by peer */
+ UAENOBUFS = 0x2f6df67, /* No buffer space available */
+ UAEISCONN = 0x2f6df68, /* Transport endpoint is already connected */
+ UAENOTCONN = 0x2f6df69, /* Transport endpoint is not connected */
+ UAESHUTDOWN = 0x2f6df6a, /* Cannot send after transport endpoint shutdown */
+ UAETOOMANYREFS = 0x2f6df6b, /* Too many references: cannot splice */
+ UAETIMEDOUT = 0x2f6df6c, /* Connection timed out */
+ UAECONNREFUSED = 0x2f6df6d, /* Connection refused */
+ UAEHOSTDOWN = 0x2f6df6e, /* Host is down */
+ UAEHOSTUNREACH = 0x2f6df6f, /* No route to host */
+ UAEALREADY = 0x2f6df70, /* Operation already in progress */
+ UAEINPROGRESS = 0x2f6df71, /* Operation now in progress */
+ UAESTALE = 0x2f6df72, /* Stale NFS file handle */
+ UAEUCLEAN = 0x2f6df73, /* Structure needs cleaning */
+ UAENOTNAM = 0x2f6df74, /* Not a XENIX named type file */
+ UAENAVAIL = 0x2f6df75, /* No XENIX semaphores available */
+ UAEISNAM = 0x2f6df76, /* Is a named type file */
+ UAEREMOTEIO = 0x2f6df77, /* Remote I/O error */
+ UAEDQUOT = 0x2f6df78, /* Quota exceeded */
+ UAENOMEDIUM = 0x2f6df79, /* No medium found */
+ UAEMEDIUMTYPE = 0x2f6df7a, /* Wrong medium type */
+};
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d1dde2834b6d..0e5269374ac1 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -184,7 +184,7 @@ void afs_put_call(struct afs_call *call)
if (call->type->destructor)
call->type->destructor(call);
- afs_put_server(call->net, call->server);
+ afs_put_server(call->net, call->server, afs_server_trace_put_call);
afs_put_cb_interest(call->net, call->cbi);
afs_put_addrlist(call->alist);
kfree(call->request);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8866703b2e6c..71e71c07568f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -28,7 +28,7 @@ struct key *afs_request_key(struct afs_cell *cell)
_debug("key %s", cell->anonymous_key->description);
key = request_key(&key_type_rxrpc, cell->anonymous_key->description,
- NULL, NULL);
+ NULL);
if (IS_ERR(key)) {
if (PTR_ERR(key) != -ENOKEY) {
_leave(" = %ld", PTR_ERR(key));
diff --git a/fs/afs/server.c b/fs/afs/server.c
index e900cd74361b..64d440aaabc0 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -13,6 +13,7 @@
static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */
+static atomic_t afs_server_debug_id;
static void afs_inc_servers_outstanding(struct afs_net *net)
{
@@ -47,7 +48,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
do {
if (server)
- afs_put_server(net, server);
+ afs_put_server(net, server, afs_server_trace_put_find_rsq);
server = NULL;
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
@@ -112,7 +113,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
* changes.
*/
if (server)
- afs_put_server(net, server);
+ afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
server = NULL;
read_seqbegin_or_lock(&net->fs_lock, &seq);
@@ -127,7 +128,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
} else if (diff > 0) {
p = p->rb_right;
} else {
- afs_get_server(server);
+ afs_get_server(server, afs_server_trace_get_by_uuid);
break;
}
@@ -198,7 +199,7 @@ static struct afs_server *afs_install_server(struct afs_net *net,
ret = 0;
exists:
- afs_get_server(server);
+ afs_get_server(server, afs_server_trace_get_install);
write_sequnlock(&net->fs_lock);
return server;
}
@@ -219,6 +220,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
goto enomem;
atomic_set(&server->usage, 1);
+ server->debug_id = atomic_inc_return(&afs_server_debug_id);
RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
@@ -230,6 +232,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
spin_lock_init(&server->probe_lock);
afs_inc_servers_outstanding(net);
+ trace_afs_server(server, 1, afs_server_trace_alloc);
_leave(" = %p", server);
return server;
@@ -325,9 +328,22 @@ void afs_servers_timer(struct timer_list *timer)
}
/*
+ * Get a reference on a server object.
+ */
+struct afs_server *afs_get_server(struct afs_server *server,
+ enum afs_server_trace reason)
+{
+ unsigned int u = atomic_inc_return(&server->usage);
+
+ trace_afs_server(server, u, reason);
+ return server;
+}
+
+/*
* Release a reference on a server record.
*/
-void afs_put_server(struct afs_net *net, struct afs_server *server)
+void afs_put_server(struct afs_net *net, struct afs_server *server,
+ enum afs_server_trace reason)
{
unsigned int usage;
@@ -338,7 +354,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server)
usage = atomic_dec_return(&server->usage);
- _enter("{%u}", usage);
+ trace_afs_server(server, usage, reason);
if (likely(usage > 0))
return;
@@ -350,6 +366,8 @@ static void afs_server_rcu(struct rcu_head *rcu)
{
struct afs_server *server = container_of(rcu, struct afs_server, rcu);
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_free);
afs_put_addrlist(rcu_access_pointer(server->addresses));
kfree(server);
}
@@ -365,7 +383,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
.index = alist->preferred,
.error = 0,
};
- _enter("%p", server);
+
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_give_up_cb);
if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
@@ -373,6 +393,8 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
wait_var_event(&server->probe_outstanding,
atomic_read(&server->probe_outstanding) == 0);
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_destroy);
call_rcu(&server->rcu, afs_server_rcu);
afs_dec_servers_outstanding(net);
}
@@ -392,6 +414,7 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
write_seqlock(&net->fs_lock);
usage = 1;
deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
+ trace_afs_server(server, usage, afs_server_trace_gc);
if (deleted) {
rb_erase(&server->uuid_rb, &net->fs_servers);
hlist_del_rcu(&server->proc_link);
@@ -514,6 +537,8 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a
_enter("");
+ trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);
+
alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
&server->uuid);
if (IS_ERR(alist)) {
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index b4988bc8e6f2..888d91d195d9 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -16,7 +16,8 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
if (slist && refcount_dec_and_test(&slist->usage)) {
for (i = 0; i < slist->nr_servers; i++) {
afs_put_cb_interest(net, slist->servers[i].cb_interest);
- afs_put_server(net, slist->servers[i].server);
+ afs_put_server(net, slist->servers[i].server,
+ afs_server_trace_put_slist);
}
kfree(slist);
}
@@ -67,7 +68,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
- afs_put_server(cell->net, server);
+ afs_put_server(cell->net, server,
+ afs_server_trace_put_slist_isort);
continue;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 98eb7adbce91..cb76566763db 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -44,8 +44,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
return 0;
}
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
if (!req)
return -ENOMEM;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e4b59e76afb0..8c6b50f34466 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -42,6 +42,11 @@
#include <asm/unaligned.h>
#include <asm/cacheflush.h>
#include <asm/page.h>
+#include <asm/flat.h>
+
+#ifndef flat_get_relocate_addr
+#define flat_get_relocate_addr(rel) (rel)
+#endif
/****************************************************************************/
@@ -63,6 +68,12 @@
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
+#ifdef CONFIG_BINFMT_SHARED_FLAT
+#define MAX_SHARED_LIBS (4)
+#else
+#define MAX_SHARED_LIBS (1)
+#endif
+
struct lib_info {
struct {
unsigned long start_code; /* Start of text segment */
@@ -120,14 +131,15 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start
sp -= bprm->envc + 1;
sp -= bprm->argc + 1;
- sp -= flat_argvp_envp_on_stack() ? 2 : 0;
+ if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK))
+ sp -= 2; /* argvp + envp */
sp -= 1; /* &argc */
current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN;
sp = (unsigned long __user *)current->mm->start_stack;
__put_user(bprm->argc, sp++);
- if (flat_argvp_envp_on_stack()) {
+ if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK)) {
unsigned long argv, envp;
argv = (unsigned long)(sp + 2);
envp = (unsigned long)(sp + 2 + bprm->argc + 1);
@@ -345,7 +357,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
start_code = p->lib_list[id].start_code;
text_len = p->lib_list[id].text_len;
- if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
+ if (r > start_brk - start_data + text_len) {
pr_err("reloc outside program 0x%lx (0 - 0x%lx/0x%lx)",
r, start_brk-start_data+text_len, text_len);
goto failed;
@@ -368,6 +380,7 @@ failed:
/****************************************************************************/
+#ifdef CONFIG_BINFMT_FLAT_OLD
static void old_reloc(unsigned long rl)
{
static const char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" };
@@ -405,6 +418,7 @@ static void old_reloc(unsigned long rl)
pr_debug("Relocation became %lx\n", val);
}
+#endif /* CONFIG_BINFMT_FLAT_OLD */
/****************************************************************************/
@@ -415,7 +429,8 @@ static int load_flat_file(struct linux_binprm *bprm,
unsigned long textpos, datapos, realdatastart;
u32 text_len, data_len, bss_len, stack_len, full_data, flags;
unsigned long len, memp, memp_size, extra, rlim;
- u32 __user *reloc, *rp;
+ __be32 __user *reloc;
+ u32 __user *rp;
struct inode *inode;
int i, rev, relocs;
loff_t fpos;
@@ -454,6 +469,7 @@ static int load_flat_file(struct linux_binprm *bprm,
if (flags & FLAT_FLAG_KTRACE)
pr_info("Loading file: %s\n", bprm->filename);
+#ifdef CONFIG_BINFMT_FLAT_OLD
if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) {
pr_err("bad flat file version 0x%x (supported 0x%lx and 0x%lx)\n",
rev, FLAT_VERSION, OLD_FLAT_VERSION);
@@ -470,6 +486,23 @@ static int load_flat_file(struct linux_binprm *bprm,
}
/*
+ * fix up the flags for the older format, there were all kinds
+ * of endian hacks, this only works for the simple cases
+ */
+ if (rev == OLD_FLAT_VERSION &&
+ (flags || IS_ENABLED(CONFIG_BINFMT_FLAT_OLD_ALWAYS_RAM)))
+ flags = FLAT_FLAG_RAM;
+
+#else /* CONFIG_BINFMT_FLAT_OLD */
+ if (rev != FLAT_VERSION) {
+ pr_err("bad flat file version 0x%x (supported 0x%lx)\n",
+ rev, FLAT_VERSION);
+ ret = -ENOEXEC;
+ goto err;
+ }
+#endif /* !CONFIG_BINFMT_FLAT_OLD */
+
+ /*
* Make sure the header params are sane.
* 28 bits (256 MB) is way more than reasonable in this case.
* If some top bits are set we have probable binary corruption.
@@ -480,13 +513,6 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- /*
- * fix up the flags for the older format, there were all kinds
- * of endian hacks, this only works for the simple cases
- */
- if (rev == OLD_FLAT_VERSION && flat_old_ram_flag(flags))
- flags = FLAT_FLAG_RAM;
-
#ifndef CONFIG_BINFMT_ZFLAT
if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) {
pr_err("Support for ZFLAT executables is not enabled.\n");
@@ -547,7 +573,7 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
+ len = data_len + extra;
len = PAGE_ALIGN(len);
realdatastart = vm_mmap(NULL, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
@@ -561,9 +587,7 @@ static int load_flat_file(struct linux_binprm *bprm,
vm_munmap(textpos, text_len);
goto err;
}
- datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(unsigned long),
- FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
pr_debug("Allocated data+bss+stack (%u bytes): %lx\n",
data_len + bss_len + stack_len, datapos);
@@ -587,13 +611,13 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- reloc = (u32 __user *)
+ reloc = (__be32 __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
memp = realdatastart;
memp_size = len;
} else {
- len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
+ len = text_len + data_len + extra;
len = PAGE_ALIGN(len);
textpos = vm_mmap(NULL, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
@@ -608,11 +632,9 @@ static int load_flat_file(struct linux_binprm *bprm,
}
realdatastart = textpos + ntohl(hdr->data_start);
- datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(u32),
- FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
- reloc = (u32 __user *)
+ reloc = (__be32 __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
memp = textpos;
memp_size = len;
@@ -627,8 +649,9 @@ static int load_flat_file(struct linux_binprm *bprm,
(text_len + full_data
- sizeof(struct flat_hdr)),
0);
- memmove((void *) datapos, (void *) realdatastart,
- full_data);
+ if (datapos != realdatastart)
+ memmove((void *)datapos, (void *)realdatastart,
+ full_data);
#else
/*
* This is used on MMU systems mainly for testing.
@@ -684,8 +707,7 @@ static int load_flat_file(struct linux_binprm *bprm,
if (IS_ERR_VALUE(result)) {
ret = result;
pr_err("Unable to read code+data+bss, errno %d\n", ret);
- vm_munmap(textpos, text_len + data_len + extra +
- MAX_SHARED_LIBS * sizeof(u32));
+ vm_munmap(textpos, text_len + data_len + extra);
goto err;
}
}
@@ -775,20 +797,18 @@ static int load_flat_file(struct linux_binprm *bprm,
* __start to address 4 so that is okay).
*/
if (rev > OLD_FLAT_VERSION) {
- u32 __maybe_unused persistent = 0;
for (i = 0; i < relocs; i++) {
u32 addr, relval;
+ __be32 tmp;
/*
* Get the address of the pointer to be
* relocated (of course, the address has to be
* relocated first).
*/
- if (get_user(relval, reloc + i))
+ if (get_user(tmp, reloc + i))
return -EFAULT;
- relval = ntohl(relval);
- if (flat_set_persistent(relval, &persistent))
- continue;
+ relval = ntohl(tmp);
addr = flat_get_relocate_addr(relval);
rp = (u32 __user *)calc_reloc(addr, libinfo, id, 1);
if (rp == (u32 __user *)RELOC_FAILED) {
@@ -797,8 +817,7 @@ static int load_flat_file(struct linux_binprm *bprm,
}
/* Get the pointer's value. */
- ret = flat_get_addr_from_rp(rp, relval, flags,
- &addr, &persistent);
+ ret = flat_get_addr_from_rp(rp, relval, flags, &addr);
if (unlikely(ret))
goto err;
@@ -807,8 +826,13 @@ static int load_flat_file(struct linux_binprm *bprm,
* Do the relocation. PIC relocs in the data section are
* already in target order
*/
- if ((flags & FLAT_FLAG_GOTPIC) == 0)
- addr = ntohl(addr);
+ if ((flags & FLAT_FLAG_GOTPIC) == 0) {
+ /*
+ * Meh, the same value can have a different
+ * byte order based on a flag..
+ */
+ addr = ntohl((__force __be32)addr);
+ }
addr = calc_reloc(addr, libinfo, id, 0);
if (addr == RELOC_FAILED) {
ret = -ENOEXEC;
@@ -821,14 +845,15 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
}
+#ifdef CONFIG_BINFMT_FLAT_OLD
} else {
for (i = 0; i < relocs; i++) {
- u32 relval;
+ __be32 relval;
if (get_user(relval, reloc + i))
return -EFAULT;
- relval = ntohl(relval);
- old_reloc(relval);
+ old_reloc(ntohl(relval));
}
+#endif /* CONFIG_BINFMT_FLAT_OLD */
}
flush_icache_range(start_code, end_code);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2a1be0d1a698..56ae2f659b6d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2928,8 +2928,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
inode_lock(inode);
err = btrfs_delete_subvolume(dir, dentry);
inode_unlock(inode);
- if (!err)
+ if (!err) {
+ fsnotify_rmdir(dir, dentry);
d_delete(dentry);
+ }
out_dput:
dput(dentry);
diff --git a/fs/buffer.c b/fs/buffer.c
index e450c55f6434..49a871570092 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2086,38 +2086,6 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(block_write_begin);
-void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page)
-{
- loff_t old_size = inode->i_size;
- bool i_size_changed = false;
-
- /*
- * No need to use i_size_read() here, the i_size cannot change under us
- * because we hold i_rwsem.
- *
- * But it's important to update i_size while still holding page lock:
- * page writeout could otherwise come in and zero beyond i_size.
- */
- if (pos + copied > inode->i_size) {
- i_size_write(inode, pos + copied);
- i_size_changed = true;
- }
-
- unlock_page(page);
-
- if (old_size < pos)
- pagecache_isize_extended(inode, old_size, pos);
- /*
- * Don't mark the inode dirty under page lock. First, it unnecessarily
- * makes the holding time of page lock longer. Second, it forces lock
- * ordering of page lock and transaction start for journaling
- * filesystems.
- */
- if (i_size_changed)
- mark_inode_dirty(inode);
-}
-
int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
@@ -2158,9 +2126,37 @@ int generic_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
+ struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
+ bool i_size_changed = false;
+
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
- __generic_write_end(mapping->host, pos, copied, page);
+
+ /*
+ * No need to use i_size_read() here, the i_size cannot change under us
+ * because we hold i_rwsem.
+ *
+ * But it's important to update i_size while still holding page lock:
+ * page writeout could otherwise come in and zero beyond i_size.
+ */
+ if (pos + copied > inode->i_size) {
+ i_size_write(inode, pos + copied);
+ i_size_changed = true;
+ }
+
+ unlock_page(page);
put_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+ /*
+ * Don't mark the inode dirty under page lock. First, it unnecessarily
+ * makes the holding time of page lock longer. Second, it forces lock
+ * ordering of page lock and transaction start for journaling
+ * filesystems.
+ */
+ if (i_size_changed)
+ mark_inode_dirty(inode);
return copied;
}
EXPORT_SYMBOL(generic_write_end);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 183c37c0a8fc..c5517ffeb11c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1889,9 +1889,9 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode,
return 0;
}
-static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
- struct file *dst_file, loff_t dst_off,
- size_t len, unsigned int flags)
+static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
{
struct inode *src_inode = file_inode(src_file);
struct inode *dst_inode = file_inode(dst_file);
@@ -1909,6 +1909,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
if (src_inode == dst_inode)
return -EINVAL;
+ if (src_inode->i_sb != dst_inode->i_sb)
+ return -EXDEV;
if (ceph_snap(dst_inode) != CEPH_NOSNAP)
return -EROFS;
@@ -2100,6 +2102,21 @@ out:
return ret;
}
+static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len, flags);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len, flags);
+ return ret;
+}
+
const struct file_operations ceph_file_fops = {
.open = ceph_open,
.release = ceph_release,
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index d1b439ad0f1a..7f01c6e60791 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -32,25 +32,6 @@
#include "cifsproto.h"
static const struct cred *spnego_cred;
-static struct key_acl cifs_spnego_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
-static struct key_acl cifs_spnego_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_CLEAR),
- }
-};
-
/* create a new cifs key */
static int
cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
@@ -189,8 +170,7 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
cifs_dbg(FYI, "key description = %s\n", description);
saved_cred = override_creds(spnego_cred);
- spnego_key = request_key(&cifs_spnego_key_type, description, "",
- &cifs_spnego_key_acl);
+ spnego_key = request_key(&cifs_spnego_key_type, description, "");
revert_creds(saved_cred);
#ifdef CONFIG_CIFS_DEBUG2
@@ -227,7 +207,8 @@ init_cifs_spnego(void)
keyring = keyring_alloc(".cifs_spnego",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &cifs_spnego_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 78eed72f3af0..1d377b7f2860 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -33,25 +33,6 @@
#include "cifsproto.h"
#include "cifs_debug.h"
-static struct key_acl cifs_idmap_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
-static struct key_acl cifs_idmap_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
- }
-};
-
/* security id for everyone/world system group */
static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -317,8 +298,7 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid)
rc = 0;
saved_cred = override_creds(root_cred);
- sidkey = request_key(&cifs_idmap_key_type, desc, "",
- &cifs_idmap_key_acl);
+ sidkey = request_key(&cifs_idmap_key_type, desc, "");
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cifs_dbg(FYI, "%s: Can't map %cid %u to a SID\n",
@@ -423,8 +403,7 @@ try_upcall_to_get_id:
return -ENOMEM;
saved_cred = override_creds(root_cred);
- sidkey = request_key(&cifs_idmap_key_type, sidstr, "",
- &cifs_idmap_key_acl);
+ sidkey = request_key(&cifs_idmap_key_type, sidstr, "");
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n",
@@ -502,7 +481,8 @@ init_cifs_idmap(void)
keyring = keyring_alloc(".cifs_idmap",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &cifs_idmap_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 72db1c89bf5a..24635b65effa 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1149,6 +1149,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
len, flags);
free_xid(xid);
+
+ if (rc == -EOPNOTSUPP || rc == -EXDEV)
+ rc = generic_copy_file_range(src_file, off, dst_file,
+ destoff, len, flags);
return rc;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ae6bae2ecb5d..714a359c7c8d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2992,7 +2992,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
}
cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
- key = request_key(&key_type_logon, desc, "", NULL);
+ key = request_key(&key_type_logon, desc, "");
if (IS_ERR(key)) {
if (!ses->domainName) {
cifs_dbg(FYI, "domainName is NULL\n");
@@ -3003,7 +3003,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
/* didn't work, try to find a domain key */
sprintf(desc, "cifs:d:%s", ses->domainName);
cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
- key = request_key(&key_type_logon, desc, "", NULL);
+ key = request_key(&key_type_logon, desc, "");
if (IS_ERR(key)) {
rc = PTR_ERR(key);
goto out_err;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index d2ca5287762d..92112915de8e 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -13,6 +13,7 @@
#undef DEBUG
#include <linux/fs.h>
+#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -1788,6 +1789,7 @@ void configfs_unregister_group(struct config_group *group)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
+ fsnotify_rmdir(d_inode(parent), dentry);
d_delete(dentry);
inode_unlock(d_inode(parent));
@@ -1916,6 +1918,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
+ fsnotify_rmdir(d_inode(root), dentry);
inode_unlock(d_inode(dentry));
d_delete(dentry);
diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
index 24ed99e2eca0..5fdf24877c17 100644
--- a/fs/crypto/Kconfig
+++ b/fs/crypto/Kconfig
@@ -7,7 +7,6 @@ config FS_ENCRYPTION
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
- select CRYPTO_SHA256
select KEYS
help
Enable encryption of files and directories. This
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index b46021ebde85..82da2510721f 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -33,9 +33,8 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done)
bio_for_each_segment_all(bv, bio, iter_all) {
struct page *page = bv->bv_page;
- int ret = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
-
+ int ret = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len,
+ bv->bv_offset);
if (ret)
SetPageError(page);
else if (done)
@@ -53,9 +52,8 @@ EXPORT_SYMBOL(fscrypt_decrypt_bio);
static void completion_pages(struct work_struct *work)
{
- struct fscrypt_ctx *ctx =
- container_of(work, struct fscrypt_ctx, r.work);
- struct bio *bio = ctx->r.bio;
+ struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work);
+ struct bio *bio = ctx->bio;
__fscrypt_decrypt_bio(bio, true);
fscrypt_release_ctx(ctx);
@@ -64,57 +62,29 @@ static void completion_pages(struct work_struct *work)
void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio)
{
- INIT_WORK(&ctx->r.work, completion_pages);
- ctx->r.bio = bio;
- fscrypt_enqueue_decrypt_work(&ctx->r.work);
+ INIT_WORK(&ctx->work, completion_pages);
+ ctx->bio = bio;
+ fscrypt_enqueue_decrypt_work(&ctx->work);
}
EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio);
-void fscrypt_pullback_bio_page(struct page **page, bool restore)
-{
- struct fscrypt_ctx *ctx;
- struct page *bounce_page;
-
- /* The bounce data pages are unmapped. */
- if ((*page)->mapping)
- return;
-
- /* The bounce data page is unmapped. */
- bounce_page = *page;
- ctx = (struct fscrypt_ctx *)page_private(bounce_page);
-
- /* restore control page */
- *page = ctx->w.control_page;
-
- if (restore)
- fscrypt_restore_control_page(bounce_page);
-}
-EXPORT_SYMBOL(fscrypt_pullback_bio_page);
-
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
- struct fscrypt_ctx *ctx;
- struct page *ciphertext_page = NULL;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ struct page *ciphertext_page;
struct bio *bio;
int ret, err = 0;
- BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
-
- ctx = fscrypt_get_ctx(GFP_NOFS);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT);
- if (IS_ERR(ciphertext_page)) {
- err = PTR_ERR(ciphertext_page);
- goto errout;
- }
+ ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT);
+ if (!ciphertext_page)
+ return -ENOMEM;
while (len--) {
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk,
- ZERO_PAGE(0), ciphertext_page,
- PAGE_SIZE, 0, GFP_NOFS);
+ err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk,
+ ZERO_PAGE(0), ciphertext_page,
+ blocksize, 0, GFP_NOFS);
if (err)
goto errout;
@@ -124,14 +94,11 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
goto errout;
}
bio_set_dev(bio, inode->i_sb->s_bdev);
- bio->bi_iter.bi_sector =
- pblk << (inode->i_sb->s_blocksize_bits - 9);
+ bio->bi_iter.bi_sector = pblk << (blockbits - 9);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- ret = bio_add_page(bio, ciphertext_page,
- inode->i_sb->s_blocksize, 0);
- if (ret != inode->i_sb->s_blocksize) {
+ ret = bio_add_page(bio, ciphertext_page, blocksize, 0);
+ if (WARN_ON(ret != blocksize)) {
/* should never happen! */
- WARN_ON(1);
bio_put(bio);
err = -EIO;
goto errout;
@@ -147,7 +114,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
}
err = 0;
errout:
- fscrypt_release_ctx(ctx);
+ fscrypt_free_bounce_page(ciphertext_page);
return err;
}
EXPORT_SYMBOL(fscrypt_zeroout_range);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 335a362ee446..45c3d0427fb2 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -59,23 +59,16 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work)
EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work);
/**
- * fscrypt_release_ctx() - Releases an encryption context
- * @ctx: The encryption context to release.
+ * fscrypt_release_ctx() - Release a decryption context
+ * @ctx: The decryption context to release.
*
- * If the encryption context was allocated from the pre-allocated pool, returns
- * it to that pool. Else, frees it.
- *
- * If there's a bounce page in the context, this frees that.
+ * If the decryption context was allocated from the pre-allocated pool, return
+ * it to that pool. Else, free it.
*/
void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
{
unsigned long flags;
- if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) {
- mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool);
- ctx->w.bounce_page = NULL;
- }
- ctx->w.control_page = NULL;
if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
kmem_cache_free(fscrypt_ctx_cachep, ctx);
} else {
@@ -87,12 +80,12 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
EXPORT_SYMBOL(fscrypt_release_ctx);
/**
- * fscrypt_get_ctx() - Gets an encryption context
+ * fscrypt_get_ctx() - Get a decryption context
* @gfp_flags: The gfp flag for memory allocation
*
- * Allocates and initializes an encryption context.
+ * Allocate and initialize a decryption context.
*
- * Return: A new encryption context on success; an ERR_PTR() otherwise.
+ * Return: A new decryption context on success; an ERR_PTR() otherwise.
*/
struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
{
@@ -100,14 +93,8 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
unsigned long flags;
/*
- * We first try getting the ctx from a free list because in
- * the common case the ctx will have an allocated and
- * initialized crypto tfm, so it's probably a worthwhile
- * optimization. For the bounce page, we first try getting it
- * from the kernel allocator because that's just about as fast
- * as getting it from a list and because a cache of free pages
- * should generally be a "last resort" option for a filesystem
- * to be able to do its job.
+ * First try getting a ctx from the free list so that we don't have to
+ * call into the slab allocator.
*/
spin_lock_irqsave(&fscrypt_ctx_lock, flags);
ctx = list_first_entry_or_null(&fscrypt_free_ctxs,
@@ -123,11 +110,31 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
} else {
ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
}
- ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL;
return ctx;
}
EXPORT_SYMBOL(fscrypt_get_ctx);
+struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags)
+{
+ return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
+}
+
+/**
+ * fscrypt_free_bounce_page() - free a ciphertext bounce page
+ *
+ * Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(),
+ * or by fscrypt_alloc_bounce_page() directly.
+ */
+void fscrypt_free_bounce_page(struct page *bounce_page)
+{
+ if (!bounce_page)
+ return;
+ set_page_private(bounce_page, (unsigned long)NULL);
+ ClearPagePrivate(bounce_page);
+ mempool_free(bounce_page, fscrypt_bounce_page_pool);
+}
+EXPORT_SYMBOL(fscrypt_free_bounce_page);
+
void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
const struct fscrypt_info *ci)
{
@@ -141,10 +148,11 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw);
}
-int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
- u64 lblk_num, struct page *src_page,
- struct page *dest_page, unsigned int len,
- unsigned int offs, gfp_t gfp_flags)
+/* Encrypt or decrypt a single filesystem block of file contents */
+int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
+ u64 lblk_num, struct page *src_page,
+ struct page *dest_page, unsigned int len,
+ unsigned int offs, gfp_t gfp_flags)
{
union fscrypt_iv iv;
struct skcipher_request *req = NULL;
@@ -154,7 +162,10 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
- BUG_ON(len == 0);
+ if (WARN_ON_ONCE(len <= 0))
+ return -EINVAL;
+ if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0))
+ return -EINVAL;
fscrypt_generate_iv(&iv, lblk_num, ci);
@@ -186,126 +197,158 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
return 0;
}
-struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
- gfp_t gfp_flags)
-{
- ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
- if (ctx->w.bounce_page == NULL)
- return ERR_PTR(-ENOMEM);
- ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL;
- return ctx->w.bounce_page;
-}
-
/**
- * fscypt_encrypt_page() - Encrypts a page
- * @inode: The inode for which the encryption should take place
- * @page: The page to encrypt. Must be locked for bounce-page
- * encryption.
- * @len: Length of data to encrypt in @page and encrypted
- * data in returned page.
- * @offs: Offset of data within @page and returned
- * page holding encrypted data.
- * @lblk_num: Logical block number. This must be unique for multiple
- * calls with same inode, except when overwriting
- * previously written data.
- * @gfp_flags: The gfp flag for memory allocation
- *
- * Encrypts @page using the ctx encryption context. Performs encryption
- * either in-place or into a newly allocated bounce page.
- * Called on the page write path.
+ * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a pagecache page
+ * @page: The locked pagecache page containing the block(s) to encrypt
+ * @len: Total size of the block(s) to encrypt. Must be a nonzero
+ * multiple of the filesystem's block size.
+ * @offs: Byte offset within @page of the first block to encrypt. Must be
+ * a multiple of the filesystem's block size.
+ * @gfp_flags: Memory allocation flags
*
- * Bounce page allocation is the default.
- * In this case, the contents of @page are encrypted and stored in an
- * allocated bounce page. @page has to be locked and the caller must call
- * fscrypt_restore_control_page() on the returned ciphertext page to
- * release the bounce buffer and the encryption context.
+ * A new bounce page is allocated, and the specified block(s) are encrypted into
+ * it. In the bounce page, the ciphertext block(s) will be located at the same
+ * offsets at which the plaintext block(s) were located in the source page; any
+ * other parts of the bounce page will be left uninitialized. However, normally
+ * blocksize == PAGE_SIZE and the whole page is encrypted at once.
*
- * In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in
- * fscrypt_operations. Here, the input-page is returned with its content
- * encrypted.
+ * This is for use by the filesystem's ->writepages() method.
*
- * Return: A page with the encrypted content on success. Else, an
- * error value or NULL.
+ * Return: the new encrypted bounce page on success; an ERR_PTR() on failure
*/
-struct page *fscrypt_encrypt_page(const struct inode *inode,
- struct page *page,
- unsigned int len,
- unsigned int offs,
- u64 lblk_num, gfp_t gfp_flags)
+struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ gfp_t gfp_flags)
{
- struct fscrypt_ctx *ctx;
- struct page *ciphertext_page = page;
+ const struct inode *inode = page->mapping->host;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ struct page *ciphertext_page;
+ u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
+ (offs >> blockbits);
+ unsigned int i;
int err;
- BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0);
+ if (WARN_ON_ONCE(!PageLocked(page)))
+ return ERR_PTR(-EINVAL);
- if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) {
- /* with inplace-encryption we just encrypt the page */
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page,
- ciphertext_page, len, offs,
- gfp_flags);
- if (err)
- return ERR_PTR(err);
+ if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
+ return ERR_PTR(-EINVAL);
- return ciphertext_page;
- }
-
- BUG_ON(!PageLocked(page));
-
- ctx = fscrypt_get_ctx(gfp_flags);
- if (IS_ERR(ctx))
- return ERR_CAST(ctx);
-
- /* The encryption operation will require a bounce page. */
- ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags);
- if (IS_ERR(ciphertext_page))
- goto errout;
+ ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags);
+ if (!ciphertext_page)
+ return ERR_PTR(-ENOMEM);
- ctx->w.control_page = page;
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num,
- page, ciphertext_page, len, offs,
- gfp_flags);
- if (err) {
- ciphertext_page = ERR_PTR(err);
- goto errout;
+ for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
+ err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num,
+ page, ciphertext_page,
+ blocksize, i, gfp_flags);
+ if (err) {
+ fscrypt_free_bounce_page(ciphertext_page);
+ return ERR_PTR(err);
+ }
}
SetPagePrivate(ciphertext_page);
- set_page_private(ciphertext_page, (unsigned long)ctx);
- lock_page(ciphertext_page);
+ set_page_private(ciphertext_page, (unsigned long)page);
return ciphertext_page;
+}
+EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks);
-errout:
- fscrypt_release_ctx(ctx);
- return ciphertext_page;
+/**
+ * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place
+ * @inode: The inode to which this block belongs
+ * @page: The page containing the block to encrypt
+ * @len: Size of block to encrypt. Doesn't need to be a multiple of the
+ * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
+ * @offs: Byte offset within @page at which the block to encrypt begins
+ * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based
+ * number of the block within the file
+ * @gfp_flags: Memory allocation flags
+ *
+ * Encrypt a possibly-compressed filesystem block that is located in an
+ * arbitrary page, not necessarily in the original pagecache page. The @inode
+ * and @lblk_num must be specified, as they can't be determined from @page.
+ *
+ * Return: 0 on success; -errno on failure
+ */
+int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offs,
+ u64 lblk_num, gfp_t gfp_flags)
+{
+ return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page,
+ len, offs, gfp_flags);
}
-EXPORT_SYMBOL(fscrypt_encrypt_page);
+EXPORT_SYMBOL(fscrypt_encrypt_block_inplace);
/**
- * fscrypt_decrypt_page() - Decrypts a page in-place
- * @inode: The corresponding inode for the page to decrypt.
- * @page: The page to decrypt. Must be locked in case
- * it is a writeback page (FS_CFLG_OWN_PAGES unset).
- * @len: Number of bytes in @page to be decrypted.
- * @offs: Start of data in @page.
- * @lblk_num: Logical block number.
+ * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a pagecache page
+ * @page: The locked pagecache page containing the block(s) to decrypt
+ * @len: Total size of the block(s) to decrypt. Must be a nonzero
+ * multiple of the filesystem's block size.
+ * @offs: Byte offset within @page of the first block to decrypt. Must be
+ * a multiple of the filesystem's block size.
*
- * Decrypts page in-place using the ctx encryption context.
+ * The specified block(s) are decrypted in-place within the pagecache page,
+ * which must still be locked and not uptodate. Normally, blocksize ==
+ * PAGE_SIZE and the whole page is decrypted at once.
*
- * Called from the read completion callback.
+ * This is for use by the filesystem's ->readpages() method.
*
- * Return: Zero on success, non-zero otherwise.
+ * Return: 0 on success; -errno on failure
*/
-int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
- unsigned int len, unsigned int offs, u64 lblk_num)
+int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
+ unsigned int offs)
{
- if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES))
- BUG_ON(!PageLocked(page));
+ const struct inode *inode = page->mapping->host;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
+ (offs >> blockbits);
+ unsigned int i;
+ int err;
+
+ if (WARN_ON_ONCE(!PageLocked(page)))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
+ return -EINVAL;
+
+ for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
+ err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page,
+ page, blocksize, i, GFP_NOFS);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks);
- return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page,
- len, offs, GFP_NOFS);
+/**
+ * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place
+ * @inode: The inode to which this block belongs
+ * @page: The page containing the block to decrypt
+ * @len: Size of block to decrypt. Doesn't need to be a multiple of the
+ * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
+ * @offs: Byte offset within @page at which the block to decrypt begins
+ * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based
+ * number of the block within the file
+ *
+ * Decrypt a possibly-compressed filesystem block that is located in an
+ * arbitrary page, not necessarily in the original pagecache page. The @inode
+ * and @lblk_num must be specified, as they can't be determined from @page.
+ *
+ * Return: 0 on success; -errno on failure
+ */
+int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offs,
+ u64 lblk_num)
+{
+ return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page,
+ len, offs, GFP_NOFS);
}
-EXPORT_SYMBOL(fscrypt_decrypt_page);
+EXPORT_SYMBOL(fscrypt_decrypt_block_inplace);
/*
* Validate dentries in encrypted directories to make sure we aren't potentially
@@ -355,18 +398,6 @@ const struct dentry_operations fscrypt_d_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
-void fscrypt_restore_control_page(struct page *page)
-{
- struct fscrypt_ctx *ctx;
-
- ctx = (struct fscrypt_ctx *)page_private(page);
- set_page_private(page, (unsigned long)NULL);
- ClearPagePrivate(page);
- unlock_page(page);
- fscrypt_release_ctx(ctx);
-}
-EXPORT_SYMBOL(fscrypt_restore_control_page);
-
static void fscrypt_destroy(void)
{
struct fscrypt_ctx *pos, *n;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index eccea3d8f923..00d150ff3033 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -12,7 +12,6 @@
*/
#include <linux/scatterlist.h>
-#include <linux/ratelimit.h>
#include <crypto/skcipher.h>
#include "fscrypt_private.h"
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 7da276159593..8978eec9d766 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -94,7 +94,6 @@ typedef enum {
} fscrypt_direction_t;
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
-#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
u32 filenames_mode)
@@ -117,14 +116,12 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
extern int fscrypt_initialize(unsigned int cop_flags);
-extern int fscrypt_do_page_crypto(const struct inode *inode,
- fscrypt_direction_t rw, u64 lblk_num,
- struct page *src_page,
- struct page *dest_page,
- unsigned int len, unsigned int offs,
- gfp_t gfp_flags);
-extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
- gfp_t gfp_flags);
+extern int fscrypt_crypt_block(const struct inode *inode,
+ fscrypt_direction_t rw, u64 lblk_num,
+ struct page *src_page, struct page *dest_page,
+ unsigned int len, unsigned int offs,
+ gfp_t gfp_flags);
+extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags);
extern const struct dentry_operations fscrypt_d_ops;
extern void __printf(3, 4) __cold
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index bd525f7573a4..c1d6715d88e9 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -5,7 +5,6 @@
* Encryption hooks for higher-level filesystem operations.
*/
-#include <linux/ratelimit.h>
#include "fscrypt_private.h"
/**
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 4f85af8ab239..207ebed918c1 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -12,7 +12,6 @@
#include <keys/user-type.h>
#include <linux/hashtable.h>
#include <linux/scatterlist.h>
-#include <linux/ratelimit.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
@@ -92,7 +91,7 @@ find_and_lock_process_key(const char *prefix,
if (!description)
return ERR_PTR(-ENOMEM);
- key = request_key(&key_type_logon, description, NULL, NULL);
+ key = request_key(&key_type_logon, description, NULL);
kfree(description);
if (IS_ERR(key))
return key;
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index d536889ac31b..4941fe8471ce 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -81,6 +81,8 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
if (ret == -ENODATA) {
if (!S_ISDIR(inode->i_mode))
ret = -ENOTDIR;
+ else if (IS_DEADDIR(inode))
+ ret = -ENOENT;
else if (!inode->i_sb->s_cop->empty_dir(inode))
ret = -ENOTEMPTY;
else
diff --git a/fs/dcache.c b/fs/dcache.c
index c435398f2c81..f41121e5d1ec 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2372,7 +2372,6 @@ EXPORT_SYMBOL(d_hash_and_lookup);
void d_delete(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
- int isdir = d_is_dir(dentry);
spin_lock(&inode->i_lock);
spin_lock(&dentry->d_lock);
@@ -2387,7 +2386,6 @@ void d_delete(struct dentry * dentry)
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
}
- fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index acef14ad53db..1e444fe1f778 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -617,13 +617,10 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
}
EXPORT_SYMBOL_GPL(debugfs_create_symlink);
-static void __debugfs_remove_file(struct dentry *dentry, struct dentry *parent)
+static void __debugfs_file_removed(struct dentry *dentry)
{
struct debugfs_fsdata *fsd;
- simple_unlink(d_inode(parent), dentry);
- d_delete(dentry);
-
/*
* Paired with the closing smp_mb() implied by a successful
* cmpxchg() in debugfs_file_get(): either
@@ -644,16 +641,18 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
if (simple_positive(dentry)) {
dget(dentry);
- if (!d_is_reg(dentry)) {
- if (d_is_dir(dentry))
- ret = simple_rmdir(d_inode(parent), dentry);
- else
- simple_unlink(d_inode(parent), dentry);
+ if (d_is_dir(dentry)) {
+ ret = simple_rmdir(d_inode(parent), dentry);
if (!ret)
- d_delete(dentry);
+ fsnotify_rmdir(d_inode(parent), dentry);
} else {
- __debugfs_remove_file(dentry, parent);
+ simple_unlink(d_inode(parent), dentry);
+ fsnotify_unlink(d_inode(parent), dentry);
}
+ if (!ret)
+ d_delete(dentry);
+ if (d_is_reg(dentry))
+ __debugfs_file_removed(dentry);
dput(dentry);
}
return ret;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 2c14ae044dce..beeadca23b05 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -621,6 +621,7 @@ void devpts_pty_kill(struct dentry *dentry)
dentry->d_fsdata = NULL;
drop_nlink(dentry->d_inode);
+ fsnotify_unlink(d_inode(dentry->d_parent), dentry);
d_delete(dentry);
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 67844fe41a61..1c1a56be7ea2 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -91,7 +91,7 @@ ecryptfs_get_encrypted_key_payload_data(struct key *key)
static inline struct key *ecryptfs_get_encrypted_key(char *sig)
{
- return request_key(&key_type_encrypted, sig, NULL, NULL);
+ return request_key(&key_type_encrypted, sig, NULL);
}
#else
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index ba382f135918..9536e592e25a 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1610,7 +1610,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
{
int rc = 0;
- (*auth_tok_key) = request_key(&key_type_user, sig, NULL, NULL);
+ (*auth_tok_key) = request_key(&key_type_user, sig, NULL);
if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
(*auth_tok_key) = ecryptfs_get_encrypted_key(sig);
if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 33db13365c5e..547c165299c0 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1197,7 +1197,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi)
/*
* Returns 1 if the passed-in block region is valid; 0 if some part overlaps
- * with filesystem metadata blocksi.
+ * with filesystem metadata blocks.
*/
int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
unsigned int count)
@@ -1212,7 +1212,6 @@ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
(start_blk + count >= sbi->s_sb_block))
return 0;
-
return 1;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index a0c5ea91fcd4..fda7d3f5b4be 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -172,9 +172,7 @@ static void ext2_preread_inode(struct inode *inode)
struct backing_dev_info *bdi;
bdi = inode_to_bdi(inode);
- if (bdi_read_congested(bdi))
- return;
- if (bdi_write_congested(bdi))
+ if (bdi_rw_congested(bdi))
return;
block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
@@ -511,6 +509,7 @@ repeat_in_this_group:
/*
* Scanned all blockgroups.
*/
+ brelse(bitmap_bh);
err = -ENOSPC;
goto fail;
got:
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e474127dd255..7004ce581a32 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1400,7 +1400,7 @@ void ext2_set_file_ops(struct inode *inode)
struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
{
struct ext2_inode_info *ei;
- struct buffer_head * bh;
+ struct buffer_head * bh = NULL;
struct ext2_inode *raw_inode;
struct inode *inode;
long ret = -EIO;
@@ -1446,7 +1446,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
*/
if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
/* this inode is deleted */
- brelse (bh);
ret = -ESTALE;
goto bad_inode;
}
@@ -1463,7 +1462,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
!ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) {
ext2_error(sb, "ext2_iget", "bad extended attribute block %u",
ei->i_file_acl);
- brelse(bh);
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -1526,6 +1524,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
return inode;
bad_inode:
+ brelse(bh);
iget_failed(inode);
return ERR_PTR(ret);
}
@@ -1640,7 +1639,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
}
int ext2_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_falgs)
+ u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct ext2_inode_info *ei = EXT2_I(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1d7ab73b1014..44eb6e7eb492 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -303,16 +303,16 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sb, NOBH))
seq_puts(seq, ",nobh");
- if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
+ if (test_opt(sb, USRQUOTA))
seq_puts(seq, ",usrquota");
- if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA)
+ if (test_opt(sb, GRPQUOTA))
seq_puts(seq, ",grpquota");
- if (sbi->s_mount_opt & EXT2_MOUNT_XIP)
+ if (test_opt(sb, XIP))
seq_puts(seq, ",xip");
- if (sbi->s_mount_opt & EXT2_MOUNT_DAX)
+ if (test_opt(sb, DAX))
seq_puts(seq, ",dax");
if (!test_opt(sb, RESERVATION))
@@ -935,8 +935,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_resgid = opts.s_resgid;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
- ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
- SB_POSIXACL : 0);
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
sb->s_iflags |= SB_I_CGROUPWB;
if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
@@ -967,11 +966,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
- if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
+ if (test_opt(sb, DAX)) {
if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
ext2_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
- sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
+ clear_opt(sbi->s_mount_opt, DAX);
}
}
@@ -1404,7 +1403,7 @@ out_set:
sbi->s_resuid = new_opts.s_resuid;
sbi->s_resgid = new_opts.s_resgid;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
- ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0);
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
spin_unlock(&sbi->s_lock);
return 0;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 1e33e0ac8cf1..79369c13cc55 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -134,6 +134,53 @@ ext2_xattr_handler(int name_index)
return handler;
}
+static bool
+ext2_xattr_header_valid(struct ext2_xattr_header *header)
+{
+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
+ header->h_blocks != cpu_to_le32(1))
+ return false;
+
+ return true;
+}
+
+static bool
+ext2_xattr_entry_valid(struct ext2_xattr_entry *entry,
+ char *end, size_t end_offs)
+{
+ struct ext2_xattr_entry *next;
+ size_t size;
+
+ next = EXT2_XATTR_NEXT(entry);
+ if ((char *)next >= end)
+ return false;
+
+ if (entry->e_value_block != 0)
+ return false;
+
+ size = le32_to_cpu(entry->e_value_size);
+ if (size > end_offs ||
+ le16_to_cpu(entry->e_value_offs) + size > end_offs)
+ return false;
+
+ return true;
+}
+
+static int
+ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name,
+ struct ext2_xattr_entry *entry)
+{
+ int cmp;
+
+ cmp = name_index - entry->e_name_index;
+ if (!cmp)
+ cmp = name_len - entry->e_name_len;
+ if (!cmp)
+ cmp = memcmp(name, entry->e_name, name_len);
+
+ return cmp;
+}
+
/*
* ext2_xattr_get()
*
@@ -152,7 +199,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
struct ext2_xattr_entry *entry;
size_t name_len, size;
char *end;
- int error;
+ int error, not_found;
struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
@@ -176,9 +223,9 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
+ if (!ext2_xattr_header_valid(HDR(bh))) {
+bad_block:
+ ext2_error(inode->i_sb, "ext2_xattr_get",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
@@ -188,29 +235,25 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
/* find named attribute */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
- struct ext2_xattr_entry *next =
- EXT2_XATTR_NEXT(entry);
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(entry, end,
+ inode->i_sb->s_blocksize))
goto bad_block;
- if (name_index == entry->e_name_index &&
- name_len == entry->e_name_len &&
- memcmp(name, entry->e_name, name_len) == 0)
+
+ not_found = ext2_xattr_cmp_entry(name_index, name_len, name,
+ entry);
+ if (!not_found)
goto found;
- entry = next;
+ if (not_found < 0)
+ break;
+
+ entry = EXT2_XATTR_NEXT(entry);
}
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
found:
- /* check the buffer size */
- if (entry->e_value_block != 0)
- goto bad_block;
size = le32_to_cpu(entry->e_value_size);
- if (size > inode->i_sb->s_blocksize ||
- le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
- goto bad_block;
-
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
@@ -266,9 +309,9 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
+ if (!ext2_xattr_header_valid(HDR(bh))) {
+bad_block:
+ ext2_error(inode->i_sb, "ext2_xattr_list",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
@@ -278,11 +321,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
/* check the on-disk data structure */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry);
-
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(entry, end,
+ inode->i_sb->s_blocksize))
goto bad_block;
- entry = next;
+ entry = EXT2_XATTR_NEXT(entry);
}
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
@@ -367,7 +409,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
struct ext2_xattr_header *header = NULL;
- struct ext2_xattr_entry *here, *last;
+ struct ext2_xattr_entry *here = NULL, *last = NULL;
size_t name_len, free, min_offs = sb->s_blocksize;
int not_found = 1, error;
char *end;
@@ -406,47 +448,39 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
le32_to_cpu(HDR(bh)->h_refcount));
header = HDR(bh);
end = bh->b_data + bh->b_size;
- if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- header->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(sb, "ext2_xattr_set",
+ if (!ext2_xattr_header_valid(header)) {
+bad_block:
+ ext2_error(sb, "ext2_xattr_set",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
- /* Find the named attribute. */
- here = FIRST_ENTRY(bh);
- while (!IS_LAST_ENTRY(here)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
- if ((char *)next >= end)
- goto bad_block;
- if (!here->e_value_block && here->e_value_size) {
- size_t offs = le16_to_cpu(here->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
- }
- not_found = name_index - here->e_name_index;
- if (!not_found)
- not_found = name_len - here->e_name_len;
- if (!not_found)
- not_found = memcmp(name, here->e_name,name_len);
- if (not_found <= 0)
- break;
- here = next;
- }
- last = here;
- /* We still need to compute min_offs and last. */
+ /*
+ * Find the named attribute. If not found, 'here' will point
+ * to the entry where the new attribute should be inserted to
+ * maintain sorting.
+ */
+ last = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(last)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize))
goto bad_block;
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
- last = next;
+ if (not_found > 0) {
+ not_found = ext2_xattr_cmp_entry(name_index,
+ name_len,
+ name, last);
+ if (not_found <= 0)
+ here = last;
+ }
+ last = EXT2_XATTR_NEXT(last);
}
+ if (not_found > 0)
+ here = last;
/* Check whether we have enough space left. */
free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
@@ -454,7 +488,6 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* We will use a new extended attribute block. */
free = sb->s_blocksize -
sizeof(struct ext2_xattr_header) - sizeof(__u32);
- here = last = NULL; /* avoid gcc uninitialized warning. */
}
if (not_found) {
@@ -470,14 +503,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
error = -EEXIST;
if (flags & XATTR_CREATE)
goto cleanup;
- if (!here->e_value_block && here->e_value_size) {
- size_t size = le32_to_cpu(here->e_value_size);
-
- if (le16_to_cpu(here->e_value_offs) + size >
- sb->s_blocksize || size > sb->s_blocksize)
- goto bad_block;
- free += EXT2_XATTR_SIZE(size);
- }
+ free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size));
free += EXT2_XATTR_LEN(name_len);
}
error = -ENOSPC;
@@ -506,11 +532,10 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
unlock_buffer(bh);
ea_bdebug(bh, "cloning");
- header = kmalloc(bh->b_size, GFP_KERNEL);
+ header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
error = -ENOMEM;
if (header == NULL)
goto cleanup;
- memcpy(header, HDR(bh), bh->b_size);
header->h_refcount = cpu_to_le32(1);
offset = (char *)here - bh->b_data;
@@ -542,7 +567,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
here->e_name_len = name_len;
memcpy(here->e_name, name, name_len);
} else {
- if (!here->e_value_block && here->e_value_size) {
+ if (here->e_value_size) {
char *first_val = (char *)header + min_offs;
size_t offs = le16_to_cpu(here->e_value_offs);
char *val = (char *)header + offs;
@@ -569,7 +594,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
last = ENTRY(header+1);
while (!IS_LAST_ENTRY(last)) {
size_t o = le16_to_cpu(last->e_value_offs);
- if (!last->e_value_block && o < offs)
+ if (o < offs)
last->e_value_offs =
cpu_to_le16(o + size);
last = EXT2_XATTR_NEXT(last);
@@ -784,8 +809,7 @@ ext2_xattr_delete_inode(struct inode *inode)
goto cleanup;
}
ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
+ if (!ext2_xattr_header_valid(HDR(bh))) {
ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e5d6ee61ff48..0b202e00d93f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -603,9 +603,9 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
}
/**
- * ext4_should_retry_alloc()
+ * ext4_should_retry_alloc() - check if a block allocation should be retried
* @sb: super block
- * @retries number of attemps has been made
+ * @retries: number of attempts that have been made
*
* ext4_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c7843b149a1e..86054f31fe4d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,6 +33,9 @@
static int ext4_dx_readdir(struct file *, struct dir_context *);
/**
+ * is_dx_dir() - check if a directory is using htree indexing
+ * @inode: directory inode
+ *
* Check if the given dir-inode refers to an htree-indexed directory
* (or a directory which could potentially get converted to use htree
* indexing).
@@ -109,7 +112,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
- int dir_has_error = 0;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
if (IS_ENCRYPTED(inode)) {
@@ -145,8 +147,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
return err;
}
- offset = ctx->pos & (sb->s_blocksize - 1);
-
while (ctx->pos < inode->i_size) {
struct ext4_map_blocks map;
@@ -155,9 +155,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
goto errout;
}
cond_resched();
+ offset = ctx->pos & (sb->s_blocksize - 1);
map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
map.m_len = 1;
err = ext4_map_blocks(NULL, inode, &map, 0);
+ if (err == 0) {
+ /* m_len should never be zero but let's avoid
+ * an infinite loop if it somehow is */
+ if (map.m_len == 0)
+ map.m_len = 1;
+ ctx->pos += map.m_len * sb->s_blocksize;
+ continue;
+ }
if (err > 0) {
pgoff_t index = map.m_pblk >>
(PAGE_SHIFT - inode->i_blkbits);
@@ -176,13 +185,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
}
if (!bh) {
- if (!dir_has_error) {
- EXT4_ERROR_FILE(file, 0,
- "directory contains a "
- "hole at offset %llu",
- (unsigned long long) ctx->pos);
- dir_has_error = 1;
- }
/* corrupt size? Maybe no more blocks to read */
if (ctx->pos > inode->i_blocks << 9)
break;
@@ -192,8 +194,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
/* Check the checksum */
if (!buffer_verified(bh) &&
- !ext4_dirent_csum_verify(inode,
- (struct ext4_dir_entry *)bh->b_data)) {
+ !ext4_dirblock_csum_verify(inode, bh)) {
EXT4_ERROR_FILE(file, 0, "directory fails checksum "
"at offset %llu",
(unsigned long long)ctx->pos);
@@ -674,7 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
return memcmp(str, name->name, len);
}
- return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
+ return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
}
static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1cb67859e051..bf660aa7a9e0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -421,7 +421,8 @@ struct flex_groups {
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL))
+#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
+ EXT4_PROJINHERIT_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
@@ -2077,6 +2078,9 @@ struct ext4_filename {
#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_str crypto_buf;
#endif
+#ifdef CONFIG_UNICODE
+ struct fscrypt_str cf_name;
+#endif
};
#define fname_name(p) ((p)->disk_name.name)
@@ -2302,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
+#ifdef CONFIG_UNICODE
+extern void ext4_fname_setup_ci_filename(struct inode *dir,
+ const struct qstr *iname,
+ struct fscrypt_str *fname);
+#endif
+
#ifdef CONFIG_FS_ENCRYPTION
static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst,
const struct fscrypt_name *src)
@@ -2328,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
return 0;
}
@@ -2343,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
+#endif
return 0;
}
@@ -2356,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
+
+#ifdef CONFIG_UNICODE
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
}
#else /* !CONFIG_FS_ENCRYPTION */
static inline int ext4_fname_setup_filename(struct inode *dir,
@@ -2366,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
+
return 0;
}
@@ -2376,7 +2404,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname);
}
-static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
+static inline void ext4_fname_free_filename(struct ext4_filename *fname)
+{
+#ifdef CONFIG_UNICODE
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
+}
#endif /* !CONFIG_FS_ENCRYPTION */
/* dir.c */
@@ -2568,8 +2602,8 @@ extern int ext4_ext_migrate(struct inode *);
extern int ext4_ind_migrate(struct inode *inode);
/* namei.c */
-extern int ext4_dirent_csum_verify(struct inode *inode,
- struct ext4_dir_entry *dirent);
+extern int ext4_dirblock_csum_verify(struct inode *inode,
+ struct buffer_head *bh);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@@ -3070,11 +3104,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle,
extern int ext4_read_inline_dir(struct file *filp,
struct dir_context *ctx,
int *has_inline_data);
-extern int htree_inlinedir_to_tree(struct file *dir_file,
- struct inode *dir, ext4_lblk_t block,
- struct dx_hash_info *hinfo,
- __u32 start_hash, __u32 start_minor_hash,
- int *has_inline_data);
+extern int ext4_inlinedir_to_tree(struct file *dir_file,
+ struct inode *dir, ext4_lblk_t block,
+ struct dx_hash_info *hinfo,
+ __u32 start_hash, __u32 start_minor_hash,
+ int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir,
@@ -3113,14 +3147,13 @@ extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
struct ext4_dir_entry_2 *de,
int blocksize, int csum_size,
unsigned int parent_ino, int dotdot_real_len);
-extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
- unsigned int blocksize);
-extern int ext4_handle_dirty_dirent_node(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh);
+extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
+ unsigned int blocksize);
+extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
+ struct buffer_head *bh);
extern int ext4_ci_compare(const struct inode *parent,
- const struct qstr *name,
- const struct qstr *entry);
+ const struct qstr *fname,
+ const struct qstr *entry, bool quick);
#define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 75a5309f2231..ef8fcf7d0d3b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -361,20 +361,20 @@ static inline int ext4_journal_force_commit(journal_t *journal)
}
static inline int ext4_jbd2_inode_add_write(handle_t *handle,
- struct inode *inode)
+ struct inode *inode, loff_t start_byte, loff_t length)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_inode_add_write(handle,
- EXT4_I(inode)->jinode);
+ return jbd2_journal_inode_ranged_write(handle,
+ EXT4_I(inode)->jinode, start_byte, length);
return 0;
}
static inline int ext4_jbd2_inode_add_wait(handle_t *handle,
- struct inode *inode)
+ struct inode *inode, loff_t start_byte, loff_t length)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_inode_add_wait(handle,
- EXT4_I(inode)->jinode);
+ return jbd2_journal_inode_ranged_wait(handle,
+ EXT4_I(inode)->jinode, start_byte, length);
return 0;
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d40ed940001e..92266a2da7d6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5676,8 +5676,8 @@ out_mutex:
}
/**
- * ext4_swap_extents - Swap extents between two inodes
- *
+ * ext4_swap_extents() - Swap extents between two inodes
+ * @handle: handle for this transaction
* @inode1: First inode
* @inode2: Second inode
* @lblk1: Start block for first inode
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 023a3eb3afa3..7521de2dcf3a 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1317,7 +1317,6 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
if (!es)
goto out_wrap;
- node = &es->rb_node;
while (*nr_to_scan > 0) {
if (es->es_lblk > end) {
ei->i_es_shrink_lblk = end + 1;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2c5baa5e8291..f4a24a46245e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
ret = generic_write_checks(iocb, from);
if (ret <= 0)
return ret;
+
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
/*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 2024d3fa5504..36699a131168 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -294,14 +294,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
}
/**
- * ext4_alloc_branch - allocate and set up a chain of blocks.
- * @handle: handle for this transaction
- * @inode: owner
- * @indirect_blks: number of allocated indirect blocks
- * @blks: number of allocated direct blocks
- * @goal: preferred place for allocation
- * @offsets: offsets (in the blocks) to store the pointers to next.
- * @branch: place to store the chain in.
+ * ext4_alloc_branch() - allocate and set up a chain of blocks
+ * @handle: handle for this transaction
+ * @ar: structure describing the allocation request
+ * @indirect_blks: number of allocated indirect blocks
+ * @offsets: offsets (in the blocks) to store the pointers to next.
+ * @branch: place to store the chain in.
*
* This function allocates blocks, zeroes out all but the last one,
* links them into chain and (if we are synchronous) writes them to disk.
@@ -396,15 +394,11 @@ failed:
}
/**
- * ext4_splice_branch - splice the allocated branch onto inode.
+ * ext4_splice_branch() - splice the allocated branch onto inode.
* @handle: handle for this transaction
- * @inode: owner
- * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- * ext4_alloc_branch)
+ * @ar: structure describing the allocation request
* @where: location of missing link
* @num: number of indirect blocks we are adding
- * @blks: number of direct blocks we are adding
*
* This function fills the missing link and does all housekeeping needed in
* inode (->i_blocks, etc.). In case of success we end up with the full
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index f73bc3925282..88cdf3c90bd1 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1132,7 +1132,6 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
{
int err, csum_size = 0, header_size = 0;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
void *target = dir_block->b_data;
/*
@@ -1158,13 +1157,11 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
inode->i_sb->s_blocksize - csum_size);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(dir_block->b_data,
- inode->i_sb->s_blocksize);
- initialize_dirent_tail(t, inode->i_sb->s_blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(dir_block,
+ inode->i_sb->s_blocksize);
set_buffer_uptodate(dir_block);
- err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
+ err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
return err;
set_buffer_verified(dir_block);
@@ -1327,11 +1324,11 @@ out:
* inlined dir. It returns the number directory entries loaded
* into the tree. If there is an error it is returned in err.
*/
-int htree_inlinedir_to_tree(struct file *dir_file,
- struct inode *dir, ext4_lblk_t block,
- struct dx_hash_info *hinfo,
- __u32 start_hash, __u32 start_minor_hash,
- int *has_inline_data)
+int ext4_inlinedir_to_tree(struct file *dir_file,
+ struct inode *dir, ext4_lblk_t block,
+ struct dx_hash_info *hinfo,
+ __u32 start_hash, __u32 start_minor_hash,
+ int *has_inline_data)
{
int err = 0, count = 0;
unsigned int parent_ino;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c7f77c643008..420fe3deed39 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -731,10 +731,16 @@ out_sem:
!(flags & EXT4_GET_BLOCKS_ZERO) &&
!ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
+ loff_t start_byte =
+ (loff_t)map->m_lblk << inode->i_blkbits;
+ loff_t length = (loff_t)map->m_len << inode->i_blkbits;
+
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
- ret = ext4_jbd2_inode_add_wait(handle, inode);
+ ret = ext4_jbd2_inode_add_wait(handle, inode,
+ start_byte, length);
else
- ret = ext4_jbd2_inode_add_write(handle, inode);
+ ret = ext4_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
if (ret)
return ret;
}
@@ -1164,8 +1170,9 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
int err = 0;
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned bbits;
- struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
- bool decrypt = false;
+ struct buffer_head *bh, *head, *wait[2];
+ int nr_wait = 0;
+ int i;
BUG_ON(!PageLocked(page));
BUG_ON(from > PAGE_SIZE);
@@ -1217,23 +1224,32 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
ll_rw_block(REQ_OP_READ, 0, 1, &bh);
- *wait_bh++ = bh;
- decrypt = IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode);
+ wait[nr_wait++] = bh;
}
}
/*
* If we issued read requests, let them complete.
*/
- while (wait_bh > wait) {
- wait_on_buffer(*--wait_bh);
- if (!buffer_uptodate(*wait_bh))
+ for (i = 0; i < nr_wait; i++) {
+ wait_on_buffer(wait[i]);
+ if (!buffer_uptodate(wait[i]))
err = -EIO;
}
- if (unlikely(err))
+ if (unlikely(err)) {
page_zero_new_buffers(page, from, to);
- else if (decrypt)
- err = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
+ } else if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
+ for (i = 0; i < nr_wait; i++) {
+ int err2;
+
+ err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize,
+ bh_offset(wait[i]));
+ if (err2) {
+ clear_buffer_uptodate(wait[i]);
+ err = err2;
+ }
+ }
+ }
+
return err;
}
#endif
@@ -4065,9 +4081,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
/* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode));
- BUG_ON(blocksize != PAGE_SIZE);
- WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
- page, PAGE_SIZE, 0, page->index));
+ WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks(
+ page, blocksize, bh_offset(bh)));
}
}
if (ext4_should_journal_data(inode)) {
@@ -4085,7 +4100,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
err = 0;
mark_buffer_dirty(bh);
if (ext4_should_order_data(inode))
- err = ext4_jbd2_inode_add_write(handle, inode);
+ err = ext4_jbd2_inode_add_write(handle, inode, from,
+ length);
}
unlock:
@@ -4570,6 +4586,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
struct buffer_head *bh;
struct super_block *sb = inode->i_sb;
ext4_fsblk_t block;
+ struct blk_plug plug;
int inodes_per_block, inode_offset;
iloc->bh = NULL;
@@ -4658,6 +4675,7 @@ make_io:
* If we need to do any I/O, try to pre-readahead extra
* blocks from the inode table.
*/
+ blk_start_plug(&plug);
if (EXT4_SB(sb)->s_inode_readahead_blks) {
ext4_fsblk_t b, end, table;
unsigned num;
@@ -4688,6 +4706,7 @@ make_io:
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+ blk_finish_plug(&plug);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE_BLOCK(inode, block,
@@ -5520,6 +5539,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
+ if (unlikely(IS_APPEND(inode) &&
+ (ia_valid & (ATTR_MODE | ATTR_UID |
+ ATTR_GID | ATTR_TIMES_SET))))
+ return -EPERM;
+
error = setattr_prepare(dentry, attr);
if (error)
return error;
@@ -5571,7 +5598,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_SIZE) {
handle_t *handle;
loff_t oldsize = inode->i_size;
- int shrink = (attr->ia_size <= inode->i_size);
+ int shrink = (attr->ia_size < inode->i_size);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -5585,18 +5612,33 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
inode_inc_iversion(inode);
- if (ext4_should_order_data(inode) &&
- (attr->ia_size < inode->i_size)) {
- error = ext4_begin_ordered_truncate(inode,
+ if (shrink) {
+ if (ext4_should_order_data(inode)) {
+ error = ext4_begin_ordered_truncate(inode,
attr->ia_size);
- if (error)
- goto err_out;
+ if (error)
+ goto err_out;
+ }
+ /*
+ * Blocks are going to be removed from the inode. Wait
+ * for dio in flight.
+ */
+ inode_dio_wait(inode);
+ }
+
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ rc = ext4_break_layouts(inode);
+ if (rc) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ return rc;
}
+
if (attr->ia_size != inode->i_size) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
- goto err_out;
+ goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
@@ -5624,42 +5666,31 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
i_size_write(inode, attr->ia_size);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
- if (error) {
- if (orphan && inode->i_nlink)
- ext4_orphan_del(NULL, inode);
- goto err_out;
+ if (error)
+ goto out_mmap_sem;
+ if (!shrink) {
+ pagecache_isize_extended(inode, oldsize,
+ inode->i_size);
+ } else if (ext4_should_journal_data(inode)) {
+ ext4_wait_for_tail_page_commit(inode);
}
}
- if (!shrink) {
- pagecache_isize_extended(inode, oldsize, inode->i_size);
- } else {
- /*
- * Blocks are going to be removed from the inode. Wait
- * for dio in flight.
- */
- inode_dio_wait(inode);
- }
- if (orphan && ext4_should_journal_data(inode))
- ext4_wait_for_tail_page_commit(inode);
- down_write(&EXT4_I(inode)->i_mmap_sem);
-
- rc = ext4_break_layouts(inode);
- if (rc) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
- error = rc;
- goto err_out;
- }
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, inode->i_size);
- if (shrink) {
+ /*
+ * Call ext4_truncate() even if i_size didn't change to
+ * truncate possible preallocated blocks.
+ */
+ if (attr->ia_size <= oldsize) {
rc = ext4_truncate(inode);
if (rc)
error = rc;
}
+out_mmap_sem:
up_write(&EXT4_I(inode)->i_mmap_sem);
}
@@ -6190,6 +6221,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
get_block_t *get_block;
int retries = 0;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return VM_FAULT_SIGBUS;
+
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e486e49b31ed..74648d42c69b 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16])
}
#endif
+/*
+ * If immutable is set and we are not clearing it, we're not allowed to change
+ * anything else in the inode. Don't error out if we're only trying to set
+ * immutable on an immutable file.
+ */
+static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
+ unsigned int flags)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned int oldflags = ei->i_flags;
+
+ if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL))
+ return 0;
+
+ if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL))
+ return -EPERM;
+ if (ext4_has_feature_project(inode->i_sb) &&
+ __kprojid_val(ei->i_projid) != new_projid)
+ return -EPERM;
+
+ return 0;
+}
+
static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
{
@@ -340,6 +363,20 @@ static int ext4_ioctl_setflags(struct inode *inode,
}
}
+ /*
+ * Wait for all pending directio and then flush all the dirty pages
+ * for this file. The flush marks all the pages readonly, so any
+ * subsequent attempt to write to the file (particularly mmap pages)
+ * will come through the filesystem and fail.
+ */
+ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) &&
+ (flags & EXT4_IMMUTABLE_FL)) {
+ inode_dio_wait(inode);
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto flags_out;
+ }
+
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
@@ -742,6 +779,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return ext4_ioc_getfsmap(sb, (void __user *)arg);
case EXT4_IOC_GETFLAGS:
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
+ if (S_ISREG(inode->i_mode))
+ flags &= ~EXT4_PROJINHERIT_FL;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
int err;
@@ -769,7 +808,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return err;
inode_lock(inode);
- err = ext4_ioctl_setflags(inode, flags);
+ err = ext4_ioctl_check_immutable(inode,
+ from_kprojid(&init_user_ns, ei->i_projid),
+ flags);
+ if (!err)
+ err = ext4_ioctl_setflags(inode, flags);
inode_unlock(inode);
mnt_drop_write_file(filp);
return err;
@@ -1139,6 +1182,9 @@ resizefs_out:
goto out;
flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
(flags & EXT4_FL_XFLAG_VISIBLE);
+ err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags);
+ if (err)
+ goto out;
err = ext4_ioctl_setflags(inode, flags);
if (err)
goto out;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 99ba720dbb7a..a3e2767bdf2f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4696,8 +4696,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* ext4_free_blocks() -- Free given blocks and update quota
* @handle: handle for this transaction
* @inode: inode
- * @block: start physical block to free
- * @count: number of blocks to count
+ * @bh: optional buffer of the block to be freed
+ * @block: starting physical block to be freed
+ * @count: number of blocks to be freed
* @flags: flags used by ext4_free_blocks
*/
void ext4_free_blocks(handle_t *handle, struct inode *inode,
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 1083a9f3f16a..30ce3dc69378 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -13,11 +13,10 @@
#include "ext4_extents.h"
/**
- * get_ext_path - Find an extent path for designated logical block number.
- *
- * @inode: an inode which is searched
+ * get_ext_path() - Find an extent path for designated logical block number.
+ * @inode: inode to be searched
* @lblock: logical block number to find an extent path
- * @path: pointer to an extent path pointer (for output)
+ * @ppath: pointer to an extent path pointer (for output)
*
* ext4_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
@@ -42,8 +41,9 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
}
/**
- * ext4_double_down_write_data_sem - Acquire two inodes' write lock
- * of i_data_sem
+ * ext4_double_down_write_data_sem() - write lock two inodes' i_data_sem
+ * @first: inode to be locked
+ * @second: inode to be locked
*
* Acquire write lock of i_data_sem of the two inodes
*/
@@ -390,7 +390,8 @@ data_copy:
/* Even in case of data=writeback it is reasonable to pin
* inode to transaction, to prevent unexpected data loss */
- *err = ext4_jbd2_inode_add_write(handle, orig_inode);
+ *err = ext4_jbd2_inode_add_write(handle, orig_inode,
+ (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
unlock_pages:
unlock_page(pagep[0]);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cd01c4a67ffb..129029534075 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -82,8 +82,18 @@ static struct buffer_head *ext4_append(handle_t *handle,
static int ext4_dx_csum_verify(struct inode *inode,
struct ext4_dir_entry *dirent);
+/*
+ * Hints to ext4_read_dirblock regarding whether we expect a directory
+ * block being read to be an index block, or a block containing
+ * directory entries (and if the latter, whether it was found via a
+ * logical block in an htree index block). This is used to control
+ * what sort of sanity checking ext4_read_dirblock() will do on the
+ * directory block read from the storage device. EITHER means
+ * the caller doesn't know what kind of directory block will be read,
+ * so no specific verification will be done.
+ */
typedef enum {
- EITHER, INDEX, DIRENT
+ EITHER, INDEX, DIRENT, DIRENT_HTREE
} dirblock_type_t;
#define ext4_read_dirblock(inode, block, type) \
@@ -109,11 +119,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
return bh;
}
- if (!bh) {
+ if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
ext4_error_inode(inode, func, line, block,
- "Directory hole found");
+ "Directory hole found for htree %s block",
+ (type == INDEX) ? "index" : "leaf");
return ERR_PTR(-EFSCORRUPTED);
}
+ if (!bh)
+ return NULL;
dirent = (struct ext4_dir_entry *) bh->b_data;
/* Determine whether or not we have an index block */
if (is_dx(inode)) {
@@ -150,7 +163,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
}
}
if (!is_dx_block) {
- if (ext4_dirent_csum_verify(inode, dirent))
+ if (ext4_dirblock_csum_verify(inode, bh))
set_buffer_verified(bh);
else {
ext4_error_inode(inode, func, line, block,
@@ -280,9 +293,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
struct inode *dir, struct inode *inode);
/* checksumming functions */
-void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
- unsigned int blocksize)
+void ext4_initialize_dirent_tail(struct buffer_head *bh,
+ unsigned int blocksize)
{
+ struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
+
memset(t, 0, sizeof(struct ext4_dir_entry_tail));
t->det_rec_len = ext4_rec_len_to_disk(
sizeof(struct ext4_dir_entry_tail), blocksize);
@@ -291,17 +306,17 @@ void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
- struct ext4_dir_entry *de)
+ struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
#ifdef PARANOID
struct ext4_dir_entry *d, *top;
- d = de;
- top = (struct ext4_dir_entry *)(((void *)de) +
+ d = (struct ext4_dir_entry *)bh->b_data;
+ top = (struct ext4_dir_entry *)(bh->b_data +
(EXT4_BLOCK_SIZE(inode->i_sb) -
- sizeof(struct ext4_dir_entry_tail)));
+ sizeof(struct ext4_dir_entry_tail)));
while (d < top && d->rec_len)
d = (struct ext4_dir_entry *)(((void *)d) +
le16_to_cpu(d->rec_len));
@@ -311,7 +326,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
t = (struct ext4_dir_entry_tail *)d;
#else
- t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
+ t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb));
#endif
if (t->det_reserved_zero1 ||
@@ -323,8 +338,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
return t;
}
-static __le32 ext4_dirent_csum(struct inode *inode,
- struct ext4_dir_entry *dirent, int size)
+static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -344,49 +358,49 @@ static void __warn_no_space_for_csum(struct inode *inode, const char *func,
"No space for directory leaf checksum. Please run e2fsck -D.");
}
-int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
+int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
if (!ext4_has_metadata_csum(inode->i_sb))
return 1;
- t = get_dirent_tail(inode, dirent);
+ t = get_dirent_tail(inode, bh);
if (!t) {
warn_no_space_for_csum(inode);
return 0;
}
- if (t->det_checksum != ext4_dirent_csum(inode, dirent,
- (void *)t - (void *)dirent))
+ if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data,
+ (char *)t - bh->b_data))
return 0;
return 1;
}
-static void ext4_dirent_csum_set(struct inode *inode,
- struct ext4_dir_entry *dirent)
+static void ext4_dirblock_csum_set(struct inode *inode,
+ struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
if (!ext4_has_metadata_csum(inode->i_sb))
return;
- t = get_dirent_tail(inode, dirent);
+ t = get_dirent_tail(inode, bh);
if (!t) {
warn_no_space_for_csum(inode);
return;
}
- t->det_checksum = ext4_dirent_csum(inode, dirent,
- (void *)t - (void *)dirent);
+ t->det_checksum = ext4_dirblock_csum(inode, bh->b_data,
+ (char *)t - bh->b_data);
}
-int ext4_handle_dirty_dirent_node(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh)
+int ext4_handle_dirty_dirblock(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *bh)
{
- ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
+ ext4_dirblock_csum_set(inode, bh);
return ext4_handle_dirty_metadata(handle, inode, bh);
}
@@ -980,7 +994,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block));
- bh = ext4_read_dirblock(dir, block, DIRENT);
+ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
if (IS_ERR(bh))
return PTR_ERR(bh);
@@ -1090,10 +1104,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
if (ext4_has_inline_data(dir)) {
int has_inline_data = 1;
- count = htree_inlinedir_to_tree(dir_file, dir, 0,
- &hinfo, start_hash,
- start_minor_hash,
- &has_inline_data);
+ count = ext4_inlinedir_to_tree(dir_file, dir, 0,
+ &hinfo, start_hash,
+ start_minor_hash,
+ &has_inline_data);
if (has_inline_data) {
*next_hash = ~0;
return count;
@@ -1259,19 +1273,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
#ifdef CONFIG_UNICODE
/*
* Test whether a case-insensitive directory entry matches the filename
- * being searched for.
+ * being searched for. If quick is set, assume the name being looked up
+ * is already in the casefolded form.
*
* Returns: 0 if the directory entry matches, more than 0 if it
* doesn't match or less than zero on error.
*/
int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
- const struct qstr *entry)
+ const struct qstr *entry, bool quick)
{
const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
const struct unicode_map *um = sbi->s_encoding;
int ret;
- ret = utf8_strncasecmp(um, name, entry);
+ if (quick)
+ ret = utf8_strncasecmp_folded(um, name, entry);
+ else
+ ret = utf8_strncasecmp(um, name, entry);
+
if (ret < 0) {
/* Handle invalid character sequence as either an error
* or as an opaque byte sequence.
@@ -1287,6 +1306,32 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
return ret;
}
+
+void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+ struct fscrypt_str *cf_name)
+{
+ int len;
+
+ if (!IS_CASEFOLDED(dir)) {
+ cf_name->name = NULL;
+ return;
+ }
+
+ cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
+ if (!cf_name->name)
+ return;
+
+ len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding,
+ iname, cf_name->name,
+ EXT4_NAME_LEN);
+ if (len <= 0) {
+ kfree(cf_name->name);
+ cf_name->name = NULL;
+ return;
+ }
+ cf_name->len = (unsigned) len;
+
+}
#endif
/*
@@ -1313,8 +1358,15 @@ static inline bool ext4_match(const struct inode *parent,
#endif
#ifdef CONFIG_UNICODE
- if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent))
- return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0);
+ if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+ if (fname->cf_name.name) {
+ struct qstr cf = {.name = fname->cf_name.name,
+ .len = fname->cf_name.len};
+ return !ext4_ci_compare(parent, &cf, &entry, true);
+ }
+ return !ext4_ci_compare(parent, fname->usr_fname, &entry,
+ false);
+ }
#endif
return fscrypt_match_name(&f, de->name, de->name_len);
@@ -1484,8 +1536,7 @@ restart:
if (!buffer_verified(bh) &&
!is_dx_internal_node(dir, block,
(struct ext4_dir_entry *)bh->b_data) &&
- !ext4_dirent_csum_verify(dir,
- (struct ext4_dir_entry *)bh->b_data)) {
+ !ext4_dirblock_csum_verify(dir, bh)) {
EXT4_ERROR_INODE(dir, "checksumming directory "
"block %lu", (unsigned long)block);
brelse(bh);
@@ -1586,7 +1637,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
return (struct buffer_head *) frame;
do {
block = dx_get_block(frame->at);
- bh = ext4_read_dirblock(dir, block, DIRENT);
+ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
if (IS_ERR(bh))
goto errout;
@@ -1769,7 +1820,6 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size;
struct ext4_dir_entry_2 *de = NULL, *de2;
- struct ext4_dir_entry_tail *t;
int csum_size = 0;
int err = 0, i;
@@ -1830,11 +1880,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
(char *) de2,
blocksize);
if (csum_size) {
- t = EXT4_DIRENT_TAIL(data2, blocksize);
- initialize_dirent_tail(t, blocksize);
-
- t = EXT4_DIRENT_TAIL(data1, blocksize);
- initialize_dirent_tail(t, blocksize);
+ ext4_initialize_dirent_tail(*bh, blocksize);
+ ext4_initialize_dirent_tail(bh2, blocksize);
}
dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
@@ -1848,7 +1895,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
de = de2;
}
dx_insert_block(frame, hash2 + continued, newblock);
- err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (err)
goto journal_error;
err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
@@ -1976,7 +2023,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
inode_inc_iversion(dir);
ext4_mark_inode_dirty(handle, dir);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh);
if (err)
ext4_std_error(dir->i_sb, err);
return 0;
@@ -1995,8 +2042,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries;
struct ext4_dir_entry_2 *de, *de2;
- struct ext4_dir_entry_tail *t;
- char *data1, *top;
+ char *data2, *top;
unsigned len;
int retval;
unsigned blocksize;
@@ -2036,21 +2082,18 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
return PTR_ERR(bh2);
}
ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
- data1 = bh2->b_data;
+ data2 = bh2->b_data;
- memcpy (data1, de, len);
- de = (struct ext4_dir_entry_2 *) data1;
- top = data1 + len;
+ memcpy(data2, de, len);
+ de = (struct ext4_dir_entry_2 *) data2;
+ top = data2 + len;
while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
de = de2;
- de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
- (char *) de,
- blocksize);
+ de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
+ (char *) de, blocksize);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(data1, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(bh2, blocksize);
/* Initialize the root; the dot dirents already exist */
de = (struct ext4_dir_entry_2 *) (&root->dotdot);
@@ -2080,7 +2123,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (retval)
goto out_frames;
- retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
+ retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (retval)
goto out_frames;
@@ -2120,7 +2163,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
struct super_block *sb;
struct ext4_sb_info *sbi;
struct ext4_filename fname;
@@ -2170,6 +2212,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0; block < blocks; block++) {
bh = ext4_read_dirblock(dir, block, DIRENT);
+ if (bh == NULL) {
+ bh = ext4_bread(handle, dir, block,
+ EXT4_GET_BLOCKS_CREATE);
+ goto add_to_new_block;
+ }
if (IS_ERR(bh)) {
retval = PTR_ERR(bh);
bh = NULL;
@@ -2190,6 +2237,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
brelse(bh);
}
bh = ext4_append(handle, dir, &block);
+add_to_new_block:
if (IS_ERR(bh)) {
retval = PTR_ERR(bh);
bh = NULL;
@@ -2199,10 +2247,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
de->inode = 0;
de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(bh, blocksize);
retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
out:
@@ -2234,7 +2280,7 @@ again:
return PTR_ERR(frame);
entries = frame->entries;
at = frame->at;
- bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
+ bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
bh = NULL;
@@ -2460,7 +2506,7 @@ static int ext4_delete_entry(handle_t *handle,
goto out;
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh);
if (unlikely(err))
goto out;
@@ -2662,7 +2708,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
{
struct buffer_head *dir_block = NULL;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
ext4_lblk_t block = 0;
unsigned int blocksize = dir->i_sb->s_blocksize;
int csum_size = 0;
@@ -2686,13 +2731,11 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
de = (struct ext4_dir_entry_2 *)dir_block->b_data;
ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
set_nlink(inode, 2);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(dir_block, blocksize);
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
+ err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
goto out;
set_buffer_verified(dir_block);
@@ -2782,7 +2825,10 @@ bool ext4_empty_dir(struct inode *inode)
EXT4_ERROR_INODE(inode, "invalid size");
return true;
}
- bh = ext4_read_dirblock(inode, 0, EITHER);
+ /* The first directory block must not be a hole,
+ * so treat it as DIRENT_HTREE
+ */
+ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
return true;
@@ -2804,6 +2850,10 @@ bool ext4_empty_dir(struct inode *inode)
brelse(bh);
lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
bh = ext4_read_dirblock(inode, lblock, EITHER);
+ if (bh == NULL) {
+ offset += sb->s_blocksize;
+ continue;
+ }
if (IS_ERR(bh))
return true;
de = (struct ext4_dir_entry_2 *) bh->b_data;
@@ -3369,7 +3419,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
struct buffer_head *bh;
if (!ext4_has_inline_data(inode)) {
- bh = ext4_read_dirblock(inode, 0, EITHER);
+ /* The first directory block must not be a hole, so
+ * treat it as DIRENT_HTREE
+ */
+ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh)) {
*retval = PTR_ERR(bh);
return NULL;
@@ -3430,9 +3483,8 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
ent->inode,
ent->dir_bh);
} else {
- retval = ext4_handle_dirty_dirent_node(handle,
- ent->inode,
- ent->dir_bh);
+ retval = ext4_handle_dirty_dirblock(handle, ent->inode,
+ ent->dir_bh);
}
} else {
retval = ext4_mark_inode_dirty(handle, ent->inode);
@@ -3462,8 +3514,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
ext4_mark_inode_dirty(handle, ent->dir);
BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
if (!ent->inlined) {
- retval = ext4_handle_dirty_dirent_node(handle,
- ent->dir, ent->bh);
+ retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
if (unlikely(retval)) {
ext4_std_error(ent->dir->i_sb, retval);
return retval;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4690618a92e9..a18a47a2a1d1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -66,9 +66,7 @@ static void ext4_finish_bio(struct bio *bio)
bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
-#ifdef CONFIG_FS_ENCRYPTION
- struct page *data_page = NULL;
-#endif
+ struct page *bounce_page = NULL;
struct buffer_head *bh, *head;
unsigned bio_start = bvec->bv_offset;
unsigned bio_end = bio_start + bvec->bv_len;
@@ -78,13 +76,10 @@ static void ext4_finish_bio(struct bio *bio)
if (!page)
continue;
-#ifdef CONFIG_FS_ENCRYPTION
- if (!page->mapping) {
- /* The bounce data pages are unmapped. */
- data_page = page;
- fscrypt_pullback_bio_page(&page, false);
+ if (fscrypt_is_bounce_page(page)) {
+ bounce_page = page;
+ page = fscrypt_pagecache_page(bounce_page);
}
-#endif
if (bio->bi_status) {
SetPageError(page);
@@ -111,10 +106,7 @@ static void ext4_finish_bio(struct bio *bio)
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
local_irq_restore(flags);
if (!under_io) {
-#ifdef CONFIG_FS_ENCRYPTION
- if (data_page)
- fscrypt_restore_control_page(data_page);
-#endif
+ fscrypt_free_bounce_page(bounce_page);
end_page_writeback(page);
}
}
@@ -415,7 +407,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
struct writeback_control *wbc,
bool keep_towrite)
{
- struct page *data_page = NULL;
+ struct page *bounce_page = NULL;
struct inode *inode = page->mapping->host;
unsigned block_start;
struct buffer_head *bh, *head;
@@ -475,14 +467,22 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
bh = head = page_buffers(page);
+ /*
+ * If any blocks are being written to an encrypted file, encrypt them
+ * into a bounce page. For simplicity, just encrypt until the last
+ * block which might be needed. This may cause some unneeded blocks
+ * (e.g. holes) to be unnecessarily encrypted, but this is rare and
+ * can't happen in the common case of blocksize == PAGE_SIZE.
+ */
if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && nr_to_submit) {
gfp_t gfp_flags = GFP_NOFS;
+ unsigned int enc_bytes = round_up(len, i_blocksize(inode));
retry_encrypt:
- data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
- page->index, gfp_flags);
- if (IS_ERR(data_page)) {
- ret = PTR_ERR(data_page);
+ bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
+ 0, gfp_flags);
+ if (IS_ERR(bounce_page)) {
+ ret = PTR_ERR(bounce_page);
if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
if (io->io_bio) {
ext4_io_submit(io);
@@ -491,7 +491,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
- data_page = NULL;
+ bounce_page = NULL;
goto out;
}
}
@@ -500,8 +500,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- ret = io_submit_add_bh(io, inode,
- data_page ? data_page : page, bh);
+ ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
@@ -517,8 +516,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
/* Error stopped previous loop? Clean up buffers... */
if (ret) {
out:
- if (data_page)
- fscrypt_restore_control_page(data_page);
+ fscrypt_free_bounce_page(bounce_page);
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
redirty_page_for_writepage(wbc, page);
do {
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 04b4f53f0659..b3cd7655a6ff 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -230,6 +230,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(journal_task),
NULL,
};
+ATTRIBUTE_GROUPS(ext4);
/* Features this copy of ext4 supports */
EXT4_ATTR_FEATURE(lazy_itable_init);
@@ -256,6 +257,7 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(metadata_csum_seed),
NULL,
};
+ATTRIBUTE_GROUPS(ext4_feat);
static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi)
{
@@ -374,13 +376,13 @@ static const struct sysfs_ops ext4_attr_ops = {
};
static struct kobj_type ext4_sb_ktype = {
- .default_attrs = ext4_attrs,
+ .default_groups = ext4_groups,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_sb_release,
};
static struct kobj_type ext4_feat_ktype = {
- .default_attrs = ext4_feat_attrs,
+ .default_groups = ext4_feat_groups,
.sysfs_ops = &ext4_attr_ops,
.release = (void (*)(struct kobject *))kfree,
};
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index eda4181d2092..a546ac8685ea 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -185,7 +185,7 @@ static void f2fs_write_end_io(struct bio *bio)
continue;
}
- fscrypt_pullback_bio_page(&page, true);
+ fscrypt_finalize_bounce_page(&page);
if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
@@ -362,10 +362,9 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
bio_for_each_segment_all(bvec, io->bio, iter_all) {
- if (bvec->bv_page->mapping)
- target = bvec->bv_page;
- else
- target = fscrypt_control_page(bvec->bv_page);
+ target = bvec->bv_page;
+ if (fscrypt_is_bounce_page(target))
+ target = fscrypt_pagecache_page(target);
if (inode && inode == target->mapping->host)
return true;
@@ -1727,8 +1726,9 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
retry_encrypt:
- fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
- PAGE_SIZE, 0, fio->page->index, gfp_flags);
+ fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page,
+ PAGE_SIZE, 0,
+ gfp_flags);
if (IS_ERR(fio->encrypted_page)) {
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
@@ -1900,8 +1900,7 @@ got_it:
err = f2fs_inplace_write_data(fio);
if (err) {
if (f2fs_encrypted_file(inode))
- fscrypt_pullback_bio_page(&fio->encrypted_page,
- true);
+ fscrypt_finalize_bounce_page(&fio->encrypted_page);
if (PageWriteback(page))
end_page_writeback(page);
} else {
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 67b7bda5647a..72ebfe578f40 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -317,7 +317,7 @@ static void fscache_objlist_config(struct fscache_objlist_data *data)
const char *buf;
int len;
- key = request_key(&key_type_user, "fscache:objlist", NULL, NULL);
+ key = request_key(&key_type_user, "fscache:objlist", NULL);
if (IS_ERR(key))
goto no_config;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b8f9c83835d5..5ae2828beb00 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3112,9 +3112,9 @@ out:
return err;
}
-static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags)
+static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
{
struct fuse_file *ff_in = file_in->private_data;
struct fuse_file *ff_out = file_out->private_data;
@@ -3142,6 +3142,9 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (fc->no_copy_file_range)
return -EOPNOTSUPP;
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ return -EXDEV;
+
if (fc->writeback_cache) {
inode_lock(inode_in);
err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
@@ -3152,6 +3155,10 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
inode_lock(inode_out);
+ err = file_modified(file_out);
+ if (err)
+ goto out;
+
if (fc->writeback_cache) {
err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
if (err)
@@ -3190,10 +3197,26 @@ out:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
inode_unlock(inode_out);
+ file_accessed(file_in);
return err;
}
+static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len, flags);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len, flags);
+ return ret;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index abeac61cfed3..f42048cc5454 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -82,15 +82,11 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
}
/**
- * gfs2_writepage_common - Common bits of writepage
- * @page: The page to be written
+ * gfs2_writepage - Write page for writeback mappings
+ * @page: The page
* @wbc: The writeback control
- *
- * Returns: 1 if writepage is ok, otherwise an error code or zero if no error.
*/
-
-static int gfs2_writepage_common(struct page *page,
- struct writeback_control *wbc)
+static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
@@ -109,7 +105,9 @@ static int gfs2_writepage_common(struct page *page,
page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
goto out;
}
- return 1;
+
+ return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
+
redirty:
redirty_page_for_writepage(wbc, page);
out:
@@ -117,24 +115,6 @@ out:
return 0;
}
-/**
- * gfs2_writepage - Write page for writeback mappings
- * @page: The page
- * @wbc: The writeback control
- *
- */
-
-static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
-{
- int ret;
-
- ret = gfs2_writepage_common(page, wbc);
- if (ret <= 0)
- return ret;
-
- return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
-}
-
/* This is the same as calling block_write_full_page, but it also
* writes pages outside of i_size
*/
@@ -454,8 +434,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
*
* Returns: errno
*/
-
-int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
struct buffer_head *dibh;
u64 dsize = i_size_read(&ip->i_inode);
@@ -518,7 +497,7 @@ static int __gfs2_readpage(void *file, struct page *page)
error = mpage_readpage(page, gfs2_block_map);
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
return error;
@@ -635,7 +614,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
ret = -EIO;
return ret;
}
@@ -686,47 +665,6 @@ out:
}
/**
- * gfs2_stuffed_write_end - Write end for stuffed files
- * @inode: The inode
- * @dibh: The buffer_head containing the on-disk inode
- * @pos: The file position
- * @copied: How much was actually copied by the VFS
- * @page: The page
- *
- * This copies the data from the page into the inode block after
- * the inode data structure itself.
- *
- * Returns: copied bytes or errno
- */
-int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned copied,
- struct page *page)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- u64 to = pos + copied;
- void *kaddr;
- unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
-
- BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
-
- kaddr = kmap_atomic(page);
- memcpy(buf + pos, kaddr + pos, copied);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
-
- WARN_ON(!PageUptodate(page));
- unlock_page(page);
- put_page(page);
-
- if (copied) {
- if (inode->i_size < to)
- i_size_write(inode, to);
- mark_inode_dirty(inode);
- }
- return copied;
-}
-
-/**
* jdata_set_page_dirty - Page dirtying function
* @page: The page to dirty
*
@@ -759,7 +697,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
return 0;
if (!gfs2_is_stuffed(ip))
- dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
+ dblock = iomap_bmap(mapping, lblock, &gfs2_iomap_ops);
gfs2_glock_dq_uninit(&i_gh);
@@ -888,7 +826,7 @@ cannot_release:
return 0;
}
-static const struct address_space_operations gfs2_writeback_aops = {
+static const struct address_space_operations gfs2_aops = {
.writepage = gfs2_writepage,
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
@@ -902,21 +840,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
.error_remove_page = generic_error_remove_page,
};
-static const struct address_space_operations gfs2_ordered_aops = {
- .writepage = gfs2_writepage,
- .writepages = gfs2_writepages,
- .readpage = gfs2_readpage,
- .readpages = gfs2_readpages,
- .set_page_dirty = __set_page_dirty_buffers,
- .bmap = gfs2_bmap,
- .invalidatepage = gfs2_invalidatepage,
- .releasepage = gfs2_releasepage,
- .direct_IO = noop_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
-};
-
static const struct address_space_operations gfs2_jdata_aops = {
.writepage = gfs2_jdata_writepage,
.writepages = gfs2_jdata_writepages,
@@ -932,15 +855,8 @@ static const struct address_space_operations gfs2_jdata_aops = {
void gfs2_set_aops(struct inode *inode)
{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
-
- if (gfs2_is_jdata(ip))
+ if (gfs2_is_jdata(GFS2_I(inode)))
inode->i_mapping->a_ops = &gfs2_jdata_aops;
- else if (gfs2_is_writeback(sdp))
- inode->i_mapping->a_ops = &gfs2_writeback_aops;
- else if (gfs2_is_ordered(sdp))
- inode->i_mapping->a_ops = &gfs2_ordered_aops;
else
- BUG();
+ inode->i_mapping->a_ops = &gfs2_aops;
}
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index fa8e5d0144dd..ff9877a68780 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -8,10 +8,6 @@
#include "incore.h"
-extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page);
-extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned copied,
- struct page *page);
extern void adjust_fs_space(struct inode *inode);
extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
unsigned int from, unsigned int len);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 93ea1d529aa3..79581b9bdebb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -595,7 +595,6 @@ enum alloc_state {
* gfs2_iomap_alloc - Build a metadata tree of the requested height
* @inode: The GFS2 inode
* @iomap: The iomap structure
- * @flags: iomap flags
* @mp: The metapath, with proper height information calculated
*
* In this routine we may have to alloc:
@@ -622,7 +621,7 @@ enum alloc_state {
*/
static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
- unsigned flags, struct metapath *mp)
+ struct metapath *mp)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1088,7 +1087,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
}
if (iomap->type == IOMAP_HOLE) {
- ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
+ ret = gfs2_iomap_alloc(inode, iomap, mp);
if (ret) {
gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
@@ -1182,6 +1181,8 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
if (ip->i_qadata && ip->i_qadata->qa_qd_num)
gfs2_quota_unlock(ip);
+ if (iomap->flags & IOMAP_F_SIZE_CHANGED)
+ mark_inode_dirty(inode);
gfs2_write_unlock(inode);
out:
@@ -1232,7 +1233,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
if (create) {
ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
if (!ret && iomap.type == IOMAP_HOLE)
- ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
+ ret = gfs2_iomap_alloc(inode, &iomap, &mp);
release_metapath(&mp);
} else {
ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
@@ -1462,7 +1463,7 @@ int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
if (!ret && iomap->type == IOMAP_HOLE)
- ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
+ ret = gfs2_iomap_alloc(inode, iomap, &mp);
release_metapath(&mp);
return ret;
}
@@ -1862,9 +1863,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
gfs2_assert_withdraw(sdp, bh);
if (gfs2_assert_withdraw(sdp,
prev_bnr != bh->b_blocknr)) {
- printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
- "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
- sdp->sd_fsname,
+ fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
+ "s_h:%u, mp_h:%u\n",
(unsigned long long)ip->i_no_addr,
prev_bnr, ip->i_height, strip_h, mp_h);
}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 88e4f955c518..6f35d19eec25 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -750,7 +750,7 @@ static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode,
struct gfs2_dirent *dent;
dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
gfs2_dirent_find_offset, name, ptr);
- if (!dent || IS_ERR(dent))
+ if (IS_ERR_OR_NULL(dent))
return dent;
return do_init_dirent(inode, dent, name, bh,
(unsigned)(ptr - (void *)dent));
@@ -854,7 +854,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
return ERR_PTR(error);
dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
got_dent:
- if (unlikely(dent == NULL || IS_ERR(dent))) {
+ if (IS_ERR_OR_NULL(dent)) {
brelse(bh);
bh = NULL;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d174b1f8fd08..8b0c2bfa90c1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -363,31 +363,30 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
}
/**
- * gfs2_allocate_page_backing - Use bmap to allocate blocks
+ * gfs2_allocate_page_backing - Allocate blocks for a write fault
* @page: The (locked) page to allocate backing for
*
- * We try to allocate all the blocks required for the page in
- * one go. This might fail for various reasons, so we keep
- * trying until all the blocks to back this page are allocated.
- * If some of the blocks are already allocated, thats ok too.
+ * We try to allocate all the blocks required for the page in one go. This
+ * might fail for various reasons, so we keep trying until all the blocks to
+ * back this page are allocated. If some of the blocks are already allocated,
+ * that is ok too.
*/
-
static int gfs2_allocate_page_backing(struct page *page)
{
- struct inode *inode = page->mapping->host;
- struct buffer_head bh;
- unsigned long size = PAGE_SIZE;
- u64 lblock = page->index << (PAGE_SHIFT - inode->i_blkbits);
+ u64 pos = page_offset(page);
+ u64 size = PAGE_SIZE;
do {
- bh.b_state = 0;
- bh.b_size = size;
- gfs2_block_map(inode, lblock, &bh, 1);
- if (!buffer_mapped(&bh))
+ struct iomap iomap = { };
+
+ if (gfs2_iomap_get_alloc(page->mapping->host, pos, 1, &iomap))
return -EIO;
- size -= bh.b_size;
- lblock += (bh.b_size >> inode->i_blkbits);
- } while(size > 0);
+
+ iomap.length = min(iomap.length, size);
+ size -= iomap.length;
+ pos += iomap.length;
+ } while (size > 0);
+
return 0;
}
@@ -408,7 +407,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned long last_index;
- u64 pos = page->index << PAGE_SHIFT;
+ u64 pos = page_offset(page);
unsigned int data_blocks, ind_blocks, rblocks;
struct gfs2_holder gh;
loff_t size;
@@ -1166,7 +1165,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
cmd = F_SETLK;
fl->fl_type = F_UNLCK;
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) {
if (fl->fl_type == F_UNLCK)
locks_lock_file_wait(file, fl);
return -EIO;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f1ebcb42cbf5..e23fb8b7b020 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -544,7 +544,7 @@ __acquires(&gl->gl_lockref.lock)
unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
int ret;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) &&
target != LM_ST_UNLOCKED)
return;
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
@@ -581,7 +581,7 @@ __acquires(&gl->gl_lockref.lock)
}
else if (ret) {
fs_err(sdp, "lm_lock ret %d\n", ret);
- GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN,
+ GLOCK_BUG_ON(gl, !test_bit(SDF_WITHDRAWN,
&sdp->sd_flags));
}
} else { /* lock_nolock */
@@ -681,7 +681,7 @@ static void delete_work_func(struct work_struct *work)
goto out;
inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
- if (inode && !IS_ERR(inode)) {
+ if (!IS_ERR_OR_NULL(inode)) {
d_prune_aliases(inode);
iput(inode);
}
@@ -1075,7 +1075,7 @@ trap_recursive:
fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
fs_err(sdp, "lock type: %d req lock state : %d\n",
gh->gh_gl->gl_name.ln_type, gh->gh_state);
- gfs2_dump_glock(NULL, gl);
+ gfs2_dump_glock(NULL, gl, true);
BUG();
}
@@ -1094,7 +1094,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
int error = 0;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
if (test_bit(GLF_LRU, &gl->gl_flags))
@@ -1610,16 +1610,16 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
glock_hash_walk(thaw_glock, sdp);
}
-static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
spin_lock(&gl->gl_lockref.lock);
- gfs2_dump_glock(seq, gl);
+ gfs2_dump_glock(seq, gl, fsid);
spin_unlock(&gl->gl_lockref.lock);
}
static void dump_glock_func(struct gfs2_glock *gl)
{
- dump_glock(NULL, gl);
+ dump_glock(NULL, gl, true);
}
/**
@@ -1704,10 +1704,12 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
* dump_holder - print information about a glock holder
* @seq: the seq_file struct
* @gh: the glock holder
+ * @fs_id_buf: pointer to file system id (if requested)
*
*/
-static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
+static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
+ const char *fs_id_buf)
{
struct task_struct *gh_owner = NULL;
char flags_buf[32];
@@ -1715,8 +1717,8 @@ static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
rcu_read_lock();
if (gh->gh_owner_pid)
gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
- gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
- state2str(gh->gh_state),
+ gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
+ fs_id_buf, state2str(gh->gh_state),
hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
gh->gh_error,
gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
@@ -1766,6 +1768,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
* gfs2_dump_glock - print information about a glock
* @seq: The seq_file struct
* @gl: the glock
+ * @fsid: If true, also dump the file system id
*
* The file format is as follows:
* One line per object, capital letters are used to indicate objects
@@ -1779,19 +1782,24 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*
*/
-void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned long long dtime;
const struct gfs2_holder *gh;
char gflags_buf[32];
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ memset(fs_id_buf, 0, sizeof(fs_id_buf));
+ if (fsid && sdp) /* safety precaution */
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
dtime = jiffies - gl->gl_demote_time;
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
- state2str(gl->gl_state),
+ gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
+ "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
gflags2str(gflags_buf, gl),
@@ -1802,10 +1810,10 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
(int)gl->gl_lockref.count, gl->gl_hold_time);
list_for_each_entry(gh, &gl->gl_holders, gh_list)
- dump_holder(seq, gh);
+ dump_holder(seq, gh, fs_id_buf);
if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
- glops->go_dump(seq, gl);
+ glops->go_dump(seq, gl, fs_id_buf);
}
static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2006,7 +2014,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
{
- dump_glock(seq, iter_ptr);
+ dump_glock(seq, iter_ptr, false);
return 0;
}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 149d7f6af085..e4e0bed5257c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -199,8 +199,11 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
struct gfs2_holder *gh);
extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl);
-#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0)
+extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
+ bool fsid);
+#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \
+ gfs2_dump_glock(NULL, gl, true); \
+ BUG(); } } while(0)
extern __printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
@@ -266,7 +269,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object)
{
spin_lock(&gl->gl_lockref.lock);
if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
- gfs2_dump_glock(NULL, gl);
+ gfs2_dump_glock(NULL, gl, true);
gl->gl_object = object;
spin_unlock(&gl->gl_lockref.lock);
}
@@ -278,7 +281,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object)
*
* I'd love to similarly add this:
* else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
- * gfs2_dump_glock(NULL, gl);
+ * gfs2_dump_glock(NULL, gl, true);
* Unfortunately, that's not possible because as soon as gfs2_delete_inode
* frees the block in the rgrp, another process can reassign it for an I_NEW
* inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index cf4c767005b1..ff213690e364 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -461,10 +461,12 @@ static int inode_go_lock(struct gfs2_holder *gh)
* inode_go_dump - print information about an inode
* @seq: The iterator
* @ip: the inode
+ * @fs_id_buf: file system id (may be empty)
*
*/
-static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl)
+static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf)
{
struct gfs2_inode *ip = gl->gl_object;
struct inode *inode = &ip->i_inode;
@@ -477,7 +479,8 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl)
nrpages = inode->i_data.nrpages;
xa_unlock_irq(&inode->i_data.i_pages);
- gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu p:%lu\n",
+ gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu "
+ "p:%lu\n", fs_id_buf,
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
IF2DT(ip->i_inode.i_mode), ip->i_flags,
@@ -503,7 +506,8 @@ static void freeze_go_sync(struct gfs2_glock *gl)
atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
error = freeze_super(sdp->sd_vfs);
if (error) {
- printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error);
+ fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
+ error);
gfs2_assert_withdraw(sdp, 0);
}
queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
@@ -536,7 +540,7 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
gfs2_consist(sdp);
/* Initialize some head of the log stuff */
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
sdp->sd_log_sequence = head.lh_sequence + 1;
gfs2_log_pointers_init(sdp, head.lh_blkno);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c9af93ac6c73..7a993d7c022e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -240,7 +240,8 @@ struct gfs2_glock_operations {
int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
- void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl);
+ void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
const int go_type;
const unsigned long go_flags;
@@ -504,7 +505,6 @@ struct gfs2_trans {
unsigned int tr_num_buf_rm;
unsigned int tr_num_databuf_rm;
unsigned int tr_num_revoke;
- unsigned int tr_num_revoke_rm;
struct list_head tr_list;
struct list_head tr_databuf;
@@ -609,7 +609,7 @@ struct gfs2_tune {
enum {
SDF_JOURNAL_CHECKED = 0,
SDF_JOURNAL_LIVE = 1,
- SDF_SHUTDOWN = 2,
+ SDF_WITHDRAWN = 2,
SDF_NOBARRIERS = 3,
SDF_NORECOVERY = 4,
SDF_DEMOTE = 5,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b296c59832a7..2e2a8a2fb51d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -793,7 +793,7 @@ fail_free_acls:
fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(ghs);
- if (inode && !IS_ERR(inode)) {
+ if (!IS_ERR_OR_NULL(inode)) {
clear_nlink(inode);
if (!free_vfs_inode)
mark_inode_dirty(inode);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index c4c9700c366e..58e237fba565 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -882,7 +882,6 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
old->tr_num_buf_rm += new->tr_num_buf_rm;
old->tr_num_databuf_rm += new->tr_num_databuf_rm;
old->tr_num_revoke += new->tr_num_revoke;
- old->tr_num_revoke_rm += new->tr_num_revoke_rm;
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
@@ -904,7 +903,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
set_bit(TR_ATTACHED, &tr->tr_flags);
}
- sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
+ sdp->sd_log_commited_revoke += tr->tr_num_revoke;
reserved = calc_reserved(sdp);
maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
gfs2_assert_withdraw(sdp, maxres >= reserved);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 1921cda034fd..5b17979af539 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -759,9 +759,27 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
if (gfs2_meta_check(sdp, bh_ip))
error = -EIO;
- else
+ else {
+ struct gfs2_meta_header *mh =
+ (struct gfs2_meta_header *)bh_ip->b_data;
+
+ if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG)) {
+ struct gfs2_rgrpd *rgd;
+
+ rgd = gfs2_blk2rgrpd(sdp, blkno, false);
+ if (rgd && rgd->rd_addr == blkno &&
+ rgd->rd_bits && rgd->rd_bits->bi_bh) {
+ fs_info(sdp, "Replaying 0x%llx but we "
+ "already have a bh!\n",
+ (unsigned long long)blkno);
+ fs_info(sdp, "busy:%d, pinned:%d\n",
+ buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
+ buffer_pinned(rgd->rd_bits->bi_bh));
+ gfs2_dump_glock(NULL, rgd->rd_gl, true);
+ }
+ }
mark_buffer_dirty(bh_ip);
-
+ }
brelse(bh_log);
brelse(bh_ip);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 456763e18def..662ef36c1874 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -251,7 +251,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head *bh, *bhs[2];
int num = 0;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) {
*bhp = NULL;
return -EIO;
}
@@ -309,7 +309,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
wait_on_buffer(bh);
@@ -320,7 +320,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
gfs2_io_error_bh_wd(sdp, bh);
return -EIO;
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
return 0;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 08823bb3b2d0..4a8e5a7310f0 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -61,6 +61,13 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_complain_secs = 10;
}
+void free_sbd(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_lkstats)
+ free_percpu(sdp->sd_lkstats);
+ kfree(sdp);
+}
+
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
@@ -72,10 +79,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
sdp->sd_vfs = sb;
sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
- if (!sdp->sd_lkstats) {
- kfree(sdp);
- return NULL;
- }
+ if (!sdp->sd_lkstats)
+ goto fail;
sb->s_fs_info = sdp;
set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
@@ -134,8 +139,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
mutex_init(&sdp->sd_freeze_mutex);
return sdp;
-}
+fail:
+ free_sbd(sdp);
+ return NULL;
+}
/**
* gfs2_check_sb - Check superblock
@@ -568,7 +576,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
INIT_WORK(&jd->jd_work, gfs2_recover_func);
jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
- if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
+ if (IS_ERR_OR_NULL(jd->jd_inode)) {
if (!jd->jd_inode)
error = -ENOENT;
else
@@ -996,7 +1004,7 @@ hostdata_error:
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
{
const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+ if (likely(!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) &&
lm->lm_unmount)
lm->lm_unmount(sdp);
}
@@ -1086,8 +1094,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (error) {
/* In this case, we haven't initialized sysfs, so we have to
manually free the sdp. */
- free_percpu(sdp->sd_lkstats);
- kfree(sdp);
+ free_sbd(sdp);
sb->s_fs_info = NULL;
return error;
}
@@ -1190,7 +1197,6 @@ fail_lm:
gfs2_lm_unmount(sdp);
fail_debug:
gfs2_delete_debugfs_file(sdp);
- free_percpu(sdp->sd_lkstats);
/* gfs2_sys_fs_del must be the last thing we do, since it causes
* sysfs to call function gfs2_sbd_release, which frees sdp. */
gfs2_sys_fs_del(sdp);
@@ -1370,7 +1376,6 @@ static void gfs2_kill_sb(struct super_block *sb)
sdp->sd_root_dir = NULL;
sdp->sd_master_dir = NULL;
shrink_dcache_sb(sb);
- free_percpu(sdp->sd_lkstats);
kill_block_super(sb);
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 8189b581236d..69c4b77f127b 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1475,7 +1475,7 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
{
if (error == 0 || error == -EROFS)
return;
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
sdp->sd_log_error = error;
wake_up(&sdp->sd_logd_waitq);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 2299a3fa1911..c529f8749a89 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -388,7 +388,8 @@ void gfs2_recover_func(struct work_struct *work)
}
t_tlck = ktime_get();
- fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
+ fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
+ jd->jd_jid, head.lh_tail, head.lh_blkno);
for (pass = 0; pass < 2; pass++) {
lops_before_scan(jd, &head, pass);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 36f20a89d0c2..49ac0a5e74ea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -610,11 +610,12 @@ int gfs2_rsqa_alloc(struct gfs2_inode *ip)
return gfs2_qa_alloc(ip);
}
-static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
+static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
+ const char *fs_id_buf)
{
struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
- gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n",
+ gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf,
(unsigned long long)ip->i_no_addr,
(unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
rs->rs_rbm.offset, rs->rs_free);
@@ -1111,32 +1112,33 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
+ struct gfs2_sbd *sdp = rgd->rd_sbd;
int valid = 1;
if (rgl->rl_flags != str->rg_flags) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
+ fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
valid = 0;
}
if (rgl->rl_free != str->rg_free) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
- be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
valid = 0;
}
if (rgl->rl_dinodes != str->rg_dinodes) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
- be32_to_cpu(rgl->rl_dinodes),
- be32_to_cpu(str->rg_dinodes));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_dinodes),
+ be32_to_cpu(str->rg_dinodes));
valid = 0;
}
if (rgl->rl_igeneration != str->rg_igeneration) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch "
- "%llu/%llu", (unsigned long long)rgd->rd_addr,
- (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
- (unsigned long long)be64_to_cpu(str->rg_igeneration));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu",
+ (unsigned long long)rgd->rd_addr,
+ (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
+ (unsigned long long)be64_to_cpu(str->rg_igeneration));
valid = 0;
}
return valid;
@@ -2246,10 +2248,12 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
* gfs2_rgrp_dump - print out an rgrp
* @seq: The iterator
* @gl: The glock in question
+ * @fs_id_buf: pointer to file system id (if requested)
*
*/
-void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
+void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf)
{
struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_blkreserv *trs;
@@ -2257,14 +2261,15 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
if (rgd == NULL)
return;
- gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+ gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+ fs_id_buf,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
rgd->rd_reserved, rgd->rd_extfail_pt);
if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
- gfs2_print_dbg(seq, " L: f:%02x b:%u i:%u\n",
+ gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf,
be32_to_cpu(rgl->rl_flags),
be32_to_cpu(rgl->rl_free),
be32_to_cpu(rgl->rl_dinodes));
@@ -2272,7 +2277,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
spin_lock(&rgd->rd_rsspin);
for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
- dump_rs(seq, trs);
+ dump_rs(seq, trs, fs_id_buf);
}
spin_unlock(&rgd->rd_rsspin);
}
@@ -2280,10 +2285,13 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
+
fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
(unsigned long long)rgd->rd_addr);
fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
- gfs2_rgrp_dump(NULL, rgd->rd_gl);
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
+ gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
rgd->rd_flags |= GFS2_RDF_ERROR;
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 6a3adf0ee0b7..c14a673ae36f 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -69,7 +69,8 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
-extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl);
+extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf);
extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
struct buffer_head *bh,
const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b70cea5c8c59..0acc5834f653 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -394,6 +394,7 @@ static int init_threads(struct gfs2_sbd *sdp)
fail:
kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
return error;
}
@@ -451,8 +452,12 @@ fail:
freeze_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&freeze_gh);
fail_threads:
- kthread_stop(sdp->sd_quotad_process);
- kthread_stop(sdp->sd_logd_process);
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
return error;
}
@@ -800,7 +805,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
if (!(flags & I_DIRTY_INODE))
return;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
@@ -849,12 +854,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE,
&freeze_gh);
- if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (error && !test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
return error;
flush_workqueue(gfs2_delete_workqueue);
- kthread_stop(sdp->sd_quotad_process);
- kthread_stop(sdp->sd_logd_process);
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
gfs2_quota_sync(sdp->sd_vfs, 0);
gfs2_statfs_sync(sdp->sd_vfs, 0);
@@ -969,14 +978,14 @@ void gfs2_freeze_func(struct work_struct *work)
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
&freeze_gh);
if (error) {
- printk(KERN_INFO "GFS2: couldn't get freeze lock : %d\n", error);
+ fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error);
gfs2_assert_withdraw(sdp, 0);
} else {
atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
error = thaw_super(sb);
if (error) {
- printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n",
- error);
+ fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
+ error);
gfs2_assert_withdraw(sdp, 0);
}
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
@@ -1004,7 +1013,7 @@ static int gfs2_freeze(struct super_block *sb)
if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
goto out;
- if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
error = -EINVAL;
goto out;
}
@@ -1014,20 +1023,14 @@ static int gfs2_freeze(struct super_block *sb)
if (!error)
break;
- switch (error) {
- case -EBUSY:
+ if (error == -EBUSY)
fs_err(sdp, "waiting for recovery before freeze\n");
- break;
-
- default:
+ else
fs_err(sdp, "error freezing FS: %d\n", error);
- break;
- }
fs_err(sdp, "retrying...\n");
msleep(1000);
}
- error = 0;
set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
out:
mutex_unlock(&sdp->sd_freeze_mutex);
@@ -1273,8 +1276,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
error = gfs2_make_fs_ro(sdp);
else
error = gfs2_make_fs_rw(sdp);
- if (error)
- return error;
}
sdp->sd_args = args;
@@ -1300,7 +1301,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
spin_unlock(&gt->gt_spin);
gfs2_online_uevent(sdp);
- return 0;
+ return error;
}
/**
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index c5f42f0c503b..9d49eaadb9d9 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -44,6 +44,8 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern void gfs2_freeze_func(struct work_struct *work);
+extern void free_sbd(struct gfs2_sbd *sdp);
+
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 159aedf63c2a..289328831e24 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -118,7 +118,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
{
- unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
+ unsigned int b = test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
return snprintf(buf, PAGE_SIZE, "%u\n", b);
}
@@ -301,7 +301,7 @@ static void gfs2_sbd_release(struct kobject *kobj)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
- kfree(sdp);
+ free_sbd(sdp);
}
static struct kobj_type gfs2_ktype = {
@@ -679,7 +679,6 @@ fail_lock_module:
fail_tune:
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
fail_reg:
- free_percpu(sdp->sd_lkstats);
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
sb->s_fs_info = NULL;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 6f67ef7aa412..35e3059255fe 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -77,10 +77,10 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr)
fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n",
tr->tr_blocks, tr->tr_revokes, tr->tr_reserved,
test_bit(TR_TOUCHED, &tr->tr_flags));
- fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
+ fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n",
tr->tr_num_buf_new, tr->tr_num_buf_rm,
tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
- tr->tr_num_revoke, tr->tr_num_revoke_rm);
+ tr->tr_num_revoke);
}
void gfs2_trans_end(struct gfs2_sbd *sdp)
@@ -263,7 +263,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
sdp->sd_log_num_revoke--;
kmem_cache_free(gfs2_bufdata_cachep, bd);
- tr->tr_num_revoke_rm++;
+ tr->tr_num_revoke--;
if (--n == 0)
break;
}
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index a7e55234211f..83f6c582773a 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -41,7 +41,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
struct va_format vaf;
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
- test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags))
return 0;
if (fmt) {
@@ -178,9 +178,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
const char *function, char *file, unsigned int line)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
int rv;
- gfs2_rgrp_dump(NULL, rgd->rd_gl);
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
+ gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
rv = gfs2_lm_withdraw(sdp,
"fatal: filesystem consistency error\n"
" RG = %llu\n"
@@ -256,7 +258,7 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *function, char *file, unsigned int line,
bool withdraw)
{
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
fs_err(sdp,
"fatal: I/O error\n"
" block = %llu\n"
diff --git a/fs/inode.c b/fs/inode.c
index 2bf21e2c90fc..5f5431ec3d62 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1899,6 +1899,26 @@ int file_update_time(struct file *file)
}
EXPORT_SYMBOL(file_update_time);
+/* Caller must hold the file's inode lock */
+int file_modified(struct file *file)
+{
+ int err;
+
+ /*
+ * Clear the security bits if the process is not being run by root.
+ * This keeps people from modifying setuid and setgid binaries.
+ */
+ err = file_remove_privs(file);
+ if (err)
+ return err;
+
+ if (unlikely(file->f_mode & FMODE_NOCMTIME))
+ return 0;
+
+ return file_update_time(file);
+}
+EXPORT_SYMBOL(file_modified);
+
int inode_needs_sync(struct inode *inode)
{
if (IS_SYNC(inode))
diff --git a/fs/internal.h b/fs/internal.h
index a48ef81be37d..2f3c3de51fad 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -40,8 +40,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
extern void guard_bio_eod(int rw, struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
-void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page);
/*
* char_dev.c
diff --git a/fs/iomap.c b/fs/iomap.c
index 7a147aa0c4d9..217c3e5a13d6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -777,6 +777,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page, struct iomap *iomap)
{
const struct iomap_page_ops *page_ops = iomap->page_ops;
+ loff_t old_size = inode->i_size;
int ret;
if (iomap->type == IOMAP_INLINE) {
@@ -788,9 +789,21 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
}
- __generic_write_end(inode, pos, ret, page);
+ /*
+ * Update the in-memory inode size after copying the data into the page
+ * cache. It's up to the file system to write the updated size to disk,
+ * preferably after I/O completion so that no stale data is exposed.
+ */
+ if (pos + ret > old_size) {
+ i_size_write(inode, pos + ret);
+ iomap->flags |= IOMAP_F_SIZE_CHANGED;
+ }
+ unlock_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
if (page_ops && page_ops->page_done)
- page_ops->page_done(inode, pos, copied, page, iomap);
+ page_ops->page_done(inode, pos, ret, page, iomap);
put_page(page);
if (ret < len)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index efd0ce9489ae..132fb92098c7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -184,17 +184,18 @@ static int journal_wait_on_commit_record(journal_t *journal,
/*
* write the filemap data using writepage() address_space_operations.
* We don't do block allocation here even for delalloc. We don't
- * use writepages() because with dealyed allocation we may be doing
+ * use writepages() because with delayed allocation we may be doing
* block allocation in writepages().
*/
-static int journal_submit_inode_data_buffers(struct address_space *mapping)
+static int journal_submit_inode_data_buffers(struct address_space *mapping,
+ loff_t dirty_start, loff_t dirty_end)
{
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = mapping->nrpages * 2,
- .range_start = 0,
- .range_end = i_size_read(mapping->host),
+ .range_start = dirty_start,
+ .range_end = dirty_end,
};
ret = generic_writepages(mapping, &wbc);
@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+ loff_t dirty_start = jinode->i_dirty_start;
+ loff_t dirty_end = jinode->i_dirty_end;
+
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
mapping = jinode->i_vfs_inode->i_mapping;
@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
* only allocated blocks here.
*/
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
- err = journal_submit_inode_data_buffers(mapping);
+ err = journal_submit_inode_data_buffers(mapping, dirty_start,
+ dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+ loff_t dirty_start = jinode->i_dirty_start;
+ loff_t dirty_end = jinode->i_dirty_end;
+
if (!(jinode->i_flags & JI_WAIT_DATA))
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- err = filemap_fdatawait_keep_errors(
- jinode->i_vfs_inode->i_mapping);
+ err = filemap_fdatawait_range_keep_errors(
+ jinode->i_vfs_inode->i_mapping, dirty_start,
+ dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
&jinode->i_transaction->t_inode_list);
} else {
jinode->i_transaction = NULL;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
}
}
spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 43df0c943229..953990eb70a9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_forget);
-#if 0
-EXPORT_SYMBOL(journal_sync_buffer);
-#endif
EXPORT_SYMBOL(jbd2_journal_flush);
EXPORT_SYMBOL(jbd2_journal_revoke);
@@ -94,6 +91,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_add_write);
EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
@@ -203,7 +202,7 @@ loop:
if (journal->j_flags & JBD2_UNMOUNT)
goto end_loop;
- jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
+ jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
journal->j_commit_sequence, journal->j_commit_request);
if (journal->j_commit_sequence != journal->j_commit_request) {
@@ -324,7 +323,7 @@ static void journal_kill_thread(journal_t *journal)
* IO is in progress. do_get_write_access() handles this.
*
* The function returns a pointer to the buffer_head to be used for IO.
- *
+ *
*
* Return value:
* <0: Error
@@ -500,7 +499,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
*/
journal->j_commit_request = target;
- jbd_debug(1, "JBD2: requesting commit %d/%d\n",
+ jbd_debug(1, "JBD2: requesting commit %u/%u\n",
journal->j_commit_request,
journal->j_commit_sequence);
journal->j_running_transaction->t_requested = jiffies;
@@ -513,7 +512,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
journal->j_commit_request,
journal->j_commit_sequence,
- target, journal->j_running_transaction ?
+ target, journal->j_running_transaction ?
journal->j_running_transaction->t_tid : 0);
return 0;
}
@@ -698,12 +697,12 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
#ifdef CONFIG_JBD2_DEBUG
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_ERR
- "%s: error: j_commit_request=%d, tid=%d\n",
+ "%s: error: j_commit_request=%u, tid=%u\n",
__func__, journal->j_commit_request, tid);
}
#endif
while (tid_gt(tid, journal->j_commit_sequence)) {
- jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
+ jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
tid, journal->j_commit_sequence);
read_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_commit);
@@ -944,7 +943,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
trace_jbd2_update_log_tail(journal, tid, block, freed);
jbd_debug(1,
- "Cleaning journal tail from %d to %d (offset %lu), "
+ "Cleaning journal tail from %u to %u (offset %lu), "
"freeing %lu\n",
journal->j_tail_sequence, tid, block, freed);
@@ -1318,7 +1317,7 @@ static int journal_reset(journal_t *journal)
*/
if (sb->s_start == 0) {
jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
- "(start %ld, seq %d, errno %d)\n",
+ "(start %ld, seq %u, errno %d)\n",
journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
journal->j_flags |= JBD2_FLUSHED;
@@ -1453,7 +1452,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
return;
}
- jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
+ jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -2574,6 +2573,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
jinode->i_next_transaction = NULL;
jinode->i_vfs_inode = inode;
jinode->i_flags = 0;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
INIT_LIST_HEAD(&jinode->i_list);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8ca4fddc705f..990e7b5062e7 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
* File inode in the inode list of the handle's transaction
*/
static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
- unsigned long flags)
+ unsigned long flags, loff_t start_byte, loff_t end_byte)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
@@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid);
- /*
- * First check whether inode isn't already on the transaction's
- * lists without taking the lock. Note that this check is safe
- * without the lock as we cannot race with somebody removing inode
- * from the transaction. The reason is that we remove inode from the
- * transaction only in journal_release_jbd_inode() and when we commit
- * the transaction. We are guarded from the first case by holding
- * a reference to the inode. We are safe against the second case
- * because if jinode->i_transaction == transaction, commit code
- * cannot touch the transaction because we hold reference to it,
- * and if jinode->i_next_transaction == transaction, commit code
- * will only file the inode where we want it.
- */
- if ((jinode->i_transaction == transaction ||
- jinode->i_next_transaction == transaction) &&
- (jinode->i_flags & flags) == flags)
- return 0;
-
spin_lock(&journal->j_list_lock);
jinode->i_flags |= flags;
+
+ if (jinode->i_dirty_end) {
+ jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
+ jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+ } else {
+ jinode->i_dirty_start = start_byte;
+ jinode->i_dirty_end = end_byte;
+ }
+
/* Is inode already attached where we need it? */
if (jinode->i_transaction == transaction ||
jinode->i_next_transaction == transaction)
@@ -2631,12 +2622,28 @@ done:
int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
{
return jbd2_journal_file_inode(handle, jinode,
- JI_WRITE_DATA | JI_WAIT_DATA);
+ JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
}
int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
{
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA);
+ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
+ LLONG_MAX);
+}
+
+int jbd2_journal_inode_ranged_write(handle_t *handle,
+ struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
+{
+ return jbd2_journal_file_inode(handle, jinode,
+ JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
+ start_byte + length - 1);
+}
+
+int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
+ loff_t start_byte, loff_t length)
+{
+ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
+ start_byte, start_byte + length - 1);
}
/*
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 62f98225abb3..b11f2afa84f1 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -47,13 +47,14 @@ void nlmclnt_next_cookie(struct nlm_cookie *c)
c->len=4;
}
-static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
+static struct nlm_lockowner *
+nlmclnt_get_lockowner(struct nlm_lockowner *lockowner)
{
refcount_inc(&lockowner->count);
return lockowner;
}
-static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
+static void nlmclnt_put_lockowner(struct nlm_lockowner *lockowner)
{
if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
return;
@@ -82,28 +83,28 @@ static inline uint32_t __nlm_alloc_pid(struct nlm_host *host)
return res;
}
-static struct nlm_lockowner *__nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner)
+static struct nlm_lockowner *__nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner)
{
struct nlm_lockowner *lockowner;
list_for_each_entry(lockowner, &host->h_lockowners, list) {
if (lockowner->owner != owner)
continue;
- return nlm_get_lockowner(lockowner);
+ return nlmclnt_get_lockowner(lockowner);
}
return NULL;
}
-static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner)
+static struct nlm_lockowner *nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner)
{
struct nlm_lockowner *res, *new = NULL;
spin_lock(&host->h_lock);
- res = __nlm_find_lockowner(host, owner);
+ res = __nlmclnt_find_lockowner(host, owner);
if (res == NULL) {
spin_unlock(&host->h_lock);
new = kmalloc(sizeof(*new), GFP_KERNEL);
spin_lock(&host->h_lock);
- res = __nlm_find_lockowner(host, owner);
+ res = __nlmclnt_find_lockowner(host, owner);
if (res == NULL && new != NULL) {
res = new;
refcount_set(&new->count, 1);
@@ -457,7 +458,7 @@ static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
- new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+ new->fl_u.nfs_fl.owner = nlmclnt_get_lockowner(fl->fl_u.nfs_fl.owner);
list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
}
@@ -467,7 +468,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl)
spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
list_del(&fl->fl_u.nfs_fl.list);
spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
- nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
+ nlmclnt_put_lockowner(fl->fl_u.nfs_fl.owner);
}
static const struct file_lock_operations nlmclnt_lock_ops = {
@@ -478,7 +479,7 @@ static const struct file_lock_operations nlmclnt_lock_ops = {
static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host)
{
fl->fl_u.nfs_fl.state = 0;
- fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+ fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host, fl->fl_owner);
INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
fl->fl_ops = &nlmclnt_lock_ops;
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 1bddf70d9656..e4d3f783e06a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -46,8 +46,14 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
- lock->fl.fl_owner = (fl_owner_t) host;
+ lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
+ nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
+ if (!lock->fl.fl_owner) {
+ /* lockowner allocation has failed */
+ nlmsvc_release_host(host);
+ return nlm_lck_denied_nolocks;
+ }
}
return 0;
@@ -94,6 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -142,6 +149,7 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -178,6 +186,7 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -217,6 +226,7 @@ __nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -365,6 +375,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp)
resp->status = nlmsvc_share_file(host, file, argp);
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -399,6 +410,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp)
resp->status = nlmsvc_unshare_file(host, file, argp);
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ea719cdd6a36..61d3cc2283dc 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -332,6 +332,93 @@ restart:
mutex_unlock(&file->f_mutex);
}
+static struct nlm_lockowner *
+nlmsvc_get_lockowner(struct nlm_lockowner *lockowner)
+{
+ refcount_inc(&lockowner->count);
+ return lockowner;
+}
+
+static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
+{
+ if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
+ return;
+ list_del(&lockowner->list);
+ spin_unlock(&lockowner->host->h_lock);
+ nlmsvc_release_host(lockowner->host);
+ kfree(lockowner);
+}
+
+static struct nlm_lockowner *__nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid)
+{
+ struct nlm_lockowner *lockowner;
+ list_for_each_entry(lockowner, &host->h_lockowners, list) {
+ if (lockowner->pid != pid)
+ continue;
+ return nlmsvc_get_lockowner(lockowner);
+ }
+ return NULL;
+}
+
+static struct nlm_lockowner *nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid)
+{
+ struct nlm_lockowner *res, *new = NULL;
+
+ spin_lock(&host->h_lock);
+ res = __nlmsvc_find_lockowner(host, pid);
+
+ if (res == NULL) {
+ spin_unlock(&host->h_lock);
+ new = kmalloc(sizeof(*res), GFP_KERNEL);
+ spin_lock(&host->h_lock);
+ res = __nlmsvc_find_lockowner(host, pid);
+ if (res == NULL && new != NULL) {
+ res = new;
+ /* fs/locks.c will manage the refcount through lock_ops */
+ refcount_set(&new->count, 1);
+ new->pid = pid;
+ new->host = nlm_get_host(host);
+ list_add(&new->list, &host->h_lockowners);
+ new = NULL;
+ }
+ }
+
+ spin_unlock(&host->h_lock);
+ kfree(new);
+ return res;
+}
+
+void
+nlmsvc_release_lockowner(struct nlm_lock *lock)
+{
+ if (lock->fl.fl_owner)
+ nlmsvc_put_lockowner(lock->fl.fl_owner);
+}
+
+static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+ struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
+ new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
+}
+
+static void nlmsvc_locks_release_private(struct file_lock *fl)
+{
+ nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
+}
+
+static const struct file_lock_operations nlmsvc_lock_ops = {
+ .fl_copy_lock = nlmsvc_locks_copy_lock,
+ .fl_release_private = nlmsvc_locks_release_private,
+};
+
+void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
+ pid_t pid)
+{
+ fl->fl_owner = nlmsvc_find_lockowner(host, pid);
+ if (fl->fl_owner != NULL)
+ fl->fl_ops = &nlmsvc_lock_ops;
+}
+
/*
* Initialize arguments for GRANTED call. The nlm_rqst structure
* has been cleared already.
@@ -345,7 +432,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
/* set default data area */
call->a_args.lock.oh.data = call->a_owner;
- call->a_args.lock.svid = lock->fl.fl_pid;
+ call->a_args.lock.svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
if (lock->oh.len > NLMCLNT_OHSIZE) {
void *data = kmalloc(lock->oh.len, GFP_KERNEL);
@@ -509,6 +596,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
{
int error;
__be32 ret;
+ struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
locks_inode(file->f_file)->i_sb->s_id,
@@ -522,6 +610,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
+ /* If there's a conflicting lock, remember to clean up the test lock */
+ test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
+
error = vfs_test_lock(file->f_file, &lock->fl);
if (error) {
/* We can't currently deal with deferred test requests */
@@ -543,11 +634,16 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
- conflock->svid = lock->fl.fl_pid;
+ conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
locks_release_private(&lock->fl);
+
+ /* Clean up the test lock */
+ lock->fl.fl_owner = NULL;
+ nlmsvc_put_lockowner(test_owner);
+
ret = nlm_lck_denied;
out:
return ret;
@@ -692,25 +788,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
-static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
-{
- return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
-}
-
-/*
- * Since NLM uses two "keys" for tracking locks, we need to hash them down
- * to one for the blocked_hash. Here, we're just xor'ing the host address
- * with the pid in order to create a key value for picking a hash bucket.
- */
-static unsigned long
-nlmsvc_owner_key(struct file_lock *fl)
-{
- return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid;
-}
-
const struct lock_manager_operations nlmsvc_lock_operations = {
- .lm_compare_owner = nlmsvc_same_owner,
- .lm_owner_key = nlmsvc_owner_key,
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
};
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index ea77c66d3cc3..d0bb7a6bf005 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -76,8 +76,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
- lock->fl.fl_owner = (fl_owner_t) host;
+ lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
+ nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
+ if (!lock->fl.fl_owner) {
+ /* lockowner allocation has failed */
+ nlmsvc_release_host(host);
+ return nlm_lck_denied_nolocks;
+ }
}
return 0;
@@ -125,6 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -173,6 +180,7 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -210,6 +218,7 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock));
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -250,6 +259,7 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock));
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -408,6 +418,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp)
resp->status = cast_status(nlmsvc_share_file(host, file, argp));
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -442,6 +453,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp)
resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0e610f422406..028fc152da22 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -180,7 +180,7 @@ again:
/* update current lock count */
file->f_locks++;
- lockhost = (struct nlm_host *) fl->fl_owner;
+ lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
if (match(lockhost, host)) {
struct file_lock lock = *fl;
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 7147e4aebecc..982629f7b120 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -126,8 +126,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
lock->svid = ntohl(*p++);
locks_init_lock(fl);
- fl->fl_owner = current->files;
- fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
@@ -269,7 +267,6 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
- lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 7ed9edf9aed4..5fa9f48a9dba 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -118,8 +118,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
lock->svid = ntohl(*p++);
locks_init_lock(fl);
- fl->fl_owner = current->files;
- fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
@@ -266,7 +264,6 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
- lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
diff --git a/fs/locks.c b/fs/locks.c
index ec1e4a5df629..686eae21daf6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -658,9 +658,6 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
*/
static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
{
- if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner)
- return fl2->fl_lmops == fl1->fl_lmops &&
- fl1->fl_lmops->lm_compare_owner(fl1, fl2);
return fl1->fl_owner == fl2->fl_owner;
}
@@ -701,8 +698,6 @@ static void locks_delete_global_locks(struct file_lock *fl)
static unsigned long
posix_owner_key(struct file_lock *fl)
{
- if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
- return fl->fl_lmops->lm_owner_key(fl);
return (unsigned long)fl->fl_owner;
}
@@ -1534,11 +1529,21 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
{
- if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT))
- return false;
- if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
- return false;
- return locks_conflict(breaker, lease);
+ bool rc;
+
+ if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
+ rc = false;
+ goto trace;
+ }
+ if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
+ rc = false;
+ goto trace;
+ }
+
+ rc = locks_conflict(breaker, lease);
+trace:
+ trace_leases_conflict(rc, lease, breaker);
+ return rc;
}
static bool
@@ -1753,10 +1758,10 @@ int fcntl_getlease(struct file *filp)
}
/**
- * check_conflicting_open - see if the given dentry points to a file that has
+ * check_conflicting_open - see if the given file points to an inode that has
* an existing open that would conflict with the
* desired lease.
- * @dentry: dentry to check
+ * @filp: file to check
* @arg: type of lease that we're trying to acquire
* @flags: current lock flags
*
@@ -1764,30 +1769,42 @@ int fcntl_getlease(struct file *filp)
* conflict with the lease we're trying to set.
*/
static int
-check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
+check_conflicting_open(struct file *filp, const long arg, int flags)
{
- int ret = 0;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = locks_inode(filp);
+ int self_wcount = 0, self_rcount = 0;
if (flags & FL_LAYOUT)
return 0;
- if ((arg == F_RDLCK) && inode_is_open_for_write(inode))
- return -EAGAIN;
+ if (arg == F_RDLCK)
+ return inode_is_open_for_write(inode) ? -EAGAIN : 0;
+ else if (arg != F_WRLCK)
+ return 0;
- if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
- (atomic_read(&inode->i_count) > 1)))
- ret = -EAGAIN;
+ /*
+ * Make sure that only read/write count is from lease requestor.
+ * Note that this will result in denying write leases when i_writecount
+ * is negative, which is what we want. (We shouldn't grant write leases
+ * on files open for execution.)
+ */
+ if (filp->f_mode & FMODE_WRITE)
+ self_wcount = 1;
+ else if (filp->f_mode & FMODE_READ)
+ self_rcount = 1;
- return ret;
+ if (atomic_read(&inode->i_writecount) != self_wcount ||
+ atomic_read(&inode->i_readcount) != self_rcount)
+ return -EAGAIN;
+
+ return 0;
}
static int
generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
{
struct file_lock *fl, *my_fl = NULL, *lease;
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = locks_inode(filp);
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
@@ -1822,7 +1839,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
- error = check_conflicting_open(dentry, arg, lease->fl_flags);
+ error = check_conflicting_open(filp, arg, lease->fl_flags);
if (error)
goto out;
@@ -1879,7 +1896,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
* precedes these checks.
*/
smp_mb();
- error = check_conflicting_open(dentry, arg, lease->fl_flags);
+ error = check_conflicting_open(filp, arg, lease->fl_flags);
if (error) {
locks_unlink_lock_ctx(lease);
goto out;
diff --git a/fs/namei.c b/fs/namei.c
index 20831c2fbb34..209c51a5226c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3883,6 +3883,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
dentry->d_inode->i_flags |= S_DEAD;
dont_mount(dentry);
detach_mounts(dentry);
+ fsnotify_rmdir(dir, dentry);
out:
inode_unlock(dentry->d_inode);
@@ -3999,6 +4000,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
if (!error) {
dont_mount(dentry);
detach_mounts(dentry);
+ fsnotify_unlink(dir, dentry);
}
}
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index cf42a8b939e3..f4157eb1f69d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -129,10 +129,13 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
}
#ifdef CONFIG_NFS_V4_2
-static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t count, unsigned int flags)
+static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count, unsigned int flags)
{
+ /* Only offload copy if superblock is the same */
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ return -EXDEV;
if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
@@ -140,6 +143,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
+static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count,
+ flags);
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(file_in, pos_in, file_out,
+ pos_out, count, flags);
+ return ret;
+}
+
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
{
loff_t ret;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 69679f4f2e6c..1e7296395d71 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -72,25 +72,6 @@ struct idmap {
const struct cred *cred;
};
-static struct key_acl nfs_idmap_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
-static struct key_acl nfs_idmap_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
- }
-};
-
static struct user_namespace *idmap_userns(const struct idmap *idmap)
{
if (idmap && idmap->cred)
@@ -227,7 +208,8 @@ int nfs_idmap_init(void)
keyring = keyring_alloc(".id_resolver",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &nfs_idmap_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
@@ -305,13 +287,11 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
return ERR_PTR(ret);
if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
- rkey = request_key(&key_type_id_resolver, desc, "",
- &nfs_idmap_key_acl);
+ rkey = request_key(&key_type_id_resolver, desc, "");
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
- desc, NULL, "", 0, idmap,
- &nfs_idmap_key_acl);
+ desc, NULL, "", 0, idmap);
mutex_unlock(&idmap->idmap_mutex);
}
if (!IS_ERR(rkey))
@@ -340,6 +320,8 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
}
rcu_read_lock();
+ rkey->perm |= KEY_USR_VIEW;
+
ret = key_validate(rkey);
if (ret < 0)
goto out_up;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 52d533967485..0effeee28352 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -396,12 +396,6 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
nfs_cancel_async_unlink(dentry);
return;
}
-
- /*
- * vfs_unlink and the like do not issue this when a file is
- * sillyrenamed, so do it here.
- */
- fsnotify_nameremove(dentry, 0);
}
#define SILLYNAME_PREFIX ".nfs"
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 4fb1f72a25fb..66d4c55eb48e 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -121,15 +121,13 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
- struct timespec ts;
int error;
- ts = timespec64_to_timespec(inode->i_mtime);
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
- timespec_compare(&lcp->lc_mtime, &ts) < 0)
- lcp->lc_mtime = timespec64_to_timespec(current_time(inode));
+ timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
+ lcp->lc_mtime = current_time(inode);
iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
- iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime);
+ iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
if (new_size > i_size_read(inode)) {
iattr.ia_valid |= ATTR_SIZE;
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4a98537efb0f..10ec5ecdf117 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -10,6 +10,7 @@
#define NFSCACHE_H
#include <linux/sunrpc/svc.h>
+#include "netns.h"
/*
* Representation of a reply cache entry.
@@ -77,8 +78,8 @@ enum {
/* Checksum this amount of the request */
#define RC_CSUMLEN (256U)
-int nfsd_reply_cache_init(void);
-void nfsd_reply_cache_shutdown(void);
+int nfsd_reply_cache_init(struct nfsd_net *);
+void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
int nfsd_reply_cache_stats_open(struct inode *, struct file *);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 7c686a270d60..bdfe5bcb3dcd 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -42,6 +42,11 @@ struct nfsd_net {
bool grace_ended;
time_t boot_time;
+ /* internal mount of the "nfsd" pseudofilesystem: */
+ struct vfsmount *nfsd_mnt;
+
+ struct dentry *nfsd_client_dir;
+
/*
* reclaim_str_hashtbl[] holds known client info from previous reset/reboot
* used in reboot/reset lease grace period processing
@@ -106,6 +111,7 @@ struct nfsd_net {
*/
unsigned int max_connections;
+ u32 clientid_base;
u32 clientid_counter;
u32 clverifier_counter;
@@ -127,6 +133,44 @@ struct nfsd_net {
*/
bool *nfsd_versions;
bool *nfsd4_minorversions;
+
+ /*
+ * Duplicate reply cache
+ */
+ struct nfsd_drc_bucket *drc_hashtbl;
+ struct kmem_cache *drc_slab;
+
+ /* max number of entries allowed in the cache */
+ unsigned int max_drc_entries;
+
+ /* number of significant bits in the hash value */
+ unsigned int maskbits;
+ unsigned int drc_hashsize;
+
+ /*
+ * Stats and other tracking of on the duplicate reply cache.
+ * These fields and the "rc" fields in nfsdstats are modified
+ * with only the per-bucket cache lock, which isn't really safe
+ * and should be fixed if we want the statistics to be
+ * completely accurate.
+ */
+
+ /* total number of entries */
+ atomic_t num_drc_entries;
+
+ /* cache misses due only to checksum comparison failures */
+ unsigned int payload_misses;
+
+ /* amount of memory (in bytes) currently consumed by the DRC */
+ unsigned int drc_mem_usage;
+
+ /* longest hash chain seen */
+ unsigned int longest_chain;
+
+ /* size of cache when we saw the longest hash chain */
+ unsigned int longest_chain_cachesize;
+
+ struct shrinker nfsd_reply_cache_shrinker;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 2961016097ac..d1f285245af8 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -83,7 +83,7 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
new->type = itm->type;
strlcpy(new->name, itm->name, sizeof(new->name));
- strlcpy(new->authname, itm->authname, sizeof(new->name));
+ strlcpy(new->authname, itm->authname, sizeof(new->authname));
}
static void
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1a0cdeb3b875..7857942c5ca6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -42,6 +42,7 @@
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
+#include <linux/string_helpers.h>
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
@@ -99,6 +100,13 @@ enum nfsd4_st_mutex_lock_subclass {
*/
static DECLARE_WAIT_QUEUE_HEAD(close_wq);
+/*
+ * A waitqueue where a writer to clients/#/ctl destroying a client can
+ * wait for cl_rpc_users to drop to 0 and then for the client to be
+ * unhashed.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(expiry_wq);
+
static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
@@ -138,7 +146,7 @@ static __be32 get_client_locked(struct nfs4_client *clp)
if (is_client_expired(clp))
return nfserr_expired;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
return nfs_ok;
}
@@ -170,20 +178,24 @@ static void put_client_renew_locked(struct nfs4_client *clp)
lockdep_assert_held(&nn->client_lock);
- if (!atomic_dec_and_test(&clp->cl_refcount))
+ if (!atomic_dec_and_test(&clp->cl_rpc_users))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
}
static void put_client_renew(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
+ if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
spin_unlock(&nn->client_lock);
}
@@ -694,7 +706,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
idr_preload(GFP_KERNEL);
spin_lock(&cl->cl_lock);
- new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
+ /* Reserving 0 for start of file in nfsdfs "states" file: */
+ new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT);
spin_unlock(&cl->cl_lock);
idr_preload_end();
if (new_id < 0)
@@ -1844,7 +1857,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
- clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+ xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL);
if (clp->cl_name.data == NULL)
goto err_no_name;
clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE,
@@ -1854,10 +1867,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
goto err_no_hashtbl;
for (i = 0; i < OWNER_HASH_SIZE; i++)
INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
- clp->cl_name.len = name.len;
INIT_LIST_HEAD(&clp->cl_sessions);
idr_init(&clp->cl_stateids);
- atomic_set(&clp->cl_refcount, 0);
+ atomic_set(&clp->cl_rpc_users, 0);
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_openowners);
@@ -1879,6 +1891,25 @@ err_no_name:
return NULL;
}
+static void __free_client(struct kref *k)
+{
+ struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref);
+ struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);
+
+ free_svc_cred(&clp->cl_cred);
+ kfree(clp->cl_ownerstr_hashtbl);
+ kfree(clp->cl_name.data);
+ kfree(clp->cl_nii_domain.data);
+ kfree(clp->cl_nii_name.data);
+ idr_destroy(&clp->cl_stateids);
+ kmem_cache_free(client_slab, clp);
+}
+
+static void drop_client(struct nfs4_client *clp)
+{
+ kref_put(&clp->cl_nfsdfs.cl_ref, __free_client);
+}
+
static void
free_client(struct nfs4_client *clp)
{
@@ -1891,11 +1922,12 @@ free_client(struct nfs4_client *clp)
free_session(ses);
}
rpc_destroy_wait_queue(&clp->cl_cb_waitq);
- free_svc_cred(&clp->cl_cred);
- kfree(clp->cl_ownerstr_hashtbl);
- kfree(clp->cl_name.data);
- idr_destroy(&clp->cl_stateids);
- kmem_cache_free(client_slab, clp);
+ if (clp->cl_nfsd_dentry) {
+ nfsd_client_rmdir(clp->cl_nfsd_dentry);
+ clp->cl_nfsd_dentry = NULL;
+ wake_up_all(&expiry_wq);
+ }
+ drop_client(clp);
}
/* must be called under the client_lock */
@@ -1936,7 +1968,7 @@ unhash_client(struct nfs4_client *clp)
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
- if (atomic_read(&clp->cl_refcount))
+ if (atomic_read(&clp->cl_rpc_users))
return nfserr_jukebox;
unhash_client_locked(clp);
return nfs_ok;
@@ -1989,6 +2021,7 @@ __destroy_client(struct nfs4_client *clp)
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
free_client(clp);
+ wake_up_all(&expiry_wq);
}
static void
@@ -2199,6 +2232,342 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
return s;
}
+/*
+ * Map an nfsdfs inode to the nfs4_client embedding its nfsdfs_client.
+ * Returns NULL when get_nfsdfs_client() finds nothing.  Callers in this
+ * file pair a successful lookup with drop_client().
+ */
+static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
+{
+	struct nfsdfs_client *nc = get_nfsdfs_client(inode);
+
+	return nc ? container_of(nc, struct nfs4_client, cl_nfsdfs) : NULL;
+}
+
+/*
+ * Write @len bytes of @data to @m through seq_escape_mem_ascii(),
+ * wrapped in a pair of double quotes.
+ */
+static void seq_quote_mem(struct seq_file *m, char *data, int len)
+{
+	/* single constant characters: seq_putc() avoids format parsing */
+	seq_putc(m, '"');
+	seq_escape_mem_ascii(m, data, len);
+	seq_putc(m, '"');
+}
+
+/*
+ * seq_file show routine for a client's "info" file.  Prints the client
+ * id, address, name and minor version, followed by the implementation
+ * id fields when an implementation domain was recorded.
+ *
+ * m->private is the inode handed to single_open() by client_info_open().
+ * Returns -ENXIO if the inode no longer maps to a client, 0 otherwise.
+ */
+static int client_info_show(struct seq_file *m, void *v)
+{
+	struct nfs4_client *clp = get_nfsdfs_clp(m->private);
+	u64 clid;
+
+	if (clp == NULL)
+		return -ENXIO;
+
+	/* the clientid is printed as one 64-bit value */
+	memcpy(&clid, &clp->cl_clientid, sizeof(clid));
+	seq_printf(m, "clientid: 0x%llx\n", clid);
+	seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
+
+	seq_puts(m, "name: ");
+	seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
+	seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
+
+	if (clp->cl_nii_domain.data != NULL) {
+		seq_puts(m, "Implementation domain: ");
+		seq_quote_mem(m, clp->cl_nii_domain.data,
+			      clp->cl_nii_domain.len);
+		seq_puts(m, "\nImplementation name: ");
+		seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
+		seq_printf(m, "\nImplementation time: [%ld, %ld]\n",
+			   clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
+	}
+
+	/* pairs with the lookup in get_nfsdfs_clp() above */
+	drop_client(clp);
+	return 0;
+}
+
+/*
+ * open() for the "info" file: a single-record seq_file whose private
+ * data is the inode being opened.
+ */
+static int client_info_open(struct inode *inode, struct file *file)
+{
+	int err = single_open(file, client_info_show, inode);
+
+	return err;
+}
+
+/* File operations for the per-client "info" file (single_open seq_file). */
+static const struct file_operations client_info_fops = {
+	.llseek		= seq_lseek,
+	.read		= seq_read,
+	.release	= single_release,
+	.open		= client_info_open,
+};
+
+/*
+ * seq_file ->start for the "states" file.  Takes clp->cl_lock, which
+ * states_stop() releases, then looks up a stateid in cl_stateids
+ * starting from *pos and stores the resulting id back into *pos.
+ */
+static void *states_start(struct seq_file *s, loff_t *pos)
+	__acquires(&clp->cl_lock)
+{
+	struct nfs4_client *clp = s->private;
+	unsigned long cursor = *pos;
+	void *stid;
+
+	spin_lock(&clp->cl_lock);
+	stid = idr_get_next_ul(&clp->cl_stateids, &cursor);
+	*pos = cursor;
+
+	return stid;
+}
+
+/*
+ * seq_file ->next for the "states" file: advance one past the current
+ * id and look up the following stateid in cl_stateids.  The resulting
+ * id is stored back into *pos.  This function takes no lock itself;
+ * states_start() acquires cl_lock and states_stop() releases it.
+ */
+static void *states_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct nfs4_client *clp = s->private;
+	unsigned long id = *pos;
+	void *ret;
+
+	/* step past the entry that was just shown */
+	id++;
+	ret = idr_get_next_ul(&clp->cl_stateids, &id);
+	*pos = id;
+	return ret;
+}
+
+/* seq_file ->stop for the "states" file: drop the cl_lock taken in states_start(). */
+static void states_stop(struct seq_file *s, void *v)
+	__releases(&clp->cl_lock)
+{
+	struct nfs4_client *clp;
+
+	clp = s->private;
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Print where @f lives as superblock: "major:minor:ino", using the
+ * device number of the inode's superblock and the inode number.
+ * @f is dereferenced unconditionally, so it must not be NULL.
+ */
+static void nfs4_show_superblock(struct seq_file *s, struct file *f)
+{
+	struct inode *inode = file_inode(f);
+
+	/* i_ino is an unsigned long: print it with %lu, not %ld */
+	seq_printf(s, "superblock: \"%02x:%02x:%lu\"",
+		   MAJOR(inode->i_sb->s_dev),
+		   MINOR(inode->i_sb->s_dev),
+		   inode->i_ino);
+}
+
+/* Print the state owner's opaque owner string, quoted via seq_quote_mem(). */
+static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
+{
+	struct xdr_netobj *owner = &oo->so_owner;
+
+	seq_puts(s, "owner: ");
+	seq_quote_mem(s, owner->data, owner->len);
+}
+
+/*
+ * Show one open stateid as a line of the "states" file: the stateid,
+ * the share access and deny modes, the superblock/inode of the file,
+ * and the owner.  Always returns 0; entries that cannot be shown are
+ * silently skipped.
+ */
+static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
+{
+	struct nfs4_ol_stateid *ols;
+	struct nfs4_stateowner *oo;
+	struct file *file;
+	unsigned int access, deny;
+
+	if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
+		return 0; /* XXX: or SEQ_SKIP? */
+	ols = openlockstateid(st);
+	oo = ols->st_stateowner;
+	file = find_any_file(st->sc_file);
+	/*
+	 * NOTE(review): find_any_file() is defined outside this view; it
+	 * presumably returns NULL when the nfs4_file has no open struct
+	 * file at the moment.  Skip the entry rather than pass NULL to
+	 * nfs4_show_superblock() and fput().
+	 */
+	if (!file)
+		return 0;
+
+	seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid);
+
+	access = bmap_to_share_mode(ols->st_access_bmap);
+	deny = bmap_to_share_mode(ols->st_deny_bmap);
+
+	/* plain "%s%s": "\%" is not a valid C escape sequence */
+	seq_printf(s, "access: %s%s, ",
+		   access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+		   access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+	seq_printf(s, "deny: %s%s, ",
+		   deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+		   deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+
+	nfs4_show_superblock(s, file);
+	seq_puts(s, ", ");
+	nfs4_show_owner(s, oo);
+	seq_puts(s, " }\n");
+	/* drop the file reference obtained from find_any_file() */
+	fput(file);
+
+	return 0;
+}
+
+/*
+ * Show one lock stateid as a line of the "states" file: the stateid,
+ * the superblock/inode of the file, and the lock owner.  Always
+ * returns 0; an entry with no usable file is silently skipped.
+ */
+static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
+{
+	struct nfs4_ol_stateid *ols;
+	struct nfs4_stateowner *oo;
+	struct file *file;
+
+	ols = openlockstateid(st);
+	oo = ols->st_stateowner;
+	file = find_any_file(st->sc_file);
+	/*
+	 * NOTE(review): find_any_file() is defined outside this view; it
+	 * presumably returns NULL when the nfs4_file has no open struct
+	 * file at the moment.  Skip the entry rather than pass NULL to
+	 * nfs4_show_superblock() and fput().
+	 */
+	if (!file)
+		return 0;
+
+	seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid);
+
+	/*
+	 * Note: a lock stateid isn't really the same thing as a lock,
+	 * it's the locking state held by one owner on a file, and there
+	 * may be multiple (or no) lock ranges associated with it.
+	 * (The same, for that matter, is true of open stateids.)
+	 */
+
+	nfs4_show_superblock(s, file);
+	/* XXX: open stateid? */
+	seq_puts(s, ", ");
+	nfs4_show_owner(s, oo);
+	seq_puts(s, " }\n");
+	/* drop the file reference obtained from find_any_file() */
+	fput(file);
+
+	return 0;
+}
+
+/*
+ * Show one delegation stateid as a line of the "states" file: the
+ * stateid, the delegation's access type, and the superblock/inode of
+ * the delegated file.  Always returns 0.
+ *
+ * NOTE(review): fi_deleg_file is used as-is, with no NULL check and no
+ * reference taken here -- confirm it cannot be cleared while the
+ * states file is being read.
+ */
+static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
+{
+	struct nfs4_delegation *ds = delegstateid(st);
+	struct file *file = st->sc_file->fi_deleg_file;
+	const char *mode;
+
+	seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid);
+
+	/* Kinda dead code as long as we only support read delegs: */
+	mode = (ds->dl_type == NFS4_OPEN_DELEGATE_READ) ? "r" : "w";
+	seq_printf(s, "access: %s, ", mode);
+
+	/* XXX: lease time, whether it's being recalled. */
+
+	nfs4_show_superblock(s, file);
+	seq_puts(s, " }\n");
+
+	return 0;
+}
+
+/*
+ * Show one layout stateid as a line of the "states" file: the stateid
+ * and the superblock/inode of the layout's file.  Always returns 0.
+ */
+static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
+{
+	struct nfs4_layout_stateid *ls =
+		container_of(st, struct nfs4_layout_stateid, ls_stid);
+
+	seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid);
+
+	/* XXX: What else would be useful? */
+
+	nfs4_show_superblock(s, ls->ls_file);
+	seq_puts(s, " }\n");
+
+	return 0;
+}
+
+/*
+ * seq_file ->show for the "states" file: dispatch on the stateid type
+ * to the matching nfs4_show_*() helper.  Types without a helper print
+ * nothing and return 0.
+ */
+static int states_show(struct seq_file *s, void *v)
+{
+	struct nfs4_stid *st = v;
+	unsigned int type = st->sc_type;
+
+	if (type == NFS4_OPEN_STID)
+		return nfs4_show_open(s, st);
+	if (type == NFS4_LOCK_STID)
+		return nfs4_show_lock(s, st);
+	if (type == NFS4_DELEG_STID)
+		return nfs4_show_deleg(s, st);
+	if (type == NFS4_LAYOUT_STID)
+		return nfs4_show_layout(s, st);
+
+	/* XXX: copy stateids? */
+	return 0; /* XXX: or SEQ_SKIP? */
+}
+
+/*
+ * Iterator for the per-client "states" file.  The table is never
+ * modified after initialization, so it is const.
+ */
+static const struct seq_operations states_seq_ops = {
+	.start = states_start,
+	.next = states_next,
+	.stop = states_stop,
+	.show = states_show
+};
+
+/*
+ * open() for the per-client "states" file.  Looks up the client for
+ * the inode and stores it as the seq_file's private data; the client
+ * reference is dropped again in client_opens_release().
+ *
+ * Returns -ENXIO if the inode no longer maps to a client, or the error
+ * from seq_open().
+ */
+static int client_states_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *s;
+	struct nfs4_client *clp;
+	int ret;
+
+	clp = get_nfsdfs_clp(inode);
+	if (!clp)
+		return -ENXIO;
+
+	ret = seq_open(file, &states_seq_ops);
+	if (ret) {
+		/* ->release is not called for a failed open: drop the ref here */
+		drop_client(clp);
+		return ret;
+	}
+	s = file->private_data;
+	s->private = clp;
+	return 0;
+}
+
+/*
+ * release() for the per-client "states" file: drop the client
+ * reference stored by client_states_open(), then release the seq_file
+ * that seq_open() set up there.
+ */
+static int client_opens_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct nfs4_client *clp = m->private;
+
+	/* XXX: alternatively, we could get/drop in seq start/stop */
+	drop_client(clp);
+	/* clp was read from m above; m must not be touched after this call */
+	return seq_release(inode, file);
+}
+
+/* File operations for the per-client "states" file (seq_file iterator). */
+static const struct file_operations client_states_fops = {
+	.llseek		= seq_lseek,
+	.read		= seq_read,
+	.release	= client_opens_release,
+	.open		= client_states_open,
+};
+
+/*
+ * Normally we refuse to destroy clients that are in use, but here the
+ * administrator is telling us to just do it. We also want to wait
+ * so the caller has a guarantee that the client's locks are gone by
+ * the time the write returns:
+ */
+static void force_expire_client(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ bool already_expired;
+
+ /* Zero the client's cl_time under cl_lock. */
+ spin_lock(&clp->cl_lock);
+ clp->cl_time = 0;
+ spin_unlock(&clp->cl_lock);
+
+ /* Sleep on expiry_wq until no RPC users of the client remain. */
+ wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
+ spin_lock(&nn->client_lock);
+ /* An empty cl_lru entry is taken to mean someone else already unhashed it. */
+ already_expired = list_empty(&clp->cl_lru);
+ if (!already_expired)
+ unhash_client_locked(clp);
+ spin_unlock(&nn->client_lock);
+
+ /*
+ * Either expire the client ourselves, or wait for the other party
+ * to finish: free_client() clears cl_nfsd_dentry and wakes expiry_wq.
+ */
+ if (!already_expired)
+ expire_client(clp);
+ else
+ wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL);
+}
+
+/*
+ * write() for the per-client "ctl" file.  The only accepted input is
+ * exactly "expire\n", which force-expires the client.
+ *
+ * Returns the number of bytes consumed (7) on success, -EINVAL for any
+ * other input, -ENXIO if the inode no longer maps to a client, or the
+ * error from simple_transaction_get().
+ */
+static ssize_t client_ctl_write(struct file *file, const char __user *buf,
+				size_t size, loff_t *pos)
+{
+	static const char cmd[] = "expire\n";
+	const size_t cmd_len = sizeof(cmd) - 1;	/* 7, excluding the NUL */
+	struct nfs4_client *clp;
+	char *data;
+
+	data = simple_transaction_get(file, buf, size);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+	if (size != cmd_len || memcmp(data, cmd, cmd_len) != 0)
+		return -EINVAL;
+
+	clp = get_nfsdfs_clp(file_inode(file));
+	if (clp == NULL)
+		return -ENXIO;
+
+	force_expire_client(clp);
+	drop_client(clp);
+	return cmd_len;
+}
+
+/* File operations for the per-client "ctl" file (write-only simple transaction). */
+static const struct file_operations client_ctl_fops = {
+	.release	= simple_transaction_release,
+	.write		= client_ctl_write,
+};
+
+/*
+ * Files created in each client's nfsdfs directory; passed to
+ * nfsd_client_mkdir() by create_client().  The entry with an empty
+ * name terminates the table.
+ */
+static const struct tree_descr client_files[] = {
+	[0] = { .name = "info",   .ops = &client_info_fops,   .mode = S_IRUSR },
+	[1] = { .name = "states", .ops = &client_states_fops, .mode = S_IRUSR },
+	[2] = { .name = "ctl",    .ops = &client_ctl_fops,    .mode = S_IRUSR|S_IWUSR },
+	[3] = { .name = "" },
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2206,6 +2575,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
struct sockaddr *sa = svc_addr(rqstp);
int ret;
struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
clp = alloc_client(name);
if (clp == NULL)
@@ -2216,13 +2586,22 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ gen_clid(clp, nn);
+ kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
- rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
+ memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
clp->net = net;
+ clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ client_files);
+ if (!clp->cl_nfsd_dentry) {
+ free_client(clp);
+ return NULL;
+ }
return clp;
}
@@ -2533,6 +2912,22 @@ static bool client_has_state(struct nfs4_client *clp)
|| !list_empty(&clp->async_copies);
}
+/*
+ * Copy the implementation id (domain, name, time) from a decoded
+ * EXCHANGE_ID request into the client.  Does nothing when the request
+ * carried no implementation domain.
+ *
+ * Returns nfs_ok on success or nfserr_jukebox if a copy could not be
+ * allocated.  A buffer duplicated before a later failure stays in the
+ * client; __free_client() kfree()s both buffers.
+ */
+static __be32 copy_impl_id(struct nfs4_client *clp,
+				struct nfsd4_exchange_id *exid)
+{
+	if (exid->nii_domain.data == NULL)
+		return nfs_ok;
+
+	xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL);
+	if (clp->cl_nii_domain.data == NULL)
+		return nfserr_jukebox;
+
+	xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
+	if (clp->cl_nii_name.data == NULL)
+		return nfserr_jukebox;
+
+	clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec;
+	clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec;
+
+	return nfs_ok;
+}
+
__be32
nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -2559,6 +2954,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
new = create_client(exid->clname, rqstp, &verf);
if (new == NULL)
return nfserr_jukebox;
+ status = copy_impl_id(new, exid);
+ if (status)
+ goto out_nolock;
switch (exid->spa_how) {
case SP4_MACH_CRED:
@@ -2667,7 +3065,6 @@ out_new:
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
- gen_clid(new, nn);
add_to_unconfirmed(new);
swap(new, conf);
out_copy:
@@ -3411,7 +3808,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
copy_clid(new, conf);
gen_confirm(new, nn);
} else /* case 4 (new client) or cases 2, 3 (client reboot): */
- gen_clid(new, nn);
+ ;
new->cl_minorversion = 0;
gen_callback(new, setclid, rqstp);
add_to_unconfirmed(new);
@@ -3632,12 +4029,11 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
if (!sop)
return NULL;
- sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
+ xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL);
if (!sop->so_owner.data) {
kmem_cache_free(slab, sop);
return NULL;
}
- sop->so_owner.len = owner->len;
INIT_LIST_HEAD(&sop->so_stateids);
sop->so_client = clp;
@@ -4092,7 +4488,7 @@ static __be32 lookup_clientid(clientid_t *clid,
spin_unlock(&nn->client_lock);
return nfserr_expired;
}
- atomic_inc(&found->cl_refcount);
+ atomic_inc(&found->cl_rpc_users);
spin_unlock(&nn->client_lock);
/* Cache the nfs4_client in cstate! */
@@ -5725,12 +6121,11 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
if (fl->fl_lmops == &nfsd_posix_mng_ops) {
lo = (struct nfs4_lockowner *) fl->fl_owner;
- deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
- lo->lo_owner.so_owner.len, GFP_KERNEL);
+ xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner,
+ GFP_KERNEL);
if (!deny->ld_owner.data)
/* We just don't care that much */
goto nevermind;
- deny->ld_owner.len = lo->lo_owner.so_owner.len;
deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
} else {
nevermind:
@@ -6584,7 +6979,7 @@ nfs4_check_open_reclaim(clientid_t *clid,
static inline void
put_client(struct nfs4_client *clp)
{
- atomic_dec(&clp->cl_refcount);
+ atomic_dec(&clp->cl_rpc_users);
}
static struct nfs4_client *
@@ -6702,7 +7097,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
return;
lockdep_assert_held(&nn->client_lock);
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
list_add(&lst->st_locks, collect);
}
@@ -6731,7 +7126,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
* Despite the fact that these functions deal
* with 64-bit integers for "count", we must
* ensure that it doesn't blow up the
- * clp->cl_refcount. Throw a warning if we
+ * clp->cl_rpc_users. Throw a warning if we
* start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -6855,7 +7250,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
if (func) {
func(oop);
if (collect) {
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
list_add(&oop->oo_perclient, collect);
}
}
@@ -6863,7 +7258,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
+ * it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -6993,7 +7388,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
if (dp->dl_time != 0)
continue;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, victims);
}
@@ -7001,7 +7396,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
+ * it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 52c4f6daa649..442811809f3d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -269,19 +269,13 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
return ret;
}
-/*
- * We require the high 32 bits of 'seconds' to be 0, and
- * we ignore all 32 bits of 'nseconds'.
- */
static __be32
-nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv)
+nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
{
DECODE_HEAD;
- u64 sec;
READ_BUF(12);
- p = xdr_decode_hyper(p, &sec);
- tv->tv_sec = sec;
+ p = xdr_decode_hyper(p, &tv->tv_sec);
tv->tv_nsec = be32_to_cpup(p++);
if (tv->tv_nsec >= (u32)1000000000)
return nfserr_inval;
@@ -320,7 +314,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
struct iattr *iattr, struct nfs4_acl **acl,
struct xdr_netobj *label, int *umask)
{
- struct timespec ts;
int expected_len, len = 0;
u32 dummy32;
char *buf;
@@ -422,8 +415,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_atime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_time(argp, &iattr->ia_atime);
if (status)
return status;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -442,8 +434,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_mtime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_time(argp, &iattr->ia_mtime);
if (status)
return status;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -1398,7 +1389,6 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
goto xdr_error;
}
- /* Ignore Implementation ID */
READ_BUF(4); /* nfs_impl_id4 array length */
dummy = be32_to_cpup(p++);
@@ -1406,21 +1396,19 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
goto xdr_error;
if (dummy == 1) {
- /* nii_domain */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ status = nfsd4_decode_opaque(argp, &exid->nii_domain);
+ if (status)
+ goto xdr_error;
/* nii_name */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ status = nfsd4_decode_opaque(argp, &exid->nii_name);
+ if (status)
+ goto xdr_error;
/* nii_date */
- READ_BUF(12);
- p += 3;
+ status = nfsd4_decode_time(argp, &exid->nii_time);
+ if (status)
+ goto xdr_error;
}
DECODE_TAIL;
}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index da52b594362a..26ad75ae2be0 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -9,6 +9,7 @@
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sunrpc/addr.h>
@@ -35,48 +36,12 @@ struct nfsd_drc_bucket {
spinlock_t cache_lock;
};
-static struct nfsd_drc_bucket *drc_hashtbl;
-static struct kmem_cache *drc_slab;
-
-/* max number of entries allowed in the cache */
-static unsigned int max_drc_entries;
-
-/* number of significant bits in the hash value */
-static unsigned int maskbits;
-static unsigned int drc_hashsize;
-
-/*
- * Stats and other tracking of on the duplicate reply cache. All of these and
- * the "rc" fields in nfsdstats are protected by the cache_lock
- */
-
-/* total number of entries */
-static atomic_t num_drc_entries;
-
-/* cache misses due only to checksum comparison failures */
-static unsigned int payload_misses;
-
-/* amount of memory (in bytes) currently consumed by the DRC */
-static unsigned int drc_mem_usage;
-
-/* longest hash chain seen */
-static unsigned int longest_chain;
-
-/* size of cache when we saw the longest hash chain */
-static unsigned int longest_chain_cachesize;
-
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
-static struct shrinker nfsd_reply_cache_shrinker = {
- .scan_objects = nfsd_reply_cache_scan,
- .count_objects = nfsd_reply_cache_count,
- .seeks = 1,
-};
-
/*
* Put a cap on the size of the DRC based on the amount of available
* low memory in the machine.
@@ -94,6 +59,9 @@ static struct shrinker nfsd_reply_cache_shrinker = {
* ...with a hard cap of 256k entries. In the worst case, each entry will be
* ~1k, so the above numbers should give a rough max of the amount of memory
* used in k.
+ *
+ * XXX: these limits are per-container, so memory used will increase
+ * linearly with number of containers. Maybe that's OK.
*/
static unsigned int
nfsd_cache_size_limit(void)
@@ -116,17 +84,18 @@ nfsd_hashsize(unsigned int limit)
}
static u32
-nfsd_cache_hash(__be32 xid)
+nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
{
- return hash_32(be32_to_cpu(xid), maskbits);
+ return hash_32(be32_to_cpu(xid), nn->maskbits);
}
static struct svc_cacherep *
-nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
+nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
{
struct svc_cacherep *rp;
- rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
+ rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL);
if (rp) {
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
@@ -147,91 +116,101 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
}
static void
-nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+ struct nfsd_net *nn)
{
if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
- drc_mem_usage -= rp->c_replvec.iov_len;
+ nn->drc_mem_usage -= rp->c_replvec.iov_len;
kfree(rp->c_replvec.iov_base);
}
if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru);
- atomic_dec(&num_drc_entries);
- drc_mem_usage -= sizeof(*rp);
+ atomic_dec(&nn->num_drc_entries);
+ nn->drc_mem_usage -= sizeof(*rp);
}
- kmem_cache_free(drc_slab, rp);
+ kmem_cache_free(nn->drc_slab, rp);
}
static void
-nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+ struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
spin_unlock(&b->cache_lock);
}
-int nfsd_reply_cache_init(void)
+int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
unsigned int i;
int status = 0;
- max_drc_entries = nfsd_cache_size_limit();
- atomic_set(&num_drc_entries, 0);
- hashsize = nfsd_hashsize(max_drc_entries);
- maskbits = ilog2(hashsize);
+ nn->max_drc_entries = nfsd_cache_size_limit();
+ atomic_set(&nn->num_drc_entries, 0);
+ hashsize = nfsd_hashsize(nn->max_drc_entries);
+ nn->maskbits = ilog2(hashsize);
- status = register_shrinker(&nfsd_reply_cache_shrinker);
+ nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
+ nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
+ nn->nfsd_reply_cache_shrinker.seeks = 1;
+ status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
if (status)
- return status;
-
- drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
- 0, 0, NULL);
- if (!drc_slab)
goto out_nomem;
- drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
- if (!drc_hashtbl) {
- drc_hashtbl = vzalloc(array_size(hashsize,
- sizeof(*drc_hashtbl)));
- if (!drc_hashtbl)
- goto out_nomem;
+ nn->drc_slab = kmem_cache_create("nfsd_drc",
+ sizeof(struct svc_cacherep), 0, 0, NULL);
+ if (!nn->drc_slab)
+ goto out_shrinker;
+
+ nn->drc_hashtbl = kcalloc(hashsize,
+ sizeof(*nn->drc_hashtbl), GFP_KERNEL);
+ if (!nn->drc_hashtbl) {
+ nn->drc_hashtbl = vzalloc(array_size(hashsize,
+ sizeof(*nn->drc_hashtbl)));
+ if (!nn->drc_hashtbl)
+ goto out_slab;
}
for (i = 0; i < hashsize; i++) {
- INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
- spin_lock_init(&drc_hashtbl[i].cache_lock);
+ INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head);
+ spin_lock_init(&nn->drc_hashtbl[i].cache_lock);
}
- drc_hashsize = hashsize;
+ nn->drc_hashsize = hashsize;
return 0;
+out_slab:
+ kmem_cache_destroy(nn->drc_slab);
+out_shrinker:
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
- nfsd_reply_cache_shutdown();
return -ENOMEM;
}
-void nfsd_reply_cache_shutdown(void)
+void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
struct svc_cacherep *rp;
unsigned int i;
- unregister_shrinker(&nfsd_reply_cache_shrinker);
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
- for (i = 0; i < drc_hashsize; i++) {
- struct list_head *head = &drc_hashtbl[i].lru_head;
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
rp = list_first_entry(head, struct svc_cacherep, c_lru);
- nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp);
+ nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
+ rp, nn);
}
}
- kvfree(drc_hashtbl);
- drc_hashtbl = NULL;
- drc_hashsize = 0;
+ kvfree(nn->drc_hashtbl);
+ nn->drc_hashtbl = NULL;
+ nn->drc_hashsize = 0;
- kmem_cache_destroy(drc_slab);
- drc_slab = NULL;
+ kmem_cache_destroy(nn->drc_slab);
+ nn->drc_slab = NULL;
}
/*
@@ -246,7 +225,7 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
}
static long
-prune_bucket(struct nfsd_drc_bucket *b)
+prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
{
struct svc_cacherep *rp, *tmp;
long freed = 0;
@@ -258,10 +237,10 @@ prune_bucket(struct nfsd_drc_bucket *b)
*/
if (rp->c_state == RC_INPROG)
continue;
- if (atomic_read(&num_drc_entries) <= max_drc_entries &&
+ if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
freed++;
}
return freed;
@@ -272,18 +251,18 @@ prune_bucket(struct nfsd_drc_bucket *b)
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long
-prune_cache_entries(void)
+prune_cache_entries(struct nfsd_net *nn)
{
unsigned int i;
long freed = 0;
- for (i = 0; i < drc_hashsize; i++) {
- struct nfsd_drc_bucket *b = &drc_hashtbl[i];
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
spin_lock(&b->cache_lock);
- freed += prune_bucket(b);
+ freed += prune_bucket(b, nn);
spin_unlock(&b->cache_lock);
}
return freed;
@@ -292,13 +271,19 @@ prune_cache_entries(void)
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return atomic_read(&num_drc_entries);
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return atomic_read(&nn->num_drc_entries);
}
static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- return prune_cache_entries();
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return prune_cache_entries(nn);
}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
@@ -334,11 +319,12 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
}
static int
-nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp)
+nfsd_cache_key_cmp(const struct svc_cacherep *key,
+ const struct svc_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum)
- ++payload_misses;
+ ++nn->payload_misses;
return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key));
}
@@ -349,7 +335,8 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp
* inserts an empty key on failure.
*/
static struct svc_cacherep *
-nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
+nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
+ struct nfsd_net *nn)
{
struct svc_cacherep *rp, *ret = key;
struct rb_node **p = &b->rb_head.rb_node,
@@ -362,7 +349,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
parent = *p;
rp = rb_entry(parent, struct svc_cacherep, c_node);
- cmp = nfsd_cache_key_cmp(key, rp);
+ cmp = nfsd_cache_key_cmp(key, rp, nn);
if (cmp < 0)
p = &parent->rb_left;
else if (cmp > 0)
@@ -376,14 +363,14 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
rb_insert_color(&key->c_node, &b->rb_head);
out:
/* tally hash chain length stats */
- if (entries > longest_chain) {
- longest_chain = entries;
- longest_chain_cachesize = atomic_read(&num_drc_entries);
- } else if (entries == longest_chain) {
+ if (entries > nn->longest_chain) {
+ nn->longest_chain = entries;
+ nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries);
+ } else if (entries == nn->longest_chain) {
/* prefer to keep the smallest cachesize possible here */
- longest_chain_cachesize = min_t(unsigned int,
- longest_chain_cachesize,
- atomic_read(&num_drc_entries));
+ nn->longest_chain_cachesize = min_t(unsigned int,
+ nn->longest_chain_cachesize,
+ atomic_read(&nn->num_drc_entries));
}
lru_put_end(b, ret);
@@ -400,11 +387,12 @@ out:
int
nfsd_cache_lookup(struct svc_rqst *rqstp)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp, *found;
__be32 xid = rqstp->rq_xid;
__wsum csum;
- u32 hash = nfsd_cache_hash(xid);
- struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
+ u32 hash = nfsd_cache_hash(xid, nn);
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
int type = rqstp->rq_cachetype;
int rtn = RC_DOIT;
@@ -420,16 +408,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
- rp = nfsd_reply_cache_alloc(rqstp, csum);
+ rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
if (!rp) {
dprintk("nfsd: unable to allocate DRC entry!\n");
return rtn;
}
spin_lock(&b->cache_lock);
- found = nfsd_cache_insert(b, rp);
+ found = nfsd_cache_insert(b, rp, nn);
if (found != rp) {
- nfsd_reply_cache_free_locked(NULL, rp);
+ nfsd_reply_cache_free_locked(NULL, rp, nn);
rp = found;
goto found_entry;
}
@@ -438,11 +426,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rqstp->rq_cacherep = rp;
rp->c_state = RC_INPROG;
- atomic_inc(&num_drc_entries);
- drc_mem_usage += sizeof(*rp);
+ atomic_inc(&nn->num_drc_entries);
+ nn->drc_mem_usage += sizeof(*rp);
/* go ahead and prune the cache */
- prune_bucket(b);
+ prune_bucket(b, nn);
out:
spin_unlock(&b->cache_lock);
return rtn;
@@ -477,7 +465,7 @@ found_entry:
break;
default:
printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
}
goto out;
@@ -502,6 +490,7 @@ found_entry:
void
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
u32 hash;
@@ -512,15 +501,15 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
if (!rp)
return;
- hash = nfsd_cache_hash(rp->c_key.k_xid);
- b = &drc_hashtbl[hash];
+ hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
+ b = &nn->drc_hashtbl[hash];
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
@@ -535,18 +524,18 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
bufsize = len << 2;
cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
if (!cachv->iov_base) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
cachv->iov_len = bufsize;
memcpy(cachv->iov_base, statp, bufsize);
break;
case RC_NOCACHE:
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
spin_lock(&b->cache_lock);
- drc_mem_usage += bufsize;
+ nn->drc_mem_usage += bufsize;
lru_put_end(b, rp);
rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype;
@@ -582,21 +571,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
*/
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "max entries: %u\n", max_drc_entries);
+ /*
+ * single_open() stores its data argument in m->private; v is only
+ * the seq_file iterator token and must not be dereferenced.
+ */
+ struct nfsd_net *nn = m->private;
+
+ seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
- atomic_read(&num_drc_entries));
- seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
- seq_printf(m, "mem usage: %u\n", drc_mem_usage);
+ atomic_read(&nn->num_drc_entries));
+ seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
+ seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage);
seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses);
seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache);
- seq_printf(m, "payload misses: %u\n", payload_misses);
- seq_printf(m, "longest chain len: %u\n", longest_chain);
- seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
+ seq_printf(m, "payload misses: %u\n", nn->payload_misses);
+ seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
+ seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
}
+/*
+ * Open handler for the reply cache statistics file.  Looks up the nfsd_net
+ * for the network namespace stored in the superblock's s_fs_info and passes
+ * it as the data argument to single_open().
+ */
int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
{
- return single_open(file, nfsd_reply_cache_stats_show, NULL);
+ struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info,
+ nfsd_net_id);
+
+ return single_open(file, nfsd_reply_cache_stats_show, nn);
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 62c58cfeb8d8..72fad54fc7e5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -16,6 +16,7 @@
#include <linux/sunrpc/gss_krb5_enctypes.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/module.h>
+#include <linux/fsnotify.h>
#include "idmap.h"
#include "nfsd.h"
@@ -53,6 +54,7 @@ enum {
NFSD_RecoveryDir,
NFSD_V4EndGrace,
#endif
+ NFSD_MaxReserved
};
/*
@@ -1147,8 +1149,201 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
* populating the filesystem.
*/
+/*
+ * Allocate and initialise a new inode on @sb with the given @mode
+ * (basically a copy of rpc_get_inode).  Directory inodes additionally get
+ * the simple_dir operations and an incremented link count.
+ *
+ * Returns the inode, or NULL if new_inode() fails.
+ */
+static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
+{
+	struct inode *ino = new_inode(sb);
+
+	if (ino == NULL)
+		return NULL;
+
+	/* Following advice from simple_fill_super documentation: */
+	ino->i_ino = iunique(sb, NFSD_MaxReserved);
+	ino->i_mode = mode;
+	ino->i_atime = ino->i_mtime = ino->i_ctime = current_time(ino);
+
+	/* Only directories need any further setup. */
+	if ((mode & S_IFMT) == S_IFDIR) {
+		ino->i_fop = &simple_dir_operations;
+		ino->i_op = &simple_dir_inode_operations;
+		inc_nlink(ino);
+	}
+	return ino;
+}
+
+/*
+ * Back @dentry with a freshly allocated inode of the given @mode inside
+ * @dir: attach the inode to the dentry, bump the link count of @dir and
+ * send the fsnotify mkdir event.
+ *
+ * Returns 0 on success, -ENOMEM if no inode could be allocated.
+ */
+static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct inode *child = nfsd_get_inode(dir->i_sb, mode);
+
+	if (child == NULL)
+		return -ENOMEM;
+
+	d_add(dentry, child);
+	inc_nlink(dir);
+	fsnotify_mkdir(dir, dentry);
+	return 0;
+}
+
+/*
+ * Create directory @name below @parent while holding the parent's inode
+ * lock.  When @ncl is non-NULL it is stored in the new inode's i_private
+ * and a reference is taken on it.
+ *
+ * Returns the new dentry on success, or an ERR_PTR() carrying a negative
+ * errno on failure.
+ */
+static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name)
+{
+	struct inode *dir = d_inode(parent);
+	struct dentry *dentry;
+	int ret = -ENOMEM;
+
+	inode_lock(dir);
+	dentry = d_alloc_name(parent, name);
+	if (!dentry)
+		goto out_err;
+	ret = __nfsd_mkdir(dir, dentry, S_IFDIR | 0600);
+	if (ret)
+		goto out_err;
+	if (ncl) {
+		d_inode(dentry)->i_private = ncl;
+		kref_get(&ncl->cl_ref);
+	}
+out:
+	inode_unlock(dir);
+	return dentry;
+out_err:
+	/*
+	 * Do not leak the dentry allocated above when inode creation failed;
+	 * dput() ignores a NULL dentry, so the d_alloc_name() failure path can
+	 * share this label.
+	 */
+	dput(dentry);
+	dentry = ERR_PTR(ret);
+	goto out;
+}
+
+/*
+ * Detach the nfsdfs_client from @inode: clear i_private, wait for an RCU
+ * grace period, then drop one reference using the client's own cl_release
+ * callback.
+ */
+static void clear_ncl(struct inode *inode)
+{
+	struct nfsdfs_client *client = inode->i_private;
+
+	inode->i_private = NULL;
+	/* presumably lets readers in get_nfsdfs_client() finish first */
+	synchronize_rcu();
+	kref_put(&client->cl_ref, client->cl_release);
+}
+
+
+/*
+ * Return the nfsdfs_client stored in @inode's i_private with an additional
+ * reference taken on it, or NULL when i_private is not set.
+ */
+static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode)
+{
+	struct nfsdfs_client *client = inode->i_private;
+
+	if (client == NULL)
+		return NULL;
+
+	kref_get(&client->cl_ref);
+	return client;
+}
+
+/*
+ * Look up the nfsdfs_client attached to @inode inside an RCU read-side
+ * critical section.  Returns the client with a reference held, or NULL.
+ */
+struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
+{
+	struct nfsdfs_client *client;
+
+	rcu_read_lock();
+	client = __get_nfsdfs_client(inode);
+	rcu_read_unlock();
+
+	return client;
+}
+/*
+ * Remove a single file from @dir (pattern taken from __rpc_unlink): release
+ * the client reference held through the file's inode, then unlink and
+ * delete the dentry while holding a temporary reference on it.  A failing
+ * simple_unlink() produces a one-time warning.
+ */
+static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
+{
+	int err;
+
+	clear_ncl(d_inode(dentry));
+
+	dget(dentry);
+	err = simple_unlink(dir, dentry);
+	d_delete(dentry);
+	dput(dentry);
+
+	WARN_ON_ONCE(err);
+}
+
+/*
+ * Remove every child of @root.  A child that is not a positive dentry is
+ * not expected; it triggers a one-time warning and is skipped.
+ */
+static void nfsdfs_remove_files(struct dentry *root)
+{
+	struct dentry *child, *next;
+
+	list_for_each_entry_safe(child, next, &root->d_subdirs, d_child) {
+		if (simple_positive(child)) {
+			nfsdfs_remove_file(d_inode(root), child);
+			continue;
+		}
+		WARN_ON_ONCE(1); /* I think this can't happen? */
+	}
+}
+
+/*
+ * Populate directory @root with the regular files described by @files.
+ * The table ends at the first entry whose name is NULL or empty.  Each new
+ * inode gets the entry's file operations and its own reference on the
+ * nfsdfs_client attached to the directory inode.
+ *
+ * Returns 0 on success.  If an allocation fails, all children of @root are
+ * removed again and -ENOMEM is returned.
+ *
+ * XXX: cut'n'paste from simple_fill_super; figure out if we could share
+ * code instead.
+ */
+static int nfsdfs_create_files(struct dentry *root,
+				const struct tree_descr *files)
+{
+	struct inode *dir = d_inode(root);
+	struct inode *inode;
+	struct dentry *dentry;
+
+	inode_lock(dir);
+	/* the loop condition already rejects NULL names; no extra check needed */
+	for (; files->name && files->name[0]; files++) {
+		dentry = d_alloc_name(root, files->name);
+		if (!dentry)
+			goto out;
+		inode = nfsd_get_inode(dir->i_sb, S_IFREG | files->mode);
+		if (!inode) {
+			dput(dentry);
+			goto out;
+		}
+		inode->i_fop = files->ops;
+		inode->i_private = __get_nfsdfs_client(dir);
+		d_add(dentry, inode);
+		fsnotify_create(dir, dentry);
+	}
+	inode_unlock(dir);
+	return 0;
+out:
+	nfsdfs_remove_files(root);
+	inode_unlock(dir);
+	return -ENOMEM;
+}
+
+/*
+ * Create a per-client directory under nn->nfsd_client_dir, named after the
+ * decimal value of @id, attach @ncl to it and populate it with @files.
+ *
+ * Returns the dentry of the new directory on success, or NULL on any
+ * failure (the underlying error code is not propagated).
+ */
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+		struct nfsdfs_client *ncl, u32 id,
+		const struct tree_descr *files)
+{
+	struct dentry *dentry;
+	char name[11];	/* at most 10 decimal digits for a u32, plus NUL */
+	int ret;
+
+	snprintf(name, sizeof(name), "%u", id);
+
+	dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
+	if (IS_ERR(dentry)) /* XXX: tossing errors? */
+		return NULL;
+	ret = nfsdfs_create_files(dentry, files);
+	if (ret) {
+		nfsd_client_rmdir(dentry);
+		return NULL;
+	}
+	return dentry;
+}
+
+/*
+ * Remove a per-client directory (taken from __rpc_rmdir).  With the parent
+ * inode locked: remove the files inside the directory, release the client
+ * reference held through the directory inode, then remove and delete the
+ * directory itself.  A failing simple_rmdir() produces a one-time warning.
+ */
+void nfsd_client_rmdir(struct dentry *dentry)
+{
+	struct inode *dir = d_inode(dentry->d_parent);
+	struct inode *inode = d_inode(dentry);
+	int ret;
+
+	inode_lock(dir);
+	nfsdfs_remove_files(dentry);
+	clear_ncl(inode);
+	dget(dentry);
+	ret = simple_rmdir(dir, dentry);
+	WARN_ON_ONCE(ret);
+	d_delete(dentry);
+	/* balance the dget() above, as nfsdfs_remove_file() does */
+	dput(dentry);
+	inode_unlock(dir);
+}
+
static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
{
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ struct dentry *dentry;
+ int ret;
+
static const struct tree_descr nfsd_files[] = {
[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
@@ -1178,7 +1373,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
/* last one */ {""}
};
get_net(sb->s_fs_info);
- return simple_fill_super(sb, 0x6e667364, nfsd_files);
+ ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
+ if (ret)
+ return ret;
+ dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ nn->nfsd_client_dir = dentry;
+ return 0;
+
}
static struct dentry *nfsd_mount(struct file_system_type *fs_type,
@@ -1232,6 +1435,7 @@ unsigned int nfsd_net_id;
static __net_init int nfsd_init_net(struct net *net)
{
int retval;
+ struct vfsmount *mnt;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
retval = nfsd_export_init(net);
@@ -1242,18 +1446,33 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_drc_error;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
nn->somebody_reclaimed = false;
nn->track_reclaim_completes = false;
nn->clverifier_counter = prandom_u32();
- nn->clientid_counter = prandom_u32();
+ nn->clientid_base = prandom_u32();
+ nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
atomic_set(&nn->ntf_refcnt, 0);
init_waitqueue_head(&nn->ntf_wq);
+
+ mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+ if (IS_ERR(mnt)) {
+ retval = PTR_ERR(mnt);
+ goto out_mount_err;
+ }
+ nn->nfsd_mnt = mnt;
return 0;
+out_mount_err:
+ nfsd_reply_cache_shutdown(nn);
+out_drc_error:
+ nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
@@ -1262,6 +1481,10 @@ out_export_error:
static __net_exit void nfsd_exit_net(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ mntput(nn->nfsd_mnt);
+ nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
@@ -1295,9 +1518,6 @@ static int __init init_nfsd(void)
if (retval)
goto out_exit_pnfs;
nfsd_stat_init(); /* Statistics */
- retval = nfsd_reply_cache_init();
- if (retval)
- goto out_free_stat;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
@@ -1311,8 +1531,6 @@ out_free_all:
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
- nfsd_reply_cache_shutdown();
-out_free_stat:
nfsd_stat_shutdown();
nfsd_fault_inject_cleanup();
out_exit_pnfs:
@@ -1328,7 +1546,6 @@ out_unregister_pernet:
static void __exit exit_nfsd(void)
{
- nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 24187b5dd638..af2947551e9c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -22,6 +22,7 @@
#include <uapi/linux/nfsd/debug.h>
+#include "netns.h"
#include "stats.h"
#include "export.h"
@@ -86,6 +87,16 @@ int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_destroy(struct net *net);
+/*
+ * Reference-counted handle that nfsdfs inodes point to through i_private.
+ */
+struct nfsdfs_client {
+ struct kref cl_ref; /* reference count */
+ void (*cl_release)(struct kref *kref); /* release callback handed to kref_put() */
+};
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *);
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
+void nfsd_client_rmdir(struct dentry *dentry);
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
extern const struct svc_version nfsd_acl_version2;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 0b74d371ed67..8cb20cab012b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -39,6 +39,7 @@
#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
#include "nfsfh.h"
+#include "nfsd.h"
typedef struct {
u32 cl_boot;
@@ -316,6 +317,10 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
+ /* NFSv4.1 client implementation id: */
+ struct xdr_netobj cl_nii_domain;
+ struct xdr_netobj cl_nii_name;
+ struct timespec cl_nii_time;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
@@ -347,9 +352,13 @@ struct nfs4_client {
struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */
- atomic_t cl_refcount;
+ atomic_t cl_rpc_users;
+ struct nfsdfs_client cl_nfsdfs;
struct nfs4_op_map cl_spo_must_allow;
+ /* debugging info directory under nfsd/clients/ : */
+ struct dentry *cl_nfsd_dentry;
+
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
unsigned long cl_cb_slot_busy;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fc24ee47eab5..c85783e536d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -404,7 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/*
* If utimes(2) and friends are called with times not NULL, we should
* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
- * will return EACCESS, when the caller's effective UID does not match
+ * will return EACCES, when the caller's effective UID does not match
* the owner of the file, and the caller is not privileged. In this
* situation, we should return EPERM(notify_change will return this).
*/
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index feeb6d4bdffd..d64c870f998a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -410,6 +410,9 @@ struct nfsd4_exchange_id {
int spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
+ struct xdr_netobj nii_domain;
+ struct xdr_netobj nii_name;
+ struct timespec64 nii_time;
};
struct nfsd4_sequence {
@@ -472,7 +475,7 @@ struct nfsd4_layoutcommit {
u32 lc_reclaim; /* request */
u32 lc_newoffset; /* request */
u64 lc_last_wr; /* request */
- struct timespec lc_mtime; /* request */
+ struct timespec64 lc_mtime; /* request */
u32 lc_layout_type; /* request */
u32 lc_up_len; /* layout length */
void *lc_up_layout; /* decoded by callback */
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a90bb19dcfa2..91006f47e420 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -920,6 +920,22 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
return 0;
}
+/*
+ * Check whether the events in @mask may be requested for the filesystem
+ * that @path is on.  Returns 0 if they may, -EINVAL otherwise.
+ */
+static int fanotify_events_supported(struct path *path, __u64 mask)
+{
+	/* Only permission events are restricted. */
+	if (!(mask & FANOTIFY_PERM_EVENTS))
+		return 0;
+
+	/*
+	 * Some filesystems such as 'proc' acquire unusual locks when opening
+	 * files. For them fanotify permission events have high chances of
+	 * deadlocking the system - open done when reporting fanotify event
+	 * blocks on this "unusual" lock while another process holding the lock
+	 * waits for fanotify permission event to be answered. Just disallow
+	 * permission events for such filesystems.
+	 */
+	if (path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
+		return -EINVAL;
+
+	return 0;
+}
+
static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
@@ -1018,6 +1034,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (ret)
goto fput_and_out;
+ if (flags & FAN_MARK_ADD) {
+ ret = fanotify_events_supported(&path, mask);
+ if (ret)
+ goto path_put_and_out;
+ }
+
if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
ret = fanotify_test_fid(&path, &__fsid);
if (ret)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4eb2ebfac468..2ecef6155fc0 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -95,47 +95,6 @@ void fsnotify_sb_delete(struct super_block *sb)
}
/*
- * fsnotify_nameremove - a filename was removed from a directory
- *
- * This is mostly called under parent vfs inode lock so name and
- * dentry->d_parent should be stable. However there are some corner cases where
- * inode lock is not held. So to be on the safe side and be reselient to future
- * callers and out of tree users of d_delete(), we do not assume that d_parent
- * and d_name are stable and we use dget_parent() and
- * take_dentry_name_snapshot() to grab stable references.
- */
-void fsnotify_nameremove(struct dentry *dentry, int isdir)
-{
- struct dentry *parent;
- struct name_snapshot name;
- __u32 mask = FS_DELETE;
-
- /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */
- if (IS_ROOT(dentry))
- return;
-
- if (isdir)
- mask |= FS_ISDIR;
-
- parent = dget_parent(dentry);
- /* Avoid unneeded take_dentry_name_snapshot() */
- if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) &&
- !(dentry->d_sb->s_fsnotify_mask & FS_DELETE))
- goto out_dput;
-
- take_dentry_name_snapshot(&name, dentry);
-
- fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
- &name.name, 0);
-
- release_dentry_name_snapshot(&name);
-
-out_dput:
- dput(parent);
-}
-EXPORT_SYMBOL(fsnotify_nameremove);
-
-/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 8b145e7b9661..522199e9525e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -211,7 +211,7 @@ static struct file_system_type proc_fs_type = {
.init_fs_context = proc_init_fs_context,
.parameters = &proc_fs_parameters,
.kill_sb = proc_kill_sb,
- .fs_flags = FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
};
void __init proc_root_init(void)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 58f15a083dd1..be9c471cdbc8 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -223,9 +223,9 @@ static void put_quota_format(struct quota_format_type *fmt)
/*
* Dquot List Management:
- * The quota code uses three lists for dquot management: the inuse_list,
- * free_dquots, and dquot_hash[] array. A single dquot structure may be
- * on all three lists, depending on its current state.
+ * The quota code uses four lists for dquot management: the inuse_list,
+ * free_dquots, dqi_dirty_list, and dquot_hash[] array. A single dquot
+ * structure may be on some of those lists, depending on its current state.
*
* All dquots are placed to the end of inuse_list when first created, and this
* list is used for invalidate operation, which must look at every dquot.
@@ -236,6 +236,11 @@ static void put_quota_format(struct quota_format_type *fmt)
* dqstats.free_dquots gives the number of dquots on the list. When
* dquot is invalidated it's completely released from memory.
*
+ * Dirty dquots are added to the dqi_dirty_list of quota_info when they are
+ * marked dirty, and this list is searched when writing dirty dquots back to
+ * the quota file. Note that some filesystems do dirty dquot tracking on their
+ * own (e.g. in a journal) and thus don't use dqi_dirty_list.
+ *
* Dquots with a specific identity (device, type and id) are placed on
* one of the dquot_hash[] hash chains. The provides an efficient search
* mechanism to locate a specific dquot.
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index fd5dd806f1b9..cb13fb76dbee 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -331,9 +331,9 @@ static int quota_state_to_flags(struct qc_state *state)
return flags;
}
-static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
+static int quota_getstate(struct super_block *sb, int type,
+ struct fs_quota_stat *fqs)
{
- int type;
struct qc_state state;
int ret;
@@ -349,14 +349,7 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
if (!fqs->qs_flags)
return -ENOSYS;
fqs->qs_incoredqs = state.s_incoredqs;
- /*
- * GETXSTATE quotactl has space for just one set of time limits so
- * report them for the first enabled quota type
- */
- for (type = 0; type < MAXQUOTAS; type++)
- if (state.s_state[type].flags & QCI_ACCT_ENABLED)
- break;
- BUG_ON(type == MAXQUOTAS);
+
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -391,22 +384,22 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
return 0;
}
-static int quota_getxstate(struct super_block *sb, void __user *addr)
+static int quota_getxstate(struct super_block *sb, int type, void __user *addr)
{
struct fs_quota_stat fqs;
int ret;
if (!sb->s_qcop->get_state)
return -ENOSYS;
- ret = quota_getstate(sb, &fqs);
+ ret = quota_getstate(sb, type, &fqs);
if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
return -EFAULT;
return ret;
}
-static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
+static int quota_getstatev(struct super_block *sb, int type,
+ struct fs_quota_statv *fqs)
{
- int type;
struct qc_state state;
int ret;
@@ -422,14 +415,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
if (!fqs->qs_flags)
return -ENOSYS;
fqs->qs_incoredqs = state.s_incoredqs;
- /*
- * GETXSTATV quotactl has space for just one set of time limits so
- * report them for the first enabled quota type
- */
- for (type = 0; type < MAXQUOTAS; type++)
- if (state.s_state[type].flags & QCI_ACCT_ENABLED)
- break;
- BUG_ON(type == MAXQUOTAS);
+
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -455,7 +441,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
return 0;
}
-static int quota_getxstatev(struct super_block *sb, void __user *addr)
+static int quota_getxstatev(struct super_block *sb, int type, void __user *addr)
{
struct fs_quota_statv fqs;
int ret;
@@ -474,7 +460,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr)
default:
return -EINVAL;
}
- ret = quota_getstatev(sb, &fqs);
+ ret = quota_getstatev(sb, type, &fqs);
if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
return -EFAULT;
return ret;
@@ -744,9 +730,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
case Q_XQUOTARM:
return quota_rmxquota(sb, addr);
case Q_XGETQSTAT:
- return quota_getxstate(sb, addr);
+ return quota_getxstate(sb, type, addr);
case Q_XGETQSTATV:
- return quota_getxstatev(sb, addr);
+ return quota_getxstatev(sb, type, addr);
case Q_XSETQLIM:
return quota_setxquota(sb, type, id, addr);
case Q_XGETQUOTA:
diff --git a/fs/read_write.c b/fs/read_write.c
index c543d965e288..1f5088dec566 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1565,6 +1565,58 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
}
#endif
+/**
+ * generic_copy_file_range - copy data between two files
+ * @file_in:	file structure to read from
+ * @pos_in:	file offset to read from
+ * @file_out:	file structure to write data to
+ * @pos_out:	file offset to write data to
+ * @len:	amount of data to copy
+ * @flags:	copy flags
+ *
+ * Generic filesystem helper that copies data from one file to another by
+ * means of do_splice_direct().  It places no constraints on the owners of
+ * the source or destination file: they may live on different superblocks
+ * and on different filesystem types.  Short copies are allowed; at most
+ * MAX_RW_COUNT bytes are requested per call.
+ *
+ * This should be called from the @file_out filesystem, as per the
+ * ->copy_file_range() method.
+ *
+ * Returns the number of bytes copied or a negative error indicating the
+ * failure.
+ */
+ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				size_t len, unsigned int flags)
+{
+	size_t count = len;
+
+	/* clamp the request to the largest single transfer */
+	if (count > MAX_RW_COUNT)
+		count = MAX_RW_COUNT;
+
+	return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+				count, 0);
+}
+EXPORT_SYMBOL(generic_copy_file_range);
+
+/*
+ * Dispatch a copy either to the filesystem's ->copy_file_range() method
+ * (only when both files share the same method) or to
+ * generic_copy_file_range().
+ */
+static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
+				  struct file *file_out, loff_t pos_out,
+				  size_t len, unsigned int flags)
+{
+	/*
+	 * Although we now allow filesystems to handle cross sb copy, passing
+	 * a file of the wrong filesystem type to filesystem driver can result
+	 * in an attempt to dereference the wrong type of ->private_data, so
+	 * avoid doing that until we really have a good reason.  NFS defines
+	 * several different file_system_type structures, but they all end up
+	 * using the same ->copy_file_range() function pointer.
+	 */
+	if (!file_out->f_op->copy_file_range ||
+	    file_out->f_op->copy_file_range != file_in->f_op->copy_file_range)
+		return generic_copy_file_range(file_in, pos_in, file_out,
+					       pos_out, len, flags);
+
+	return file_out->f_op->copy_file_range(file_in, pos_in,
+					       file_out, pos_out,
+					       len, flags);
+}
+
/*
* copy_file_range() differs from regular file read and write in that it
* specifically allows return partial success. When it does so is up to
@@ -1574,17 +1626,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
ssize_t ret;
if (flags != 0)
return -EINVAL;
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
+ ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
+ flags);
+ if (unlikely(ret))
+ return ret;
ret = rw_verify_area(READ, file_in, &pos_in, len);
if (unlikely(ret))
@@ -1594,15 +1644,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (unlikely(ret))
return ret;
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
-
- /* this could be relaxed once a method supports cross-fs copies */
- if (inode_in->i_sb != inode_out->i_sb)
- return -EXDEV;
-
if (len == 0)
return 0;
@@ -1612,7 +1653,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* Try cloning first, this is supported by more file systems, and
* more efficient if both clone and copy are supported (e.g. NFS).
*/
- if (file_in->f_op->remap_file_range) {
+ if (file_in->f_op->remap_file_range &&
+ file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
loff_t cloned;
cloned = file_in->f_op->remap_file_range(file_in, pos_in,
@@ -1625,16 +1667,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
}
}
- if (file_out->f_op->copy_file_range) {
- ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
- pos_out, len, flags);
- if (ret != -EOPNOTSUPP)
- goto done;
- }
-
- ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
- len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
-
+ ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+ flags);
+ WARN_ON_ONCE(ret == -EOPNOTSUPP);
done:
if (ret > 0) {
fsnotify_access(file_in);
@@ -1951,25 +1986,10 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
return ret;
/* If can't alter the file contents, we're done. */
- if (!(remap_flags & REMAP_FILE_DEDUP)) {
- /* Update the timestamps, since we can alter file contents. */
- if (!(file_out->f_mode & FMODE_NOCMTIME)) {
- ret = file_update_time(file_out);
- if (ret)
- return ret;
- }
-
- /*
- * Clear the security bits if the process is not being run by
- * root. This keeps people from modifying setuid and setgid
- * binaries.
- */
- ret = file_remove_privs(file_out);
- if (ret)
- return ret;
- }
+ if (!(remap_flags & REMAP_FILE_DEDUP))
+ ret = file_modified(file_out);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(generic_remap_file_range_prep);
@@ -1977,29 +1997,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags)
{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
loff_t ret;
WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
/*
* FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
* the same mount. Practically, they only need to be on the same file
* system.
*/
- if (inode_in->i_sb != inode_out->i_sb)
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
+ ret = generic_file_rw_checks(file_in, file_out);
+ if (ret < 0)
+ return ret;
if (!file_in->f_op->remap_file_range)
return -EOPNOTSUPP;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index abe27ec43176..04f09689cd6d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -384,6 +384,17 @@ void seq_escape(struct seq_file *m, const char *s, const char *esc)
}
EXPORT_SYMBOL(seq_escape);
+/*
+ * Escape @isz bytes starting at @src into the buffer space obtained from
+ * seq_get_buf() using string_escape_mem_ascii().  The produced length is
+ * committed when it is smaller than the available space; otherwise -1 is
+ * committed.
+ */
+void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz)
+{
+	char *out;
+	size_t avail = seq_get_buf(m, &out);
+	int written = string_escape_mem_ascii(src, isz, out, avail);
+
+	if (written < avail)
+		seq_commit(m, written);
+	else
+		seq_commit(m, -1);
+}
+EXPORT_SYMBOL(seq_escape_mem_ascii);
+
void seq_vprintf(struct seq_file *m, const char *f, va_list args)
{
int len;
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index a5bab190a297..eeeae0475da9 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -505,9 +505,12 @@ static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
switch (dentry->d_inode->i_mode & S_IFMT) {
case S_IFDIR:
ret = simple_rmdir(parent->d_inode, dentry);
+ if (!ret)
+ fsnotify_rmdir(parent->d_inode, dentry);
break;
default:
simple_unlink(parent->d_inode, dentry);
+ fsnotify_unlink(parent->d_inode, dentry);
break;
}
if (!ret)
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index 38718026ad0b..60f43b93d06e 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -227,7 +227,7 @@ int ubifs_init_authentication(struct ubifs_info *c)
snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
c->auth_hash_name);
- keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL, NULL);
+ keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL);
if (IS_ERR(keyring_key)) {
ubifs_err(c, "Failed to request key: %ld",
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 4aaedf2d7f44..22be7aeb96c4 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -29,8 +29,8 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
{
struct ubifs_info *c = inode->i_sb->s_fs_info;
void *p = &dn->data;
- struct page *ret;
unsigned int pad_len = round_up(in_len, UBIFS_CIPHER_BLOCK_SIZE);
+ int err;
ubifs_assert(c, pad_len <= *out_len);
dn->compr_size = cpu_to_le16(in_len);
@@ -39,11 +39,11 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
if (pad_len != in_len)
memset(p + in_len, 0, pad_len - in_len);
- ret = fscrypt_encrypt_page(inode, virt_to_page(&dn->data), pad_len,
- offset_in_page(&dn->data), block, GFP_NOFS);
- if (IS_ERR(ret)) {
- ubifs_err(c, "fscrypt_encrypt_page failed: %ld", PTR_ERR(ret));
- return PTR_ERR(ret);
+ err = fscrypt_encrypt_block_inplace(inode, virt_to_page(p), pad_len,
+ offset_in_page(p), block, GFP_NOFS);
+ if (err) {
+ ubifs_err(c, "fscrypt_encrypt_block_inplace() failed: %d", err);
+ return err;
}
*out_len = pad_len;
@@ -64,10 +64,11 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn,
}
ubifs_assert(c, dlen <= UBIFS_BLOCK_SIZE);
- err = fscrypt_decrypt_page(inode, virt_to_page(&dn->data), dlen,
- offset_in_page(&dn->data), block);
+ err = fscrypt_decrypt_block_inplace(inode, virt_to_page(&dn->data),
+ dlen, offset_in_page(&dn->data),
+ block);
if (err) {
- ubifs_err(c, "fscrypt_decrypt_page failed: %i", err);
+ ubifs_err(c, "fscrypt_decrypt_block_inplace() failed: %d", err);
return err;
}
*out_len = clen;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index e7276932e433..9bb18311a22f 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -470,13 +470,15 @@ static struct buffer_head *udf_getblk(struct inode *inode, udf_pblk_t block,
return NULL;
}
-/* Extend the file by 'blocks' blocks, return the number of extents added */
+/* Extend the file with new blocks totaling 'new_block_bytes',
+ * return the number of extents added
+ */
static int udf_do_extend_file(struct inode *inode,
struct extent_position *last_pos,
struct kernel_long_ad *last_ext,
- sector_t blocks)
+ loff_t new_block_bytes)
{
- sector_t add;
+ uint32_t add;
int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
struct super_block *sb = inode->i_sb;
struct kernel_lb_addr prealloc_loc = {};
@@ -486,7 +488,7 @@ static int udf_do_extend_file(struct inode *inode,
/* The previous extent is fake and we should not extend by anything
* - there's nothing to do... */
- if (!blocks && fake)
+ if (!new_block_bytes && fake)
return 0;
iinfo = UDF_I(inode);
@@ -517,13 +519,12 @@ static int udf_do_extend_file(struct inode *inode,
/* Can we merge with the previous extent? */
if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
EXT_NOT_RECORDED_NOT_ALLOCATED) {
- add = ((1 << 30) - sb->s_blocksize -
- (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >>
- sb->s_blocksize_bits;
- if (add > blocks)
- add = blocks;
- blocks -= add;
- last_ext->extLength += add << sb->s_blocksize_bits;
+ add = (1 << 30) - sb->s_blocksize -
+ (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+ if (add > new_block_bytes)
+ add = new_block_bytes;
+ new_block_bytes -= add;
+ last_ext->extLength += add;
}
if (fake) {
@@ -544,28 +545,27 @@ static int udf_do_extend_file(struct inode *inode,
}
/* Managed to do everything necessary? */
- if (!blocks)
+ if (!new_block_bytes)
goto out;
/* All further extents will be NOT_RECORDED_NOT_ALLOCATED */
last_ext->extLocation.logicalBlockNum = 0;
last_ext->extLocation.partitionReferenceNum = 0;
- add = (1 << (30-sb->s_blocksize_bits)) - 1;
- last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (add << sb->s_blocksize_bits);
+ add = (1 << 30) - sb->s_blocksize;
+ last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | add;
/* Create enough extents to cover the whole hole */
- while (blocks > add) {
- blocks -= add;
+ while (new_block_bytes > add) {
+ new_block_bytes -= add;
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
return err;
count++;
}
- if (blocks) {
+ if (new_block_bytes) {
last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (blocks << sb->s_blocksize_bits);
+ new_block_bytes;
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
@@ -596,6 +596,24 @@ out:
return count;
}
+/* Extend the final block of the file to final_block_len bytes (in last_ext) */
+static void udf_do_extend_final_block(struct inode *inode,
+ struct extent_position *last_pos,
+ struct kernel_long_ad *last_ext,
+ uint32_t final_block_len)
+{
+ struct super_block *sb = inode->i_sb;
+ uint32_t added_bytes; /* NOTE(review): unsigned, wraps if the remainder below exceeds final_block_len - confirm callers */
+
+ added_bytes = final_block_len -
+ (last_ext->extLength & (sb->s_blocksize - 1)); /* extLength modulo the block size */
+ last_ext->extLength += added_bytes;
+ UDF_I(inode)->i_lenExtents += added_bytes; /* grown by the same amount as the extent */
+
+ udf_write_aext(inode, last_pos, &last_ext->extLocation,
+ last_ext->extLength, 1);
+}
+
static int udf_extend_file(struct inode *inode, loff_t newsize)
{
@@ -605,10 +623,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
int8_t etype;
struct super_block *sb = inode->i_sb;
sector_t first_block = newsize >> sb->s_blocksize_bits, offset;
+ unsigned long partial_final_block;
int adsize;
struct udf_inode_info *iinfo = UDF_I(inode);
struct kernel_long_ad extent;
- int err;
+ int err = 0;
+ int within_final_block;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
adsize = sizeof(struct short_ad);
@@ -618,18 +638,8 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
BUG();
etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
+ within_final_block = (etype != -1);
- /* File has extent covering the new size (could happen when extending
- * inside a block)? */
- if (etype != -1)
- return 0;
- if (newsize & (sb->s_blocksize - 1))
- offset++;
- /* Extended file just to the boundary of the last file block? */
- if (offset == 0)
- return 0;
-
- /* Truncate is extending the file by 'offset' blocks */
if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
(epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
/* File has no extents at all or has empty last
@@ -643,7 +653,22 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
&extent.extLength, 0);
extent.extLength |= etype << 30;
}
- err = udf_do_extend_file(inode, &epos, &extent, offset);
+
+ partial_final_block = newsize & (sb->s_blocksize - 1);
+
+ /* File has extent covering the new size (could happen when extending
+ * inside a block)?
+ */
+ if (within_final_block) {
+ /* Extending file within the last file block */
+ udf_do_extend_final_block(inode, &epos, &extent,
+ partial_final_block);
+ } else {
+ loff_t add = ((loff_t)offset << sb->s_blocksize_bits) |
+ partial_final_block;
+ err = udf_do_extend_file(inode, &epos, &extent, add);
+ }
+
if (err < 0)
goto out;
err = 0;
@@ -745,6 +770,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* Are we beyond EOF? */
if (etype == -1) {
int ret;
+ loff_t hole_len;
isBeyondEOF = true;
if (count) {
if (c)
@@ -760,7 +786,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
startnum = (offset > 0);
}
/* Create extents for the hole between EOF and offset */
- ret = udf_do_extend_file(inode, &prev_epos, laarr, offset);
+ hole_len = (loff_t)offset << inode->i_blkbits;
+ ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len);
if (ret < 0) {
*err = ret;
newblock = 0;
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 6afab4fdce90..71ca4d047d65 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um,
}
EXPORT_SYMBOL(utf8_strncasecmp);
+/* Compare cf, expected to be a valid UTF-8 casefolded string, against the
+ * utf8nfdicf() cursor output for s1: 0 if equal, 1 if not, -EINVAL on error.
+ */
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+ const struct qstr *cf,
+ const struct qstr *s1)
+{
+ const struct utf8data *data = utf8nfdicf(um->version);
+ struct utf8cursor cur1;
+ int c1, c2;
+ int i = 0;
+
+ if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ return -EINVAL;
+
+ do {
+ c1 = utf8byte(&cur1);
+ c2 = cf->name[i++]; /* NOTE(review): cf->len is never consulted; presumably cf->name is NUL-terminated - confirm */
+ if (c1 < 0)
+ return -EINVAL;
+ if (c1 != c2)
+ return 1;
+ } while (c1); /* loop ends once the s1 cursor yields 0 and cf matched it */
+
+ return 0;
+}
+EXPORT_SYMBOL(utf8_strncasecmp_folded);
+
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 76748255f843..916a35cae5e9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -367,20 +367,7 @@ restart:
* lock above. Eventually we should look into a way to avoid
* the pointless lock roundtrip.
*/
- if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
- error = file_update_time(file);
- if (error)
- return error;
- }
-
- /*
- * If we're writing the file then make sure to clear the setuid and
- * setgid bits if the process is not being run by root. This keeps
- * people from modifying setuid and setgid binaries.
- */
- if (!IS_NOSEC(inode))
- return file_remove_privs(file);
- return 0;
+ return file_modified(file);
}
static int
diff --git a/arch/arm/include/asm/flat.h b/include/asm-generic/flat.h
index f0c75ddeea23..1928a3596938 100644
--- a/arch/arm/include/asm/flat.h
+++ b/include/asm-generic/flat.h
@@ -1,19 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/arm/include/asm/flat.h -- uClinux flat-format executables
- */
-
-#ifndef __ARM_FLAT_H__
-#define __ARM_FLAT_H__
+#ifndef _ASM_GENERIC_FLAT_H
+#define _ASM_GENERIC_FLAT_H
#include <linux/uaccess.h>
-#define flat_argvp_envp_on_stack() 1
-#define flat_old_ram_flag(flags) (flags)
-#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
- u32 *addr, u32 *persistent)
+ u32 *addr)
{
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
return copy_from_user(addr, rp, 4) ? -EFAULT : 0;
@@ -31,7 +23,4 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
#endif
}
-#define flat_get_relocate_addr(rel) (rel)
-#define flat_set_persistent(relval, p) 0
-
-#endif /* __ARM_FLAT_H__ */
+#endif /* _ASM_GENERIC_FLAT_H */
diff --git a/include/linux/flat.h b/include/linux/flat.h
index 569b67d64d5c..83977c0ce3de 100644
--- a/include/linux/flat.h
+++ b/include/linux/flat.h
@@ -10,8 +10,41 @@
#ifndef _LINUX_FLAT_H
#define _LINUX_FLAT_H
-#include <uapi/linux/flat.h>
-#include <asm/flat.h>
+#define FLAT_VERSION 0x00000004L
+
+/*
+ * To make everything easier to port and manage cross platform
+ * development, all fields are in network byte order.
+ */
+
+struct flat_hdr {
+ char magic[4];
+ __be32 rev; /* version (as above) */
+ __be32 entry; /* Offset of first executable instruction
+ with text segment from beginning of file */
+ __be32 data_start; /* Offset of data segment from beginning of
+ file */
+ __be32 data_end; /* Offset of end of data segment from beginning
+ of file */
+ __be32 bss_end; /* Offset of end of bss segment from beginning
+ of file */
+
+ /* (It is assumed that data_end through bss_end forms the bss segment.) */
+
+ __be32 stack_size; /* Size of stack, in bytes */
+ __be32 reloc_start; /* Offset of relocation records from beginning of
+ file */
+ __be32 reloc_count; /* Number of relocation records */
+ __be32 flags;
+ __be32 build_date; /* When the program/library was built */
+ __u32 filler[5]; /* Reserved, set to zero */
+};
+
+#define FLAT_FLAG_RAM 0x0001 /* load program entirely into RAM */
+#define FLAT_FLAG_GOTPIC 0x0002 /* program is PIC with GOT */
+#define FLAT_FLAG_GZIP 0x0004 /* all but the header is compressed */
+#define FLAT_FLAG_GZDATA 0x0008 /* only data/relocs are compressed (for XIP) */
+#define FLAT_FLAG_KTRACE 0x0010 /* output useful kernel trace for debugging */
/*
* While it would be nice to keep this header clean, users of older
@@ -22,28 +55,21 @@
* with the format above, except to fix bugs with old format support.
*/
-#include <asm/byteorder.h>
-
#define OLD_FLAT_VERSION 0x00000002L
#define OLD_FLAT_RELOC_TYPE_TEXT 0
#define OLD_FLAT_RELOC_TYPE_DATA 1
#define OLD_FLAT_RELOC_TYPE_BSS 2
typedef union {
- unsigned long value;
+ u32 value;
struct {
-# if defined(mc68000) && !defined(CONFIG_COLDFIRE)
- signed long offset : 30;
- unsigned long type : 2;
-# define OLD_FLAT_FLAG_RAM 0x1 /* load program entirely into RAM */
+#if defined(__LITTLE_ENDIAN_BITFIELD) || \
+ (defined(mc68000) && !defined(CONFIG_COLDFIRE))
+ s32 offset : 30;
+ u32 type : 2;
# elif defined(__BIG_ENDIAN_BITFIELD)
- unsigned long type : 2;
- signed long offset : 30;
-# define OLD_FLAT_FLAG_RAM 0x1 /* load program entirely into RAM */
-# elif defined(__LITTLE_ENDIAN_BITFIELD)
- signed long offset : 30;
- unsigned long type : 2;
-# define OLD_FLAT_FLAG_RAM 0x1 /* load program entirely into RAM */
+ u32 type : 2;
+ s32 offset : 30;
# else
# error "Unknown bitfield order for flat files."
# endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c564cf3f48d9..9193f5f6b09d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -694,7 +694,7 @@ struct inode {
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;
-#ifdef CONFIG_IMA
+#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
atomic_t i_readcount; /* struct files open RO */
#endif
union {
@@ -1019,8 +1019,6 @@ struct file_lock_operations {
};
struct lock_manager_operations {
- int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
- unsigned long (*lm_owner_key)(struct file_lock *);
fl_owner_t (*lm_get_owner)(fl_owner_t);
void (*lm_put_owner)(fl_owner_t);
void (*lm_notify)(struct file_lock *); /* unblock callback */
@@ -1889,6 +1887,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
unsigned long, loff_t *, rwf_t);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
+extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags);
extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *count,
@@ -2174,6 +2175,8 @@ static inline void file_accessed(struct file *file)
touch_atime(&file->f_path);
}
+extern int file_modified(struct file *file);
+
int sync_inode(struct inode *inode, struct writeback_control *wbc);
int sync_inode_metadata(struct inode *inode, int wait);
@@ -2184,6 +2187,7 @@ struct file_system_type {
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
+#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_description *parameters;
@@ -2712,6 +2716,8 @@ extern int filemap_flush(struct address_space *);
extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
+extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte);
static inline int filemap_fdatawait(struct address_space *mapping)
{
@@ -2890,7 +2896,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode)
return atomic_read(&inode->i_writecount) > 0;
}
-#ifdef CONFIG_IMA
+#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
static inline void i_readcount_dec(struct inode *inode)
{
BUG_ON(!atomic_read(&inode->i_readcount));
@@ -3046,6 +3052,10 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *count, unsigned int remap_flags);
+extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
+extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t *count, unsigned int flags);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index f7680ef1abd2..bd8f207a2fb6 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -63,16 +63,13 @@ struct fscrypt_operations {
unsigned int max_namelen;
};
+/* Decryption work */
struct fscrypt_ctx {
union {
struct {
- struct page *bounce_page; /* Ciphertext page */
- struct page *control_page; /* Original page */
- } w;
- struct {
struct bio *bio;
struct work_struct work;
- } r;
+ };
struct list_head free_list; /* Free list */
};
u8 flags; /* Flags */
@@ -106,18 +103,33 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry)
extern void fscrypt_enqueue_decrypt_work(struct work_struct *);
extern struct fscrypt_ctx *fscrypt_get_ctx(gfp_t);
extern void fscrypt_release_ctx(struct fscrypt_ctx *);
-extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
- unsigned int, unsigned int,
- u64, gfp_t);
-extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
- unsigned int, u64);
-static inline struct page *fscrypt_control_page(struct page *page)
+extern struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ gfp_t gfp_flags);
+extern int fscrypt_encrypt_block_inplace(const struct inode *inode,
+ struct page *page, unsigned int len,
+ unsigned int offs, u64 lblk_num,
+ gfp_t gfp_flags);
+
+extern int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
+ unsigned int offs);
+extern int fscrypt_decrypt_block_inplace(const struct inode *inode,
+ struct page *page, unsigned int len,
+ unsigned int offs, u64 lblk_num);
+
+static inline bool fscrypt_is_bounce_page(struct page *page)
+{
+ return page->mapping == NULL;
+}
+
+static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
{
- return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
+ return (struct page *)page_private(bounce_page);
}
-extern void fscrypt_restore_control_page(struct page *);
+extern void fscrypt_free_bounce_page(struct page *bounce_page);
/* policy.c */
extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
@@ -223,7 +235,6 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
extern void fscrypt_decrypt_bio(struct bio *);
extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx,
struct bio *bio);
-extern void fscrypt_pullback_bio_page(struct page **, bool);
extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
unsigned int);
@@ -283,32 +294,51 @@ static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
return;
}
-static inline struct page *fscrypt_encrypt_page(const struct inode *inode,
+static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ gfp_t gfp_flags)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int fscrypt_encrypt_block_inplace(const struct inode *inode,
struct page *page,
unsigned int len,
- unsigned int offs,
- u64 lblk_num, gfp_t gfp_flags)
+ unsigned int offs, u64 lblk_num,
+ gfp_t gfp_flags)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_decrypt_pagecache_blocks(struct page *page,
+ unsigned int len,
+ unsigned int offs)
+{
+ return -EOPNOTSUPP;
}
-static inline int fscrypt_decrypt_page(const struct inode *inode,
- struct page *page,
- unsigned int len, unsigned int offs,
- u64 lblk_num)
+static inline int fscrypt_decrypt_block_inplace(const struct inode *inode,
+ struct page *page,
+ unsigned int len,
+ unsigned int offs, u64 lblk_num)
{
return -EOPNOTSUPP;
}
-static inline struct page *fscrypt_control_page(struct page *page)
+static inline bool fscrypt_is_bounce_page(struct page *page)
+{
+ return false;
+}
+
+static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
{
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
}
-static inline void fscrypt_restore_control_page(struct page *page)
+static inline void fscrypt_free_bounce_page(struct page *bounce_page)
{
- return;
}
/* policy.c */
@@ -410,11 +440,6 @@ static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx,
{
}
-static inline void fscrypt_pullback_bio_page(struct page **page, bool restore)
-{
- return;
-}
-
static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
@@ -692,4 +717,15 @@ static inline int fscrypt_encrypt_symlink(struct inode *inode,
return 0;
}
+/* If *pagep is a bounce page, free it and set *pagep to the pagecache page; otherwise leave *pagep unchanged */
+static inline void fscrypt_finalize_bounce_page(struct page **pagep)
+{
+ struct page *page = *pagep;
+
+ if (fscrypt_is_bounce_page(page)) {
+ *pagep = fscrypt_pagecache_page(page); /* fetch before the bounce page is freed below */
+ fscrypt_free_bounce_page(page);
+ }
+}
+
#endif /* _LINUX_FSCRYPT_H */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 94972e8eb6d1..a2d5d175d3c1 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -189,6 +189,19 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
}
/*
+ * fsnotify_unlink - 'name' was unlinked
+ *
+ * Caller must make sure that dentry->d_name is stable.
+ */
+static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry)
+{
+ /* Expected to be called before d_delete() */
+ WARN_ON_ONCE(d_is_negative(dentry));
+
+ fsnotify_dirent(dir, dentry, FS_DELETE);
+}
+
+/*
* fsnotify_mkdir - directory 'name' was created
*/
static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
@@ -199,6 +212,19 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
}
/*
+ * fsnotify_rmdir - directory 'name' was removed
+ *
+ * Caller must make sure that dentry->d_name is stable.
+ */
+static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ /* Expected to be called before d_delete() */
+ WARN_ON_ONCE(d_is_negative(dentry));
+
+ fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR);
+}
+
+/*
* fsnotify_access - file was read
*/
static inline void fsnotify_access(struct file *file)
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d4844cad2c2b..2de3b2ddd19a 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -357,7 +357,6 @@ extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u
extern void __fsnotify_inode_delete(struct inode *inode);
extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
extern void fsnotify_sb_delete(struct super_block *sb);
-extern void fsnotify_nameremove(struct dentry *dentry, int isdir);
extern u32 fsnotify_get_cookie(void);
static inline int fsnotify_inode_watches_children(struct inode *inode)
@@ -527,9 +526,6 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
static inline void fsnotify_sb_delete(struct super_block *sb)
{}
-static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
-{}
-
static inline void fsnotify_update_flags(struct dentry *dentry)
{}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 2103b94cb1bf..1df9ea187a9a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -35,6 +35,7 @@ struct vm_fault;
#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */
#define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */
#define IOMAP_F_BUFFER_HEAD 0x04 /* file system requires buffer heads */
+#define IOMAP_F_SIZE_CHANGED 0x08 /* file size has changed */
/*
* Flags that only need to be reported for IOMAP_REPORT requests:
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 5c04181b7c6d..df03825ad1a1 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -451,6 +451,22 @@ struct jbd2_inode {
* @i_flags: Flags of inode [j_list_lock]
*/
unsigned long i_flags;
+
+ /**
+ * @i_dirty_start:
+ *
+ * Offset in bytes where the dirty range for this inode starts.
+ * [j_list_lock]
+ */
+ loff_t i_dirty_start;
+
+ /**
+ * @i_dirty_end:
+ *
+ * Inclusive offset in bytes where the dirty range for this inode
+ * ends. [j_list_lock]
+ */
+ loff_t i_dirty_end;
};
struct jbd2_revoke_table_s;
@@ -1357,7 +1373,6 @@ void jbd2_journal_set_triggers(struct buffer_head *,
struct jbd2_buffer_trigger_type *type);
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
-extern void journal_sync_buffer (struct buffer_head *);
extern int jbd2_journal_invalidatepage(journal_t *,
struct page *, unsigned int, unsigned int);
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
@@ -1397,6 +1412,12 @@ extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_force_commit_nested(journal_t *);
extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
+extern int jbd2_journal_inode_ranged_write(handle_t *handle,
+ struct jbd2_inode *inode, loff_t start_byte,
+ loff_t length);
+extern int jbd2_journal_inode_ranged_wait(handle_t *handle,
+ struct jbd2_inode *inode, loff_t start_byte,
+ loff_t length);
extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *inode, loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
diff --git a/include/linux/key.h b/include/linux/key.h
index 6fef6684501f..91f391cd272e 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -27,15 +27,50 @@
/* key handle serial number */
typedef int32_t key_serial_t;
+/* key handle permissions mask */
+typedef uint32_t key_perm_t;
+
struct key;
struct net;
#ifdef CONFIG_KEYS
-#include <linux/keyctl.h>
-
#undef KEY_DEBUGGING
+#define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */
+#define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */
+#define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */
+#define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */
+#define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */
+#define KEY_POS_SETATTR 0x20000000 /* possessor can set key attributes */
+#define KEY_POS_ALL 0x3f000000
+
+#define KEY_USR_VIEW 0x00010000 /* user permissions... */
+#define KEY_USR_READ 0x00020000
+#define KEY_USR_WRITE 0x00040000
+#define KEY_USR_SEARCH 0x00080000
+#define KEY_USR_LINK 0x00100000
+#define KEY_USR_SETATTR 0x00200000
+#define KEY_USR_ALL 0x003f0000
+
+#define KEY_GRP_VIEW 0x00000100 /* group permissions... */
+#define KEY_GRP_READ 0x00000200
+#define KEY_GRP_WRITE 0x00000400
+#define KEY_GRP_SEARCH 0x00000800
+#define KEY_GRP_LINK 0x00001000
+#define KEY_GRP_SETATTR 0x00002000
+#define KEY_GRP_ALL 0x00003f00
+
+#define KEY_OTH_VIEW 0x00000001 /* third party permissions... */
+#define KEY_OTH_READ 0x00000002
+#define KEY_OTH_WRITE 0x00000004
+#define KEY_OTH_SEARCH 0x00000008
+#define KEY_OTH_LINK 0x00000010
+#define KEY_OTH_SETATTR 0x00000020
+#define KEY_OTH_ALL 0x0000003f
+
+#define KEY_PERM_UNDEF 0xffffffff
+
struct seq_file;
struct user_struct;
struct signal_struct;
@@ -78,36 +113,6 @@ union key_payload {
void *data[4];
};
-struct key_ace {
- unsigned int type;
- unsigned int perm;
- union {
- kuid_t uid;
- kgid_t gid;
- unsigned int subject_id;
- };
-};
-
-struct key_acl {
- refcount_t usage;
- unsigned short nr_ace;
- bool possessor_viewable;
- struct rcu_head rcu;
- struct key_ace aces[];
-};
-
-#define KEY_POSSESSOR_ACE(perms) { \
- .type = KEY_ACE_SUBJ_STANDARD, \
- .perm = perms, \
- .subject_id = KEY_ACE_POSSESSOR \
- }
-
-#define KEY_OWNER_ACE(perms) { \
- .type = KEY_ACE_SUBJ_STANDARD, \
- .perm = perms, \
- .subject_id = KEY_ACE_OWNER \
- }
-
/*****************************************************************************/
/*
* key reference with possession attribute handling
@@ -174,7 +179,6 @@ struct key {
struct rw_semaphore sem; /* change vs change sem */
struct key_user *user; /* owner of this key */
void *security; /* security data for this key */
- struct key_acl __rcu *acl;
union {
time64_t expiry; /* time at which key expires (or 0) */
time64_t revoked_at; /* time at which key was revoked */
@@ -182,6 +186,7 @@ struct key {
time64_t last_used_at; /* last time used for LRU keyring discard */
kuid_t uid;
kgid_t gid;
+ key_perm_t perm; /* access permissions */
unsigned short quotalen; /* length added to quota */
unsigned short datalen; /* payload data length
* - may not match RCU dereferenced payload
@@ -205,7 +210,6 @@ struct key {
#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */
#define KEY_FLAG_KEEP 8 /* set if key should not be removed */
#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */
-#define KEY_FLAG_HAS_ACL 10 /* Set if KEYCTL_SETACL called on key */
/* the key type and key description string
* - the desc is used to match a key against search criteria
@@ -254,7 +258,7 @@ extern struct key *key_alloc(struct key_type *type,
const char *desc,
kuid_t uid, kgid_t gid,
const struct cred *cred,
- struct key_acl *acl,
+ key_perm_t perm,
unsigned long flags,
struct key_restriction *restrict_link);
@@ -291,8 +295,7 @@ static inline void key_ref_put(key_ref_t key_ref)
extern struct key *request_key_tag(struct key_type *type,
const char *description,
struct key_tag *domain_tag,
- const char *callout_info,
- struct key_acl *acl);
+ const char *callout_info);
extern struct key *request_key_rcu(struct key_type *type,
const char *description,
@@ -303,24 +306,21 @@ extern struct key *request_key_with_auxdata(struct key_type *type,
struct key_tag *domain_tag,
const void *callout_info,
size_t callout_len,
- void *aux,
- struct key_acl *acl);
+ void *aux);
/**
* request_key - Request a key and wait for construction
* @type: Type of key.
* @description: The searchable description of the key.
* @callout_info: The data to pass to the instantiation upcall (or NULL).
- * @acl: The ACL to attach to a new key (or NULL).
*
* As for request_key_tag(), but with the default global domain tag.
*/
static inline struct key *request_key(struct key_type *type,
const char *description,
- const char *callout_info,
- struct key_acl *acl)
+ const char *callout_info)
{
- return request_key_tag(type, description, NULL, callout_info, acl);
+ return request_key_tag(type, description, NULL, callout_info);
}
#ifdef CONFIG_NET
@@ -330,7 +330,6 @@ static inline struct key *request_key(struct key_type *type,
* @description: The searchable description of the key.
* @net: The network namespace that is the key's domain of operation.
* @callout_info: The data to pass to the instantiation upcall (or NULL).
- * @acl: The ACL to attach to a new key (or NULL).
*
* As for request_key() except that it does not add the returned key to a
* keyring if found, new keys are always allocated in the user's quota, the
@@ -340,8 +339,8 @@ static inline struct key *request_key(struct key_type *type,
* Furthermore, it then works as wait_for_key_construction() to wait for the
* completion of keys undergoing construction with a non-interruptible wait.
*/
-#define request_key_net(type, description, net, callout_info, acl) \
- request_key_tag(type, description, net->key_domain, callout_info, acl);
+#define request_key_net(type, description, net, callout_info) \
+ request_key_tag(type, description, (net)->key_domain, callout_info)
#endif /* CONFIG_NET */
extern int wait_for_key_construction(struct key *key, bool intr);
@@ -353,7 +352,7 @@ extern key_ref_t key_create_or_update(key_ref_t keyring,
const char *description,
const void *payload,
size_t plen,
- struct key_acl *acl,
+ key_perm_t perm,
unsigned long flags);
extern int key_update(key_ref_t key,
@@ -373,7 +372,7 @@ extern int key_unlink(struct key *keyring,
extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
const struct cred *cred,
- struct key_acl *acl,
+ key_perm_t perm,
unsigned long flags,
struct key_restriction *restrict_link,
struct key *dest);
@@ -406,29 +405,19 @@ static inline key_serial_t key_serial(const struct key *key)
extern void key_set_timeout(struct key *, unsigned);
extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
- u32 desired_perm);
+ key_perm_t perm);
extern void key_free_user_ns(struct user_namespace *);
/*
* The permissions required on a key that we're looking up.
*/
-#define KEY_NEED_VIEW 0x001 /* Require permission to view attributes */
-#define KEY_NEED_READ 0x002 /* Require permission to read content */
-#define KEY_NEED_WRITE 0x004 /* Require permission to update / modify */
-#define KEY_NEED_SEARCH 0x008 /* Require permission to search (keyring) or find (key) */
-#define KEY_NEED_LINK 0x010 /* Require permission to link */
-#define KEY_NEED_SETSEC 0x020 /* Require permission to set owner, group, ACL */
-#define KEY_NEED_INVAL 0x040 /* Require permission to invalidate key */
-#define KEY_NEED_REVOKE 0x080 /* Require permission to revoke key */
-#define KEY_NEED_JOIN 0x100 /* Require permission to join keyring as session */
-#define KEY_NEED_CLEAR 0x200 /* Require permission to clear a keyring */
-#define KEY_NEED_ALL 0x3ff
-
-#define OLD_KEY_NEED_SETATTR 0x20 /* Used to be Require permission to change attributes */
-
-extern struct key_acl internal_key_acl;
-extern struct key_acl internal_keyring_acl;
-extern struct key_acl internal_writable_keyring_acl;
+#define KEY_NEED_VIEW 0x01 /* Require permission to view attributes */
+#define KEY_NEED_READ 0x02 /* Require permission to read content */
+#define KEY_NEED_WRITE 0x04 /* Require permission to update / modify */
+#define KEY_NEED_SEARCH 0x08 /* Require permission to search (keyring) or find (key) */
+#define KEY_NEED_LINK 0x10 /* Require permission to link */
+#define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */
+#define KEY_NEED_ALL 0x3f /* All the above permissions */
static inline short key_read_state(const struct key *key)
{
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c9b422dde542..d294dde9e546 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -282,6 +282,7 @@ void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t match);
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
void nlmsvc_release_call(struct nlm_rqst *);
+void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
/*
* File handling for the server personality
@@ -289,6 +290,7 @@ void nlmsvc_release_call(struct nlm_rqst *);
__be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
struct nfs_fh *);
void nlm_release_file(struct nlm_file *);
+void nlmsvc_release_lockowner(struct nlm_lock *);
void nlmsvc_mark_resources(struct net *);
void nlmsvc_free_host_resources(struct nlm_host *);
void nlmsvc_invalidate_all(void);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 3c8ef5a199ca..1484db6ca8d1 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -3,6 +3,7 @@
#define _LINUX_PID_H
#include <linux/rculist.h>
+#include <linux/wait.h>
enum pid_type
{
@@ -60,6 +61,8 @@ struct pid
unsigned int level;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
+ /* wait queue for pidfd notifications */
+ wait_queue_head_t wait_pidfd;
struct rcu_head rcu;
struct upid numbers[1];
};
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index a121982af0f5..5998e1f4ff06 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -127,6 +127,7 @@ void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
unsigned long long v, unsigned int width);
void seq_escape(struct seq_file *m, const char *s, const char *esc);
+void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz);
void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
int rowsize, int groupsize, const void *buf, size_t len,
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index d23c5030901a..c28955132234 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -54,6 +54,9 @@ static inline int string_unescape_any_inplace(char *buf)
int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
unsigned int flags, const char *only);
+int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
+ size_t osz);
+
static inline int string_escape_mem_any_np(const char *src, size_t isz,
char *dst, size_t osz, const char *only)
{
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 9ee3970ba59c..8a87d8bcb197 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -164,6 +164,13 @@ xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
return p + XDR_QUADLEN(len);
}
+/*
+ * Duplicate the netobj @src into @dst: dst->data becomes a kmemdup() copy
+ * of src->len bytes allocated with @gfp_mask, and dst->len is set to
+ * src->len.
+ *
+ * The kmemdup() result is not checked here and dst->len is assigned
+ * regardless, so callers presumably have to test dst->data themselves.
+ */
+static inline void xdr_netobj_dup(struct xdr_netobj *dst,
+ struct xdr_netobj *src, gfp_t gfp_mask)
+{
+ dst->data = kmemdup(src->data, src->len, gfp_mask);
+ dst->len = src->len;
+}
+
/*
* Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
*/
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bc4bbbb9ed9a..699aed6674a0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -927,6 +927,7 @@ asmlinkage long sys_clock_adjtime32(clockid_t which_clock,
struct old_timex32 __user *tx);
asmlinkage long sys_syncfs(int fd);
asmlinkage long sys_setns(int fd, int nstype);
+asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags);
asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
unsigned int vlen, unsigned flags);
asmlinkage long sys_process_vm_readv(pid_t pid,
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index aec2c6d800aa..990aa97d8049 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -17,6 +17,9 @@ int utf8_strncmp(const struct unicode_map *um,
int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2);
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+ const struct qstr *cf,
+ const struct qstr *s1);
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen);
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 51b1e0da2efc..d5ec4fac82ae 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -27,6 +27,26 @@ enum afs_call_trace {
afs_call_trace_work,
};
+/*
+ * Reason codes passed to the afs_server tracepoint. Each value has a
+ * display string in the afs_server_traces table.
+ */
+enum afs_server_trace {
+ afs_server_trace_alloc,
+ afs_server_trace_callback,
+ afs_server_trace_destroy,
+ afs_server_trace_free,
+ afs_server_trace_gc,
+ afs_server_trace_get_by_uuid,
+ afs_server_trace_get_caps,
+ afs_server_trace_get_install,
+ afs_server_trace_get_new_cbi,
+ afs_server_trace_give_up_cb,
+ afs_server_trace_put_call,
+ afs_server_trace_put_cbi,
+ afs_server_trace_put_find_rsq,
+ afs_server_trace_put_slist,
+ afs_server_trace_put_slist_isort,
+ afs_server_trace_put_uuid_rsq,
+ afs_server_trace_update,
+};
+
enum afs_fs_operation {
afs_FS_FetchData = 130, /* AFS Fetch file data */
afs_FS_FetchACL = 131, /* AFS Fetch file ACL */
@@ -191,6 +211,17 @@ enum afs_flock_operation {
afs_flock_op_wake,
};
+/*
+ * Reason codes recorded by the afs_cb_break and afs_cb_miss tracepoints.
+ * Each value has a display string in the afs_cb_break_reasons table.
+ */
+enum afs_cb_break_reason {
+ afs_cb_break_no_break,
+ afs_cb_break_for_callback,
+ afs_cb_break_for_deleted,
+ afs_cb_break_for_lapsed,
+ afs_cb_break_for_unlink,
+ afs_cb_break_for_vsbreak,
+ afs_cb_break_for_volume_callback,
+ afs_cb_break_for_zap,
+};
+
#endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
/*
@@ -204,6 +235,25 @@ enum afs_flock_operation {
EM(afs_call_trace_wake, "WAKE ") \
E_(afs_call_trace_work, "WORK ")
+/*
+ * Display strings for enum afs_server_trace, one EM() entry per value with
+ * the final entry written as E_(). What the table expands to depends on
+ * how EM()/E_() are defined at the point of use.
+ */
+#define afs_server_traces \
+ EM(afs_server_trace_alloc, "ALLOC ") \
+ EM(afs_server_trace_callback, "CALLBACK ") \
+ EM(afs_server_trace_destroy, "DESTROY ") \
+ EM(afs_server_trace_free, "FREE ") \
+ EM(afs_server_trace_gc, "GC ") \
+ EM(afs_server_trace_get_by_uuid, "GET uuid ") \
+ EM(afs_server_trace_get_caps, "GET caps ") \
+ EM(afs_server_trace_get_install, "GET inst ") \
+ EM(afs_server_trace_get_new_cbi, "GET cbi ") \
+ EM(afs_server_trace_give_up_cb, "giveup-cb") \
+ EM(afs_server_trace_put_call, "PUT call ") \
+ EM(afs_server_trace_put_cbi, "PUT cbi ") \
+ EM(afs_server_trace_put_find_rsq, "PUT f-rsq") \
+ EM(afs_server_trace_put_slist, "PUT slist") \
+ EM(afs_server_trace_put_slist_isort, "PUT isort") \
+ EM(afs_server_trace_put_uuid_rsq, "PUT u-req") \
+ E_(afs_server_trace_update, "UPDATE")
+
#define afs_fs_operations \
EM(afs_FS_FetchData, "FS.FetchData") \
EM(afs_FS_FetchStatus, "FS.FetchStatus") \
@@ -370,6 +420,16 @@ enum afs_flock_operation {
EM(afs_flock_op_unlock, "UNLOCK ") \
E_(afs_flock_op_wake, "WAKE ")
+/*
+ * Display strings for enum afs_cb_break_reason, one EM() entry per value
+ * with the final entry written as E_(). What the table expands to depends
+ * on how EM()/E_() are defined at the point of use.
+ */
+#define afs_cb_break_reasons \
+ EM(afs_cb_break_no_break, "no-break") \
+ EM(afs_cb_break_for_callback, "break-cb") \
+ EM(afs_cb_break_for_deleted, "break-del") \
+ EM(afs_cb_break_for_lapsed, "break-lapsed") \
+ EM(afs_cb_break_for_unlink, "break-unlink") \
+ EM(afs_cb_break_for_vsbreak, "break-vs") \
+ EM(afs_cb_break_for_volume_callback, "break-v-cb") \
+ E_(afs_cb_break_for_zap, "break-zap")
+
/*
* Export enum symbols via userspace.
*/
@@ -379,6 +439,7 @@ enum afs_flock_operation {
#define E_(a, b) TRACE_DEFINE_ENUM(a);
afs_call_traces;
+afs_server_traces;
afs_fs_operations;
afs_vl_operations;
afs_edit_dir_ops;
@@ -388,6 +449,7 @@ afs_io_errors;
afs_file_errors;
afs_flock_types;
afs_flock_operations;
+afs_cb_break_reasons;
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -1167,6 +1229,76 @@ TRACE_EVENT(afs_get_tree,
__entry->cell, __entry->volume, __entry->vid)
);
+/*
+ * afs_cb_break: records a copy of the fid, the supplied cb_break value,
+ * the break reason and the skipped flag. Printed as
+ * "<vid>:<vnode>:<unique> b=<cb_break> s=<skipped> <reason>", with the
+ * reason rendered through afs_cb_break_reasons.
+ */
+TRACE_EVENT(afs_cb_break,
+ TP_PROTO(struct afs_fid *fid, unsigned int cb_break,
+ enum afs_cb_break_reason reason, bool skipped),
+
+ TP_ARGS(fid, cb_break, reason, skipped),
+
+ TP_STRUCT__entry(
+ __field_struct(struct afs_fid, fid )
+ __field(unsigned int, cb_break )
+ __field(enum afs_cb_break_reason, reason )
+ __field(bool, skipped )
+ ),
+
+ TP_fast_assign(
+ __entry->fid = *fid;
+ __entry->cb_break = cb_break;
+ __entry->reason = reason;
+ __entry->skipped = skipped;
+ ),
+
+ TP_printk("%llx:%llx:%x b=%x s=%u %s",
+ __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+ __entry->cb_break,
+ __entry->skipped,
+ __print_symbolic(__entry->reason, afs_cb_break_reasons))
+ );
+
+/*
+ * afs_cb_miss: records a copy of the fid and the break reason. Printed as
+ * " <vid>:<vnode>:<unique> <reason>", with the reason rendered through
+ * afs_cb_break_reasons.
+ */
+TRACE_EVENT(afs_cb_miss,
+ TP_PROTO(struct afs_fid *fid, enum afs_cb_break_reason reason),
+
+ TP_ARGS(fid, reason),
+
+ TP_STRUCT__entry(
+ __field_struct(struct afs_fid, fid )
+ __field(enum afs_cb_break_reason, reason )
+ ),
+
+ TP_fast_assign(
+ __entry->fid = *fid;
+ __entry->reason = reason;
+ ),
+
+ TP_printk(" %llx:%llx:%x %s",
+ __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+ __print_symbolic(__entry->reason, afs_cb_break_reasons))
+ );
+
+/*
+ * afs_server: records the server's debug_id, the supplied usage value and
+ * the reason code (stored as a plain int). Printed as
+ * "s=<debug_id> <reason> u=<usage>", with the reason rendered through
+ * afs_server_traces.
+ */
+TRACE_EVENT(afs_server,
+ TP_PROTO(struct afs_server *server, int usage, enum afs_server_trace reason),
+
+ TP_ARGS(server, usage, reason),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, server )
+ __field(int, usage )
+ __field(int, reason )
+ ),
+
+ TP_fast_assign(
+ __entry->server = server->debug_id;
+ __entry->usage = usage;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("s=%08x %s u=%d",
+ __entry->server,
+ __print_symbolic(__entry->reason, afs_server_traces),
+ __entry->usage)
+ );
+
#endif /* _TRACE_AFS_H */
/* This part must be outside protection */
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index fad7befa612d..4b735923f2ff 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -203,6 +203,41 @@ TRACE_EVENT(generic_add_lease,
show_fl_type(__entry->fl_type))
);
+/*
+ * leases_conflict: records the conflict verdict together with the pointer,
+ * fl_flags and fl_type of both the existing lease and the breaker lock.
+ * Flags and types are rendered through show_fl_flags()/show_fl_type().
+ */
+TRACE_EVENT(leases_conflict,
+ TP_PROTO(bool conflict, struct file_lock *lease, struct file_lock *breaker),
+
+ TP_ARGS(conflict, lease, breaker),
+
+ TP_STRUCT__entry(
+ __field(void *, lease)
+ __field(void *, breaker)
+ __field(unsigned int, l_fl_flags)
+ __field(unsigned int, b_fl_flags)
+ __field(unsigned char, l_fl_type)
+ __field(unsigned char, b_fl_type)
+ __field(bool, conflict)
+ ),
+
+ TP_fast_assign(
+ __entry->lease = lease;
+ __entry->l_fl_flags = lease->fl_flags;
+ __entry->l_fl_type = lease->fl_type;
+ __entry->breaker = breaker;
+ __entry->b_fl_flags = breaker->fl_flags;
+ __entry->b_fl_type = breaker->fl_type;
+ __entry->conflict = conflict;
+ ),
+
+ TP_printk("conflict %d: lease=0x%p fl_flags=%s fl_type=%s; breaker=0x%p fl_flags=%s fl_type=%s",
+ __entry->conflict,
+ __entry->lease,
+ show_fl_flags(__entry->l_fl_flags),
+ show_fl_type(__entry->l_fl_type),
+ __entry->breaker,
+ show_fl_flags(__entry->b_fl_flags),
+ show_fl_type(__entry->b_fl_type))
+);
+
#endif /* _TRACE_FILELOCK_H */
/* This part must be outside protection */
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index a87904daf103..e5684a4512c0 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -844,9 +844,11 @@ __SYSCALL(__NR_fsconfig, sys_fsconfig)
__SYSCALL(__NR_fsmount, sys_fsmount)
#define __NR_fspick 433
__SYSCALL(__NR_fspick, sys_fspick)
+#define __NR_pidfd_open 434
+__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#undef __NR_syscalls
-#define __NR_syscalls 434
+#define __NR_syscalls 435
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/flat.h b/include/uapi/linux/flat.h
deleted file mode 100644
index 27e595e44fb7..000000000000
--- a/include/uapi/linux/flat.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2002-2003 David McCullough <davidm@snapgear.com>
- * Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>
- * The Silver Hammer Group, Ltd.
- *
- * This file provides the definitions and structures needed to
- * support uClinux flat-format executables.
- */
-
-#ifndef _UAPI_LINUX_FLAT_H
-#define _UAPI_LINUX_FLAT_H
-
-
-#define FLAT_VERSION 0x00000004L
-
-#ifdef CONFIG_BINFMT_SHARED_FLAT
-#define MAX_SHARED_LIBS (4)
-#else
-#define MAX_SHARED_LIBS (1)
-#endif
-
-/*
- * To make everything easier to port and manage cross platform
- * development, all fields are in network byte order.
- */
-
-struct flat_hdr {
- char magic[4];
- unsigned long rev; /* version (as above) */
- unsigned long entry; /* Offset of first executable instruction
- with text segment from beginning of file */
- unsigned long data_start; /* Offset of data segment from beginning of
- file */
- unsigned long data_end; /* Offset of end of data segment
- from beginning of file */
- unsigned long bss_end; /* Offset of end of bss segment from beginning
- of file */
-
- /* (It is assumed that data_end through bss_end forms the bss segment.) */
-
- unsigned long stack_size; /* Size of stack, in bytes */
- unsigned long reloc_start; /* Offset of relocation records from
- beginning of file */
- unsigned long reloc_count; /* Number of relocation records */
- unsigned long flags;
- unsigned long build_date; /* When the program/library was built */
- unsigned long filler[5]; /* Reservered, set to zero */
-};
-
-#define FLAT_FLAG_RAM 0x0001 /* load program entirely into RAM */
-#define FLAT_FLAG_GOTPIC 0x0002 /* program is PIC with GOT */
-#define FLAT_FLAG_GZIP 0x0004 /* all but the header is compressed */
-#define FLAT_FLAG_GZDATA 0x0008 /* only data/relocs are compressed (for XIP) */
-#define FLAT_FLAG_KTRACE 0x0010 /* output useful kernel trace for debugging */
-
-
-
-#endif /* _UAPI_LINUX_FLAT_H */
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 1f7a4e737214..ed3d5893830d 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -15,69 +15,6 @@
#include <linux/types.h>
-/*
- * Keyring permission grant definitions
- */
-enum key_ace_subject_type {
- KEY_ACE_SUBJ_STANDARD = 0, /* subject is one of key_ace_standard_subject */
- nr__key_ace_subject_type
-};
-
-enum key_ace_standard_subject {
- KEY_ACE_EVERYONE = 0, /* Everyone, including owner and group */
- KEY_ACE_GROUP = 1, /* The key's group */
- KEY_ACE_OWNER = 2, /* The owner of the key */
- KEY_ACE_POSSESSOR = 3, /* Any process that possesses of the key */
- nr__key_ace_standard_subject
-};
-
-#define KEY_ACE_VIEW 0x00000001 /* Can describe the key */
-#define KEY_ACE_READ 0x00000002 /* Can read the key content */
-#define KEY_ACE_WRITE 0x00000004 /* Can update/modify the key content */
-#define KEY_ACE_SEARCH 0x00000008 /* Can find the key by search */
-#define KEY_ACE_LINK 0x00000010 /* Can make a link to the key */
-#define KEY_ACE_SET_SECURITY 0x00000020 /* Can set owner, group, ACL */
-#define KEY_ACE_INVAL 0x00000040 /* Can invalidate the key */
-#define KEY_ACE_REVOKE 0x00000080 /* Can revoke the key */
-#define KEY_ACE_JOIN 0x00000100 /* Can join keyring */
-#define KEY_ACE_CLEAR 0x00000200 /* Can clear keyring */
-#define KEY_ACE__PERMS 0xffffffff
-
-/*
- * Old-style permissions mask, deprecated in favour of ACL.
- */
-#define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */
-#define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */
-#define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */
-#define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */
-#define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */
-#define KEY_POS_SETATTR 0x20000000 /* possessor can set key attributes */
-#define KEY_POS_ALL 0x3f000000
-
-#define KEY_USR_VIEW 0x00010000 /* user permissions... */
-#define KEY_USR_READ 0x00020000
-#define KEY_USR_WRITE 0x00040000
-#define KEY_USR_SEARCH 0x00080000
-#define KEY_USR_LINK 0x00100000
-#define KEY_USR_SETATTR 0x00200000
-#define KEY_USR_ALL 0x003f0000
-
-#define KEY_GRP_VIEW 0x00000100 /* group permissions... */
-#define KEY_GRP_READ 0x00000200
-#define KEY_GRP_WRITE 0x00000400
-#define KEY_GRP_SEARCH 0x00000800
-#define KEY_GRP_LINK 0x00001000
-#define KEY_GRP_SETATTR 0x00002000
-#define KEY_GRP_ALL 0x00003f00
-
-#define KEY_OTH_VIEW 0x00000001 /* third party permissions... */
-#define KEY_OTH_READ 0x00000002
-#define KEY_OTH_WRITE 0x00000004
-#define KEY_OTH_SEARCH 0x00000008
-#define KEY_OTH_LINK 0x00000010
-#define KEY_OTH_SETATTR 0x00000020
-#define KEY_OTH_ALL 0x0000003f
-
/* special process keyring shortcut IDs */
#define KEY_SPEC_THREAD_KEYRING -1 /* - key ID for thread-specific keyring */
#define KEY_SPEC_PROCESS_KEYRING -2 /* - key ID for process-specific keyring */
@@ -132,7 +69,6 @@ enum key_ace_standard_subject {
#define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */
#define KEYCTL_MOVE 30 /* Move keys between keyrings */
#define KEYCTL_CAPABILITIES 31 /* Find capabilities of keyrings subsystem */
-#define KEYCTL_GRANT_PERMISSION 32 /* Grant a permit to a key */
/* keyctl structures */
struct keyctl_dh_params {
@@ -194,6 +130,5 @@ struct keyctl_pkey_params {
#define KEYCTL_CAPS0_MOVE 0x80 /* KEYCTL_MOVE supported */
#define KEYCTL_CAPS1_NS_KEYRING_NAME 0x01 /* Keyring names are per-user_namespace */
#define KEYCTL_CAPS1_NS_KEY_TAG 0x02 /* Key indexing can include a namespace tag */
-#define KEYCTL_CAPS1_ACL_ALTERABLE 0x04 /* Keys have internal ACL that can be altered */
#endif /* _LINUX_KEYCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 847dd147b068..187c02ce534c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1711,8 +1711,34 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
}
#endif
+/*
+ * Poll support for process exit notification.
+ *
+ * Registers the caller on the pid's wait_pidfd queue and returns an event
+ * mask: EPOLLIN | EPOLLRDNORM once no task is attached to the pid any more
+ * or the task has an exit_state and its thread group is empty, 0 otherwise.
+ *
+ * The mask is typed __poll_t and built from EPOLL* constants, as expected
+ * of a file_operations ->poll handler.
+ */
+static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
+{
+ struct task_struct *task;
+ struct pid *pid = file->private_data;
+ __poll_t poll_flags = 0;
+
+ poll_wait(file, &pid->wait_pidfd, pts);
+
+ rcu_read_lock();
+ task = pid_task(pid, PIDTYPE_PID);
+ /*
+ * Inform pollers only when the whole thread group exits.
+ * If the thread group leader exits before all other threads in the
+ * group, then poll(2) should block, similar to the wait(2) family.
+ */
+ if (!task || (task->exit_state && thread_group_empty(task)))
+ poll_flags = EPOLLIN | EPOLLRDNORM;
+ rcu_read_unlock();
+
+ return poll_flags;
+}
+
const struct file_operations pidfd_fops = {
.release = pidfd_release,
+ .poll = pidfd_poll,
#ifdef CONFIG_PROC_FS
.show_fdinfo = pidfd_show_fdinfo,
#endif
diff --git a/kernel/pid.c b/kernel/pid.c
index e5cad0c7d5dd..16263b526560 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -38,6 +38,8 @@
#include <linux/syscalls.h>
#include <linux/proc_ns.h>
#include <linux/proc_fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/idr.h>
@@ -214,6 +216,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
+ init_waitqueue_head(&pid->wait_pidfd);
+
upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
if (!(ns->pid_allocated & PIDNS_ADDING))
@@ -451,6 +455,73 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return idr_get_next(&ns->idr, &nr);
}
+/**
+ * pidfd_create() - Create a new pid file descriptor.
+ *
+ * @pid: struct pid that the pidfd will reference
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * A reference on @pid is taken with get_pid() and passed to
+ * anon_inode_getfd() as the private data of the new file; it is dropped
+ * again here if no file descriptor could be created. The caller's own
+ * reference on @pid is left untouched.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+ int fd;
+
+ fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
+ O_RDWR | O_CLOEXEC);
+ /* Undo the get_pid() above when no fd was handed out. */
+ if (fd < 0)
+ put_pid(pid);
+
+ return fd;
+}
+
+/**
+ * pidfd_open() - Open new pid file descriptor.
+ *
+ * @pid: pid for which to retrieve a pidfd
+ * @flags: flags to pass
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set for
+ * the process identified by @pid. Currently, the process identified by
+ * @pid must be a thread-group leader. This restriction currently exists
+ * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
+ * be used with CLONE_THREAD) and pidfd polling (only supports thread group
+ * leaders).
+ *
+ * Errors produced directly by this function: -EINVAL if @flags is
+ * non-zero, if @pid is not positive, or if no task is attached to the
+ * pid as PIDTYPE_TGID; -ESRCH if find_get_pid() does not find @pid.
+ * Any other error comes from pidfd_create().
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
+{
+ int fd, ret;
+ struct pid *p;
+
+ /* No flags are defined; reject anything that is set. */
+ if (flags)
+ return -EINVAL;
+
+ if (pid <= 0)
+ return -EINVAL;
+
+ p = find_get_pid(pid);
+ if (!p)
+ return -ESRCH;
+
+ ret = 0;
+ rcu_read_lock();
+ /* Only pids that currently lead a thread group are accepted. */
+ if (!pid_task(p, PIDTYPE_TGID))
+ ret = -EINVAL;
+ rcu_read_unlock();
+
+ /* "a ?: b" yields ret when it is non-zero, else creates the pidfd. */
+ fd = ret ?: pidfd_create(p);
+ /* Drop the find_get_pid() reference; pidfd_create() took its own. */
+ put_pid(p);
+ return fd;
+}
+
void __init pid_idr_init(void)
{
/* Verify no one has done anything silly: */
diff --git a/kernel/signal.c b/kernel/signal.c
index 91cb8ca41954..dabe100d2091 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1881,6 +1881,14 @@ ret:
return ret;
}
+/*
+ * Wake every waiter queued on the wait_pidfd wait queue of the struct pid
+ * returned by task_pid() for @task.
+ */
+static void do_notify_pidfd(struct task_struct *task)
+{
+ struct pid *pid;
+
+ pid = task_pid(task);
+ wake_up_all(&pid->wait_pidfd);
+}
+
/*
* Let a parent know about the death of a child.
* For a stopped/continued status change, use do_notify_parent_cldstop instead.
@@ -1904,6 +1912,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
BUG_ON(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
+ /* Wake up all pidfd waiters */
+ do_notify_pidfd(tsk);
+
if (sig != SIGCHLD) {
/*
* This is only possible if parent == real_parent.
diff --git a/lib/digsig.c b/lib/digsig.c
index ab0800f98eaf..e0627c3e53b2 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -224,7 +224,7 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen,
else
key = key_ref_to_ptr(kref);
} else {
- key = request_key(&key_type_user, name, NULL, NULL);
+ key = request_key(&key_type_user, name, NULL);
}
if (IS_ERR(key)) {
pr_err("key not found, id: %s\n", name);
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 4403e1924f73..3a90a9e2b94a 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -540,6 +540,25 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
}
EXPORT_SYMBOL(string_escape_mem);
+/*
+ * Escape @isz bytes from @src into @dst, whose capacity is @osz bytes.
+ *
+ * A byte that is not printable, not ASCII, or is a double quote or a
+ * backslash is emitted through escape_hex(); every other byte is copied
+ * through escape_passthrough().
+ *
+ * Returns how far the output cursor advanced from @dst.
+ * NOTE(review): whether that value can exceed @osz depends on how
+ * escape_hex()/escape_passthrough() treat a full buffer - confirm against
+ * those helpers.
+ */
+int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
+ size_t osz)
+{
+ char *p = dst;
+ char *end = p + osz;
+
+ while (isz--) {
+ unsigned char c = *src++;
+
+ if (!isprint(c) || !isascii(c) || c == '"' || c == '\\')
+ escape_hex(c, &p, end);
+ else
+ escape_passthrough(c, &p, end);
+ }
+
+ return p - dst;
+}
+EXPORT_SYMBOL(string_escape_mem_ascii);
+
/*
* Return an allocated string that has been escaped of special characters
* and double quotes, making it safe to log in quotes.
diff --git a/mm/filemap.c b/mm/filemap.c
index 6dd9a2274c80..f1aa20ab8434 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -550,6 +550,28 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
EXPORT_SYMBOL(filemap_fdatawait_range);
/**
+ * filemap_fdatawait_range_keep_errors - wait for writeback to complete
+ * @mapping: address space structure to wait for
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Walk the list of under-writeback pages of the given address space in the
+ * given range and wait for all of them. Unlike filemap_fdatawait_range(),
+ * this function does not clear error status of the address space.
+ *
+ * Use this function if callers don't handle errors themselves. Expected
+ * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
+ * fsfreeze(8)
+ *
+ * Return: the value of filemap_check_and_keep_errors() for @mapping.
+ */
+int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
+{
+ __filemap_fdatawait_range(mapping, start_byte, end_byte);
+ return filemap_check_and_keep_errors(mapping);
+}
+EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);
+
+/**
* file_fdatawait_range - wait for writeback to complete
* @file: file pointing to address space structure to wait for
* @start_byte: offset in bytes where the range starts
@@ -2925,24 +2947,11 @@ EXPORT_SYMBOL(read_cache_page_gfp);
* LFS limits. If pos is under the limit it becomes a short access. If it
* exceeds the limit we return -EFBIG.
*/
-static int generic_access_check_limits(struct file *file, loff_t pos,
- loff_t *count)
-{
- struct inode *inode = file->f_mapping->host;
- loff_t max_size = inode->i_sb->s_maxbytes;
-
- if (!(file->f_flags & O_LARGEFILE))
- max_size = MAX_NON_LFS;
-
- if (unlikely(pos >= max_size))
- return -EFBIG;
- *count = min(*count, max_size - pos);
- return 0;
-}
-
static int generic_write_check_limits(struct file *file, loff_t pos,
loff_t *count)
{
+ struct inode *inode = file->f_mapping->host;
+ loff_t max_size = inode->i_sb->s_maxbytes;
loff_t limit = rlimit(RLIMIT_FSIZE);
if (limit != RLIM_INFINITY) {
@@ -2953,7 +2962,15 @@ static int generic_write_check_limits(struct file *file, loff_t pos,
*count = min(*count, limit - pos);
}
- return generic_access_check_limits(file, pos, count);
+ if (!(file->f_flags & O_LARGEFILE))
+ max_size = MAX_NON_LFS;
+
+ if (unlikely(pos >= max_size))
+ return -EFBIG;
+
+ *count = min(*count, max_size - pos);
+
+ return 0;
}
/*
@@ -2993,7 +3010,7 @@ EXPORT_SYMBOL(generic_write_checks);
/*
* Performs necessary checks before doing a clone.
*
- * Can adjust amount of bytes to clone.
+ * Can adjust amount of bytes to clone via @req_count argument.
* Returns appropriate error code that caller should return or
* zero in case the clone should be allowed.
*/
@@ -3031,10 +3048,6 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
return -EINVAL;
count = min(count, size_in - (uint64_t)pos_in);
- ret = generic_access_check_limits(file_in, pos_in, &count);
- if (ret)
- return ret;
-
ret = generic_write_check_limits(file_out, pos_out, &count);
if (ret)
return ret;
@@ -3071,6 +3084,83 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
return 0;
}
+
+/*
+ * Performs common checks before doing a file copy/clone
+ * from @file_in to @file_out.
+ *
+ * Returns -EISDIR if either inode is a directory, -EINVAL if either inode
+ * is not a regular file, -EBADF if @file_in is not open for reading or
+ * @file_out is not open for writing or was opened with O_APPEND, and
+ * zero otherwise.
+ */
+int generic_file_rw_checks(struct file *file_in, struct file *file_out)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+
+ /* Don't copy dirs, pipes, sockets... */
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
+ if (!(file_in->f_mode & FMODE_READ) ||
+ !(file_out->f_mode & FMODE_WRITE) ||
+ (file_out->f_flags & O_APPEND))
+ return -EBADF;
+
+ return 0;
+}
+
+/*
+ * Performs necessary checks before doing a file copy
+ *
+ * Can adjust amount of bytes to copy via @req_count argument.
+ * Returns appropriate error code that caller should return or
+ * zero in case the copy should be allowed.
+ *
+ * Errors produced here: -EPERM if the destination inode is immutable,
+ * -ETXTBSY if either inode is a swapfile, -EOVERFLOW if an offset plus the
+ * requested count wraps, and -EINVAL if source and destination ranges
+ * overlap within the same inode. Errors from generic_file_rw_checks() and
+ * generic_write_check_limits() are passed through unchanged.
+ *
+ * @req_count is only written back on success. @flags is not examined by
+ * this function.
+ */
+int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t *req_count, unsigned int flags)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ uint64_t count = *req_count;
+ loff_t size_in;
+ int ret;
+
+ ret = generic_file_rw_checks(file_in, file_out);
+ if (ret)
+ return ret;
+
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode_out))
+ return -EPERM;
+
+ if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ return -ETXTBSY;
+
+ /* Ensure offsets don't wrap. */
+ if (pos_in + count < pos_in || pos_out + count < pos_out)
+ return -EOVERFLOW;
+
+ /* Shorten the copy to EOF */
+ size_in = i_size_read(inode_in);
+ if (pos_in >= size_in)
+ count = 0;
+ else
+ count = min(count, size_in - (uint64_t)pos_in);
+
+ /* May shorten count further or fail, depending on the output file. */
+ ret = generic_write_check_limits(file_out, pos_out, &count);
+ if (ret)
+ return ret;
+
+ /* Don't allow overlapped copying within the same file. */
+ if (inode_in == inode_out &&
+ pos_out + count > pos_in &&
+ pos_out < pos_in + count)
+ return -EINVAL;
+
+ *req_count = count;
+ return 0;
+}
+
int pagecache_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 38de80d01aae..1c811c74bfc0 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -306,7 +306,7 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) {
int err = 0;
struct ceph_crypto_key *ckey;
- ukey = request_key(&key_type_ceph, name, NULL, NULL);
+ ukey = request_key(&key_type_ceph, name, NULL);
if (IS_ERR(ukey)) {
/* request_key errors don't map nicely to mount(2)
errors; don't even try, but still printk */
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 6b201531b165..3e1a90669006 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -46,15 +46,6 @@ const struct cred *dns_resolver_cache;
#define DNS_ERRORNO_OPTION "dnserror"
-static struct key_acl dns_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_CLEAR),
- }
-};
-
/*
* Preparse instantiation data for a dns_resolver key.
*
@@ -352,7 +343,8 @@ static int __init init_dns_resolver(void)
keyring = keyring_alloc(".dns_resolver",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &dns_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 236baf2bfa4c..cab4e0df924f 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -47,16 +47,6 @@
#include "internal.h"
-static struct key_acl dns_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_INVAL),
- }
-};
-
/**
* dns_query - Query the DNS
* @net: The network namespace to operate in.
@@ -135,8 +125,7 @@ int dns_query(struct net *net,
* add_key() to preinstall malicious redirections
*/
saved_cred = override_creds(dns_resolver_cache);
- rkey = request_key_net(&key_type_dns_resolver, desc, net, options,
- &dns_key_acl);
+ rkey = request_key_net(&key_type_dns_resolver, desc, net, options);
revert_creds(saved_cred);
kfree(desc);
if (IS_ERR(rkey)) {
@@ -146,6 +135,8 @@ int dns_query(struct net *net,
down_read(&rkey->sem);
set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
+ rkey->perm |= KEY_USR_VIEW;
+
ret = key_validate(rkey);
if (ret < 0)
goto put;
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 2032f6a8225e..6c3f35fac42d 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -23,14 +23,6 @@
#include <keys/user-type.h>
#include "ar-internal.h"
-static struct key_acl rxrpc_null_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 1,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_READ),
- }
-};
-
static int rxrpc_vet_description_s(const char *);
static int rxrpc_preparse(struct key_preparsed_payload *);
static int rxrpc_preparse_s(struct key_preparsed_payload *);
@@ -918,8 +910,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
if (IS_ERR(description))
return PTR_ERR(description);
- key = request_key_net(&key_type_rxrpc, description, sock_net(&rx->sk),
- NULL, NULL);
+ key = request_key_net(&key_type_rxrpc, description, sock_net(&rx->sk), NULL);
if (IS_ERR(key)) {
kfree(description);
_leave(" = %ld", PTR_ERR(key));
@@ -950,8 +941,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
if (IS_ERR(description))
return PTR_ERR(description);
- key = request_key_net(&key_type_keyring, description, sock_net(&rx->sk),
- NULL, NULL);
+ key = request_key_net(&key_type_keyring, description, sock_net(&rx->sk), NULL);
if (IS_ERR(key)) {
kfree(description);
_leave(" = %ld", PTR_ERR(key));
@@ -984,8 +974,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
_enter("");
key = key_alloc(&key_type_rxrpc, "x",
- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &internal_key_acl,
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0,
KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(key)) {
_leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
@@ -1033,7 +1022,7 @@ struct key *rxrpc_get_null_key(const char *keyname)
key = key_alloc(&key_type_rxrpc, keyname,
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &rxrpc_null_key_acl, KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(key))
return key;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 66fbb9d2fba7..6f1528f271ee 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1375,7 +1375,6 @@ static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
hlist_first_rcu(&cd->hash_table[hash])),
struct cache_head, cache_list);
}
-EXPORT_SYMBOL_GPL(cache_seq_next);
void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
__acquires(RCU)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 126d31472a99..73bd62979fe7 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -598,6 +598,8 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
+ if (!ret)
+ fsnotify_rmdir(dir, dentry);
d_delete(dentry);
dput(dentry);
return ret;
@@ -609,6 +611,8 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_unlink(dir, dentry);
+ if (!ret)
+ fsnotify_unlink(dir, dentry);
d_delete(dentry);
dput(dentry);
return ret;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 869ce7737997..de3c077733a7 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -35,7 +35,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
* 6 minutes
- * http://www.connectathon.org/talks96/nfstcp.pdf
+ * http://nfsv4bat.org/Documents/ConnectAThon/1996/nfstcp.pdf
*/
static int svc_conn_age_period = 6*60;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 298fe91557f7..4831ad745f91 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -741,7 +741,8 @@ static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
"asymmetric", NULL, p, plen,
- &internal_key_acl,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
KEY_ALLOC_NOT_IN_QUOTA |
KEY_ALLOC_BUILT_IN |
KEY_ALLOC_BYPASS_RESTRICTION);
@@ -767,7 +768,8 @@ static int __init load_builtin_regdb_keys(void)
builtin_regdb_keys =
keyring_alloc(".builtin_regdb_keys",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- &internal_keyring_acl,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(builtin_regdb_keys))
return PTR_ERR(builtin_regdb_keys);
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index f9f3c8ffe786..868ade3e8970 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -47,8 +47,7 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
if (!keyring[id]) {
keyring[id] =
- request_key(&key_type_keyring, keyring_name[id],
- NULL, NULL);
+ request_key(&key_type_keyring, keyring_name[id], NULL);
if (IS_ERR(keyring[id])) {
int err = PTR_ERR(keyring[id]);
pr_err("no %s keyring: %d\n", keyring_name[id], err);
@@ -71,14 +70,14 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
}
static int __init __integrity_init_keyring(const unsigned int id,
- struct key_acl *acl,
+ key_perm_t perm,
struct key_restriction *restriction)
{
const struct cred *cred = current_cred();
int err = 0;
keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0),
- KGIDT_INIT(0), cred, acl,
+ KGIDT_INIT(0), cred, perm,
KEY_ALLOC_NOT_IN_QUOTA, restriction, NULL);
if (IS_ERR(keyring[id])) {
err = PTR_ERR(keyring[id]);
@@ -96,7 +95,10 @@ static int __init __integrity_init_keyring(const unsigned int id,
int __init integrity_init_keyring(const unsigned int id)
{
struct key_restriction *restriction;
- struct key_acl *acl = &internal_keyring_acl;
+ key_perm_t perm;
+
+ perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW
+ | KEY_USR_READ | KEY_USR_SEARCH;
if (id == INTEGRITY_KEYRING_PLATFORM) {
restriction = NULL;
@@ -111,14 +113,14 @@ int __init integrity_init_keyring(const unsigned int id)
return -ENOMEM;
restriction->check = restrict_link_to_ima;
- acl = &internal_writable_keyring_acl;
+ perm |= KEY_USR_WRITE;
out:
- return __integrity_init_keyring(id, acl, restriction);
+ return __integrity_init_keyring(id, perm, restriction);
}
-static int __init integrity_add_key(const unsigned int id, const void *data,
- off_t size, struct key_acl *acl)
+int __init integrity_add_key(const unsigned int id, const void *data,
+ off_t size, key_perm_t perm)
{
key_ref_t key;
int rc = 0;
@@ -127,7 +129,7 @@ static int __init integrity_add_key(const unsigned int id, const void *data,
return -EINVAL;
key = key_create_or_update(make_key_ref(keyring[id], 1), "asymmetric",
- NULL, data, size, acl ?: &internal_key_acl,
+ NULL, data, size, perm,
KEY_ALLOC_NOT_IN_QUOTA);
if (IS_ERR(key)) {
rc = PTR_ERR(key);
@@ -147,6 +149,7 @@ int __init integrity_load_x509(const unsigned int id, const char *path)
void *data;
loff_t size;
int rc;
+ key_perm_t perm;
rc = kernel_read_file_from_path(path, &data, &size, 0,
READING_X509_CERTIFICATE);
@@ -155,19 +158,21 @@ int __init integrity_load_x509(const unsigned int id, const char *path)
return rc;
}
+ perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ;
+
pr_info("Loading X.509 certificate: %s\n", path);
- rc = integrity_add_key(id, data, size, NULL);
+ rc = integrity_add_key(id, (const void *)data, size, perm);
vfree(data);
return rc;
}
int __init integrity_load_cert(const unsigned int id, const char *source,
- const void *data, size_t len, struct key_acl *acl)
+ const void *data, size_t len, key_perm_t perm)
{
if (!data)
return -EINVAL;
pr_info("Loading X.509 certificate: %s\n", source);
- return integrity_add_key(id, data, len, acl);
+ return integrity_add_key(id, data, len, perm);
}
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index a29df775fdd8..55aec161d0e1 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -53,7 +53,7 @@ static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid)
else
key = key_ref_to_ptr(kref);
} else {
- key = request_key(&key_type_asymmetric, name, NULL, NULL);
+ key = request_key(&key_type_asymmetric, name, NULL);
}
if (IS_ERR(key)) {
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 466eebd3b4aa..d485f6fc908e 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -356,7 +356,7 @@ int evm_init_key(void)
struct encrypted_key_payload *ekp;
int rc;
- evm_key = request_key(&key_type_encrypted, EVMKEY, NULL, NULL);
+ evm_key = request_key(&key_type_encrypted, EVMKEY, NULL);
if (IS_ERR(evm_key))
return -ENOENT;
diff --git a/security/integrity/ima/ima_mok.c b/security/integrity/ima/ima_mok.c
index b52ae1476ec3..36cadadbfba4 100644
--- a/security/integrity/ima/ima_mok.c
+++ b/security/integrity/ima/ima_mok.c
@@ -16,15 +16,6 @@
#include <keys/system_keyring.h>
-static struct key_acl integrity_blacklist_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_WRITE | KEY_ACE_SEARCH),
- }
-};
-
struct key *ima_blacklist_keyring;
/*
@@ -44,7 +35,9 @@ __init int ima_mok_init(void)
ima_blacklist_keyring = keyring_alloc(".ima_blacklist",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- &integrity_blacklist_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ |
+ KEY_USR_WRITE | KEY_USR_SEARCH,
KEY_ALLOC_NOT_IN_QUOTA,
restriction, NULL);
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 875c6a7a5af1..ed12d8e13d04 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -12,8 +12,6 @@
#include <linux/key.h>
#include <linux/audit.h>
-struct key_acl;
-
/* iint action cache flags */
#define IMA_MEASURE 0x00000001
#define IMA_MEASURED 0x00000002
@@ -157,7 +155,7 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
int __init integrity_init_keyring(const unsigned int id);
int __init integrity_load_x509(const unsigned int id, const char *path);
int __init integrity_load_cert(const unsigned int id, const char *source,
- const void *data, size_t len, struct key_acl *acl);
+ const void *data, size_t len, key_perm_t perm);
#else
static inline int integrity_digsig_verify(const unsigned int id,
@@ -175,7 +173,7 @@ static inline int integrity_init_keyring(const unsigned int id)
static inline int __init integrity_load_cert(const unsigned int id,
const char *source,
const void *data, size_t len,
- struct key_acl *acl)
+ key_perm_t perm)
{
return 0;
}
diff --git a/security/integrity/platform_certs/platform_keyring.c b/security/integrity/platform_certs/platform_keyring.c
index 7646e35f2d91..bcafd7387729 100644
--- a/security/integrity/platform_certs/platform_keyring.c
+++ b/security/integrity/platform_certs/platform_keyring.c
@@ -14,15 +14,6 @@
#include <linux/slab.h>
#include "../integrity.h"
-static struct key_acl platform_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_READ),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
/**
* add_to_platform_keyring - Add to platform keyring without validation.
* @source: Source of key
@@ -35,10 +26,13 @@ static struct key_acl platform_key_acl = {
void __init add_to_platform_keyring(const char *source, const void *data,
size_t len)
{
+ key_perm_t perm;
int rc;
+ perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW;
+
rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source, data, len,
- &platform_key_acl);
+ perm);
if (rc)
pr_info("Error adding keys to platform keyring %s\n", source);
}
diff --git a/security/keys/compat.c b/security/keys/compat.c
index b0e59546e7bd..9bcc404131aa 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -157,8 +157,6 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
case KEYCTL_MOVE:
return keyctl_keyring_move(arg2, arg3, arg4, arg5);
- case KEYCTL_GRANT_PERMISSION:
- return keyctl_grant_permission(arg2, arg3, arg4, arg5);
case KEYCTL_CAPABILITIES:
return keyctl_capabilities(compat_ptr(arg2), arg3);
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 9df560e477c2..60720f58cbe0 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -304,7 +304,7 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k
const struct user_key_payload *upayload;
struct key *ukey;
- ukey = request_key(&key_type_user, master_desc, NULL, NULL);
+ ukey = request_key(&key_type_user, master_desc, NULL);
if (IS_ERR(ukey))
goto error;
diff --git a/security/keys/encrypted-keys/masterkey_trusted.c b/security/keys/encrypted-keys/masterkey_trusted.c
index d649f2f29475..c68528aa49c6 100644
--- a/security/keys/encrypted-keys/masterkey_trusted.c
+++ b/security/keys/encrypted-keys/masterkey_trusted.c
@@ -30,7 +30,7 @@ struct key *request_trusted_key(const char *trusted_desc,
struct trusted_key_payload *tpayload;
struct key *tkey;
- tkey = request_key(&key_type_trusted, trusted_desc, NULL, NULL);
+ tkey = request_key(&key_type_trusted, trusted_desc, NULL);
if (IS_ERR(tkey))
goto error;
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 48c3e124c272..671dd730ecfc 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -151,7 +151,6 @@ static noinline void key_gc_unused_keys(struct list_head *keys)
key_user_put(key->user);
key_put_tag(key->domain_tag);
- key_put_acl(rcu_access_pointer(key->acl));
kfree(key->description);
memzero_explicit(key, sizeof(*key));
@@ -221,6 +220,7 @@ continue_scanning:
if (key->type == key_gc_dead_keytype) {
gc_state |= KEY_GC_FOUND_DEAD_KEY;
set_bit(KEY_FLAG_DEAD, &key->flags);
+ key->perm = 0;
goto skip_dead_key;
} else if (key->type == &key_type_keyring &&
key->restrict_link) {
diff --git a/security/keys/internal.h b/security/keys/internal.h
index e0c5bb8b1685..c039373488bd 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -84,11 +84,8 @@ extern struct rb_root key_serial_tree;
extern spinlock_t key_serial_lock;
extern struct mutex key_construction_mutex;
extern wait_queue_head_t request_key_conswq;
-extern struct key_acl default_key_acl;
-extern struct key_acl joinable_keyring_acl;
extern void key_set_index_key(struct keyring_index_key *index_key);
-
extern struct key_type *key_type_lookup(const char *type);
extern void key_type_put(struct key_type *ktype);
@@ -159,7 +156,6 @@ extern struct key *request_key_and_link(struct key_type *type,
const void *callout_info,
size_t callout_len,
void *aux,
- struct key_acl *acl,
struct key *dest_keyring,
unsigned long flags);
@@ -183,10 +179,7 @@ extern void key_gc_keytype(struct key_type *ktype);
extern int key_task_permission(const key_ref_t key_ref,
const struct cred *cred,
- u32 desired_perm);
-extern unsigned int key_acl_to_perm(const struct key_acl *acl);
-extern long key_set_acl(struct key *key, struct key_acl *acl);
-extern void key_put_acl(struct key_acl *acl);
+ key_perm_t perm);
/*
* Check to see whether permission is granted to use a key in the desired way.
@@ -233,7 +226,7 @@ extern long keyctl_keyring_search(key_serial_t, const char __user *,
const char __user *, key_serial_t);
extern long keyctl_read_key(key_serial_t, char __user *, size_t);
extern long keyctl_chown_key(key_serial_t, uid_t, gid_t);
-extern long keyctl_setperm_key(key_serial_t, unsigned int);
+extern long keyctl_setperm_key(key_serial_t, key_perm_t);
extern long keyctl_instantiate_key(key_serial_t, const void __user *,
size_t, key_serial_t);
extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t);
@@ -338,11 +331,6 @@ static inline long keyctl_pkey_e_d_s(int op,
extern long keyctl_capabilities(unsigned char __user *_buffer, size_t buflen);
-extern long keyctl_grant_permission(key_serial_t keyid,
- enum key_ace_subject_type type,
- unsigned int subject,
- unsigned int perm);
-
/*
* Debugging key validation
*/
diff --git a/security/keys/key.c b/security/keys/key.c
index 519211a996e7..764f4c57913e 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -195,7 +195,7 @@ serial_exists:
* @uid: The owner of the new key.
* @gid: The group ID for the new key's group permissions.
* @cred: The credentials specifying UID namespace.
- * @acl: The ACL to attach to the new key.
+ * @perm: The permissions mask of the new key.
* @flags: Flags specifying quota properties.
* @restrict_link: Optional link restriction for new keyrings.
*
@@ -223,7 +223,7 @@ serial_exists:
*/
struct key *key_alloc(struct key_type *type, const char *desc,
kuid_t uid, kgid_t gid, const struct cred *cred,
- struct key_acl *acl, unsigned long flags,
+ key_perm_t perm, unsigned long flags,
struct key_restriction *restrict_link)
{
struct key_user *user = NULL;
@@ -246,9 +246,6 @@ struct key *key_alloc(struct key_type *type, const char *desc,
desclen = strlen(desc);
quotalen = desclen + 1 + type->def_datalen;
- if (!acl)
- acl = &default_key_acl;
-
/* get hold of the key tracking for this user */
user = key_user_lookup(uid);
if (!user)
@@ -295,8 +292,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
key->datalen = type->def_datalen;
key->uid = uid;
key->gid = gid;
- refcount_inc(&acl->usage);
- rcu_assign_pointer(key->acl, acl);
+ key->perm = perm;
key->restrict_link = restrict_link;
key->last_used_at = ktime_get_real_seconds();
@@ -791,7 +787,7 @@ error:
* @description: The searchable description for the key.
* @payload: The data to use to instantiate or update the key.
* @plen: The length of @payload.
- * @acl: The ACL to attach if a key is created.
+ * @perm: The permissions mask for a new key.
* @flags: The quota flags for a new key.
*
* Search the destination keyring for a key of the same description and if one
@@ -814,7 +810,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
const char *description,
const void *payload,
size_t plen,
- struct key_acl *acl,
+ key_perm_t perm,
unsigned long flags)
{
struct keyring_index_key index_key = {
@@ -911,9 +907,22 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
goto found_matching_key;
}
+ /* if the client doesn't provide, decide on the permissions we want */
+ if (perm == KEY_PERM_UNDEF) {
+ perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+ perm |= KEY_USR_VIEW;
+
+ if (index_key.type->read)
+ perm |= KEY_POS_READ;
+
+ if (index_key.type == &key_type_keyring ||
+ index_key.type->update)
+ perm |= KEY_POS_WRITE;
+ }
+
/* allocate a new key */
key = key_alloc(index_key.type, index_key.description,
- cred->fsuid, cred->fsgid, cred, acl, flags, NULL);
+ cred->fsuid, cred->fsgid, cred, perm, flags, NULL);
if (IS_ERR(key)) {
key_ref = ERR_CAST(key);
goto error_link_end;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index c2dd66d556d4..9b898c969558 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -37,8 +37,7 @@ static const unsigned char keyrings_capabilities[2] = {
KEYCTL_CAPS0_MOVE
),
[1] = (KEYCTL_CAPS1_NS_KEYRING_NAME |
- KEYCTL_CAPS1_NS_KEY_TAG |
- KEYCTL_CAPS1_ACL_ALTERABLE),
+ KEYCTL_CAPS1_NS_KEY_TAG),
};
static int key_get_type_from_user(char *type,
@@ -131,7 +130,8 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
/* create or update the requested key and add it to the target
* keyring */
key_ref = key_create_or_update(keyring_ref, type, description,
- payload, plen, NULL, KEY_ALLOC_IN_QUOTA);
+ payload, plen, KEY_PERM_UNDEF,
+ KEY_ALLOC_IN_QUOTA);
if (!IS_ERR(key_ref)) {
ret = key_ref_to_ptr(key_ref)->serial;
key_ref_put(key_ref);
@@ -221,8 +221,7 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
/* do the search */
key = request_key_and_link(ktype, description, NULL, callout_info,
- callout_len, NULL, NULL,
- key_ref_to_ptr(dest_ref),
+ callout_len, NULL, key_ref_to_ptr(dest_ref),
KEY_ALLOC_IN_QUOTA);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
@@ -384,10 +383,16 @@ long keyctl_revoke_key(key_serial_t id)
struct key *key;
long ret;
- key_ref = lookup_user_key(id, 0, KEY_NEED_REVOKE);
+ key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
- goto error;
+ if (ret != -EACCES)
+ goto error;
+ key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR);
+ if (IS_ERR(key_ref)) {
+ ret = PTR_ERR(key_ref);
+ goto error;
+ }
}
key = key_ref_to_ptr(key_ref);
@@ -421,7 +426,7 @@ long keyctl_invalidate_key(key_serial_t id)
kenter("%d", id);
- key_ref = lookup_user_key(id, 0, KEY_NEED_INVAL);
+ key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
@@ -466,7 +471,7 @@ long keyctl_keyring_clear(key_serial_t ringid)
struct key *keyring;
long ret;
- keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_CLEAR);
+ keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
if (IS_ERR(keyring_ref)) {
ret = PTR_ERR(keyring_ref);
@@ -641,7 +646,6 @@ long keyctl_describe_key(key_serial_t keyid,
size_t buflen)
{
struct key *key, *instkey;
- unsigned int perm;
key_ref_t key_ref;
char *infobuf;
long ret;
@@ -671,10 +675,6 @@ okay:
key = key_ref_to_ptr(key_ref);
desclen = strlen(key->description);
- rcu_read_lock();
- perm = key_acl_to_perm(rcu_dereference(key->acl));
- rcu_read_unlock();
-
/* calculate how much information we're going to return */
ret = -ENOMEM;
infobuf = kasprintf(GFP_KERNEL,
@@ -682,7 +682,7 @@ okay:
key->type->name,
from_kuid_munged(current_user_ns(), key->uid),
from_kgid_munged(current_user_ns(), key->gid),
- perm);
+ key->perm);
if (!infobuf)
goto error2;
infolen = strlen(infobuf);
@@ -899,7 +899,7 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
goto error;
key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
- KEY_NEED_SETSEC);
+ KEY_NEED_SETATTR);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error;
@@ -994,25 +994,18 @@ quota_overrun:
* the key need not be fully instantiated yet. If the caller does not have
* sysadmin capability, it may only change the permission on keys that it owns.
*/
-long keyctl_setperm_key(key_serial_t id, unsigned int perm)
+long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
{
- struct key_acl *acl;
struct key *key;
key_ref_t key_ref;
long ret;
- int nr, i, j;
+ ret = -EINVAL;
if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL))
- return -EINVAL;
-
- nr = 0;
- if (perm & KEY_POS_ALL) nr++;
- if (perm & KEY_USR_ALL) nr++;
- if (perm & KEY_GRP_ALL) nr++;
- if (perm & KEY_OTH_ALL) nr++;
+ goto error;
key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
- KEY_NEED_SETSEC);
+ KEY_NEED_SETATTR);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error;
@@ -1020,45 +1013,17 @@ long keyctl_setperm_key(key_serial_t id, unsigned int perm)
key = key_ref_to_ptr(key_ref);
- ret = -EOPNOTSUPP;
- if (test_bit(KEY_FLAG_HAS_ACL, &key->flags))
- goto error_key;
+ /* make the changes with the locks held to prevent chown/chmod races */
+ ret = -EACCES;
+ down_write(&key->sem);
- ret = -ENOMEM;
- acl = kzalloc(struct_size(acl, aces, nr), GFP_KERNEL);
- if (!acl)
- goto error_key;
-
- refcount_set(&acl->usage, 1);
- acl->nr_ace = nr;
- j = 0;
- for (i = 0; i < 4; i++) {
- struct key_ace *ace = &acl->aces[j];
- unsigned int subset = (perm >> (i * 8)) & KEY_OTH_ALL;
-
- if (!subset)
- continue;
- ace->type = KEY_ACE_SUBJ_STANDARD;
- ace->subject_id = KEY_ACE_EVERYONE + i;
- ace->perm = subset;
- if (subset & (KEY_OTH_WRITE | KEY_OTH_SETATTR))
- ace->perm |= KEY_ACE_REVOKE;
- if (subset & KEY_OTH_SEARCH)
- ace->perm |= KEY_ACE_INVAL;
- if (key->type == &key_type_keyring) {
- if (subset & KEY_OTH_SEARCH)
- ace->perm |= KEY_ACE_JOIN;
- if (subset & KEY_OTH_WRITE)
- ace->perm |= KEY_ACE_CLEAR;
- }
- j++;
+ /* if we're not the sysadmin, we can only change a key that we own */
+ if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid())) {
+ key->perm = perm;
+ ret = 0;
}
- /* make the changes with the locks held to prevent chown/chmod races */
- down_write(&key->sem);
- ret = key_set_acl(key, acl);
up_write(&key->sem);
-error_key:
key_put(key);
error:
return ret;
@@ -1423,7 +1388,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
long ret;
key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
- KEY_NEED_SETSEC);
+ KEY_NEED_SETATTR);
if (IS_ERR(key_ref)) {
/* setting the timeout on a key under construction is permitted
* if we have the authorisation token handy */
@@ -1574,7 +1539,7 @@ long keyctl_get_security(key_serial_t keyid,
* Attempt to install the calling process's session keyring on the process's
* parent process.
*
- * The keyring must exist and must grant the caller JOIN permission, and the
+ * The keyring must exist and must grant the caller LINK permission, and the
* parent process must be single-threaded and must have the same effective
* ownership as this process and mustn't be SUID/SGID.
*
@@ -1591,7 +1556,7 @@ long keyctl_session_to_parent(void)
struct cred *cred;
int ret;
- keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_JOIN);
+ keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_LINK);
if (IS_ERR(keyring_r))
return PTR_ERR(keyring_r);
@@ -1693,7 +1658,7 @@ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type,
char *restriction = NULL;
long ret;
- key_ref = lookup_user_key(id, 0, KEY_NEED_SETSEC);
+ key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR);
if (IS_ERR(key_ref))
return PTR_ERR(key_ref);
@@ -1799,7 +1764,7 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
case KEYCTL_SETPERM:
return keyctl_setperm_key((key_serial_t) arg2,
- (unsigned int)arg3);
+ (key_perm_t) arg3);
case KEYCTL_INSTANTIATE:
return keyctl_instantiate_key((key_serial_t) arg2,
@@ -1888,11 +1853,6 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
(key_serial_t)arg3,
(key_serial_t)arg4,
(unsigned int)arg5);
- case KEYCTL_GRANT_PERMISSION:
- return keyctl_grant_permission((key_serial_t)arg2,
- (enum key_ace_subject_type)arg3,
- (unsigned int)arg4,
- (unsigned int)arg5);
case KEYCTL_CAPABILITIES:
return keyctl_capabilities((unsigned char __user *)arg2, (size_t)arg3);
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 3b5458f23a95..febf36c6ddc5 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -515,19 +515,11 @@ static long keyring_read(const struct key *keyring,
return ret;
}
-/**
- * keyring_alloc - Allocate a keyring and link into the destination
- * @description: The key description to allow the key to be searched out.
- * @uid: The owner of the new key.
- * @gid: The group ID for the new key's group permissions.
- * @cred: The credentials specifying UID namespace.
- * @acl: The ACL to attach to the new key.
- * @flags: Flags specifying quota properties.
- * @restrict_link: Optional link restriction for new keyrings.
- * @dest: Destination keyring.
+/*
+ * Allocate a keyring and link into the destination keyring.
*/
struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
- const struct cred *cred, struct key_acl *acl,
+ const struct cred *cred, key_perm_t perm,
unsigned long flags,
struct key_restriction *restrict_link,
struct key *dest)
@@ -536,7 +528,7 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
int ret;
keyring = key_alloc(&key_type_keyring, description,
- uid, gid, cred, acl, flags, restrict_link);
+ uid, gid, cred, perm, flags, restrict_link);
if (!IS_ERR(keyring)) {
ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL);
if (ret < 0) {
@@ -1140,11 +1132,10 @@ found:
/*
* Find a keyring with the specified name.
*
- * Only keyrings that have nonzero refcount, are not revoked, and are owned by
- * a user in the current user namespace are considered. If @uid_keyring is
- * %true, the keyring additionally must have been allocated as a user or user
- * session keyring; otherwise, it must grant JOIN permission directly to the
- * caller (ie. not through possession).
+ * Only keyrings that have nonzero refcount, are not revoked, and are owned by a
+ * user in the current user namespace are considered. If @uid_keyring is %true,
+ * the keyring additionally must have been allocated as a user or user session
+ * keyring; otherwise, it must grant Search permission directly to the caller.
*
* Returns a pointer to the keyring with the keyring's refcount having being
* incremented on success. -ENOKEY is returned if a key could not be found.
@@ -1178,7 +1169,7 @@ struct key *find_keyring_by_name(const char *name, bool uid_keyring)
continue;
} else {
if (key_permission(make_key_ref(keyring, 0),
- KEY_NEED_JOIN) < 0)
+ KEY_NEED_SEARCH) < 0)
continue;
}
diff --git a/security/keys/permission.c b/security/keys/permission.c
index fd8a5dc6910a..085f907b64ac 100644
--- a/security/keys/permission.c
+++ b/security/keys/permission.c
@@ -7,67 +7,13 @@
#include <linux/export.h>
#include <linux/security.h>
-#include <linux/user_namespace.h>
-#include <linux/uaccess.h>
#include "internal.h"
-struct key_acl default_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~KEY_ACE_JOIN),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-EXPORT_SYMBOL(default_key_acl);
-
-struct key_acl joinable_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~KEY_ACE_JOIN),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_LINK | KEY_ACE_JOIN),
- }
-};
-EXPORT_SYMBOL(joinable_keyring_acl);
-
-struct key_acl internal_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_SEARCH),
- }
-};
-EXPORT_SYMBOL(internal_key_acl);
-
-struct key_acl internal_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_SEARCH),
- }
-};
-EXPORT_SYMBOL(internal_keyring_acl);
-
-struct key_acl internal_writable_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_WRITE | KEY_ACE_SEARCH),
- }
-};
-EXPORT_SYMBOL(internal_writable_keyring_acl);
-
/**
* key_task_permission - Check a key can be used
* @key_ref: The key to check.
* @cred: The credentials to use.
- * @desired_perm: The permission to check for.
+ * @perm: The permissions to check for.
*
* Check to see whether permission is granted to use a key in the desired way,
* but permit the security modules to override.
@@ -78,73 +24,53 @@ EXPORT_SYMBOL(internal_writable_keyring_acl);
* permissions bits or the LSM check.
*/
int key_task_permission(const key_ref_t key_ref, const struct cred *cred,
- unsigned int desired_perm)
+ unsigned perm)
{
- const struct key_acl *acl;
- const struct key *key;
- unsigned int allow = 0;
- int i;
-
- BUILD_BUG_ON(KEY_NEED_VIEW != KEY_ACE_VIEW ||
- KEY_NEED_READ != KEY_ACE_READ ||
- KEY_NEED_WRITE != KEY_ACE_WRITE ||
- KEY_NEED_SEARCH != KEY_ACE_SEARCH ||
- KEY_NEED_LINK != KEY_ACE_LINK ||
- KEY_NEED_SETSEC != KEY_ACE_SET_SECURITY ||
- KEY_NEED_INVAL != KEY_ACE_INVAL ||
- KEY_NEED_REVOKE != KEY_ACE_REVOKE ||
- KEY_NEED_JOIN != KEY_ACE_JOIN ||
- KEY_NEED_CLEAR != KEY_ACE_CLEAR);
+ struct key *key;
+ key_perm_t kperm;
+ int ret;
key = key_ref_to_ptr(key_ref);
- rcu_read_lock();
-
- acl = rcu_dereference(key->acl);
- if (!acl || acl->nr_ace == 0)
- goto no_access_rcu;
+ /* use the second 8-bits of permissions for keys the caller owns */
+ if (uid_eq(key->uid, cred->fsuid)) {
+ kperm = key->perm >> 16;
+ goto use_these_perms;
+ }
- for (i = 0; i < acl->nr_ace; i++) {
- const struct key_ace *ace = &acl->aces[i];
+ /* use the third 8-bits of permissions for keys the caller has a group
+ * membership in common with */
+ if (gid_valid(key->gid) && key->perm & KEY_GRP_ALL) {
+ if (gid_eq(key->gid, cred->fsgid)) {
+ kperm = key->perm >> 8;
+ goto use_these_perms;
+ }
- switch (ace->type) {
- case KEY_ACE_SUBJ_STANDARD:
- switch (ace->subject_id) {
- case KEY_ACE_POSSESSOR:
- if (is_key_possessed(key_ref))
- allow |= ace->perm;
- break;
- case KEY_ACE_OWNER:
- if (uid_eq(key->uid, cred->fsuid))
- allow |= ace->perm;
- break;
- case KEY_ACE_GROUP:
- if (gid_valid(key->gid)) {
- if (gid_eq(key->gid, cred->fsgid))
- allow |= ace->perm;
- else if (groups_search(cred->group_info, key->gid))
- allow |= ace->perm;
- }
- break;
- case KEY_ACE_EVERYONE:
- allow |= ace->perm;
- break;
- }
- break;
+ ret = groups_search(cred->group_info, key->gid);
+ if (ret) {
+ kperm = key->perm >> 8;
+ goto use_these_perms;
}
}
- rcu_read_unlock();
+ /* otherwise use the least-significant 8-bits */
+ kperm = key->perm;
+
+use_these_perms:
- if (!(allow & desired_perm))
- goto no_access;
+ /* use the top 8-bits of permissions for keys the caller possesses
+ * - possessor permissions are additive with other permissions
+ */
+ if (is_key_possessed(key_ref))
+ kperm |= key->perm >> 24;
- return security_key_permission(key_ref, cred, desired_perm);
+ kperm = kperm & perm & KEY_NEED_ALL;
-no_access_rcu:
- rcu_read_unlock();
-no_access:
- return -EACCES;
+ if (kperm != perm)
+ return -EACCES;
+
+ /* let LSM be the final arbiter */
+ return security_key_permission(key_ref, cred, perm);
}
EXPORT_SYMBOL(key_task_permission);
@@ -178,218 +104,3 @@ int key_validate(const struct key *key)
return 0;
}
EXPORT_SYMBOL(key_validate);
-
-/*
- * Roughly render an ACL to an old-style permissions mask. We cannot
- * accurately render what the ACL, particularly if it has ACEs that represent
- * subjects outside of { poss, user, group, other }.
- */
-unsigned int key_acl_to_perm(const struct key_acl *acl)
-{
- unsigned int perm = 0, tperm;
- int i;
-
- BUILD_BUG_ON(KEY_OTH_VIEW != KEY_ACE_VIEW ||
- KEY_OTH_READ != KEY_ACE_READ ||
- KEY_OTH_WRITE != KEY_ACE_WRITE ||
- KEY_OTH_SEARCH != KEY_ACE_SEARCH ||
- KEY_OTH_LINK != KEY_ACE_LINK ||
- KEY_OTH_SETATTR != KEY_ACE_SET_SECURITY);
-
- if (!acl || acl->nr_ace == 0)
- return 0;
-
- for (i = 0; i < acl->nr_ace; i++) {
- const struct key_ace *ace = &acl->aces[i];
-
- switch (ace->type) {
- case KEY_ACE_SUBJ_STANDARD:
- tperm = ace->perm & KEY_OTH_ALL;
-
- /* Invalidation and joining were allowed by SEARCH */
- if (ace->perm & (KEY_ACE_INVAL | KEY_ACE_JOIN))
- tperm |= KEY_OTH_SEARCH;
-
- /* Revocation was allowed by either SETATTR or WRITE */
- if ((ace->perm & KEY_ACE_REVOKE) && !(tperm & KEY_OTH_SETATTR))
- tperm |= KEY_OTH_WRITE;
-
- /* Clearing was allowed by WRITE */
- if (ace->perm & KEY_ACE_CLEAR)
- tperm |= KEY_OTH_WRITE;
-
- switch (ace->subject_id) {
- case KEY_ACE_POSSESSOR:
- perm |= tperm << 24;
- break;
- case KEY_ACE_OWNER:
- perm |= tperm << 16;
- break;
- case KEY_ACE_GROUP:
- perm |= tperm << 8;
- break;
- case KEY_ACE_EVERYONE:
- perm |= tperm << 0;
- break;
- }
- }
- }
-
- return perm;
-}
-
-/*
- * Destroy a key's ACL.
- */
-void key_put_acl(struct key_acl *acl)
-{
- if (acl && refcount_dec_and_test(&acl->usage))
- kfree_rcu(acl, rcu);
-}
-
-/*
- * Try to set the ACL. This either attaches or discards the proposed ACL.
- */
-long key_set_acl(struct key *key, struct key_acl *acl)
-{
- int i;
-
- /* If we're not the sysadmin, we can only change a key that we own. */
- if (!capable(CAP_SYS_ADMIN) && !uid_eq(key->uid, current_fsuid())) {
- key_put_acl(acl);
- return -EACCES;
- }
-
- for (i = 0; i < acl->nr_ace; i++) {
- const struct key_ace *ace = &acl->aces[i];
- if (ace->type == KEY_ACE_SUBJ_STANDARD &&
- ace->subject_id == KEY_ACE_POSSESSOR) {
- if (ace->perm & KEY_ACE_VIEW)
- acl->possessor_viewable = true;
- break;
- }
- }
-
- rcu_swap_protected(key->acl, acl, lockdep_is_held(&key->sem));
- key_put_acl(acl);
- return 0;
-}
-
-/*
- * Allocate a new ACL with an extra ACE slot.
- */
-static struct key_acl *key_alloc_acl(const struct key_acl *old_acl, int nr, int skip)
-{
- struct key_acl *acl;
- int nr_ace, i, j = 0;
-
- nr_ace = old_acl->nr_ace + nr;
- if (nr_ace > 16)
- return ERR_PTR(-EINVAL);
-
- acl = kzalloc(struct_size(acl, aces, nr_ace), GFP_KERNEL);
- if (!acl)
- return ERR_PTR(-ENOMEM);
-
- refcount_set(&acl->usage, 1);
- acl->nr_ace = nr_ace;
- for (i = 0; i < old_acl->nr_ace; i++) {
- if (i == skip)
- continue;
- acl->aces[j] = old_acl->aces[i];
- j++;
- }
- return acl;
-}
-
-/*
- * Generate the revised ACL.
- */
-static long key_change_acl(struct key *key, struct key_ace *new_ace)
-{
- struct key_acl *acl, *old;
- int i;
-
- old = rcu_dereference_protected(key->acl, lockdep_is_held(&key->sem));
-
- for (i = 0; i < old->nr_ace; i++)
- if (old->aces[i].type == new_ace->type &&
- old->aces[i].subject_id == new_ace->subject_id)
- goto found_match;
-
- if (new_ace->perm == 0)
- return 0; /* No permissions to remove. Add deny record? */
-
- acl = key_alloc_acl(old, 1, -1);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- acl->aces[i] = *new_ace;
- goto change;
-
-found_match:
- if (new_ace->perm == 0)
- goto delete_ace;
- if (new_ace->perm == old->aces[i].perm)
- return 0;
- acl = key_alloc_acl(old, 0, -1);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- acl->aces[i].perm = new_ace->perm;
- goto change;
-
-delete_ace:
- acl = key_alloc_acl(old, -1, i);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- goto change;
-
-change:
- return key_set_acl(key, acl);
-}
-
-/*
- * Add, alter or remove (if perm == 0) an ACE in a key's ACL.
- */
-long keyctl_grant_permission(key_serial_t keyid,
- enum key_ace_subject_type type,
- unsigned int subject,
- unsigned int perm)
-{
- struct key_ace new_ace;
- struct key *key;
- key_ref_t key_ref;
- long ret;
-
- new_ace.type = type;
- new_ace.perm = perm;
-
- switch (type) {
- case KEY_ACE_SUBJ_STANDARD:
- if (subject >= nr__key_ace_standard_subject)
- return -ENOENT;
- new_ace.subject_id = subject;
- break;
-
- default:
- return -ENOENT;
- }
-
- key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_SETSEC);
- if (IS_ERR(key_ref)) {
- ret = PTR_ERR(key_ref);
- goto error;
- }
-
- key = key_ref_to_ptr(key_ref);
-
- down_write(&key->sem);
-
- /* If we're not the sysadmin, we can only change a key that we own */
- ret = -EACCES;
- if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid()))
- ret = key_change_acl(key, &new_ace);
- up_write(&key->sem);
- key_put(key);
-error:
- return ret;
-}
diff --git a/security/keys/persistent.c b/security/keys/persistent.c
index 8171c90d4c9a..97af230aa4b2 100644
--- a/security/keys/persistent.c
+++ b/security/keys/persistent.c
@@ -12,27 +12,6 @@
unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
-static struct key_acl persistent_register_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
- }
-};
-
-static struct key_acl persistent_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_WRITE |
- KEY_ACE_SEARCH | KEY_ACE_LINK |
- KEY_ACE_CLEAR | KEY_ACE_INVAL),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
- }
-};
-
/*
* Create the persistent keyring register for the current user namespace.
*
@@ -43,7 +22,8 @@ static int key_create_persistent_register(struct user_namespace *ns)
struct key *reg = keyring_alloc(".persistent_register",
KUIDT_INIT(0), KGIDT_INIT(0),
current_cred(),
- &persistent_register_keyring_acl,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(reg))
return PTR_ERR(reg);
@@ -76,7 +56,8 @@ static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
persistent = keyring_alloc(index_key->description,
uid, INVALID_GID, current_cred(),
- &persistent_keyring_acl,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
KEY_ALLOC_NOT_IN_QUOTA, NULL,
ns->persistent_keyring_register);
if (IS_ERR(persistent))
diff --git a/security/keys/proc.c b/security/keys/proc.c
index b394ad1e874b..415f3f1c2da0 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -110,13 +110,11 @@ static struct key *find_ge_key(struct seq_file *p, key_serial_t id)
}
static void *proc_keys_start(struct seq_file *p, loff_t *_pos)
- __acquires(rcu)
__acquires(key_serial_lock)
{
key_serial_t pos = *_pos;
struct key *key;
- rcu_read_lock();
spin_lock(&key_serial_lock);
if (*_pos > INT_MAX)
@@ -146,15 +144,12 @@ static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos)
static void proc_keys_stop(struct seq_file *p, void *v)
__releases(key_serial_lock)
- __releases(rcu)
{
spin_unlock(&key_serial_lock);
- rcu_read_unlock();
}
static int proc_keys_show(struct seq_file *m, void *v)
{
- const struct key_acl *acl;
struct rb_node *_p = v;
struct key *key = rb_entry(_p, struct key, serial_node);
unsigned long flags;
@@ -162,7 +157,6 @@ static int proc_keys_show(struct seq_file *m, void *v)
time64_t now, expiry;
char xbuf[16];
short state;
- bool check_pos;
u64 timo;
int rc;
@@ -176,15 +170,15 @@ static int proc_keys_show(struct seq_file *m, void *v)
KEYRING_SEARCH_RECURSE),
};
- acl = rcu_dereference(key->acl);
- check_pos = acl->possessor_viewable;
+ key_ref = make_key_ref(key, 0);
/* determine if the key is possessed by this process (a test we can
* skip if the key does not indicate the possessor can view it
*/
- key_ref = make_key_ref(key, 0);
- if (check_pos) {
+ if (key->perm & KEY_POS_VIEW) {
+ rcu_read_lock();
skey_ref = search_cred_keyrings_rcu(&ctx);
+ rcu_read_unlock();
if (!IS_ERR(skey_ref)) {
key_ref_put(skey_ref);
key_ref = make_key_ref(key, 1);
@@ -194,10 +188,12 @@ static int proc_keys_show(struct seq_file *m, void *v)
/* check whether the current task is allowed to view the key */
rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW);
if (rc < 0)
- goto out;
+ return 0;
now = ktime_get_real_seconds();
+ rcu_read_lock();
+
/* come up with a suitable timeout value */
expiry = READ_ONCE(key->expiry);
if (expiry == 0) {
@@ -236,7 +232,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
showflag(flags, 'i', KEY_FLAG_INVALIDATED),
refcount_read(&key->usage),
xbuf,
- key_acl_to_perm(acl),
+ key->perm,
from_kuid_munged(seq_user_ns(m), key->uid),
from_kgid_munged(seq_user_ns(m), key->gid),
key->type->name);
@@ -247,7 +243,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
key->type->describe(key, m);
seq_putc(m, '\n');
-out:
+ rcu_read_unlock();
return 0;
}
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index aa3bfcadbc66..09541de31f2f 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -32,47 +32,6 @@ struct key_user root_key_user = {
.uid = GLOBAL_ROOT_UID,
};
-static struct key_acl user_reg_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .possessor_viewable = true,
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_WRITE | KEY_ACE_SEARCH),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
- }
-};
-
-static struct key_acl user_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .possessor_viewable = true,
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_WRITE |
- KEY_ACE_SEARCH | KEY_ACE_LINK),
- KEY_OWNER_ACE(KEY_ACE__PERMS & ~(KEY_ACE_JOIN | KEY_ACE_SET_SECURITY)),
- }
-};
-
-static struct key_acl session_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .possessor_viewable = true,
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~KEY_ACE_JOIN),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
- }
-};
-
-static struct key_acl thread_and_process_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .possessor_viewable = true,
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~(KEY_ACE_JOIN | KEY_ACE_SET_SECURITY)),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
/*
* Get or create a user register keyring.
*/
@@ -92,8 +51,11 @@ static struct key *get_user_register(struct user_namespace *user_ns)
if (!reg_keyring) {
reg_keyring = keyring_alloc(".user_reg",
user_ns->owner, INVALID_GID,
- &init_cred, &user_reg_keyring_acl,
- 0, NULL, NULL);
+ &init_cred,
+ KEY_POS_WRITE | KEY_POS_SEARCH |
+ KEY_USR_VIEW | KEY_USR_READ,
+ 0,
+ NULL, NULL);
if (!IS_ERR(reg_keyring))
smp_store_release(&user_ns->user_keyring_register,
reg_keyring);
@@ -115,11 +77,14 @@ int look_up_user_keyrings(struct key **_user_keyring,
const struct cred *cred = current_cred();
struct user_namespace *user_ns = current_user_ns();
struct key *reg_keyring, *uid_keyring, *session_keyring;
+ key_perm_t user_keyring_perm;
key_ref_t uid_keyring_r, session_keyring_r;
uid_t uid = from_kuid(user_ns, cred->user->uid);
char buf[20];
int ret;
+ user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL;
+
kenter("%u", uid);
reg_keyring = get_user_register(user_ns);
@@ -139,7 +104,7 @@ int look_up_user_keyrings(struct key **_user_keyring,
kdebug("_uid %p", uid_keyring_r);
if (uid_keyring_r == ERR_PTR(-EAGAIN)) {
uid_keyring = keyring_alloc(buf, cred->user->uid, INVALID_GID,
- cred, &user_keyring_acl,
+ cred, user_keyring_perm,
KEY_ALLOC_UID_KEYRING |
KEY_ALLOC_IN_QUOTA,
NULL, reg_keyring);
@@ -161,7 +126,7 @@ int look_up_user_keyrings(struct key **_user_keyring,
kdebug("_uid_ses %p", session_keyring_r);
if (session_keyring_r == ERR_PTR(-EAGAIN)) {
session_keyring = keyring_alloc(buf, cred->user->uid, INVALID_GID,
- cred, &user_keyring_acl,
+ cred, user_keyring_perm,
KEY_ALLOC_UID_KEYRING |
KEY_ALLOC_IN_QUOTA,
NULL, NULL);
@@ -261,7 +226,7 @@ int install_thread_keyring_to_cred(struct cred *new)
return 0;
keyring = keyring_alloc("_tid", new->uid, new->gid, new,
- &thread_and_process_keyring_acl,
+ KEY_POS_ALL | KEY_USR_VIEW,
KEY_ALLOC_QUOTA_OVERRUN,
NULL, NULL);
if (IS_ERR(keyring))
@@ -308,7 +273,7 @@ int install_process_keyring_to_cred(struct cred *new)
return 0;
keyring = keyring_alloc("_pid", new->uid, new->gid, new,
- &thread_and_process_keyring_acl,
+ KEY_POS_ALL | KEY_USR_VIEW,
KEY_ALLOC_QUOTA_OVERRUN,
NULL, NULL);
if (IS_ERR(keyring))
@@ -363,7 +328,8 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
flags = KEY_ALLOC_IN_QUOTA;
keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred,
- &session_keyring_acl, flags, NULL, NULL);
+ KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
+ flags, NULL, NULL);
if (IS_ERR(keyring))
return PTR_ERR(keyring);
} else {
@@ -643,7 +609,7 @@ bool lookup_user_key_possessed(const struct key *key,
* returned key reference.
*/
key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
- unsigned int desired_perm)
+ key_perm_t perm)
{
struct keyring_search_context ctx = {
.match_data.cmp = lookup_user_key_possessed,
@@ -818,12 +784,12 @@ try_again:
case -ERESTARTSYS:
goto invalid_key;
default:
- if (desired_perm)
+ if (perm)
goto invalid_key;
case 0:
break;
}
- } else if (desired_perm) {
+ } else if (perm) {
ret = key_validate(key);
if (ret < 0)
goto invalid_key;
@@ -835,11 +801,9 @@ try_again:
goto invalid_key;
/* check the permissions */
- if (desired_perm) {
- ret = key_task_permission(key_ref, ctx.cred, desired_perm);
- if (ret < 0)
- goto invalid_key;
- }
+ ret = key_task_permission(key_ref, ctx.cred, perm);
+ if (ret < 0)
+ goto invalid_key;
key->last_used_at = ktime_get_real_seconds();
@@ -904,13 +868,13 @@ long join_session_keyring(const char *name)
if (PTR_ERR(keyring) == -ENOKEY) {
/* not found - try and create a new one */
keyring = keyring_alloc(
- name, old->uid, old->gid, old, &joinable_keyring_acl,
+ name, old->uid, old->gid, old,
+ KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK,
KEY_ALLOC_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
goto error2;
}
- goto no_perm_test;
} else if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
goto error2;
@@ -919,12 +883,6 @@ long join_session_keyring(const char *name)
goto error3;
}
- ret = key_task_permission(make_key_ref(keyring, false), old,
- KEY_NEED_JOIN);
- if (ret < 0)
- goto error3;
-
-no_perm_test:
/* we've got a keyring - now to install it */
ret = install_session_keyring_to_cred(new, keyring);
if (ret < 0)
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 46c5187ce03f..7325f382dbf4 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -135,7 +135,8 @@ static int call_sbin_request_key(struct key *authkey, void *aux)
cred = get_current_cred();
keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred,
- NULL, KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL);
+ KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
+ KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL);
put_cred(cred);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
@@ -366,11 +367,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
struct key *dest_keyring,
unsigned long flags,
struct key_user *user,
- struct key_acl *acl,
struct key **_key)
{
struct assoc_array_edit *edit = NULL;
struct key *key;
+ key_perm_t perm;
key_ref_t key_ref;
int ret;
@@ -380,9 +381,17 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
*_key = NULL;
mutex_lock(&user->cons_lock);
+ perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+ perm |= KEY_USR_VIEW;
+ if (ctx->index_key.type->read)
+ perm |= KEY_POS_READ;
+ if (ctx->index_key.type == &key_type_keyring ||
+ ctx->index_key.type->update)
+ perm |= KEY_POS_WRITE;
+
key = key_alloc(ctx->index_key.type, ctx->index_key.description,
ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
- acl, flags, NULL);
+ perm, flags, NULL);
if (IS_ERR(key))
goto alloc_failed;
@@ -465,7 +474,6 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
const char *callout_info,
size_t callout_len,
void *aux,
- struct key_acl *acl,
struct key *dest_keyring,
unsigned long flags)
{
@@ -488,7 +496,7 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
goto error_put_dest_keyring;
}
- ret = construct_alloc_key(ctx, dest_keyring, flags, user, acl, &key);
+ ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
key_user_put(user);
if (ret == 0) {
@@ -526,7 +534,6 @@ error:
* @callout_info: The data to pass to the instantiation upcall (or NULL).
* @callout_len: The length of callout_info.
* @aux: Auxiliary data for the upcall.
- * @acl: The ACL to attach if a new key is created.
* @dest_keyring: Where to cache the key.
* @flags: Flags to key_alloc().
*
@@ -554,7 +561,6 @@ struct key *request_key_and_link(struct key_type *type,
const void *callout_info,
size_t callout_len,
void *aux,
- struct key_acl *acl,
struct key *dest_keyring,
unsigned long flags)
{
@@ -629,7 +635,7 @@ struct key *request_key_and_link(struct key_type *type,
goto error_free;
key = construct_key_and_link(&ctx, callout_info, callout_len,
- aux, acl, dest_keyring, flags);
+ aux, dest_keyring, flags);
}
error_free:
@@ -672,7 +678,6 @@ EXPORT_SYMBOL(wait_for_key_construction);
* @description: The searchable description of the key.
* @domain_tag: The domain in which the key operates.
* @callout_info: The data to pass to the instantiation upcall (or NULL).
- * @acl: The ACL to attach if a new key is created.
*
* As for request_key_and_link() except that it does not add the returned key
* to a keyring if found, new keys are always allocated in the user's quota,
@@ -685,8 +690,7 @@ EXPORT_SYMBOL(wait_for_key_construction);
struct key *request_key_tag(struct key_type *type,
const char *description,
struct key_tag *domain_tag,
- const char *callout_info,
- struct key_acl *acl)
+ const char *callout_info)
{
struct key *key;
size_t callout_len = 0;
@@ -696,7 +700,7 @@ struct key *request_key_tag(struct key_type *type,
callout_len = strlen(callout_info);
key = request_key_and_link(type, description, domain_tag,
callout_info, callout_len,
- NULL, acl, NULL, KEY_ALLOC_IN_QUOTA);
+ NULL, NULL, KEY_ALLOC_IN_QUOTA);
if (!IS_ERR(key)) {
ret = wait_for_key_construction(key, false);
if (ret < 0) {
@@ -716,7 +720,6 @@ EXPORT_SYMBOL(request_key_tag);
* @callout_info: The data to pass to the instantiation upcall (or NULL).
* @callout_len: The length of callout_info.
* @aux: Auxiliary data for the upcall.
- * @acl: The ACL to attach if a new key is created.
*
* As for request_key_and_link() except that it does not add the returned key
* to a keyring if found and new keys are always allocated in the user's quota.
@@ -729,15 +732,14 @@ struct key *request_key_with_auxdata(struct key_type *type,
struct key_tag *domain_tag,
const void *callout_info,
size_t callout_len,
- void *aux,
- struct key_acl *acl)
+ void *aux)
{
struct key *key;
int ret;
key = request_key_and_link(type, description, domain_tag,
callout_info, callout_len,
- aux, acl, NULL, KEY_ALLOC_IN_QUOTA);
+ aux, NULL, KEY_ALLOC_IN_QUOTA);
if (!IS_ERR(key)) {
ret = wait_for_key_construction(key, false);
if (ret < 0) {
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 27e437d94b81..e73ec040e250 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -24,17 +24,6 @@ static void request_key_auth_revoke(struct key *);
static void request_key_auth_destroy(struct key *);
static long request_key_auth_read(const struct key *, char __user *, size_t);
-static struct key_acl request_key_auth_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_SEARCH |
- KEY_ACE_LINK),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
/*
* The request-key authorisation key type definition.
*/
@@ -221,8 +210,8 @@ struct key *request_key_auth_new(struct key *target, const char *op,
authkey = key_alloc(&key_type_request_key_auth, desc,
cred->fsuid, cred->fsgid, cred,
- &request_key_auth_acl,
- KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_POS_LINK |
+ KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(authkey)) {
ret = PTR_ERR(authkey);
goto error_free_rka;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4bef86ed463b..74dd46de01b6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6502,7 +6502,6 @@ static int selinux_key_permission(key_ref_t key_ref,
{
struct key *key;
struct key_security_struct *ksec;
- unsigned oldstyle_perm;
u32 sid;
/* if no specific permissions are requested, we skip the
@@ -6511,26 +6510,13 @@ static int selinux_key_permission(key_ref_t key_ref,
if (perm == 0)
return 0;
- oldstyle_perm = perm & (KEY_NEED_VIEW | KEY_NEED_READ | KEY_NEED_WRITE |
- KEY_NEED_SEARCH | KEY_NEED_LINK);
- if (perm & KEY_NEED_SETSEC)
- oldstyle_perm |= OLD_KEY_NEED_SETATTR;
- if (perm & KEY_NEED_INVAL)
- oldstyle_perm |= KEY_NEED_SEARCH;
- if (perm & KEY_NEED_REVOKE && !(perm & OLD_KEY_NEED_SETATTR))
- oldstyle_perm |= KEY_NEED_WRITE;
- if (perm & KEY_NEED_JOIN)
- oldstyle_perm |= KEY_NEED_SEARCH;
- if (perm & KEY_NEED_CLEAR)
- oldstyle_perm |= KEY_NEED_WRITE;
-
sid = cred_sid(cred);
key = key_ref_to_ptr(key_ref);
ksec = key->security;
return avc_has_perm(&selinux_state,
- sid, ksec->sid, SECCLASS_KEY, oldstyle_perm, NULL);
+ sid, ksec->sid, SECCLASS_KEY, perm, NULL);
}
static int selinux_key_getsecurity(struct key *key, char **_buffer)
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 50c536cad85b..4c5e5a438f8b 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4284,8 +4284,7 @@ static int smack_key_permission(key_ref_t key_ref,
#endif
if (perm & (KEY_NEED_READ | KEY_NEED_SEARCH | KEY_NEED_VIEW))
request |= MAY_READ;
- if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETSEC |
- KEY_NEED_INVAL | KEY_NEED_REVOKE | KEY_NEED_CLEAR))
+ if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETATTR))
request |= MAY_WRITE;
rc = smk_access(tkp, keyp->security, request, &ad);
rc = smk_bu_note("key access", tkp, keyp->security, request, rc);
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 822a1e63d045..16d84d117bc0 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -1 +1,2 @@
+pidfd_open_test
pidfd_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 443fedbd6231..720b2d884b3c 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g -I../../../../usr/include/ -pthread
-TEST_GEN_PROGS := pidfd_test
+TEST_GEN_PROGS := pidfd_test pidfd_open_test
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
new file mode 100644
index 000000000000..8452e910463f
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PIDFD_H
+#define __PIDFD_H
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/mount.h>
+
+#include "../kselftest.h"
+
+/*
+ * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
+ * That means, when it wraps around any pid < 300 will be skipped.
+ * So we need to use a pid > 300 in order to test recycling.
+ */
+#define PID_RECYCLE 1000
+
+/*
+ * Define a few custom error codes for the child process to clearly indicate
+ * what is happening. This way we can tell the difference between a system
+ * error, a test error, etc.
+ */
+#define PIDFD_PASS 0
+#define PIDFD_FAIL 1
+#define PIDFD_ERROR 2
+#define PIDFD_SKIP 3
+#define PIDFD_XFAIL 4
+
+/*
+ * Reap the child @pid and report how it terminated.
+ *
+ * The wait is restarted when it is interrupted by a signal (EINTR).
+ *
+ * Returns the child's exit status if it exited normally, or -1 if waitpid()
+ * failed or the child did not terminate through a normal exit.
+ *
+ * Declared static inline because it is defined in a header: every file that
+ * includes this header gets its own private copy instead of a clashing
+ * global symbol.
+ */
+static inline int wait_for_pid(pid_t pid)
+{
+	int status, ret;
+
+again:
+	ret = waitpid(pid, &status, 0);
+	if (ret == -1) {
+		/* interrupted by a signal: the child is still ours to reap */
+		if (errno == EINTR)
+			goto again;
+
+		return -1;
+	}
+
+	if (!WIFEXITED(status))
+		return -1;
+
+	return WEXITSTATUS(status);
+}
+
+
+#endif /* __PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
new file mode 100644
index 000000000000..0377133dd6dc
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "pidfd.h"
+#include "../kselftest.h"
+
+/*
+ * Thin wrapper that issues the pidfd_open system call through syscall()
+ * with the given @pid and @flags and hands back syscall()'s return value.
+ */
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+{
+ return syscall(__NR_pidfd_open, pid, flags);
+}
+
+/*
+ * Convert the whole of @numstr to an int; strtol() picks the base from the
+ * prefix (base argument 0).
+ *
+ * On success the value is stored in @converted and 0 is returned.  On
+ * failure @converted is left untouched and a negative errno is returned:
+ * -ERANGE if the number does not fit, -EINVAL if the text is not a number
+ * or has trailing characters.
+ */
+static int safe_int(const char *numstr, int *converted)
+{
+	char *end = NULL;
+	long value;
+	int parse_errno;
+
+	errno = 0;
+	value = strtol(numstr, &end, 0);
+	parse_errno = errno;
+
+	/* overflow: strtol() clamps the result and reports ERANGE */
+	if (parse_errno == ERANGE && (value == LONG_MIN || value == LONG_MAX))
+		return -ERANGE;
+
+	if (value == 0 && parse_errno != 0)
+		return -EINVAL;
+
+	/* nothing was consumed, or something follows the number */
+	if (end == numstr || *end != '\0')
+		return -EINVAL;
+
+	if (value < INT_MIN || value > INT_MAX)
+		return -ERANGE;
+
+	*converted = (int)value;
+	return 0;
+}
+
+/*
+ * Return the offset of the first byte within the first @len bytes of
+ * @buffer that is neither a space nor a tab.  Returns 0 when every byte is
+ * blank or @len is 0.
+ */
+static int char_left_gc(const char *buffer, size_t len)
+{
+	size_t pos = 0;
+
+	while (pos < len && (buffer[pos] == ' ' || buffer[pos] == '\t'))
+		pos++;
+
+	/* ran off the end without finding anything worth keeping */
+	if (pos == len)
+		return 0;
+
+	return pos;
+}
+
+/*
+ * Return the length of @buffer (looking at its first @len bytes) once
+ * trailing spaces, tabs, newlines and NUL bytes are dropped, i.e. the index
+ * just past the last byte worth keeping.  Returns 0 if nothing is left.
+ */
+static int char_right_gc(const char *buffer, size_t len)
+{
+	int end = len;
+
+	while (end > 0) {
+		char last = buffer[end - 1];
+
+		if (last != ' ' && last != '\t' && last != '\n' && last != '\0')
+			return end;
+
+		end--;
+	}
+
+	return 0;
+}
+
+/*
+ * Strip leading and trailing whitespace from the NUL-terminated string
+ * @buffer without copying it: the trailing part is cut by writing a NUL
+ * into @buffer, and the returned pointer addresses the first kept byte
+ * inside @buffer.
+ */
+static char *trim_whitespace_in_place(char *buffer)
+{
+	char *start = buffer + char_left_gc(buffer, strlen(buffer));
+	int keep = char_right_gc(start, strlen(start));
+
+	start[keep] = '\0';
+	return start;
+}
+
+/*
+ * Read /proc/self/fdinfo/<pidfd> and return the integer that follows @key
+ * on the first line starting with it.
+ *
+ * @pidfd:  file descriptor whose fdinfo file is read
+ * @key:    line prefix to look for, e.g. "Pid:"
+ * @keylen: number of bytes of @key to compare and to skip
+ *
+ * Returns the parsed value, or -1 if the file cannot be opened, no line
+ * starts with @key, or the text after the key is not a valid int.
+ */
+static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
+{
+	int ret;
+	char path[512];
+	FILE *f;
+	size_t n = 0;
+	pid_t result = -1;
+	char *line = NULL;
+
+	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd);
+
+	f = fopen(path, "re");
+	if (!f)
+		return -1;
+
+	while (getline(&line, &n, f) != -1) {
+		char *numstr;
+
+		if (strncmp(line, key, keylen))
+			continue;
+
+		/*
+		 * Skip exactly the key that was matched rather than a fixed
+		 * four bytes, so keys of any length are handled.
+		 */
+		numstr = trim_whitespace_in_place(line + keylen);
+		ret = safe_int(numstr, &result);
+		if (ret < 0)
+			goto out;
+
+		break;
+	}
+
+out:
+	/* getline() allocated the line buffer; release it on every path */
+	free(line);
+	fclose(f);
+	return result;
+}
+
+/*
+ * Selftest entry point.  Runs three checks against pidfd_open: pid -1 must
+ * be refused, a flags value of 1 must be refused, and opening the calling
+ * process with flags 0 must succeed.  The pid read back from the new
+ * pidfd's fdinfo file is then printed.  The first failing check jumps to
+ * on_error and the test exits as failed.
+ */
+int main(int argc, char **argv)
+{
+ int pidfd = -1, ret = 1;
+ pid_t pid;
+
+ ksft_set_plan(3);
+
+ /* check 1: pid -1 must not yield a pidfd */
+ pidfd = sys_pidfd_open(-1, 0);
+ if (pidfd >= 0) {
+ /*
+ * NOTE(review): this branch runs when the call succeeded, so errno
+ * was not set by it and strerror(errno) likely reports a stale
+ * value - confirm whether the "%s" is wanted here.
+ */
+ ksft_print_msg(
+ "%s - succeeded to open pidfd for invalid pid -1\n",
+ strerror(errno));
+ goto on_error;
+ }
+ ksft_test_result_pass("do not allow invalid pid test: passed\n");
+
+ /* check 2: a flags value of 1 must be refused */
+ pidfd = sys_pidfd_open(getpid(), 1);
+ if (pidfd >= 0) {
+ /* NOTE(review): same stale-errno concern as in check 1 */
+ ksft_print_msg(
+ "%s - succeeded to open pidfd with invalid flag value specified\n",
+ strerror(errno));
+ goto on_error;
+ }
+ ksft_test_result_pass("do not allow invalid flag test: passed\n");
+
+ /* check 3: opening our own pid with flags 0 must work */
+ pidfd = sys_pidfd_open(getpid(), 0);
+ if (pidfd < 0) {
+ ksft_print_msg("%s - failed to open pidfd\n", strerror(errno));
+ goto on_error;
+ }
+ ksft_test_result_pass("open a new pidfd test: passed\n");
+
+ /* informational only: the value read back is printed, not verified */
+ pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1);
+ ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid);
+
+ ret = 0;
+
+on_error:
+ /* pidfd is still negative if a refusal check was the last call made */
+ if (pidfd >= 0)
+ close(pidfd);
+
+ return !ret ? ksft_exit_pass() : ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 104c75a33882..7eaa8a3de262 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -4,22 +4,49 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/types.h>
+#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/wait.h>
+#include <time.h>
#include <unistd.h>
+#include "pidfd.h"
#include "../kselftest.h"
#ifndef __NR_pidfd_send_signal
#define __NR_pidfd_send_signal -1
#endif
+#define str(s) _str(s)
+#define _str(s) #s
+#define CHILD_THREAD_MIN_WAIT 3 /* seconds */
+
+#define MAX_EVENTS 5
+
+#ifndef CLONE_PIDFD
+#define CLONE_PIDFD 0x00001000
+#endif
+
+/*
+ * Start a child that runs @fn, using clone() (or __clone2() on ia64).
+ * SIGCHLD is OR-ed into @flags, NULL is passed as @fn's argument and @pidfd
+ * is passed as the argument that follows it.  Returns whatever the clone
+ * call returns.
+ *
+ * The child's stack is a local array of this function.
+ */
+static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
+{
+ size_t stack_size = 1024;
+ /*
+ * NOTE(review): this is an array of 1024 pointers, not 1024 bytes, so
+ * "stack + stack_size" below advances by 1024 pointer-sized elements,
+ * while __clone2() is told the stack is stack_size (1024) bytes.
+ * Confirm whether "char stack[...]" was intended; note that switching
+ * the element type without raising the size would shrink the stack the
+ * child actually gets.
+ */
+ char *stack[1024] = { 0 };
+
+#ifdef __ia64__
+ return __clone2(fn, stack, stack_size, flags | SIGCHLD, NULL, pidfd);
+#else
+ /* clone() is handed the end of the array as the child's stack */
+ return clone(fn, stack + stack_size, flags | SIGCHLD, NULL, pidfd);
+#endif
+}
+
static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
unsigned int flags)
{
@@ -66,28 +93,6 @@ static int test_pidfd_send_signal_simple_success(void)
return 0;
}
-static int wait_for_pid(pid_t pid)
-{
- int status, ret;
-
-again:
- ret = waitpid(pid, &status, 0);
- if (ret == -1) {
- if (errno == EINTR)
- goto again;
-
- return -1;
- }
-
- if (ret != pid)
- goto again;
-
- if (!WIFEXITED(status))
- return -1;
-
- return WEXITSTATUS(status);
-}
-
static int test_pidfd_send_signal_exited_fail(void)
{
int pidfd, ret, saved_errno;
@@ -133,13 +138,6 @@ static int test_pidfd_send_signal_exited_fail(void)
}
/*
- * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
- * That means, when it wraps around any pid < 300 will be skipped.
- * So we need to use a pid > 300 in order to test recycling.
- */
-#define PID_RECYCLE 1000
-
-/*
* Maximum number of cycles we allow. This is equivalent to PID_MAX_DEFAULT.
* If users set a higher limit or we have cycled PIDFD_MAX_DEFAULT number of
* times then we skip the test to not go into an infinite loop or block for a
@@ -147,17 +145,6 @@ static int test_pidfd_send_signal_exited_fail(void)
*/
#define PIDFD_MAX_DEFAULT 0x8000
-/*
- * Define a few custom error codes for the child process to clearly indicate
- * what is happening. This way we can tell the difference between a system
- * error, a test error, etc.
- */
-#define PIDFD_PASS 0
-#define PIDFD_FAIL 1
-#define PIDFD_ERROR 2
-#define PIDFD_SKIP 3
-#define PIDFD_XFAIL 4
-
static int test_pidfd_send_signal_recycled_pid_fail(void)
{
int i, ret;
@@ -372,11 +359,192 @@ static int test_pidfd_send_signal_syscall_support(void)
return 0;
}
+/*
+ * Thread body for the exec test. It is started by child_poll_exec_test() as
+ * a non-leader thread and replaces the process image with
+ * "/bin/sleep CHILD_THREAD_MIN_WAIT".
+ *
+ * priv: unused.
+ *
+ * Returns NULL, and only when execl() failed.
+ */
+static void *test_pidfd_poll_exec_thread(void *priv)
+{
+	/* syscall() returns long; cast so the argument matches the %d conversion. */
+	ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n",
+			getpid(), (int)syscall(SYS_gettid));
+	ksft_print_msg("Child Thread: doing exec of sleep\n");
+
+	execl("/bin/sleep", "sleep", str(CHILD_THREAD_MIN_WAIT), (char *)NULL);
+
+	/* Only reached if execl() failed: a successful exec never returns. */
+	ksft_print_msg("Child Thread: DONE. pid %d tid %d\n",
+			getpid(), (int)syscall(SYS_gettid));
+	return NULL;
+}
+
+/*
+ * Wait, via epoll, until pidfd reports EPOLLIN.
+ *
+ * test_name: name of the running test, used as a prefix in failure messages.
+ * pidfd:     file descriptor to register with the epoll instance.
+ *
+ * Returns normally only when epoll_wait() reported exactly one event with
+ * EPOLLIN set within the 5000 ms timeout. Every other outcome is reported
+ * through ksft_exit_fail_msg(), which is expected not to return.
+ */
+static void poll_pidfd(const char *test_name, int pidfd)
+{
+	int c;
+	int epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	/*
+	 * Zero the result array: when epoll_wait() returns 0 or -1 nothing is
+	 * written to it, yet events[0].events is still inspected and printed.
+	 */
+	struct epoll_event event, events[MAX_EVENTS] = { { 0 } };
+
+	if (epoll_fd == -1)
+		ksft_exit_fail_msg("%s test: Failed to create epoll file descriptor "
+				   "(errno %d)\n",
+				   test_name, errno);
+
+	event.events = EPOLLIN;
+	event.data.fd = pidfd;
+
+	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pidfd, &event)) {
+		ksft_exit_fail_msg("%s test: Failed to add epoll file descriptor "
+				   "(errno %d)\n",
+				   test_name, errno);
+	}
+
+	c = epoll_wait(epoll_fd, events, MAX_EVENTS, 5000);
+	/*
+	 * The two literals below must be adjacent (no comma between them) so
+	 * they form a single format string; otherwise "(errno %d)\n" is
+	 * consumed as the %s argument and every later argument is shifted.
+	 */
+	if (c != 1 || !(events[0].events & EPOLLIN))
+		ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) "
+				   "(errno %d)\n",
+				   test_name, c, events[0].events, errno);
+
+	close(epoll_fd);
+}
+
+/*
+ * Entry point of the cloned child in the exec test. Starts one thread that
+ * execs /bin/sleep and then idles in the group leader.
+ *
+ * args: unused.
+ *
+ * Does not return: the leader loops until the exec in the other thread
+ * replaces the process.
+ */
+static int child_poll_exec_test(void *args)
+{
+	pthread_t t1;
+	int err;
+
+	/* syscall() returns long; cast so the argument matches the %d conversion. */
+	ksft_print_msg("Child (pidfd): starting. pid %d tid %d\n", getpid(),
+			(int)syscall(SYS_gettid));
+	/*
+	 * Without the exec thread the leader would sleep forever, so report
+	 * a failed pthread_create() instead of silently ignoring it.
+	 */
+	err = pthread_create(&t1, NULL, test_pidfd_poll_exec_thread, NULL);
+	if (err)
+		ksft_exit_fail_msg("Child (pidfd): pthread_create failed (error %d)\n",
+				   err);
+	/*
+	 * Exec in the non-leader thread will destroy the leader immediately.
+	 * If the wait in the parent returns too soon, the test fails.
+	 */
+	while (1)
+		sleep(1);
+}
+
+/*
+ * Check that the parent is not notified prematurely when a non-leader thread
+ * of the child calls exec.
+ *
+ * use_waitpid: non-zero to wait for the child with waitpid(), zero to wait
+ *              on the pidfd through poll_pidfd().
+ *
+ * The test passes when the wait took between CHILD_THREAD_MIN_WAIT and
+ * CHILD_THREAD_MIN_WAIT + 2 seconds (inclusive); anything else is reported
+ * through ksft_exit_fail_msg().
+ */
+static void test_pidfd_poll_exec(int use_waitpid)
+{
+	int pid, pidfd = 0;
+	int status, ret;
+	time_t prog_start = time(NULL);
+	const char *test_name = "pidfd_poll check for premature notification on child thread exec";
+
+	ksft_print_msg("Parent: pid: %d\n", getpid());
+	pid = pidfd_clone(CLONE_PIDFD, &pidfd, child_poll_exec_test);
+	if (pid < 0)
+		ksft_exit_fail_msg("%s test: pidfd_clone failed (ret %d, errno %d)\n",
+				   test_name, pid, errno);
+
+	ksft_print_msg("Parent: Waiting for Child (%d) to complete.\n", pid);
+
+	if (use_waitpid) {
+		ret = waitpid(pid, &status, 0);
+		if (ret == -1)
+			ksft_print_msg("Parent: error\n");
+
+		if (ret == pid)
+			ksft_print_msg("Parent: Child process waited for.\n");
+	} else {
+		poll_pidfd(test_name, pidfd);
+	}
+
+	time_t prog_time = time(NULL) - prog_start;
+
+	/* time_t is not guaranteed to be unsigned long; cast to match %lu. */
+	ksft_print_msg("Time waited for child: %lu\n", (unsigned long)prog_time);
+
+	close(pidfd);
+
+	if (prog_time < CHILD_THREAD_MIN_WAIT || prog_time > CHILD_THREAD_MIN_WAIT + 2)
+		ksft_exit_fail_msg("%s test: Failed\n", test_name);
+	else
+		ksft_test_result_pass("%s test: Passed\n", test_name);
+}
+
+/*
+ * Thread body for the leader-exit test: announces itself, sleeps for
+ * CHILD_THREAD_MIN_WAIT seconds and finishes.
+ *
+ * priv: unused.
+ *
+ * Returns NULL.
+ */
+static void *test_pidfd_poll_leader_exit_thread(void *priv)
+{
+	/* syscall() returns long; cast so the argument matches the %d conversion. */
+	ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n",
+			getpid(), (int)syscall(SYS_gettid));
+	sleep(CHILD_THREAD_MIN_WAIT);
+	ksft_print_msg("Child Thread: DONE. pid %d tid %d\n", getpid(),
+			(int)syscall(SYS_gettid));
+	return NULL;
+}
+
+/*
+ * Timestamp written by the child just before its group leader exits.
+ * test_pidfd_poll_leader_exit() points this at a MAP_SHARED anonymous
+ * mapping before cloning, so the parent can read the child's value.
+ */
+static time_t *child_exit_secs;
+
+/*
+ * Entry point of the cloned child in the leader-exit test. Starts two
+ * sleeping threads, records the current time in *child_exit_secs and then
+ * terminates only the calling (group leader) thread.
+ *
+ * args: unused.
+ *
+ * Does not return in practice; the trailing return only satisfies the
+ * function's int return type.
+ */
+static int child_poll_leader_exit_test(void *args)
+{
+	pthread_t t1, t2;
+	int err;
+
+	/* syscall() returns long; cast so the argument matches the %d conversion. */
+	ksft_print_msg("Child: starting. pid %d tid %d\n", getpid(),
+			(int)syscall(SYS_gettid));
+	/* The test needs both threads to outlive the leader, so do not ignore failures. */
+	err = pthread_create(&t1, NULL, test_pidfd_poll_leader_exit_thread, NULL);
+	if (err)
+		ksft_exit_fail_msg("Child: pthread_create failed (error %d)\n", err);
+	err = pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL);
+	if (err)
+		ksft_exit_fail_msg("Child: pthread_create failed (error %d)\n", err);
+
+	/*
+	 * glibc exit calls exit_group syscall, so explicitly call exit only
+	 * so that only the group leader exits, leaving the threads alone.
+	 */
+	*child_exit_secs = time(NULL);
+	syscall(SYS_exit, 0);
+	return 0; /* not reached */
+}
+
+/*
+ * Check that the parent is not notified prematurely when the child's group
+ * leader exits while other threads of the group are still running.
+ *
+ * use_waitpid: non-zero to wait for the child with waitpid(), zero to wait
+ *              on the pidfd through poll_pidfd().
+ *
+ * The child stores its leader's exit time in a shared mapping. The test
+ * passes when the notification arrives between CHILD_THREAD_MIN_WAIT and
+ * CHILD_THREAD_MIN_WAIT + 2 seconds (inclusive) after that time; anything
+ * else is reported through ksft_exit_fail_msg().
+ */
+static void test_pidfd_poll_leader_exit(int use_waitpid)
+{
+	int pid, pidfd = 0;
+	int status, ret;
+	/* Note the trailing space: the two literals are concatenated. */
+	const char *test_name = "pidfd_poll check for premature notification on non-empty "
+				"group leader exit";
+
+	child_exit_secs = mmap(NULL, sizeof *child_exit_secs, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+	if (child_exit_secs == MAP_FAILED)
+		ksft_exit_fail_msg("%s test: mmap failed (errno %d)\n",
+				   test_name, errno);
+
+	ksft_print_msg("Parent: pid: %d\n", getpid());
+	pid = pidfd_clone(CLONE_PIDFD, &pidfd, child_poll_leader_exit_test);
+	if (pid < 0)
+		ksft_exit_fail_msg("%s test: pidfd_clone failed (ret %d, errno %d)\n",
+				   test_name, pid, errno);
+
+	ksft_print_msg("Parent: Waiting for Child (%d) to complete.\n", pid);
+
+	if (use_waitpid) {
+		ret = waitpid(pid, &status, 0);
+		if (ret == -1)
+			ksft_print_msg("Parent: error\n");
+
+		/*
+		 * Kept inside this branch: ret is only assigned here, so
+		 * testing it after the poll path would read an
+		 * uninitialized variable.
+		 */
+		if (ret == pid)
+			ksft_print_msg("Parent: Child process waited for.\n");
+	} else {
+		/*
+		 * This sleep tests for the case where if the child exits, and is in
+		 * EXIT_ZOMBIE, but the thread group leader is non-empty, then the poll
+		 * doesn't prematurely return even though there are active threads
+		 */
+		sleep(1);
+		poll_pidfd(test_name, pidfd);
+	}
+
+	time_t since_child_exit = time(NULL) - *child_exit_secs;
+
+	/* The timestamp has been consumed; release the mapping for this run. */
+	munmap(child_exit_secs, sizeof *child_exit_secs);
+	child_exit_secs = NULL;
+
+	/* time_t is not guaranteed to be unsigned long; cast to match %lu. */
+	ksft_print_msg("Time since child exit: %lu\n", (unsigned long)since_child_exit);
+
+	close(pidfd);
+
+	if (since_child_exit < CHILD_THREAD_MIN_WAIT ||
+			since_child_exit > CHILD_THREAD_MIN_WAIT + 2)
+		ksft_exit_fail_msg("%s test: Failed\n", test_name);
+	else
+		ksft_test_result_pass("%s test: Passed\n", test_name);
+}
+
int main(int argc, char **argv)
{
ksft_print_header();
ksft_set_plan(4);
+ test_pidfd_poll_exec(0);
+ test_pidfd_poll_exec(1);
+ test_pidfd_poll_leader_exit(0);
+ test_pidfd_poll_leader_exit(1);
test_pidfd_send_signal_syscall_support();
test_pidfd_send_signal_simple_success();
test_pidfd_send_signal_exited_fail();