aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CREDITS4
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst20
-rw-r--r--Documentation/arm64/memory.rst9
-rw-r--r--Documentation/core-api/index.rst1
-rw-r--r--Documentation/core-api/memory-allocation.rst4
-rw-r--r--Documentation/core-api/symbol-namespaces.rst (renamed from Documentation/kbuild/namespaces.rst)0
-rw-r--r--Documentation/dev-tools/kselftest.rst16
-rw-r--r--Documentation/process/coding-style.rst2
-rw-r--r--Documentation/process/deprecated.rst33
-rw-r--r--MAINTAINERS6
-rw-r--r--Makefile7
-rw-r--r--arch/arm/crypto/Kconfig1
-rw-r--r--arch/arm/crypto/aes-ce-core.S1
-rw-r--r--arch/arm64/Kconfig15
-rw-r--r--arch/arm64/Makefile16
-rw-r--r--arch/arm64/include/asm/atomic_lse.h6
-rw-r--r--arch/arm64/include/asm/vdso/compat_barrier.h2
-rw-r--r--arch/arm64/include/asm/vdso_datapage.h33
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c5
-rw-r--r--arch/arm64/kernel/cpu_errata.c4
-rw-r--r--arch/arm64/kernel/cpufeature.c1
-rw-r--r--arch/arm64/kernel/entry.S1
-rw-r--r--arch/arm64/kernel/ftrace.c12
-rw-r--r--arch/arm64/kernel/process.c32
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S0
-rw-r--r--arch/arm64/kernel/vdso32/Makefile44
-rw-r--r--arch/arm64/mm/fault.c13
-rw-r--r--arch/x86/include/asm/uaccess.h23
-rw-r--r--block/blk-rq-qos.c14
-rw-r--r--block/blk-rq-qos.h4
-rw-r--r--block/blk-wbt.c6
-rw-r--r--drivers/ata/libata-scsi.c21
-rw-r--r--drivers/block/nbd.c2
-rw-r--r--drivers/block/null_blk_zoned.c3
-rw-r--r--drivers/gpio/gpio-eic-sprd.c7
-rw-r--r--drivers/gpio/gpio-max77620.c6
-rw-r--r--drivers/gpio/gpiolib-of.c2
-rw-r--r--drivers/gpio/gpiolib.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c14
-rw-r--r--drivers/gpu/drm/bridge/tc358767.c7
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c15
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c12
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c101
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ringbuffer.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c3
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c5
-rw-r--r--drivers/gpu/drm/i915/i915_gem.h6
-rw-r--r--drivers/gpu/drm/i915/i915_request.c69
-rw-r--r--drivers/gpu/drm/i915/i915_request.h2
-rw-r--r--drivers/gpu/drm/i915/intel_pch.c1
-rw-r--r--drivers/gpu/drm/i915/intel_pch.h1
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem.c6
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lb035q02.c9
-rw-r--r--drivers/gpu/drm/panel/panel-nec-nl8048hl11.c9
-rw-r--r--drivers/gpu/drm/panel/panel-sony-acx565akm.c9
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td028ttec1.c3
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td043mtea1.c9
-rw-r--r--drivers/hid/hid-hyperv.c56
-rw-r--r--drivers/hv/vmbus_drv.c6
-rw-r--r--drivers/infiniband/core/cm.c3
-rw-r--r--drivers/infiniband/core/cma.c3
-rw-r--r--drivers/infiniband/core/device.c9
-rw-r--r--drivers/infiniband/core/nldev.c12
-rw-r--r--drivers/infiniband/core/security.c2
-rw-r--r--drivers/infiniband/core/umem_odp.c6
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c28
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c10
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c5
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c4
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c58
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c68
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c58
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c2
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c8
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/ref-verify.c2
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/tree-log.c36
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/cifs/cifsfs.c24
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/cifs/dir.c8
-rw-r--r--fs/cifs/file.c33
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/netmisc.c4
-rw-r--r--fs/cifs/smb2pdu.c14
-rw-r--r--fs/cifs/smb2proto.h4
-rw-r--r--fs/fs-writeback.c9
-rw-r--r--fs/io_uring.c24
-rw-r--r--fs/libfs.c137
-rw-r--r--fs/nfs/direct.c106
-rw-r--r--fs/nfs/nfs4proc.c1
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/ocfs2/aops.c25
-rw-r--r--fs/ocfs2/ioctl.c2
-rw-r--r--fs/ocfs2/xattr.c56
-rw-r--r--fs/readdir.c44
-rw-r--r--fs/super.c5
-rw-r--r--fs/xfs/libxfs/xfs_ag.c5
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c21
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c2
-rw-r--r--fs/xfs/scrub/refcount.c3
-rw-r--r--fs/xfs/xfs_bmap_util.c4
-rw-r--r--fs/xfs/xfs_buf.c12
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--include/linux/compiler_attributes.h17
-rw-r--r--include/linux/export.h10
-rw-r--r--include/linux/leds.h5
-rw-r--r--include/linux/memcontrol.h29
-rw-r--r--include/linux/slab.h4
-rw-r--r--include/linux/sunrpc/xprtsock.h1
-rw-r--r--include/linux/uaccess.h6
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/freezer.c6
-rw-r--r--kernel/panic.c1
-rw-r--r--lib/vdso/Kconfig9
-rw-r--r--mm/backing-dev.c4
-rw-r--r--mm/memcontrol.c5
-rw-r--r--mm/memremap.c2
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/shmem.c6
-rw-r--r--mm/shuffle.c2
-rw-r--r--mm/slab_common.c19
-rw-r--r--mm/slob.c62
-rw-r--r--mm/slub.c14
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/vmpressure.c20
-rw-r--r--mm/vmscan.c72
-rw-r--r--mm/z3fold.c10
-rw-r--r--net/sctp/sm_make_chunk.c12
-rw-r--r--net/sunrpc/xprtsock.c17
-rw-r--r--scripts/coccinelle/misc/add_namespace.cocci2
-rw-r--r--scripts/mod/modpost.c29
-rw-r--r--scripts/nsdeps4
-rw-r--r--security/selinux/ss/services.c9
-rw-r--r--tools/testing/selftests/Makefile19
-rw-r--r--tools/testing/selftests/kselftest/runner.sh36
-rwxr-xr-xtools/testing/selftests/kselftest_install.sh4
-rw-r--r--tools/testing/selftests/rtc/settings1
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c27
153 files changed, 1321 insertions, 886 deletions
diff --git a/CREDITS b/CREDITS
index 8b67a85844b5..031605d46b4d 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1637,6 +1637,10 @@ S: Panoramastrasse 18
S: D-69126 Heidelberg
S: Germany
+N: Simon Horman
+M: horms@verge.net.au
+D: Renesas ARM/ARM64 SoC maintainer
+
N: Christopher Horn
E: chorn@warwick.net
D: Miscellaneous sysctl hacks
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 0fa8c0e615c2..5361ebec3361 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -615,8 +615,8 @@ on an IO device and is an example of this type.
Protections
-----------
-A cgroup is protected to be allocated upto the configured amount of
-the resource if the usages of all its ancestors are under their
+A cgroup is protected upto the configured amount of the resource
+as long as the usages of all its ancestors are under their
protected levels. Protections can be hard guarantees or best effort
soft boundaries. Protections can also be over-committed in which case
only upto the amount available to the parent is protected among
@@ -1096,7 +1096,10 @@ PAGE_SIZE multiple when read back.
is within its effective min boundary, the cgroup's memory
won't be reclaimed under any conditions. If there is no
unprotected reclaimable memory available, OOM killer
- is invoked.
+ is invoked. Above the effective min boundary (or
+ effective low boundary if it is higher), pages are reclaimed
+ proportionally to the overage, reducing reclaim pressure for
+ smaller overages.
Effective min boundary is limited by memory.min values of
all ancestor cgroups. If there is memory.min overcommitment
@@ -1118,7 +1121,10 @@ PAGE_SIZE multiple when read back.
Best-effort memory protection. If the memory usage of a
cgroup is within its effective low boundary, the cgroup's
memory won't be reclaimed unless memory can be reclaimed
- from unprotected cgroups.
+ from unprotected cgroups. Above the effective low boundary (or
+ effective min boundary if it is higher), pages are reclaimed
+ proportionally to the overage, reducing reclaim pressure for
+ smaller overages.
Effective low boundary is limited by memory.low values of
all ancestor cgroups. If there is memory.low overcommitment
@@ -2482,8 +2488,10 @@ system performance due to overreclaim, to the point where the feature
becomes self-defeating.
The memory.low boundary on the other hand is a top-down allocated
-reserve. A cgroup enjoys reclaim protection when it's within its low,
-which makes delegation of subtrees possible.
+reserve. A cgroup enjoys reclaim protection when it's within its
+effective low, which makes delegation of subtrees possible. It also
+enjoys having reclaim pressure proportional to its overage when
+above its effective low.
The original high boundary, the hard limit, is defined as a strict
limit that can not budge, even if the OOM killer has to be called.
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index b040909e45f8..02e02175e6f5 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -154,11 +154,18 @@ return virtual addresses to userspace from a 48-bit range.
Software can "opt-in" to receiving VAs from a 52-bit space by
specifying an mmap hint parameter that is larger than 48-bit.
+
For example:
- maybe_high_address = mmap(~0UL, size, prot, flags,...);
+
+.. code-block:: c
+
+ maybe_high_address = mmap(~0UL, size, prot, flags,...);
It is also possible to build a debug kernel that returns addresses
from a 52-bit space by enabling the following kernel config options:
+
+.. code-block:: sh
+
CONFIG_EXPERT=y && CONFIG_ARM64_FORCE_52BIT=y
Note that this option is only intended for debugging applications
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index fa16a0538dcb..ab0eae1c153a 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -38,6 +38,7 @@ Core utilities
protection-keys
../RCU/index
gcc-plugins
+ symbol-namespaces
Interfaces for kernel debugging
diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst
index 7744aa3bf2e0..939e3dfc86e9 100644
--- a/Documentation/core-api/memory-allocation.rst
+++ b/Documentation/core-api/memory-allocation.rst
@@ -98,6 +98,10 @@ limited. The actual limit depends on the hardware and the kernel
configuration, but it is a good practice to use `kmalloc` for objects
smaller than page size.
+The address of a chunk allocated with `kmalloc` is aligned to at least
+ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
+alignment is also guaranteed to be at least the respective size.
+
For large allocations you can use :c:func:`vmalloc` and
:c:func:`vzalloc`, or directly request pages from the page
allocator. The memory allocated by `vmalloc` and related functions is
diff --git a/Documentation/kbuild/namespaces.rst b/Documentation/core-api/symbol-namespaces.rst
index 982ed7b568ac..982ed7b568ac 100644
--- a/Documentation/kbuild/namespaces.rst
+++ b/Documentation/core-api/symbol-namespaces.rst
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index 25604904fa6e..ecdfdc9d4b03 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -89,6 +89,22 @@ To build, save output files in a separate directory with KBUILD_OUTPUT ::
$ export KBUILD_OUTPUT=/tmp/kselftest; make TARGETS="size timers" kselftest
+Additionally you can use the "SKIP_TARGETS" variable on the make command
+line to specify one or more targets to exclude from the TARGETS list.
+
+To run all tests but a single subsystem::
+
+ $ make -C tools/testing/selftests SKIP_TARGETS=ptrace run_tests
+
+You can specify multiple tests to skip::
+
+ $ make SKIP_TARGETS="size timers" kselftest
+
+You can also specify a restricted list of tests to run together with a
+dedicated skiplist::
+
+ $ make TARGETS="bpf breakpoints size timers" SKIP_TARGETS=bpf kselftest
+
See the top-level tools/testing/selftests/Makefile for the list of all
possible targets.
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index f4a2198187f9..ada573b7d703 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -56,7 +56,7 @@ instead of ``double-indenting`` the ``case`` labels. E.g.:
case 'K':
case 'k':
mem <<= 10;
- /* fall through */
+ fallthrough;
default:
break;
}
diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index 053b24a6dd38..179f2a5625a0 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -122,14 +122,27 @@ memory adjacent to the stack (when built without `CONFIG_VMAP_STACK=y`)
Implicit switch case fall-through
---------------------------------
-The C language allows switch cases to "fall through" when
-a "break" statement is missing at the end of a case. This,
-however, introduces ambiguity in the code, as it's not always
-clear if the missing break is intentional or a bug. As there
-have been a long list of flaws `due to missing "break" statements
+The C language allows switch cases to "fall-through" when a "break" statement
+is missing at the end of a case. This, however, introduces ambiguity in the
+code, as it's not always clear if the missing break is intentional or a bug.
+
+As there have been a long list of flaws `due to missing "break" statements
<https://cwe.mitre.org/data/definitions/484.html>`_, we no longer allow
-"implicit fall-through". In order to identify an intentional fall-through
-case, we have adopted the marking used by static analyzers: a comment
-saying `/* Fall through */`. Once the C++17 `__attribute__((fallthrough))`
-is more widely handled by C compilers, static analyzers, and IDEs, we can
-switch to using that instead.
+"implicit fall-through".
+
+In order to identify intentional fall-through cases, we have adopted a
+pseudo-keyword macro 'fallthrough' which expands to gcc's extension
+__attribute__((__fallthrough__)). `Statement Attributes
+<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_
+
+When the C17/C18 [[fallthrough]] syntax is more commonly supported by
+C compilers, static analyzers, and IDEs, we can switch to using that syntax
+for the macro pseudo-keyword.
+
+All switch/case blocks must end in one of:
+
+ break;
+ fallthrough;
+ continue;
+ goto <label>;
+ return [expression];
diff --git a/MAINTAINERS b/MAINTAINERS
index 55199ef7fa74..e9b0a4100a29 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2165,12 +2165,10 @@ F: arch/arm64/boot/dts/realtek/
F: Documentation/devicetree/bindings/arm/realtek.yaml
ARM/RENESAS ARM64 ARCHITECTURE
-M: Simon Horman <horms@verge.net.au>
M: Geert Uytterhoeven <geert+renesas@glider.be>
M: Magnus Damm <magnus.damm@gmail.com>
L: linux-renesas-soc@vger.kernel.org
Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next
S: Supported
F: arch/arm64/boot/dts/renesas/
@@ -2282,12 +2280,10 @@ S: Maintained
F: drivers/media/platform/s5p-mfc/
ARM/SHMOBILE ARM ARCHITECTURE
-M: Simon Horman <horms@verge.net.au>
M: Geert Uytterhoeven <geert+renesas@glider.be>
M: Magnus Damm <magnus.damm@gmail.com>
L: linux-renesas-soc@vger.kernel.org
Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next
S: Supported
F: arch/arm/boot/dts/emev2*
@@ -9187,6 +9183,7 @@ M: Pavel Machek <pavel@ucw.cz>
R: Dan Murphy <dmurphy@ti.com>
L: linux-leds@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git
S: Maintained
F: Documentation/devicetree/bindings/leds/
F: drivers/leds/
@@ -11547,6 +11544,7 @@ NSDEPS
M: Matthias Maennich <maennich@google.com>
S: Maintained
F: scripts/nsdeps
+F: Documentation/core-api/symbol-namespaces.rst
NTB AMD DRIVER
M: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
diff --git a/Makefile b/Makefile
index f47dfdec7086..f87a5d6ab78b 100644
--- a/Makefile
+++ b/Makefile
@@ -599,7 +599,7 @@ endif
# in addition to whatever we do anyway.
# Just "make" or "make all" shall build modules as well
-ifneq ($(filter all _all modules,$(MAKECMDGOALS)),)
+ifneq ($(filter all _all modules nsdeps,$(MAKECMDGOALS)),)
KBUILD_MODULES := 1
endif
@@ -1217,9 +1217,8 @@ PHONY += kselftest
kselftest:
$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
-PHONY += kselftest-clean
-kselftest-clean:
- $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
+kselftest-%: FORCE
+ $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests $*
PHONY += kselftest-merge
kselftest-merge:
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index b24df84a1d7a..043b0b18bf7e 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -98,6 +98,7 @@ config CRYPTO_AES_ARM_CE
tristate "Accelerated AES using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
+ select CRYPTO_LIB_AES
select CRYPTO_SIMD
help
Use an implementation of AES in CBC, CTR and XTS modes that uses
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index b978cdf133af..4d1707388d94 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -9,6 +9,7 @@
#include <asm/assembler.h>
.text
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
.align 3
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 41a9b4257b72..950a56b71ff0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -110,7 +110,6 @@ config ARM64
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
- select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT)
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_PCI
@@ -1159,7 +1158,7 @@ menuconfig COMPAT
if COMPAT
config KUSER_HELPERS
- bool "Enable kuser helpers page for 32 bit applications"
+ bool "Enable kuser helpers page for 32-bit applications"
default y
help
Warning: disabling this option may break 32-bit user programs.
@@ -1185,6 +1184,18 @@ config KUSER_HELPERS
Say N here only if you are absolutely certain that you do not
need these helpers; otherwise, the safe option is to say Y.
+config COMPAT_VDSO
+ bool "Enable vDSO for 32-bit applications"
+ depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != ""
+ select GENERIC_COMPAT_VDSO
+ default y
+ help
+ Place in the process address space of 32-bit applications an
+ ELF shared object providing fast implementations of gettimeofday
+ and clock_gettime.
+
+ You must have a 32-bit build of glibc 2.22 or later for programs
+ to seamlessly take advantage of this.
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 84a3d502c5a5..2c0238ce0551 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -53,22 +53,6 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable)
endif
endif
-ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
- CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%)
-
- ifeq ($(CONFIG_CC_IS_CLANG), y)
- $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built)
- else ifeq ($(strip $(CROSS_COMPILE_COMPAT)),)
- $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built)
- else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),)
- $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT)
- else
- export CROSS_COMPILE_COMPAT
- export CONFIG_COMPAT_VDSO := y
- compat_vdso := -DCONFIG_COMPAT_VDSO=1
- endif
-endif
-
KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \
$(compat_vdso) $(cc_has_k_constraint)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index c6bd87d2915b..574808b9df4c 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -321,7 +321,8 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
}
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \
-static inline u##sz __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
+static __always_inline u##sz \
+__lse__cmpxchg_case_##name##sz(volatile void *ptr, \
u##sz old, \
u##sz new) \
{ \
@@ -362,7 +363,8 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __CMPXCHG_CASE
#define __CMPXCHG_DBL(name, mb, cl...) \
-static inline long __lse__cmpxchg_double##name(unsigned long old1, \
+static __always_inline long \
+__lse__cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \
unsigned long new1, \
unsigned long new2, \
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
index fb60a88b5ed4..3fd8fd6d8fc2 100644
--- a/arch/arm64/include/asm/vdso/compat_barrier.h
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -20,7 +20,7 @@
#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
-#if __LINUX_ARM_ARCH__ >= 8
+#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD)
#define aarch32_smp_mb() dmb(ish)
#define aarch32_smp_rmb() dmb(ishld)
#define aarch32_smp_wmb() dmb(ishst)
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
deleted file mode 100644
index 1f38bf330a6e..000000000000
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 ARM Limited
- */
-#ifndef __ASM_VDSO_DATAPAGE_H
-#define __ASM_VDSO_DATAPAGE_H
-
-#ifndef __ASSEMBLY__
-
-struct vdso_data {
- __u64 cs_cycle_last; /* Timebase at clocksource init */
- __u64 raw_time_sec; /* Raw time */
- __u64 raw_time_nsec;
- __u64 xtime_clock_sec; /* Kernel time */
- __u64 xtime_clock_nsec;
- __u64 xtime_coarse_sec; /* Coarse time */
- __u64 xtime_coarse_nsec;
- __u64 wtm_clock_sec; /* Wall to monotonic time */
- __u64 wtm_clock_nsec;
- __u32 tb_seq_count; /* Timebase sequence counter */
- /* cs_* members must be adjacent and in this order (ldp accesses) */
- __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */
- __u32 cs_shift; /* Clocksource shift (mono = raw) */
- __u32 cs_raw_mult; /* Raw clocksource multiplier */
- __u32 tz_minuteswest; /* Whacky timezone stuff */
- __u32 tz_dsttime;
- __u32 use_syscall;
- __u32 hrtimer_res;
-};
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 2ec09debc2bb..ca158be21f83 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -174,6 +174,9 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops)
struct insn_emulation *insn;
insn = kzalloc(sizeof(*insn), GFP_KERNEL);
+ if (!insn)
+ return;
+
insn->ops = ops;
insn->min = INSN_UNDEF;
@@ -233,6 +236,8 @@ static void __init register_insn_emulation_sysctl(void)
insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl),
GFP_KERNEL);
+ if (!insns_sysctl)
+ return;
raw_spin_lock_irqsave(&insn_emulation_lock, flags);
list_for_each_entry(insn, &insn_emulation, node) {
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 1e43ba5c79b7..f593f4cffc0d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -128,8 +128,8 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
int cpu, slot = -1;
/*
- * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs
- * start/end if we're a guest. Skip the hyp-vectors work.
+ * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if
+ * we're a guest. Skip the hyp-vectors work.
*/
if (!hyp_vecs_start) {
__this_cpu_write(bp_hardening_data.fn, fn);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9323bcc40a58..cabebf1a7976 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -136,6 +136,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 84a822748c84..e304fe04b098 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -775,6 +775,7 @@ el0_sync_compat:
b.ge el0_dbg
b el0_inv
el0_svc_compat:
+ gic_prio_kentry_setup tmp=x1
mov x0, sp
bl el0_svc_compat_handler
b ret_to_user
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 171773257974..06e56b470315 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -121,10 +121,16 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
/*
* Ensure updated trampoline is visible to instruction
- * fetch before we patch in the branch.
+ * fetch before we patch in the branch. Although the
+ * architecture doesn't require an IPI in this case,
+ * Neoverse-N1 erratum #1542419 does require one
+ * if the TLB maintenance in module_enable_ro() is
+ * skipped due to rodata_enabled. It doesn't seem worth
+ * it to make it conditional given that this is
+ * certainly not a fast-path.
*/
- __flush_icache_range((unsigned long)&dst[0],
- (unsigned long)&dst[1]);
+ flush_icache_range((unsigned long)&dst[0],
+ (unsigned long)&dst[1]);
}
addr = (unsigned long)dst;
#else /* CONFIG_ARM64_MODULE_PLTS */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a47462def04b..1fb2819fc048 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -332,22 +332,27 @@ void arch_release_task_struct(struct task_struct *tsk)
fpsimd_release_task(tsk);
}
-/*
- * src and dst may temporarily have aliased sve_state after task_struct
- * is copied. We cannot fix this properly here, because src may have
- * live SVE state and dst's thread_info may not exist yet, so tweaking
- * either src's or dst's TIF_SVE is not safe.
- *
- * The unaliasing is done in copy_thread() instead. This works because
- * dst is not schedulable or traceable until both of these functions
- * have been called.
- */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
if (current->mm)
fpsimd_preserve_current_state();
*dst = *src;
+ /* We rely on the above assignment to initialize dst's thread_flags: */
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));
+
+ /*
+ * Detach src's sve_state (if any) from dst so that it does not
+ * get erroneously used or freed prematurely. dst's sve_state
+ * will be allocated on demand later on if dst uses SVE.
+ * For consistency, also clear TIF_SVE here: this could be done
+ * later in copy_process(), but to avoid tripping up future
+ * maintainers it is best not to leave TIF_SVE and sve_state in
+ * an inconsistent state, even temporarily.
+ */
+ dst->thread.sve_state = NULL;
+ clear_tsk_thread_flag(dst, TIF_SVE);
+
return 0;
}
@@ -361,13 +366,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
/*
- * Unalias p->thread.sve_state (if any) from the parent task
- * and disable discard SVE state for p:
- */
- clear_tsk_thread_flag(p, TIF_SVE);
- p->thread.sve_state = NULL;
-
- /*
* In case p was allocated the same task_struct pointer as some
* other recently-exited task, make sure p is disassociated from
* any cpu that may have run that now-exited task recently.
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ /dev/null
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 1fba0776ed40..76b327f88fbb 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -8,15 +8,21 @@
ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
include $(srctree)/lib/vdso/Makefile
-COMPATCC := $(CROSS_COMPILE_COMPAT)gcc
+# Same as cc-*option, but using CC_COMPAT instead of CC
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+CC_COMPAT ?= $(CC)
+else
+CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
+endif
-# Same as cc-*option, but using COMPATCC instead of CC
cc32-option = $(call try-run,\
- $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+ $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
cc32-disable-warning = $(call try-run,\
- $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+ $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
cc32-ldoption = $(call try-run,\
- $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+ $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+cc32-as-instr = $(call try-run,\
+ printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
# We cannot use the global flags to compile the vDSO files, the main reason
# being that the 32-bit compiler may be older than the main (64-bit) compiler
@@ -25,22 +31,21 @@ cc32-ldoption = $(call try-run,\
# arm64 one.
# As a result we set our own flags here.
-# From top-level Makefile
-# NOSTDINC_FLAGS
-VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include)
+# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
+VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
-VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS)
# Common C and assembly flags
# From top-level Makefile
VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
+ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),)
+VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
+endif
+
VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
ifdef CONFIG_DEBUG_INFO
VDSO_CAFLAGS += -g
endif
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y)
-VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO
-endif
# From arm Makefile
VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm)
@@ -55,6 +60,7 @@ endif
VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
+
# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
# fall back to v7. There is no easy way to check for what architecture the code
# is being compiled, so define a macro specifying that (see arch/arm/Makefile).
@@ -91,6 +97,12 @@ VDSO_CFLAGS += -Wno-int-to-pointer-cast
VDSO_AFLAGS := $(VDSO_CAFLAGS)
VDSO_AFLAGS += -D__ASSEMBLY__
+# Check for binutils support for dmb ishld
+dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1)
+
+VDSO_CFLAGS += $(dmbinstr)
+VDSO_AFLAGS += $(dmbinstr)
+
VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
# From arm vDSO Makefile
VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
@@ -159,14 +171,14 @@ quiet_cmd_vdsold_and_vdso_check = LD32 $@
cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
quiet_cmd_vdsold = LD32 $@
- cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
+ cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
-Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
quiet_cmd_vdsocc = CC32 $@
- cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
+ cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
quiet_cmd_vdsocc_gettimeofday = CC32 $@
- cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
+ cmd_vdsocc_gettimeofday = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
quiet_cmd_vdsoas = AS32 $@
- cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
+ cmd_vdsoas = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
quiet_cmd_vdsomunge = MUNGE $@
cmd_vdsomunge = $(obj)/$(munge) $< $@
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 115d7a0e4b08..855f2a7954e6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -113,6 +113,15 @@ static inline bool is_ttbr1_addr(unsigned long addr)
return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
}
+static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm)
+{
+ /* Either init_pg_dir or swapper_pg_dir */
+ if (mm == &init_mm)
+ return __pa_symbol(mm->pgd);
+
+ return (unsigned long)virt_to_phys(mm->pgd);
+}
+
/*
* Dump out the page tables associated with 'addr' in the currently active mm.
*/
@@ -141,7 +150,7 @@ static void show_pte(unsigned long addr)
pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
- vabits_actual, (unsigned long)virt_to_phys(mm->pgd));
+ vabits_actual, mm_to_pgd_phys(mm));
pgdp = pgd_offset(mm, addr);
pgd = READ_ONCE(*pgdp);
pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
@@ -266,7 +275,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
* If we got a different type of fault from the AT instruction,
* treat the translation fault as spurious.
*/
- dfsc = FIELD_PREP(SYS_PAR_EL1_FST, par);
+ dfsc = FIELD_GET(SYS_PAR_EL1_FST, par);
return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
}
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c225ede0e4..61d93f062a36 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -734,5 +734,28 @@ do { \
if (unlikely(__gu_err)) goto err_label; \
} while (0)
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) \
+ while (len >= sizeof(type)) { \
+ unsafe_put_user(*(type *)src,(type __user *)dst,label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label) \
+do { \
+ char __user *__ucu_dst = (_dst); \
+ const char *__ucu_src = (_src); \
+ size_t __ucu_len = (_len); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
+} while (0)
+
#endif /* _ASM_X86_UACCESS_H */
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 61b635bc2a31..656460636ad3 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -160,24 +160,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd)
return ret;
}
-void rq_depth_scale_up(struct rq_depth *rqd)
+/* Returns true on success and false if scaling up wasn't possible */
+bool rq_depth_scale_up(struct rq_depth *rqd)
{
/*
* Hit max in previous round, stop here
*/
if (rqd->scaled_max)
- return;
+ return false;
rqd->scale_step--;
rqd->scaled_max = rq_depth_calc_max_depth(rqd);
+ return true;
}
/*
* Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
- * had a latency violation.
+ * had a latency violation. Returns true on success and returns false if
+ * scaling down wasn't possible.
*/
-void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
+bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
{
/*
* Stop scaling down when we've hit the limit. This also prevents
@@ -185,7 +188,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
* keep up.
*/
if (rqd->max_depth == 1)
- return;
+ return false;
if (rqd->scale_step < 0 && hard_throttle)
rqd->scale_step = 0;
@@ -194,6 +197,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
rqd->scaled_max = false;
rq_depth_calc_max_depth(rqd);
+ return true;
}
struct rq_qos_wait_data {
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 08a09dbe0f4b..e8cb68f6958a 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -130,8 +130,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
acquire_inflight_cb_t *acquire_inflight_cb,
cleanup_cb_t *cleanup_cb);
bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
-void rq_depth_scale_up(struct rq_depth *rqd);
-void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
+bool rq_depth_scale_up(struct rq_depth *rqd);
+bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
bool rq_depth_calc_max_depth(struct rq_depth *rqd);
void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8af553a0ba00..8641ba9793c5 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -308,7 +308,8 @@ static void calc_wb_limits(struct rq_wb *rwb)
static void scale_up(struct rq_wb *rwb)
{
- rq_depth_scale_up(&rwb->rq_depth);
+ if (!rq_depth_scale_up(&rwb->rq_depth))
+ return;
calc_wb_limits(rwb);
rwb->unknown_cnt = 0;
rwb_wake_all(rwb);
@@ -317,7 +318,8 @@ static void scale_up(struct rq_wb *rwb)
static void scale_down(struct rq_wb *rwb, bool hard_throttle)
{
- rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
+ if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
+ return;
calc_wb_limits(rwb);
rwb->unknown_cnt = 0;
rwb_trace_step(rwb, "scale down");
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 76d0f9de767b..58e09ffe8b9c 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4791,27 +4791,6 @@ void ata_scsi_hotplug(struct work_struct *work)
return;
}
- /*
- * XXX - UGLY HACK
- *
- * The block layer suspend/resume path is fundamentally broken due
- * to freezable kthreads and workqueue and may deadlock if a block
- * device gets removed while resume is in progress. I don't know
- * what the solution is short of removing freezable kthreads and
- * workqueues altogether.
- *
- * The following is an ugly hack to avoid kicking off device
- * removal while freezer is active. This is a joke but does avoid
- * this particular deadlock scenario.
- *
- * https://bugzilla.kernel.org/show_bug.cgi?id=62801
- * http://marc.info/?l=linux-kernel&m=138695698516487
- */
-#ifdef CONFIG_FREEZER
- while (pm_freezing)
- msleep(10);
-#endif
-
DPRINTK("ENTER\n");
mutex_lock(&ap->scsi_scan_mutex);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ac07e8c94c79..478aa86fc1f2 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -248,8 +248,8 @@ static void nbd_put(struct nbd_device *nbd)
if (refcount_dec_and_mutex_lock(&nbd->refs,
&nbd_index_mutex)) {
idr_remove(&nbd_index_idr, nbd->index);
- mutex_unlock(&nbd_index_mutex);
nbd_dev_remove(nbd);
+ mutex_unlock(&nbd_index_mutex);
}
}
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index eabc116832a7..3d7fdea872f8 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -142,8 +142,7 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
zone->wp = zone->start;
break;
default:
- cmd->error = BLK_STS_NOTSUPP;
- break;
+ return BLK_STS_NOTSUPP;
}
return BLK_STS_OK;
}
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index fe7a73f52329..bb287f35cf40 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -530,11 +530,12 @@ static void sprd_eic_handle_one_type(struct gpio_chip *chip)
}
for_each_set_bit(n, &reg, SPRD_EIC_PER_BANK_NR) {
- girq = irq_find_mapping(chip->irq.domain,
- bank * SPRD_EIC_PER_BANK_NR + n);
+ u32 offset = bank * SPRD_EIC_PER_BANK_NR + n;
+
+ girq = irq_find_mapping(chip->irq.domain, offset);
generic_handle_irq(girq);
- sprd_eic_toggle_trigger(chip, girq, n);
+ sprd_eic_toggle_trigger(chip, girq, offset);
}
}
}
diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c
index 47d05e357e61..faf86ea9c51a 100644
--- a/drivers/gpio/gpio-max77620.c
+++ b/drivers/gpio/gpio-max77620.c
@@ -192,13 +192,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio,
case 0:
val = MAX77620_CNFG_GPIO_DBNC_None;
break;
- case 1 ... 8:
+ case 1000 ... 8000:
val = MAX77620_CNFG_GPIO_DBNC_8ms;
break;
- case 9 ... 16:
+ case 9000 ... 16000:
val = MAX77620_CNFG_GPIO_DBNC_16ms;
break;
- case 17 ... 32:
+ case 17000 ... 32000:
val = MAX77620_CNFG_GPIO_DBNC_32ms;
break;
default:
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 1eea2c6c2e1d..80ea49f570f4 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -317,7 +317,7 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
transitory = flags & OF_GPIO_TRANSITORY;
ret = gpiod_request(desc, label);
- if (ret == -EBUSY && (flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE))
+ if (ret == -EBUSY && (dflags & GPIOD_FLAGS_BIT_NONEXCLUSIVE))
return desc;
if (ret)
return ERR_PTR(ret);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index bdbc1649eafa..5833e4f380d6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3070,8 +3070,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
if (!ret)
goto set_output_value;
/* Emulate open drain by not actively driving the line high */
- if (value)
- return gpiod_direction_input(desc);
+ if (value) {
+ ret = gpiod_direction_input(desc);
+ goto set_output_flag;
+ }
}
else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) {
ret = gpio_set_config(gc, gpio_chip_hwgpio(desc),
@@ -3079,8 +3081,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
if (!ret)
goto set_output_value;
/* Emulate open source by not actively driving the line low */
- if (!value)
- return gpiod_direction_input(desc);
+ if (!value) {
+ ret = gpiod_direction_input(desc);
+ goto set_output_flag;
+ }
} else {
gpio_set_config(gc, gpio_chip_hwgpio(desc),
PIN_CONFIG_DRIVE_PUSH_PULL);
@@ -3088,6 +3092,17 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
set_output_value:
return gpiod_direction_output_raw_commit(desc, value);
+
+set_output_flag:
+ /*
+ * When emulating open-source or open-drain functionalities by not
+ * actively driving the line (setting mode to input) we still need to
+ * set the IS_OUT flag or otherwise we won't be able to set the line
+ * value anymore.
+ */
+ if (ret == 0)
+ set_bit(FLAG_IS_OUT, &desc->flags);
+ return ret;
}
EXPORT_SYMBOL_GPL(gpiod_direction_output);
@@ -3448,8 +3463,6 @@ static void gpio_set_open_drain_value_commit(struct gpio_desc *desc, bool value)
if (value) {
ret = chip->direction_input(chip, offset);
- if (!ret)
- clear_bit(FLAG_IS_OUT, &desc->flags);
} else {
ret = chip->direction_output(chip, offset, 0);
if (!ret)
@@ -3479,8 +3492,6 @@ static void gpio_set_open_source_value_commit(struct gpio_desc *desc, bool value
set_bit(FLAG_IS_OUT, &desc->flags);
} else {
ret = chip->direction_input(chip, offset);
- if (!ret)
- clear_bit(FLAG_IS_OUT, &desc->flags);
}
trace_gpio_direction(desc_to_gpio(desc), !value, ret);
if (ret < 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 7bcf86c61999..61e38e43ad1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -270,7 +270,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = amdgpu_bo_create_list_entry_array(&args->in, &info);
if (r)
- goto error_free;
+ return r;
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
@@ -283,8 +283,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
- amdgpu_bo_list_put(list);
- return r;
+ goto error_put_list;
}
handle = r;
@@ -306,9 +305,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&fpriv->bo_list_lock);
if (IS_ERR(old)) {
- amdgpu_bo_list_put(list);
r = PTR_ERR(old);
- goto error_free;
+ goto error_put_list;
}
amdgpu_bo_list_put(old);
@@ -325,8 +323,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
return 0;
+error_put_list:
+ amdgpu_bo_list_put(list);
+
error_free:
- if (info)
- kvfree(info);
+ kvfree(info);
return r;
}
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index cebc8e620820..8a8d605021f0 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -728,6 +728,8 @@ static int tc_set_video_mode(struct tc_data *tc,
int lower_margin = mode->vsync_start - mode->vdisplay;
int vsync_len = mode->vsync_end - mode->vsync_start;
u32 dp0_syncval;
+ u32 bits_per_pixel = 24;
+ u32 in_bw, out_bw;
/*
* Recommended maximum number of symbols transferred in a transfer unit:
@@ -735,7 +737,10 @@ static int tc_set_video_mode(struct tc_data *tc,
* (output active video bandwidth in bytes))
* Must be less than tu_size.
*/
- max_tu_symbol = TU_SIZE_RECOMMENDED - 1;
+
+ in_bw = mode->clock * bits_per_pixel / 8;
+ out_bw = tc->link.base.num_lanes * tc->link.base.rate;
+ max_tu_symbol = DIV_ROUND_UP(in_bw * TU_SIZE_RECOMMENDED, out_bw);
dev_dbg(tc->dev, "set mode %dx%d\n",
mode->hdisplay, mode->vdisplay);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index ce05e805b08f..aa54bb22796d 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3280,7 +3280,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb,
switch (fb->modifier) {
case DRM_FORMAT_MOD_LINEAR:
case I915_FORMAT_MOD_X_TILED:
- return 4096;
+ /*
+ * Validated limit is 4k, but has 5k should
+ * work apart from the following features:
+ * - Ytile (already limited to 4k)
+ * - FP16 (already limited to 4k)
+ * - render compression (already limited to 4k)
+ * - KVMR sprite and cursor (don't care)
+ * - horizontal panning (TODO verify this)
+ * - pipe and plane scaling (TODO verify this)
+ */
+ if (cpp == 8)
+ return 4096;
+ else
+ return 5120;
case I915_FORMAT_MOD_Y_TILED_CCS:
case I915_FORMAT_MOD_Yf_TILED_CCS:
/* FIXME AUX plane? */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 261c9bd83f51..91051e178021 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -245,11 +245,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
wakeref = intel_runtime_pm_get(rpm);
- srcu = intel_gt_reset_trylock(ggtt->vm.gt);
- if (srcu < 0) {
- ret = srcu;
+ ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+ if (ret)
goto err_rpm;
- }
ret = i915_mutex_lock_interruptible(dev);
if (ret)
@@ -318,7 +316,11 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
- i915_vma_set_ggtt_write(vma);
+ if (write) {
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ i915_vma_set_ggtt_write(vma);
+ obj->mm.dirty = true;
+ }
err_fence:
i915_vma_unpin_fence(vma);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 92e53c25424c..ad2a63dbcac2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -241,9 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
mutex_lock(&i915->drm.struct_mutex);
intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
- i915_gem_restore_gtt_mappings(i915);
- i915_gem_restore_fences(i915);
-
if (i915_gem_init_hw(i915))
goto err_wedged;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d3c6993f4f46..22aab8593abf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists)
return READ_ONCE(*execlists->active);
}
+static inline void
+execlists_active_lock_bh(struct intel_engine_execlists *execlists)
+{
+ local_bh_disable(); /* prevent local softirq and lock recursion */
+ tasklet_lock(&execlists->tasklet);
+}
+
+static inline void
+execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
+{
+ tasklet_unlock(&execlists->tasklet);
+ local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
+}
+
struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 82630db0394b..4ce8626b140e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1197,9 +1197,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
struct drm_i915_private *dev_priv = engine->i915;
- const struct intel_engine_execlists * const execlists =
- &engine->execlists;
- unsigned long flags;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
u64 addr;
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
@@ -1281,7 +1279,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
idx, hws[idx * 2], hws[idx * 2 + 1]);
}
- spin_lock_irqsave(&engine->active.lock, flags);
+ execlists_active_lock_bh(execlists);
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;
@@ -1309,7 +1307,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
hwsp_seqno(rq));
print_request(m, rq, hdr);
}
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
ENGINE_READ(engine, RING_PP_DIR_BASE));
@@ -1440,8 +1438,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!intel_engine_supports_stats(engine))
return -ENODEV;
- spin_lock_irqsave(&engine->active.lock, flags);
- write_seqlock(&engine->stats.lock);
+ execlists_active_lock_bh(execlists);
+ write_seqlock_irqsave(&engine->stats.lock, flags);
if (unlikely(engine->stats.enabled == ~0)) {
err = -EBUSY;
@@ -1469,8 +1467,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
unlock:
- write_sequnlock(&engine->stats.lock);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+ execlists_active_unlock_bh(execlists);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d42584439f51..bdfcc7bdadbf 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -631,7 +631,6 @@ execlists_schedule_out(struct i915_request *rq)
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
- GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
old = READ_ONCE(ce->inflight);
do
@@ -797,6 +796,17 @@ static bool can_merge_rq(const struct i915_request *prev,
GEM_BUG_ON(prev == next);
GEM_BUG_ON(!assert_priority_queue(prev, next));
+ /*
+ * We do not submit known completed requests. Therefore if the next
+ * request is already completed, we can pretend to merge it in
+ * with the previous context (and we will skip updating the ELSP
+ * and tracking). Thus hopefully keeping the ELSP full with active
+ * contexts, despite the best efforts of preempt-to-busy to confuse
+ * us.
+ */
+ if (i915_request_completed(next))
+ return true;
+
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
@@ -893,7 +903,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
static struct i915_request *
last_active(const struct intel_engine_execlists *execlists)
{
- struct i915_request * const *last = execlists->active;
+ struct i915_request * const *last = READ_ONCE(execlists->active);
while (*last && i915_request_completed(*last))
last++;
@@ -1172,21 +1182,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
continue;
}
- if (i915_request_completed(rq)) {
- ve->request = NULL;
- ve->base.execlists.queue_priority_hint = INT_MIN;
- rb_erase_cached(rb, &execlists->virtual);
- RB_CLEAR_NODE(rb);
-
- rq->engine = engine;
- __i915_request_submit(rq);
-
- spin_unlock(&ve->base.active.lock);
-
- rb = rb_first_cached(&execlists->virtual);
- continue;
- }
-
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
return; /* leave this for another */
@@ -1237,11 +1232,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(ve->siblings[0] != engine);
}
- __i915_request_submit(rq);
- if (!i915_request_completed(rq)) {
+ if (__i915_request_submit(rq)) {
submit = true;
last = rq;
}
+
+ /*
+ * Hmm, we have a bunch of virtual engine requests,
+ * but the first one was already completed (thanks
+ * preempt-to-busy!). Keep looking at the veng queue
+ * until we have no more relevant requests (i.e.
+ * the normal submit queue has higher priority).
+ */
+ if (!submit) {
+ spin_unlock(&ve->base.active.lock);
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
}
spin_unlock(&ve->base.active.lock);
@@ -1254,8 +1261,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- if (i915_request_completed(rq))
- goto skip;
+ bool merge = true;
/*
* Can we combine this request with the current port?
@@ -1296,14 +1302,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
ctx_single_port_submission(rq->hw_context))
goto done;
- *port = execlists_schedule_in(last, port - execlists->pending);
- port++;
+ merge = false;
}
- last = rq;
- submit = true;
-skip:
- __i915_request_submit(rq);
+ if (__i915_request_submit(rq)) {
+ if (!merge) {
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ port++;
+ last = NULL;
+ }
+
+ GEM_BUG_ON(last &&
+ !can_merge_ctx(last->hw_context,
+ rq->hw_context));
+
+ submit = true;
+ last = rq;
+ }
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -1593,8 +1608,11 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
- if (!engine->execlists.pending[0])
+ if (!engine->execlists.pending[0]) {
+ rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine);
+ rcu_read_unlock();
+ }
}
/*
@@ -2399,10 +2417,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static struct i915_request *active_request(struct i915_request *rq)
{
- const struct list_head * const list = &rq->timeline->requests;
const struct intel_context * const ce = rq->hw_context;
struct i915_request *active = NULL;
+ struct list_head *list;
+
+ if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
+ return rq;
+ list = &rq->timeline->requests;
list_for_each_entry_from_reverse(rq, list, link) {
if (i915_request_completed(rq))
break;
@@ -2565,7 +2587,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- list_del_init(&rq->sched.link);
__i915_request_submit(rq);
dma_fence_set_error(&rq->fence, -EIO);
i915_request_mark_complete(rq);
@@ -3631,18 +3652,22 @@ static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ intel_engine_mask_t allowed, exec;
struct ve_bond *bond;
+ allowed = ~to_request(signal)->engine->mask;
+
bond = virtual_find_bond(ve, to_request(signal)->engine);
- if (bond) {
- intel_engine_mask_t old, new, cmp;
+ if (bond)
+ allowed &= bond->sibling_mask;
- cmp = READ_ONCE(rq->execution_mask);
- do {
- old = cmp;
- new = cmp & bond->sibling_mask;
- } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
- }
+ /* Restrict the bonded request to run on only the available engines */
+ exec = READ_ONCE(rq->execution_mask);
+ while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
+ ;
+
+ /* Prevent the master from being re-run on the bonded engines */
+ to_request(signal)->execution_mask &= ~allowed;
}
struct intel_context *
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b9d84d52e986..8cea42379dd7 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq)
struct intel_engine_cs *engine = rq->engine;
struct i915_gem_context *hung_ctx = rq->gem_context;
- lockdep_assert_held(&engine->active.lock);
-
if (!i915_request_is_active(rq))
return;
+ lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
if (rq->gem_context == hung_ctx)
i915_request_skip(rq, -EIO);
@@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rq->fence.seqno,
yesno(guilty));
- lockdep_assert_held(&rq->engine->active.lock);
GEM_BUG_ON(i915_request_completed(rq));
if (guilty) {
@@ -1214,10 +1212,8 @@ out:
intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}
-int intel_gt_reset_trylock(struct intel_gt *gt)
+int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
- int srcu;
-
might_lock(&gt->reset.backoff_srcu);
might_sleep();
@@ -1232,10 +1228,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt)
rcu_read_lock();
}
- srcu = srcu_read_lock(&gt->reset.backoff_srcu);
+ *srcu = srcu_read_lock(&gt->reset.backoff_srcu);
rcu_read_unlock();
- return srcu;
+ return 0;
}
void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 37a987b17108..52c00199e069 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -38,7 +38,7 @@ int intel_engine_reset(struct intel_engine_cs *engine,
void __i915_request_reset(struct i915_request *rq, bool guilty);
-int __must_check intel_gt_reset_trylock(struct intel_gt *gt);
+int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
void intel_gt_set_wedged(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 601c16239fdf..bacaa7bb8c9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *engine = rq->engine;
enum intel_engine_id id;
const int num_engines =
- IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+ IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
bool force_restore = false;
int len;
u32 *cs;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 45481eb1fa3c..5f6ec2fd29a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1063,6 +1063,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
whitelist_reg(w, GEN8_HDC_CHICKEN1);
+
+ /* WaSendPushConstantsFromMMIO:skl,bxt */
+ whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}
static void skl_whitelist_build(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 020696726f9e..bb6f86c7067a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1924,6 +1924,11 @@ static int i915_drm_resume(struct drm_device *dev)
if (ret)
DRM_ERROR("failed to re-enable GGTT\n");
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ i915_gem_restore_gtt_mappings(dev_priv);
+ i915_gem_restore_fences(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
intel_csr_ucode_resume(dev_priv);
i915_restore_state(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 167a7b56ed5b..6795f1daa3d5 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -77,6 +77,12 @@ struct drm_i915_private;
#define I915_GEM_IDLE_TIMEOUT (HZ / 5)
+static inline void tasklet_lock(struct tasklet_struct *t)
+{
+ while (!tasklet_trylock(t))
+ cpu_relax();
+}
+
static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
{
if (!atomic_fetch_inc(&t->count))
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a53777dd371c..1c5506822dc7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -194,6 +194,27 @@ static void free_capture_list(struct i915_request *request)
}
}
+static void remove_from_engine(struct i915_request *rq)
+{
+ struct intel_engine_cs *engine, *locked;
+
+ /*
+ * Virtual engines complicate acquiring the engine timeline lock,
+ * as their rq->engine pointer is not stable until under that
+ * engine lock. The simple ploy we use is to take the lock then
+ * check that the rq still belongs to the newly locked engine.
+ */
+ locked = READ_ONCE(rq->engine);
+ spin_lock(&locked->active.lock);
+ while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
+ spin_unlock(&locked->active.lock);
+ spin_lock(&engine->active.lock);
+ locked = engine;
+ }
+ list_del(&rq->sched.link);
+ spin_unlock(&locked->active.lock);
+}
+
static bool i915_request_retire(struct i915_request *rq)
{
struct i915_active_request *active, *next;
@@ -259,9 +280,7 @@ static bool i915_request_retire(struct i915_request *rq)
* request that we have removed from the HW and put back on a run
* queue.
*/
- spin_lock(&rq->engine->active.lock);
- list_del(&rq->sched.link);
- spin_unlock(&rq->engine->active.lock);
+ remove_from_engine(rq);
spin_lock(&rq->lock);
i915_request_mark_complete(rq);
@@ -358,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
return 0;
}
-void __i915_request_submit(struct i915_request *request)
+bool __i915_request_submit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
+ bool result = false;
GEM_TRACE("%s fence %llx:%lld, current %d\n",
engine->name,
@@ -370,6 +390,25 @@ void __i915_request_submit(struct i915_request *request)
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->active.lock);
+ /*
+ * With the advent of preempt-to-busy, we frequently encounter
+ * requests that we have unsubmitted from HW, but left running
+ * until the next ack and so have completed in the meantime. On
+ * resubmission of that completed request, we can skip
+ * updating the payload, and execlists can even skip submitting
+ * the request.
+ *
+ * We must remove the request from the caller's priority queue,
+ * and the caller must only call us when the request is in their
+ * priority queue, under the active.lock. This ensures that the
+ * request has *not* yet been retired and we can safely move
+ * the request into the engine->active.list where it will be
+ * dropped upon retiring. (Otherwise if resubmit a *retired*
+ * request, this would be a horrible use-after-free.)
+ */
+ if (i915_request_completed(request))
+ goto xfer;
+
if (i915_gem_context_is_banned(request->gem_context))
i915_request_skip(request, -EIO);
@@ -393,13 +432,18 @@ void __i915_request_submit(struct i915_request *request)
i915_sw_fence_signaled(&request->semaphore))
engine->saturated |= request->sched.semaphores;
- /* We may be recursing from the signal callback of another i915 fence */
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+ engine->emit_fini_breadcrumb(request,
+ request->ring->vaddr + request->postfix);
- list_move_tail(&request->sched.link, &engine->active.requests);
+ trace_i915_request_execute(request);
+ engine->serial++;
+ result = true;
+
+xfer: /* We may be recursing from the signal callback of another i915 fence */
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
- set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+ if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+ list_move_tail(&request->sched.link, &engine->active.requests);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
@@ -410,12 +454,7 @@ void __i915_request_submit(struct i915_request *request)
spin_unlock(&request->lock);
- engine->emit_fini_breadcrumb(request,
- request->ring->vaddr + request->postfix);
-
- engine->serial++;
-
- trace_i915_request_execute(request);
+ return result;
}
void i915_request_submit(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8ac6e1226a56..e4dd013761e8 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
void i915_request_add(struct i915_request *rq);
-void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
void i915_request_submit(struct i915_request *request);
void i915_request_skip(struct i915_request *request, int error);
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index fa864d8f2b73..15f8bff141f9 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
return PCH_CNP;
case INTEL_PCH_CMP_DEVICE_ID_TYPE:
+ case INTEL_PCH_CMP2_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n");
WARN_ON(!IS_COFFEELAKE(dev_priv));
/* CometPoint is CNP Compatible */
diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
index e6a2d65f19c6..c29c81ec7971 100644
--- a/drivers/gpu/drm/i915/intel_pch.h
+++ b/drivers/gpu/drm/i915/intel_pch.h
@@ -41,6 +41,7 @@ enum intel_pch {
#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300
#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80
#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280
+#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
#define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index bb6dd54a6ff3..37593831b539 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -118,6 +118,12 @@ static void pm_resume(struct drm_i915_private *i915)
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
intel_gt_sanitize(&i915->gt, false);
i915_gem_sanitize(i915);
+
+ mutex_lock(&i915->drm.struct_mutex);
+ i915_gem_restore_gtt_mappings(i915);
+ i915_gem_restore_fences(i915);
+ mutex_unlock(&i915->drm.struct_mutex);
+
i915_gem_resume(i915);
}
}
diff --git a/drivers/gpu/drm/panel/panel-lg-lb035q02.c b/drivers/gpu/drm/panel/panel-lg-lb035q02.c
index fc82a525b071..ee4379729a5b 100644
--- a/drivers/gpu/drm/panel/panel-lg-lb035q02.c
+++ b/drivers/gpu/drm/panel/panel-lg-lb035q02.c
@@ -220,9 +220,17 @@ static const struct of_device_id lb035q02_of_match[] = {
MODULE_DEVICE_TABLE(of, lb035q02_of_match);
+static const struct spi_device_id lb035q02_ids[] = {
+ { "lb035q02", 0 },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, lb035q02_ids);
+
static struct spi_driver lb035q02_driver = {
.probe = lb035q02_probe,
.remove = lb035q02_remove,
+ .id_table = lb035q02_ids,
.driver = {
.name = "panel-lg-lb035q02",
.of_match_table = lb035q02_of_match,
@@ -231,7 +239,6 @@ static struct spi_driver lb035q02_driver = {
module_spi_driver(lb035q02_driver);
-MODULE_ALIAS("spi:lgphilips,lb035q02");
MODULE_AUTHOR("Tomi Valkeinen <tomi.valkeinen@ti.com>");
MODULE_DESCRIPTION("LG.Philips LB035Q02 LCD Panel driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
index 299b217c83e1..20f17e46e65d 100644
--- a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
+++ b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
@@ -230,9 +230,17 @@ static const struct of_device_id nl8048_of_match[] = {
MODULE_DEVICE_TABLE(of, nl8048_of_match);
+static const struct spi_device_id nl8048_ids[] = {
+ { "nl8048hl11", 0 },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, nl8048_ids);
+
static struct spi_driver nl8048_driver = {
.probe = nl8048_probe,
.remove = nl8048_remove,
+ .id_table = nl8048_ids,
.driver = {
.name = "panel-nec-nl8048hl11",
.pm = &nl8048_pm_ops,
@@ -242,7 +250,6 @@ static struct spi_driver nl8048_driver = {
module_spi_driver(nl8048_driver);
-MODULE_ALIAS("spi:nec,nl8048hl11");
MODULE_AUTHOR("Erik Gilling <konkers@android.com>");
MODULE_DESCRIPTION("NEC-NL8048HL11 Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
index 305259b58767..3d5b9c4f68d9 100644
--- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c
+++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
@@ -684,9 +684,17 @@ static const struct of_device_id acx565akm_of_match[] = {
MODULE_DEVICE_TABLE(of, acx565akm_of_match);
+static const struct spi_device_id acx565akm_ids[] = {
+ { "acx565akm", 0 },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, acx565akm_ids);
+
static struct spi_driver acx565akm_driver = {
.probe = acx565akm_probe,
.remove = acx565akm_remove,
+ .id_table = acx565akm_ids,
.driver = {
.name = "panel-sony-acx565akm",
.of_match_table = acx565akm_of_match,
@@ -695,7 +703,6 @@ static struct spi_driver acx565akm_driver = {
module_spi_driver(acx565akm_driver);
-MODULE_ALIAS("spi:sony,acx565akm");
MODULE_AUTHOR("Nokia Corporation");
MODULE_DESCRIPTION("Sony ACX565AKM LCD Panel Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
index d7b2e34626ef..f2baff827f50 100644
--- a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
+++ b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
@@ -375,8 +375,7 @@ static const struct of_device_id td028ttec1_of_match[] = {
MODULE_DEVICE_TABLE(of, td028ttec1_of_match);
static const struct spi_device_id td028ttec1_ids[] = {
- { "tpo,td028ttec1", 0},
- { "toppoly,td028ttec1", 0 },
+ { "td028ttec1", 0 },
{ /* sentinel */ }
};
diff --git a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
index 84370562910f..ba163c779084 100644
--- a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
+++ b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
@@ -491,9 +491,17 @@ static const struct of_device_id td043mtea1_of_match[] = {
MODULE_DEVICE_TABLE(of, td043mtea1_of_match);
+static const struct spi_device_id td043mtea1_ids[] = {
+ { "td043mtea1", 0 },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, td043mtea1_ids);
+
static struct spi_driver td043mtea1_driver = {
.probe = td043mtea1_probe,
.remove = td043mtea1_remove,
+ .id_table = td043mtea1_ids,
.driver = {
.name = "panel-tpo-td043mtea1",
.pm = &td043mtea1_pm_ops,
@@ -503,7 +511,6 @@ static struct spi_driver td043mtea1_driver = {
module_spi_driver(td043mtea1_driver);
-MODULE_ALIAS("spi:tpo,td043mtea1");
MODULE_AUTHOR("GraÅžvydas Ignotas <notasas@gmail.com>");
MODULE_DESCRIPTION("TPO TD043MTEA1 Panel Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index cc5b09b87ab0..79a28fc91521 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -314,60 +314,24 @@ static void mousevsc_on_receive(struct hv_device *device,
static void mousevsc_on_channel_callback(void *context)
{
- const int packet_size = 0x100;
- int ret;
struct hv_device *device = context;
- u32 bytes_recvd;
- u64 req_id;
struct vmpacket_descriptor *desc;
- unsigned char *buffer;
- int bufferlen = packet_size;
-
- buffer = kmalloc(bufferlen, GFP_ATOMIC);
- if (!buffer)
- return;
-
- do {
- ret = vmbus_recvpacket_raw(device->channel, buffer,
- bufferlen, &bytes_recvd, &req_id);
-
- switch (ret) {
- case 0:
- if (bytes_recvd <= 0) {
- kfree(buffer);
- return;
- }
- desc = (struct vmpacket_descriptor *)buffer;
-
- switch (desc->type) {
- case VM_PKT_COMP:
- break;
-
- case VM_PKT_DATA_INBAND:
- mousevsc_on_receive(device, desc);
- break;
-
- default:
- pr_err("unhandled packet type %d, tid %llx len %d\n",
- desc->type, req_id, bytes_recvd);
- break;
- }
+ foreach_vmbus_pkt(desc, device->channel) {
+ switch (desc->type) {
+ case VM_PKT_COMP:
break;
- case -ENOBUFS:
- kfree(buffer);
- /* Handle large packet */
- bufferlen = bytes_recvd;
- buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
-
- if (!buffer)
- return;
+ case VM_PKT_DATA_INBAND:
+ mousevsc_on_receive(device, desc);
+ break;
+ default:
+ pr_err("Unhandled packet type %d, tid %llx len %d\n",
+ desc->type, desc->trans_id, desc->len8 * 8);
break;
}
- } while (1);
-
+ }
}
static int mousevsc_connect_to_vsp(struct hv_device *device)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 391f0b225c9a..53a60c81e220 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -912,6 +912,7 @@ static void vmbus_shutdown(struct device *child_device)
drv->shutdown(dev);
}
+#ifdef CONFIG_PM_SLEEP
/*
* vmbus_suspend - Suspend a vmbus device
*/
@@ -949,6 +950,7 @@ static int vmbus_resume(struct device *child_device)
return drv->resume(dev);
}
+#endif /* CONFIG_PM_SLEEP */
/*
* vmbus_device_release - Final callback release of the vmbus child device
@@ -1070,6 +1072,7 @@ msg_handled:
vmbus_signal_eom(msg, message_type);
}
+#ifdef CONFIG_PM_SLEEP
/*
* Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
* hibernation, because hv_sock connections can not persist across hibernation.
@@ -1105,6 +1108,7 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
vmbus_connection.work_queue,
&ctx->work);
}
+#endif /* CONFIG_PM_SLEEP */
/*
* Direct callback for channels using other deferred processing
@@ -2125,6 +2129,7 @@ acpi_walk_err:
return ret_val;
}
+#ifdef CONFIG_PM_SLEEP
static int vmbus_bus_suspend(struct device *dev)
{
struct vmbus_channel *channel, *sc;
@@ -2247,6 +2252,7 @@ static int vmbus_bus_resume(struct device *dev)
return 0;
}
+#endif /* CONFIG_PM_SLEEP */
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index da10e6ccb43c..5920c0085d35 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -4399,6 +4399,7 @@ error2:
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
+ kfree(port);
while (--i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
@@ -4407,6 +4408,7 @@ error1:
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->mad_agent);
cm_remove_port_fs(port);
+ kfree(port);
}
free:
kfree(cm_dev);
@@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
spin_unlock_irq(&cm.state_lock);
ib_unregister_mad_agent(cur_mad_agent);
cm_remove_port_fs(port);
+ kfree(port);
}
kfree(cm_dev);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0e3cf3461999..d78f67623f24 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
- goto out;
+ return ret;
}
mutex_unlock(&conn_id->handler_mutex);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 99c4a55545cf..2dd2cfe9b561 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1987,8 +1987,6 @@ static int iw_query_port(struct ib_device *device,
if (!netdev)
return -ENODEV;
- dev_put(netdev);
-
port_attr->max_mtu = IB_MTU_4096;
port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -1996,19 +1994,22 @@ static int iw_query_port(struct ib_device *device,
port_attr->state = IB_PORT_DOWN;
port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
} else {
- inetdev = in_dev_get(netdev);
+ rcu_read_lock();
+ inetdev = __in_dev_get_rcu(netdev);
if (inetdev && inetdev->ifa_list) {
port_attr->state = IB_PORT_ACTIVE;
port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
- in_dev_put(inetdev);
} else {
port_attr->state = IB_PORT_INIT;
port_attr->phys_state =
IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
}
+
+ rcu_read_unlock();
}
+ dev_put(netdev);
err = device->ops.query_port(device, port_num, port_attr);
if (err)
return err;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 7a7474000100..65b36548bc17 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1230,7 +1230,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto err;
+ goto err_get;
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
@@ -1787,10 +1787,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
- ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
- if (ret)
- goto err_unbind;
-
if (fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
@@ -1799,13 +1795,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_fill;
}
+ ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ if (ret)
+ goto err_fill;
+
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_fill:
- rdma_counter_bind_qpn(device, port, qpn, cntn);
-err_unbind:
nlmsg_free(msg);
err:
ib_device_put(device);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 1ab423b19f77..6eb6d2717ca5 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
int ret;
rdma_for_each_port (dev, i) {
- is_ib = rdma_protocol_ib(dev, i++);
+ is_ib = rdma_protocol_ib(dev, i);
if (is_ib)
break;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index f67a30fda1ed..163ff7ba92b7 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
* that the hardware will not attempt to access the MR any more.
*/
if (!umem_odp->is_implicit_odp) {
+ mutex_lock(&umem_odp->umem_mutex);
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
ib_umem_end(umem_odp));
+ mutex_unlock(&umem_odp->umem_mutex);
kvfree(umem_odp->dma_list);
kvfree(umem_odp->page_list);
}
@@ -719,6 +721,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 addr;
struct ib_device *dev = umem_odp->umem.ibdev;
+ lockdep_assert_held(&umem_odp->umem_mutex);
+
virt = max_t(u64, virt, ib_umem_start(umem_odp));
bound = min_t(u64, bound, ib_umem_end(umem_odp));
/* Note that during the run of this function, the
@@ -726,7 +730,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
* faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only
* once. */
- mutex_lock(&umem_odp->umem_mutex);
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
if (umem_odp->page_list[idx]) {
@@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
umem_odp->npages--;
}
}
- mutex_unlock(&umem_odp->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index a8b9548bd1a2..599340c1f0b8 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep,
}
}
-static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd)
+static int dump_qp(unsigned long id, struct c4iw_qp *qp,
+ struct c4iw_debugfs_data *qpd)
{
int space;
int cc;
+ if (id != qp->wq.sq.qid)
+ return 0;
space = qpd->bufsize - qpd->pos - 1;
if (space == 0)
@@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file)
xa_lock_irq(&qpd->devp->qps);
xa_for_each(&qpd->devp->qps, index, qp)
- dump_qp(qp, qpd);
+ dump_qp(index, qp, qpd);
xa_unlock_irq(&qpd->devp->qps);
qpd->buf[qpd->pos++] = 0;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index aa772ee0706f..35c284af574d 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp)
{
int err;
- struct fw_ri_tpte tpt;
+ struct fw_ri_tpte *tpt;
u32 stag_idx;
static atomic_t key;
if (c4iw_fatal_error(rdev))
return -EIO;
+ tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);
+ if (!tpt)
+ return -ENOMEM;
+
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_lock(&rdev->stats.lock);
rdev->stats.stag.fail++;
mutex_unlock(&rdev->stats.lock);
+ kfree(tpt);
return -ENOMEM;
}
mutex_lock(&rdev->stats.lock);
@@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
/* write TPT entry */
if (reset_tpt_entry)
- memset(&tpt, 0, sizeof(tpt));
+ memset(tpt, 0, sizeof(*tpt));
else {
- tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
+ tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) |
FW_RI_TPTE_STAGSTATE_V(stag_state) |
FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid));
- tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
+ tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
(bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) |
FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO :
FW_RI_VA_BASED_TO))|
FW_RI_TPTE_PS_V(page_size));
- tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+ tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3));
- tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
- tpt.va_hi = cpu_to_be32((u32)(to >> 32));
- tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
- tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
- tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+ tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+ tpt->va_hi = cpu_to_be32((u32)(to >> 32));
+ tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+ tpt->dca_mwbcnt_pstag = cpu_to_be32(0);
+ tpt->len_hi = cpu_to_be32((u32)(len >> 32));
}
err = write_adapter_mem(rdev, stag_idx +
(rdev->lldi.vr->stag.start >> 5),
- sizeof(tpt), &tpt, skb, wr_waitp);
+ sizeof(*tpt), tpt, skb, wr_waitp);
if (reset_tpt_entry) {
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
rdev->stats.stag.cur -= 32;
mutex_unlock(&rdev->stats.lock);
}
+ kfree(tpt);
return err;
}
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index eb9368be28c1..bbcac539777a 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
srq->flags = T4_SRQ_LIMIT_SUPPORT;
- ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL);
- if (ret)
- goto err_free_queue;
-
if (udata) {
srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
if (!srq_key_mm) {
ret = -ENOMEM;
- goto err_remove_handle;
+ goto err_free_queue;
}
srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
if (!srq_db_key_mm) {
@@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm:
kfree(srq_db_key_mm);
err_free_srq_key_mm:
kfree(srq_key_mm);
-err_remove_handle:
- xa_erase_irq(&rhp->qps, srq->wq.qid);
err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
@@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
rhp = srq->rhp;
pr_debug("%s id %d\n", __func__, srq->wq.qid);
-
- xa_erase_irq(&rhp->qps, srq->wq.qid);
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
ibucontext);
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 2395fd4233a7..2ed7bfd5feea 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1526,8 +1526,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(tmp_sdma_rht);
goto bail;
+ }
+
dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 8056930bbe2c..cd9ee1664a69 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
+ ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1);
+ if (ret)
+ goto error;
+
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
if (ret)
goto error;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 59022b744144..d609f4659afb 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1298,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
return 0;
}
-static void devx_free_indirect_mkey(struct rcu_head *rcu)
-{
- kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
-}
-
-/* This function to delete from the radix tree needs to be called before
- * destroying the underlying mkey. Otherwise a race might occur in case that
- * other thread will get the same mkey before this one will be deleted,
- * in that case it will fail via inserting to the tree its own data.
- *
- * Note:
- * An error in the destroy is not expected unless there is some other indirect
- * mkey which points to this one. In a kernel cleanup flow it will be just
- * destroyed in the iterative destruction call. In a user flow, in case
- * the application didn't close in the expected order it's its own problem,
- * the mkey won't be part of the tree, in both cases the kernel is safe.
- */
-static void devx_cleanup_mkey(struct devx_obj *obj)
-{
- xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
- mlx5_base_mkey(obj->devx_mr.mmkey.key));
-}
-
static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
struct devx_event_subscription *sub)
{
@@ -1362,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
int ret;
dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
- devx_cleanup_mkey(obj);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ /*
+ * The pagefault_single_data_segment() does commands against
+ * the mmkey, we must wait for that to stop before freeing the
+ * mkey, as another allocation could get the same mkey #.
+ */
+ xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key));
+ synchronize_srcu(&dev->mr_srcu);
+ }
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
@@ -1382,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
devx_cleanup_subscription(dev, sub_entry);
mutex_unlock(&devx_event_table->event_xa_lock);
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
- devx_free_indirect_mkey);
- return ret;
- }
-
kfree(obj);
return ret;
}
@@ -1491,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
- if (err)
- goto obj_destroy;
- }
-
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
- goto err_copy;
+ goto obj_destroy;
if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
-
obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+ if (err)
+ goto obj_destroy;
+ }
return 0;
-err_copy:
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
- devx_cleanup_mkey(obj);
obj_destroy:
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2ceaef3ea3fb..1a98ee2e01c4 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -606,7 +606,7 @@ struct mlx5_ib_mr {
struct mlx5_ib_dev *dev;
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx *sig;
- int live;
+ unsigned int live;
void *descs_alloc;
int access_flags; /* Needed for rereg MR */
@@ -639,7 +639,6 @@ struct mlx5_ib_mw {
struct mlx5_ib_devx_mr {
struct mlx5_core_mkey mmkey;
int ndescs;
- struct rcu_head rcu;
};
struct mlx5_ib_umr_context {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1eff031ef048..630599311586 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}
-static void update_odp_mr(struct mlx5_ib_mr *mr)
-{
- if (is_odp_mr(mr)) {
- /*
- * This barrier prevents the compiler from moving the
- * setting of umem->odp_data->private to point to our
- * MR, before reg_umr finished, to ensure that the MR
- * initialization have finished before starting to
- * handle invalidations.
- */
- smp_wmb();
- to_ib_umem_odp(mr->umem)->private = mr;
- /*
- * Make sure we will see the new
- * umem->odp_data->private value in the invalidation
- * routines, before we can get page faults on the
- * MR. Page faults can happen once we put the MR in
- * the tree, below this line. Without the barrier,
- * there can be a fault handling and an invalidation
- * before umem->odp_data->private == mr is visible to
- * the invalidation handler.
- */
- smp_wmb();
- }
-}
-
static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
struct mlx5_ib_mr *mr =
@@ -1346,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->umem = umem;
set_mr_fields(dev, mr, npages, length, access_flags);
- update_odp_mr(mr);
-
if (use_umr) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
@@ -1363,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- mr->live = 1;
+ if (is_odp_mr(mr)) {
+ to_ib_umem_odp(mr->umem)->private = mr;
atomic_set(&mr->num_pending_prefetch, 0);
}
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ smp_store_release(&mr->live, 1);
return &mr->ibmr;
error:
@@ -1441,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (!mr->umem)
return -EINVAL;
+ if (is_odp_mr(mr))
+ return -EOPNOTSUPP;
+
if (flags & IB_MR_REREG_TRANS) {
addr = virt_addr;
len = length;
@@ -1486,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
mr->allocated_from_cache = 0;
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- mr->live = 1;
} else {
/*
* Send a UMR WQE
@@ -1516,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
set_mr_fields(dev, mr, npages, len, access_flags);
- update_odp_mr(mr);
return 0;
err:
@@ -1607,15 +1581,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
/* Prevent new page faults and
* prefetch requests from succeeding
*/
- mr->live = 0;
+ WRITE_ONCE(mr->live, 0);
+
+ /* Wait for all running page-fault handlers to finish. */
+ synchronize_srcu(&dev->mr_srcu);
/* dequeue pending prefetch requests for the mr */
if (atomic_read(&mr->num_pending_prefetch))
flush_workqueue(system_unbound_wq);
WARN_ON(atomic_read(&mr->num_pending_prefetch));
- /* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
if (!umem_odp->is_implicit_odp)
mlx5_ib_invalidate_range(umem_odp,
@@ -1987,14 +1962,25 @@ free:
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
+ struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
int err;
- err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
- &mmw->mmkey);
- if (!err)
- kfree(mmw);
- return err;
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ xa_erase(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(mmw->mmkey.key));
+ /*
+ * pagefault_single_data_segment() may be accessing mmw under
+ * SRCU if the user bound an ODP MR to this MW.
+ */
+ synchronize_srcu(&dev->mr_srcu);
+ }
+
+ err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
+ if (err)
+ return err;
+ kfree(mmw);
+ return 0;
}
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 2e9b43061797..3f9478d19376 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -178,6 +178,29 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
return;
}
+ /*
+ * The locking here is pretty subtle. Ideally the implicit children
+ * list would be protected by the umem_mutex, however that is not
+ * possible. Instead this uses a weaker update-then-lock pattern:
+ *
+ * srcu_read_lock()
+ * <change children list>
+ * mutex_lock(umem_mutex)
+ * mlx5_ib_update_xlt()
+ * mutex_unlock(umem_mutex)
+ * destroy lkey
+ *
+ * ie any change the children list must be followed by the locked
+ * update_xlt before destroying.
+ *
+ * The umem_mutex provides the acquire/release semantic needed to make
+ * the children list visible to a racing thread. While SRCU is not
+ * technically required, using it gives consistent use of the SRCU
+ * locking around the children list.
+ */
+ lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ lockdep_assert_held(&mr->dev->mr_srcu);
+
odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
nentries * MLX5_IMR_MTT_SIZE, mr);
@@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work)
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+ struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+ int srcu_key;
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
- ib_umem_odp_release(odp);
- if (imr->live)
+ if (smp_load_acquire(&imr->live)) {
+ srcu_key = srcu_read_lock(&mr->dev->mr_srcu);
+ mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(imr, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ srcu_read_unlock(&mr->dev->mr_srcu, srcu_key);
+ }
+ ib_umem_odp_release(odp);
mlx5_mr_cache_free(mr->dev, mr);
if (atomic_dec_and_test(&imr->num_leaf_free))
@@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
- mutex_unlock(&umem_odp->umem_mutex);
/*
* We are now sure that the device will not access the
* memory. We can safely unmap it, and mark it as dirty if
@@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
if (unlikely(!umem_odp->npages && mr->parent &&
!umem_odp->dying)) {
- WRITE_ONCE(umem_odp->dying, 1);
+ WRITE_ONCE(mr->live, 0);
+ umem_odp->dying = 1;
atomic_inc(&mr->parent->num_leaf_free);
schedule_work(&umem_odp->work);
}
+ mutex_unlock(&umem_odp->umem_mutex);
}
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -429,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
- mr->live = 1;
-
mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
mr->mmkey.key, dev->mdev, mr);
@@ -484,6 +513,8 @@ next_mr:
mtt->parent = mr;
INIT_WORK(&odp->work, mr_leaf_free_action);
+ smp_store_release(&mtt->live, 1);
+
if (!nentries)
start_idx = addr >> MLX5_IMR_MTT_SHIFT;
nentries++;
@@ -536,6 +567,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
init_waitqueue_head(&imr->q_leaf_free);
atomic_set(&imr->num_leaf_free, 0);
atomic_set(&imr->num_pending_prefetch, 0);
+ smp_store_release(&imr->live, 1);
return imr;
}
@@ -555,15 +587,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
if (mr->parent != imr)
continue;
+ mutex_lock(&umem_odp->umem_mutex);
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
ib_umem_end(umem_odp));
- if (umem_odp->dying)
+ if (umem_odp->dying) {
+ mutex_unlock(&umem_odp->umem_mutex);
continue;
+ }
- WRITE_ONCE(umem_odp->dying, 1);
+ umem_odp->dying = 1;
atomic_inc(&imr->num_leaf_free);
schedule_work(&umem_odp->work);
+ mutex_unlock(&umem_odp->umem_mutex);
}
up_read(&per_mm->umem_rwsem);
@@ -773,7 +809,7 @@ next_mr:
switch (mmkey->type) {
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (!mr->live || !mr->ibmr.pd) {
+ if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) {
mlx5_ib_dbg(dev, "got dead MR\n");
ret = -EFAULT;
goto srcu_unlock;
@@ -1641,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd,
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (mr->ibmr.pd != pd) {
+ if (!smp_load_acquire(&mr->live)) {
ret = false;
break;
}
- if (!mr->live) {
+ if (mr->ibmr.pd != pd) {
ret = false;
break;
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 6cac0c88cf39..36cdfbdbd325 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
pvrdma_page_dir_cleanup(dev, &srq->pdir);
- kfree(srq);
-
atomic_dec(&dev->num_srqs);
}
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 430314c8abd9..52d402f39df9 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp)
*/
void siw_qp_llp_write_space(struct sock *sk)
{
- struct siw_cep *cep = sk_to_cep(sk);
+ struct siw_cep *cep;
- cep->sk_write_space(sk);
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (cep) {
+ cep->sk_write_space(sk);
- if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
- (void)siw_sq_start(cep->qp);
+ if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+ (void)siw_sq_start(cep->qp);
+ }
+
+ read_unlock(&sk->sk_callback_lock);
}
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 9231b39d18b2..c501bf2a0252 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -112,17 +112,11 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
struct xarray *mkeys = &dev->priv.mkey_table;
- struct mlx5_core_mkey *deleted_mkey;
unsigned long flags;
xa_lock_irqsave(mkeys, flags);
- deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+ __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
xa_unlock_irqrestore(mkeys, flags);
- if (!deleted_mkey) {
- mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
- mlx5_base_mkey(mkey->key));
- return -ENOENT;
- }
MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8fe4eb7e5045..27e5b269e729 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1591,7 +1591,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
- struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
u64 lockstart;
@@ -1611,6 +1610,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
return -ENOMEM;
while (iov_iter_count(i) > 0) {
+ struct extent_state *cached_state = NULL;
size_t offset = offset_in_page(pos);
size_t sector_offset;
size_t write_bytes = min(iov_iter_count(i),
@@ -1758,9 +1758,20 @@ again:
if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages,
pos, copied, &cached_state);
+
+ /*
+ * If we have not locked the extent range, because the range's
+ * start offset is >= i_size, we might still have a non-NULL
+ * cached extent state, acquired while marking the extent range
+ * as delalloc through btrfs_dirty_pages(). Therefore free any
+ * possible cached extent state to avoid a memory leak.
+ */
if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state);
+ else
+ free_extent_state(cached_state);
+
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
true);
if (ret) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0546401bc0a..0f2754eaa05b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6305,13 +6305,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
u32 sizes[2];
int nitems = name ? 2 : 1;
unsigned long ptr;
+ unsigned int nofs_flag;
int ret;
path = btrfs_alloc_path();
if (!path)
return ERR_PTR(-ENOMEM);
+ nofs_flag = memalloc_nofs_save();
inode = new_inode(fs_info->sb);
+ memalloc_nofs_restore(nofs_flag);
if (!inode) {
btrfs_free_path(path);
return ERR_PTR(-ENOMEM);
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index e87cbdad02a3..b57f3618e58e 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -500,7 +500,7 @@ static int process_leaf(struct btrfs_root *root,
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
u32 count;
- int i = 0, tree_block_level = 0, ret;
+ int i = 0, tree_block_level = 0, ret = 0;
struct btrfs_key key;
int nritems = btrfs_header_nritems(leaf);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f3215028235c..123ac54af071 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5085,7 +5085,7 @@ static int clone_range(struct send_ctx *sctx,
struct btrfs_path *path;
struct btrfs_key key;
int ret;
- u64 clone_src_i_size;
+ u64 clone_src_i_size = 0;
/*
* Prevent cloning from a zero offset with a length matching the sector
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 29b82a795522..8a6cc600bf18 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2932,7 +2932,8 @@ out:
* in the tree of log roots
*/
static int update_log_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *log)
+ struct btrfs_root *log,
+ struct btrfs_root_item *root_item)
{
struct btrfs_fs_info *fs_info = log->fs_info;
int ret;
@@ -2940,10 +2941,10 @@ static int update_log_root(struct btrfs_trans_handle *trans,
if (log->log_transid == 1) {
/* insert root item on the first sync */
ret = btrfs_insert_root(trans, fs_info->log_root_tree,
- &log->root_key, &log->root_item);
+ &log->root_key, root_item);
} else {
ret = btrfs_update_root(trans, fs_info->log_root_tree,
- &log->root_key, &log->root_item);
+ &log->root_key, root_item);
}
return ret;
}
@@ -3041,6 +3042,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = fs_info->log_root_tree;
+ struct btrfs_root_item new_root_item;
int log_transid = 0;
struct btrfs_log_ctx root_log_ctx;
struct blk_plug plug;
@@ -3104,18 +3106,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out;
}
+ /*
+ * We _must_ update under the root->log_mutex in order to make sure we
+ * have a consistent view of the log root we are trying to commit at
+ * this moment.
+ *
+ * We _must_ copy this into a local copy, because we are not holding the
+ * log_root_tree->log_mutex yet. This is important because when we
+ * commit the log_root_tree we must have a consistent view of the
+ * log_root_tree when we update the super block to point at the
+ * log_root_tree bytenr. If we update the log_root_tree here we'll race
+ * with the commit and possibly point at the new block which we may not
+ * have written out.
+ */
btrfs_set_root_node(&log->root_item, log->node);
+ memcpy(&new_root_item, &log->root_item, sizeof(new_root_item));
root->log_transid++;
log->log_transid = root->log_transid;
root->log_start_pid = 0;
/*
- * Update or create log root item under the root's log_mutex to prevent
- * races with concurrent log syncs that can lead to failure to update
- * log root item because it was not created yet.
- */
- ret = update_log_root(trans, log);
- /*
* IO has been started, blocks of the log tree have WRITTEN flag set
* in their headers. new modifications of the log will be written to
* new positions. so it's safe to allow log writers to go in.
@@ -3135,6 +3145,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&log_root_tree->log_mutex);
mutex_lock(&log_root_tree->log_mutex);
+
+ /*
+ * Now we are safe to update the log_root_tree because we're under the
+ * log_mutex, and we're a current writer so we're holding the commit
+ * open until we drop the log_mutex.
+ */
+ ret = update_log_root(trans, log, &new_root_item);
+
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
/* atomic_dec_and_test implies a barrier */
cond_wake_up_nomb(&log_root_tree->log_writer_wait);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cdd7af424033..bdfe4493e43a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3845,7 +3845,11 @@ static int alloc_profile_is_valid(u64 flags, int extended)
return !extended; /* "0" is valid for usual profiles */
/* true if exactly one bit set */
- return is_power_of_2(flags);
+ /*
+ * Don't use is_power_of_2(unsigned long) because it won't work
+ * for the single profile (1ULL << 48) on 32-bit CPUs.
+ */
+ return flags != 0 && (flags & (flags - 1)) == 0;
}
static inline int balance_need_close(struct btrfs_fs_info *fs_info)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2e9c7f493f99..c049c7b3aa87 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -169,18 +169,26 @@ cifs_read_super(struct super_block *sb)
else
sb->s_maxbytes = MAX_NON_LFS;
- /* BB FIXME fix time_gran to be larger for LANMAN sessions */
- sb->s_time_gran = 100;
-
- if (tcon->unix_ext) {
- ts = cifs_NTtimeToUnix(0);
+ /* Some very old servers like DOS and OS/2 used 2 second granularity */
+ if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) &&
+ ((tcon->ses->capabilities &
+ tcon->ses->server->vals->cap_nt_find) == 0) &&
+ !tcon->unix_ext) {
+ sb->s_time_gran = 1000000000; /* 1 second is max allowed gran */
+ ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0);
sb->s_time_min = ts.tv_sec;
- ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX));
+ ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX),
+ cpu_to_le16(SMB_TIME_MAX), 0);
sb->s_time_max = ts.tv_sec;
} else {
- ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0);
+ /*
+ * Almost every server, including all SMB2+, uses DCE TIME
+ * ie 100 nanosecond units, since 1601. See MS-DTYP and MS-FSCC
+ */
+ sb->s_time_gran = 100;
+ ts = cifs_NTtimeToUnix(0);
sb->s_time_min = ts.tv_sec;
- ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX), cpu_to_le16(SMB_TIME_MAX), 0);
+ ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX));
sb->s_time_max = ts.tv_sec;
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 2e960e1049db..50dfd9049370 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1210,7 +1210,7 @@ struct cifs_search_info {
bool smallBuf:1; /* so we know which buf_release function to call */
};
-#define ACL_NO_MODE -1
+#define ACL_NO_MODE ((umode_t)(-1))
struct cifs_open_parms {
struct cifs_tcon *tcon;
struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2850c3ce4391..a64dfa95a925 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -4264,7 +4264,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
server->ops->qfs_tcon(*xid, tcon);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) {
if (tcon->fsDevInfo.DeviceCharacteristics &
- FILE_READ_ONLY_DEVICE)
+ cpu_to_le32(FILE_READ_ONLY_DEVICE))
cifs_dbg(VFS, "mounted to read only share\n");
else if ((cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_RW_CACHE) == 0)
@@ -4445,7 +4445,7 @@ static int setup_dfs_tgt_conn(const char *path,
int rc;
struct dfs_info3_param ref = {0};
char *mdata = NULL, *fake_devname = NULL;
- struct smb_vol fake_vol = {0};
+ struct smb_vol fake_vol = {NULL};
cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index dd5ac841aefa..7ce689d31aa2 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -738,10 +738,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
static int
cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
{
+ struct inode *inode;
+
if (flags & LOOKUP_RCU)
return -ECHILD;
if (d_really_is_positive(direntry)) {
+ inode = d_inode(direntry);
+ if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode)))
+ CIFS_I(inode)->time = 0; /* force reval */
+
if (cifs_revalidate_dentry(direntry))
return 0;
else {
@@ -752,7 +758,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
* attributes will have been updated by
* cifs_revalidate_dentry().
*/
- if (IS_AUTOMOUNT(d_inode(direntry)) &&
+ if (IS_AUTOMOUNT(inode) &&
!(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
spin_lock(&direntry->d_lock);
direntry->d_flags |= DCACHE_NEED_AUTOMOUNT;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4b95700c507c..5ad15de2bb4f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -253,6 +253,12 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
xid, fid);
+ if (rc) {
+ server->ops->close(xid, tcon, fid);
+ if (rc == -ESTALE)
+ rc = -EOPENSTALE;
+ }
+
out:
kfree(buf);
return rc;
@@ -1840,13 +1846,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
{
struct cifsFileInfo *open_file = NULL;
struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
- spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifs_inode->open_file_lock);
/* we could simply get the first_list_entry since write-only entries
are always at the end of the list but since the first entry might
have a close pending, we go through the whole list */
@@ -1858,7 +1863,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
/* found a good file */
/* lock it so it will not be closed on us */
cifsFileInfo_get(open_file);
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return open_file;
} /* else might as well continue, and look for
another, or simply have the caller reopen it
@@ -1866,7 +1871,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
} else /* write only file */
break; /* write only files are last so must be done */
}
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return NULL;
}
@@ -1877,7 +1882,6 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
{
struct cifsFileInfo *open_file, *inv_file = NULL;
struct cifs_sb_info *cifs_sb;
- struct cifs_tcon *tcon;
bool any_available = false;
int rc = -EBADF;
unsigned int refind = 0;
@@ -1897,16 +1901,15 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
}
cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
- tcon = cifs_sb_master_tcon(cifs_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
- spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifs_inode->open_file_lock);
refind_writable:
if (refind > MAX_REOPEN_ATT) {
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return rc;
}
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
@@ -1918,7 +1921,7 @@ refind_writable:
if (!open_file->invalidHandle) {
/* found a good writable file */
cifsFileInfo_get(open_file);
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
*ret_file = open_file;
return 0;
} else {
@@ -1938,7 +1941,7 @@ refind_writable:
cifsFileInfo_get(inv_file);
}
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
if (inv_file) {
rc = cifs_reopen_file(inv_file, false);
@@ -1953,7 +1956,7 @@ refind_writable:
cifsFileInfo_put(inv_file);
++refind;
inv_file = NULL;
- spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifs_inode->open_file_lock);
goto refind_writable;
}
@@ -4461,17 +4464,15 @@ static int cifs_readpage(struct file *file, struct page *page)
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
struct cifsFileInfo *open_file;
- struct cifs_tcon *tcon =
- cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
- spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifs_inode->open_file_lock);
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return 1;
}
}
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return 0;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3bae2e53f0b8..5dcc95b38310 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -414,6 +414,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* if uniqueid is different, return error */
if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) {
+ CIFS_I(*pinode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgiiu_exit;
}
@@ -421,6 +422,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* if filetype is different, return error */
if (unlikely(((*pinode)->i_mode & S_IFMT) !=
(fattr.cf_mode & S_IFMT))) {
+ CIFS_I(*pinode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgiiu_exit;
}
@@ -933,6 +935,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
/* if uniqueid is different, return error */
if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+ CIFS_I(*inode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgii_exit;
}
@@ -940,6 +943,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
/* if filetype is different, return error */
if (unlikely(((*inode)->i_mode & S_IFMT) !=
(fattr.cf_mode & S_IFMT))) {
+ CIFS_I(*inode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgii_exit;
}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 49c17ee18254..9b41436fb8db 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -117,10 +117,6 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
{0, 0}
};
-static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
- {0, 0}
-};
-
/*
* Convert a string containing text IPv4 or IPv6 address to binary form.
*
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 85f9d614d968..05149862aea4 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -751,8 +751,8 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
unsigned int num = *num_iovec;
iov[num].iov_base = create_posix_buf(mode);
- if (mode == -1)
- cifs_dbg(VFS, "illegal mode\n"); /* BB REMOVEME */
+ if (mode == ACL_NO_MODE)
+ cifs_dbg(FYI, "illegal mode\n");
if (iov[num].iov_base == NULL)
return -ENOMEM;
iov[num].iov_len = sizeof(struct create_posix);
@@ -2521,11 +2521,8 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock,
return rc;
}
- /* TODO: add handling for the mode on create */
- if (oparms->disposition == FILE_CREATE)
- cifs_dbg(VFS, "mode is 0x%x\n", oparms->mode); /* BB REMOVEME */
-
- if ((oparms->disposition == FILE_CREATE) && (oparms->mode != -1)) {
+ if ((oparms->disposition == FILE_CREATE) &&
+ (oparms->mode != ACL_NO_MODE)) {
if (n_iov > 2) {
struct create_context *ccontext =
(struct create_context *)iov[n_iov-1].iov_base;
@@ -3217,7 +3214,8 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst,
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
- req->OutputBufferLength = SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE;
+ req->OutputBufferLength =
+ cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE);
req->CompletionFilter = cpu_to_le32(completion_filter);
if (watch_tree)
req->Flags = cpu_to_le16(SMB2_WATCH_TREE);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index da3a6d580808..71b2930b8e0b 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -150,6 +150,10 @@ extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
bool is_fsctl, char *in_data, u32 indatalen,
__u32 max_response_size);
extern void SMB2_ioctl_free(struct smb_rqst *rqst);
+extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, bool watch_tree,
+ u32 completion_filter);
+
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8aaa7eec7b74..e88421d9a48d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -164,8 +164,13 @@ static void finish_writeback_work(struct bdi_writeback *wb,
if (work->auto_free)
kfree(work);
- if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(done->waitq);
+ if (done) {
+ wait_queue_head_t *waitq = done->waitq;
+
+ /* @done can't be accessed after the following dec */
+ if (atomic_dec_and_test(&done->cnt))
+ wake_up_all(waitq);
+ }
}
static void wb_queue_work(struct bdi_writeback *wb,
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8a0381f1a43b..92972b5c1d00 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -591,14 +591,6 @@ static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
io_cqring_ev_posted(ctx);
}
-static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
-{
- percpu_ref_put_many(&ctx->refs, refs);
-
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
-}
-
static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
struct io_submit_state *state)
{
@@ -646,7 +638,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
req->result = 0;
return req;
out:
- io_ring_drop_ctx_refs(ctx, 1);
+ percpu_ref_put(&ctx->refs);
return NULL;
}
@@ -654,7 +646,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
{
if (*nr) {
kmem_cache_free_bulk(req_cachep, *nr, reqs);
- io_ring_drop_ctx_refs(ctx, *nr);
+ percpu_ref_put_many(&ctx->refs, *nr);
*nr = 0;
}
}
@@ -663,7 +655,7 @@ static void __io_free_req(struct io_kiocb *req)
{
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
fput(req->file);
- io_ring_drop_ctx_refs(req->ctx, 1);
+ percpu_ref_put(&req->ctx->refs);
kmem_cache_free(req_cachep, req);
}
@@ -2761,7 +2753,7 @@ out:
if (link)
io_queue_link_head(ctx, link, &link->submit, shadow_req,
- block_for_last);
+ !block_for_last);
if (statep)
io_submit_state_end(statep);
@@ -2920,8 +2912,12 @@ static void io_finish_async(struct io_ring_ctx *ctx)
static void io_destruct_skb(struct sk_buff *skb)
{
struct io_ring_ctx *ctx = skb->sk->sk_user_data;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++)
+ if (ctx->sqo_wq[i])
+ flush_workqueue(ctx->sqo_wq[i]);
- io_finish_async(ctx);
unix_destruct_scm(skb);
}
@@ -3630,7 +3626,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
}
}
- io_ring_drop_ctx_refs(ctx, 1);
+ percpu_ref_put(&ctx->refs);
out_fput:
fdput(f);
return submitted ? submitted : ret;
diff --git a/fs/libfs.c b/fs/libfs.c
index c9b2850c0f7c..540611b99b9a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -89,58 +89,45 @@ int dcache_dir_close(struct inode *inode, struct file *file)
EXPORT_SYMBOL(dcache_dir_close);
/* parent is locked at least shared */
-static struct dentry *next_positive(struct dentry *parent,
- struct list_head *from,
- int count)
+/*
+ * Returns an element of siblings' list.
+ * We are looking for <count>th positive after <p>; if
+ * found, dentry is grabbed and returned to caller.
+ * If no such element exists, NULL is returned.
+ */
+static struct dentry *scan_positives(struct dentry *cursor,
+ struct list_head *p,
+ loff_t count,
+ struct dentry *last)
{
- unsigned *seq = &parent->d_inode->i_dir_seq, n;
- struct dentry *res;
- struct list_head *p;
- bool skipped;
- int i;
+ struct dentry *dentry = cursor->d_parent, *found = NULL;
-retry:
- i = count;
- skipped = false;
- n = smp_load_acquire(seq) & ~1;
- res = NULL;
- rcu_read_lock();
- for (p = from->next; p != &parent->d_subdirs; p = p->next) {
+ spin_lock(&dentry->d_lock);
+ while ((p = p->next) != &dentry->d_subdirs) {
struct dentry *d = list_entry(p, struct dentry, d_child);
- if (!simple_positive(d)) {
- skipped = true;
- } else if (!--i) {
- res = d;
- break;
+ // we must at least skip cursors, to avoid livelocks
+ if (d->d_flags & DCACHE_DENTRY_CURSOR)
+ continue;
+ if (simple_positive(d) && !--count) {
+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+ if (simple_positive(d))
+ found = dget_dlock(d);
+ spin_unlock(&d->d_lock);
+ if (likely(found))
+ break;
+ count = 1;
+ }
+ if (need_resched()) {
+ list_move(&cursor->d_child, p);
+ p = &cursor->d_child;
+ spin_unlock(&dentry->d_lock);
+ cond_resched();
+ spin_lock(&dentry->d_lock);
}
}
- rcu_read_unlock();
- if (skipped) {
- smp_rmb();
- if (unlikely(*seq != n))
- goto retry;
- }
- return res;
-}
-
-static void move_cursor(struct dentry *cursor, struct list_head *after)
-{
- struct dentry *parent = cursor->d_parent;
- unsigned n, *seq = &parent->d_inode->i_dir_seq;
- spin_lock(&parent->d_lock);
- for (;;) {
- n = *seq;
- if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
- break;
- cpu_relax();
- }
- __list_del(cursor->d_child.prev, cursor->d_child.next);
- if (after)
- list_add(&cursor->d_child, after);
- else
- list_add_tail(&cursor->d_child, &parent->d_subdirs);
- smp_store_release(seq, n + 2);
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dentry->d_lock);
+ dput(last);
+ return found;
}
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
@@ -158,17 +145,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
return -EINVAL;
}
if (offset != file->f_pos) {
+ struct dentry *cursor = file->private_data;
+ struct dentry *to = NULL;
+
+ inode_lock_shared(dentry->d_inode);
+
+ if (offset > 2)
+ to = scan_positives(cursor, &dentry->d_subdirs,
+ offset - 2, NULL);
+ spin_lock(&dentry->d_lock);
+ if (to)
+ list_move(&cursor->d_child, &to->d_child);
+ else
+ list_del_init(&cursor->d_child);
+ spin_unlock(&dentry->d_lock);
+ dput(to);
+
file->f_pos = offset;
- if (file->f_pos >= 2) {
- struct dentry *cursor = file->private_data;
- struct dentry *to;
- loff_t n = file->f_pos - 2;
-
- inode_lock_shared(dentry->d_inode);
- to = next_positive(dentry, &dentry->d_subdirs, n);
- move_cursor(cursor, to ? &to->d_child : NULL);
- inode_unlock_shared(dentry->d_inode);
- }
+
+ inode_unlock_shared(dentry->d_inode);
}
return offset;
}
@@ -190,25 +185,35 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dentry = file->f_path.dentry;
struct dentry *cursor = file->private_data;
- struct list_head *p = &cursor->d_child;
- struct dentry *next;
- bool moved = false;
+ struct list_head *anchor = &dentry->d_subdirs;
+ struct dentry *next = NULL;
+ struct list_head *p;
if (!dir_emit_dots(file, ctx))
return 0;
if (ctx->pos == 2)
- p = &dentry->d_subdirs;
- while ((next = next_positive(dentry, p, 1)) != NULL) {
+ p = anchor;
+ else if (!list_empty(&cursor->d_child))
+ p = &cursor->d_child;
+ else
+ return 0;
+
+ while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
d_inode(next)->i_ino, dt_type(d_inode(next))))
break;
- moved = true;
- p = &next->d_child;
ctx->pos++;
+ p = &next->d_child;
}
- if (moved)
- move_cursor(cursor, p);
+ spin_lock(&dentry->d_lock);
+ if (next)
+ list_move_tail(&cursor->d_child, &next->d_child);
+ else
+ list_del_init(&cursor->d_child);
+ spin_unlock(&dentry->d_lock);
+ dput(next);
+
return 0;
}
EXPORT_SYMBOL(dcache_readdir);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 222d7115db71..040a50fd9bf3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -64,13 +64,6 @@
static struct kmem_cache *nfs_direct_cachep;
-/*
- * This represents a set of asynchronous requests that we're waiting on
- */
-struct nfs_direct_mirror {
- ssize_t count;
-};
-
struct nfs_direct_req {
struct kref kref; /* release manager */
@@ -84,9 +77,6 @@ struct nfs_direct_req {
atomic_t io_count; /* i/os we're waiting for */
spinlock_t lock; /* protect completion state */
- struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
- int mirror_count;
-
loff_t io_start; /* Start offset for I/O */
ssize_t count, /* bytes actually processed */
max_count, /* max expected count */
@@ -123,32 +113,42 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
}
static void
-nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
+nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
+ const struct nfs_pgio_header *hdr,
+ ssize_t dreq_len)
{
- int i;
- ssize_t count;
+ if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) ||
+ test_bit(NFS_IOHDR_EOF, &hdr->flags)))
+ return;
+ if (dreq->max_count >= dreq_len) {
+ dreq->max_count = dreq_len;
+ if (dreq->count > dreq_len)
+ dreq->count = dreq_len;
+
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
+ dreq->error = hdr->error;
+ else /* Clear outstanding error if this is EOF */
+ dreq->error = 0;
+ }
+}
- WARN_ON_ONCE(dreq->count >= dreq->max_count);
+static void
+nfs_direct_count_bytes(struct nfs_direct_req *dreq,
+ const struct nfs_pgio_header *hdr)
+{
+ loff_t hdr_end = hdr->io_start + hdr->good_bytes;
+ ssize_t dreq_len = 0;
- if (dreq->mirror_count == 1) {
- dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
- dreq->count += hdr->good_bytes;
- } else {
- /* mirrored writes */
- count = dreq->mirrors[hdr->pgio_mirror_idx].count;
- if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
- count = hdr->io_start + hdr->good_bytes - dreq->io_start;
- dreq->mirrors[hdr->pgio_mirror_idx].count = count;
- }
- /* update the dreq->count by finding the minimum agreed count from all
- * mirrors */
- count = dreq->mirrors[0].count;
+ if (hdr_end > dreq->io_start)
+ dreq_len = hdr_end - dreq->io_start;
- for (i = 1; i < dreq->mirror_count; i++)
- count = min(count, dreq->mirrors[i].count);
+ nfs_direct_handle_truncated(dreq, hdr, dreq_len);
- dreq->count = count;
- }
+ if (dreq_len > dreq->max_count)
+ dreq_len = dreq->max_count;
+
+ if (dreq->count < dreq_len)
+ dreq->count = dreq_len;
}
/*
@@ -293,18 +293,6 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
cinfo->completion_ops = &nfs_direct_commit_completion_ops;
}
-static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq,
- struct nfs_pageio_descriptor *pgio,
- struct nfs_page *req)
-{
- int mirror_count = 1;
-
- if (pgio->pg_ops->pg_get_mirror_count)
- mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
-
- dreq->mirror_count = mirror_count;
-}
-
static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
struct nfs_direct_req *dreq;
@@ -319,7 +307,6 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
INIT_LIST_HEAD(&dreq->mds_cinfo.list);
dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
- dreq->mirror_count = 1;
spin_lock_init(&dreq->lock);
return dreq;
@@ -402,20 +389,12 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
struct nfs_direct_req *dreq = hdr->dreq;
spin_lock(&dreq->lock);
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
- dreq->error = hdr->error;
-
if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
spin_unlock(&dreq->lock);
goto out_put;
}
- if (hdr->good_bytes != 0)
- nfs_direct_good_bytes(dreq, hdr);
-
- if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
- dreq->error = 0;
-
+ nfs_direct_count_bytes(dreq, hdr);
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
@@ -646,29 +625,22 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
LIST_HEAD(reqs);
struct nfs_commit_info cinfo;
LIST_HEAD(failed);
- int i;
nfs_init_cinfo_from_dreq(&cinfo, dreq);
nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
dreq->count = 0;
+ dreq->max_count = 0;
+ list_for_each_entry(req, &reqs, wb_list)
+ dreq->max_count += req->wb_bytes;
dreq->verf.committed = NFS_INVALID_STABLE_HOW;
nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
- for (i = 0; i < dreq->mirror_count; i++)
- dreq->mirrors[i].count = 0;
get_dreq(dreq);
nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
- req = nfs_list_entry(reqs.next);
- nfs_direct_setup_mirroring(dreq, &desc, req);
- if (desc.pg_error < 0) {
- list_splice_init(&reqs, &failed);
- goto out_failed;
- }
-
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
/* Bump the transmission count */
req->wb_nio++;
@@ -686,7 +658,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
}
nfs_pageio_complete(&desc);
-out_failed:
while (!list_empty(&failed)) {
req = nfs_list_entry(failed.next);
nfs_list_remove_request(req);
@@ -791,17 +762,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
-
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
- dreq->error = hdr->error;
-
if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
spin_unlock(&dreq->lock);
goto out_put;
}
+ nfs_direct_count_bytes(dreq, hdr);
if (hdr->good_bytes != 0) {
- nfs_direct_good_bytes(dreq, hdr);
if (nfs_write_need_commit(hdr)) {
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
request_commit = true;
@@ -923,7 +890,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
break;
}
- nfs_direct_setup_mirroring(dreq, &desc, req);
if (desc.pg_error < 0) {
nfs_free_request(req);
result = desc.pg_error;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 11eafcfc490b..ab8ca20fd579 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6106,6 +6106,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
status = nfs4_call_sync_custom(&task_setup_data);
if (setclientid.sc_cred) {
+ kfree(clp->cl_acceptor);
clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
put_rpccred(setclientid.sc_cred);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 85ca49549b39..52cab65f91cf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -786,7 +786,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *head;
- atomic_long_dec(&nfsi->nrequests);
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
head = req->wb_head;
@@ -799,8 +798,10 @@ static void nfs_inode_remove_request(struct nfs_page *req)
spin_unlock(&mapping->private_lock);
}
- if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+ if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
nfs_release_request(req);
+ atomic_long_dec(&nfsi->nrequests);
+ }
}
static void
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8de1c9d644f6..9cd0a6815933 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2049,7 +2049,8 @@ out_write_size:
inode->i_mtime = inode->i_ctime = current_time(inode);
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
- ocfs2_update_inode_fsync_trans(handle, inode, 1);
+ if (handle)
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
}
if (handle)
ocfs2_journal_dirty(handle, wc->w_di_bh);
@@ -2146,13 +2147,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
struct ocfs2_dio_write_ctxt *dwc = NULL;
struct buffer_head *di_bh = NULL;
u64 p_blkno;
- loff_t pos = iblock << inode->i_sb->s_blocksize_bits;
+ unsigned int i_blkbits = inode->i_sb->s_blocksize_bits;
+ loff_t pos = iblock << i_blkbits;
+ sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits;
unsigned len, total_len = bh_result->b_size;
int ret = 0, first_get_block = 0;
len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
len = min(total_len, len);
+ /*
+ * bh_result->b_size is count in get_more_blocks according to write
+ * "pos" and "end", we need map twice to return different buffer state:
+ * 1. area in file size, not set NEW;
+ * 2. area out file size, set NEW.
+ *
+ * iblock endblk
+ * |--------|---------|---------|---------
+ * |<-------area in file------->|
+ */
+
+ if ((iblock <= endblk) &&
+ ((iblock + ((len - 1) >> i_blkbits)) > endblk))
+ len = (endblk - iblock + 1) << i_blkbits;
+
mlog(0, "get block of %lu at %llu:%u req %u\n",
inode->i_ino, pos, len, total_len);
@@ -2236,6 +2254,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
if (desc->c_needs_zero)
set_buffer_new(bh_result);
+ if (iblock > endblk)
+ set_buffer_new(bh_result);
+
/* May sleep in end_io. It should not happen in a irq context. So defer
* it to dio work queue. */
set_buffer_defer_completion(bh_result);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index d6f7b299eb23..efeea208fdeb 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -283,7 +283,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
if (inode_alloc)
inode_lock(inode_alloc);
- if (o2info_coherent(&fi->ifi_req)) {
+ if (inode_alloc && o2info_coherent(&fi->ifi_req)) {
status = ocfs2_inode_lock(inode_alloc, &bh, 0);
if (status < 0) {
mlog_errno(status);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 90c830e3758e..d8507972ee13 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1490,18 +1490,6 @@ static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
return loc->xl_ops->xlo_check_space(loc, xi);
}
-static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
-{
- loc->xl_ops->xlo_add_entry(loc, name_hash);
- loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
- /*
- * We can't leave the new entry's xe_name_offset at zero or
- * add_namevalue() will go nuts. We set it to the size of our
- * storage so that it can never be less than any other entry.
- */
- loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
-}
-
static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
struct ocfs2_xattr_info *xi)
{
@@ -2133,29 +2121,31 @@ static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
if (rc)
goto out;
- if (loc->xl_entry) {
- if (ocfs2_xa_can_reuse_entry(loc, xi)) {
- orig_value_size = loc->xl_entry->xe_value_size;
- rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
- if (rc)
- goto out;
- goto alloc_value;
- }
+ if (!loc->xl_entry) {
+ rc = -EINVAL;
+ goto out;
+ }
- if (!ocfs2_xattr_is_local(loc->xl_entry)) {
- orig_clusters = ocfs2_xa_value_clusters(loc);
- rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
- if (rc) {
- mlog_errno(rc);
- ocfs2_xa_cleanup_value_truncate(loc,
- "overwriting",
- orig_clusters);
- goto out;
- }
+ if (ocfs2_xa_can_reuse_entry(loc, xi)) {
+ orig_value_size = loc->xl_entry->xe_value_size;
+ rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
+ if (rc)
+ goto out;
+ goto alloc_value;
+ }
+
+ if (!ocfs2_xattr_is_local(loc->xl_entry)) {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
+ rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
+ if (rc) {
+ mlog_errno(rc);
+ ocfs2_xa_cleanup_value_truncate(loc,
+ "overwriting",
+ orig_clusters);
+ goto out;
}
- ocfs2_xa_wipe_namevalue(loc);
- } else
- ocfs2_xa_add_entry(loc, name_hash);
+ }
+ ocfs2_xa_wipe_namevalue(loc);
/*
* If we get here, we have a blank entry. Fill it. We grow our
diff --git a/fs/readdir.c b/fs/readdir.c
index 19bea591c3f1..6e2623e57b2e 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -27,53 +27,13 @@
/*
* Note the "unsafe_put_user() semantics: we goto a
* label for errors.
- *
- * Also note how we use a "while()" loop here, even though
- * only the biggest size needs to loop. The compiler (well,
- * at least gcc) is smart enough to turn the smaller sizes
- * into just if-statements, and this way we don't need to
- * care whether 'u64' or 'u32' is the biggest size.
- */
-#define unsafe_copy_loop(dst, src, len, type, label) \
- while (len >= sizeof(type)) { \
- unsafe_put_user(get_unaligned((type *)src), \
- (type __user *)dst, label); \
- dst += sizeof(type); \
- src += sizeof(type); \
- len -= sizeof(type); \
- }
-
-/*
- * We avoid doing 64-bit copies on 32-bit architectures. They
- * might be better, but the component names are mostly small,
- * and the 64-bit cases can end up being much more complex and
- * put much more register pressure on the code, so it's likely
- * not worth the pain of unaligned accesses etc.
- *
- * So limit the copies to "unsigned long" size. I did verify
- * that at least the x86-32 case is ok without this limiting,
- * but I worry about random other legacy 32-bit cases that
- * might not do as well.
- */
-#define unsafe_copy_type(dst, src, len, type, label) do { \
- if (sizeof(type) <= sizeof(unsigned long)) \
- unsafe_copy_loop(dst, src, len, type, label); \
-} while (0)
-
-/*
- * Copy the dirent name to user space, and NUL-terminate
- * it. This should not be a function call, since we're doing
- * the copy inside a "user_access_begin/end()" section.
*/
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \
char __user *dst = (_dst); \
const char *src = (_src); \
size_t len = (_len); \
- unsafe_copy_type(dst, src, len, u64, label); \
- unsafe_copy_type(dst, src, len, u32, label); \
- unsafe_copy_type(dst, src, len, u16, label); \
- unsafe_copy_type(dst, src, len, u8, label); \
- unsafe_put_user(0, dst, label); \
+ unsafe_put_user(0, dst+len, label); \
+ unsafe_copy_to_user(dst, src, len, label); \
} while (0)
diff --git a/fs/super.c b/fs/super.c
index f627b7c53d2b..cfadab2cbf35 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1300,6 +1300,7 @@ int get_tree_bdev(struct fs_context *fc,
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ blkdev_put(bdev, mode);
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
return -EBUSY;
}
@@ -1308,8 +1309,10 @@ int get_tree_bdev(struct fs_context *fc,
fc->sget_key = bdev;
s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- if (IS_ERR(s))
+ if (IS_ERR(s)) {
+ blkdev_put(bdev, mode);
return PTR_ERR(s);
+ }
if (s->s_root) {
/* Don't summarily change the RO/RW state. */
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 5de296b34ab1..14fbdf22b7e7 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -28,12 +28,11 @@ xfs_get_aghdr_buf(
struct xfs_mount *mp,
xfs_daddr_t blkno,
size_t numblks,
- int flags,
const struct xfs_buf_ops *ops)
{
struct xfs_buf *bp;
- bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
+ bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0);
if (!bp)
return NULL;
@@ -345,7 +344,7 @@ xfs_ag_init_hdr(
{
struct xfs_buf *bp;
- bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops);
+ bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, ops);
if (!bp)
return -ENOMEM;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index b9f019603d0b..f0089e862216 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -826,32 +826,17 @@ xfs_attr_shortform_to_leaf(
sf = (xfs_attr_shortform_t *)tmpbuffer;
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
- xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK);
+ xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK);
bp = NULL;
error = xfs_da_grow_inode(args, &blkno);
- if (error) {
- /*
- * If we hit an IO error middle of the transaction inside
- * grow_inode(), we may have inconsistent data. Bail out.
- */
- if (error == -EIO)
- goto out;
- xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
- memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
+ if (error)
goto out;
- }
ASSERT(blkno == 0);
error = xfs_attr3_leaf_create(args, blkno, &bp);
- if (error) {
- /* xfs_attr3_leaf_create may not have instantiated a block */
- if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0))
- goto out;
- xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
- memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
+ if (error)
goto out;
- }
memset((char *)&nargs, 0, sizeof(nargs));
nargs.dp = dp;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 4edc25a2ba80..02469d59c787 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -792,6 +792,7 @@ out_root_realloc:
*/
void
xfs_bmap_local_to_extents_empty(
+ struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork)
{
@@ -808,6 +809,7 @@ xfs_bmap_local_to_extents_empty(
ifp->if_u1.if_root = NULL;
ifp->if_height = 0;
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
@@ -840,7 +842,7 @@ xfs_bmap_local_to_extents(
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
if (!ifp->if_bytes) {
- xfs_bmap_local_to_extents_empty(ip, whichfork);
+ xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
flags = XFS_ILOG_CORE;
goto done;
}
@@ -887,7 +889,7 @@ xfs_bmap_local_to_extents(
/* account for the change in fork size */
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
- xfs_bmap_local_to_extents_empty(ip, whichfork);
+ xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
flags |= XFS_ILOG_CORE;
ifp->if_u1.if_root = NULL;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 5bb446d80542..e2798c6f3a5f 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -182,7 +182,8 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version);
-void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
+void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp,
+ struct xfs_inode *ip, int whichfork);
void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
bool skip_discard);
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 9595ced393dc..49e4bc39e7bb 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -1096,7 +1096,7 @@ xfs_dir2_sf_to_block(
memcpy(sfp, oldsfp, ifp->if_bytes);
xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
- xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK);
+ xfs_bmap_local_to_extents_empty(tp, dp, XFS_DATA_FORK);
dp->i_d.di_size = 0;
/*
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 93b3793bc5b3..0cab11a5d390 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -341,7 +341,6 @@ xchk_refcountbt_rec(
xfs_extlen_t len;
xfs_nlink_t refcount;
bool has_cowflag;
- int error = 0;
bno = be32_to_cpu(rec->refc.rc_startblock);
len = be32_to_cpu(rec->refc.rc_blockcount);
@@ -366,7 +365,7 @@ xchk_refcountbt_rec(
xchk_refcountbt_xref(bs->sc, bno, len, refcount);
- return error;
+ return 0;
}
/* Make sure we have as many refc blocks as the rmap says. */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0910cb75b65d..4f443703065e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -864,6 +864,7 @@ xfs_alloc_file_space(
xfs_filblks_t allocatesize_fsb;
xfs_extlen_t extsz, temp;
xfs_fileoff_t startoffset_fsb;
+ xfs_fileoff_t endoffset_fsb;
int nimaps;
int quota_flag;
int rt;
@@ -891,7 +892,8 @@ xfs_alloc_file_space(
imapp = &imaps[0];
nimaps = 1;
startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
- allocatesize_fsb = XFS_B_TO_FSB(mp, count);
+ endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
+ allocatesize_fsb = endoffset_fsb - startoffset_fsb;
/*
* Allocate file space until done or until there is an error
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 21c243622a79..0abba171aa89 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -345,6 +345,15 @@ xfs_buf_allocate_memory(
unsigned short page_count, i;
xfs_off_t start, end;
int error;
+ xfs_km_flags_t kmflag_mask = 0;
+
+ /*
+ * assure zeroed buffer for non-read cases.
+ */
+ if (!(flags & XBF_READ)) {
+ kmflag_mask |= KM_ZERO;
+ gfp_mask |= __GFP_ZERO;
+ }
/*
* for buffers that are contained within a single page, just allocate
@@ -354,7 +363,8 @@ xfs_buf_allocate_memory(
size = BBTOB(bp->b_length);
if (size < PAGE_SIZE) {
int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
- bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS);
+ bp->b_addr = kmem_alloc_io(size, align_mask,
+ KM_NOFS | kmflag_mask);
if (!bp->b_addr) {
/* low memory - use alloc_page loop instead */
goto use_alloc_page;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a2beee9f74da..641d07f30a27 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1443,7 +1443,7 @@ xlog_alloc_log(
prev_iclog = iclog;
iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
- KM_MAYFAIL);
+ KM_MAYFAIL | KM_ZERO);
if (!iclog->ic_data)
goto out_free_iclog;
#ifdef DEBUG
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 508319039dce..c1a514ffff55 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -127,7 +127,7 @@ xlog_alloc_buffer(
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL);
+ return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO);
}
/*
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 6b318efd8a74..cdf016596659 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -40,6 +40,7 @@
# define __GCC4_has_attribute___noclone__ 1
# define __GCC4_has_attribute___nonstring__ 0
# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
+# define __GCC4_has_attribute___fallthrough__ 0
#endif
/*
@@ -186,6 +187,22 @@
#endif
/*
+ * Add the pseudo keyword 'fallthrough' so case statement blocks
+ * must end with any of these keywords:
+ * break;
+ * fallthrough;
+ * goto <label>;
+ * return [expression];
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#Statement-Attributes
+ */
+#if __has_attribute(__fallthrough__)
+# define fallthrough __attribute__((__fallthrough__))
+#else
+# define fallthrough do {} while (0) /* fallthrough */
+#endif
+
+/*
* Note the missing underscores.
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute
diff --git a/include/linux/export.h b/include/linux/export.h
index 95f55b7f83a0..621158ecd2e2 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -52,10 +52,10 @@ extern struct module __this_module;
__ADDRESSABLE(sym) \
asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \
" .balign 4 \n" \
- "__ksymtab_" #sym NS_SEPARATOR #ns ": \n" \
+ "__ksymtab_" #ns NS_SEPARATOR #sym ": \n" \
" .long " #sym "- . \n" \
" .long __kstrtab_" #sym "- . \n" \
- " .long __kstrtab_ns_" #sym "- . \n" \
+ " .long __kstrtabns_" #sym "- . \n" \
" .previous \n")
#define __KSYMTAB_ENTRY(sym, sec) \
@@ -76,10 +76,10 @@ struct kernel_symbol {
#else
#define __KSYMTAB_ENTRY_NS(sym, sec, ns) \
static const struct kernel_symbol __ksymtab_##sym##__##ns \
- asm("__ksymtab_" #sym NS_SEPARATOR #ns) \
+ asm("__ksymtab_" #ns NS_SEPARATOR #sym) \
__attribute__((section("___ksymtab" sec "+" #sym), used)) \
__aligned(sizeof(void *)) \
- = { (unsigned long)&sym, __kstrtab_##sym, __kstrtab_ns_##sym }
+ = { (unsigned long)&sym, __kstrtab_##sym, __kstrtabns_##sym }
#define __KSYMTAB_ENTRY(sym, sec) \
static const struct kernel_symbol __ksymtab_##sym \
@@ -112,7 +112,7 @@ struct kernel_symbol {
/* For every exported symbol, place a struct in the __ksymtab section */
#define ___EXPORT_SYMBOL_NS(sym, sec, ns) \
___export_symbol_common(sym, sec); \
- static const char __kstrtab_ns_##sym[] \
+ static const char __kstrtabns_##sym[] \
__attribute__((section("__ksymtab_strings"), used, aligned(1))) \
= #ns; \
__KSYMTAB_ENTRY_NS(sym, sec, ns)
diff --git a/include/linux/leds.h b/include/linux/leds.h
index b8df71193329..efb309dba914 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -247,7 +247,7 @@ extern void led_set_brightness(struct led_classdev *led_cdev,
/**
* led_set_brightness_sync - set LED brightness synchronously
* @led_cdev: the LED to set
- * @brightness: the brightness to set it to
+ * @value: the brightness to set it to
*
* Set an LED's brightness immediately. This function will block
* the caller for the time required for accessing device registers,
@@ -301,8 +301,7 @@ extern void led_sysfs_enable(struct led_classdev *led_cdev);
/**
* led_compose_name - compose LED class device name
* @dev: LED controller device object
- * @child: child fwnode_handle describing a LED or a group of synchronized LEDs;
- * it must be provided only for fwnode based LEDs
+ * @init_data: the LED class device initialization data
* @led_classdev_name: composed LED class device name
*
* Create LED class device name basing on the provided init_data argument.
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9b60863429cc..ae703ea3ef48 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -356,6 +356,19 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+ bool in_low_reclaim)
+{
+ if (mem_cgroup_disabled())
+ return 0;
+
+ if (in_low_reclaim)
+ return READ_ONCE(memcg->memory.emin);
+
+ return max(READ_ONCE(memcg->memory.emin),
+ READ_ONCE(memcg->memory.elow));
+}
+
enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
struct mem_cgroup *memcg);
@@ -537,6 +550,8 @@ void mem_cgroup_handle_over_high(void);
unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
+unsigned long mem_cgroup_size(struct mem_cgroup *memcg);
+
void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
struct task_struct *p);
@@ -829,6 +844,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
{
}
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+ bool in_low_reclaim)
+{
+ return 0;
+}
+
static inline enum mem_cgroup_protection mem_cgroup_protected(
struct mem_cgroup *root, struct mem_cgroup *memcg)
{
@@ -968,6 +989,11 @@ static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
return 0;
}
+static inline unsigned long mem_cgroup_size(struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
static inline void
mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p)
{
@@ -1264,6 +1290,9 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
static inline void mem_cgroup_track_foreign_dirty(struct page *page,
struct bdi_writeback *wb)
{
+ if (mem_cgroup_disabled())
+ return;
+
if (unlikely(&page->mem_cgroup->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(page, wb);
}
diff --git a/include/linux/slab.h b/include/linux/slab.h
index ab2b98ad76e1..4d2a2fa55ed5 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -493,6 +493,10 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
* kmalloc is the normal method of allocating memory
* for objects smaller than page size in the kernel.
*
+ * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
+ * bytes. For @size of power of two bytes, the alignment is also guaranteed
+ * to be at least to the size.
+ *
* The @flags argument may be one of the GFP flags defined at
* include/linux/gfp.h and described at
* :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 7638dbe7bc50..a940de03808d 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -61,6 +61,7 @@ struct sock_xprt {
struct mutex recv_mutex;
struct sockaddr_storage srcaddr;
unsigned short srcport;
+ int xprt_err;
/*
* UDP socket buffer size parameters
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index e47d0522a1f4..d4ee6e942562 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -355,8 +355,10 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
#ifndef user_access_begin
#define user_access_begin(ptr,len) access_ok(ptr, len)
#define user_access_end() do { } while (0)
-#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
-#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
+#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
+#define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e)
+#define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e)
+#define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e)
static inline unsigned long user_access_save(void) { return 0UL; }
static inline void user_access_restore(unsigned long flags) { }
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 1f6c45f6a734..bcdf53125210 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2925,7 +2925,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
struct ctl_table t;
int ret;
int threads = max_threads;
- int min = MIN_THREADS;
+ int min = 1;
int max = MAX_THREADS;
t = *table;
@@ -2937,7 +2937,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
if (ret || !write)
return ret;
- set_max_threads(threads);
+ max_threads = threads;
return 0;
}
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c0738424bb43..dc520f01f99d 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -22,12 +22,6 @@ EXPORT_SYMBOL(system_freezing_cnt);
bool pm_freezing;
bool pm_nosig_freezing;
-/*
- * Temporary export for the deadlock workaround in ata_scsi_hotplug().
- * Remove once the hack becomes unnecessary.
- */
-EXPORT_SYMBOL_GPL(pm_freezing);
-
/* protects freezing and frozen transitions */
static DEFINE_SPINLOCK(freezer_lock);
diff --git a/kernel/panic.c b/kernel/panic.c
index 47e8ebccc22b..f470a038b05b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -180,6 +180,7 @@ void panic(const char *fmt, ...)
* after setting panic_cpu) from invoking panic() again.
*/
local_irq_disable();
+ preempt_disable_notrace();
/*
* It's possible to come here directly from a panic-assertion and
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
index cc00364bd2c2..9fe698ff62ec 100644
--- a/lib/vdso/Kconfig
+++ b/lib/vdso/Kconfig
@@ -24,13 +24,4 @@ config GENERIC_COMPAT_VDSO
help
This config option enables the compat VDSO layer.
-config CROSS_COMPILE_COMPAT_VDSO
- string "32 bit Toolchain prefix for compat vDSO"
- default ""
- depends on GENERIC_COMPAT_VDSO
- help
- Defines the cross-compiler prefix for compiling compat vDSO.
- If a 64 bit compiler (i.e. x86_64) can compile the VDSO for
- 32 bit, it does not need to define this parameter.
-
endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d9daa3e422d0..c360f6a6c844 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -239,8 +239,8 @@ static int __init default_bdi_init(void)
{
int err;
- bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
- WQ_UNBOUND | WQ_SYSFS, 0);
+ bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
+ WQ_SYSFS, 0);
if (!bdi_wq)
return -ENOMEM;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c313c49074ca..bdac56009a38 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1567,6 +1567,11 @@ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
return max;
}
+unsigned long mem_cgroup_size(struct mem_cgroup *memcg)
+{
+ return page_counter_read(&memcg->memory);
+}
+
static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
int order)
{
diff --git a/mm/memremap.c b/mm/memremap.c
index 32c79b51af86..68204912cc0a 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -13,8 +13,6 @@
#include <linux/xarray.h>
static DEFINE_XARRAY(pgmap_array);
-#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
-#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
#ifdef CONFIG_DEV_PAGEMAP_OPS
DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 15c2050c629b..c0b2e0306720 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1175,11 +1175,17 @@ static __always_inline bool free_pages_prepare(struct page *page,
debug_check_no_obj_freed(page_address(page),
PAGE_SIZE << order);
}
- arch_free_page(page, order);
if (want_init_on_free())
kernel_init_free_pages(page, 1 << order);
kernel_poison_pages(page, 1 << order, 0);
+ /*
+ * arch_free_page() can make the page's contents inaccessible. s390
+ * does this. So nothing which can access the page's contents should
+ * happen after this.
+ */
+ arch_free_page(page, order);
+
if (debug_pagealloc_enabled())
kernel_map_pages(page, 1 << order, 0);
diff --git a/mm/shmem.c b/mm/shmem.c
index cd570cc79c76..220be9fa2c41 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3482,6 +3482,12 @@ static int shmem_parse_options(struct fs_context *fc, void *data)
{
char *options = data;
+ if (options) {
+ int err = security_sb_eat_lsm_opts(options, &fc->security);
+ if (err)
+ return err;
+ }
+
while (options != NULL) {
char *this_char = options;
for (;;) {
diff --git a/mm/shuffle.c b/mm/shuffle.c
index 3ce12481b1dc..b3fe97fd6654 100644
--- a/mm/shuffle.c
+++ b/mm/shuffle.c
@@ -33,7 +33,7 @@ __meminit void page_alloc_shuffle(enum mm_shuffle_ctl ctl)
}
static bool shuffle_param;
-extern int shuffle_show(char *buffer, const struct kernel_param *kp)
+static int shuffle_show(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "%c\n", test_bit(SHUFFLE_ENABLE, &shuffle_state)
? 'Y' : 'N');
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6491c3a41805..c29f03adca91 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1030,10 +1030,19 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
unsigned int useroffset, unsigned int usersize)
{
int err;
+ unsigned int align = ARCH_KMALLOC_MINALIGN;
s->name = name;
s->size = s->object_size = size;
- s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+
+ /*
+ * For power of two sizes, guarantee natural alignment for kmalloc
+ * caches, regardless of SL*B debugging options.
+ */
+ if (is_power_of_2(size))
+ align = max(align, size);
+ s->align = calculate_alignment(flags, align, size);
+
s->useroffset = useroffset;
s->usersize = usersize;
@@ -1287,12 +1296,16 @@ void __init create_kmalloc_caches(slab_flags_t flags)
*/
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
- void *ret;
+ void *ret = NULL;
struct page *page;
flags |= __GFP_COMP;
page = alloc_pages(flags, order);
- ret = page ? page_address(page) : NULL;
+ if (likely(page)) {
+ ret = page_address(page);
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+ 1 << order);
+ }
ret = kasan_kmalloc_large(ret, size, flags);
/* As ret might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ret, size, 1, flags);
diff --git a/mm/slob.c b/mm/slob.c
index cf377beab962..fa53e9f73893 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -190,7 +190,7 @@ static int slob_last(slob_t *s)
static void *slob_new_pages(gfp_t gfp, int order, int node)
{
- void *page;
+ struct page *page;
#ifdef CONFIG_NUMA
if (node != NUMA_NO_NODE)
@@ -202,14 +202,21 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
if (!page)
return NULL;
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+ 1 << order);
return page_address(page);
}
static void slob_free_pages(void *b, int order)
{
+ struct page *sp = virt_to_page(b);
+
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += 1 << order;
- free_pages((unsigned long)b, order);
+
+ mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE,
+ -(1 << order));
+ __free_pages(sp, order);
}
/*
@@ -217,6 +224,7 @@ static void slob_free_pages(void *b, int order)
* @sp: Page to look in.
* @size: Size of the allocation.
* @align: Allocation alignment.
+ * @align_offset: Offset in the allocated block that will be aligned.
* @page_removed_from_list: Return parameter.
*
* Tries to find a chunk of memory at least @size bytes big within @page.
@@ -227,7 +235,7 @@ static void slob_free_pages(void *b, int order)
* true (set to false otherwise).
*/
static void *slob_page_alloc(struct page *sp, size_t size, int align,
- bool *page_removed_from_list)
+ int align_offset, bool *page_removed_from_list)
{
slob_t *prev, *cur, *aligned = NULL;
int delta = 0, units = SLOB_UNITS(size);
@@ -236,8 +244,17 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
slobidx_t avail = slob_units(cur);
+ /*
+ * 'aligned' will hold the address of the slob block so that the
+ * address 'aligned'+'align_offset' is aligned according to the
+ * 'align' parameter. This is for kmalloc() which prepends the
+ * allocated block with its size, so that the block itself is
+ * aligned when needed.
+ */
if (align) {
- aligned = (slob_t *)ALIGN((unsigned long)cur, align);
+ aligned = (slob_t *)
+ (ALIGN((unsigned long)cur + align_offset, align)
+ - align_offset);
delta = aligned - cur;
}
if (avail >= units + delta) { /* room enough? */
@@ -281,7 +298,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
/*
* slob_alloc: entry point into the slob allocator.
*/
-static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
+static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
+ int align_offset)
{
struct page *sp;
struct list_head *slob_list;
@@ -312,7 +330,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
if (sp->units < SLOB_UNITS(size))
continue;
- b = slob_page_alloc(sp, size, align, &page_removed_from_list);
+ b = slob_page_alloc(sp, size, align, align_offset, &page_removed_from_list);
if (!b)
continue;
@@ -349,7 +367,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
INIT_LIST_HEAD(&sp->slab_list);
set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
set_slob_page_free(sp, slob_list);
- b = slob_page_alloc(sp, size, align, &_unused);
+ b = slob_page_alloc(sp, size, align, align_offset, &_unused);
BUG_ON(!b);
spin_unlock_irqrestore(&slob_lock, flags);
}
@@ -451,7 +469,7 @@ static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
{
unsigned int *m;
- int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+ int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
void *ret;
gfp &= gfp_allowed_mask;
@@ -459,19 +477,28 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
fs_reclaim_acquire(gfp);
fs_reclaim_release(gfp);
- if (size < PAGE_SIZE - align) {
+ if (size < PAGE_SIZE - minalign) {
+ int align = minalign;
+
+ /*
+ * For power of two sizes, guarantee natural alignment for
+ * kmalloc()'d objects.
+ */
+ if (is_power_of_2(size))
+ align = max(minalign, (int) size);
+
if (!size)
return ZERO_SIZE_PTR;
- m = slob_alloc(size + align, gfp, align, node);
+ m = slob_alloc(size + minalign, gfp, align, node, minalign);
if (!m)
return NULL;
*m = size;
- ret = (void *)m + align;
+ ret = (void *)m + minalign;
trace_kmalloc_node(caller, ret,
- size, size + align, gfp, node);
+ size, size + minalign, gfp, node);
} else {
unsigned int order = get_order(size);
@@ -521,8 +548,13 @@ void kfree(const void *block)
int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
- } else
- __free_pages(sp, compound_order(sp));
+ } else {
+ unsigned int order = compound_order(sp);
+ mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE,
+ -(1 << order));
+ __free_pages(sp, order);
+
+ }
}
EXPORT_SYMBOL(kfree);
@@ -567,7 +599,7 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
fs_reclaim_release(flags);
if (c->size < PAGE_SIZE) {
- b = slob_alloc(c->size, flags, c->align, node);
+ b = slob_alloc(c->size, flags, c->align, node, 0);
trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
SLOB_UNITS(c->size) * SLOB_UNIT,
flags, node);
diff --git a/mm/slub.c b/mm/slub.c
index 42c1b3af3c98..3d63ae320d31 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3821,11 +3821,15 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct page *page;
void *ptr = NULL;
+ unsigned int order = get_order(size);
flags |= __GFP_COMP;
- page = alloc_pages_node(node, flags, get_order(size));
- if (page)
+ page = alloc_pages_node(node, flags, order);
+ if (page) {
ptr = page_address(page);
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+ 1 << order);
+ }
return kmalloc_large_node_hook(ptr, size, flags);
}
@@ -3951,9 +3955,13 @@ void kfree(const void *x)
page = virt_to_head_page(x);
if (unlikely(!PageSlab(page))) {
+ unsigned int order = compound_order(page);
+
BUG_ON(!PageCompound(page));
kfree_hook(object);
- __free_pages(page, compound_order(page));
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+ -(1 << order));
+ __free_pages(page, order);
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
diff --git a/mm/sparse.c b/mm/sparse.c
index bf32de9e666b..f6891c1992b1 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -219,7 +219,7 @@ static inline unsigned long first_present_section_nr(void)
return next_present_section_nr(-1);
}
-void subsection_mask_set(unsigned long *map, unsigned long pfn,
+static void subsection_mask_set(unsigned long *map, unsigned long pfn,
unsigned long nr_pages)
{
int idx = subsection_map_index(pfn);
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index f3b50811497a..4bac22fe1aa2 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -355,6 +355,9 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
* "hierarchy" or "local").
*
* To be used as memcg event method.
+ *
+ * Return: 0 on success, -ENOMEM on memory failure or -EINVAL if @args could
+ * not be parsed.
*/
int vmpressure_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd, const char *args)
@@ -362,7 +365,7 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
struct vmpressure_event *ev;
enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH;
- enum vmpressure_levels level = -1;
+ enum vmpressure_levels level;
char *spec, *spec_orig;
char *token;
int ret = 0;
@@ -375,20 +378,18 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
/* Find required level */
token = strsep(&spec, ",");
- level = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
- if (level < 0) {
- ret = level;
+ ret = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
+ if (ret < 0)
goto out;
- }
+ level = ret;
/* Find optional mode */
token = strsep(&spec, ",");
if (token) {
- mode = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
- if (mode < 0) {
- ret = mode;
+ ret = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
+ if (ret < 0)
goto out;
- }
+ mode = ret;
}
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -404,6 +405,7 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
mutex_lock(&vmpr->events_lock);
list_add(&ev->node, &vmpr->events);
mutex_unlock(&vmpr->events_lock);
+ ret = 0;
out:
kfree(spec_orig);
return ret;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5d52d6a24af..c6659bb758a4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2459,17 +2459,70 @@ out:
*lru_pages = 0;
for_each_evictable_lru(lru) {
int file = is_file_lru(lru);
- unsigned long size;
+ unsigned long lruvec_size;
unsigned long scan;
+ unsigned long protection;
+
+ lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
+ protection = mem_cgroup_protection(memcg,
+ sc->memcg_low_reclaim);
+
+ if (protection) {
+ /*
+ * Scale a cgroup's reclaim pressure by proportioning
+ * its current usage to its memory.low or memory.min
+ * setting.
+ *
+ * This is important, as otherwise scanning aggression
+ * becomes extremely binary -- from nothing as we
+ * approach the memory protection threshold, to totally
+ * nominal as we exceed it. This results in requiring
+ * setting extremely liberal protection thresholds. It
+ * also means we simply get no protection at all if we
+ * set it too low, which is not ideal.
+ *
+ * If there is any protection in place, we reduce scan
+ * pressure by how much of the total memory used is
+ * within protection thresholds.
+ *
+ * There is one special case: in the first reclaim pass,
+ * we skip over all groups that are within their low
+ * protection. If that fails to reclaim enough pages to
+ * satisfy the reclaim goal, we come back and override
+ * the best-effort low protection. However, we still
+ * ideally want to honor how well-behaved groups are in
+ * that case instead of simply punishing them all
+ * equally. As such, we reclaim them based on how much
+ * memory they are using, reducing the scan pressure
+ * again by how much of the total memory used is under
+ * hard protection.
+ */
+ unsigned long cgroup_size = mem_cgroup_size(memcg);
+
+ /* Avoid TOCTOU with earlier protection check */
+ cgroup_size = max(cgroup_size, protection);
+
+ scan = lruvec_size - lruvec_size * protection /
+ cgroup_size;
+
+ /*
+ * Minimally target SWAP_CLUSTER_MAX pages to keep
+ * reclaim moving forwards, avoiding decremeting
+ * sc->priority further than desirable.
+ */
+ scan = max(scan, SWAP_CLUSTER_MAX);
+ } else {
+ scan = lruvec_size;
+ }
+
+ scan >>= sc->priority;
- size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
- scan = size >> sc->priority;
/*
* If the cgroup's already been deleted, make sure to
* scrape out the remaining cache.
*/
if (!scan && !mem_cgroup_online(memcg))
- scan = min(size, SWAP_CLUSTER_MAX);
+ scan = min(lruvec_size, SWAP_CLUSTER_MAX);
switch (scan_balance) {
case SCAN_EQUAL:
@@ -2489,7 +2542,7 @@ out:
case SCAN_ANON:
/* Scan one type exclusively */
if ((scan_balance == SCAN_FILE) != file) {
- size = 0;
+ lruvec_size = 0;
scan = 0;
}
break;
@@ -2498,7 +2551,7 @@ out:
BUG();
}
- *lru_pages += size;
+ *lru_pages += lruvec_size;
nr[lru] = scan;
}
}
@@ -2742,6 +2795,13 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
memcg_memory_event(memcg, MEMCG_LOW);
break;
case MEMCG_PROT_NONE:
+ /*
+ * All protection thresholds breached. We may
+ * still choose to vary the scan pressure
+ * applied based on by how much the cgroup in
+ * question has exceeded its protection
+ * thresholds (see get_scan_count).
+ */
break;
}
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 05bdf90646e7..6d3d3f698ebb 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -998,9 +998,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
struct z3fold_header *zhdr;
struct page *page;
enum buddy bud;
+ bool page_claimed;
zhdr = handle_to_z3fold_header(handle);
page = virt_to_page(zhdr);
+ page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);
if (test_bit(PAGE_HEADLESS, &page->private)) {
/* if a headless page is under reclaim, just leave.
@@ -1008,7 +1010,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
* has not been set before, we release this page
* immediately so we don't care about its value any more.
*/
- if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+ if (!page_claimed) {
spin_lock(&pool->lock);
list_del(&page->lru);
spin_unlock(&pool->lock);
@@ -1044,13 +1046,15 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
atomic64_dec(&pool->pages_nr);
return;
}
- if (test_bit(PAGE_CLAIMED, &page->private)) {
+ if (page_claimed) {
+ /* the page has not been claimed by us */
z3fold_page_unlock(zhdr);
return;
}
if (unlikely(PageIsolated(page)) ||
test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
z3fold_page_unlock(zhdr);
+ clear_bit(PAGE_CLAIMED, &page->private);
return;
}
if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
@@ -1060,10 +1064,12 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
zhdr->cpu = -1;
kref_get(&zhdr->refcount);
do_compact_page(zhdr, true);
+ clear_bit(PAGE_CLAIMED, &page->private);
return;
}
kref_get(&zhdr->refcount);
queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
+ clear_bit(PAGE_CLAIMED, &page->private);
z3fold_page_unlock(zhdr);
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index e41ed2e0ae7d..48d63956a68c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2155,7 +2155,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
case SCTP_PARAM_SET_PRIMARY:
if (ep->asconf_enable)
break;
- goto fallthrough;
+ goto unhandled;
case SCTP_PARAM_HOST_NAME_ADDRESS:
/* Tell the peer, we won't support this param. */
@@ -2166,11 +2166,11 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
case SCTP_PARAM_FWD_TSN_SUPPORT:
if (ep->prsctp_enable)
break;
- goto fallthrough;
+ goto unhandled;
case SCTP_PARAM_RANDOM:
if (!ep->auth_enable)
- goto fallthrough;
+ goto unhandled;
/* SCTP-AUTH: Secion 6.1
* If the random number is not 32 byte long the association
@@ -2187,7 +2187,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
case SCTP_PARAM_CHUNKS:
if (!ep->auth_enable)
- goto fallthrough;
+ goto unhandled;
/* SCTP-AUTH: Section 3.2
* The CHUNKS parameter MUST be included once in the INIT or
@@ -2203,7 +2203,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
case SCTP_PARAM_HMAC_ALGO:
if (!ep->auth_enable)
- goto fallthrough;
+ goto unhandled;
hmacs = (struct sctp_hmac_algo_param *)param.p;
n_elt = (ntohs(param.p->length) -
@@ -2226,7 +2226,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
retval = SCTP_IERROR_ABORT;
}
break;
-fallthrough:
+unhandled:
default:
pr_debug("%s: unrecognized param:%d for chunk:%d\n",
__func__, ntohs(param.p->type), cid);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9ac88722fa83..70e52f567b2a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1249,19 +1249,21 @@ static void xs_error_report(struct sock *sk)
{
struct sock_xprt *transport;
struct rpc_xprt *xprt;
- int err;
read_lock_bh(&sk->sk_callback_lock);
if (!(xprt = xprt_from_sock(sk)))
goto out;
transport = container_of(xprt, struct sock_xprt, xprt);
- err = -sk->sk_err;
- if (err == 0)
+ transport->xprt_err = -sk->sk_err;
+ if (transport->xprt_err == 0)
goto out;
dprintk("RPC: xs_error_report client %p, error=%d...\n",
- xprt, -err);
- trace_rpc_socket_error(xprt, sk->sk_socket, err);
+ xprt, -transport->xprt_err);
+ trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err);
+
+ /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */
+ smp_mb__before_atomic();
xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -2476,7 +2478,6 @@ static void xs_wake_write(struct sock_xprt *transport)
static void xs_wake_error(struct sock_xprt *transport)
{
int sockerr;
- int sockerr_len = sizeof(sockerr);
if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
return;
@@ -2485,9 +2486,7 @@ static void xs_wake_error(struct sock_xprt *transport)
goto out;
if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
goto out;
- if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR,
- (char *)&sockerr, &sockerr_len) != 0)
- goto out;
+ sockerr = xchg(&transport->xprt_err, 0);
if (sockerr < 0)
xprt_wake_pending_tasks(&transport->xprt, sockerr);
out:
diff --git a/scripts/coccinelle/misc/add_namespace.cocci b/scripts/coccinelle/misc/add_namespace.cocci
index c832bb6445a8..99e93a6c2e24 100644
--- a/scripts/coccinelle/misc/add_namespace.cocci
+++ b/scripts/coccinelle/misc/add_namespace.cocci
@@ -6,6 +6,8 @@
/// add a missing namespace tag to a module source file.
///
+virtual report
+
@has_ns_import@
declarer name MODULE_IMPORT_NS;
identifier virtual.ns;
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 442d5e2ad688..936d3ad23c83 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -166,7 +166,7 @@ struct symbol {
struct module *module;
unsigned int crc;
int crc_valid;
- const char *namespace;
+ char *namespace;
unsigned int weak:1;
unsigned int vmlinux:1; /* 1 if symbol is defined in vmlinux */
unsigned int kernel:1; /* 1 if symbol is from kernel
@@ -348,20 +348,18 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec)
return export_unknown;
}
-static const char *sym_extract_namespace(const char **symname)
+static char *sym_extract_namespace(const char **symname)
{
- size_t n;
- char *dupsymname;
+ char *namespace = NULL;
+ char *ns_separator;
- n = strcspn(*symname, ".");
- if (n < strlen(*symname) - 1) {
- dupsymname = NOFAIL(strdup(*symname));
- dupsymname[n] = '\0';
- *symname = dupsymname;
- return dupsymname + n + 1;
+ ns_separator = strchr(*symname, '.');
+ if (ns_separator) {
+ namespace = NOFAIL(strndup(*symname, ns_separator - *symname));
+ *symname = ns_separator + 1;
}
- return NULL;
+ return namespace;
}
/**
@@ -375,7 +373,6 @@ static struct symbol *sym_add_exported(const char *name, const char *namespace,
if (!s) {
s = new_symbol(name, mod, export);
- s->namespace = namespace;
} else {
if (!s->preloaded) {
warn("%s: '%s' exported twice. Previous export was in %s%s\n",
@@ -386,6 +383,8 @@ static struct symbol *sym_add_exported(const char *name, const char *namespace,
s->module = mod;
}
}
+ free(s->namespace);
+ s->namespace = namespace ? strdup(namespace) : NULL;
s->preloaded = 0;
s->vmlinux = is_vmlinux(mod->name);
s->kernel = 0;
@@ -672,7 +671,8 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
unsigned int crc;
enum export export;
bool is_crc = false;
- const char *name, *namespace;
+ const char *name;
+ char *namespace;
if ((!is_vmlinux(mod->name) || mod->is_dot_o) &&
strstarts(symname, "__ksymtab"))
@@ -747,6 +747,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
name = symname + strlen("__ksymtab_");
namespace = sym_extract_namespace(&name);
sym_add_exported(name, namespace, mod, export);
+ free(namespace);
}
if (strcmp(symname, "init_module") == 0)
mod->has_init = 1;
@@ -2195,7 +2196,7 @@ static int check_exports(struct module *mod)
else
basename = mod->name;
- if (exp->namespace) {
+ if (exp->namespace && exp->namespace[0]) {
add_namespace(&mod->required_namespaces,
exp->namespace);
diff --git a/scripts/nsdeps b/scripts/nsdeps
index ac2b6031dd13..3754dac13b31 100644
--- a/scripts/nsdeps
+++ b/scripts/nsdeps
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Linux kernel symbol namespace import generator
#
@@ -41,7 +41,7 @@ generate_deps() {
for source_file in $mod_source_files; do
sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp
offset=$(wc -l ${source_file}.tmp | awk '{print $1;}')
- cat $source_file | grep MODULE_IMPORT_NS | sort -u >> ${source_file}.tmp
+ cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp
tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp
if ! diff -q ${source_file} ${source_file}.tmp; then
mv ${source_file}.tmp ${source_file}
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 3a29e7c24ba9..a5813c7629c1 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1946,7 +1946,14 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
rc = string_to_context_struct(args->newp, NULL, s,
newc, SECSID_NULL);
if (rc == -EINVAL) {
- /* Retain string representation for later mapping. */
+ /*
+ * Retain string representation for later mapping.
+ *
+ * IMPORTANT: We need to copy the contents of oldc->str
+ * back into s again because string_to_context_struct()
+ * may have garbled it.
+ */
+ memcpy(s, oldc->str, oldc->len);
context_init(newc);
newc->str = s;
newc->len = oldc->len;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c3feccb99ff5..4cdbae6f4e61 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -63,6 +63,13 @@ TARGETS += zram
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
+# User can optionally provide a TARGETS skiplist.
+SKIP_TARGETS ?=
+ifneq ($(SKIP_TARGETS),)
+ TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS))
+ override TARGETS := $(TMP)
+endif
+
# Clear LDFLAGS and MAKEFLAGS if called from main
# Makefile to avoid test build failures when test
# Makefile doesn't have explicit build rules.
@@ -171,9 +178,12 @@ run_pstore_crash:
# 1. output_dir=kernel_src
# 2. a separate output directory is specified using O= KBUILD_OUTPUT
# 3. a separate output directory is specified using KBUILD_OUTPUT
+# Avoid conflict with INSTALL_PATH set by the main Makefile
#
-INSTALL_PATH ?= $(BUILD)/install
-INSTALL_PATH := $(abspath $(INSTALL_PATH))
+KSFT_INSTALL_PATH ?= $(BUILD)/kselftest_install
+KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH))
+# Avoid changing the rest of the logic here and lib.mk.
+INSTALL_PATH := $(KSFT_INSTALL_PATH)
ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
install: all
@@ -198,11 +208,16 @@ ifdef INSTALL_PATH
echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
echo "fi" >> $(ALL_SCRIPT)
+ @# While building run_kselftest.sh skip also non-existent TARGET dirs:
+ @# they could be the result of a build failure and should NOT be
+ @# included in the generated runlist.
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
+ [ ! -d $$INSTALL_PATH/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
echo "cd $$TARGET" >> $(ALL_SCRIPT); \
echo -n "run_many" >> $(ALL_SCRIPT); \
+ echo -n "Emit Tests for $$TARGET\n"; \
$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
echo "" >> $(ALL_SCRIPT); \
echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 00c9020bdda8..84de7bc74f2c 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -3,9 +3,14 @@
#
# Runs a set of tests in a given subdirectory.
export skip_rc=4
+export timeout_rc=124
export logfile=/dev/stdout
export per_test_logging=
+# Defaults for "settings" file fields:
+# "timeout" how many seconds to let each test run before failing.
+export kselftest_default_timeout=45
+
# There isn't a shell-agnostic way to find the path of a sourced file,
# so we must rely on BASE_DIR being set to find other tools.
if [ -z "$BASE_DIR" ]; then
@@ -24,6 +29,16 @@ tap_prefix()
fi
}
+tap_timeout()
+{
+ # Make sure tests will time out if utility is available.
+ if [ -x /usr/bin/timeout ] ; then
+ /usr/bin/timeout "$kselftest_timeout" "$1"
+ else
+ "$1"
+ fi
+}
+
run_one()
{
DIR="$1"
@@ -32,6 +47,18 @@ run_one()
BASENAME_TEST=$(basename $TEST)
+ # Reset any "settings"-file variables.
+ export kselftest_timeout="$kselftest_default_timeout"
+ # Load per-test-directory kselftest "settings" file.
+ settings="$BASE_DIR/$DIR/settings"
+ if [ -r "$settings" ] ; then
+ while read line ; do
+ field=$(echo "$line" | cut -d= -f1)
+ value=$(echo "$line" | cut -d= -f2-)
+ eval "kselftest_$field"="$value"
+ done < "$settings"
+ fi
+
TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
echo "# $TEST_HDR_MSG"
if [ ! -x "$TEST" ]; then
@@ -44,14 +71,17 @@ run_one()
echo "not ok $test_num $TEST_HDR_MSG"
else
cd `dirname $TEST` > /dev/null
- (((((./$BASENAME_TEST 2>&1; echo $? >&3) |
+ ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) |
tap_prefix >&4) 3>&1) |
(read xs; exit $xs)) 4>>"$logfile" &&
echo "ok $test_num $TEST_HDR_MSG") ||
- (if [ $? -eq $skip_rc ]; then \
+ (rc=$?; \
+ if [ $rc -eq $skip_rc ]; then \
echo "not ok $test_num $TEST_HDR_MSG # SKIP"
+ elif [ $rc -eq $timeout_rc ]; then \
+ echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT"
else
- echo "not ok $test_num $TEST_HDR_MSG"
+ echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
fi)
cd - >/dev/null
fi
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
index ec304463883c..e2e1911d62d5 100755
--- a/tools/testing/selftests/kselftest_install.sh
+++ b/tools/testing/selftests/kselftest_install.sh
@@ -24,12 +24,12 @@ main()
echo "$0: Installing in specified location - $install_loc ..."
fi
- install_dir=$install_loc/kselftest
+ install_dir=$install_loc/kselftest_install
# Create install directory
mkdir -p $install_dir
# Build tests
- INSTALL_PATH=$install_dir make install
+ KSFT_INSTALL_PATH=$install_dir make install
}
main "$@"
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings
new file mode 100644
index 000000000000..ba4d85f74cd6
--- /dev/null
+++ b/tools/testing/selftests/rtc/settings
@@ -0,0 +1 @@
+timeout=90
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
index afff120c7be6..f45e510500c0 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -19,7 +19,7 @@
int fd;
const char v = 'V';
-static const char sopts[] = "bdehp:t:Tn:NLf:";
+static const char sopts[] = "bdehp:t:Tn:NLf:i";
static const struct option lopts[] = {
{"bootstatus", no_argument, NULL, 'b'},
{"disable", no_argument, NULL, 'd'},
@@ -32,6 +32,7 @@ static const struct option lopts[] = {
{"getpretimeout", no_argument, NULL, 'N'},
{"gettimeleft", no_argument, NULL, 'L'},
{"file", required_argument, NULL, 'f'},
+ {"info", no_argument, NULL, 'i'},
{NULL, no_argument, NULL, 0x0}
};
@@ -72,6 +73,7 @@ static void usage(char *progname)
printf("Usage: %s [options]\n", progname);
printf(" -f, --file\t\tOpen watchdog device file\n");
printf("\t\t\tDefault is /dev/watchdog\n");
+ printf(" -i, --info\t\tShow watchdog_info\n");
printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n");
printf(" -d, --disable\t\tTurn off the watchdog timer\n");
printf(" -e, --enable\t\tTurn on the watchdog timer\n");
@@ -97,6 +99,7 @@ int main(int argc, char *argv[])
int c;
int oneshot = 0;
char *file = "/dev/watchdog";
+ struct watchdog_info info;
setbuf(stdout, NULL);
@@ -118,6 +121,16 @@ int main(int argc, char *argv[])
exit(-1);
}
+ /*
+ * Validate that `file` is a watchdog device
+ */
+ ret = ioctl(fd, WDIOC_GETSUPPORT, &info);
+ if (ret) {
+ printf("WDIOC_GETSUPPORT error '%s'\n", strerror(errno));
+ close(fd);
+ exit(ret);
+ }
+
optind = 0;
while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
@@ -205,6 +218,18 @@ int main(int argc, char *argv[])
case 'f':
/* Handled above */
break;
+ case 'i':
+ /*
+ * watchdog_info was obtained as part of file open
+ * validation. So we just show it here.
+ */
+ oneshot = 1;
+ printf("watchdog_info:\n");
+ printf(" identity:\t\t%s\n", info.identity);
+ printf(" firmware_version:\t%u\n",
+ info.firmware_version);
+ printf(" options:\t\t%08x\n", info.options);
+ break;
default:
usage(argv[0]);

Privacy Policy