diff options
366 files changed, 4635 insertions, 1722 deletions
@@ -316,6 +316,7 @@ Hans Verkuil <hverkuil@kernel.org> <hverkuil-cisco@xs4all.nl> Hans Verkuil <hverkuil@kernel.org> <hansverk@cisco.com> Hao Ge <hao.ge@linux.dev> <gehao@kylinos.cn> Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com> +Harry Yoo <harry@kernel.org> <harry.yoo@oracle.com> Heiko Carstens <hca@linux.ibm.com> <h.carstens@de.ibm.com> Heiko Carstens <hca@linux.ibm.com> <heiko.carstens@de.ibm.com> Heiko Stuebner <heiko@sntech.de> <heiko.stuebner@bqreaders.com> @@ -587,6 +588,7 @@ Morten Welinder <terra@gnome.org> Morten Welinder <welinder@anemone.rentec.com> Morten Welinder <welinder@darter.rentec.com> Morten Welinder <welinder@troll.com> +Muhammad Usama Anjum <usama.anjum@arm.com> <usama.anjum@collabora.com> Mukesh Ojha <quic_mojha@quicinc.com> <mojha@codeaurora.org> Muna Sinada <quic_msinada@quicinc.com> <msinada@codeaurora.org> Murali Nalajala <quic_mnalajal@quicinc.com> <mnalajal@codeaurora.org> diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst index 3210c4792978..90fdfddd3ae5 100644 --- a/Documentation/PCI/pcieaer-howto.rst +++ b/Documentation/PCI/pcieaer-howto.rst @@ -85,6 +85,16 @@ In the example, 'Requester ID' means the ID of the device that sent the error message to the Root Port. Please refer to PCIe specs for other fields. +The 'TLP Header' is the prefix/header of the TLP that caused the error +in raw hex format. To decode the TLP Header into human-readable form +one may use tlp-tool: + +https://github.com/mmpg-x86/tlp-tool + +Example usage:: + + curl -L https://git.kernel.org/linus/2ca1c94ce0b6 | rtlp-tool --aer + AER Ratelimits -------------- diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 1d7bfad73b1c..123c8468d58f 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst @@ -149,11 +149,33 @@ For architectures that require cache flushing for DMA coherence DMA_ATTR_MMIO will not perform any cache flushing. The address provided must never be mapped cacheable into the CPU. -DMA_ATTR_CPU_CACHE_CLEAN ------------------------- - -This attribute indicates the CPU will not dirty any cacheline overlapping this -DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows -multiple small buffers to safely share a cacheline without risk of data -corruption, suppressing DMA debug warnings about overlapping mappings. -All mappings sharing a cacheline should have this attribute. +DMA_ATTR_DEBUGGING_IGNORE_CACHELINES +------------------------------------ + +This attribute indicates that CPU cache lines may overlap for buffers mapped +with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. + +Such overlap may occur when callers map multiple small buffers that reside +within the same cache line. In this case, callers must guarantee that the CPU +will not dirty these cache lines after the mappings are established. When this +condition is met, multiple buffers can safely share a cache line without risking +data corruption. + +All mappings that share a cache line must set this attribute to suppress DMA +debug warnings about overlapping mappings. + +DMA_ATTR_REQUIRE_COHERENT +------------------------- + +DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT fail on any +system where SWIOTLB or cache management is required. This should only +be used to support uAPI designs that require continuous HW DMA +coherence with userspace processes, for example RDMA and DRM. At a +minimum the memory being mapped must be userspace memory from +pin_user_pages() or similar. + +Drivers should consider using dma_mmap_pages() instead of this +interface when building their uAPIs, when possible. + +It must never be used in an in-kernel driver that only works with +kernel memory. diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml index 56c755c22945..502907dd28b3 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml @@ -33,6 +33,7 @@ properties: - const: rockchip,rk3066-spdif - items: - enum: + - rockchip,rk3576-spdif - rockchip,rk3588-spdif - const: rockchip,rk3568-spdif diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml index 4a7129d0b157..551edf39e766 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml @@ -164,7 +164,7 @@ allOf: properties: compatible: contains: - const: st,stm32mph7-sai + const: st,stm32h7-sai then: properties: clocks: diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index af5a69f87da4..eb846518e6ac 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -783,6 +783,56 @@ controlled by the "uuid" mount option, which supports these values: mounted with "uuid=on". +Durability and copy up +---------------------- + +The fsync(2) system call ensures that the data and metadata of a file +are safely written to the backing storage, which is expected to +guarantee the existence of the information post system crash. + +Without an fsync(2) call, there is no guarantee that the observed +data after a system crash will be either the old or the new data, but +in practice, the observed data after crash is often the old or new data +or a mix of both. + +When an overlayfs file is modified for the first time, copy up will +create a copy of the lower file and its parent directories in the upper +layer. Since the Linux filesystem API does not enforce any particular +ordering on storing changes without explicit fsync(2) calls, in case +of a system crash, the upper file could end up with no data at all +(i.e. zeros), which would be an unusual outcome. To avoid this +experience, overlayfs calls fsync(2) on the upper file before completing +data copy up with rename(2) or link(2) to make the copy up "atomic". + +By default, overlayfs does not explicitly call fsync(2) on copied up +directories or on metadata-only copy up, so it provides no guarantee to +persist the user's modification unless the user calls fsync(2). +The fsync during copy up only guarantees that if a copy up is observed +after a crash, the observed data is not zeroes or intermediate values +from the copy up staging area. + +On traditional local filesystems with a single journal (e.g. ext4, xfs), +fsync on a file also persists the parent directory changes, because they +are usually modified in the same transaction, so metadata durability during +data copy up effectively comes for free. Overlayfs further limits risk by +disallowing network filesystems as upper layer. + +Overlayfs can be tuned to prefer performance or durability when storing +to the underlying upper layer. This is controlled by the "fsync" mount +option, which supports these values: + +- "auto": (default) + Call fsync(2) on upper file before completion of data copy up. + No explicit fsync(2) on directory or metadata-only copy up. +- "strict": + Call fsync(2) on upper file and directories before completion of any + copy up. +- "volatile": [*] + Prefer performance over durability (see `Volatile mount`_) + +[*] The mount option "volatile" is an alias to "fsync=volatile". + + Volatile mount -------------- diff --git a/Documentation/hwmon/adm1177.rst b/Documentation/hwmon/adm1177.rst index 1c85a2af92bf..375f6d6e03a7 100644 --- a/Documentation/hwmon/adm1177.rst +++ b/Documentation/hwmon/adm1177.rst @@ -27,10 +27,10 @@ for details. Sysfs entries ------------- -The following attributes are supported. Current maxim attribute +The following attributes are supported. Current maximum attribute is read-write, all other attributes are read-only. -in0_input Measured voltage in microvolts. +in0_input Measured voltage in millivolts. -curr1_input Measured current in microamperes. -curr1_max_alarm Overcurrent alarm in microamperes. +curr1_input Measured current in milliamperes. +curr1_max Overcurrent shutdown threshold in milliamperes. diff --git a/Documentation/hwmon/peci-cputemp.rst b/Documentation/hwmon/peci-cputemp.rst index fe0422248dc5..266b62a46f49 100644 --- a/Documentation/hwmon/peci-cputemp.rst +++ b/Documentation/hwmon/peci-cputemp.rst @@ -51,8 +51,9 @@ temp1_max Provides thermal control temperature of the CPU package temp1_crit Provides shutdown temperature of the CPU package which is also known as the maximum processor junction temperature, Tjmax or Tprochot. -temp1_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of - the CPU package. +temp1_crit_hyst Provides the hysteresis temperature of the CPU + package. Returns Tcontrol, the temperature at which + the critical condition clears. temp2_label "DTS" temp2_input Provides current temperature of the CPU package scaled @@ -62,8 +63,9 @@ temp2_max Provides thermal control temperature of the CPU package temp2_crit Provides shutdown temperature of the CPU package which is also known as the maximum processor junction temperature, Tjmax or Tprochot. -temp2_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of - the CPU package. +temp2_crit_hyst Provides the hysteresis temperature of the CPU + package. Returns Tcontrol, the temperature at which + the critical condition clears. temp3_label "Tcontrol" temp3_input Provides current Tcontrol temperature of the CPU diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index 13134bccdd39..7f86d7a37dc2 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: January 2026 +:Date: March 2026 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags .. code-block:: c - __u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; - if (abi < 7) { - /* Clear logging flags unsupported before ABI 7. */ + __u32 restrict_flags = + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | + LANDLOCK_RESTRICT_SELF_TSYNC; + switch (abi) { + case 1 ... 6: + /* Removes logging flags for ABI < 7 */ restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF); + __attribute__((fallthrough)); + case 7: + /* + * Removes multithreaded enforcement flag for ABI < 8 + * + * WARNING: Without this flag, calling landlock_restrict_self(2) is + * only equivalent if the calling process is single-threaded. Below + * ABI v8 (and as of ABI v8, when not using this flag), a Landlock + * policy would only be enforced for the calling thread and its + * children (and not for all threads, including parents and siblings). + */ + restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC; } The next step is to restrict the current thread from gaining more privileges diff --git a/MAINTAINERS b/MAINTAINERS index 7d10988cbc62..9d31ceeac4a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3986,7 +3986,7 @@ F: drivers/hwmon/asus-ec-sensors.c ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary <corentin.chary@gmail.com> M: Luke D. Jones <luke@ljones.dev> -M: Denis Benato <benato.denis96@gmail.com> +M: Denis Benato <denis.benato@linux.dev> L: platform-driver-x86@vger.kernel.org S: Maintained W: https://asus-linux.org/ @@ -8628,8 +8628,14 @@ F: drivers/gpu/drm/lima/ F: include/uapi/drm/lima_drm.h DRM DRIVERS FOR LOONGSON +M: Jianmin Lv <lvjianmin@loongson.cn> +M: Qianhai Wu <wuqianhai@loongson.cn> +R: Huacai Chen <chenhuacai@kernel.org> +R: Mingcong Bai <jeffbai@aosc.io> +R: Xi Ruoyao <xry111@xry111.site> +R: Icenowy Zheng <zhengxingda@iscas.ac.cn> L: dri-devel@lists.freedesktop.org -S: Orphan +S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/loongson/ @@ -9613,7 +9619,12 @@ F: include/linux/ext2* EXT4 FILE SYSTEM M: "Theodore Ts'o" <tytso@mit.edu> -M: Andreas Dilger <adilger.kernel@dilger.ca> +R: Andreas Dilger <adilger.kernel@dilger.ca> +R: Baokun Li <libaokun@linux.alibaba.com> +R: Jan Kara <jack@suse.cz> +R: Ojaswin Mujoo <ojaswin@linux.ibm.com> +R: Ritesh Harjani (IBM) <ritesh.list@gmail.com> +R: Zhang Yi <yi.zhang@huawei.com> L: linux-ext4@vger.kernel.org S: Maintained W: http://ext4.wiki.kernel.org @@ -16877,7 +16888,7 @@ M: Lorenzo Stoakes <ljs@kernel.org> R: Rik van Riel <riel@surriel.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> R: Vlastimil Babka <vbabka@kernel.org> -R: Harry Yoo <harry.yoo@oracle.com> +R: Harry Yoo <harry@kernel.org> R: Jann Horn <jannh@google.com> L: linux-mm@kvack.org S: Maintained @@ -24343,7 +24354,7 @@ F: drivers/nvmem/layouts/sl28vpd.c SLAB ALLOCATOR M: Vlastimil Babka <vbabka@kernel.org> -M: Harry Yoo <harry.yoo@oracle.com> +M: Harry Yoo <harry@kernel.org> M: Andrew Morton <akpm@linux-foundation.org> R: Hao Li <hao.li@linux.dev> R: Christoph Lameter <cl@gentwo.org> @@ -1654,7 +1654,7 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S + .builtin-dtbs-list .builtin-dtbs.S # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index c5c5644b1878..a024d9a770dc 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1753,7 +1753,7 @@ int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new) if (!writable) return -EPERM; - ptep = (u64 __user *)hva + offset; + ptep = (void __user *)hva + offset; if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS)) r = __lse_swap_desc(ptep, old, new); else diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 959532422d3a..b963fd975aac 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -247,6 +247,20 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; + + /* + * We may come from a state where either a PC update was + * pending (SMC call resulting in PC being increpented to + * skip the SMC) or a pending exception. Make sure we get + * rid of all that, as this cannot be valid out of reset. + * + * Note that clearing the exception mask also clears PC + * updates, but that's an implementation detail, and we + * really want to make it explicit. + */ + vcpu_clear_flag(vcpu, PENDING_EXCEPTION); + vcpu_clear_flag(vcpu, EXCEPT_MASK); + vcpu_clear_flag(vcpu, INCREMENT_PC); vcpu_set_reg(vcpu, 0, reset_state.r0); } diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h index e2eca1a25b4e..a1bd6a3ee03a 100644 --- a/arch/loongarch/include/asm/linkage.h +++ b/arch/loongarch/include/asm/linkage.h @@ -41,4 +41,40 @@ .cfi_endproc; \ SYM_END(name, SYM_T_NONE) +/* + * This is for the signal handler trampoline, which is used as the return + * address of the signal handlers in userspace instead of called normally. + * The long standing libgcc bug https://gcc.gnu.org/PR124050 requires a + * nop between .cfi_startproc and the actual address of the trampoline, so + * we cannot simply use SYM_FUNC_START. + * + * This wrapper also contains all the .cfi_* directives for recovering + * the content of the GPRs and the "return address" (where the rt_sigreturn + * syscall will jump to), assuming there is a struct rt_sigframe (where + * a struct sigcontext containing those information we need to recover) at + * $sp. The "DWARF for the LoongArch(TM) Architecture" manual states + * column 0 is for $zero, but it does not make too much sense to + * save/restore the hardware zero register. Repurpose this column here + * for the return address (here it's not the content of $ra we cannot use + * the default column 3). + */ +#define SYM_SIGFUNC_START(name) \ + .cfi_startproc; \ + .cfi_signal_frame; \ + .cfi_def_cfa 3, RT_SIGFRAME_SC; \ + .cfi_return_column 0; \ + .cfi_offset 0, SC_PC; \ + \ + .irp num, 1, 2, 3, 4, 5, 6, 7, 8, \ + 9, 10, 11, 12, 13, 14, 15, 16, \ + 17, 18, 19, 20, 21, 22, 23, 24, \ + 25, 26, 27, 28, 29, 30, 31; \ + .cfi_offset \num, SC_REGS + \num * SZREG; \ + .endr; \ + \ + nop; \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) + +#define SYM_SIGFUNC_END(name) SYM_FUNC_END(name) + #endif diff --git a/arch/loongarch/include/asm/sigframe.h b/arch/loongarch/include/asm/sigframe.h new file mode 100644 index 000000000000..109298b8d7e0 --- /dev/null +++ b/arch/loongarch/include/asm/sigframe.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include <asm/siginfo.h> +#include <asm/ucontext.h> + +struct rt_sigframe { + struct siginfo rs_info; + struct ucontext rs_uctx; +}; diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 3017c7157600..2cc953f113ac 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -16,6 +16,7 @@ #include <asm/ptrace.h> #include <asm/processor.h> #include <asm/ftrace.h> +#include <asm/sigframe.h> #include <vdso/datapage.h> static void __used output_ptreg_defines(void) @@ -220,6 +221,7 @@ static void __used output_sc_defines(void) COMMENT("Linux sigcontext offsets."); OFFSET(SC_REGS, sigcontext, sc_regs); OFFSET(SC_PC, sigcontext, sc_pc); + OFFSET(RT_SIGFRAME_SC, rt_sigframe, rs_uctx.uc_mcontext); BLANK(); } diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 841206fde3ab..652456768b55 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -42,16 +42,15 @@ static int __init init_cpu_fullname(void) int cpu, ret; char *cpuname; const char *model; - struct device_node *root; /* Parsing cpuname from DTS model property */ - root = of_find_node_by_path("/"); - ret = of_property_read_string(root, "model", &model); + ret = of_property_read_string(of_root, "model", &model); if (ret == 0) { cpuname = kstrdup(model, GFP_KERNEL); + if (!cpuname) + return -ENOMEM; loongson_sysconf.cpuname = strsep(&cpuname, " "); } - of_node_put(root); if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) { for (cpu = 0; cpu < NR_CPUS; cpu++) diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index c9f7ca778364..d4151d2fb82e 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -35,6 +35,7 @@ #include <asm/cpu-features.h> #include <asm/fpu.h> #include <asm/lbt.h> +#include <asm/sigframe.h> #include <asm/ucontext.h> #include <asm/vdso.h> @@ -51,11 +52,6 @@ #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) -struct rt_sigframe { - struct siginfo rs_info; - struct ucontext rs_uctx; -}; - struct _ctx_layout { struct sctx_info *addr; unsigned int size; diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index d2acb4d09e73..003bd773e11c 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -83,7 +83,7 @@ static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s, if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) { cpuid = ffs(cpuid) - 1; - cpuid = (cpuid >= 4) ? 0 : cpuid; + cpuid = ((cpuid < 0) || (cpuid >= 4)) ? 0 : cpuid; } vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); @@ -472,34 +472,34 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, switch (addr) { case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: offset = (addr - EIOINTC_NODETYPE_START) / 4; - p = s->nodetype + offset * 4; + p = (void *)s->nodetype + offset * 4; break; case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: offset = (addr - EIOINTC_IPMAP_START) / 4; - p = &s->ipmap + offset * 4; + p = (void *)&s->ipmap + offset * 4; break; case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: offset = (addr - EIOINTC_ENABLE_START) / 4; - p = s->enable + offset * 4; + p = (void *)s->enable + offset * 4; break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: offset = (addr - EIOINTC_BOUNCE_START) / 4; - p = s->bounce + offset * 4; + p = (void *)s->bounce + offset * 4; break; case EIOINTC_ISR_START ... EIOINTC_ISR_END: offset = (addr - EIOINTC_ISR_START) / 4; - p = s->isr + offset * 4; + p = (void *)s->isr + offset * 4; break; case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: if (cpu >= s->num_cpu) return -EINVAL; offset = (addr - EIOINTC_COREISR_START) / 4; - p = s->coreisr[cpu] + offset * 4; + p = (void *)s->coreisr[cpu] + offset * 4; break; case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: offset = (addr - EIOINTC_COREMAP_START) / 4; - p = s->coremap + offset * 4; + p = (void *)s->coremap + offset * 4; break; default: kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr); diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 8ffd50a470e6..831f381a8fd1 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -588,6 +588,9 @@ struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid) { struct kvm_phyid_map *map; + if (cpuid < 0) + return NULL; + if (cpuid >= KVM_MAX_PHYID) return NULL; diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index d923295ab8c6..d233ea2218fe 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -5,9 +5,11 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/acpi.h> +#include <linux/delay.h> #include <linux/types.h> #include <linux/pci.h> #include <linux/vgaarb.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include <asm/cacheflush.h> #include <asm/loongson.h> @@ -15,6 +17,9 @@ #define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06 #define PCI_DEVICE_ID_LOONGSON_DC2 0x7a36 #define PCI_DEVICE_ID_LOONGSON_DC3 0x7a46 +#define PCI_DEVICE_ID_LOONGSON_GPU1 0x7a15 +#define PCI_DEVICE_ID_LOONGSON_GPU2 0x7a25 +#define PCI_DEVICE_ID_LOONGSON_GPU3 0x7a35 int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) @@ -99,3 +104,78 @@ static void pci_fixup_vgadev(struct pci_dev *pdev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1, pci_fixup_vgadev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2, pci_fixup_vgadev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC3, pci_fixup_vgadev); + +#define CRTC_NUM_MAX 2 +#define CRTC_OUTPUT_ENABLE 0x100 + +static void loongson_gpu_fixup_dma_hang(struct pci_dev *pdev, bool on) +{ + u32 i, val, count, crtc_offset, device; + void __iomem *crtc_reg, *base, *regbase; + static u32 crtc_status[CRTC_NUM_MAX] = { 0 }; + + base = pdev->bus->ops->map_bus(pdev->bus, pdev->devfn + 1, 0); + device = readw(base + PCI_DEVICE_ID); + + regbase = ioremap(readq(base + PCI_BASE_ADDRESS_0) & ~0xffull, SZ_64K); + if (!regbase) { + pci_err(pdev, "Failed to ioremap()\n"); + return; + } + + switch (device) { + case PCI_DEVICE_ID_LOONGSON_DC2: + crtc_reg = regbase + 0x1240; + crtc_offset = 0x10; + break; + case PCI_DEVICE_ID_LOONGSON_DC3: + crtc_reg = regbase; + crtc_offset = 0x400; + break; + } + + for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) { + val = readl(crtc_reg); + + if (!on) + crtc_status[i] = val; + + /* No need to fixup if the status is off at startup. */ + if (!(crtc_status[i] & CRTC_OUTPUT_ENABLE)) + continue; + + if (on) + val |= CRTC_OUTPUT_ENABLE; + else + val &= ~CRTC_OUTPUT_ENABLE; + + mb(); + writel(val, crtc_reg); + + for (count = 0; count < 40; count++) { + val = readl(crtc_reg) & CRTC_OUTPUT_ENABLE; + if ((on && val) || (!on && !val)) + break; + udelay(1000); + } + + pci_info(pdev, "DMA hang fixup at reg[0x%lx]: 0x%x\n", + (unsigned long)crtc_reg & 0xffff, readl(crtc_reg)); + } + + iounmap(regbase); +} + +static void pci_fixup_dma_hang_early(struct pci_dev *pdev) +{ + loongson_gpu_fixup_dma_hang(pdev, false); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_early); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_early); + +static void pci_fixup_dma_hang_final(struct pci_dev *pdev) +{ + loongson_gpu_fixup_dma_hang(pdev, true); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_final); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_final); diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 520f1513f07d..294c16b9517f 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -26,7 +26,7 @@ cflags-vdso := $(ccflags-vdso) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -std=gnu11 -fms-extensions -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ - $(call cc-option, -fno-asynchronous-unwind-tables) \ + $(call cc-option, -fasynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) aflags-vdso := $(ccflags-vdso) \ -D__ASSEMBLY__ -Wa,-gdwarf-2 @@ -41,7 +41,7 @@ endif # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id -T + $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id --eh-frame-hdr -T # # Shared build commands. diff --git a/arch/loongarch/vdso/sigreturn.S b/arch/loongarch/vdso/sigreturn.S index 9cb3c58fad03..59f940d928de 100644 --- a/arch/loongarch/vdso/sigreturn.S +++ b/arch/loongarch/vdso/sigreturn.S @@ -12,13 +12,13 @@ #include <asm/regdef.h> #include <asm/asm.h> +#include <asm/asm-offsets.h> .section .text - .cfi_sections .debug_frame -SYM_FUNC_START(__vdso_rt_sigreturn) +SYM_SIGFUNC_START(__vdso_rt_sigreturn) li.w a7, __NR_rt_sigreturn syscall 0 -SYM_FUNC_END(__vdso_rt_sigreturn) +SYM_SIGFUNC_END(__vdso_rt_sigreturn) diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index f3184073e754..dad02f5b3c8d 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -62,8 +62,8 @@ do { \ * @size: number of elements in array */ #define array_index_mask_nospec array_index_mask_nospec -static inline unsigned long array_index_mask_nospec(unsigned long index, - unsigned long size) +static __always_inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) { unsigned long mask; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 64a50f0862aa..3039c88daa63 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -710,6 +710,9 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); +#define SIE64_RETURN_NORMAL 0 +#define SIE64_RETURN_MCCK 1 + int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa, unsigned long gasce); diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index c9ae680a28af..ac3606c3babe 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -62,7 +62,7 @@ struct stack_frame { struct { unsigned long sie_control_block; unsigned long sie_savearea; - unsigned long sie_reason; + unsigned long sie_return; unsigned long sie_flags; unsigned long sie_control_block_phys; unsigned long sie_guest_asce; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e1a5b5b54e4f..fbd26f3e9f96 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -63,7 +63,7 @@ int main(void) OFFSET(__SF_EMPTY, stack_frame, empty[0]); OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block); OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea); - OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); + OFFSET(__SF_SIE_RETURN, stack_frame, sie_return); OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4873fe9d891b..bb806d1ddae0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -200,7 +200,7 @@ SYM_FUNC_START(__sie64a) stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce - xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 + xc __SF_SIE_RETURN(8,%r15),__SF_SIE_RETURN(%r15) # return code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 mvi __TI_sie(%r14),1 @@ -237,7 +237,7 @@ SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) xgr %r4,%r4 xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lg %r2,__SF_SIE_REASON(%r15) # return exit reason code + lg %r2,__SF_SIE_RETURN(%r15) # return sie return code BR_EX %r14 SYM_FUNC_END(__sie64a) EXPORT_SYMBOL(__sie64a) @@ -271,6 +271,7 @@ SYM_CODE_START(system_call) xgr %r9,%r9 xgr %r10,%r10 xgr %r11,%r11 + xgr %r12,%r12 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13) MBEAR %r2,%r13 @@ -407,6 +408,7 @@ SYM_CODE_START(\name) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xgr %r12,%r12 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13) MBEAR %r11,%r13 @@ -496,6 +498,7 @@ SYM_CODE_START(mcck_int_handler) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xgr %r12,%r12 stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index a55abbf65333..94fbfad49f62 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -487,8 +487,8 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK); if (test_cpu_flag(CIF_MCCK_GUEST) && (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) { - /* Set exit reason code for host's later handling */ - *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; + /* Set sie return code for host's later handling */ + ((struct stack_frame *)regs->gprs[15])->sie_return = SIE64_RETURN_MCCK; } clear_cpu_flag(CIF_MCCK_GUEST); diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 795b6cca74c9..d103c853e120 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -13,6 +13,7 @@ */ #include <linux/cpufeature.h> +#include <linux/nospec.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -131,8 +132,10 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap) if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr < NR_syscalls)) + if (likely(nr < NR_syscalls)) { + nr = array_index_nospec(nr, NR_syscalls); regs->gprs[2] = sys_call_table[nr](regs); + } out: syscall_exit_to_user_mode(regs); } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4630b2a067ea..a9da9390867d 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1434,7 +1434,8 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union if (rc) return rc; - pgste = pgste_get_lock(ptep_h); + if (!pgste_get_trylock(ptep_h, &pgste)) + return -EAGAIN; newpte = _pte(f->pfn, f->writable, !p, 0); newpte.s.d |= ptep->s.d; newpte.s.sd |= ptep->s.sd; @@ -1444,7 +1445,8 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union pgste_set_unlock(ptep_h, pgste); newpte = _pte(f->pfn, 0, !p, 0); - pgste = pgste_get_lock(ptep); + if (!pgste_get_trylock(ptep, &pgste)) + return -EAGAIN; pgste = __dat_ptep_xchg(ptep, pgste, newpte, gpa_to_gfn(raddr), sg->asce, uses_skeys(sg)); pgste_set_unlock(ptep, pgste); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 18932a65ca68..7cb8ce833b62 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2724,6 +2724,9 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) bit = bit_nr + (addr % PAGE_SIZE) * 8; + /* kvm_set_routing_entry() should never allow this to happen */ + WARN_ON_ONCE(bit > (PAGE_SIZE * BITS_PER_BYTE - 1)); + return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; } @@ -2824,6 +2827,12 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, int rc; mci.val = mcck_info->mcic; + + /* log machine checks being reinjected on all debugs */ + VCPU_EVENT(vcpu, 2, "guest machine check %lx", mci.val); + KVM_EVENT(2, "guest machine check %lx", mci.val); + pr_info("guest machine check pid %d: %lx", current->pid, mci.val); + if (mci.sr) cr14 |= CR14_RECOVERY_SUBMASK; if (mci.dg) @@ -2852,6 +2861,7 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { + const struct kvm_irq_routing_s390_adapter *adapter; u64 uaddr_s, uaddr_i; int idx; @@ -2862,6 +2872,14 @@ int kvm_set_routing_entry(struct kvm *kvm, return -EINVAL; e->set = set_adapter_int; + adapter = &ue->u.adapter; + if (adapter->summary_addr + (adapter->summary_offset / 8) >= + (adapter->summary_addr & PAGE_MASK) + PAGE_SIZE) + return -EINVAL; + if (adapter->ind_addr + (adapter->ind_offset / 8) >= + (adapter->ind_addr & PAGE_MASK) + PAGE_SIZE) + return -EINVAL; + idx = srcu_read_lock(&kvm->srcu); uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr); uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3eb60aa932ec..b2c01fa7b852 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4617,7 +4617,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) return 0; } -static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) +static int vcpu_post_run(struct kvm_vcpu *vcpu, int sie_return) { struct mcck_volatile_info *mcck_info; struct sie_page *sie_page; @@ -4633,14 +4633,14 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; - if (exit_reason == -EINTR) { - VCPU_EVENT(vcpu, 3, "%s", "machine check"); + if (sie_return == SIE64_RETURN_MCCK) { sie_page = container_of(vcpu->arch.sie_block, struct sie_page, sie_block); mcck_info = &sie_page->mcck_info; kvm_s390_reinject_machine_check(vcpu, mcck_info); return 0; } + WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL); if (vcpu->arch.sie_block->icptcode > 0) { rc = kvm_handle_sie_intercept(vcpu); @@ -4679,7 +4679,7 @@ int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb, #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) static int __vcpu_run(struct kvm_vcpu *vcpu) { - int rc, exit_reason; + int rc, sie_return; struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; /* @@ -4719,9 +4719,9 @@ xfer_to_guest_mode_check: guest_timing_enter_irqoff(); __disable_cpu_timer_accounting(vcpu); - exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block, - vcpu->run->s.regs.gprs, - vcpu->arch.gmap->asce.val); + sie_return = kvm_s390_enter_exit_sie(vcpu->arch.sie_block, + vcpu->run->s.regs.gprs, + vcpu->arch.gmap->asce.val); __enable_cpu_timer_accounting(vcpu); guest_timing_exit_irqoff(); @@ -4744,7 +4744,7 @@ xfer_to_guest_mode_check: } kvm_vcpu_srcu_read_lock(vcpu); - rc = vcpu_post_run(vcpu, exit_reason); + rc = vcpu_post_run(vcpu, sie_return); if (rc || guestdbg_exit_pending(vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); break; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index d249b10044eb..0330829b4046 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1122,6 +1122,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + unsigned long sie_return = SIE64_RETURN_NORMAL; int guest_bp_isolation; int rc = 0; @@ -1163,7 +1164,7 @@ xfer_to_guest_mode_check: goto xfer_to_guest_mode_check; } guest_timing_enter_irqoff(); - rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val); + sie_return = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val); guest_timing_exit_irqoff(); local_irq_enable(); } @@ -1178,12 +1179,13 @@ skip_sie: kvm_vcpu_srcu_read_lock(vcpu); - if (rc == -EINTR) { - VCPU_EVENT(vcpu, 3, "%s", "machine check"); + if (sie_return == SIE64_RETURN_MCCK) { kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info); return 0; } + WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL); + if (rc > 0) rc = 0; /* we could still have an icpt */ else if (current->thread.gmap_int_code) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a52aa7a99b6b..191cc53caead 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -441,10 +441,17 @@ void do_secure_storage_access(struct pt_regs *regs) folio = phys_to_folio(addr); if (unlikely(!folio_try_get(folio))) return; - rc = arch_make_folio_accessible(folio); + rc = uv_convert_from_secure(folio_to_phys(folio)); + if (!rc) + clear_bit(PG_arch_1, &folio->flags.f); folio_put(folio); + /* + * There are some valid fixup types for kernel + * accesses to donated secure memory. zeropad is one + * of them. + */ if (rc) - BUG(); + return handle_fault_error_nolock(regs, 0); } else { if (faulthandler_disabled()) return handle_fault_error_nolock(regs, 0); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 35caa5746115..79f0818131e8 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -424,7 +424,7 @@ void __init efi_unmap_boot_services(void) if (efi_enabled(EFI_DBG)) return; - sz = sizeof(*ranges_to_free) * efi.memmap.nr_map + 1; + sz = sizeof(*ranges_to_free) * (efi.memmap.nr_map + 1); ranges_to_free = kzalloc(sz, GFP_KERNEL); if (!ranges_to_free) { pr_err("Failed to allocate storage for freeable EFI regions\n"); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 5b34b6f50e69..f1b6155065ff 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -35,6 +35,7 @@ #define IVPU_HW_IP_60XX 60 #define IVPU_HW_IP_REV_LNL_B0 4 +#define IVPU_HW_IP_REV_NVL_A0 0 #define IVPU_HW_BTRS_MTL 1 #define IVPU_HW_BTRS_LNL 2 diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index d69cd0d93569..d4a9bcda4100 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -70,8 +70,10 @@ static void wa_init(struct ivpu_device *vdev) if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) vdev->wa.interrupt_clear_with_0 = ivpu_hw_btrs_irqs_clear_with_0_mtl(vdev); - if (ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && - ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) + if ((ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && + ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) || + (ivpu_device_id(vdev) == PCI_DEVICE_ID_NVL && + ivpu_revision(vdev) == IVPU_HW_IP_REV_NVL_A0)) vdev->wa.disable_clock_relinquish = true; if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_ENABLE) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 5f63ed120a2c..6f0065257a77 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1656,6 +1656,8 @@ static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool ca ret = ec_install_handlers(ec, device, call_reg); if (ret) { + ec_remove_handlers(ec); + if (ec == first_ec) first_ec = NULL; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 607c1246d994..e388b19850e3 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1545,6 +1545,7 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, unsigned int val_num) { void *orig_work_buf; + unsigned int selector_reg; unsigned int win_offset; unsigned int win_page; bool page_chg; @@ -1563,10 +1564,31 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, return -EINVAL; } - /* It is possible to have selector register inside data window. - In that case, selector register is located on every page and - it needs no page switching, when accessed alone. */ + /* + * Calculate the address of the selector register in the corresponding + * data window if it is located on every page. + */ + page_chg = in_range(range->selector_reg, range->window_start, range->window_len); + if (page_chg) + selector_reg = range->range_min + win_page * range->window_len + + range->selector_reg - range->window_start; + + /* + * It is possible to have selector register inside data window. + * In that case, selector register is located on every page and it + * needs no page switching, when accessed alone. + * + * Nevertheless we should synchronize the cache values for it. + * This can't be properly achieved if the selector register is + * the first and the only one to be read inside the data window. + * That's why we update it in that case as well. + * + * However, we specifically avoid updating it for the default page, + * when it's overlapped with the real data window, to prevent from + * infinite looping. + */ if (val_num > 1 || + (page_chg && selector_reg != range->selector_reg) || range->window_start + win_offset != range->selector_reg) { /* Use separate work_buf during page switching */ orig_work_buf = map->work_buf; @@ -1575,7 +1597,7 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, ret = _regmap_update_bits(map, range->selector_reg, range->selector_mask, win_page << range->selector_shift, - &page_chg, false); + NULL, false); map->work_buf = orig_work_buf; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a324ede6206d..af679375b193 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -917,9 +917,8 @@ static void zram_account_writeback_submit(struct zram *zram) static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) { - u32 size, index = req->pps->index; - int err, prio; - bool huge; + u32 index = req->pps->index; + int err; err = blk_status_to_errno(req->bio.bi_status); if (err) { @@ -946,28 +945,13 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) goto out; } - if (zram->compressed_wb) { - /* - * ZRAM_WB slots get freed, we need to preserve data required - * for read decompression. - */ - size = get_slot_size(zram, index); - prio = get_slot_comp_priority(zram, index); - huge = test_slot_flag(zram, index, ZRAM_HUGE); - } - - slot_free(zram, index); - set_slot_flag(zram, index, ZRAM_WB); + clear_slot_flag(zram, index, ZRAM_IDLE); + if (test_slot_flag(zram, index, ZRAM_HUGE)) + atomic64_dec(&zram->stats.huge_pages); + atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size); + zs_free(zram->mem_pool, get_slot_handle(zram, index)); set_slot_handle(zram, index, req->blk_idx); - - if (zram->compressed_wb) { - if (huge) - set_slot_flag(zram, index, ZRAM_HUGE); - set_slot_size(zram, index, size); - set_slot_comp_priority(zram, index, prio); - } - - atomic64_inc(&zram->stats.pages_stored); + set_slot_flag(zram, index, ZRAM_WB); out: slot_unlock(zram, index); @@ -2010,8 +1994,13 @@ static void slot_free(struct zram *zram, u32 index) set_slot_comp_priority(zram, index, 0); if (test_slot_flag(zram, index, ZRAM_HUGE)) { + /* + * Writeback completion decrements ->huge_pages but keeps + * ZRAM_HUGE flag for deferred decompression path. + */ + if (!test_slot_flag(zram, index, ZRAM_WB)) + atomic64_dec(&zram->stats.huge_pages); clear_slot_flag(zram, index, ZRAM_HUGE); - atomic64_dec(&zram->stats.huge_pages); } if (test_slot_flag(zram, index, ZRAM_WB)) { diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 246b6205c5e0..ab146894ba4e 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -251,11 +251,13 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) bt_dev_err(hdev, "Hardware error 0x%2.2x", code); + hci_req_sync_lock(hdev); + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { bt_dev_err(hdev, "Reset after hardware error failed (%ld)", PTR_ERR(skb)); - return; + goto unlock; } kfree_skb(skb); @@ -263,18 +265,21 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) if (IS_ERR(skb)) { bt_dev_err(hdev, "Retrieving Intel exception info failed (%ld)", PTR_ERR(skb)); - return; + goto unlock; } if (skb->len != 13) { bt_dev_err(hdev, "Exception info size mismatch"); kfree_skb(skb); - return; + goto unlock; } bt_dev_err(hdev, "Exception info %s", (char *)(skb->data + 1)); kfree_skb(skb); + +unlock: + hci_req_sync_unlock(hdev); } EXPORT_SYMBOL_GPL(btintel_hw_error); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a1c5eb993e47..5c535f3ab722 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2376,8 +2376,11 @@ static void btusb_work(struct work_struct *work) if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_CVSD) { if (hdev->voice_setting & 0x0020) { static const int alts[3] = { 2, 4, 5 }; + unsigned int sco_idx; - new_alts = alts[data->sco_num - 1]; + sco_idx = min_t(unsigned int, data->sco_num - 1, + ARRAY_SIZE(alts) - 1); + new_alts = alts[sco_idx]; } else { new_alts = data->sco_num; } diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 91acf24f1ef5..91c96ad12342 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -541,6 +541,8 @@ static int download_firmware(struct ll_device *lldev) if (err || !fw->data || !fw->size) { bt_dev_err(lldev->hu.hdev, "request_firmware failed(errno %d) for %s", err, bts_scr_name); + if (!err) + release_firmware(fw); return -EINVAL; } ptr = (void *)fw->data; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 277884d91913..1f794524a1d9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1427,12 +1427,9 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, * If there is a problem with its frequency table, take it * offline and drop it. */ - if (policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_ASCENDING && - policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_DESCENDING) { - ret = cpufreq_table_validate_and_sort(policy); - if (ret) - goto out_offline_policy; - } + ret = cpufreq_table_validate_and_sort(policy); + if (ret) + goto out_offline_policy; /* related_cpus should at least include policy->cpus. */ cpumask_copy(policy->related_cpus, policy->cpus); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index e0e847764511..df01d33993d8 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -313,6 +313,17 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } +static void cs_limits(struct cpufreq_policy *policy) +{ + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); + + /* + * The limits have changed, so may have the current frequency. Reset + * requested_freq to avoid any unintended outcomes due to the mismatch. + */ + dbs_info->requested_freq = policy->cur; +} + static struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_groups = cs_groups }, @@ -322,6 +333,7 @@ static struct dbs_governor cs_governor = { .init = cs_init, .exit = cs_exit, .start = cs_start, + .limits = cs_limits, }; #define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 36eb7aee4bcd..acf101878733 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -563,6 +563,7 @@ EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs; /* Protect gov->gdbs_data against cpufreq_dbs_governor_exit() */ @@ -574,6 +575,8 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) mutex_lock(&policy_dbs->update_mutex); cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); + if (gov->limits) + gov->limits(policy); mutex_unlock(&policy_dbs->update_mutex); out: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 168c23fd7fca..1462d59277bd 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -138,6 +138,7 @@ struct dbs_governor { int (*init)(struct dbs_data *dbs_data); void (*exit)(struct dbs_data *dbs_data); void (*start)(struct cpufreq_policy *policy); + void (*limits)(struct cpufreq_policy *policy); }; static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 7f251daf03ce..5b364d8da4f9 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -360,6 +360,10 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) if (policy_has_boost_freq(policy)) policy->boost_supported = true; + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING || + policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_DESCENDING) + return 0; + return set_freq_table_sorted(policy); } diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 4589bf11d3fe..80aeb0d556bd 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -59,6 +59,7 @@ config CXL_ACPI tristate "CXL ACPI: Platform Support" depends on ACPI depends on ACPI_NUMA + depends on CXL_PMEM || !CXL_PMEM default CXL_BUS select ACPI_TABLE_LIB select ACPI_HMAT diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index c222e98ae736..cb5d5a047a9d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -94,7 +94,6 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) struct cxl_hdm *cxlhdm; void __iomem *hdm; u32 ctrl; - int i; if (!info) return false; @@ -113,22 +112,16 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) return false; /* - * If any decoders are committed already, there should not be any - * emulated DVSEC decoders. + * If HDM decoders are globally enabled, do not fall back to DVSEC + * range emulation. Zeroed decoder registers after region teardown + * do not imply absence of HDM capability. + * + * Falling back to DVSEC here would treat the decoder as AUTO and + * may incorrectly latch default interleave settings. */ - for (i = 0; i < cxlhdm->decoder_count; i++) { - ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); - dev_dbg(&info->port->dev, - "decoder%d.%d: committed: %ld base: %#x_%.8x size: %#x_%.8x\n", - info->port->id, i, - FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl), - readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i))); - if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) - return false; - } + ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + if (ctrl & CXL_HDM_DECODER_ENABLE) + return false; return true; } diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index e7a6452bf544..12386d912705 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1301,7 +1301,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) * Require an endpoint to be safe otherwise the driver can not * be sure that the device is unmapped. */ - if (endpoint && cxl_num_decoders_committed(endpoint) == 0) + if (cxlmd->dev.driver && cxl_num_decoders_committed(endpoint) == 0) return __cxl_mem_sanitize(mds, cmd); return -EBUSY; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0c5957d1d329..c5aacd7054f1 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -552,10 +552,13 @@ static void cxl_port_release(struct device *dev) xa_destroy(&port->dports); xa_destroy(&port->regions); ida_free(&cxl_port_ida, port->id); - if (is_cxl_root(port)) + + if (is_cxl_root(port)) { kfree(to_cxl_root(port)); - else + } else { + put_device(dev->parent); kfree(port); + } } static ssize_t decoders_committed_show(struct device *dev, @@ -707,6 +710,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, struct cxl_port *iter; dev->parent = &parent_port->dev; + get_device(dev->parent); port->depth = parent_port->depth + 1; port->parent_dport = parent_dport; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 42874948b589..c37ae0b28bbb 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3854,8 +3854,10 @@ static int __construct_region(struct cxl_region *cxlr, } rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group); - if (rc) + if (rc) { + kfree(res); return rc; + } rc = insert_resource(cxlrd->res, res); if (rc) { diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 082ec0f1c3a0..261dff7ced9f 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -554,7 +554,7 @@ static __exit void cxl_pmem_exit(void) MODULE_DESCRIPTION("CXL PMEM: Persistent Memory Support"); MODULE_LICENSE("GPL v2"); -module_init(cxl_pmem_init); +subsys_initcall(cxl_pmem_init); module_exit(cxl_pmem_exit); MODULE_IMPORT_NS("CXL"); MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 40c22438b1d2..4f27c75abedb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -692,9 +692,9 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err_ib_sched; } - /* Drop the initial kref_init count (see drm_sched_main as example) */ - dma_fence_put(f); ret = dma_fence_wait(f, false); + /* Drop the returned fence reference after the wait completes */ + dma_fence_put(f); err_ib_sched: amdgpu_job_free(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d8296dfc5e8a..6d8531f9b882 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4207,7 +4207,8 @@ fail: static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) { - char *input = amdgpu_lockup_timeout; + char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; + char *input = buf; char *timeout_setting = NULL; int index = 0; long timeout; @@ -4217,9 +4218,17 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout = adev->video_timeout = msecs_to_jiffies(2000); - if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) + if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) return 0; + /* + * strsep() destructively modifies its input by replacing delimiters + * with '\0'. Use a stack copy so the global module parameter buffer + * remains intact for multi-GPU systems where this function is called + * once per device. + */ + strscpy(buf, amdgpu_lockup_timeout, sizeof(buf)); + while ((timeout_setting = strsep(&input, ",")) && strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { ret = kstrtol(timeout_setting, 0, &timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 64c519cd7395..d88523568b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -35,10 +35,13 @@ * PASIDs are global address space identifiers that can be shared * between the GPU, an IOMMU and the driver. VMs on different devices * may use the same PASID if they share the same address - * space. Therefore PASIDs are allocated using a global IDA. VMs are - * looked up from the PASID per amdgpu_device. + * space. Therefore PASIDs are allocated using IDR cyclic allocator + * (similar to kernel PID allocation) which naturally delays reuse. + * VMs are looked up from the PASID per amdgpu_device. */ -static DEFINE_IDA(amdgpu_pasid_ida); + +static DEFINE_IDR(amdgpu_pasid_idr); +static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock); /* Helper to free pasid from a fence callback */ struct amdgpu_pasid_cb { @@ -50,8 +53,8 @@ struct amdgpu_pasid_cb { * amdgpu_pasid_alloc - Allocate a PASID * @bits: Maximum width of the PASID in bits, must be at least 1 * - * Allocates a PASID of the given width while keeping smaller PASIDs - * available if possible. + * Uses kernel's IDR cyclic allocator (same as PID allocation). + * Allocates sequentially with automatic wrap-around. * * Returns a positive integer on success. Returns %-EINVAL if bits==0. * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on @@ -59,14 +62,15 @@ struct amdgpu_pasid_cb { */ int amdgpu_pasid_alloc(unsigned int bits) { - int pasid = -EINVAL; + int pasid; - for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), - (1U << bits) - 1, GFP_KERNEL); - if (pasid != -ENOSPC) - break; - } + if (bits == 0) + return -EINVAL; + + spin_lock(&amdgpu_pasid_idr_lock); + pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1, + 1U << bits, GFP_KERNEL); + spin_unlock(&amdgpu_pasid_idr_lock); if (pasid >= 0) trace_amdgpu_pasid_allocated(pasid); @@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits) void amdgpu_pasid_free(u32 pasid) { trace_amdgpu_pasid_freed(pasid); - ida_free(&amdgpu_pasid_ida, pasid); + + spin_lock(&amdgpu_pasid_idr_lock); + idr_remove(&amdgpu_pasid_idr, pasid); + spin_unlock(&amdgpu_pasid_idr_lock); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, @@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) } } } + +/** + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager + * + * Cleanup the IDR allocator. + */ +void amdgpu_pasid_mgr_cleanup(void) +{ + spin_lock(&amdgpu_pasid_idr_lock); + idr_destroy(&amdgpu_pasid_idr); + spin_unlock(&amdgpu_pasid_idr_lock); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index b3649cd3af56..a57919478d3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(u32 pasid); void amdgpu_pasid_free_delayed(struct dma_resv *resv, u32 pasid); +void amdgpu_pasid_mgr_cleanup(void); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c60cbce356cf..a677e38a493b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2898,6 +2898,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) xa_destroy(&adev->vm_manager.pasids); amdgpu_vmid_mgr_fini(adev); + amdgpu_pasid_mgr_cleanup(); } /** @@ -2973,14 +2974,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, if (!root) return false; - addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, ts, write_fault)) { + node_id, addr >> PAGE_SHIFT, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } + addr /= AMDGPU_GPU_PAGE_SIZE; + r = amdgpu_bo_reserve(root, true); if (r) goto error_unref; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 09dabb3b3297..462a32abf720 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -3170,11 +3170,11 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v struct kfd_process *process; int ret; - /* Each FD owns only one kfd_process */ - if (p->context_id != KFD_CONTEXT_ID_PRIMARY) + if (!filep->private_data || !p) return -EINVAL; - if (!filep->private_data || !p) + /* Each FD owns only one kfd_process */ + if (p->context_id != KFD_CONTEXT_ID_PRIMARY) return -EINVAL; mutex_lock(&kfd_processes_mutex); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 085cc98bd875..2328c1aa0ead 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3909,8 +3909,9 @@ void amdgpu_dm_update_connector_after_detect( aconnector->dc_sink = sink; dc_sink_retain(aconnector->dc_sink); + drm_edid_free(aconnector->drm_edid); + aconnector->drm_edid = NULL; if (sink->dc_edid.length == 0) { - aconnector->drm_edid = NULL; hdmi_cec_unset_edid(aconnector); if (aconnector->dc_link->aux_mode) { drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); @@ -5422,7 +5423,7 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, caps = &dm->backlight_caps[aconnector->bl_idx]; /* Only offer ABM property when non-OLED and user didn't turn off by module parameter */ - if (!caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0) + if (caps->ext_caps && !caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0) drm_object_attach_property(&aconnector->base.base, dm->adev->mode_info.abm_level_property, ABM_SYSFS_CONTROL); @@ -12524,6 +12525,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (dc_resource_is_dsc_encoding_supported(dc)) { for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + dm_new_crtc_state->mode_changed_independent_from_dsc = new_crtc_state->mode_changed; + } + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = add_affected_mst_dsc_crtcs(state, crtc); if (ret) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 800813671748..d15812d51d72 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -984,6 +984,7 @@ struct dm_crtc_state { bool freesync_vrr_info_changed; + bool mode_changed_independent_from_dsc; bool dsc_force_changed; bool vrr_supported; struct mod_freesync_config freesync_config; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7be50e8c0636..5d8c4c7020b1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1744,9 +1744,11 @@ int pre_validate_dsc(struct drm_atomic_state *state, int ind = find_crtc_index_in_state_by_stream(state, stream); if (ind >= 0) { + struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(state->crtcs[ind].new_state); + DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n", __func__, __LINE__, stream); - state->crtcs[ind].new_state->mode_changed = 0; + dm_new_crtc_state->base.mode_changed = dm_new_crtc_state->mode_changed_independent_from_dsc; } } } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 92c123aca0c9..fdcf8db6be50 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -650,9 +650,6 @@ static struct link_encoder *dce100_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); @@ -661,7 +658,8 @@ static struct link_encoder *dce100_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index 95852d277c22..ab71f645c90e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -671,7 +671,7 @@ static struct link_encoder *dce110_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -682,7 +682,8 @@ static struct link_encoder *dce110_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 58c6a00397cf..b7051bfd4326 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -632,7 +632,7 @@ static struct link_encoder *dce112_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -643,7 +643,8 @@ static struct link_encoder *dce112_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index 71d76b021375..7ee70f7b3aa7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -716,7 +716,7 @@ static struct link_encoder *dce120_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -727,7 +727,8 @@ static struct link_encoder *dce120_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index c27645708286..3d52973dd7f2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -746,18 +746,16 @@ static struct link_encoder *dce60_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); dce60_link_encoder_construct(enc110, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index d66d8ac6d897..89927727a0d9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -752,9 +752,6 @@ static struct link_encoder *dce80_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); @@ -763,7 +760,8 @@ static struct link_encoder *dce80_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a8d63d4d1f6e..554f616328c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -59,6 +59,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v13_0_0_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), @@ -1043,8 +1047,35 @@ static bool smu_v13_0_0_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v13_0_0_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v13_0_0_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 896b51c8a9a7..870bcc86fd79 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1391,7 +1391,7 @@ static int smu_v13_0_6_emit_clk_levels(struct smu_context *smu, break; case SMU_OD_MCLK: if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SET_UCLK_MAX))) - return 0; + return -EOPNOTSUPP; size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", @@ -2122,6 +2122,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, { struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; + struct smu_dpm_table *uclk_table = &dpm_context->dpm_tables.uclk_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; uint32_t min_clk; uint32_t max_clk; @@ -2221,14 +2222,16 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, if (ret) return ret; - min_clk = SMU_DPM_TABLE_MIN( - &dpm_context->dpm_tables.uclk_table); - max_clk = SMU_DPM_TABLE_MAX( - &dpm_context->dpm_tables.uclk_table); - ret = smu_v13_0_6_set_soft_freq_limited_range( - smu, SMU_UCLK, min_clk, max_clk, false); - if (ret) - return ret; + if (SMU_DPM_TABLE_MAX(uclk_table) != + pstate_table->uclk_pstate.curr.max) { + min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.uclk_table); + max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.uclk_table); + ret = smu_v13_0_6_set_soft_freq_limited_range(smu, + SMU_UCLK, min_clk, + max_clk, false); + if (ret) + return ret; + } smu_v13_0_reset_custom_level(smu); } break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 5500a0f12f0e..f331e87858c9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -59,6 +59,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v13_0_7_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), @@ -1053,8 +1057,35 @@ static bool smu_v13_0_7_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v13_0_7_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v13_0_7_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 73762d9b5969..c3ebfac062a7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -56,6 +56,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v14_0_2_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), SMU_FEATURE_BIT_INIT(FEATURE_DPM_UCLK_BIT), @@ -922,8 +926,35 @@ static bool smu_v14_0_2_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 7b5a49935ae4..c549293b5bb6 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -550,27 +550,27 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, } EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create); -static bool drm_gem_shmem_try_map_pmd(struct vm_fault *vmf, unsigned long addr, - struct page *page) +static vm_fault_t try_insert_pfn(struct vm_fault *vmf, unsigned int order, + unsigned long pfn) { + if (!order) { + return vmf_insert_pfn(vmf->vma, vmf->address, pfn); #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP - unsigned long pfn = page_to_pfn(page); - unsigned long paddr = pfn << PAGE_SHIFT; - bool aligned = (addr & ~PMD_MASK) == (paddr & ~PMD_MASK); - - if (aligned && - pmd_none(*vmf->pmd) && - folio_test_pmd_mappable(page_folio(page))) { - pfn &= PMD_MASK >> PAGE_SHIFT; - if (vmf_insert_pfn_pmd(vmf, pfn, false) == VM_FAULT_NOPAGE) - return true; - } + } else if (order == PMD_ORDER) { + unsigned long paddr = pfn << PAGE_SHIFT; + bool aligned = (vmf->address & ~PMD_MASK) == (paddr & ~PMD_MASK); + + if (aligned && + folio_test_pmd_mappable(page_folio(pfn_to_page(pfn)))) { + pfn &= PMD_MASK >> PAGE_SHIFT; + return vmf_insert_pfn_pmd(vmf, pfn, false); + } #endif - - return false; + } + return VM_FAULT_FALLBACK; } -static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) +static vm_fault_t drm_gem_shmem_any_fault(struct vm_fault *vmf, unsigned int order) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; @@ -581,6 +581,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) pgoff_t page_offset; unsigned long pfn; + if (order && order != PMD_ORDER) + return VM_FAULT_FALLBACK; + /* Offset to faulty address in the VMA. */ page_offset = vmf->pgoff - vma->vm_pgoff; @@ -593,13 +596,8 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) goto out; } - if (drm_gem_shmem_try_map_pmd(vmf, vmf->address, pages[page_offset])) { - ret = VM_FAULT_NOPAGE; - goto out; - } - pfn = page_to_pfn(pages[page_offset]); - ret = vmf_insert_pfn(vma, vmf->address, pfn); + ret = try_insert_pfn(vmf, order, pfn); out: dma_resv_unlock(shmem->base.resv); @@ -607,6 +605,11 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) return ret; } +static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) +{ + return drm_gem_shmem_any_fault(vmf, 0); +} + static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; @@ -643,6 +646,9 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) const struct vm_operations_struct drm_gem_shmem_vm_ops = { .fault = drm_gem_shmem_fault, +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP + .huge_fault = drm_gem_shmem_any_fault, +#endif .open = drm_gem_shmem_vm_open, .close = drm_gem_shmem_vm_close, }; diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 250734dee928..8d9fd1917c6e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -602,7 +602,7 @@ int drm_syncobj_get_handle(struct drm_file *file_private, drm_syncobj_get(syncobj); ret = xa_alloc(&file_private->syncobj_xa, handle, syncobj, xa_limit_32b, - GFP_NOWAIT); + GFP_KERNEL); if (ret) drm_syncobj_put(syncobj); @@ -716,7 +716,7 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, drm_syncobj_get(syncobj); ret = xa_alloc(&file_private->syncobj_xa, handle, syncobj, xa_limit_32b, - GFP_NOWAIT); + GFP_KERNEL); if (ret) drm_syncobj_put(syncobj); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c4246481fc2f..0f82bf771a92 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4602,6 +4602,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_crtc_state *saved_state; + int err; saved_state = intel_crtc_state_alloc(crtc); if (!saved_state) @@ -4610,7 +4611,12 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state, /* free the old crtc_state->hw members */ intel_crtc_free_hw_state(crtc_state); - intel_dp_tunnel_atomic_clear_stream_bw(state, crtc_state); + err = intel_dp_tunnel_atomic_clear_stream_bw(state, crtc_state); + if (err) { + kfree(saved_state); + + return err; + } /* FIXME: before the switch to atomic started, a new pipe_config was * kzalloc'd. Code that depends on any field being zero should be diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c index 83865c02d477..55b423fd6b6f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c @@ -621,19 +621,27 @@ int intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, * * Clear any DP tunnel stream BW requirement set by * intel_dp_tunnel_atomic_compute_stream_bw(). + * + * Returns 0 in case of success, a negative error code otherwise. */ -void intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) +int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + int err; if (!crtc_state->dp_tunnel_ref.tunnel) - return; + return 0; + + err = drm_dp_tunnel_atomic_set_stream_bw(&state->base, + crtc_state->dp_tunnel_ref.tunnel, + crtc->pipe, 0); + if (err) + return err; - drm_dp_tunnel_atomic_set_stream_bw(&state->base, - crtc_state->dp_tunnel_ref.tunnel, - crtc->pipe, 0); drm_dp_tunnel_ref_put(&crtc_state->dp_tunnel_ref); + + return 0; } /** diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h index 7f0f720e8dca..10ab9eebcef6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h @@ -40,8 +40,8 @@ int intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, struct intel_dp *intel_dp, const struct intel_connector *connector, struct intel_crtc_state *crtc_state); -void intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state); +int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state); int intel_dp_tunnel_atomic_add_state_for_crtc(struct intel_atomic_state *state, struct intel_crtc *crtc); @@ -88,9 +88,12 @@ intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, return 0; } -static inline void +static inline int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) {} + struct intel_crtc_state *crtc_state) +{ + return 0; +} static inline int intel_dp_tunnel_atomic_add_state_for_crtc(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index a7bce0c6a17e..264e6843bff1 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -496,8 +496,10 @@ gmbus_xfer_read_chunk(struct intel_display *display, val = intel_de_read_fw(display, GMBUS3(display)); do { - if (extra_byte_added && len == 1) + if (extra_byte_added && len == 1) { + len--; break; + } *buf++ = val & 0xff; val >>= 8; diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index e06a0618b4c6..076b9b356481 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -436,11 +436,16 @@ void intel_plane_copy_hw_state(struct intel_plane_state *plane_state, drm_framebuffer_get(plane_state->hw.fb); } +static void unlink_nv12_plane(struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state); + void intel_plane_set_invisible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + unlink_nv12_plane(crtc_state, plane_state); + crtc_state->active_planes &= ~BIT(plane->id); crtc_state->scaled_planes &= ~BIT(plane->id); crtc_state->nv12_planes &= ~BIT(plane->id); @@ -1513,6 +1518,9 @@ static void unlink_nv12_plane(struct intel_crtc_state *crtc_state, struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + if (!plane_state->planar_linked_plane) + return; + plane_state->planar_linked_plane = NULL; if (!plane_state->is_y_plane) @@ -1550,8 +1558,7 @@ static int icl_check_nv12_planes(struct intel_atomic_state *state, if (plane->pipe != crtc->pipe) continue; - if (plane_state->planar_linked_plane) - unlink_nv12_plane(crtc_state, plane_state); + unlink_nv12_plane(crtc_state, plane_state); } if (!crtc_state->nv12_planes) diff --git a/drivers/gpu/drm/i915/i915_wait_util.h b/drivers/gpu/drm/i915/i915_wait_util.h index 7376898e3bf8..e1ed7921ec70 100644 --- a/drivers/gpu/drm/i915/i915_wait_util.h +++ b/drivers/gpu/drm/i915/i915_wait_util.h @@ -25,9 +25,9 @@ might_sleep(); \ for (;;) { \ const bool expired__ = ktime_after(ktime_get_raw(), end__); \ - OP; \ /* Guarantee COND check prior to timeout */ \ barrier(); \ + OP; \ if (COND) { \ ret__ = 0; \ break; \ diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 17c67f02016b..aaf6c9ebd319 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1236,6 +1236,11 @@ static int mtk_dsi_probe(struct platform_device *pdev) dsi->host.ops = &mtk_dsi_ops; dsi->host.dev = dev; + + init_waitqueue_head(&dsi->irq_wait_queue); + + platform_set_drvdata(pdev, dsi); + ret = mipi_dsi_host_register(&dsi->host); if (ret < 0) return dev_err_probe(dev, ret, "Failed to register DSI host\n"); @@ -1247,10 +1252,6 @@ static int mtk_dsi_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "Failed to request DSI irq\n"); } - init_waitqueue_head(&dsi->irq_wait_queue); - - platform_set_drvdata(pdev, dsi); - dsi->bridge.of_node = dev->of_node; dsi->bridge.type = DRM_MODE_CONNECTOR_DSI; diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 24fc64fc832e..9d66f168ab8a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -553,6 +553,7 @@ #define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) +#define L3_128B_256B_WRT_DIS REG_BIT(40 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 2d9ce2c4cb4f..713a303c9053 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1442,9 +1442,9 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op, err = vma_check_userptr(vm, op->map.vma, pt_update); break; case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) + if (op->remap.prev && !op->remap.skip_prev) err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) + if (!err && op->remap.next && !op->remap.skip_next) err = vma_check_userptr(vm, op->remap.next, pt_update); break; case DRM_GPUVA_OP_UNMAP: @@ -2198,12 +2198,12 @@ static int op_prepare(struct xe_vm *vm, err = unbind_op_prepare(tile, pt_update_ops, old); - if (!err && op->remap.prev) { + if (!err && op->remap.prev && !op->remap.skip_prev) { err = bind_op_prepare(vm, tile, pt_update_ops, op->remap.prev, false); pt_update_ops->wait_vm_bookkeep = true; } - if (!err && op->remap.next) { + if (!err && op->remap.next && !op->remap.skip_next) { err = bind_op_prepare(vm, tile, pt_update_ops, op->remap.next, false); pt_update_ops->wait_vm_bookkeep = true; @@ -2428,10 +2428,10 @@ static void op_commit(struct xe_vm *vm, unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2); - if (op->remap.prev) + if (op->remap.prev && !op->remap.skip_prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, fence, fence2, false); - if (op->remap.next) + if (op->remap.next && !op->remap.skip_next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, fence, fence2, false); break; diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c index 968f32496282..2ae9eff2a7c0 100644 --- a/drivers/gpu/drm/xe/xe_sriov_packet.c +++ b/drivers/gpu/drm/xe/xe_sriov_packet.c @@ -341,6 +341,8 @@ ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data); if (ret) { xe_sriov_packet_free(*data); + *data = NULL; + return ret; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a82e3a4fb389..ffdbab106a58 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2554,7 +2554,6 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) if (!err && op->remap.skip_prev) { op->remap.prev->tile_present = tile_present; - op->remap.prev = NULL; } } if (op->remap.next) { @@ -2564,11 +2563,13 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) if (!err && op->remap.skip_next) { op->remap.next->tile_present = tile_present; - op->remap.next = NULL; } } - /* Adjust for partial unbind after removing VMA from VM */ + /* + * Adjust for partial unbind after removing VMA from VM. In case + * of unwind we might need to undo this later. + */ if (!err) { op->base.remap.unmap->va->va.addr = op->remap.start; op->base.remap.unmap->va->va.range = op->remap.range; @@ -2687,6 +2688,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); + op->remap.old_start = op->remap.start; + op->remap.old_range = op->remap.range; flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { @@ -2835,8 +2838,19 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; xe_svm_notifier_unlock(vm); - if (post_commit) + if (post_commit) { + /* + * Restore the old va range, in case of the + * prev/next skip optimisation. Otherwise what + * we re-insert here could be smaller than the + * original range. + */ + op->base.remap.unmap->va->va.addr = + op->remap.old_start; + op->base.remap.unmap->va->va.range = + op->remap.old_range; xe_vm_insert_vma(vm, vma); + } } break; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 437f64202f3b..e2946e311d7a 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -373,6 +373,10 @@ struct xe_vma_op_remap { u64 start; /** @range: range of the VMA unmap */ u64 range; + /** @old_start: Original start of the VMA we unmap */ + u64 old_start; + /** @old_range: Original range of the VMA we unmap */ + u64 old_range; /** @skip_prev: skip prev rebind */ bool skip_prev; /** @skip_next: skip next rebind */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 462c2fa712e0..d7e309ad9aba 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -247,7 +247,8 @@ static const struct xe_rtp_entry_sr gt_was[] = { LSN_DIM_Z_WGT_MASK, LSN_LNI_WGT(1) | LSN_LNE_WGT(1) | LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) | - LSN_DIM_Z_WGT(1))) + LSN_DIM_Z_WGT(1)), + SET(LSC_CHICKEN_BIT_0_UDW, L3_128B_256B_WRT_DIS)) }, /* Xe2_HPM */ diff --git a/drivers/hwmon/adm1177.c b/drivers/hwmon/adm1177.c index 8b2c965480e3..7888afe8dafd 100644 --- a/drivers/hwmon/adm1177.c +++ b/drivers/hwmon/adm1177.c @@ -10,6 +10,8 @@ #include <linux/hwmon.h> #include <linux/i2c.h> #include <linux/init.h> +#include <linux/math64.h> +#include <linux/minmax.h> #include <linux/module.h> #include <linux/regulator/consumer.h> @@ -33,7 +35,7 @@ struct adm1177_state { struct i2c_client *client; u32 r_sense_uohm; - u32 alert_threshold_ua; + u64 alert_threshold_ua; bool vrange_high; }; @@ -48,7 +50,7 @@ static int adm1177_write_cmd(struct adm1177_state *st, u8 cmd) } static int adm1177_write_alert_thr(struct adm1177_state *st, - u32 alert_threshold_ua) + u64 alert_threshold_ua) { u64 val; int ret; @@ -91,8 +93,8 @@ static int adm1177_read(struct device *dev, enum hwmon_sensor_types type, *val = div_u64((105840000ull * dummy), 4096 * st->r_sense_uohm); return 0; - case hwmon_curr_max_alarm: - *val = st->alert_threshold_ua; + case hwmon_curr_max: + *val = div_u64(st->alert_threshold_ua, 1000); return 0; default: return -EOPNOTSUPP; @@ -126,9 +128,10 @@ static int adm1177_write(struct device *dev, enum hwmon_sensor_types type, switch (type) { case hwmon_curr: switch (attr) { - case hwmon_curr_max_alarm: - adm1177_write_alert_thr(st, val); - return 0; + case hwmon_curr_max: + val = clamp_val(val, 0, + div_u64(105840000ULL, st->r_sense_uohm)); + return adm1177_write_alert_thr(st, (u64)val * 1000); default: return -EOPNOTSUPP; } @@ -156,7 +159,7 @@ static umode_t adm1177_is_visible(const void *data, if (st->r_sense_uohm) return 0444; return 0; - case hwmon_curr_max_alarm: + case hwmon_curr_max: if (st->r_sense_uohm) return 0644; return 0; @@ -170,7 +173,7 @@ static umode_t adm1177_is_visible(const void *data, static const struct hwmon_channel_info * const adm1177_info[] = { HWMON_CHANNEL_INFO(curr, - HWMON_C_INPUT | HWMON_C_MAX_ALARM), + HWMON_C_INPUT | HWMON_C_MAX), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), NULL @@ -192,7 +195,8 @@ static int adm1177_probe(struct i2c_client *client) struct device *dev = &client->dev; struct device *hwmon_dev; struct adm1177_state *st; - u32 alert_threshold_ua; + u64 alert_threshold_ua; + u32 prop; int ret; st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL); @@ -208,22 +212,26 @@ static int adm1177_probe(struct i2c_client *client) if (device_property_read_u32(dev, "shunt-resistor-micro-ohms", &st->r_sense_uohm)) st->r_sense_uohm = 0; - if (device_property_read_u32(dev, "adi,shutdown-threshold-microamp", - &alert_threshold_ua)) { - if (st->r_sense_uohm) - /* - * set maximum default value from datasheet based on - * shunt-resistor - */ - alert_threshold_ua = div_u64(105840000000, - st->r_sense_uohm); - else - alert_threshold_ua = 0; + if (!device_property_read_u32(dev, "adi,shutdown-threshold-microamp", + &prop)) { + alert_threshold_ua = prop; + } else if (st->r_sense_uohm) { + /* + * set maximum default value from datasheet based on + * shunt-resistor + */ + alert_threshold_ua = div_u64(105840000000ULL, + st->r_sense_uohm); + } else { + alert_threshold_ua = 0; } st->vrange_high = device_property_read_bool(dev, "adi,vrange-high-enable"); - if (alert_threshold_ua && st->r_sense_uohm) - adm1177_write_alert_thr(st, alert_threshold_ua); + if (alert_threshold_ua && st->r_sense_uohm) { + ret = adm1177_write_alert_thr(st, alert_threshold_ua); + if (ret) + return ret; + } ret = adm1177_write_cmd(st, ADM1177_CMD_V_CONT | ADM1177_CMD_I_CONT | diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c index b2fc936851e1..457089c561b4 100644 --- a/drivers/hwmon/peci/cputemp.c +++ b/drivers/hwmon/peci/cputemp.c @@ -131,7 +131,7 @@ static int get_temp_target(struct peci_cputemp *priv, enum peci_temp_target_type *val = priv->temp.target.tjmax; break; case crit_hyst_type: - *val = priv->temp.target.tjmax - priv->temp.target.tcontrol; + *val = priv->temp.target.tcontrol; break; default: ret = -EOPNOTSUPP; @@ -319,7 +319,7 @@ static umode_t cputemp_is_visible(const void *data, enum hwmon_sensor_types type { const struct peci_cputemp *priv = data; - if (channel > CPUTEMP_CHANNEL_NUMS) + if (channel >= CPUTEMP_CHANNEL_NUMS) return 0; if (channel < channel_core) diff --git a/drivers/hwmon/pmbus/ina233.c b/drivers/hwmon/pmbus/ina233.c index 2d8b5a5347ed..7aebd854763a 100644 --- a/drivers/hwmon/pmbus/ina233.c +++ b/drivers/hwmon/pmbus/ina233.c @@ -72,7 +72,8 @@ static int ina233_read_word_data(struct i2c_client *client, int page, /* Adjust returned value to match VIN coefficients */ /* VIN: 1.25 mV VSHUNT: 2.5 uV LSB */ - ret = DIV_ROUND_CLOSEST(ret * 25, 12500); + ret = clamp_val(DIV_ROUND_CLOSEST((s16)ret * 25, 12500), + S16_MIN, S16_MAX) & 0xffff; break; default: ret = -ENODATA; diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c index e7dac26b5be6..3e3a887aad05 100644 --- a/drivers/hwmon/pmbus/isl68137.c +++ b/drivers/hwmon/pmbus/isl68137.c @@ -96,7 +96,15 @@ static ssize_t isl68137_avs_enable_show_page(struct i2c_client *client, int page, char *buf) { - int val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + int val; + + val = pmbus_lock_interruptible(client); + if (val) + return val; + + val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + + pmbus_unlock(client); if (val < 0) return val; @@ -118,6 +126,10 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client, op_val = result ? ISL68137_VOUT_AVS : 0; + rc = pmbus_lock_interruptible(client); + if (rc) + return rc; + /* * Writes to VOUT setpoint over AVSBus will persist after the VRM is * switched to PMBus control. Switching back to AVSBus control @@ -129,17 +141,20 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client, rc = pmbus_read_word_data(client, page, 0xff, PMBUS_VOUT_COMMAND); if (rc < 0) - return rc; + goto unlock; rc = pmbus_write_word_data(client, page, PMBUS_VOUT_COMMAND, rc); if (rc < 0) - return rc; + goto unlock; } rc = pmbus_update_byte_data(client, page, PMBUS_OPERATION, ISL68137_VOUT_AVS, op_val); +unlock: + pmbus_unlock(client); + return (rc < 0) ? rc : count; } diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index be6d05def115..572be3ebc03d 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -6,6 +6,7 @@ * Copyright (c) 2012 Guenter Roeck */ +#include <linux/atomic.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/dcache.h> @@ -21,8 +22,8 @@ #include <linux/pmbus.h> #include <linux/regulator/driver.h> #include <linux/regulator/machine.h> -#include <linux/of.h> #include <linux/thermal.h> +#include <linux/workqueue.h> #include "pmbus.h" /* @@ -112,6 +113,11 @@ struct pmbus_data { struct mutex update_lock; +#if IS_ENABLED(CONFIG_REGULATOR) + atomic_t regulator_events[PMBUS_PAGES]; + struct work_struct regulator_notify_work; +#endif + bool has_status_word; /* device uses STATUS_WORD register */ int (*read_status)(struct i2c_client *client, int page); @@ -1209,6 +1215,12 @@ static ssize_t pmbus_show_boolean(struct device *dev, return sysfs_emit(buf, "%d\n", val); } +static ssize_t pmbus_show_zero(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + return sysfs_emit(buf, "0\n"); +} + static ssize_t pmbus_show_sensor(struct device *dev, struct device_attribute *devattr, char *buf) { @@ -1407,7 +1419,7 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data, int reg, enum pmbus_sensor_classes class, bool update, bool readonly, - bool convert) + bool writeonly, bool convert) { struct pmbus_sensor *sensor; struct device_attribute *a; @@ -1436,7 +1448,8 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data, sensor->data = -ENODATA; pmbus_dev_attr_init(a, sensor->name, readonly ? 0444 : 0644, - pmbus_show_sensor, pmbus_set_sensor); + writeonly ? pmbus_show_zero : pmbus_show_sensor, + pmbus_set_sensor); if (pmbus_add_attribute(data, &a->attr)) return NULL; @@ -1495,8 +1508,10 @@ static int pmbus_add_label(struct pmbus_data *data, struct pmbus_limit_attr { u16 reg; /* Limit register */ u16 sbit; /* Alarm attribute status bit */ - bool update; /* True if register needs updates */ - bool low; /* True if low limit; for limits with compare functions only */ + bool readonly:1; /* True if the attribute is read-only */ + bool writeonly:1; /* True if the attribute is write-only */ + bool update:1; /* True if register needs updates */ + bool low:1; /* True if low limit; for limits with compare functions only */ const char *attr; /* Attribute name */ const char *alarm; /* Alarm attribute name */ }; @@ -1511,9 +1526,9 @@ struct pmbus_sensor_attr { u8 nlimit; /* # of limit registers */ enum pmbus_sensor_classes class;/* sensor class */ const char *label; /* sensor label */ - bool paged; /* true if paged sensor */ - bool update; /* true if update needed */ - bool compare; /* true if compare function needed */ + bool paged:1; /* true if paged sensor */ + bool update:1; /* true if update needed */ + bool compare:1; /* true if compare function needed */ u32 func; /* sensor mask */ u32 sfunc; /* sensor status mask */ int sreg; /* status register */ @@ -1544,7 +1559,7 @@ static int pmbus_add_limit_attrs(struct i2c_client *client, curr = pmbus_add_sensor(data, name, l->attr, index, page, 0xff, l->reg, attr->class, attr->update || l->update, - false, true); + l->readonly, l->writeonly, true); if (!curr) return -ENOMEM; if (l->sbit && (info->func[page] & attr->sfunc)) { @@ -1584,7 +1599,7 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client, return ret; } base = pmbus_add_sensor(data, name, "input", index, page, phase, - attr->reg, attr->class, true, true, true); + attr->reg, attr->class, true, true, false, true); if (!base) return -ENOMEM; /* No limit and alarm attributes for phase specific sensors */ @@ -1707,23 +1722,29 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_VIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_VIN_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_VIN_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_VIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_VIN_MIN, + .readonly = true, .attr = "rated_min", }, { .reg = PMBUS_MFR_VIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1776,23 +1797,29 @@ static const struct pmbus_limit_attr vout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_VOUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_VOUT_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_VOUT_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_VOUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_VOUT_MIN, + .readonly = true, .attr = "rated_min", }, { .reg = PMBUS_MFR_VOUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1852,20 +1879,25 @@ static const struct pmbus_limit_attr iin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_IIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_IIN_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_IIN_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_IIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_IIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1889,20 +1921,25 @@ static const struct pmbus_limit_attr iout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_IOUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_IOUT_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_IOUT_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_IOUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_IOUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1943,20 +1980,25 @@ static const struct pmbus_limit_attr pin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_PIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_PIN_MIN, .update = true, + .readonly = true, .attr = "input_lowest", }, { .reg = PMBUS_VIRT_READ_PIN_MAX, .update = true, + .readonly = true, .attr = "input_highest", }, { .reg = PMBUS_VIRT_RESET_PIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_PIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1980,20 +2022,25 @@ static const struct pmbus_limit_attr pout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_POUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_POUT_MIN, .update = true, + .readonly = true, .attr = "input_lowest", }, { .reg = PMBUS_VIRT_READ_POUT_MAX, .update = true, + .readonly = true, .attr = "input_highest", }, { .reg = PMBUS_VIRT_RESET_POUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_POUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -2049,18 +2096,23 @@ static const struct pmbus_limit_attr temp_limit_attrs[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_VIRT_READ_TEMP_MIN, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_TEMP_AVG, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_TEMP_MAX, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_TEMP_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_MAX_TEMP_1, + .readonly = true, .attr = "rated_max", }, }; @@ -2090,18 +2142,23 @@ static const struct pmbus_limit_attr temp_limit_attrs2[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_VIRT_READ_TEMP2_MIN, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_TEMP2_AVG, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_TEMP2_MAX, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_TEMP2_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_MAX_TEMP_2, + .readonly = true, .attr = "rated_max", }, }; @@ -2131,6 +2188,7 @@ static const struct pmbus_limit_attr temp_limit_attrs3[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_MFR_MAX_TEMP_3, + .readonly = true, .attr = "rated_max", }, }; @@ -2214,7 +2272,7 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client, sensor = pmbus_add_sensor(data, "fan", "target", index, page, 0xff, PMBUS_VIRT_FAN_TARGET_1 + id, PSC_FAN, - false, false, true); + false, false, false, true); if (!sensor) return -ENOMEM; @@ -2225,14 +2283,14 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client, sensor = pmbus_add_sensor(data, "pwm", NULL, index, page, 0xff, PMBUS_VIRT_PWM_1 + id, PSC_PWM, - false, false, true); + false, false, false, true); if (!sensor) return -ENOMEM; sensor = pmbus_add_sensor(data, "pwm", "enable", index, page, 0xff, PMBUS_VIRT_PWM_ENABLE_1 + id, PSC_PWM, - true, false, false); + true, false, false, false); if (!sensor) return -ENOMEM; @@ -2274,7 +2332,7 @@ static int pmbus_add_fan_attributes(struct i2c_client *client, if (pmbus_add_sensor(data, "fan", "input", index, page, 0xff, pmbus_fan_registers[f], - PSC_FAN, true, true, true) == NULL) + PSC_FAN, true, true, false, true) == NULL) return -ENOMEM; /* Fan control */ @@ -3176,12 +3234,19 @@ static int pmbus_regulator_get_voltage(struct regulator_dev *rdev) .class = PSC_VOLTAGE_OUT, .convert = true, }; + int ret; + mutex_lock(&data->update_lock); s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_READ_VOUT); - if (s.data < 0) - return s.data; + if (s.data < 0) { + ret = s.data; + goto unlock; + } - return (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */ + ret = (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */ +unlock: + mutex_unlock(&data->update_lock); + return ret; } static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, @@ -3198,16 +3263,22 @@ static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, }; int val = DIV_ROUND_CLOSEST(min_uv, 1000); /* convert to mV */ int low, high; + int ret; *selector = 0; + mutex_lock(&data->update_lock); low = pmbus_regulator_get_low_margin(client, s.page); - if (low < 0) - return low; + if (low < 0) { + ret = low; + goto unlock; + } high = pmbus_regulator_get_high_margin(client, s.page); - if (high < 0) - return high; + if (high < 0) { + ret = high; + goto unlock; + } /* Make sure we are within margins */ if (low > val) @@ -3217,7 +3288,10 @@ static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, val = pmbus_data2reg(data, &s, val); - return _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val); + ret = _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val); +unlock: + mutex_unlock(&data->update_lock); + return ret; } static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, @@ -3227,6 +3301,7 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, struct i2c_client *client = to_i2c_client(dev->parent); struct pmbus_data *data = i2c_get_clientdata(client); int val, low, high; + int ret; if (data->flags & PMBUS_VOUT_PROTECTED) return 0; @@ -3239,18 +3314,29 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, val = DIV_ROUND_CLOSEST(rdev->desc->min_uV + (rdev->desc->uV_step * selector), 1000); /* convert to mV */ + mutex_lock(&data->update_lock); + low = pmbus_regulator_get_low_margin(client, rdev_get_id(rdev)); - if (low < 0) - return low; + if (low < 0) { + ret = low; + goto unlock; + } high = pmbus_regulator_get_high_margin(client, rdev_get_id(rdev)); - if (high < 0) - return high; + if (high < 0) { + ret = high; + goto unlock; + } - if (val >= low && val <= high) - return val * 1000; /* unit is uV */ + if (val >= low && val <= high) { + ret = val * 1000; /* unit is uV */ + goto unlock; + } - return 0; + ret = 0; +unlock: + mutex_unlock(&data->update_lock); + return ret; } const struct regulator_ops pmbus_regulator_ops = { @@ -3281,12 +3367,42 @@ int pmbus_regulator_init_cb(struct regulator_dev *rdev, } EXPORT_SYMBOL_NS_GPL(pmbus_regulator_init_cb, "PMBUS"); +static void pmbus_regulator_notify_work_cancel(void *data) +{ + struct pmbus_data *pdata = data; + + cancel_work_sync(&pdata->regulator_notify_work); +} + +static void pmbus_regulator_notify_worker(struct work_struct *work) +{ + struct pmbus_data *data = + container_of(work, struct pmbus_data, regulator_notify_work); + int i, j; + + for (i = 0; i < data->info->pages; i++) { + int event; + + event = atomic_xchg(&data->regulator_events[i], 0); + if (!event) + continue; + + for (j = 0; j < data->info->num_regulators; j++) { + if (i == rdev_get_id(data->rdevs[j])) { + regulator_notifier_call_chain(data->rdevs[j], + event, NULL); + break; + } + } + } +} + static int pmbus_regulator_register(struct pmbus_data *data) { struct device *dev = data->dev; const struct pmbus_driver_info *info = data->info; const struct pmbus_platform_data *pdata = dev_get_platdata(dev); - int i; + int i, ret; data->rdevs = devm_kzalloc(dev, sizeof(struct regulator_dev *) * info->num_regulators, GFP_KERNEL); @@ -3310,19 +3426,19 @@ static int pmbus_regulator_register(struct pmbus_data *data) info->reg_desc[i].name); } + INIT_WORK(&data->regulator_notify_work, pmbus_regulator_notify_worker); + + ret = devm_add_action_or_reset(dev, pmbus_regulator_notify_work_cancel, data); + if (ret) + return ret; + return 0; } static void pmbus_regulator_notify(struct pmbus_data *data, int page, int event) { - int j; - - for (j = 0; j < data->info->num_regulators; j++) { - if (page == rdev_get_id(data->rdevs[j])) { - regulator_notifier_call_chain(data->rdevs[j], event, NULL); - break; - } - } + atomic_or(event, &data->regulator_events[page]); + schedule_work(&data->regulator_notify_work); } #else static int pmbus_regulator_register(struct pmbus_data *data) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index fc45c384833f..4fafe393a48c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -608,14 +608,29 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) { ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt, sg_offset, remote_addr, rkey, dir); - } else if (sg_cnt > 1) { + /* + * If MR init succeeded or failed for a reason other + * than pool exhaustion, that result is final. + * + * Pool exhaustion (-EAGAIN) from the max_sgl_rd + * optimization is recoverable: fall back to + * direct SGE posting. iWARP and force_mr require + * MRs unconditionally, so -EAGAIN is terminal. + */ + if (ret != -EAGAIN || + rdma_protocol_iwarp(qp->device, port_num) || + unlikely(rdma_rw_force_mr)) + goto out; + } + + if (sg_cnt > 1) ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset, remote_addr, rkey, dir); - } else { + else ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset, remote_addr, rkey, dir); - } +out: if (ret < 0) goto out_unmap_sg; return ret; @@ -686,14 +701,16 @@ int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return ret; /* - * IOVA mapping not available. Check if MR registration provides - * better performance than multiple SGE entries. + * IOVA not available; fall back to the map_wrs path, which maps + * each bvec as a direct SGE. This is always correct: the MR path + * is a throughput optimization, not a correctness requirement. + * (iWARP, which does require MRs, is handled by the check above.) + * + * The rdma_rw_io_needs_mr() gate is not used here because nr_bvec + * is a raw page count that overstates DMA entry demand -- the bvec + * caller has no post-DMA-coalescing segment count, and feeding the + * inflated count into the MR path exhausts the pool on RDMA READs. */ - if (rdma_rw_io_needs_mr(dev, port_num, dir, nr_bvec)) - return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs, - nr_bvec, &iter, remote_addr, - rkey, dir); - return rdma_rw_init_map_wrs_bvec(ctx, qp, bvecs, nr_bvec, &iter, remote_addr, rkey, dir); } diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index cff4fcca2c34..edc34c69f0f2 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -55,7 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d if (dirty) ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt, - DMA_BIDIRECTIONAL, 0); + DMA_BIDIRECTIONAL, + DMA_ATTR_REQUIRE_COHERENT); for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) { unpin_user_page_range_dirty_lock(sg_page(sg), @@ -169,7 +170,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, unsigned long lock_limit; unsigned long new_pinned; unsigned long cur_base; - unsigned long dma_attr = 0; + unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT; struct mm_struct *mm; unsigned long npages; int pinned, ret; diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c index d34b5f88cd40..71a7ca2196ad 100644 --- a/drivers/infiniband/hw/bng_re/bng_dev.c +++ b/drivers/infiniband/hw/bng_re/bng_dev.c @@ -210,7 +210,7 @@ static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev) return rc; } -static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) +static int bng_re_query_hwrm_version(struct bng_re_dev *rdev) { struct bnge_auxr_dev *aux_dev = rdev->aux_dev; struct hwrm_ver_get_output ver_get_resp = {}; @@ -230,7 +230,7 @@ static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x", rc); - return; + return rc; } cctx = rdev->chip_ctx; @@ -244,6 +244,8 @@ static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) if (!cctx->hwrm_cmd_max_timeout) cctx->hwrm_cmd_max_timeout = BNG_ROCE_FW_MAX_TIMEOUT; + + return 0; } static void bng_re_dev_uninit(struct bng_re_dev *rdev) @@ -306,13 +308,15 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) goto msix_ctx_fail; } - bng_re_query_hwrm_version(rdev); + rc = bng_re_query_hwrm_version(rdev); + if (rc) + goto destroy_chip_ctx; rc = bng_re_alloc_fw_channel(&rdev->bng_res, &rdev->rcfw); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate RCFW Channel: %#x\n", rc); - goto alloc_fw_chl_fail; + goto destroy_chip_ctx; } /* Allocate nq record memory */ @@ -391,7 +395,7 @@ free_rcfw: kfree(rdev->nqr); nq_alloc_fail: bng_re_free_rcfw_channel(&rdev->rcfw); -alloc_fw_chl_fail: +destroy_chip_ctx: bng_re_destroy_chip_ctx(rdev); msix_ctx_fail: bnge_unregister_dev(rdev->aux_dev); diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 229b0ad3b0cb..e97b5f0d7003 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/log2.h> @@ -310,23 +310,19 @@ static inline struct efa_comp_ctx *efa_com_get_comp_ctx_by_cmd_id(struct efa_com return &aq->comp_ctx[ctx_id]; } -static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, - struct efa_admin_aq_entry *cmd, - size_t cmd_size_in_bytes, - struct efa_admin_acq_entry *comp, - size_t comp_size_in_bytes) +static void __efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) { struct efa_admin_aq_entry *aqe; - struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; u16 cmd_id; u16 ctx_id; u16 pi; - comp_ctx = efa_com_alloc_comp_ctx(aq); - if (!comp_ctx) - return ERR_PTR(-EINVAL); - queue_size_mask = aq->depth - 1; pi = aq->sq.pc & queue_size_mask; ctx_id = efa_com_get_comp_ctx_id(aq, comp_ctx); @@ -360,8 +356,6 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu /* barrier not needed in case of writel */ writel(aq->sq.pc, aq->sq.db_addr); - - return comp_ctx; } static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) @@ -394,28 +388,25 @@ static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) return 0; } -static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, - struct efa_admin_aq_entry *cmd, - size_t cmd_size_in_bytes, - struct efa_admin_acq_entry *comp, - size_t comp_size_in_bytes) +static int efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) { - struct efa_comp_ctx *comp_ctx; - spin_lock(&aq->sq.lock); if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) { ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n"); spin_unlock(&aq->sq.lock); - return ERR_PTR(-ENODEV); + return -ENODEV; } - comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp, - comp_size_in_bytes); + __efa_com_submit_admin_cmd(aq, comp_ctx, cmd, cmd_size_in_bytes, comp, + comp_size_in_bytes); spin_unlock(&aq->sq.lock); - if (IS_ERR(comp_ctx)) - clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - return comp_ctx; + return 0; } static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq, @@ -512,7 +503,6 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c { unsigned long timeout; unsigned long flags; - int err; timeout = jiffies + usecs_to_jiffies(aq->completion_timeout); @@ -532,24 +522,20 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c atomic64_inc(&aq->stats.no_completion); clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - err = -ETIME; - goto out; + return -ETIME; } msleep(aq->poll_interval); } - err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); -out: - efa_com_dealloc_comp_ctx(aq, comp_ctx); - return err; + return efa_com_comp_status_to_errno( + comp_ctx->user_cqe->acq_common_descriptor.status); } static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx, struct efa_com_admin_queue *aq) { unsigned long flags; - int err; wait_for_completion_timeout(&comp_ctx->wait_event, usecs_to_jiffies(aq->completion_timeout)); @@ -585,14 +571,11 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com aq->cq.cc); clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - err = -ETIME; - goto out; + return -ETIME; } - err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); -out: - efa_com_dealloc_comp_ctx(aq, comp_ctx); - return err; + return efa_com_comp_status_to_errno( + comp_ctx->user_cqe->acq_common_descriptor.status); } /* @@ -642,30 +625,39 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), cmd->aq_common_descriptor.opcode); - comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size); - if (IS_ERR(comp_ctx)) { + + comp_ctx = efa_com_alloc_comp_ctx(aq); + if (!comp_ctx) { + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + up(&aq->avail_cmds); + return -EINVAL; + } + + err = efa_com_submit_admin_cmd(aq, comp_ctx, cmd, cmd_size, comp, comp_size); + if (err) { ibdev_err_ratelimited( aq->efa_dev, - "Failed to submit command %s (opcode %u) err %pe\n", + "Failed to submit command %s (opcode %u) err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, comp_ctx); + cmd->aq_common_descriptor.opcode, err); + efa_com_dealloc_comp_ctx(aq, comp_ctx); up(&aq->avail_cmds); atomic64_inc(&aq->stats.cmd_err); - return PTR_ERR(comp_ctx); + return err; } err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); if (err) { ibdev_err_ratelimited( aq->efa_dev, - "Failed to process command %s (opcode %u) comp_status %d err %d\n", + "Failed to process command %s (opcode %u) err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, - comp_ctx->user_cqe->acq_common_descriptor.status, err); + cmd->aq_common_descriptor.opcode, err); atomic64_inc(&aq->stats.cmd_err); } + efa_com_dealloc_comp_ctx(aq, comp_ctx); up(&aq->avail_cmds); return err; diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c index 4842931f5316..a5671da3db64 100644 --- a/drivers/infiniband/hw/ionic/ionic_controlpath.c +++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c @@ -508,6 +508,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, { const struct ib_global_route *grh; enum rdma_network_type net; + u8 smac[ETH_ALEN]; u16 vlan; int rc; @@ -518,7 +519,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, grh = rdma_ah_read_grh(attr); - rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, &hdr->eth.smac_h[0]); + rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, smac); if (rc) return rc; @@ -536,6 +537,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, if (rc) return rc; + ether_addr_copy(hdr->eth.smac_h, smac); ether_addr_copy(hdr->eth.dmac_h, attr->roce.dmac); if (net == RDMA_NETWORK_IPV4) { diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 3d084d4ff577..91c0e7298283 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -2241,11 +2241,12 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, int oldarpindex; int arpindex; struct net_device *netdev = iwdev->netdev; + int ret; /* create an hte and cm_node for this instance */ cm_node = kzalloc_obj(*cm_node, GFP_ATOMIC); if (!cm_node) - return NULL; + return ERR_PTR(-ENOMEM); /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; @@ -2348,8 +2349,10 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, arpindex = -EINVAL; } - if (arpindex < 0) + if (arpindex < 0) { + ret = -EINVAL; goto err; + } ether_addr_copy(cm_node->rem_mac, iwdev->rf->arp_table[arpindex].mac_addr); @@ -2360,7 +2363,7 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, err: kfree(cm_node); - return NULL; + return ERR_PTR(ret); } static void irdma_destroy_connection(struct irdma_cm_node *cm_node) @@ -3021,8 +3024,8 @@ static int irdma_create_cm_node(struct irdma_cm_core *cm_core, /* create a CM connection node */ cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL); - if (!cm_node) - return -ENOMEM; + if (IS_ERR(cm_node)) + return PTR_ERR(cm_node); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; @@ -3219,9 +3222,9 @@ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf) cm_info.cm_id = listener->cm_id; cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info, listener); - if (!cm_node) { + if (IS_ERR(cm_node)) { ibdev_dbg(&cm_core->iwdev->ibdev, - "CM: allocate node failed\n"); + "CM: allocate node failed ret=%ld\n", PTR_ERR(cm_node)); refcount_dec(&listener->refcnt); return; } @@ -4239,21 +4242,21 @@ static void irdma_cm_event_handler(struct work_struct *work) irdma_cm_event_reset(event); break; case IRDMA_CM_EVENT_CONNECTED: - if (!event->cm_node->cm_id || - event->cm_node->state != IRDMA_CM_STATE_OFFLOADED) + if (!cm_node->cm_id || + cm_node->state != IRDMA_CM_STATE_OFFLOADED) break; irdma_cm_event_connected(event); break; case IRDMA_CM_EVENT_MPA_REJECT: - if (!event->cm_node->cm_id || + if (!cm_node->cm_id || cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED); break; case IRDMA_CM_EVENT_ABORTED: - if (!event->cm_node->cm_id || - event->cm_node->state == IRDMA_CM_STATE_OFFLOADED) + if (!cm_node->cm_id || + cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_event_connect_error(event); break; @@ -4263,7 +4266,7 @@ static void irdma_cm_event_handler(struct work_struct *work) break; } - irdma_rem_ref_cm_node(event->cm_node); + irdma_rem_ref_cm_node(cm_node); kfree(event); } diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index ac3721a5747a..4718acf6c6fd 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1438,7 +1438,7 @@ exit: * irdma_round_up_wq - return round up qp wq depth * @wqdepth: wq depth in quanta to round up */ -static int irdma_round_up_wq(u32 wqdepth) +static u64 irdma_round_up_wq(u64 wqdepth) { int scount = 1; @@ -1491,15 +1491,16 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)sq_size << shift) + IRDMA_SQ_RSVD); - *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); - - if (*sqdepth < min_size) - *sqdepth = min_size; - else if (*sqdepth > uk_attrs->max_hw_wq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_wq_quanta) return -EINVAL; + *sqdepth = hw_quanta; return 0; } @@ -1513,15 +1514,16 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; - - *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)rq_size << shift) + IRDMA_RQ_RSVD); - if (*rqdepth < min_size) - *rqdepth = min_size; - else if (*rqdepth > uk_attrs->max_hw_rq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_rq_quanta) return -EINVAL; + *rqdepth = hw_quanta; return 0; } @@ -1535,13 +1537,16 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift, u32 *srqdepth) { - *srqdepth = irdma_round_up_wq((srq_size << shift) + IRDMA_RQ_RSVD); + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)srq_size << shift) + IRDMA_RQ_RSVD); - if (*srqdepth < ((u32)uk_attrs->min_hw_wq_size << shift)) - *srqdepth = uk_attrs->min_hw_wq_size << shift; - else if (*srqdepth > uk_attrs->max_hw_srq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_srq_quanta) return -EINVAL; + *srqdepth = hw_quanta; return 0; } diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index ab8c5284d4be..495e5daff4b4 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2322,8 +2322,6 @@ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp) struct irdma_qp *qp = sc_qp->qp_uk.back_qp; struct ib_qp_attr attr; - if (qp->iwdev->rf->reset) - return; attr.qp_state = IB_QPS_ERR; if (rdma_protocol_roce(qp->ibqp.device, 1)) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 7251cd7a2147..95f590c10c05 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -558,7 +558,8 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } irdma_qp_rem_ref(&iwqp->ibqp); - wait_for_completion(&iwqp->free_qp); + if (!iwdev->rf->reset) + wait_for_completion(&iwqp->free_qp); irdma_free_lsmm_rsrc(iwqp); irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); @@ -1105,6 +1106,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; rf->qp_table[qp_num] = iwqp; + init_completion(&iwqp->free_qp); if (udata) { /* GEN_1 legacy support with libi40iw does not have expanded uresp struct */ @@ -1129,7 +1131,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, } } - init_completion(&iwqp->free_qp); return 0; error: @@ -1462,8 +1463,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, ctx_info->remote_atomics_en = true; } - wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend)); - ibdev_dbg(&iwdev->ibdev, "VERBS: caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n", __builtin_return_address(0), ibqp->qp_num, attr->qp_state, @@ -1540,6 +1539,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { + iwqp->ibqp_state = attr->qp_state; spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, @@ -1745,6 +1745,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { + iwqp->ibqp_state = attr->qp_state; spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, @@ -3723,6 +3724,7 @@ static int irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len, err: ib_umem_release(region); + iwmr->region = NULL; return err; } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 5dac64be61bb..94d514169642 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1211,7 +1211,7 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, */ if (dev_use_swiotlb(dev, size, dir) && iova_unaligned(iovad, phys, size)) { - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) return DMA_MAPPING_ERROR; phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs); @@ -1223,7 +1223,8 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, arch_sync_dma_for_device(phys, size, dir); iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); - if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO)) + if (iova == DMA_MAPPING_ERROR && + !(attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))) swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); return iova; } @@ -1233,7 +1234,7 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle, { phys_addr_t phys; - if (attrs & DMA_ATTR_MMIO) { + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) { __iommu_dma_unmap(dev, dma_handle, size); return; } @@ -1945,9 +1946,21 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state, if (WARN_ON_ONCE(iova_start_pad && offset > 0)) return -EIO; + /* + * DMA_IOVA_USE_SWIOTLB is set on state after some entry + * took SWIOTLB path, which we were supposed to prevent + * for DMA_ATTR_REQUIRE_COHERENT attribute. + */ + if (WARN_ON_ONCE((state->__size & DMA_IOVA_USE_SWIOTLB) && + (attrs & DMA_ATTR_REQUIRE_COHERENT))) + return -EOPNOTSUPP; + + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return -EOPNOTSUPP; + if (dev_use_swiotlb(dev, size, dir) && iova_unaligned(iovad, phys, size)) { - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) return -EPERM; return iommu_dma_iova_link_swiotlb(dev, state, phys, offset, diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index 83f31ea657b7..181320528a47 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -306,6 +306,8 @@ static int mpm_pd_power_off(struct generic_pm_domain *genpd) if (ret < 0) return ret; + mbox_client_txdone(priv->mbox_chan, 0); + return 0; } @@ -434,6 +436,7 @@ static int qcom_mpm_probe(struct platform_device *pdev, struct device_node *pare } priv->mbox_client.dev = dev; + priv->mbox_client.knows_txdone = true; priv->mbox_chan = mbox_request_channel(&priv->mbox_client, 0); if (IS_ERR(priv->mbox_chan)) { ret = PTR_ERR(priv->mbox_chan); diff --git a/drivers/irqchip/irq-renesas-rzv2h.c b/drivers/irqchip/irq-renesas-rzv2h.c index da2bc43a0e12..03e93b061edd 100644 --- a/drivers/irqchip/irq-renesas-rzv2h.c +++ b/drivers/irqchip/irq-renesas-rzv2h.c @@ -621,7 +621,7 @@ static int rzv2h_icu_probe_common(struct platform_device *pdev, struct device_no return 0; pm_put: - pm_runtime_put(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); return ret; } diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index aa4dd7e7cf5a..8e25f970fd12 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -3080,8 +3080,6 @@ static int ccs_init_state(struct v4l2_subdev *sd, struct v4l2_rect *crop = v4l2_subdev_state_get_crop(sd_state, pad); - guard(mutex)(&sensor->mutex); - ccs_get_native_size(ssd, crop); fmt->width = crop->width; diff --git a/drivers/media/mc/mc-request.c b/drivers/media/mc/mc-request.c index c7c7d4a86c6b..13e77648807c 100644 --- a/drivers/media/mc/mc-request.c +++ b/drivers/media/mc/mc-request.c @@ -192,6 +192,8 @@ static long media_request_ioctl_reinit(struct media_request *req) struct media_device *mdev = req->mdev; unsigned long flags; + mutex_lock(&mdev->req_queue_mutex); + spin_lock_irqsave(&req->lock, flags); if (req->state != MEDIA_REQUEST_STATE_IDLE && req->state != MEDIA_REQUEST_STATE_COMPLETE) { @@ -199,6 +201,7 @@ static long media_request_ioctl_reinit(struct media_request *req) "request: %s not in idle or complete state, cannot reinit\n", req->debug_str); spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return -EBUSY; } if (req->access_count) { @@ -206,6 +209,7 @@ static long media_request_ioctl_reinit(struct media_request *req) "request: %s is being accessed, cannot reinit\n", req->debug_str); spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return -EBUSY; } req->state = MEDIA_REQUEST_STATE_CLEANING; @@ -216,6 +220,7 @@ static long media_request_ioctl_reinit(struct media_request *req) spin_lock_irqsave(&req->lock, flags); req->state = MEDIA_REQUEST_STATE_IDLE; spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return 0; } diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c index 28267ee30190..3119f3bc9f98 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c @@ -500,11 +500,15 @@ void rkvdec_hevc_run_preamble(struct rkvdec_ctx *ctx, ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS); run->ext_sps_st_rps = ctrl ? ctrl->p_cur.p : NULL; + } else { + run->ext_sps_st_rps = NULL; } if (ctx->has_sps_lt_rps) { ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS); run->ext_sps_lt_rps = ctrl ? ctrl->p_cur.p : NULL; + } else { + run->ext_sps_lt_rps = NULL; } rkvdec_run_preamble(ctx, &run->base); diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c index 97f1efde2e47..fb4f849d7366 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c @@ -130,7 +130,7 @@ struct rkvdec_h264_ctx { struct vdpu383_regs_h26x regs; }; -static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb) +static noinline_for_stack void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb) { pps->top_field_order_cnt0 = dpb[0].top_field_order_cnt; pps->bot_field_order_cnt0 = dpb[0].bottom_field_order_cnt; @@ -166,6 +166,31 @@ static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_d pps->bot_field_order_cnt15 = dpb[15].bottom_field_order_cnt; } +static noinline_for_stack void set_dec_params(struct rkvdec_pps *pps, const struct v4l2_ctrl_h264_decode_params *dec_params) +{ + const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb; + + for (int i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + pps->is_longterm |= (1 << i); + pps->ref_field_flags |= + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i; + pps->ref_colmv_use_flag |= + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i; + pps->ref_topfield_used |= + (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i; + pps->ref_botfield_used |= + (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i; + } + pps->pic_field_flag = + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC); + pps->pic_associated_flag = + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD); + + pps->cur_top_field = dec_params->top_field_order_cnt; + pps->cur_bot_field = dec_params->bottom_field_order_cnt; +} + static void assemble_hw_pps(struct rkvdec_ctx *ctx, struct rkvdec_h264_run *run) { @@ -177,7 +202,6 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu; struct rkvdec_sps_pps *hw_ps; u32 pic_width, pic_height; - u32 i; /* * HW read the SPS/PPS information from PPS packet index by PPS id. @@ -261,28 +285,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, !!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT); set_field_order_cnt(&hw_ps->pps, dpb); + set_dec_params(&hw_ps->pps, dec_params); - for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - hw_ps->pps.is_longterm |= (1 << i); - - hw_ps->pps.ref_field_flags |= - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i; - hw_ps->pps.ref_colmv_use_flag |= - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i; - hw_ps->pps.ref_topfield_used |= - (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i; - hw_ps->pps.ref_botfield_used |= - (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i; - } - - hw_ps->pps.pic_field_flag = - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC); - hw_ps->pps.pic_associated_flag = - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD); - - hw_ps->pps.cur_top_field = dec_params->top_field_order_cnt; - hw_ps->pps.cur_bot_field = dec_params->bottom_field_order_cnt; } static void rkvdec_write_regs(struct rkvdec_ctx *ctx) diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c index e4cdd2122873..2751f5396ee8 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c @@ -893,7 +893,8 @@ out_update_last: update_ctx_last_info(vp9_ctx); } -static void rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx) +static noinline_for_stack void +rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx) { struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; struct rkvdec_vp9_intra_frame_symbol_counts *intra_cnts = vp9_ctx->count_tbl.cpu; diff --git a/drivers/media/platform/synopsys/Kconfig b/drivers/media/platform/synopsys/Kconfig index e798ec00b189..bf2ac092fbb3 100644 --- a/drivers/media/platform/synopsys/Kconfig +++ b/drivers/media/platform/synopsys/Kconfig @@ -7,6 +7,7 @@ config VIDEO_DW_MIPI_CSI2RX depends on VIDEO_DEV depends on V4L_PLATFORM_DRIVERS depends on PM && COMMON_CLK + select GENERIC_PHY_MIPI_DPHY select MEDIA_CONTROLLER select V4L2_FWNODE select VIDEO_V4L2_SUBDEV_API diff --git a/drivers/media/platform/synopsys/dw-mipi-csi2rx.c b/drivers/media/platform/synopsys/dw-mipi-csi2rx.c index 170346ae1a59..4d96171a650b 100644 --- a/drivers/media/platform/synopsys/dw-mipi-csi2rx.c +++ b/drivers/media/platform/synopsys/dw-mipi-csi2rx.c @@ -301,7 +301,7 @@ dw_mipi_csi2rx_enum_mbus_code(struct v4l2_subdev *sd, return 0; case DW_MIPI_CSI2RX_PAD_SINK: - if (code->index > csi2->formats_num) + if (code->index >= csi2->formats_num) return -EINVAL; code->code = csi2->formats[code->index].code; diff --git a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c index 6f8e43b7f157..fa4224de4b99 100644 --- a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c +++ b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c @@ -343,7 +343,7 @@ const struct hantro_variant imx8mq_vpu_variant = { .num_regs = ARRAY_SIZE(imx8mq_reg_names) }; -static const struct of_device_id imx8mq_vpu_shared_resources[] __initconst = { +static const struct of_device_id imx8mq_vpu_shared_resources[] = { { .compatible = "nxp,imx8mq-vpu-g1", }, { .compatible = "nxp,imx8mq-vpu-g2", }, { /* sentinel */ } diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index 40c76c051da2..f6c8e3223796 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -1751,7 +1751,8 @@ static void uvc_video_complete(struct urb *urb) /* * Free transfer buffers. */ -static void uvc_free_urb_buffers(struct uvc_streaming *stream) +static void uvc_free_urb_buffers(struct uvc_streaming *stream, + unsigned int size) { struct usb_device *udev = stream->dev->udev; struct uvc_urb *uvc_urb; @@ -1760,7 +1761,7 @@ static void uvc_free_urb_buffers(struct uvc_streaming *stream) if (!uvc_urb->buffer) continue; - usb_free_noncoherent(udev, stream->urb_size, uvc_urb->buffer, + usb_free_noncoherent(udev, size, uvc_urb->buffer, uvc_stream_dir(stream), uvc_urb->sgt); uvc_urb->buffer = NULL; uvc_urb->sgt = NULL; @@ -1820,7 +1821,7 @@ static int uvc_alloc_urb_buffers(struct uvc_streaming *stream, if (!uvc_alloc_urb_buffer(stream, uvc_urb, urb_size, gfp_flags)) { - uvc_free_urb_buffers(stream); + uvc_free_urb_buffers(stream, urb_size); break; } @@ -1868,7 +1869,7 @@ static void uvc_video_stop_transfer(struct uvc_streaming *stream, } if (free_buffers) - uvc_free_urb_buffers(stream); + uvc_free_urb_buffers(stream, stream->urb_size); } /* diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 37d33d4a363d..a2b650f4ec3c 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -3082,13 +3082,14 @@ static long __video_do_ioctl(struct file *file, } /* - * We need to serialize streamon/off with queueing new requests. + * We need to serialize streamon/off/reqbufs with queueing new requests. * These ioctls may trigger the cancellation of a streaming * operation, and that should not be mixed with queueing a new * request at the same time. */ if (v4l2_device_supports_requests(vfd->v4l2_dev) && - (cmd == VIDIOC_STREAMON || cmd == VIDIOC_STREAMOFF)) { + (cmd == VIDIOC_STREAMON || cmd == VIDIOC_STREAMOFF || + cmd == VIDIOC_REQBUFS)) { req_queue_lock = &vfd->v4l2_dev->mdev->req_queue_mutex; if (mutex_lock_interruptible(req_queue_lock)) diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 0498198a4696..766d455950f5 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -601,7 +601,9 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* We need synchronization with dev->stop() */ ASSERT_RTNL(); - can_ctrlmode_changelink(dev, data, extack); + err = can_ctrlmode_changelink(dev, data, extack); + if (err) + return err; if (data[IFLA_CAN_BITTIMING]) { struct can_bittiming bt; diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index bb7782582f40..0d0190ae094a 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1225,7 +1225,11 @@ static int mcp251x_open(struct net_device *net) } mutex_lock(&priv->mcp_lock); - mcp251x_power_enable(priv->transceiver, 1); + ret = mcp251x_power_enable(priv->transceiver, 1); + if (ret) { + dev_err(&spi->dev, "failed to enable transceiver power: %pe\n", ERR_PTR(ret)); + goto out_close_candev; + } priv->force_quit = 0; priv->tx_skb = NULL; @@ -1272,6 +1276,7 @@ out_free_irq: mcp251x_hw_sleep(spi); out_close: mcp251x_power_enable(priv->transceiver, 0); +out_close_candev: close_candev(net); mutex_unlock(&priv->mcp_lock); if (release_irq) @@ -1516,11 +1521,25 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct mcp251x_priv *priv = spi_get_drvdata(spi); + int ret = 0; - if (priv->after_suspend & AFTER_SUSPEND_POWER) - mcp251x_power_enable(priv->power, 1); - if (priv->after_suspend & AFTER_SUSPEND_UP) - mcp251x_power_enable(priv->transceiver, 1); + if (priv->after_suspend & AFTER_SUSPEND_POWER) { + ret = mcp251x_power_enable(priv->power, 1); + if (ret) { + dev_err(dev, "failed to restore power: %pe\n", ERR_PTR(ret)); + return ret; + } + } + + if (priv->after_suspend & AFTER_SUSPEND_UP) { + ret = mcp251x_power_enable(priv->transceiver, 1); + if (ret) { + dev_err(dev, "failed to restore transceiver power: %pe\n", ERR_PTR(ret)); + if (priv->after_suspend & AFTER_SUSPEND_POWER) + mcp251x_power_enable(priv->power, 0); + return ret; + } + } if (priv->after_suspend & (AFTER_SUSPEND_POWER | AFTER_SUSPEND_UP)) queue_work(priv->wq, &priv->restart_work); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 42dbe8f93231..5724f8f2defd 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -227,7 +227,9 @@ static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr, if (!dev) return -ENODEV; + rcu_read_lock(); err = dev_fill_forward_path(dev, addr, &stack); + rcu_read_unlock(); if (err) return err; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index cd7dddeb91dd..9787c1857e13 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -25,7 +25,7 @@ config B44 select SSB select MII select PHYLIB - select FIXED_PHY if BCM47XX + select FIXED_PHY help If you have a network (Ethernet) controller of this type, say Y or M here. diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index aa6d8606849f..972474893a6b 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -1152,12 +1152,6 @@ void bcmasp_enable_wol(struct bcmasp_intf *intf, bool en) } } -static void bcmasp_wol_irq_destroy(struct bcmasp_priv *priv) -{ - if (priv->wol_irq > 0) - free_irq(priv->wol_irq, priv); -} - static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en) { u32 reg, phy_lpi_overwrite; @@ -1255,7 +1249,7 @@ static int bcmasp_probe(struct platform_device *pdev) if (priv->irq <= 0) return -EINVAL; - priv->clk = devm_clk_get_optional_enabled(dev, "sw_asp"); + priv->clk = devm_clk_get_optional(dev, "sw_asp"); if (IS_ERR(priv->clk)) return dev_err_probe(dev, PTR_ERR(priv->clk), "failed to request clock\n"); @@ -1283,6 +1277,10 @@ static int bcmasp_probe(struct platform_device *pdev) bcmasp_set_pdata(priv, pdata); + ret = clk_prepare_enable(priv->clk); + if (ret) + return dev_err_probe(dev, ret, "failed to start clock\n"); + /* Enable all clocks to ensure successful probing */ bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0); @@ -1294,8 +1292,10 @@ static int bcmasp_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, priv->irq, bcmasp_isr, 0, pdev->name, priv); - if (ret) - return dev_err_probe(dev, ret, "failed to request ASP interrupt: %d", ret); + if (ret) { + dev_err(dev, "Failed to request ASP interrupt: %d", ret); + goto err_clock_disable; + } /* Register mdio child nodes */ of_platform_populate(dev->of_node, bcmasp_mdio_of_match, NULL, dev); @@ -1307,13 +1307,17 @@ static int bcmasp_probe(struct platform_device *pdev) priv->mda_filters = devm_kcalloc(dev, priv->num_mda_filters, sizeof(*priv->mda_filters), GFP_KERNEL); - if (!priv->mda_filters) - return -ENOMEM; + if (!priv->mda_filters) { + ret = -ENOMEM; + goto err_clock_disable; + } priv->net_filters = devm_kcalloc(dev, priv->num_net_filters, sizeof(*priv->net_filters), GFP_KERNEL); - if (!priv->net_filters) - return -ENOMEM; + if (!priv->net_filters) { + ret = -ENOMEM; + goto err_clock_disable; + } bcmasp_core_init_filters(priv); @@ -1322,7 +1326,8 @@ static int bcmasp_probe(struct platform_device *pdev) ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports"); if (!ports_node) { dev_warn(dev, "No ports found\n"); - return -EINVAL; + ret = -EINVAL; + goto err_clock_disable; } i = 0; @@ -1344,8 +1349,6 @@ static int bcmasp_probe(struct platform_device *pdev) */ bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE); - clk_disable_unprepare(priv->clk); - /* Now do the registration of the network ports which will take care * of managing the clock properly. */ @@ -1358,13 +1361,16 @@ static int bcmasp_probe(struct platform_device *pdev) count++; } + clk_disable_unprepare(priv->clk); + dev_info(dev, "Initialized %d port(s)\n", count); return ret; err_cleanup: - bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); +err_clock_disable: + clk_disable_unprepare(priv->clk); return ret; } @@ -1376,7 +1382,6 @@ static void bcmasp_remove(struct platform_device *pdev) if (!priv) return; - bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index c16ac9c76aa3..99e7d5cf3786 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1071,7 +1071,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budge } if (tx_skb->skb) { - napi_consume_skb(tx_skb->skb, budget); + dev_consume_skb_any(tx_skb->skb); tx_skb->skb = NULL; } } @@ -3224,7 +3224,7 @@ static void gem_get_ethtool_stats(struct net_device *dev, spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); memcpy(data, &bp->ethtool_stats, sizeof(u64) - * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES)); + * (GEM_STATS_LEN + QUEUE_STATS_LEN * bp->num_queues)); spin_unlock_irq(&bp->stats_lock); } @@ -5776,9 +5776,9 @@ static int __maybe_unused macb_suspend(struct device *dev) struct macb_queue *queue; struct in_device *idev; unsigned long flags; + u32 tmp, ifa_local; unsigned int q; int err; - u32 tmp; if (!device_may_wakeup(&bp->dev->dev)) phy_exit(bp->phy); @@ -5787,14 +5787,21 @@ static int __maybe_unused macb_suspend(struct device *dev) return 0; if (bp->wol & MACB_WOL_ENABLED) { - /* Check for IP address in WOL ARP mode */ - idev = __in_dev_get_rcu(bp->dev); - if (idev) - ifa = rcu_dereference(idev->ifa_list); - if ((bp->wolopts & WAKE_ARP) && !ifa) { - netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); - return -EOPNOTSUPP; + if (bp->wolopts & WAKE_ARP) { + /* Check for IP address in WOL ARP mode */ + rcu_read_lock(); + idev = __in_dev_get_rcu(bp->dev); + if (idev) + ifa = rcu_dereference(idev->ifa_list); + if (!ifa) { + rcu_read_unlock(); + netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); + return -EOPNOTSUPP; + } + ifa_local = be32_to_cpu(ifa->ifa_local); + rcu_read_unlock(); } + spin_lock_irqsave(&bp->lock, flags); /* Disable Tx and Rx engines before disabling the queues, @@ -5833,8 +5840,9 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wolopts & WAKE_ARP) { tmp |= MACB_BIT(ARP); /* write IP address into register */ - tmp |= MACB_BFEXT(IP, be32_to_cpu(ifa->ifa_local)); + tmp |= MACB_BFEXT(IP, ifa_local); } + spin_unlock_irqrestore(&bp->lock, flags); /* Change interrupt handler and * Enable WoL IRQ on queue 0 @@ -5847,11 +5855,12 @@ static int __maybe_unused macb_suspend(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } + spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, GEM_BIT(WOL)); gem_writel(bp, WOL, tmp); + spin_unlock_irqrestore(&bp->lock, flags); } else { err = devm_request_irq(dev, bp->queues[0].irq, macb_wol_interrupt, IRQF_SHARED, netdev->name, bp->queues); @@ -5859,13 +5868,13 @@ static int __maybe_unused macb_suspend(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } + spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, MACB_BIT(WOL)); macb_writel(bp, WOL, tmp); + spin_unlock_irqrestore(&bp->lock, flags); } - spin_unlock_irqrestore(&bp->lock, flags); enable_irq_wake(bp->queues[0].irq); } @@ -5932,6 +5941,8 @@ static int __maybe_unused macb_resume(struct device *dev) queue_readl(bp->queues, ISR); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(bp->queues, ISR, -1); + spin_unlock_irqrestore(&bp->lock, flags); + /* Replace interrupt handler on queue 0 */ devm_free_irq(dev, bp->queues[0].irq, bp->queues); err = devm_request_irq(dev, bp->queues[0].irq, macb_interrupt, @@ -5940,10 +5951,8 @@ static int __maybe_unused macb_resume(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } - spin_unlock_irqrestore(&bp->lock, flags); disable_irq_wake(bp->queues[0].irq); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index fed89d4f1e1d..2fe140ddebb2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -813,6 +813,8 @@ static void enetc_get_ringparam(struct net_device *ndev, { struct enetc_ndev_priv *priv = netdev_priv(ndev); + ring->rx_max_pending = priv->rx_bd_count; + ring->tx_max_pending = priv->tx_bd_count; ring->rx_pending = priv->rx_bd_count; ring->tx_pending = priv->tx_bd_count; diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index ab67c709d5a0..1cd1f3f2930a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -313,14 +313,13 @@ static int iavf_get_sset_count(struct net_device *netdev, int sset) { /* Report the maximum number queues, even if not every queue is * currently configured. Since allocation of queues is in pairs, - * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set - * at device creation and never changes. + * use netdev->num_tx_queues * 2. The num_tx_queues is set at + * device creation and never changes. */ if (sset == ETH_SS_STATS) return IAVF_STATS_LEN + - (IAVF_QUEUE_STATS_LEN * 2 * - netdev->real_num_tx_queues); + (IAVF_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues); else return -EINVAL; } @@ -345,19 +344,19 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats); rcu_read_lock(); - /* As num_active_queues describe both tx and rx queues, we can use - * it to iterate over rings' stats. + /* Use num_tx_queues to report stats for the maximum number of queues. + * Queues beyond num_active_queues will report zero. */ - for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring; + for (i = 0; i < netdev->num_tx_queues; i++) { + struct iavf_ring *tx_ring = NULL, *rx_ring = NULL; - /* Tx rings stats */ - ring = &adapter->tx_rings[i]; - iavf_add_queue_stats(&data, ring); + if (i < adapter->num_active_queues) { + tx_ring = &adapter->tx_rings[i]; + rx_ring = &adapter->rx_rings[i]; + } - /* Rx rings stats */ - ring = &adapter->rx_rings[i]; - iavf_add_queue_stats(&data, ring); + iavf_add_queue_stats(&data, tx_ring); + iavf_add_queue_stats(&data, rx_ring); } rcu_read_unlock(); } @@ -376,9 +375,9 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) iavf_add_stat_strings(&data, iavf_gstrings_stats); /* Queues are always allocated in pairs, so we just use - * real_num_tx_queues for both Tx and Rx queues. + * num_tx_queues for both Tx and Rx queues. */ - for (i = 0; i < netdev->real_num_tx_queues; i++) { + for (i = 0; i < netdev->num_tx_queues; i++) { iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, "tx", i); iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2b2b22af42be..eb3a48330cc1 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -840,6 +840,28 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) } /** + * ice_get_max_txq - return the maximum number of Tx queues for in a PF + * @pf: PF structure + * + * Return: maximum number of Tx queues + */ +static inline int ice_get_max_txq(struct ice_pf *pf) +{ + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); +} + +/** + * ice_get_max_rxq - return the maximum number of Rx queues for in a PF + * @pf: PF structure + * + * Return: maximum number of Rx queues + */ +static inline int ice_get_max_rxq(struct ice_pf *pf) +{ + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); +} + +/** * ice_get_main_vsi - Get the PF VSI * @pf: PF instance * diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 301947d53ede..e6a20af6f63d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1930,6 +1930,17 @@ __ice_get_ethtool_stats(struct net_device *netdev, int i = 0; char *p; + if (ice_is_port_repr_netdev(netdev)) { + ice_update_eth_stats(vsi); + + for (j = 0; j < ICE_VSI_STATS_LEN; j++) { + p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset; + data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + return; + } + ice_update_pf_stats(pf); ice_update_vsi_stats(vsi); @@ -1939,9 +1950,6 @@ __ice_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - if (ice_is_port_repr_netdev(netdev)) - return; - /* populate per queue stats */ rcu_read_lock(); @@ -3774,24 +3782,6 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) } /** - * ice_get_max_txq - return the maximum number of Tx queues for in a PF - * @pf: PF structure - */ -static int ice_get_max_txq(struct ice_pf *pf) -{ - return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); -} - -/** - * ice_get_max_rxq - return the maximum number of Rx queues for in a PF - * @pf: PF structure - */ -static int ice_get_max_rxq(struct ice_pf *pf) -{ - return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); -} - -/** * ice_get_combined_cnt - return the current number of combined channels * @vsi: PF VSI pointer * diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e7308e381e2f..3c36e3641b9e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4699,8 +4699,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) struct net_device *netdev; u8 mac_addr[ETH_ALEN]; - netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, - vsi->alloc_rxq); + netdev = alloc_etherdev_mqs(sizeof(*np), ice_get_max_txq(vsi->back), + ice_get_max_rxq(vsi->back)); if (!netdev) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 90f99443a922..096566c697f4 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -2,6 +2,7 @@ /* Copyright (C) 2019-2021, Intel Corporation. */ #include "ice.h" +#include "ice_lib.h" #include "ice_eswitch.h" #include "devlink/devlink.h" #include "devlink/port.h" @@ -67,7 +68,7 @@ ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) return; vsi = repr->src_vsi; - ice_update_vsi_stats(vsi); + ice_update_eth_stats(vsi); eth_stats = &vsi->eth_stats; stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast + @@ -315,7 +316,7 @@ ice_repr_reg_netdev(struct net_device *netdev, const struct net_device_ops *ops) static int ice_repr_ready_vf(struct ice_repr *repr) { - return !ice_check_vf_ready_for_cfg(repr->vf); + return ice_check_vf_ready_for_cfg(repr->vf); } static int ice_repr_ready_sf(struct ice_repr *repr) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index b206fba092c8..ec1b75f039bb 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -1066,7 +1066,7 @@ bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val); int idpf_idc_init(struct idpf_adapter *adapter); int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, enum iidc_function_type ftype); -void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info); +void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter); void idpf_idc_deinit_vport_aux_device(struct iidc_rdma_vport_dev_info *vdev_info); void idpf_idc_issue_reset_event(struct iidc_rdma_core_dev_info *cdev_info); void idpf_idc_vdev_mtu_event(struct iidc_rdma_vport_dev_info *vdev_info, diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index bd4785fb8d3e..7e4f4ac92653 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -470,10 +470,11 @@ err_privd_alloc: /** * idpf_idc_deinit_core_aux_device - de-initialize Auxiliary Device(s) - * @cdev_info: IDC core device info pointer + * @adapter: driver private data structure */ -void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info) +void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter) { + struct iidc_rdma_core_dev_info *cdev_info = adapter->cdev_info; struct iidc_rdma_priv_dev_info *privd; if (!cdev_info) @@ -485,6 +486,7 @@ void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info) kfree(privd->mapped_mem_regions); kfree(privd); kfree(cdev_info); + adapter->cdev_info = NULL; } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 252259993022..f6b3b15364ff 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -1860,13 +1860,13 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, idpf_queue_assign(HSPLIT_EN, q, hs); idpf_queue_assign(RSC_EN, q, rsc); - bufq_set->num_refillqs = num_rxq; bufq_set->refillqs = kcalloc(num_rxq, swq_size, GFP_KERNEL); if (!bufq_set->refillqs) { err = -ENOMEM; goto err_alloc; } + bufq_set->num_refillqs = num_rxq; for (unsigned int k = 0; k < bufq_set->num_refillqs; k++) { struct idpf_sw_queue *refillq = &bufq_set->refillqs[k]; diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d5a877e1fef8..113ecfc16dd7 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3668,7 +3668,7 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter) idpf_ptp_release(adapter); idpf_deinit_task(adapter); - idpf_idc_deinit_core_aux_device(adapter->cdev_info); + idpf_idc_deinit_core_aux_device(adapter); idpf_rel_rx_pt_lkup(adapter); idpf_intr_rel(adapter); diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index a3845edf0e48..f0b5dd752f08 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3053,6 +3053,11 @@ static void lan743x_phylink_mac_link_up(struct phylink_config *config, else if (speed == SPEED_100) mac_cr |= MAC_CR_CFG_L_; + if (duplex == DUPLEX_FULL) + mac_cr |= MAC_CR_DPX_; + else + mac_cr &= ~MAC_CR_DPX_; + lan743x_csr_write(adapter, MAC_CR, mac_cr); lan743x_ptp_update_latency(adapter, speed); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9017e806ecda..dca62fb9a3a9 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3425,6 +3425,7 @@ static int add_adev(struct gdma_dev *gd, const char *name) struct auxiliary_device *adev; struct mana_adev *madev; int ret; + int id; madev = kzalloc_obj(*madev); if (!madev) @@ -3434,7 +3435,8 @@ static int add_adev(struct gdma_dev *gd, const char *name) ret = mana_adev_idx_alloc(); if (ret < 0) goto idx_fail; - adev->id = ret; + id = ret; + adev->id = id; adev->name = name; adev->dev.parent = gd->gdma_context->dev; @@ -3460,7 +3462,7 @@ add_fail: auxiliary_device_uninit(adev); init_fail: - mana_adev_idx_free(adev->id); + mana_adev_idx_free(id); idx_fail: kfree(madev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 8d040e611d5a..637e635bbf03 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1719,13 +1719,18 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) if (ether_addr_equal(netdev->dev_addr, mac)) return 0; - err = ionic_program_mac(lif, mac); - if (err < 0) - return err; + /* Only program macs for virtual functions to avoid losing the permanent + * Mac across warm reset/reboot. + */ + if (lif->ionic->pdev->is_virtfn) { + err = ionic_program_mac(lif, mac); + if (err < 0) + return err; - if (err > 0) - netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", - __func__); + if (err > 0) + netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", + __func__); + } err = eth_prepare_mac_addr_change(netdev, addr); if (err) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 0a3cf2f848a5..fd4e7622f123 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -962,7 +962,6 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); psdata = cppi5_hdesc_get_psdata(desc_rx); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); count++; xsk_buff_set_size(xdp, pkt_len); xsk_buff_dma_sync_for_cpu(xdp); @@ -988,6 +987,7 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, emac_dispatch_skb_zc(emac, xdp, psdata); xsk_buff_free(xdp); } + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); } if (xdp_status & ICSSG_XDP_REDIR) @@ -1057,7 +1057,6 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) /* firmware adds 4 CRC bytes, strip them */ pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* if allocation fails we drop the packet but push the * descriptor back to the ring with old page to prevent a stall @@ -1115,6 +1114,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) ndev->stats.rx_packets++; requeue: + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* queue another RX DMA */ ret = prueth_dma_rx_push_mapped(emac, &emac->rx_chns, new_page, PRUETH_MAX_PKT_SIZE); diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index b7282f5c9632..120aeb539d9f 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -2058,6 +2058,68 @@ static const struct ethtool_ops team_ethtool_ops = { * rt netlink interface ***********************/ +/* For tx path we need a linkup && enabled port and for parse any port + * suffices. + */ +static struct team_port *team_header_port_get_rcu(struct team *team, + bool txable) +{ + struct team_port *port; + + list_for_each_entry_rcu(port, &team->port_list, list) { + if (!txable || team_port_txable(port)) + return port; + } + + return NULL; +} + +static int team_header_create(struct sk_buff *skb, struct net_device *team_dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct team *team = netdev_priv(team_dev); + const struct header_ops *port_ops; + struct team_port *port; + int ret = 0; + + rcu_read_lock(); + port = team_header_port_get_rcu(team, true); + if (port) { + port_ops = READ_ONCE(port->dev->header_ops); + if (port_ops && port_ops->create) + ret = port_ops->create(skb, port->dev, + type, daddr, saddr, len); + } + rcu_read_unlock(); + return ret; +} + +static int team_header_parse(const struct sk_buff *skb, + const struct net_device *team_dev, + unsigned char *haddr) +{ + struct team *team = netdev_priv(team_dev); + const struct header_ops *port_ops; + struct team_port *port; + int ret = 0; + + rcu_read_lock(); + port = team_header_port_get_rcu(team, false); + if (port) { + port_ops = READ_ONCE(port->dev->header_ops); + if (port_ops && port_ops->parse) + ret = port_ops->parse(skb, port->dev, haddr); + } + rcu_read_unlock(); + return ret; +} + +static const struct header_ops team_header_ops = { + .create = team_header_create, + .parse = team_header_parse, +}; + static void team_setup_by_port(struct net_device *dev, struct net_device *port_dev) { @@ -2066,7 +2128,8 @@ static void team_setup_by_port(struct net_device *dev, if (port_dev->type == ARPHRD_ETHER) dev->header_ops = team->header_ops_cache; else - dev->header_ops = port_dev->header_ops; + dev->header_ops = port_dev->header_ops ? + &team_header_ops : NULL; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; dev->needed_headroom = port_dev->needed_headroom; diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h index a5f93b6c4482..fa5cab9d3e55 100644 --- a/drivers/net/tun_vnet.h +++ b/drivers/net/tun_vnet.h @@ -244,7 +244,7 @@ tun_vnet_hdr_tnl_from_skb(unsigned int flags, if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload, tun_vnet_is_little_endian(flags), - vlan_hlen, true)) { + vlan_hlen, true, false)) { struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr; struct skb_shared_info *sinfo = skb_shinfo(skb); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 72d6a9c6a5a2..ab2108ee206a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3267,8 +3267,12 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) struct virtio_net_hdr_v1_hash_tunnel *hdr; int num_sg; unsigned hdr_len = vi->hdr_len; + bool feature_hdrlen; bool can_push; + feature_hdrlen = virtio_has_feature(vi->vdev, + VIRTIO_NET_F_GUEST_HDRLEN); + pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); /* Make sure it's safe to cast between formats */ @@ -3288,7 +3292,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, virtio_is_little_endian(vi->vdev), 0, - false)) + false, feature_hdrlen)) return -EPROTO; if (vi->mergeable_rx_bufs) @@ -3351,6 +3355,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Don't wait up for transmitted skbs to be freed. */ if (!use_napi) { skb_orphan(skb); + skb_dst_drop(skb); nf_reset_ct(skb); } diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c index 7754baed67f2..97cff5b8ca88 100644 --- a/drivers/pci/pwrctrl/core.c +++ b/drivers/pci/pwrctrl/core.c @@ -299,8 +299,10 @@ static bool pci_pwrctrl_is_required(struct device_node *np) struct device_node *remote __free(device_node) = of_graph_get_remote_port_parent(endpoint); if (remote) { - if (of_pci_supply_present(remote)) + if (of_pci_supply_present(remote)) { + of_node_put(endpoint); return true; + } } } } diff --git a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c index 0d0377283c37..c7e4beec160a 100644 --- a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c +++ b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c @@ -68,13 +68,6 @@ static int pwrseq_pwrctrl_power_off(struct pci_pwrctrl *pwrctrl) return pwrseq_power_off(pwrseq->pwrseq); } -static void devm_pwrseq_pwrctrl_power_off(void *data) -{ - struct pwrseq_pwrctrl *pwrseq = data; - - pwrseq_pwrctrl_power_off(&pwrseq->pwrctrl); -} - static int pwrseq_pwrctrl_probe(struct platform_device *pdev) { const struct pwrseq_pwrctrl_pdata *pdata; @@ -101,11 +94,6 @@ static int pwrseq_pwrctrl_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(pwrseq->pwrseq), "Failed to get the power sequencer\n"); - ret = devm_add_action_or_reset(dev, devm_pwrseq_pwrctrl_power_off, - pwrseq); - if (ret) - return ret; - pwrseq->pwrctrl.power_on = pwrseq_pwrctrl_power_on; pwrseq->pwrctrl.power_off = pwrseq_pwrctrl_power_off; diff --git a/drivers/pci/pwrctrl/slot.c b/drivers/pci/pwrctrl/slot.c index 082af81efe25..b87639253ae2 100644 --- a/drivers/pci/pwrctrl/slot.c +++ b/drivers/pci/pwrctrl/slot.c @@ -63,7 +63,6 @@ static void devm_slot_pwrctrl_release(void *data) { struct slot_pwrctrl *slot = data; - slot_pwrctrl_power_off(&slot->pwrctrl); regulator_bulk_free(slot->num_supplies, slot->supplies); } diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index d6a46fe0cda8..3f518dce6d23 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1135,9 +1135,12 @@ int mtk_pctrl_init(struct platform_device *pdev, goto chip_error; } - ret = mtk_eint_init(pctl, pdev); - if (ret) - goto chip_error; + /* Only initialize EINT if we have EINT pins */ + if (data->eint_hw.ap_num > 0) { + ret = mtk_eint_init(pctl, pdev); + if (ret) + goto chip_error; + } return 0; diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 83f940fe30b2..d02d42513ebb 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -723,6 +723,21 @@ static const struct pinconf_ops pmic_gpio_pinconf_ops = { .pin_config_group_dbg_show = pmic_gpio_config_dbg_show, }; +static int pmic_gpio_get_direction(struct gpio_chip *chip, unsigned pin) +{ + struct pmic_gpio_state *state = gpiochip_get_data(chip); + struct pmic_gpio_pad *pad; + + pad = state->ctrl->desc->pins[pin].drv_data; + + if (!pad->is_enabled || pad->analog_pass || + (!pad->input_enabled && !pad->output_enabled)) + return -EINVAL; + + /* Make sure the state is aligned on what pmic_gpio_get() returns */ + return pad->input_enabled ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT; +} + static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned pin) { struct pmic_gpio_state *state = gpiochip_get_data(chip); @@ -801,6 +816,7 @@ static void pmic_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) } static const struct gpio_chip pmic_gpio_gpio_template = { + .get_direction = pmic_gpio_get_direction, .direction_input = pmic_gpio_direction_input, .direction_output = pmic_gpio_direction_output, .get = pmic_gpio_get, diff --git a/drivers/pinctrl/renesas/pinctrl-rza1.c b/drivers/pinctrl/renesas/pinctrl-rza1.c index bc8b9b9ad05b..d2949e4dbaf7 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza1.c +++ b/drivers/pinctrl/renesas/pinctrl-rza1.c @@ -589,7 +589,7 @@ static inline unsigned int rza1_get_bit(struct rza1_port *port, { void __iomem *mem = RZA1_ADDR(port->base, reg, port->id); - return ioread16(mem) & BIT(bit); + return !!(ioread16(mem) & BIT(bit)); } /** diff --git a/drivers/pinctrl/renesas/pinctrl-rzt2h.c b/drivers/pinctrl/renesas/pinctrl-rzt2h.c index 9949108a35bb..5927744c7a96 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzt2h.c +++ b/drivers/pinctrl/renesas/pinctrl-rzt2h.c @@ -85,7 +85,7 @@ struct rzt2h_pinctrl { struct gpio_chip gpio_chip; struct pinctrl_gpio_range gpio_range; DECLARE_BITMAP(used_irqs, RZT2H_INTERRUPTS_NUM); - spinlock_t lock; /* lock read/write registers */ + raw_spinlock_t lock; /* lock read/write registers */ struct mutex mutex; /* serialize adding groups and functions */ bool safety_port_enabled; atomic_t wakeup_path; @@ -145,7 +145,7 @@ static void rzt2h_pinctrl_set_pfc_mode(struct rzt2h_pinctrl *pctrl, u64 reg64; u16 reg16; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); /* Set pin to 'Non-use (Hi-Z input protection)' */ reg16 = rzt2h_pinctrl_readw(pctrl, port, PM(port)); @@ -474,7 +474,7 @@ static int rzt2h_gpio_request(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); /* Select GPIO mode in PMC Register */ rzt2h_pinctrl_set_gpio_en(pctrl, port, bit, true); @@ -487,7 +487,7 @@ static void rzt2h_gpio_set_direction(struct rzt2h_pinctrl *pctrl, u32 port, { u16 reg; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); reg = rzt2h_pinctrl_readw(pctrl, port, PM(port)); reg &= ~PM_PIN_MASK(bit); @@ -509,7 +509,7 @@ static int rzt2h_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); if (rzt2h_pinctrl_readb(pctrl, port, PMC(port)) & BIT(bit)) { /* @@ -547,7 +547,7 @@ static int rzt2h_gpio_set(struct gpio_chip *chip, unsigned int offset, u8 bit = RZT2H_PIN_ID_TO_PIN(offset); u8 reg; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); reg = rzt2h_pinctrl_readb(pctrl, port, P(port)); if (value) @@ -833,6 +833,7 @@ static int rzt2h_gpio_register(struct rzt2h_pinctrl *pctrl) if (ret) return dev_err_probe(dev, ret, "Unable to parse gpio-ranges\n"); + of_node_put(of_args.np); if (of_args.args[0] != 0 || of_args.args[1] != 0 || of_args.args[2] != pctrl->data->n_port_pins) return dev_err_probe(dev, -EINVAL, @@ -964,7 +965,7 @@ static int rzt2h_pinctrl_probe(struct platform_device *pdev) if (ret) return ret; - spin_lock_init(&pctrl->lock); + raw_spin_lock_init(&pctrl->lock); mutex_init(&pctrl->mutex); platform_set_drvdata(pdev, pctrl); diff --git a/drivers/pinctrl/stm32/Kconfig b/drivers/pinctrl/stm32/Kconfig index 5f67e1ee66dd..d6a171523012 100644 --- a/drivers/pinctrl/stm32/Kconfig +++ b/drivers/pinctrl/stm32/Kconfig @@ -65,6 +65,7 @@ config PINCTRL_STM32_HDP select PINMUX select GENERIC_PINCONF select GPIOLIB + select GPIO_GENERIC help The Hardware Debug Port allows the observation of internal signals. It uses configurable multiplexer to route signals in a dedicated observation register. diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index c990b6118172..d3042e0c9712 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -157,6 +157,7 @@ sunxi_pinctrl_desc_find_function_by_name(struct sunxi_pinctrl *pctl, const char *pin_name, const char *func_name) { + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; int i; for (i = 0; i < pctl->desc->npins; i++) { @@ -168,7 +169,7 @@ sunxi_pinctrl_desc_find_function_by_name(struct sunxi_pinctrl *pctl, while (func->name) { if (!strcmp(func->name, func_name) && (!func->variant || - func->variant & pctl->variant)) + func->variant & variant)) return func; func++; @@ -209,6 +210,8 @@ sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl, const u16 pin_num, const u8 muxval) { + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; + for (unsigned int i = 0; i < pctl->desc->npins; i++) { const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_desc_function *func = pin->functions; @@ -216,7 +219,7 @@ sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl, if (pin->pin.number != pin_num) continue; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; while (func->name) { @@ -1089,6 +1092,9 @@ static int sunxi_pinctrl_irq_request_resources(struct irq_data *d) { struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d); struct sunxi_desc_function *func; + unsigned int offset; + u32 reg, shift, mask; + u8 disabled_mux, muxval; int ret; func = sunxi_pinctrl_desc_find_function_by_pin(pctl, @@ -1096,8 +1102,21 @@ static int sunxi_pinctrl_irq_request_resources(struct irq_data *d) if (!func) return -EINVAL; - ret = gpiochip_lock_as_irq(pctl->chip, - pctl->irq_array[d->hwirq] - pctl->desc->pin_base); + offset = pctl->irq_array[d->hwirq] - pctl->desc->pin_base; + sunxi_mux_reg(pctl, offset, ®, &shift, &mask); + muxval = (readl(pctl->membase + reg) & mask) >> shift; + + /* Change muxing to GPIO INPUT mode if at reset value */ + if (pctl->flags & SUNXI_PINCTRL_NEW_REG_LAYOUT) + disabled_mux = SUN4I_FUNC_DISABLED_NEW; + else + disabled_mux = SUN4I_FUNC_DISABLED_OLD; + + if (muxval == disabled_mux) + sunxi_pmx_set(pctl->pctl_dev, pctl->irq_array[d->hwirq], + SUN4I_FUNC_INPUT); + + ret = gpiochip_lock_as_irq(pctl->chip, offset); if (ret) { dev_err(pctl->dev, "unable to lock HW IRQ %lu for IRQ\n", irqd_to_hwirq(d)); @@ -1338,6 +1357,7 @@ static int sunxi_pinctrl_add_function(struct sunxi_pinctrl *pctl, static int sunxi_pinctrl_build_state(struct platform_device *pdev) { struct sunxi_pinctrl *pctl = platform_get_drvdata(pdev); + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; void *ptr; int i; @@ -1362,7 +1382,7 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_pinctrl_group *group = pctl->groups + pctl->ngroups; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; group->name = pin->pin.name; @@ -1387,11 +1407,11 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_desc_function *func; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; for (func = pin->functions; func->name; func++) { - if (func->variant && !(pctl->variant & func->variant)) + if (func->variant && !(variant & func->variant)) continue; /* Create interrupt mapping while we're at it */ @@ -1419,14 +1439,14 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_desc_function *func; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; for (func = pin->functions; func->name; func++) { struct sunxi_pinctrl_function *func_item; const char **func_grp; - if (func->variant && !(pctl->variant & func->variant)) + if (func->variant && !(variant & func->variant)) continue; func_item = sunxi_pinctrl_find_function_by_name(pctl, @@ -1568,7 +1588,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, pctl->dev = &pdev->dev; pctl->desc = desc; - pctl->variant = flags & SUNXI_PINCTRL_VARIANT_MASK; + pctl->flags = flags; if (flags & SUNXI_PINCTRL_NEW_REG_LAYOUT) { pctl->bank_mem_size = D1_BANK_MEM_SIZE; pctl->pull_regs_offset = D1_PULL_REGS_OFFSET; @@ -1604,8 +1624,9 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, for (i = 0, pin_idx = 0; i < pctl->desc->npins; i++) { const struct sunxi_desc_pin *pin = pctl->desc->pins + i; + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; pins[pin_idx++] = pin->pin; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h index ad26e4de16a8..0daf7600e2fb 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h @@ -86,6 +86,8 @@ #define SUN4I_FUNC_INPUT 0 #define SUN4I_FUNC_IRQ 6 +#define SUN4I_FUNC_DISABLED_OLD 7 +#define SUN4I_FUNC_DISABLED_NEW 15 #define SUNXI_PINCTRL_VARIANT_MASK GENMASK(7, 0) #define SUNXI_PINCTRL_NEW_REG_LAYOUT BIT(8) @@ -174,7 +176,7 @@ struct sunxi_pinctrl { unsigned *irq_array; raw_spinlock_t lock; struct pinctrl_dev *pctl_dev; - unsigned long variant; + unsigned long flags; u32 bank_mem_size; u32 pull_regs_offset; u32 dlevel_field_width; diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c index fa7b3bda688a..bee271a4fda1 100644 --- a/drivers/platform/olpc/olpc-xo175-ec.c +++ b/drivers/platform/olpc/olpc-xo175-ec.c @@ -482,7 +482,7 @@ static int olpc_xo175_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *resp, dev_dbg(dev, "CMD %x, %zd bytes expected\n", cmd, resp_len); if (inlen > 5) { - dev_err(dev, "command len %zd too big!\n", resp_len); + dev_err(dev, "command len %zd too big!\n", inlen); return -EOVERFLOW; } diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 19f82c1d3090..631ffc0978d1 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -117,7 +117,7 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms } if (unlikely(mbox_status == HSMP_STATUS_NOT_READY)) { - dev_err(sock->dev, "Message ID 0x%X failure : SMU tmeout (status = 0x%X)\n", + dev_err(sock->dev, "Message ID 0x%X failure : SMU timeout (status = 0x%X)\n", msg->msg_id, mbox_status); return -ETIMEDOUT; } else if (unlikely(mbox_status == HSMP_ERR_INVALID_MSG)) { diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index 208f6fe16168..569743746347 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -1082,6 +1082,20 @@ static const struct dmi_system_id power_limits[] = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA503QM"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_def = 35, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 65, + .ppt_pl2_sppt_max = 80, + }, + }, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "GA503QR"), }, .driver_data = &(struct power_data) { @@ -1520,6 +1534,35 @@ static const struct dmi_system_id power_limits[] = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GZ302EA"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 60, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 32, + .ppt_pl2_sppt_def = 75, + .ppt_pl2_sppt_max = 92, + .ppt_pl3_fppt_min = 45, + .ppt_pl3_fppt_def = 86, + .ppt_pl3_fppt_max = 93, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 45, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 32, + .ppt_pl2_sppt_def = 52, + .ppt_pl2_sppt_max = 92, + .ppt_pl3_fppt_min = 45, + .ppt_pl3_fppt_def = 71, + .ppt_pl3_fppt_max = 93, + }, + }, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "G513I"), }, .driver_data = &(struct power_data) { @@ -1598,6 +1641,40 @@ static const struct dmi_system_id power_limits[] = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "G614FP"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 30, + .ppt_pl1_spl_max = 120, + .ppt_pl2_sppt_min = 65, + .ppt_pl2_sppt_def = 140, + .ppt_pl2_sppt_max = 165, + .ppt_pl3_fppt_min = 65, + .ppt_pl3_fppt_def = 140, + .ppt_pl3_fppt_max = 165, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 15, + .nv_tgp_min = 50, + .nv_tgp_max = 100, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 65, + .ppt_pl2_sppt_min = 25, + .ppt_pl2_sppt_max = 65, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 75, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .requires_fan_curve = true, + }, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "G614J"), }, .driver_data = &(struct power_data) { diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index a38a65f5c550..b4677c5bba5b 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -548,7 +548,7 @@ static const struct dmi_system_id asus_quirks[] = { .callback = dmi_matched, .ident = "ASUS ROG Z13", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_SYS_VENDOR, "ASUS"), DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow Z13"), }, .driver_data = &quirk_asus_z13, diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 68ede7e5757a..988a0acc9622 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -120,6 +120,13 @@ static const struct thermal_profile_params omen_v1_thermal_params = { .ec_tp_offset = HP_VICTUS_S_EC_THERMAL_PROFILE_OFFSET, }; +static const struct thermal_profile_params omen_v1_legacy_thermal_params = { + .performance = HP_OMEN_V1_THERMAL_PROFILE_PERFORMANCE, + .balanced = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT, + .low_power = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT, + .ec_tp_offset = HP_OMEN_EC_THERMAL_PROFILE_OFFSET, +}; + /* * A generic pointer for the currently-active board's thermal profile * parameters. @@ -176,6 +183,10 @@ static const char * const victus_thermal_profile_boards[] = { /* DMI Board names of Victus 16-r and Victus 16-s laptops */ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst = { { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8A4D") }, + .driver_data = (void *)&omen_v1_legacy_thermal_params, + }, + { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BAB") }, .driver_data = (void *)&omen_v1_thermal_params, }, @@ -184,6 +195,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst .driver_data = (void *)&victus_s_thermal_params, }, { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCA") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, + { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCD") }, .driver_data = (void *)&omen_v1_thermal_params, }, @@ -196,6 +211,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst .driver_data = (void *)&victus_s_thermal_params, }, { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C76") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, + { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C78") }, .driver_data = (void *)&omen_v1_thermal_params, }, diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 95c405c8bac0..2ddd8af8c1ce 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -438,6 +438,14 @@ static int intel_hid_pl_suspend_handler(struct device *device) return 0; } +static int intel_hid_pl_freeze_handler(struct device *device) +{ + struct intel_hid_priv *priv = dev_get_drvdata(device); + + priv->wakeup_mode = false; + return intel_hid_pl_suspend_handler(device); +} + static int intel_hid_pl_resume_handler(struct device *device) { intel_hid_pm_complete(device); @@ -452,7 +460,7 @@ static int intel_hid_pl_resume_handler(struct device *device) static const struct dev_pm_ops intel_hid_pl_pm_ops = { .prepare = intel_hid_pm_prepare, .complete = intel_hid_pm_complete, - .freeze = intel_hid_pl_suspend_handler, + .freeze = intel_hid_pl_freeze_handler, .thaw = intel_hid_pl_resume_handler, .restore = intel_hid_pl_resume_handler, .suspend = intel_hid_pl_suspend_handler, diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index b8cdaa233ea9..e238c3105c78 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -558,6 +558,9 @@ static bool disable_dynamic_sst_features(void) { u64 value; + if (!static_cpu_has(X86_FEATURE_HWP)) + return true; + rdmsrq(MSR_PM_ENABLE, value); return !(value & 0x1); } @@ -869,7 +872,7 @@ static int isst_if_get_perf_level(void __user *argp) _read_pp_info("current_level", perf_level.current_level, SST_PP_STATUS_OFFSET, SST_PP_LEVEL_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE) _read_pp_info("locked", perf_level.locked, SST_PP_STATUS_OFFSET, - SST_PP_LOCK_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE) + SST_PP_LOCK_START, SST_PP_LOCK_WIDTH, SST_MUL_FACTOR_NONE) _read_pp_info("feature_state", perf_level.feature_state, SST_PP_STATUS_OFFSET, SST_PP_FEATURE_STATE_START, SST_PP_FEATURE_STATE_WIDTH, SST_MUL_FACTOR_NONE) perf_level.enabled = !!(power_domain_info->sst_header.cap_mask & BIT(1)); diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.c b/drivers/platform/x86/lenovo/wmi-gamezone.c index 381836d29a96..c7fe7e3c9f17 100644 --- a/drivers/platform/x86/lenovo/wmi-gamezone.c +++ b/drivers/platform/x86/lenovo/wmi-gamezone.c @@ -31,8 +31,6 @@ #define LWMI_GZ_METHOD_ID_SMARTFAN_SET 44 #define LWMI_GZ_METHOD_ID_SMARTFAN_GET 45 -static BLOCKING_NOTIFIER_HEAD(gz_chain_head); - struct lwmi_gz_priv { enum thermal_mode current_mode; struct notifier_block event_nb; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index a20fce04fe79..3dd2adda195e 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -4966,7 +4966,8 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt) switch (mad_status) { case IBMVFC_MAD_SUCCESS: ibmvfc_dbg(vhost, "Discover Targets succeeded\n"); - vhost->num_targets = be32_to_cpu(rsp->num_written); + vhost->num_targets = min_t(u32, be32_to_cpu(rsp->num_written), + max_targets); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_ALLOC_TGTS); break; case IBMVFC_MAD_FAILED: diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 12124f9d5ccd..13412702188e 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -1734,7 +1734,7 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel, break; default: - if (channel < shost->max_channel) { + if (channel <= shost->max_channel) { res = scsi_scan_host_selected(shost, channel, id, lun, SCSI_SCAN_MANUAL); } else { diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 8e1686358e25..4c348645b04e 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -215,7 +215,7 @@ static unsigned char *ses_get_page2_descriptor(struct enclosure_device *edev, unsigned char *type_ptr = ses_dev->page1_types; unsigned char *desc_ptr = ses_dev->page2 + 8; - if (ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len) < 0) + if (ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len)) return NULL; for (i = 0; i < ses_dev->page1_num_types; i++, type_ptr += 4) { diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index b361c1bb3e43..45390e9b8cae 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -1009,7 +1009,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev) enable_irq(irq); } - ret = devm_spi_register_controller(&pdev->dev, controller); + ret = spi_register_controller(controller); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "spi_register_controller error\n"); goto free_dma; @@ -1035,6 +1035,7 @@ static void fsl_lpspi_remove(struct platform_device *pdev) struct fsl_lpspi_data *fsl_lpspi = spi_controller_get_devdata(controller); + spi_unregister_controller(controller); fsl_lpspi_dma_exit(controller); pm_runtime_dont_use_autosuspend(fsl_lpspi->dev); diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index a7001b9e36e6..57768da3205d 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -1101,8 +1101,6 @@ static void meson_spicc_remove(struct platform_device *pdev) /* Disable SPI */ writel(0, spicc->base + SPICC_CONREG); - - spi_controller_put(spicc->host); } static const struct meson_spicc_data meson_spicc_gx_data = { diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c index bfcc140df810..3c61c799723b 100644 --- a/drivers/spi/spi-sn-f-ospi.c +++ b/drivers/spi/spi-sn-f-ospi.c @@ -612,7 +612,7 @@ static int f_ospi_probe(struct platform_device *pdev) u32 num_cs = OSPI_NUM_CS; int ret; - ctlr = spi_alloc_host(dev, sizeof(*ospi)); + ctlr = devm_spi_alloc_host(dev, sizeof(*ospi)); if (!ctlr) return -ENOMEM; @@ -635,43 +635,22 @@ static int f_ospi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ospi); ospi->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(ospi->base)) { - ret = PTR_ERR(ospi->base); - goto err_put_ctlr; - } + if (IS_ERR(ospi->base)) + return PTR_ERR(ospi->base); ospi->clk = devm_clk_get_enabled(dev, NULL); - if (IS_ERR(ospi->clk)) { - ret = PTR_ERR(ospi->clk); - goto err_put_ctlr; - } + if (IS_ERR(ospi->clk)) + return PTR_ERR(ospi->clk); - mutex_init(&ospi->mlock); - - ret = f_ospi_init(ospi); + ret = devm_mutex_init(dev, &ospi->mlock); if (ret) - goto err_destroy_mutex; + return ret; - ret = devm_spi_register_controller(dev, ctlr); + ret = f_ospi_init(ospi); if (ret) - goto err_destroy_mutex; - - return 0; - -err_destroy_mutex: - mutex_destroy(&ospi->mlock); - -err_put_ctlr: - spi_controller_put(ctlr); - - return ret; -} - -static void f_ospi_remove(struct platform_device *pdev) -{ - struct f_ospi *ospi = platform_get_drvdata(pdev); + return ret; - mutex_destroy(&ospi->mlock); + return devm_spi_register_controller(dev, ctlr); } static const struct of_device_id f_ospi_dt_ids[] = { @@ -686,7 +665,6 @@ static struct platform_driver f_ospi_driver = { .of_match_table = f_ospi_dt_ids, }, .probe = f_ospi_probe, - .remove = f_ospi_remove, }; module_platform_driver(f_ospi_driver); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 53dee314d76a..9b1125556d29 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -50,7 +50,6 @@ static void spidev_release(struct device *dev) struct spi_device *spi = to_spi_device(dev); spi_controller_put(spi->controller); - kfree(spi->driver_override); free_percpu(spi->pcpu_statistics); kfree(spi); } @@ -73,10 +72,9 @@ static ssize_t driver_override_store(struct device *dev, struct device_attribute *a, const char *buf, size_t count) { - struct spi_device *spi = to_spi_device(dev); int ret; - ret = driver_set_override(dev, &spi->driver_override, buf, count); + ret = __device_set_driver_override(dev, buf, count); if (ret) return ret; @@ -86,13 +84,8 @@ static ssize_t driver_override_store(struct device *dev, static ssize_t driver_override_show(struct device *dev, struct device_attribute *a, char *buf) { - const struct spi_device *spi = to_spi_device(dev); - ssize_t len; - - device_lock(dev); - len = sysfs_emit(buf, "%s\n", spi->driver_override ? : ""); - device_unlock(dev); - return len; + guard(spinlock)(&dev->driver_override.lock); + return sysfs_emit(buf, "%s\n", dev->driver_override.name ?: ""); } static DEVICE_ATTR_RW(driver_override); @@ -376,10 +369,12 @@ static int spi_match_device(struct device *dev, const struct device_driver *drv) { const struct spi_device *spi = to_spi_device(dev); const struct spi_driver *sdrv = to_spi_driver(drv); + int ret; /* Check override first, and if set, only use the named driver */ - if (spi->driver_override) - return strcmp(spi->driver_override, drv->name) == 0; + ret = device_match_driver_override(dev, drv); + if (ret >= 0) + return ret; /* Attempt an OF style match */ if (of_driver_match_device(dev, drv)) @@ -3539,8 +3534,19 @@ int devm_spi_register_controller(struct device *dev, if (ret) return ret; - return devm_add_action_or_reset(dev, devm_spi_unregister_controller, ctlr); + /* + * Prevent controller from being freed by spi_unregister_controller() + * if devm_add_action_or_reset() fails for a non-devres allocated + * controller. + */ + spi_controller_get(ctlr); + + ret = devm_add_action_or_reset(dev, devm_spi_unregister_controller, ctlr); + if (ret == 0 || ctlr->devm_allocated) + spi_controller_put(ctlr); + + return ret; } EXPORT_SYMBOL_GPL(devm_spi_register_controller); diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index d668bd19fd4a..528883d989b8 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/types.h> #include <linux/configfs.h> +#include <linux/blk-mq.h> #include <scsi/scsi.h> #include <scsi/scsi_tcq.h> #include <scsi/scsi_host.h> @@ -269,15 +270,27 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; } +static bool tcm_loop_flush_work_iter(struct request *rq, void *data) +{ + struct scsi_cmnd *sc = blk_mq_rq_to_pdu(rq); + struct tcm_loop_cmd *tl_cmd = scsi_cmd_priv(sc); + struct se_cmd *se_cmd = &tl_cmd->tl_se_cmd; + + flush_work(&se_cmd->work); + return true; +} + static int tcm_loop_target_reset(struct scsi_cmnd *sc) { struct tcm_loop_hba *tl_hba; struct tcm_loop_tpg *tl_tpg; + struct Scsi_Host *sh = sc->device->host; + int ret; /* * Locate the tcm_loop_hba_t pointer */ - tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + tl_hba = *(struct tcm_loop_hba **)shost_priv(sh); if (!tl_hba) { pr_err("Unable to perform device reset without active I_T Nexus\n"); return FAILED; @@ -286,11 +299,38 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc) * Locate the tl_tpg pointer from TargetID in sc->device->id */ tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - if (tl_tpg) { - tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; - return SUCCESS; - } - return FAILED; + if (!tl_tpg) + return FAILED; + + /* + * Issue a LUN_RESET to drain all commands that the target core + * knows about. This handles commands not yet marked CMD_T_COMPLETE. + */ + ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, 0, TMR_LUN_RESET); + if (ret != TMR_FUNCTION_COMPLETE) + return FAILED; + + /* + * Flush any deferred target core completion work that may still be + * queued. Commands that already had CMD_T_COMPLETE set before the TMR + * are skipped by the TMR drain, but their async completion work + * (transport_lun_remove_cmd → percpu_ref_put, release_cmd → scsi_done) + * may still be pending in target_completion_wq. + * + * The SCSI EH will reuse in-flight scsi_cmnd structures for recovery + * commands (e.g. TUR) immediately after this handler returns SUCCESS — + * if deferred work is still pending, the memset in queuecommand would + * zero the se_cmd while the work accesses it, leaking the LUN + * percpu_ref and hanging configfs unlink forever. + * + * Use blk_mq_tagset_busy_iter() to find all started requests and + * flush_work() on each — the same pattern used by mpi3mr, scsi_debug, + * and other SCSI drivers to drain outstanding commands during reset. + */ + blk_mq_tagset_busy_iter(&sh->tag_set, tcm_loop_flush_work_iter, NULL); + + tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; + return SUCCESS; } static const struct scsi_host_template tcm_loop_driver_template = { diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 3ae1f7137d9d..3d593af30aa5 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -276,7 +276,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, ssize_t len = 0; int ret = 0, i; - aio_cmd = kmalloc_flex(*aio_cmd, bvecs, sgl_nents); + aio_cmd = kzalloc_flex(*aio_cmd, bvecs, sgl_nents); if (!aio_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c index 49ff3bae7271..91f291627132 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c @@ -176,15 +176,21 @@ static inline void write_soc_slider(struct proc_thermal_device *proc_priv, u64 v static void set_soc_power_profile(struct proc_thermal_device *proc_priv, int slider) { + u8 offset; u64 val; val = read_soc_slider(proc_priv); val &= ~SLIDER_MASK; val |= FIELD_PREP(SLIDER_MASK, slider) | BIT(SLIDER_ENABLE_BIT); + if (slider == SOC_SLIDER_VALUE_MINIMUM || slider == SOC_SLIDER_VALUE_MAXIMUM) + offset = 0; + else + offset = slider_offset; + /* Set the slider offset from module params */ val &= ~SLIDER_OFFSET_MASK; - val |= FIELD_PREP(SLIDER_OFFSET_MASK, slider_offset); + val |= FIELD_PREP(SLIDER_OFFSET_MASK, offset); write_soc_slider(proc_priv, val); } diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index 478beafc6ac3..b1d658b8f7b5 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c @@ -301,11 +301,10 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, */ ret = dma_buf_fd(priv->dmabuf, get_dma_buf.open_flags); if (ret < 0) - goto err_dma_buf; + dma_buf_put(priv->dmabuf); + return ret; -err_dma_buf: - dma_buf_put(priv->dmabuf); err_dev_put: vfio_device_put_registration(&vdev->vdev); err_free_phys: diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c index 4252b147593a..7cee97559ba2 100644 --- a/drivers/virt/coco/tdx-guest/tdx-guest.c +++ b/drivers/virt/coco/tdx-guest/tdx-guest.c @@ -171,6 +171,8 @@ static void tdx_mr_deinit(const struct attribute_group *mr_grp) #define GET_QUOTE_SUCCESS 0 #define GET_QUOTE_IN_FLIGHT 0xffffffffffffffff +#define TDX_QUOTE_MAX_LEN (GET_QUOTE_BUF_SIZE - sizeof(struct tdx_quote_buf)) + /* struct tdx_quote_buf: Format of Quote request buffer. * @version: Quote format version, filled by TD. * @status: Status code of Quote request, filled by VMM. @@ -269,6 +271,7 @@ static int tdx_report_new_locked(struct tsm_report *report, void *data) u8 *buf; struct tdx_quote_buf *quote_buf = quote_data; struct tsm_report_desc *desc = &report->desc; + u32 out_len; int ret; u64 err; @@ -306,12 +309,17 @@ static int tdx_report_new_locked(struct tsm_report *report, void *data) return ret; } - buf = kvmemdup(quote_buf->data, quote_buf->out_len, GFP_KERNEL); + out_len = READ_ONCE(quote_buf->out_len); + + if (out_len > TDX_QUOTE_MAX_LEN) + return -EFBIG; + + buf = kvmemdup(quote_buf->data, out_len, GFP_KERNEL); if (!buf) return -ENOMEM; report->outblob = buf; - report->outblob_len = quote_buf->out_len; + report->outblob_len = out_len; /* * TODO: parse the PEM-formatted cert chain out of the quote buffer when diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 335692d41617..fbca7ce1c6bf 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2912,10 +2912,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * - * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate - * that the CPU will not dirty any cacheline overlapping this buffer while it - * is available, and to suppress overlapping cacheline warnings in DMA debug - * builds. + * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES + * to indicate that the CPU will not dirty any cacheline overlapping this buffer + * while it is available, and to suppress overlapping cacheline warnings in DMA + * debug builds. * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). @@ -2928,7 +2928,7 @@ int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, - DMA_ATTR_CPU_CACHE_CLEAN); + DMA_ATTR_DEBUGGING_IGNORE_CACHELINES); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 1759cc18753f..bbf9ee21306c 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -12,6 +12,7 @@ #include <linux/eventfd.h> #include <linux/file.h> #include <linux/kernel.h> +#include <linux/kstrtox.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/poll.h> @@ -30,7 +31,10 @@ #include <linux/seq_file.h> #include <linux/miscdevice.h> #include <linux/moduleparam.h> +#include <linux/notifier.h> +#include <linux/security.h> #include <linux/virtio_mmio.h> +#include <linux/wait.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -46,6 +50,7 @@ #include <xen/page.h> #include <xen/xen-ops.h> #include <xen/balloon.h> +#include <xen/xenbus.h> #ifdef CONFIG_XEN_ACPI #include <xen/acpi.h> #endif @@ -68,10 +73,20 @@ module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, MODULE_PARM_DESC(dm_op_buf_max_size, "Maximum size of a dm_op hypercall buffer"); +static bool unrestricted; +module_param(unrestricted, bool, 0); +MODULE_PARM_DESC(unrestricted, + "Don't restrict hypercalls to target domain if running in a domU"); + struct privcmd_data { domid_t domid; }; +/* DOMID_INVALID implies no restriction */ +static domid_t target_domain = DOMID_INVALID; +static bool restrict_wait; +static DECLARE_WAIT_QUEUE_HEAD(restrict_wait_wq); + static int privcmd_vma_range_is_mapped( struct vm_area_struct *vma, unsigned long addr, @@ -1563,13 +1578,16 @@ static long privcmd_ioctl(struct file *file, static int privcmd_open(struct inode *ino, struct file *file) { - struct privcmd_data *data = kzalloc_obj(*data); + struct privcmd_data *data; + + if (wait_event_interruptible(restrict_wait_wq, !restrict_wait) < 0) + return -EINTR; + data = kzalloc_obj(*data); if (!data) return -ENOMEM; - /* DOMID_INVALID implies no restriction */ - data->domid = DOMID_INVALID; + data->domid = target_domain; file->private_data = data; return 0; @@ -1662,6 +1680,52 @@ static struct miscdevice privcmd_dev = { .fops = &xen_privcmd_fops, }; +static int init_restrict(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + char *target; + unsigned int domid; + + /* Default to an guaranteed unused domain-id. */ + target_domain = DOMID_IDLE; + + target = xenbus_read(XBT_NIL, "target", "", NULL); + if (IS_ERR(target) || kstrtouint(target, 10, &domid)) { + pr_err("No target domain found, blocking all hypercalls\n"); + goto out; + } + + target_domain = domid; + + out: + if (!IS_ERR(target)) + kfree(target); + + restrict_wait = false; + wake_up_all(&restrict_wait_wq); + + return NOTIFY_DONE; +} + +static struct notifier_block xenstore_notifier = { + .notifier_call = init_restrict, +}; + +static void __init restrict_driver(void) +{ + if (unrestricted) { + if (security_locked_down(LOCKDOWN_XEN_USER_ACTIONS)) + pr_warn("Kernel is locked down, parameter \"unrestricted\" ignored\n"); + else + return; + } + + restrict_wait = true; + + register_xenstore_notifier(&xenstore_notifier); +} + static int __init privcmd_init(void) { int err; @@ -1669,6 +1733,9 @@ static int __init privcmd_init(void) if (!xen_domain()) return -ENODEV; + if (!xen_initial_domain()) + restrict_driver(); + err = misc_register(&privcmd_dev); if (err != 0) { pr_err("Could not register Xen privcmd device\n"); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index ebf5079096af..c0d17a369bda 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -4583,7 +4583,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info) for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) { if (space_info->sub_group[i]) { check_removing_space_info(space_info->sub_group[i]); - kfree(space_info->sub_group[i]); + btrfs_sysfs_remove_space_info(space_info->sub_group[i]); space_info->sub_group[i] = NULL; } } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 01f2dbb69832..1b0eb246b714 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2531,8 +2531,8 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info, if (mirror_num >= 0 && btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) { - btrfs_err(fs_info, "super offset mismatch %llu != %u", - btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); + btrfs_err(fs_info, "super offset mismatch %llu != %llu", + btrfs_super_bytenr(sb), btrfs_sb_offset(mirror_num)); ret = -EINVAL; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ab0d460c7139..ac871efb9763 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4616,21 +4616,32 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct inode *inode, bool log_inode_only, u64 logged_isize) { + u64 gen = BTRFS_I(inode)->generation; u64 flags; if (log_inode_only) { - /* set the generation to zero so the recover code - * can tell the difference between an logging - * just to say 'this inode exists' and a logging - * to say 'update this inode with these values' + /* + * Set the generation to zero so the recover code can tell the + * difference between a logging just to say 'this inode exists' + * and a logging to say 'update this inode with these values'. + * But only if the inode was not already logged before. + * We access ->logged_trans directly since it was already set + * up in the call chain by btrfs_log_inode(), and data_race() + * to avoid false alerts from KCSAN and since it was set already + * and one can set it to 0 since that only happens on eviction + * and we are holding a ref on the inode. */ - btrfs_set_inode_generation(leaf, item, 0); + ASSERT(data_race(BTRFS_I(inode)->logged_trans) > 0); + if (data_race(BTRFS_I(inode)->logged_trans) < trans->transid) + gen = 0; + btrfs_set_inode_size(leaf, item, logged_isize); } else { - btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); btrfs_set_inode_size(leaf, item, inode->i_size); } + btrfs_set_inode_generation(leaf, item, gen); + btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); btrfs_set_inode_mode(leaf, item, inode->i_mode); @@ -5448,42 +5459,63 @@ process: return 0; } -static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, - struct btrfs_path *path, u64 *size_ret) +static int get_inode_size_to_log(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path, u64 *size_ret) { struct btrfs_key key; + struct btrfs_inode_item *item; int ret; key.objectid = btrfs_ino(inode); key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - ret = btrfs_search_slot(NULL, log, &key, path, 0, 0); - if (ret < 0) { - return ret; - } else if (ret > 0) { - *size_ret = 0; - } else { - struct btrfs_inode_item *item; + /* + * Our caller called inode_logged(), so logged_trans is up to date. + * Use data_race() to silence any warning from KCSAN. Once logged_trans + * is set, it can only be reset to 0 after inode eviction. + */ + if (data_race(inode->logged_trans) == trans->transid) { + ret = btrfs_search_slot(NULL, inode->root->log_root, &key, path, 0, 0); + } else if (inode->generation < trans->transid) { + path->search_commit_root = true; + path->skip_locking = true; + ret = btrfs_search_slot(NULL, inode->root, &key, path, 0, 0); + path->search_commit_root = false; + path->skip_locking = false; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_item); - *size_ret = btrfs_inode_size(path->nodes[0], item); - /* - * If the in-memory inode's i_size is smaller then the inode - * size stored in the btree, return the inode's i_size, so - * that we get a correct inode size after replaying the log - * when before a power failure we had a shrinking truncate - * followed by addition of a new name (rename / new hard link). - * Otherwise return the inode size from the btree, to avoid - * data loss when replaying a log due to previously doing a - * write that expands the inode's size and logging a new name - * immediately after. - */ - if (*size_ret > inode->vfs_inode.i_size) - *size_ret = inode->vfs_inode.i_size; + } else { + *size_ret = 0; + return 0; } + /* + * If the inode was logged before or is from a past transaction, then + * its inode item must exist in the log root or in the commit root. + */ + ASSERT(ret <= 0); + if (WARN_ON_ONCE(ret > 0)) + ret = -ENOENT; + + if (ret < 0) + return ret; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + *size_ret = btrfs_inode_size(path->nodes[0], item); + /* + * If the in-memory inode's i_size is smaller then the inode size stored + * in the btree, return the inode's i_size, so that we get a correct + * inode size after replaying the log when before a power failure we had + * a shrinking truncate followed by addition of a new name (rename / new + * hard link). Otherwise return the inode size from the btree, to avoid + * data loss when replaying a log due to previously doing a write that + * expands the inode's size and logging a new name immediately after. + */ + if (*size_ret > inode->vfs_inode.i_size) + *size_ret = inode->vfs_inode.i_size; + btrfs_release_path(path); return 0; } @@ -6996,7 +7028,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ret = drop_inode_items(trans, log, path, inode, BTRFS_XATTR_ITEM_KEY); } else { - if (inode_only == LOG_INODE_EXISTS && ctx->logged_before) { + if (inode_only == LOG_INODE_EXISTS) { /* * Make sure the new inode item we write to the log has * the same isize as the current one (if it exists). @@ -7010,7 +7042,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * (zeroes), as if an expanding truncate happened, * instead of getting a file of 4Kb only. */ - ret = logged_inode_size(log, inode, path, &logged_isize); + ret = get_inode_size_to_log(trans, inode, path, &logged_isize); if (ret) goto out_unlock; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ab51e6ecfdef..6b8e810a35ce 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -8099,8 +8099,9 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans) smp_rmb(); ret = update_dev_stat_item(trans, device); - if (!ret) - atomic_sub(stats_cnt, &device->dev_stats_ccnt); + if (ret) + break; + atomic_sub(stats_cnt, &device->dev_stats_ccnt); } mutex_unlock(&fs_devices->device_list_mutex); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 1b7544aa88a2..c676e715b4f8 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -308,7 +308,9 @@ int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb) } /* Queue the remaining part of the folio. */ if (workspace->strm.total_out > bio->bi_iter.bi_size) { - u32 cur_len = offset_in_folio(out_folio, workspace->strm.total_out); + const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size; + + ASSERT(cur_len <= folio_size(out_folio)); if (!bio_add_folio(bio, out_folio, cur_len, 0)) { ret = -E2BIG; diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index a9f645f57bb2..97c48ebe8458 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -16,22 +16,36 @@ config EROFS_FS select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD help - EROFS (Enhanced Read-Only File System) is a lightweight read-only - file system with modern designs (e.g. no buffer heads, inline - xattrs/data, chunk-based deduplication, multiple devices, etc.) for - scenarios which need high-performance read-only solutions, e.g. - smartphones with Android OS, LiveCDs and high-density hosts with - numerous containers; - - It also provides transparent compression and deduplication support to - improve storage density and maintain relatively high compression - ratios, and it implements in-place decompression to temporarily reuse - page cache for compressed data using proper strategies, which is - quite useful for ensuring guaranteed end-to-end runtime decompression + EROFS (Enhanced Read-Only File System) is a modern, lightweight, + secure read-only filesystem for various use cases, such as immutable + system images, container images, application sandboxes, and datasets. + + EROFS uses a flexible, hierarchical on-disk design so that features + can be enabled on demand: the core on-disk format is block-aligned in + order to perform optimally on all kinds of devices, including block + and memory-backed devices; the format is easy to parse and has zero + metadata redundancy, unlike generic filesystems, making it ideal for + filesystem auditing and remote access; inline data, random-access + friendly directory data, inline/shared extended attributes and + chunk-based deduplication ensure space efficiency while maintaining + high performance. + + Optionally, it supports multiple devices to reference external data, + enabling data sharing for container images. + + It also has advanced encoded on-disk layouts, particularly for data + compression and fine-grained deduplication. It utilizes fixed-size + output compression to improve storage density while keeping relatively + high compression ratios. Furthermore, it implements in-place + decompression to reuse file pages to keep compressed data temporarily + with proper strategies, which ensures guaranteed end-to-end runtime performance under extreme memory pressure without extra cost. - See the documentation at <file:Documentation/filesystems/erofs.rst> - and the web pages at <https://erofs.docs.kernel.org> for more details. + For more details, see the web pages at <https://erofs.docs.kernel.org> + and the documentation at <file:Documentation/filesystems/erofs.rst>. + + To compile EROFS filesystem support as a module, choose M here. The + module will be called erofs. If unsure, say N. @@ -105,7 +119,8 @@ config EROFS_FS_ZIP depends on EROFS_FS default y help - Enable transparent compression support for EROFS file systems. + Enable EROFS compression layouts so that filesystems containing + compressed files can be parsed by the kernel. If you don't want to enable compression feature, say N. diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index abe873f01297..98cdaa1cd1a7 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -25,10 +25,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) container_of(iocb, struct erofs_fileio_rq, iocb); struct folio_iter fi; - if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) { - bio_advance(&rq->bio, ret); - zero_fill_bio(&rq->bio); - } + if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) + ret = -EIO; if (!rq->bio.bi_end_io) { bio_for_each_folio_all(fi, &rq->bio) { DBG_BUGON(folio_test_uptodate(fi.folio)); diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c index 829d50d5c717..ec433bacc592 100644 --- a/fs/erofs/ishare.c +++ b/fs/erofs/ishare.c @@ -200,8 +200,19 @@ struct inode *erofs_real_inode(struct inode *inode, bool *need_iput) int __init erofs_init_ishare(void) { - erofs_ishare_mnt = kern_mount(&erofs_anon_fs_type); - return PTR_ERR_OR_ZERO(erofs_ishare_mnt); + struct vfsmount *mnt; + int ret; + + mnt = kern_mount(&erofs_anon_fs_type); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + /* generic_fadvise() doesn't work if s_bdi == &noop_backing_dev_info */ + ret = super_setup_bdi(mnt->mnt_sb); + if (ret) + kern_unmount(mnt); + else + erofs_ishare_mnt = mnt; + return ret; } void erofs_exit_ishare(void) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 3977e42b9516..fe8121df9ef2 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1445,6 +1445,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, int bios) { struct erofs_sb_info *const sbi = EROFS_SB(io->sb); + int gfp_flag; /* wake up the caller thread for sync decompression */ if (io->sync) { @@ -1477,7 +1478,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; return; } + gfp_flag = memalloc_noio_save(); z_erofs_decompressqueue_work(&io->u.work); + memalloc_noio_restore(gfp_flag); } static void z_erofs_fill_bio_vec(struct bio_vec *bvec, diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 72206a292676..3baee4e7c1cf 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -14,7 +14,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o -ext4-inode-test-objs += inode-test.o -obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o +ext4-test-objs += inode-test.o mballoc-test.o \ + extents-test.o +obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-test.o ext4-$(CONFIG_FS_VERITY) += verity.o ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index cf0a0970c095..f41f320f4437 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -163,10 +163,17 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, */ if (handle) { + /* + * Since the inode is new it is ok to pass the + * XATTR_CREATE flag. This is necessary to match the + * remaining journal credits check in the set_handle + * function with the credits allocated for the new + * inode. + */ res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - ctx, len, 0); + ctx, len, XATTR_CREATE); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); ext4_clear_inode_state(inode, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 293f698b7042..7617e2d454ea 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1570,6 +1570,7 @@ struct ext4_sb_info { struct proc_dir_entry *s_proc; struct kobject s_kobj; struct completion s_kobj_unregister; + struct mutex s_error_notify_mutex; /* protects sysfs_notify vs kobject_del */ struct super_block *s_sb; struct buffer_head *s_mmp_bh; @@ -3944,6 +3945,11 @@ static inline bool ext4_inode_can_atomic_write(struct inode *inode) extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); + +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +#define EXPORT_SYMBOL_FOR_EXT4_TEST(sym) \ + EXPORT_SYMBOL_FOR_MODULES(sym, "ext4-test") +#endif #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index c484125d963f..ebaf7cc42430 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -264,5 +264,17 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, 0xffff); } +extern int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path); +extern int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex); +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +extern int ext4_ext_space_root_idx_test(struct inode *inode, int check); +extern struct ext4_ext_path *ext4_split_convert_extents_test( + handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path, + int flags, unsigned int *allocated); +#endif #endif /* _EXT4_EXTENTS */ diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index 7c4690eb7dad..5496b2c8e2cd 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c @@ -142,8 +142,10 @@ static struct file_system_type ext_fs_type = { static void extents_kunit_exit(struct kunit *test) { - struct ext4_sb_info *sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info; + struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb; + struct ext4_sb_info *sbi = sb->s_fs_info; + ext4_es_unregister_shrinker(sbi); kfree(sbi); kfree(k_ctx.k_ei); kfree(k_ctx.k_data); @@ -280,8 +282,8 @@ static int extents_kunit_init(struct kunit *test) eh->eh_depth = 0; eh->eh_entries = cpu_to_le16(1); eh->eh_magic = EXT4_EXT_MAGIC; - eh->eh_max = - cpu_to_le16(ext4_ext_space_root_idx(&k_ctx.k_ei->vfs_inode, 0)); + eh->eh_max = cpu_to_le16(ext4_ext_space_root_idx_test( + &k_ctx.k_ei->vfs_inode, 0)); eh->eh_generation = 0; /* @@ -384,8 +386,8 @@ static void test_split_convert(struct kunit *test) switch (param->type) { case TEST_SPLIT_CONVERT: - path = ext4_split_convert_extents(NULL, inode, &map, path, - param->split_flags, NULL); + path = ext4_split_convert_extents_test(NULL, inode, &map, + path, param->split_flags, NULL); break; case TEST_CREATE_BLOCKS: ext4_map_create_blocks_helper(test, inode, &map, param->split_flags); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ae3804f36535..8cce1479be6d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -184,9 +184,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, * - ENOMEM * - EIO */ -static int __ext4_ext_dirty(const char *where, unsigned int line, - handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) +int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) { int err; @@ -1736,6 +1736,13 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + k); if (err) return err; + if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) { + EXT4_ERROR_INODE(inode, + "path[%d].p_idx %p > EXT_LAST_INDEX %p", + k, path[k].p_idx, + EXT_LAST_INDEX(path[k].p_hdr)); + return -EFSCORRUPTED; + } path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) @@ -1748,6 +1755,14 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + k); if (err) goto clean; + if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) { + EXT4_ERROR_INODE(inode, + "path[%d].p_idx %p > EXT_LAST_INDEX %p", + k, path[k].p_idx, + EXT_LAST_INDEX(path[k].p_hdr)); + err = -EFSCORRUPTED; + goto clean; + } path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) @@ -3144,7 +3159,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) } /* FIXME!! we need to try to merge to left or right after zero-out */ -static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) +int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { ext4_fsblk_t ee_pblock; unsigned int ee_len; @@ -3239,6 +3254,9 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, insert_err = PTR_ERR(path); err = 0; + if (insert_err != -ENOSPC && insert_err != -EDQUOT && + insert_err != -ENOMEM) + goto out_path; /* * Get a new path to try to zeroout or fix the extent length. @@ -3255,13 +3273,20 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, goto out_path; } + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (!ex) { + EXT4_ERROR_INODE(inode, + "bad extent address lblock: %lu, depth: %d pblock %llu", + (unsigned long)ee_block, depth, path[depth].p_block); + err = -EFSCORRUPTED; + goto out; + } + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - depth = ext_depth(inode); - ex = path[depth].p_ext; - fix_extent_len: ex->ee_len = orig_ex.ee_len; err = ext4_ext_dirty(handle, inode, path + path->p_depth); @@ -3363,7 +3388,7 @@ static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode, ext4_ext_mark_initialized(ex); - ext4_ext_dirty(handle, inode, path + depth); + err = ext4_ext_dirty(handle, inode, path + depth); if (err) return err; @@ -4457,9 +4482,13 @@ got_allocated_blocks: path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (IS_ERR(path)) { err = PTR_ERR(path); - if (allocated_clusters) { + /* + * Gracefully handle out of space conditions. If the filesystem + * is inconsistent, we'll just leak allocated blocks to avoid + * causing even more damage. + */ + if (allocated_clusters && (err == -EDQUOT || err == -ENOSPC)) { int fb_flags = 0; - /* * free data blocks we just allocated. * not a good idea to call discard here directly, @@ -6238,6 +6267,33 @@ out: return 0; } -#ifdef CONFIG_EXT4_KUNIT_TESTS -#include "extents-test.c" +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +int ext4_ext_space_root_idx_test(struct inode *inode, int check) +{ + return ext4_ext_space_root_idx(inode, check); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_space_root_idx_test); + +struct ext4_ext_path *ext4_split_convert_extents_test(handle_t *handle, + struct inode *inode, struct ext4_map_blocks *map, + struct ext4_ext_path *path, int flags, + unsigned int *allocated) +{ + return ext4_split_convert_extents(handle, inode, map, path, + flags, allocated); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_split_convert_extents_test); + +EXPORT_SYMBOL_FOR_EXT4_TEST(__ext4_ext_dirty); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_zeroout); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_register_shrinker); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_unregister_shrinker); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_create_blocks); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_init_tree); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_lookup_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_insert_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_insert_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_find_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_issue_zeroout); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_query_blocks); #endif diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index f575751f1cae..2f0057e04934 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -975,13 +975,13 @@ static int ext4_fc_flush_data(journal_t *journal) int ret = 0; list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { - ret = jbd2_submit_inode_data(journal, ei->jinode); + ret = jbd2_submit_inode_data(journal, READ_ONCE(ei->jinode)); if (ret) return ret; } list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { - ret = jbd2_wait_inode_data(journal, ei->jinode); + ret = jbd2_wait_inode_data(journal, READ_ONCE(ei->jinode)); if (ret) return ret; } @@ -1613,19 +1613,21 @@ static int ext4_fc_replay_inode(struct super_block *sb, /* Immediately update the inode on disk. */ ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); if (ret) - goto out; + goto out_brelse; ret = sync_dirty_buffer(iloc.bh); if (ret) - goto out; + goto out_brelse; ret = ext4_mark_inode_used(sb, ino); if (ret) - goto out; + goto out_brelse; /* Given that we just wrote the inode on disk, this SHOULD succeed. */ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { ext4_debug("Inode not found."); - return -EFSCORRUPTED; + inode = NULL; + ret = -EFSCORRUPTED; + goto out_brelse; } /* @@ -1642,13 +1644,14 @@ static int ext4_fc_replay_inode(struct super_block *sb, ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); sync_dirty_buffer(iloc.bh); +out_brelse: brelse(iloc.bh); out: iput(inode); if (!ret) blkdev_issue_flush(sb->s_bdev); - return 0; + return ret; } /* diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e476c6de3074..bd8f230fa507 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -83,11 +83,23 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, int datasync, bool *needs_barrier) { struct inode *inode = file->f_inode; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, + }; int ret; ret = generic_buffers_fsync_noflush(file, start, end, datasync); - if (!ret) - ret = ext4_sync_parent(inode); + if (ret) + return ret; + + /* Force writeout of inode table buffer to disk */ + ret = ext4_write_inode(inode, &wbc); + if (ret) + return ret; + + ret = ext4_sync_parent(inode); + if (test_opt(inode->i_sb, BARRIER)) *needs_barrier = true; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b20a1bf866ab..b1bc1950c9f0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -686,6 +686,12 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (unlikely(!gdp)) return 0; + /* Inode was never used in this filesystem? */ + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT) || + ino >= EXT4_INODES_PER_GROUP(sb) - ext4_itable_unused_count(sb, gdp))) + return 0; + bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) + (ino / inodes_per_block)); if (!bh || !buffer_uptodate(bh)) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1f6bc05593df..408677fa8196 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -522,7 +522,15 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) goto out; len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); - BUG_ON(len > PAGE_SIZE); + + if (len > PAGE_SIZE) { + ext4_error_inode(inode, __func__, __LINE__, 0, + "inline size %zu exceeds PAGE_SIZE", len); + ret = -EFSCORRUPTED; + brelse(iloc.bh); + goto out; + } + kaddr = kmap_local_folio(folio, 0); ret = ext4_read_inline_data(inode, kaddr, len, &iloc); kaddr = folio_zero_tail(folio, len, kaddr + len); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 396dc3a5d16b..1123d995494b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -128,6 +128,8 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { + struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode); + trace_ext4_begin_ordered_truncate(inode, new_size); /* * If jinode is zero, then we never opened the file for @@ -135,10 +137,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, * jbd2_journal_begin_ordered_truncate() since there's no * outstanding writes we need to flush. */ - if (!EXT4_I(inode)->jinode) + if (!jinode) return 0; return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), - EXT4_I(inode)->jinode, + jinode, new_size); } @@ -184,6 +186,14 @@ void ext4_evict_inode(struct inode *inode) if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ext4_evict_ea_inode(inode); if (inode->i_nlink) { + /* + * If there's dirty page will lead to data loss, user + * could see stale data. + */ + if (unlikely(!ext4_emergency_state(inode->i_sb) && + mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY))) + ext4_warning_inode(inode, "data will be lost"); + truncate_inode_pages_final(&inode->i_data); goto no_delete; @@ -4451,8 +4461,13 @@ int ext4_inode_attach_jinode(struct inode *inode) spin_unlock(&inode->i_lock); return -ENOMEM; } - ei->jinode = jinode; - jbd2_journal_init_jbd_inode(ei->jinode, inode); + jbd2_journal_init_jbd_inode(jinode, inode); + /* + * Publish ->jinode only after it is fully initialized so that + * readers never observe a partially initialized jbd2_inode. + */ + smp_wmb(); + WRITE_ONCE(ei->jinode, jinode); jinode = NULL; } spin_unlock(&inode->i_lock); @@ -5401,18 +5416,36 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, inode->i_op = &ext4_encrypted_symlink_inode_operations; } else if (ext4_inode_is_fast_symlink(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; - if (inode->i_size == 0 || - inode->i_size >= sizeof(ei->i_data) || - strnlen((char *)ei->i_data, inode->i_size + 1) != - inode->i_size) { - ext4_error_inode(inode, function, line, 0, - "invalid fast symlink length %llu", - (unsigned long long)inode->i_size); - ret = -EFSCORRUPTED; - goto bad_inode; + + /* + * Orphan cleanup can see inodes with i_size == 0 + * and i_data uninitialized. Skip size checks in + * that case. This is safe because the first thing + * ext4_evict_inode() does for fast symlinks is + * clearing of i_data and i_size. + */ + if ((EXT4_SB(sb)->s_mount_state & EXT4_ORPHAN_FS)) { + if (inode->i_nlink != 0) { + ext4_error_inode(inode, function, line, 0, + "invalid orphan symlink nlink %d", + inode->i_nlink); + ret = -EFSCORRUPTED; + goto bad_inode; + } + } else { + if (inode->i_size == 0 || + inode->i_size >= sizeof(ei->i_data) || + strnlen((char *)ei->i_data, inode->i_size + 1) != + inode->i_size) { + ext4_error_inode(inode, function, line, 0, + "invalid fast symlink length %llu", + (unsigned long long)inode->i_size); + ret = -EFSCORRUPTED; + goto bad_inode; + } + inode_set_cached_link(inode, (char *)ei->i_data, + inode->i_size); } - inode_set_cached_link(inode, (char *)ei->i_data, - inode->i_size); } else { inode->i_op = &ext4_symlink_inode_operations; } @@ -5849,6 +5882,18 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (attr->ia_size == inode->i_size) inc_ivers = false; + /* + * If file has inline data but new size exceeds inline capacity, + * convert to extent-based storage first to prevent inconsistent + * state (inline flag set but size exceeds inline capacity). + */ + if (ext4_has_inline_data(inode) && + attr->ia_size > EXT4_I(inode)->i_inline_size) { + error = ext4_convert_inline_data(inode); + if (error) + goto err_out; + } + if (shrink) { if (ext4_should_order_data(inode)) { error = ext4_begin_ordered_truncate(inode, diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c index 9fbdf6a09489..6f5bfbb0e8a4 100644 --- a/fs/ext4/mballoc-test.c +++ b/fs/ext4/mballoc-test.c @@ -8,6 +8,7 @@ #include <linux/random.h> #include "ext4.h" +#include "mballoc.h" struct mbt_grp_ctx { struct buffer_head bitmap_bh; @@ -336,7 +337,7 @@ ext4_mb_mark_context_stub(handle_t *handle, struct super_block *sb, bool state, if (state) mb_set_bits(bitmap_bh->b_data, blkoff, len); else - mb_clear_bits(bitmap_bh->b_data, blkoff, len); + mb_clear_bits_test(bitmap_bh->b_data, blkoff, len); return 0; } @@ -413,14 +414,14 @@ static void test_new_blocks_simple(struct kunit *test) /* get block at goal */ ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ar.goal, found, "failed to alloc block at goal, expected %llu found %llu", ar.goal, found); /* get block after goal in goal group */ ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ar.goal + EXT4_C2B(sbi, 1), found, "failed to alloc block after goal in goal group, expected %llu found %llu", ar.goal + 1, found); @@ -428,7 +429,7 @@ static void test_new_blocks_simple(struct kunit *test) /* get block after goal group */ mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ext4_group_first_block_no(sb, goal_group + 1), found, "failed to alloc block after goal group, expected %llu found %llu", @@ -438,7 +439,7 @@ static void test_new_blocks_simple(struct kunit *test) for (i = goal_group; i < ext4_get_groups_count(sb); i++) mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ext4_group_first_block_no(sb, 0) + EXT4_C2B(sbi, 1), found, "failed to alloc block before goal group, expected %llu found %llu", @@ -448,7 +449,7 @@ static void test_new_blocks_simple(struct kunit *test) for (i = 0; i < ext4_get_groups_count(sb); i++) mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_NE_MSG(test, err, 0, "unexpectedly get block when no block is available"); } @@ -492,16 +493,16 @@ validate_free_blocks_simple(struct kunit *test, struct super_block *sb, continue; bitmap = mbt_ctx_bitmap(sb, i); - bit = mb_find_next_zero_bit(bitmap, max, 0); + bit = mb_find_next_zero_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ_MSG(test, bit, max, "free block on unexpected group %d", i); } bitmap = mbt_ctx_bitmap(sb, goal_group); - bit = mb_find_next_zero_bit(bitmap, max, 0); + bit = mb_find_next_zero_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ(test, bit, start); - bit = mb_find_next_bit(bitmap, max, bit + 1); + bit = mb_find_next_bit_test(bitmap, max, bit + 1); KUNIT_ASSERT_EQ(test, bit, start + len); } @@ -524,7 +525,7 @@ test_free_blocks_simple_range(struct kunit *test, ext4_group_t goal_group, block = ext4_group_first_block_no(sb, goal_group) + EXT4_C2B(sbi, start); - ext4_free_blocks_simple(inode, block, len); + ext4_free_blocks_simple_test(inode, block, len); validate_free_blocks_simple(test, sb, goal_group, start, len); mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb)); } @@ -566,15 +567,15 @@ test_mark_diskspace_used_range(struct kunit *test, bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP); memset(bitmap, 0, sb->s_blocksize); - ret = ext4_mb_mark_diskspace_used(ac, NULL); + ret = ext4_mb_mark_diskspace_used_test(ac, NULL); KUNIT_ASSERT_EQ(test, ret, 0); max = EXT4_CLUSTERS_PER_GROUP(sb); - i = mb_find_next_bit(bitmap, max, 0); + i = mb_find_next_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ(test, i, start); - i = mb_find_next_zero_bit(bitmap, max, i + 1); + i = mb_find_next_zero_bit_test(bitmap, max, i + 1); KUNIT_ASSERT_EQ(test, i, start + len); - i = mb_find_next_bit(bitmap, max, i + 1); + i = mb_find_next_bit_test(bitmap, max, i + 1); KUNIT_ASSERT_EQ(test, max, i); } @@ -617,54 +618,54 @@ static void mbt_generate_buddy(struct super_block *sb, void *buddy, max = EXT4_CLUSTERS_PER_GROUP(sb); bb_h = buddy + sbi->s_mb_offsets[1]; - off = mb_find_next_zero_bit(bb, max, 0); + off = mb_find_next_zero_bit_test(bb, max, 0); grp->bb_first_free = off; while (off < max) { grp->bb_counters[0]++; grp->bb_free++; - if (!(off & 1) && !mb_test_bit(off + 1, bb)) { + if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) { grp->bb_free++; grp->bb_counters[0]--; - mb_clear_bit(off >> 1, bb_h); + mb_clear_bit_test(off >> 1, bb_h); grp->bb_counters[1]++; grp->bb_largest_free_order = 1; off++; } - off = mb_find_next_zero_bit(bb, max, off + 1); + off = mb_find_next_zero_bit_test(bb, max, off + 1); } for (order = 1; order < MB_NUM_ORDERS(sb) - 1; order++) { bb = buddy + sbi->s_mb_offsets[order]; bb_h = buddy + sbi->s_mb_offsets[order + 1]; max = max >> 1; - off = mb_find_next_zero_bit(bb, max, 0); + off = mb_find_next_zero_bit_test(bb, max, 0); while (off < max) { - if (!(off & 1) && !mb_test_bit(off + 1, bb)) { + if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) { mb_set_bits(bb, off, 2); grp->bb_counters[order] -= 2; - mb_clear_bit(off >> 1, bb_h); + mb_clear_bit_test(off >> 1, bb_h); grp->bb_counters[order + 1]++; grp->bb_largest_free_order = order + 1; off++; } - off = mb_find_next_zero_bit(bb, max, off + 1); + off = mb_find_next_zero_bit_test(bb, max, off + 1); } } max = EXT4_CLUSTERS_PER_GROUP(sb); - off = mb_find_next_zero_bit(bitmap, max, 0); + off = mb_find_next_zero_bit_test(bitmap, max, 0); while (off < max) { grp->bb_fragments++; - off = mb_find_next_bit(bitmap, max, off + 1); + off = mb_find_next_bit_test(bitmap, max, off + 1); if (off + 1 >= max) break; - off = mb_find_next_zero_bit(bitmap, max, off + 1); + off = mb_find_next_zero_bit_test(bitmap, max, off + 1); } } @@ -706,7 +707,7 @@ do_test_generate_buddy(struct kunit *test, struct super_block *sb, void *bitmap, /* needed by validation in ext4_mb_generate_buddy */ ext4_grp->bb_free = mbt_grp->bb_free; memset(ext4_buddy, 0xff, sb->s_blocksize); - ext4_mb_generate_buddy(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP, + ext4_mb_generate_buddy_test(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP, ext4_grp); KUNIT_ASSERT_EQ(test, memcmp(mbt_buddy, ext4_buddy, sb->s_blocksize), @@ -760,7 +761,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b, ex.fe_group = TEST_GOAL_GROUP; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(e4b, &ex); + mb_mark_used_test(e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); mb_set_bits(bitmap, start, len); @@ -769,7 +770,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b, memset(buddy, 0xff, sb->s_blocksize); for (i = 0; i < MB_NUM_ORDERS(sb); i++) grp->bb_counters[i] = 0; - ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp); + ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp); KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize), 0); @@ -798,7 +799,7 @@ static void test_mb_mark_used(struct kunit *test) bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb); @@ -809,7 +810,7 @@ static void test_mb_mark_used(struct kunit *test) test_mb_mark_used_range(test, &e4b, ranges[i].start, ranges[i].len, bitmap, buddy, grp); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } static void @@ -825,16 +826,16 @@ test_mb_free_blocks_range(struct kunit *test, struct ext4_buddy *e4b, return; ext4_lock_group(sb, e4b->bd_group); - mb_free_blocks(NULL, e4b, start, len); + mb_free_blocks_test(NULL, e4b, start, len); ext4_unlock_group(sb, e4b->bd_group); - mb_clear_bits(bitmap, start, len); + mb_clear_bits_test(bitmap, start, len); /* bypass bb_free validatoin in ext4_mb_generate_buddy */ grp->bb_free += len; memset(buddy, 0xff, sb->s_blocksize); for (i = 0; i < MB_NUM_ORDERS(sb); i++) grp->bb_counters[i] = 0; - ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp); + ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp); KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize), 0); @@ -865,7 +866,7 @@ static void test_mb_free_blocks(struct kunit *test) bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); ex.fe_start = 0; @@ -873,7 +874,7 @@ static void test_mb_free_blocks(struct kunit *test) ex.fe_group = TEST_GOAL_GROUP; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(&e4b, &ex); + mb_mark_used_test(&e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); grp->bb_free = 0; @@ -886,7 +887,7 @@ static void test_mb_free_blocks(struct kunit *test) test_mb_free_blocks_range(test, &e4b, ranges[i].start, ranges[i].len, bitmap, buddy, grp); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } #define COUNT_FOR_ESTIMATE 100000 @@ -904,7 +905,7 @@ static void test_mb_mark_used_cost(struct kunit *test) if (sb->s_blocksize > PAGE_SIZE) kunit_skip(test, "blocksize exceeds pagesize"); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); ex.fe_group = TEST_GOAL_GROUP; @@ -918,7 +919,7 @@ static void test_mb_mark_used_cost(struct kunit *test) ex.fe_start = ranges[i].start; ex.fe_len = ranges[i].len; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(&e4b, &ex); + mb_mark_used_test(&e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); } end = jiffies; @@ -929,14 +930,14 @@ static void test_mb_mark_used_cost(struct kunit *test) continue; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_free_blocks(NULL, &e4b, ranges[i].start, + mb_free_blocks_test(NULL, &e4b, ranges[i].start, ranges[i].len); ext4_unlock_group(sb, TEST_GOAL_GROUP); } } kunit_info(test, "costed jiffies %lu\n", all); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } static const struct mbt_ext4_block_layout mbt_test_layouts[] = { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 20e9fdaf4301..bb58eafb87bc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1199,6 +1199,8 @@ static int ext4_mb_scan_groups(struct ext4_allocation_context *ac) /* searching for the right group start from the goal value specified */ start = ac->ac_g_ex.fe_group; + if (start >= ngroups) + start = 0; ac->ac_prefetch_grp = start; ac->ac_prefetch_nr = 0; @@ -2443,8 +2445,12 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, return 0; err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); - if (err) + if (err) { + if (EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info) && + !(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) + return 0; return err; + } ext4_lock_group(ac->ac_sb, group); if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) @@ -3580,9 +3586,7 @@ err_freebuddy: rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - rcu_read_lock(); - kvfree(rcu_dereference(sbi->s_group_info)); - rcu_read_unlock(); + kvfree(rcu_access_pointer(sbi->s_group_info)); return -ENOMEM; } @@ -3889,15 +3893,14 @@ void ext4_mb_release(struct super_block *sb) struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); int count; - if (test_opt(sb, DISCARD)) { - /* - * wait the discard work to drain all of ext4_free_data - */ - flush_work(&sbi->s_discard_work); - WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); - } + /* + * wait the discard work to drain all of ext4_free_data + */ + flush_work(&sbi->s_discard_work); + WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); - if (sbi->s_group_info) { + group_info = rcu_access_pointer(sbi->s_group_info); + if (group_info) { for (i = 0; i < ngroups; i++) { cond_resched(); grinfo = ext4_get_group_info(sb, i); @@ -3915,12 +3918,9 @@ void ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); - rcu_read_lock(); - group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) kfree(group_info[i]); kvfree(group_info); - rcu_read_unlock(); } ext4_mb_avg_fragment_size_destroy(sbi); ext4_mb_largest_free_orders_destroy(sbi); @@ -4084,7 +4084,7 @@ void ext4_exit_mballoc(void) #define EXT4_MB_BITMAP_MARKED_CHECK 0x0001 #define EXT4_MB_SYNC_UPDATE 0x0002 -static int +int ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state, ext4_group_t group, ext4_grpblk_t blkoff, ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed) @@ -7188,6 +7188,102 @@ out_unload: return error; } -#ifdef CONFIG_EXT4_KUNIT_TESTS -#include "mballoc-test.c" +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +void mb_clear_bits_test(void *bm, int cur, int len) +{ + mb_clear_bits(bm, cur, len); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bits_test); + +ext4_fsblk_t +ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar, + int *errp) +{ + return ext4_mb_new_blocks_simple(ar, errp); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_new_blocks_simple_test); + +int mb_find_next_zero_bit_test(void *addr, int max, int start) +{ + return mb_find_next_zero_bit(addr, max, start); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_zero_bit_test); + +int mb_find_next_bit_test(void *addr, int max, int start) +{ + return mb_find_next_bit(addr, max, start); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_bit_test); + +void mb_clear_bit_test(int bit, void *addr) +{ + mb_clear_bit(bit, addr); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bit_test); + +int mb_test_bit_test(int bit, void *addr) +{ + return mb_test_bit(bit, addr); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_test_bit_test); + +int ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac, + handle_t *handle) +{ + return ext4_mb_mark_diskspace_used(ac, handle); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_diskspace_used_test); + +int mb_mark_used_test(struct ext4_buddy *e4b, struct ext4_free_extent *ex) +{ + return mb_mark_used(e4b, ex); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_mark_used_test); + +void ext4_mb_generate_buddy_test(struct super_block *sb, void *buddy, + void *bitmap, ext4_group_t group, + struct ext4_group_info *grp) +{ + ext4_mb_generate_buddy(sb, buddy, bitmap, group, grp); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_generate_buddy_test); + +int ext4_mb_load_buddy_test(struct super_block *sb, ext4_group_t group, + struct ext4_buddy *e4b) +{ + return ext4_mb_load_buddy(sb, group, e4b); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_load_buddy_test); + +void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b) +{ + ext4_mb_unload_buddy(e4b); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_unload_buddy_test); + +void mb_free_blocks_test(struct inode *inode, struct ext4_buddy *e4b, + int first, int count) +{ + mb_free_blocks(inode, e4b, first, count); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_free_blocks_test); + +void ext4_free_blocks_simple_test(struct inode *inode, ext4_fsblk_t block, + unsigned long count) +{ + return ext4_free_blocks_simple(inode, block, count); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_blocks_simple_test); + +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_wait_block_bitmap); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_init); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_desc); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_count_free_clusters); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_info); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_group_clusters_set); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_release); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_read_block_bitmap_nowait); +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_set_bits); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_fc_init_inode); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_context); #endif diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 15a049f05d04..39333ce72cbd 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -270,4 +270,34 @@ ext4_mballoc_query_range( ext4_mballoc_query_range_fn formatter, void *priv); +extern int ext4_mb_mark_context(handle_t *handle, + struct super_block *sb, bool state, + ext4_group_t group, ext4_grpblk_t blkoff, + ext4_grpblk_t len, int flags, + ext4_grpblk_t *ret_changed); +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +extern void mb_clear_bits_test(void *bm, int cur, int len); +extern ext4_fsblk_t +ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar, + int *errp); +extern int mb_find_next_zero_bit_test(void *addr, int max, int start); +extern int mb_find_next_bit_test(void *addr, int max, int start); +extern void mb_clear_bit_test(int bit, void *addr); +extern int mb_test_bit_test(int bit, void *addr); +extern int +ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac, + handle_t *handle); +extern int mb_mark_used_test(struct ext4_buddy *e4b, + struct ext4_free_extent *ex); +extern void ext4_mb_generate_buddy_test(struct super_block *sb, + void *buddy, void *bitmap, ext4_group_t group, + struct ext4_group_info *grp); +extern int ext4_mb_load_buddy_test(struct super_block *sb, + ext4_group_t group, struct ext4_buddy *e4b); +extern void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b); +extern void mb_free_blocks_test(struct inode *inode, + struct ext4_buddy *e4b, int first, int count); +extern void ext4_free_blocks_simple_test(struct inode *inode, + ext4_fsblk_t block, unsigned long count); +#endif #endif diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index a8c95eee91b7..39fe50b3c662 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -524,9 +524,15 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, nr_to_submit++; } while ((bh = bh->b_this_page) != head); - /* Nothing to submit? Just unlock the folio... */ - if (!nr_to_submit) + if (!nr_to_submit) { + /* + * We have nothing to submit. Just cycle the folio through + * writeback state to properly update xarray tags. + */ + __folio_start_writeback(folio, keep_towrite); + folio_end_writeback(folio); return 0; + } bh = head = folio_buffers(folio); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 43f680c750ae..a34efb44e73d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1254,12 +1254,10 @@ static void ext4_group_desc_free(struct ext4_sb_info *sbi) struct buffer_head **group_desc; int i; - rcu_read_lock(); - group_desc = rcu_dereference(sbi->s_group_desc); + group_desc = rcu_access_pointer(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) brelse(group_desc[i]); kvfree(group_desc); - rcu_read_unlock(); } static void ext4_flex_groups_free(struct ext4_sb_info *sbi) @@ -1267,14 +1265,12 @@ static void ext4_flex_groups_free(struct ext4_sb_info *sbi) struct flex_groups **flex_groups; int i; - rcu_read_lock(); - flex_groups = rcu_dereference(sbi->s_flex_groups); + flex_groups = rcu_access_pointer(sbi->s_flex_groups); if (flex_groups) { for (i = 0; i < sbi->s_flex_groups_allocated; i++) kvfree(flex_groups[i]); kvfree(flex_groups); } - rcu_read_unlock(); } static void ext4_put_super(struct super_block *sb) @@ -1527,6 +1523,27 @@ void ext4_clear_inode(struct inode *inode) invalidate_inode_buffers(inode); clear_inode(inode); ext4_discard_preallocations(inode); + /* + * We must remove the inode from the hash before ext4_free_inode() + * clears the bit in inode bitmap as otherwise another process reusing + * the inode will block in insert_inode_hash() waiting for inode + * eviction to complete while holding transaction handle open, but + * ext4_evict_inode() still running for that inode could block waiting + * for transaction commit if the inode is marked as IS_SYNC => deadlock. + * + * Removing the inode from the hash here is safe. There are two cases + * to consider: + * 1) The inode still has references to it (i_nlink > 0). In that case + * we are keeping the inode and once we remove the inode from the hash, + * iget() can create the new inode structure for the same inode number + * and we are fine with that as all IO on behalf of the inode is + * finished. + * 2) We are deleting the inode (i_nlink == 0). In that case inode + * number cannot be reused until ext4_free_inode() clears the bit in + * the inode bitmap, at which point all IO is done and reuse is fine + * again. + */ + remove_inode_hash(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); dquot_drop(inode); if (EXT4_I(inode)->jinode) { @@ -3633,6 +3650,13 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) "extents feature\n"); return 0; } + if (ext4_has_feature_bigalloc(sb) && + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { + ext4_msg(sb, KERN_WARNING, + "bad geometry: bigalloc file system with non-zero " + "first_data_block\n"); + return 0; + } #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) if (!readonly && (ext4_has_feature_quota(sb) || @@ -5403,6 +5427,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) timer_setup(&sbi->s_err_report, print_daily_error_info, 0); spin_lock_init(&sbi->s_error_lock); + mutex_init(&sbi->s_error_notify_mutex); INIT_WORK(&sbi->s_sb_upd_work, update_super_work); err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index b87d7bdab06a..923b375e017f 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -597,7 +597,10 @@ static const struct kobj_type ext4_feat_ktype = { void ext4_notify_error_sysfs(struct ext4_sb_info *sbi) { - sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); + mutex_lock(&sbi->s_error_notify_mutex); + if (sbi->s_kobj.state_in_sysfs) + sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); + mutex_unlock(&sbi->s_error_notify_mutex); } static struct kobject *ext4_root; @@ -610,8 +613,10 @@ int ext4_register_sysfs(struct super_block *sb) int err; init_completion(&sbi->s_kobj_unregister); + mutex_lock(&sbi->s_error_notify_mutex); err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root, "%s", sb->s_id); + mutex_unlock(&sbi->s_error_notify_mutex); if (err) { kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); @@ -644,7 +649,10 @@ void ext4_unregister_sysfs(struct super_block *sb) if (sbi->s_proc) remove_proc_subtree(sb->s_id, ext4_proc_root); + + mutex_lock(&sbi->s_error_notify_mutex); kobject_del(&sbi->s_kobj); + mutex_unlock(&sbi->s_error_notify_mutex); } int __init ext4_init_sysfs(void) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index de89c5bef607..1508e2f54462 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -267,7 +267,15 @@ restart: */ BUFFER_TRACE(bh, "queue"); get_bh(bh); - J_ASSERT_BH(bh, !buffer_jwrite(bh)); + if (WARN_ON_ONCE(buffer_jwrite(bh))) { + put_bh(bh); /* drop the ref we just took */ + spin_unlock(&journal->j_list_lock); + /* Clean up any previously batched buffers */ + if (batch_count) + __flush_batch(journal, &batch_count); + jbd2_journal_abort(journal, -EFSCORRUPTED); + return -EFSCORRUPTED; + } journal->j_chkpt_bhs[batch_count++] = bh; transaction->t_chp_stats.cs_written++; transaction->t_checkpoint_list = jh->b_cpnext; @@ -325,7 +333,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal) if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) return 1; - J_ASSERT(blocknr != 0); + if (WARN_ON_ONCE(blocknr == 0)) { + jbd2_journal_abort(journal, -EFSCORRUPTED); + return -EFSCORRUPTED; + } /* * We need to make sure that any blocks that were recently written out diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 758611ee4475..13cb60b52bd6 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -1146,15 +1146,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return -EOVERFLOW; /* - * With metacopy disabled, we fsync after final metadata copyup, for + * With "fsync=strict", we fsync after final metadata copyup, for * both regular files and directories to get atomic copyup semantics * on filesystems that do not use strict metadata ordering (e.g. ubifs). * - * With metacopy enabled we want to avoid fsync on all meta copyup + * By default, we want to avoid fsync on all meta copyup, because * that will hurt performance of workloads such as chown -R, so we * only fsync on data copyup as legacy behavior. */ - ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy && + ctx.metadata_fsync = ovl_should_sync_metadata(OVL_FS(dentry->d_sb)) && (S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode)); ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index cad2055ebf18..63b299bf12f7 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -99,6 +99,12 @@ enum { OVL_VERITY_REQUIRE, }; +enum { + OVL_FSYNC_VOLATILE, + OVL_FSYNC_AUTO, + OVL_FSYNC_STRICT, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -656,6 +662,21 @@ static inline bool ovl_xino_warn(struct ovl_fs *ofs) return ofs->config.xino == OVL_XINO_ON; } +static inline bool ovl_should_sync(struct ovl_fs *ofs) +{ + return ofs->config.fsync_mode != OVL_FSYNC_VOLATILE; +} + +static inline bool ovl_should_sync_metadata(struct ovl_fs *ofs) +{ + return ofs->config.fsync_mode == OVL_FSYNC_STRICT; +} + +static inline bool ovl_is_volatile(struct ovl_config *config) +{ + return config->fsync_mode == OVL_FSYNC_VOLATILE; +} + /* * To avoid regressions in existing setups with overlay lower offline changes, * we allow lower changes only if none of the new features are used. diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 1d4828dbcf7a..80cad4ea96a3 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -18,7 +18,7 @@ struct ovl_config { int xino; bool metacopy; bool userxattr; - bool ovl_volatile; + int fsync_mode; }; struct ovl_sb { @@ -120,11 +120,6 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb) return (struct ovl_fs *)sb->s_fs_info; } -static inline bool ovl_should_sync(struct ovl_fs *ofs) -{ - return !ofs->config.ovl_volatile; -} - static inline unsigned int ovl_numlower(struct ovl_entry *oe) { return oe ? oe->__numlower : 0; diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 8111b437ae5d..c93fcaa45d4a 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -58,6 +58,7 @@ enum ovl_opt { Opt_xino, Opt_metacopy, Opt_verity, + Opt_fsync, Opt_volatile, Opt_override_creds, }; @@ -140,6 +141,23 @@ static int ovl_verity_mode_def(void) return OVL_VERITY_OFF; } +static const struct constant_table ovl_parameter_fsync[] = { + { "volatile", OVL_FSYNC_VOLATILE }, + { "auto", OVL_FSYNC_AUTO }, + { "strict", OVL_FSYNC_STRICT }, + {} +}; + +static const char *ovl_fsync_mode(struct ovl_config *config) +{ + return ovl_parameter_fsync[config->fsync_mode].name; +} + +static int ovl_fsync_mode_def(void) +{ + return OVL_FSYNC_AUTO; +} + const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_string_empty("lowerdir", Opt_lowerdir), fsparam_file_or_string("lowerdir+", Opt_lowerdir_add), @@ -155,6 +173,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_enum("xino", Opt_xino, ovl_parameter_xino), fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool), fsparam_enum("verity", Opt_verity, ovl_parameter_verity), + fsparam_enum("fsync", Opt_fsync, ovl_parameter_fsync), fsparam_flag("volatile", Opt_volatile), fsparam_flag_no("override_creds", Opt_override_creds), {} @@ -665,8 +684,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_verity: config->verity_mode = result.uint_32; break; + case Opt_fsync: + config->fsync_mode = result.uint_32; + break; case Opt_volatile: - config->ovl_volatile = true; + config->fsync_mode = OVL_FSYNC_VOLATILE; break; case Opt_userxattr: config->userxattr = true; @@ -800,6 +822,7 @@ int ovl_init_fs_context(struct fs_context *fc) ofs->config.nfs_export = ovl_nfs_export_def; ofs->config.xino = ovl_xino_def(); ofs->config.metacopy = ovl_metacopy_def; + ofs->config.fsync_mode = ovl_fsync_mode_def(); fc->s_fs_info = ofs; fc->fs_private = ctx; @@ -870,9 +893,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, config->index = false; } - if (!config->upperdir && config->ovl_volatile) { + if (!config->upperdir && ovl_is_volatile(config)) { pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n"); - config->ovl_volatile = false; + config->fsync_mode = ovl_fsync_mode_def(); } if (!config->upperdir && config->uuid == OVL_UUID_ON) { @@ -1070,8 +1093,8 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config)); if (ofs->config.metacopy != ovl_metacopy_def) seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy)); - if (ofs->config.ovl_volatile) - seq_puts(m, ",volatile"); + if (ofs->config.fsync_mode != ovl_fsync_mode_def()) + seq_printf(m, ",fsync=%s", ovl_fsync_mode(&ofs->config)); if (ofs->config.userxattr) seq_puts(m, ",userxattr"); if (ofs->config.verity_mode != ovl_verity_mode_def()) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d4c12feec039..0822987cfb51 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -776,7 +776,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, * For volatile mount, create a incompat/volatile/dirty file to keep * track of it. */ - if (ofs->config.ovl_volatile) { + if (ovl_is_volatile(&ofs->config)) { err = ovl_create_volatile_dirty(ofs); if (err < 0) { pr_err("Failed to create volatile/dirty file.\n"); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 3f1b763a8bb4..2ea769f311c3 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -85,7 +85,10 @@ int ovl_can_decode_fh(struct super_block *sb) if (!exportfs_can_decode_fh(sb->s_export_op)) return 0; - return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; + if (sb->s_export_op->encode_fh == generic_encode_ino32_fh) + return FILEID_INO32_GEN; + + return -1; } struct dentry *ovl_indexdir(struct super_block *sb) diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile index 26b6105f04d1..1a6e1e1c9764 100644 --- a/fs/smb/client/Makefile +++ b/fs/smb/client/Makefile @@ -48,8 +48,8 @@ cifs-$(CONFIG_CIFS_COMPRESSION) += compress.o compress/lz77.o # Build the SMB2 error mapping table from smb2status.h # $(obj)/smb2_mapping_table.c: $(src)/../common/smb2status.h \ - $(src)/gen_smb2_mapping - $(call cmd,gen_smb2_mapping) + $(src)/gen_smb2_mapping FORCE + $(call if_changed,gen_smb2_mapping) $(obj)/smb2maperror.o: $(obj)/smb2_mapping_table.c @@ -58,4 +58,5 @@ quiet_cmd_gen_smb2_mapping = GEN $@ obj-$(CONFIG_SMB_KUNIT_TESTS) += smb2maperror_test.o -clean-files += smb2_mapping_table.c +# Let Kbuild handle tracking and cleaning +targets += smb2_mapping_table.c diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 393a4ae47cc1..9b2bb8764a80 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -82,11 +82,19 @@ static void lease_del_list(struct oplock_info *opinfo) spin_unlock(&lb->lb_lock); } -static void lb_add(struct lease_table *lb) +static struct lease_table *alloc_lease_table(struct oplock_info *opinfo) { - write_lock(&lease_list_lock); - list_add(&lb->l_entry, &lease_table_list); - write_unlock(&lease_list_lock); + struct lease_table *lb; + + lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP); + if (!lb) + return NULL; + + memcpy(lb->client_guid, opinfo->conn->ClientGUID, + SMB2_CLIENT_GUID_SIZE); + INIT_LIST_HEAD(&lb->lease_list); + spin_lock_init(&lb->lb_lock); + return lb; } static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) @@ -1042,34 +1050,27 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) lease2->version = lease1->version; } -static int add_lease_global_list(struct oplock_info *opinfo) +static void add_lease_global_list(struct oplock_info *opinfo, + struct lease_table *new_lb) { struct lease_table *lb; - read_lock(&lease_list_lock); + write_lock(&lease_list_lock); list_for_each_entry(lb, &lease_table_list, l_entry) { if (!memcmp(lb->client_guid, opinfo->conn->ClientGUID, SMB2_CLIENT_GUID_SIZE)) { opinfo->o_lease->l_lb = lb; lease_add_list(opinfo); - read_unlock(&lease_list_lock); - return 0; + write_unlock(&lease_list_lock); + kfree(new_lb); + return; } } - read_unlock(&lease_list_lock); - lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP); - if (!lb) - return -ENOMEM; - - memcpy(lb->client_guid, opinfo->conn->ClientGUID, - SMB2_CLIENT_GUID_SIZE); - INIT_LIST_HEAD(&lb->lease_list); - spin_lock_init(&lb->lb_lock); - opinfo->o_lease->l_lb = lb; + opinfo->o_lease->l_lb = new_lb; lease_add_list(opinfo); - lb_add(lb); - return 0; + list_add(&new_lb->l_entry, &lease_table_list); + write_unlock(&lease_list_lock); } static void set_oplock_level(struct oplock_info *opinfo, int level, @@ -1189,6 +1190,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, int err = 0; struct oplock_info *opinfo = NULL, *prev_opinfo = NULL; struct ksmbd_inode *ci = fp->f_ci; + struct lease_table *new_lb = NULL; bool prev_op_has_lease; __le32 prev_op_state = 0; @@ -1291,21 +1293,37 @@ set_lev: set_oplock_level(opinfo, req_op_level, lctx); out: - opinfo_count_inc(fp); - opinfo_add(opinfo, fp); - + /* + * Set o_fp before any publication so that concurrent readers + * (e.g. find_same_lease_key() on the lease list) that + * dereference opinfo->o_fp don't hit a NULL pointer. + * + * Keep the original publication order so concurrent opens can + * still observe the in-flight grant via ci->m_op_list, but make + * everything after opinfo_add() no-fail by preallocating any new + * lease_table first. + */ + opinfo->o_fp = fp; if (opinfo->is_lease) { - err = add_lease_global_list(opinfo); - if (err) + new_lb = alloc_lease_table(opinfo); + if (!new_lb) { + err = -ENOMEM; goto err_out; + } } + opinfo_count_inc(fp); + opinfo_add(opinfo, fp); + + if (opinfo->is_lease) + add_lease_global_list(opinfo, new_lb); + rcu_assign_pointer(fp->f_opinfo, opinfo); - opinfo->o_fp = fp; return 0; err_out: - __free_opinfo(opinfo); + kfree(new_lb); + opinfo_put(opinfo); return err; } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 9c44e71e3c3b..6fb7a795ff5d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1939,8 +1939,14 @@ out_err: if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION) try_delay = true; - sess->last_active = jiffies; - sess->state = SMB2_SESSION_EXPIRED; + /* + * For binding requests, session belongs to another + * connection. Do not expire it. + */ + if (!(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { + sess->last_active = jiffies; + sess->state = SMB2_SESSION_EXPIRED; + } ksmbd_user_session_put(sess); work->sess = NULL; if (try_delay) { @@ -4446,8 +4452,9 @@ int smb2_query_dir(struct ksmbd_work *work) d_info.wptr = (char *)rsp->Buffer; d_info.rptr = (char *)rsp->Buffer; d_info.out_buf_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_directory_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (d_info.out_buf_len < 0) { rc = -EINVAL; goto err_out; @@ -4714,8 +4721,9 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, } buf_free_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (buf_free_len < 0) return -EINVAL; @@ -4932,7 +4940,8 @@ static int get_file_all_info(struct ksmbd_work *work, int conv_len; char *filename; u64 time; - int ret; + int ret, buf_free_len, filename_len; + struct smb2_query_info_req *req = ksmbd_req_buf_next(work); if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) { ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n", @@ -4944,6 +4953,16 @@ static int get_file_all_info(struct ksmbd_work *work, if (IS_ERR(filename)) return PTR_ERR(filename); + filename_len = strlen(filename); + buf_free_len = smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer) + + offsetof(struct smb2_file_all_info, FileName), + le32_to_cpu(req->OutputBufferLength)); + if (buf_free_len < (filename_len + 1) * 2) { + kfree(filename); + return -EINVAL; + } + ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (ret) { @@ -4987,7 +5006,8 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->Mode = fp->coption; file_info->AlignmentRequirement = 0; conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename, - PATH_MAX, conn->local_nls, 0); + min(filename_len, PATH_MAX), + conn->local_nls, 0); conv_len *= 2; file_info->FileNameLength = cpu_to_le32(conv_len); rsp->OutputBufferLength = @@ -5041,8 +5061,9 @@ static int get_file_stream_info(struct ksmbd_work *work, file_info = (struct smb2_file_stream_info *)rsp->Buffer; buf_free_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (buf_free_len < 0) goto out; @@ -7586,14 +7607,15 @@ retry: rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); skip: if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) { + locks_free_lock(flock); + kfree(smb_lock); if (!rc) { ksmbd_debug(SMB, "File unlocked\n"); } else if (rc == -ENOENT) { rsp->hdr.Status = STATUS_NOT_LOCKED; + err = rc; goto out; } - locks_free_lock(flock); - kfree(smb_lock); } else { if (rc == FILE_LOCK_DEFERRED) { void **argv; @@ -7662,6 +7684,9 @@ skip: spin_unlock(&work->conn->llist_lock); ksmbd_debug(SMB, "successful in taking lock\n"); } else { + locks_free_lock(flock); + kfree(smb_lock); + err = rc; goto out; } } @@ -7692,13 +7717,17 @@ out: struct file_lock *rlock = NULL; rlock = smb_flock_init(filp); - rlock->c.flc_type = F_UNLCK; - rlock->fl_start = smb_lock->start; - rlock->fl_end = smb_lock->end; + if (rlock) { + rlock->c.flc_type = F_UNLCK; + rlock->fl_start = smb_lock->start; + rlock->fl_end = smb_lock->end; - rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); - if (rc) - pr_err("rollback unlock fail : %d\n", rc); + rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); + if (rc) + pr_err("rollback unlock fail : %d\n", rc); + } else { + pr_err("rollback unlock alloc failed\n"); + } list_del(&smb_lock->llist); spin_lock(&work->conn->llist_lock); @@ -7708,7 +7737,8 @@ out: spin_unlock(&work->conn->llist_lock); locks_free_lock(smb_lock->fl); - locks_free_lock(rlock); + if (rlock) + locks_free_lock(rlock); kfree(smb_lock); } out2: @@ -8191,8 +8221,9 @@ int smb2_ioctl(struct ksmbd_work *work) buffer = (char *)req + le32_to_cpu(req->InputOffset); cnt_code = le32_to_cpu(req->CtlCode); - ret = smb2_calc_max_out_buf_len(work, 48, - le32_to_cpu(req->MaxOutputResponse)); + ret = smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_ioctl_rsp, Buffer), + le32_to_cpu(req->MaxOutputResponse)); if (ret < 0) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; goto out; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 8244305949de..67fd9c75ac3f 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -55,7 +55,8 @@ struct xfs_attr_list_context { struct xfs_trans *tp; struct xfs_inode *dp; /* inode */ struct xfs_attrlist_cursor_kern cursor; /* position in list */ - void *buffer; /* output buffer */ + /* output buffer */ + void *buffer __counted_by_ptr(bufsize); /* * Abort attribute list iteration if non-zero. Can be used to pass diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 47f48ae555c0..2b78041e8672 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -1416,6 +1416,28 @@ xfs_attr3_leaf_create( } /* + * Reinitialize an existing attr fork block as an empty leaf, and attach + * the buffer to tp. + */ +int +xfs_attr3_leaf_init( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t blkno) +{ + struct xfs_buf *bp = NULL; + struct xfs_da_args args = { + .trans = tp, + .dp = dp, + .owner = dp->i_ino, + .geo = dp->i_mount->m_attr_geo, + }; + + ASSERT(tp != NULL); + + return xfs_attr3_leaf_create(&args, blkno, &bp); +} +/* * Split the leaf node, rebalance, then add the new entry. * * Returns 0 if the entry was added, 1 if a further split is needed or a diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index aca46da2bc50..72639efe6ac3 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -87,6 +87,9 @@ int xfs_attr3_leaf_list_int(struct xfs_buf *bp, /* * Routines used for shrinking the Btree. */ + +int xfs_attr3_leaf_init(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t blkno); int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval); void xfs_attr3_leaf_unbalance(struct xfs_da_state *state, struct xfs_da_state_blk *drop_blk, diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 09d4c17b3e7b..ad801b7bd2dd 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -1506,21 +1506,20 @@ xfs_da3_fixhashpath( } /* - * Remove an entry from an intermediate node. + * Internal implementation to remove an entry from an intermediate node. */ STATIC void -xfs_da3_node_remove( - struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk) +__xfs_da3_node_remove( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_da_geometry *geo, + struct xfs_da_state_blk *drop_blk) { struct xfs_da_intnode *node; struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_node_entry *btree; int index; int tmp; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_remove(state->args); node = drop_blk->bp->b_addr; xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); @@ -1536,17 +1535,17 @@ xfs_da3_node_remove( tmp = nodehdr.count - index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); memmove(&btree[index], &btree[index + 1], tmp); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(tp, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], tmp)); index = nodehdr.count - 1; } memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(tp, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); nodehdr.count -= 1; xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size)); + xfs_trans_log_buf(tp, drop_blk->bp, + XFS_DA_LOGRANGE(node, &node->hdr, geo->node_hdr_size)); /* * Copy the last hash value from the block to propagate upwards. @@ -1555,6 +1554,38 @@ xfs_da3_node_remove( } /* + * Remove an entry from an intermediate node. + */ +STATIC void +xfs_da3_node_remove( + struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk) +{ + trace_xfs_da_node_remove(state->args); + + __xfs_da3_node_remove(state->args->trans, state->args->dp, + state->args->geo, drop_blk); +} + +/* + * Remove an entry from an intermediate attr node at the specified index. + */ +void +xfs_attr3_node_entry_remove( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_buf *bp, + int index) +{ + struct xfs_da_state_blk blk = { + .index = index, + .bp = bp, + }; + + __xfs_da3_node_remove(tp, dp, dp->i_mount->m_attr_geo, &blk); +} + +/* * Unbalance the elements between two intermediate nodes, * move all Btree elements from one node into another. */ diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 354d5d65043e..afcf2d3c7a21 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -184,6 +184,8 @@ int xfs_da3_split(xfs_da_state_t *state); int xfs_da3_join(xfs_da_state_t *state); void xfs_da3_fixhashpath(struct xfs_da_state *state, struct xfs_da_state_path *path_to_to_fix); +void xfs_attr3_node_entry_remove(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf *bp, int index); /* * Routines used for finding things in the Btree. diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 1d25bd5b892e..222812fe202c 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -171,8 +171,10 @@ xchk_quota_item( error = xchk_quota_item_bmap(sc, dq, offset); xchk_iunlock(sc, XFS_ILOCK_SHARED); - if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) { + mutex_unlock(&dq->q_qlock); return error; + } /* * Warn if the hard limits are larger than the fs. diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 39ea651cbb75..286c5f5e0544 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -972,20 +972,12 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char *path; - __entry->ino = file_inode(xf->file)->i_ino; - path = file_path(xf->file, __entry->pathname, MAXNAMELEN); - if (IS_ERR(path)) - strncpy(__entry->pathname, "(unknown)", - sizeof(__entry->pathname)); ), - TP_printk("xfino 0x%lx path '%s'", - __entry->ino, - __entry->pathname) + TP_printk("xfino 0x%lx", + __entry->ino) ); TRACE_EVENT(xfile_destroy, diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 92331991f9fd..a5b69c0fbfd0 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -140,7 +140,7 @@ xfs_attr3_node_inactive( xfs_daddr_t parent_blkno, child_blkno; struct xfs_buf *child_bp; struct xfs_da3_icnode_hdr ichdr; - int error, i; + int error; /* * Since this code is recursive (gasp!) we must protect ourselves. @@ -152,7 +152,7 @@ xfs_attr3_node_inactive( return -EFSCORRUPTED; } - xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr); + xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr); parent_blkno = xfs_buf_daddr(bp); if (!ichdr.count) { xfs_trans_brelse(*trans, bp); @@ -167,7 +167,7 @@ xfs_attr3_node_inactive( * over the leaves removing all of them. If this is higher up * in the tree, recurse downward. */ - for (i = 0; i < ichdr.count; i++) { + while (ichdr.count > 0) { /* * Read the subsidiary block to see what we have to work with. * Don't do this in a transaction. This is a depth-first @@ -218,29 +218,32 @@ xfs_attr3_node_inactive( xfs_trans_binval(*trans, child_bp); child_bp = NULL; + error = xfs_da3_node_read_mapped(*trans, dp, + parent_blkno, &bp, XFS_ATTR_FORK); + if (error) + return error; + /* - * If we're not done, re-read the parent to get the next - * child block number. + * Remove entry from parent node, prevents being indexed to. */ - if (i + 1 < ichdr.count) { - struct xfs_da3_icnode_hdr phdr; + xfs_attr3_node_entry_remove(*trans, dp, bp, 0); + + xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr); + bp = NULL; - error = xfs_da3_node_read_mapped(*trans, dp, - parent_blkno, &bp, XFS_ATTR_FORK); + if (ichdr.count > 0) { + /* + * If we're not done, get the next child block number. + */ + child_fsb = be32_to_cpu(ichdr.btree[0].before); + + /* + * Atomically commit the whole invalidate stuff. + */ + error = xfs_trans_roll_inode(trans, dp); if (error) return error; - xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr, - bp->b_addr); - child_fsb = be32_to_cpu(phdr.btree[i + 1].before); - xfs_trans_brelse(*trans, bp); - bp = NULL; } - /* - * Atomically commit the whole invalidate stuff. - */ - error = xfs_trans_roll_inode(trans, dp); - if (error) - return error; } return 0; @@ -257,10 +260,8 @@ xfs_attr3_root_inactive( struct xfs_trans **trans, struct xfs_inode *dp) { - struct xfs_mount *mp = dp->i_mount; struct xfs_da_blkinfo *info; struct xfs_buf *bp; - xfs_daddr_t blkno; int error; /* @@ -272,7 +273,6 @@ xfs_attr3_root_inactive( error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK); if (error) return error; - blkno = xfs_buf_daddr(bp); /* * Invalidate the tree, even if the "tree" is only a single leaf block. @@ -283,10 +283,26 @@ xfs_attr3_root_inactive( case cpu_to_be16(XFS_DA_NODE_MAGIC): case cpu_to_be16(XFS_DA3_NODE_MAGIC): error = xfs_attr3_node_inactive(trans, dp, bp, 1); + /* + * Empty root node block are not allowed, convert it to leaf. + */ + if (!error) + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (!error) + error = xfs_trans_roll_inode(trans, dp); break; case cpu_to_be16(XFS_ATTR_LEAF_MAGIC): case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): error = xfs_attr3_leaf_inactive(trans, dp, bp); + /* + * Reinit the leaf before truncating extents so that a crash + * mid-truncation leaves an empty leaf rather than one with + * entries that may reference freed remote value blocks. + */ + if (!error) + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (!error) + error = xfs_trans_roll_inode(trans, dp); break; default: xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); @@ -295,21 +311,6 @@ xfs_attr3_root_inactive( xfs_trans_brelse(*trans, bp); break; } - if (error) - return error; - - /* - * Invalidate the incore copy of the root block. - */ - error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno, - XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp); - if (error) - return error; - xfs_trans_binval(*trans, bp); /* remove from cache */ - /* - * Commit the invalidate and start the next transaction. - */ - error = xfs_trans_roll_inode(trans, dp); return error; } @@ -328,6 +329,7 @@ xfs_attr_inactive( { struct xfs_trans *trans; struct xfs_mount *mp; + struct xfs_buf *bp; int lock_mode = XFS_ILOCK_SHARED; int error = 0; @@ -363,10 +365,27 @@ xfs_attr_inactive( * removal below. */ if (dp->i_af.if_nextents > 0) { + /* + * Invalidate and truncate all blocks but leave the root block. + */ error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, + XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount)); + if (error) + goto out_cancel; + + /* + * Invalidate and truncate the root block and ensure that the + * operation is completed within a single transaction. + */ + error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK); + if (error) + goto out_cancel; + + xfs_trans_binval(trans, bp); error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); if (error) goto out_cancel; diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 354472bf45f1..deab14f31b38 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -653,7 +653,6 @@ xfs_attri_recover_work( break; } if (error) { - xfs_irele(ip); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp, sizeof(*attrp)); return ERR_PTR(-EFSCORRUPTED); @@ -1047,8 +1046,8 @@ xlog_recover_attri_commit_pass2( break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: - /* Log item, attr name, attr value */ - if (item->ri_total != 3) { + /* Log item, attr name, optional attr value */ + if (item->ri_total != 2 + !!attri_formatp->alfi_value_len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; @@ -1132,52 +1131,6 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } - switch (op) { - case XFS_ATTRI_OP_FLAGS_REMOVE: - /* Regular remove operations operate only on names. */ - if (attr_value != NULL || value_len != 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - fallthrough; - case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: - case XFS_ATTRI_OP_FLAGS_PPTR_SET: - case XFS_ATTRI_OP_FLAGS_SET: - case XFS_ATTRI_OP_FLAGS_REPLACE: - /* - * Regular xattr set/remove/replace operations require a name - * and do not take a newname. Values are optional for set and - * replace. - * - * Name-value set/remove operations must have a name, do not - * take a newname, and can take a value. - */ - if (attr_name == NULL || name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - break; - case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: - /* - * Name-value replace operations require the caller to - * specify the old and new names and values explicitly. - * Values are optional. - */ - if (attr_name == NULL || name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - if (attr_new_name == NULL || new_name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - break; - } - /* * Memory alloc failure will cause replay to abort. We attach the * name/value buffer to the recovered incore log item and drop our diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 491e2a7053a3..65a0e69c3d08 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -125,6 +125,7 @@ xfs_qm_dquot_logitem_push( struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_buf *bp; + struct xfs_ail *ailp = lip->li_ailp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -153,7 +154,7 @@ xfs_qm_dquot_logitem_push( goto out_unlock; } - spin_unlock(&lip->li_ailp->ail_lock); + spin_unlock(&ailp->ail_lock); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error == -EAGAIN) { @@ -172,9 +173,13 @@ xfs_qm_dquot_logitem_push( rval = XFS_ITEM_FLUSHING; } xfs_buf_relse(bp); + /* + * The buffer no longer protects the log item from reclaim, so + * do not reference lip after this point. + */ out_relock_ail: - spin_lock(&lip->li_ailp->ail_lock); + spin_lock(&ailp->ail_lock); out_unlock: mutex_unlock(&dqp->q_qlock); return rval; diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index d1291ca15239..2b8617ae7ec2 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c @@ -443,8 +443,8 @@ xfs_ioc_attr_list( context.dp = dp; context.resynch = 1; context.attr_filter = xfs_attr_filter(flags); - context.buffer = buffer; context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.buffer = buffer; context.firstu = context.bufsize; context.put_listent = xfs_ioc_attr_put_listent; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 50c0404f9064..beaa26ec62da 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1048,7 +1048,8 @@ xfs_itruncate_extents_flags( xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (icount_read(VFS_I(ip))) xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL); - ASSERT(new_size <= XFS_ISIZE(ip)); + if (whichfork == XFS_DATA_FORK) + ASSERT(new_size <= XFS_ISIZE(ip)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp->ili_lock_flags == 0); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 8913036b8024..4ae81eed0442 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -746,6 +746,7 @@ xfs_inode_item_push( struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp = lip->li_buf; + struct xfs_ail *ailp = lip->li_ailp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -771,7 +772,7 @@ xfs_inode_item_push( if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; - spin_unlock(&lip->li_ailp->ail_lock); + spin_unlock(&ailp->ail_lock); /* * We need to hold a reference for flushing the cluster buffer as it may @@ -795,7 +796,11 @@ xfs_inode_item_push( rval = XFS_ITEM_LOCKED; } - spin_lock(&lip->li_ailp->ail_lock); + /* + * The buffer no longer protects the log item from reclaim, so + * do not reference lip after this point. + */ + spin_lock(&ailp->ail_lock); return rval; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9c295abd0a0a..ef1ea8a1238c 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -608,8 +608,9 @@ xfs_unmount_check( * have been retrying in the background. This will prevent never-ending * retries in AIL pushing from hanging the unmount. * - * Finally, we can push the AIL to clean all the remaining dirty objects, then - * reclaim the remaining inodes that are still in memory at this point in time. + * Stop inodegc and background reclaim before pushing the AIL so that they + * are not running while the AIL is being flushed. Then push the AIL to + * clean all the remaining dirty objects and reclaim the remaining inodes. */ static void xfs_unmount_flush_inodes( @@ -621,9 +622,9 @@ xfs_unmount_flush_inodes( xfs_set_unmounting(mp); - xfs_ail_push_all_sync(mp->m_ail); xfs_inodegc_stop(mp); cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_ail_push_all_sync(mp->m_ail); xfs_reclaim_inodes(mp); xfs_health_unmount(mp); xfs_healthmon_unmount(mp); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 813e5a9f57eb..5e8190fe2be9 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -56,6 +56,7 @@ #include <linux/tracepoint.h> struct xfs_agf; +struct xfs_ail; struct xfs_alloc_arg; struct xfs_attr_list_context; struct xfs_buf_log_item; @@ -1650,16 +1651,43 @@ TRACE_EVENT(xfs_log_force, DEFINE_EVENT(xfs_log_item_class, name, \ TP_PROTO(struct xfs_log_item *lip), \ TP_ARGS(lip)) -DEFINE_LOG_ITEM_EVENT(xfs_ail_push); -DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); -DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); -DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); +DECLARE_EVENT_CLASS(xfs_ail_push_class, + TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), + TP_ARGS(ailp, type, flags, lsn), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(uint, type) + __field(unsigned long, flags) + __field(xfs_lsn_t, lsn) + ), + TP_fast_assign( + __entry->dev = ailp->ail_log->l_mp->m_super->s_dev; + __entry->type = type; + __entry->flags = flags; + __entry->lsn = lsn; + ), + TP_printk("dev %d:%d lsn %d/%d type %s flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_AIL_PUSH_EVENT(name) \ +DEFINE_EVENT(xfs_ail_push_class, name, \ + TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), \ + TP_ARGS(ailp, type, flags, lsn)) +DEFINE_AIL_PUSH_EVENT(xfs_ail_push); +DEFINE_AIL_PUSH_EVENT(xfs_ail_pinned); +DEFINE_AIL_PUSH_EVENT(xfs_ail_locked); +DEFINE_AIL_PUSH_EVENT(xfs_ail_flushing); + DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), TP_ARGS(lip, old_lsn, new_lsn), @@ -5091,23 +5119,16 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - path = file_path(file, __entry->pathname, MAXNAMELEN); - if (IS_ERR(path)) - strncpy(__entry->pathname, "(unknown)", - sizeof(__entry->pathname)); ), - TP_printk("dev %d:%d xmino 0x%lx path '%s'", + TP_printk("dev %d:%d xmino 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->pathname) + __entry->ino) ); TRACE_EVENT(xmbuf_free, diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 923729af4206..99a9bf3762b7 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -365,6 +365,12 @@ xfsaild_resubmit_item( return XFS_ITEM_SUCCESS; } +/* + * Push a single log item from the AIL. + * + * @lip may have been released and freed by the time this function returns, + * so callers must not dereference the log item afterwards. + */ static inline uint xfsaild_push_item( struct xfs_ail *ailp, @@ -458,6 +464,74 @@ xfs_ail_calc_push_target( return target_lsn; } +static void +xfsaild_process_logitem( + struct xfs_ail *ailp, + struct xfs_log_item *lip, + int *stuck, + int *flushing) +{ + struct xfs_mount *mp = ailp->ail_log->l_mp; + uint type = lip->li_type; + unsigned long flags = lip->li_flags; + xfs_lsn_t item_lsn = lip->li_lsn; + int lock_result; + + /* + * Note that iop_push may unlock and reacquire the AIL lock. We + * rely on the AIL cursor implementation to be able to deal with + * the dropped lock. + * + * The log item may have been freed by the push, so it must not + * be accessed or dereferenced below this line. + */ + lock_result = xfsaild_push_item(ailp, lip); + switch (lock_result) { + case XFS_ITEM_SUCCESS: + XFS_STATS_INC(mp, xs_push_ail_success); + trace_xfs_ail_push(ailp, type, flags, item_lsn); + + ailp->ail_last_pushed_lsn = item_lsn; + break; + + case XFS_ITEM_FLUSHING: + /* + * The item or its backing buffer is already being + * flushed. The typical reason for that is that an + * inode buffer is locked because we already pushed the + * updates to it as part of inode clustering. + * + * We do not want to stop flushing just because lots + * of items are already being flushed, but we need to + * re-try the flushing relatively soon if most of the + * AIL is being flushed. + */ + XFS_STATS_INC(mp, xs_push_ail_flushing); + trace_xfs_ail_flushing(ailp, type, flags, item_lsn); + + (*flushing)++; + ailp->ail_last_pushed_lsn = item_lsn; + break; + + case XFS_ITEM_PINNED: + XFS_STATS_INC(mp, xs_push_ail_pinned); + trace_xfs_ail_pinned(ailp, type, flags, item_lsn); + + (*stuck)++; + ailp->ail_log_flush++; + break; + case XFS_ITEM_LOCKED: + XFS_STATS_INC(mp, xs_push_ail_locked); + trace_xfs_ail_locked(ailp, type, flags, item_lsn); + + (*stuck)++; + break; + default: + ASSERT(0); + break; + } +} + static long xfsaild_push( struct xfs_ail *ailp) @@ -505,62 +579,11 @@ xfsaild_push( lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) { - int lock_result; if (test_bit(XFS_LI_FLUSHING, &lip->li_flags)) goto next_item; - /* - * Note that iop_push may unlock and reacquire the AIL lock. We - * rely on the AIL cursor implementation to be able to deal with - * the dropped lock. - */ - lock_result = xfsaild_push_item(ailp, lip); - switch (lock_result) { - case XFS_ITEM_SUCCESS: - XFS_STATS_INC(mp, xs_push_ail_success); - trace_xfs_ail_push(lip); - - ailp->ail_last_pushed_lsn = lsn; - break; - - case XFS_ITEM_FLUSHING: - /* - * The item or its backing buffer is already being - * flushed. The typical reason for that is that an - * inode buffer is locked because we already pushed the - * updates to it as part of inode clustering. - * - * We do not want to stop flushing just because lots - * of items are already being flushed, but we need to - * re-try the flushing relatively soon if most of the - * AIL is being flushed. - */ - XFS_STATS_INC(mp, xs_push_ail_flushing); - trace_xfs_ail_flushing(lip); - - flushing++; - ailp->ail_last_pushed_lsn = lsn; - break; - - case XFS_ITEM_PINNED: - XFS_STATS_INC(mp, xs_push_ail_pinned); - trace_xfs_ail_pinned(lip); - - stuck++; - ailp->ail_log_flush++; - break; - case XFS_ITEM_LOCKED: - XFS_STATS_INC(mp, xs_push_ail_locked); - trace_xfs_ail_locked(lip); - - stuck++; - break; - default: - ASSERT(0); - break; - } - + xfsaild_process_logitem(ailp, lip, &stuck, &flushing); count++; /* diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c index 8bbd4ec567f8..5ead3976d511 100644 --- a/fs/xfs/xfs_verify_media.c +++ b/fs/xfs/xfs_verify_media.c @@ -183,10 +183,9 @@ xfs_verify_iosize( min_not_zero(SZ_1M, me->me_max_io_size); BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT); - ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev)); + ASSERT(BBTOB(bbcount) >= btp->bt_logical_sectorsize); - return clamp(iosize, bdev_logical_block_size(btp->bt_bdev), - BBTOB(bbcount)); + return clamp(iosize, btp->bt_logical_sectorsize, BBTOB(bbcount)); } /* Allocate as much memory as we can get for verification buffer. */ @@ -218,8 +217,8 @@ xfs_verify_media_error( unsigned int bio_bbcount, blk_status_t bio_status) { - trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr, - bio_bbcount, bio_status); + trace_xfs_verify_media_error(mp, me, btp->bt_dev, daddr, bio_bbcount, + bio_status); /* * Pass any error, I/O or otherwise, up to the caller if we didn't @@ -280,7 +279,7 @@ xfs_verify_media( btp = mp->m_ddev_targp; break; case XFS_DEV_LOG: - if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) + if (mp->m_logdev_targp != mp->m_ddev_targp) btp = mp->m_logdev_targp; break; case XFS_DEV_RT: @@ -299,7 +298,7 @@ xfs_verify_media( /* start and end have to be aligned to the lba size */ if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr), - bdev_logical_block_size(btp->bt_bdev))) + btp->bt_logical_sectorsize)) return -EINVAL; /* @@ -331,8 +330,7 @@ xfs_verify_media( if (!folio) return -ENOMEM; - trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount, - folio); + trace_xfs_verify_media(mp, me, btp->bt_dev, daddr, bbcount, folio); bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL); if (!bio) { @@ -400,7 +398,7 @@ out_folio: * an operational error. */ me->me_start_daddr = daddr; - trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev); + trace_xfs_verify_media_end(mp, me, btp->bt_dev); return 0; } diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index a735f16d9cd8..544213067d59 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -332,8 +332,8 @@ xfs_vn_listxattr( memset(&context, 0, sizeof(context)); context.dp = XFS_I(inode); context.resynch = 1; - context.buffer = size ? data : NULL; context.bufsize = size; + context.buffer = size ? data : NULL; context.firstu = context.bufsize; context.put_listent = xfs_xattr_put_listent; diff --git a/include/linux/damon.h b/include/linux/damon.h index a4fea23da857..be3d198043ff 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -810,6 +810,12 @@ struct damon_ctx { struct damos_walk_control *walk_control; struct mutex walk_control_lock; + /* + * indicate if this may be corrupted. Currentonly this is set only for + * damon_commit_ctx() failure. + */ + bool maybe_corrupted; + /* Working thread of the given DAMON context */ struct task_struct *kdamond; /* Protects @kdamond field access */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 29973baa0581..99ef042ecdb4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -80,11 +80,18 @@ #define DMA_ATTR_MMIO (1UL << 10) /* - * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline - * overlapping this buffer while it is mapped for DMA. All mappings sharing - * a cacheline must have this attribute for this to be considered safe. + * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be + * overlapped. All mappings sharing a cacheline must have this attribute for + * this to be considered safe. */ -#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11) +#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES (1UL << 11) + +/* + * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required. + * All mappings that carry this attribute can't work with SWIOTLB and cache + * flushing. + */ +#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12) /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can @@ -248,8 +255,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, { return NULL; } -static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, unsigned long attrs) +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { } static inline void *dmam_alloc_attrs(struct device *dev, size_t size, diff --git a/include/linux/leafops.h b/include/linux/leafops.h index a9ff94b744f2..05673d3529e7 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h @@ -363,6 +363,23 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry) return swp_offset(entry) & SWP_PFN_MASK; } +static inline void softleaf_migration_sync(softleaf_t entry, + struct folio *folio) +{ + /* + * Ensure we do not race with split, which might alter tail pages into new + * folios and thus result in observing an unlocked folio. + * This matches the write barrier in __split_folio_to_order(). + */ + smp_rmb(); + + /* + * Any use of migration entries may only occur while the + * corresponding page is locked + */ + VM_WARN_ON_ONCE(!folio_test_locked(folio)); +} + /** * softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry. * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. @@ -374,11 +391,8 @@ static inline struct page *softleaf_to_page(softleaf_t entry) struct page *page = pfn_to_page(softleaf_to_pfn(entry)); VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding page is locked - */ - VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page)); + if (softleaf_is_migration(entry)) + softleaf_migration_sync(entry, page_folio(page)); return page; } @@ -394,12 +408,8 @@ static inline struct folio *softleaf_to_folio(softleaf_t entry) struct folio *folio = pfn_folio(softleaf_to_pfn(entry)); VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding folio is locked. - */ - VM_WARN_ON_ONCE(softleaf_is_migration(entry) && - !folio_test_locked(folio)); + if (softleaf_is_migration(entry)) + softleaf_migration_sync(entry, folio); return folio; } diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0fe96f3ab3ef..65c732d440d2 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -55,6 +55,7 @@ struct mempolicy { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ } w; + struct rcu_head rcu; }; /* diff --git a/include/linux/security.h b/include/linux/security.h index 83a646d72f6f..ee88dd2d2d1f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -145,6 +145,7 @@ enum lockdown_reason { LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_DBG_WRITE_KERNEL, LOCKDOWN_RTAS_ERROR_INJECTION, + LOCKDOWN_XEN_USER_ACTIONS, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index af7cfee7b8f6..0dc671c07d3a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -159,10 +159,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging - * @driver_override: If the name of a driver is written to this attribute, then - * the device will bind to the named driver and only the named driver. - * Do not set directly, because core frees it; use driver_set_override() to - * set or clear it. * @pcpu_statistics: statistics for the spi_device * @word_delay: delay to be inserted between consecutive * words of a transfer @@ -224,7 +220,6 @@ struct spi_device { void *controller_state; void *controller_data; char modalias[SPI_NAME_SIZE]; - const char *driver_override; /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index dec7cbe015aa..905b629e8fa3 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -11,6 +11,7 @@ #ifndef _LINUX_SRCU_TINY_H #define _LINUX_SRCU_TINY_H +#include <linux/irq_work_types.h> #include <linux/swait.h> struct srcu_struct { @@ -24,18 +25,21 @@ struct srcu_struct { struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */ struct work_struct srcu_work; /* For driving grace periods. */ + struct irq_work srcu_irq_work; /* Defer schedule_work() to irq work. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; void srcu_drive_gp(struct work_struct *wp); +void srcu_tiny_irq_work(struct irq_work *irq_work); #define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored) \ { \ .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ .srcu_cb_tail = &name.srcu_cb_head, \ .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ + .srcu_irq_work = { .func = srcu_tiny_irq_work }, \ __SRCU_DEP_MAP_INIT(name) \ } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 958cb7ef41cb..be76fa4fc170 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -34,7 +34,7 @@ struct srcu_data { /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ - spinlock_t __private lock ____cacheline_internodealigned_in_smp; + raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ @@ -55,7 +55,7 @@ struct srcu_data { * Node in SRCU combining tree, similar in function to rcu_data. */ struct srcu_node { - spinlock_t __private lock; + raw_spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */ /* if greater than ->srcu_gp_seq. */ unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */ @@ -74,7 +74,7 @@ struct srcu_usage { /* First node at each level. */ int srcu_size_state; /* Small-to-big transition state. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ - spinlock_t __private lock; /* Protect counters and size state. */ + raw_spinlock_t __private lock; /* Protect counters and size state. */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ @@ -95,6 +95,7 @@ struct srcu_usage { unsigned long reschedule_jiffies; unsigned long reschedule_count; struct delayed_work work; + struct irq_work irq_work; struct srcu_struct *srcu_ssp; }; @@ -156,7 +157,7 @@ struct srcu_struct { #define __SRCU_USAGE_INIT(name) \ { \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \ .srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \ .srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 75dabb763c65..f36d21b5bc19 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -207,6 +207,39 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type); } +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool little_endian) +{ + u16 hdr_len; + + hdr_len = skb_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len); +} + +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_tnl_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + u16 hdr_len; + + hdr_len = skb_inner_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += inner_tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(true, hdr_len); +} + static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, @@ -385,7 +418,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, bool tnl_hdr_negotiated, bool little_endian, int vlan_hlen, - bool has_data_valid) + bool has_data_valid, + bool feature_hdrlen) { struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr; unsigned int inner_nh, outer_th; @@ -394,9 +428,17 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM); - if (!tnl_gso_type) - return virtio_net_hdr_from_skb(skb, hdr, little_endian, - has_data_valid, vlan_hlen); + if (!tnl_gso_type) { + ret = virtio_net_hdr_from_skb(skb, hdr, little_endian, + has_data_valid, vlan_hlen); + if (ret) + return ret; + + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_hdrlen(skb, hdr, little_endian); + + return ret; + } /* Tunnel support not negotiated but skb ask for it. */ if (!tnl_hdr_negotiated) @@ -414,6 +456,9 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (ret) return ret; + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_tnl_hdrlen(skb, hdr); + if (skb->protocol == htons(ETH_P_IPV6)) hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; else diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 010f1a8fd15f..5172afee5494 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -658,6 +658,7 @@ struct l2cap_conn { struct sk_buff *rx_skb; __u32 rx_len; struct ida tx_ida; + __u8 tx_ident; struct sk_buff_head pending_rx; struct work_struct pending_rx_work; diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 78a27ac73070..b2c359c6dd1b 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -158,6 +158,7 @@ static struct sk_buff *codel_dequeue(void *ctx, bool drop; if (!skb) { + vars->first_above_time = 0; vars->dropping = false; return skb; } diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5a979dcab538..6d936e9f2fd3 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -264,6 +264,20 @@ inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk, return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } +static inline bool inet_use_hash2_on_bind(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + return false; + + if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return true; + } +#endif + return sk->sk_rcv_saddr != htonl(INADDR_ANY); +} + struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 88b0dd4d8e09..9f8b6814a96a 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -507,12 +507,14 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); void fib6_run_gc(unsigned long expires, struct net *net, bool force); - void fib6_gc_cleanup(void); int fib6_init(void); +#if IS_ENABLED(CONFIG_IPV6) /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. @@ -545,6 +547,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i) hlist_del_init(&f6i->gc_link); } +static inline void fib6_may_remove_gc_list(struct net *net, + struct fib6_info *f6i) +{ + struct fib6_gc_args gc_args; + + if (hlist_unhashed(&f6i->gc_link)) + return; + + gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval); + gc_args.more = 0; + + rcu_read_lock(); + fib6_age_exceptions(f6i, &gc_args, jiffies); + rcu_read_unlock(); +} +#endif + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3384859a8921..8883575adcc1 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock); extern spinlock_t nf_conntrack_expect_lock; +static inline void lockdep_nfct_expect_lock_held(void) +{ + lockdep_assert_held(&nf_conntrack_expect_lock); +} + /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 165e7a03b8e9..e9a8350e7ccf 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -22,10 +22,16 @@ struct nf_conntrack_expect { /* Hash member */ struct hlist_node hnode; + /* Network namespace */ + possible_net_t net; + /* We expect this tuple, with the following mask */ struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_mask mask; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* Usage count. */ refcount_t use; @@ -40,7 +46,7 @@ struct nf_conntrack_expect { struct nf_conntrack_expect *this); /* Helper to assign to new connection */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; /* The conntrack of the master connection */ struct nf_conn *master; @@ -62,7 +68,17 @@ struct nf_conntrack_expect { static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) { - return nf_ct_net(exp->master); + return read_pnet(&exp->net); +} + +static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a, + const struct nf_conntrack_zone *b) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return a->zone.id == b->id; +#else + return true; +#endif } #define NF_CT_EXP_POLICY_NAME_LEN 16 diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 23dd647fe024..b73983a17e08 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -59,7 +59,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock *nlsk; + struct sock __rcu *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 79bd5a7a0f88..0e871c786513 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -27,11 +27,6 @@ struct sdca_function_desc; #define SDCA_MAX_ENTITY_COUNT 128 /* - * Sanity check on number of initialization writes, can be expanded if needed. - */ -#define SDCA_MAX_INIT_COUNT 2048 - -/* * The Cluster IDs are 16-bit, so a maximum of 65535 Clusters per * function can be represented, however limit this to a slightly * more reasonable value. Can be expanded if needed. diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 125bdc166bfe..0864700f76e0 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -769,12 +769,15 @@ TRACE_EVENT(btrfs_sync_file, ), TP_fast_assign( - const struct dentry *dentry = file->f_path.dentry; - const struct inode *inode = d_inode(dentry); + struct dentry *dentry = file_dentry(file); + struct inode *inode = file_inode(file); + struct dentry *parent = dget_parent(dentry); + struct inode *parent_inode = d_inode(parent); - TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb)); + dput(parent); + TP_fast_assign_fsid(btrfs_sb(inode->i_sb)); __entry->ino = btrfs_ino(BTRFS_I(inode)); - __entry->parent = btrfs_ino(BTRFS_I(d_inode(dentry->d_parent))); + __entry->parent = btrfs_ino(BTRFS_I(parent_inode)); __entry->datasync = datasync; __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ), diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 33e99e792f1a..63597b004424 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -32,7 +32,9 @@ TRACE_DEFINE_ENUM(DMA_NONE); { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ { DMA_ATTR_NO_WARN, "NO_WARN" }, \ { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \ - { DMA_ATTR_MMIO, "MMIO" }) + { DMA_ATTR_MMIO, "MMIO" }, \ + { DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \ + { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }) DECLARE_EVENT_CLASS(dma_map, TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 26071021e986..56b6b60a814f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -159,5 +159,9 @@ enum ip_conntrack_expect_events { #define NF_CT_EXPECT_INACTIVE 0x2 #define NF_CT_EXPECT_USERSPACE 0x4 +#ifdef __KERNEL__ +#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \ + NF_CT_EXPECT_USERSPACE) +#endif #endif /* _UAPI_NF_CONNTRACK_COMMON_H */ diff --git a/init/Kconfig b/init/Kconfig index 444ce811ea67..7484cd703bc1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -146,7 +146,7 @@ config CC_HAS_COUNTED_BY config CC_HAS_COUNTED_BY_PTR bool # supported since clang 22 - default y if CC_IS_CLANG && CLANG_VERSION >= 220000 + default y if CC_IS_CLANG && CLANG_VERSION >= 220100 # supported since gcc 16.0.0 default y if CC_IS_GCC && GCC_VERSION >= 160000 diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 80178b69e05a..c2d3e45544bb 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -119,12 +119,14 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) sq_idx); break; } - if ((++sq_head & sq_mask) == 0) { + if (sq_idx == sq_mask) { seq_printf(m, "%5u: corrupted sqe, wrapping 128B entry\n", sq_idx); break; } + sq_head++; + i++; sqe128 = true; } seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, " diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 86f87e43438c..0677918f06a8 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -453,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) return overlap; } -static void active_cacheline_inc_overlap(phys_addr_t cln) +static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean) { int overlap = active_cacheline_read_overlap(cln); @@ -462,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln) /* If we overflowed the overlap counter then we're potentially * leaking dma-mappings. */ - WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, + WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP, pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"), ACTIVE_CACHELINE_MAX_OVERLAP, &cln); } @@ -495,7 +495,7 @@ static int active_cacheline_insert(struct dma_debug_entry *entry, if (rc == -EEXIST) { struct dma_debug_entry *existing; - active_cacheline_inc_overlap(cln); + active_cacheline_inc_overlap(cln, entry->is_cache_clean); existing = radix_tree_lookup(&dma_active_cacheline, cln); /* A lookup failure here after we got -EEXIST is unexpected. */ WARN_ON(!existing); @@ -601,7 +601,8 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) unsigned long flags; int rc; - entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN); + entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES | + DMA_ATTR_REQUIRE_COHERENT); bucket = get_hash_bucket(entry, &flags); hash_bucket_add(bucket, entry); diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index e89f175e9c2d..6184ff303f08 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -84,7 +84,7 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, dma_addr_t dma_addr; if (is_swiotlb_force_bounce(dev)) { - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) return DMA_MAPPING_ERROR; return swiotlb_map(dev, phys, size, dir, attrs); @@ -98,7 +98,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, dma_addr = phys_to_dma(dev, phys); if (unlikely(!dma_capable(dev, dma_addr, size, true)) || dma_kmalloc_needs_bounce(dev, size, dir)) { - if (is_swiotlb_active(dev)) + if (is_swiotlb_active(dev) && + !(attrs & DMA_ATTR_REQUIRE_COHERENT)) return swiotlb_map(dev, phys, size, dir, attrs); goto err_overflow; @@ -123,7 +124,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, { phys_addr_t phys; - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) /* nothing to do: uncached and no swiotlb */ return; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 3928a509c44c..6d3dd0bd3a88 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -164,6 +164,9 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return DMA_MAPPING_ERROR; + if (dma_map_direct(dev, ops) || (!is_mmio && arch_dma_map_phys_direct(dev, phys + size))) addr = dma_direct_map_phys(dev, phys, size, dir, attrs); @@ -235,6 +238,9 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return -EOPNOTSUPP; + if (WARN_ON_ONCE(!dev->dma_mask)) return 0; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index d8e6f1d889d5..9fd73700ddcf 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -30,6 +30,7 @@ #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/io.h> +#include <linux/kmsan-checks.h> #include <linux/iommu-helper.h> #include <linux/init.h> #include <linux/memblock.h> @@ -901,10 +902,19 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size local_irq_save(flags); page = pfn_to_page(pfn); - if (dir == DMA_TO_DEVICE) + if (dir == DMA_TO_DEVICE) { + /* + * Ideally, kmsan_check_highmem_page() + * could be used here to detect infoleaks, + * but callers may map uninitialized buffers + * that will be written by the device, + * causing false positives. + */ memcpy_from_page(vaddr, page, offset, sz); - else + } else { + kmsan_unpoison_memory(vaddr, sz); memcpy_to_page(page, offset, vaddr, sz); + } local_irq_restore(flags); size -= sz; @@ -913,8 +923,15 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size offset = 0; } } else if (dir == DMA_TO_DEVICE) { + /* + * Ideally, kmsan_check_memory() could be used here to detect + * infoleaks (uninitialized data being sent to device), but + * callers may map uninitialized buffers that will be written + * by the device, causing false positives. + */ memcpy(vaddr, phys_to_virt(orig_addr), size); } else { + kmsan_unpoison_memory(vaddr, size); memcpy(phys_to_virt(orig_addr), vaddr, size); } } diff --git a/kernel/futex/core.c b/kernel/futex/core.c index cf7e610eac42..31e83a09789e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr) if (!vma) return FUTEX_NO_NODE; - mpol = vma_policy(vma); + mpol = READ_ONCE(vma->vm_policy); if (!mpol) return FUTEX_NO_NODE; diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index bc1f7e83a37e..7808068fa59e 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -918,7 +918,7 @@ int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked) int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to; - struct task_struct *exiting = NULL; + struct task_struct *exiting; struct rt_mutex_waiter rt_waiter; struct futex_q q = futex_q_init; DEFINE_WAKE_Q(wake_q); @@ -933,6 +933,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl to = futex_setup_timer(time, &timeout, flags, 0); retry: + exiting = NULL; ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 743c7a728237..77ad9691f6a6 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -459,6 +459,14 @@ SYSCALL_DEFINE4(futex_requeue, if (ret) return ret; + /* + * For now mandate both flags are identical, like the sys_futex() + * interface has. If/when we merge the variable sized futex support, + * that patch can modify this test to allow a difference in size. + */ + if (futexes[0].w.flags != futexes[1].w.flags) + return -EINVAL; + cmpval = futexes[0].w.val; return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, diff --git a/kernel/power/main.c b/kernel/power/main.c index 5f8c9e12eaec..5429e9f19b65 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -40,7 +40,7 @@ void pm_restore_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); - if (WARN_ON(!saved_gfp_count) || --saved_gfp_count) + if (!saved_gfp_count || --saved_gfp_count) return; gfp_allowed_mask = saved_gfp_mask; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 6e1321837c66..a564650734dc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2855,6 +2855,17 @@ int snapshot_write_finalize(struct snapshot_handle *handle) { int error; + /* + * Call snapshot_write_next() to drain any trailing zero pages, + * but make sure we're in the data page region first. + * This function can return PAGE_SIZE if the kernel was expecting + * another copy page. Return -ENODATA in that situation. + */ + if (handle->cur > nr_meta_pages + 1) { + error = snapshot_write_next(handle); + if (error) + return error > 0 ? -ENODATA : error; + } copy_last_highmem_page(); error = hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index dc5d614b372c..9b10b57b79ad 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -502,6 +502,15 @@ do { \ ___locked; \ }) +#define raw_spin_trylock_irqsave_rcu_node(p, flags) \ +({ \ + bool ___locked = raw_spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + \ + if (___locked) \ + smp_mb__after_unlock_lock(); \ + ___locked; \ +}) + #define raw_lockdep_assert_held_rcu_node(p) \ lockdep_assert_held(&ACCESS_PRIVATE(p, lock)) diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 3450c3751ef7..a2e2d516e51b 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -9,6 +9,7 @@ */ #include <linux/export.h> +#include <linux/irq_work.h> #include <linux/mutex.h> #include <linux/preempt.h> #include <linux/rcupdate_wait.h> @@ -41,6 +42,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp) ssp->srcu_idx_max = 0; INIT_WORK(&ssp->srcu_work, srcu_drive_gp); INIT_LIST_HEAD(&ssp->srcu_work.entry); + init_irq_work(&ssp->srcu_irq_work, srcu_tiny_irq_work); return 0; } @@ -84,6 +86,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); void cleanup_srcu_struct(struct srcu_struct *ssp) { WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); + irq_work_sync(&ssp->srcu_irq_work); flush_work(&ssp->srcu_work); WARN_ON(ssp->srcu_gp_running); WARN_ON(ssp->srcu_gp_waiting); @@ -177,6 +180,20 @@ void srcu_drive_gp(struct work_struct *wp) } EXPORT_SYMBOL_GPL(srcu_drive_gp); +/* + * Use an irq_work to defer schedule_work() to avoid acquiring the workqueue + * pool->lock while the caller might hold scheduler locks, causing lockdep + * splats due to workqueue_init() doing a wakeup. + */ +void srcu_tiny_irq_work(struct irq_work *irq_work) +{ + struct srcu_struct *ssp; + + ssp = container_of(irq_work, struct srcu_struct, srcu_irq_work); + schedule_work(&ssp->srcu_work); +} +EXPORT_SYMBOL_GPL(srcu_tiny_irq_work); + static void srcu_gp_start_if_needed(struct srcu_struct *ssp) { unsigned long cookie; @@ -189,7 +206,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp) WRITE_ONCE(ssp->srcu_idx_max, cookie); if (!READ_ONCE(ssp->srcu_gp_running)) { if (likely(srcu_init_done)) - schedule_work(&ssp->srcu_work); + irq_work_queue(&ssp->srcu_irq_work); else if (list_empty(&ssp->srcu_work.entry)) list_add(&ssp->srcu_work.entry, &srcu_boot_list); } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index aef8e91ad33e..0d01cd8c4b4a 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -19,6 +19,7 @@ #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/preempt.h> +#include <linux/irq_work.h> #include <linux/rcupdate_wait.h> #include <linux/sched.h> #include <linux/smp.h> @@ -75,44 +76,9 @@ static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); +static void srcu_irq_work(struct irq_work *work); static void srcu_delay_timer(struct timer_list *t); -/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ -#define spin_lock_rcu_node(p) \ -do { \ - spin_lock(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) - -#define spin_lock_irq_rcu_node(p) \ -do { \ - spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_unlock_irq_rcu_node(p) \ - spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) - -#define spin_lock_irqsave_rcu_node(p, flags) \ -do { \ - spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_trylock_irqsave_rcu_node(p, flags) \ -({ \ - bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ - \ - if (___locked) \ - smp_mb__after_unlock_lock(); \ - ___locked; \ -}) - -#define spin_unlock_irqrestore_rcu_node(p, flags) \ - spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ - /* * Initialize SRCU per-CPU data. Note that statically allocated * srcu_struct structures might already have srcu_read_lock() and @@ -131,7 +97,7 @@ static void init_srcu_struct_data(struct srcu_struct *ssp) */ for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); - spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq; @@ -186,7 +152,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) /* Each pass through this loop initializes one srcu_node structure. */ srcu_for_each_node_breadth_first(ssp, snp) { - spin_lock_init(&ACCESS_PRIVATE(snp, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock)); BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { @@ -242,7 +208,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) if (!ssp->srcu_sup) return -ENOMEM; if (!is_static) - spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; ssp->srcu_sup->node = NULL; mutex_init(&ssp->srcu_sup->srcu_cb_mutex); @@ -252,6 +218,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0); INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu); + init_irq_work(&ssp->srcu_sup->irq_work, srcu_irq_work); ssp->srcu_sup->sda_is_static = is_static; if (!is_static) { ssp->sda = alloc_percpu(struct srcu_data); @@ -263,9 +230,12 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL; ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { - if (!init_srcu_struct_nodes(ssp, is_static ? GFP_ATOMIC : GFP_KERNEL)) + if (!preemptible()) + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC); + else if (init_srcu_struct_nodes(ssp, GFP_KERNEL)) + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); + else goto err_free_sda; - WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); } ssp->srcu_sup->srcu_ssp = ssp; smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, @@ -394,20 +364,20 @@ static void srcu_transition_to_big(struct srcu_struct *ssp) /* Double-checked locking on ->srcu_size-state. */ if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) { - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } __srcu_transition_to_big(ssp); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* * Check to see if the just-encountered contention event justifies * a transition to SRCU_SIZE_BIG. */ -static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) +static void raw_spin_lock_irqsave_check_contention(struct srcu_struct *ssp) { unsigned long j; @@ -429,16 +399,16 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module * parameter permits this. */ -static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) +static void raw_spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) { struct srcu_struct *ssp = sdp->ssp; - if (spin_trylock_irqsave_rcu_node(sdp, *flags)) + if (raw_spin_trylock_irqsave_rcu_node(sdp, *flags)) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_check_contention(ssp); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_rcu_node(sdp, *flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_check_contention(ssp); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_rcu_node(sdp, *flags); } /* @@ -447,12 +417,12 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module * parameter permits this. */ -static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) +static void raw_spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) { - if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) + if (raw_spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_check_contention(ssp); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_check_contention(ssp); } /* @@ -470,13 +440,13 @@ static void check_init_srcu_struct(struct srcu_struct *ssp) /* The smp_load_acquire() pairs with the smp_store_release(). */ if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ - spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } init_srcu_struct_fields(ssp, true); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -742,13 +712,15 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) unsigned long delay; struct srcu_usage *sup = ssp->srcu_sup; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); delay = srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (WARN_ON(!delay)) return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ + /* Wait for irq_work to finish first as it may queue a new work. */ + irq_work_sync(&sup->irq_work); flush_delayed_work(&sup->work); for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); @@ -960,7 +932,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) mutex_lock(&sup->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq_rcu_node(sup); + raw_spin_lock_irq_rcu_node(sup); idx = rcu_seq_state(sup->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); if (srcu_gp_is_expedited(ssp)) @@ -971,7 +943,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) gpseq = rcu_seq_current(&sup->srcu_gp_seq); if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq)) WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq); - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); mutex_unlock(&sup->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ @@ -983,7 +955,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) } else { idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); srcu_for_each_node_breadth_first(ssp, snp) { - spin_lock_irq_rcu_node(snp); + raw_spin_lock_irq_rcu_node(snp); cbs = false; last_lvl = snp >= sup->level[rcu_num_lvls - 1]; if (last_lvl) @@ -998,7 +970,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) else mask = snp->srcu_data_have_cbs[idx]; snp->srcu_data_have_cbs[idx] = 0; - spin_unlock_irq_rcu_node(snp); + raw_spin_unlock_irq_rcu_node(snp); if (cbs) srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); } @@ -1008,27 +980,27 @@ static void srcu_gp_end(struct srcu_struct *ssp) if (!(gpseq & counter_wrap_check)) for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) sdp->srcu_gp_seq_needed = gpseq; if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100)) sdp->srcu_gp_seq_needed_exp = gpseq; - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); } /* Callback initiation done, allow grace periods after next. */ mutex_unlock(&sup->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(sup); + raw_spin_lock_irq_rcu_node(sup); gpseq = rcu_seq_current(&sup->srcu_gp_seq); if (!rcu_seq_state(gpseq) && ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) { srcu_gp_start(ssp); - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); srcu_reschedule(ssp, 0); } else { - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); } /* Transition to big if needed. */ @@ -1059,19 +1031,19 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) || (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s))) return; - spin_lock_irqsave_rcu_node(snp, flags); + raw_spin_lock_irqsave_rcu_node(snp, flags); sgsne = snp->srcu_gp_seq_needed_exp; if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) { - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); return; } WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave_ssp_contention(ssp, &flags); + raw_spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s)) WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -1109,12 +1081,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) { if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf) return; /* GP already done and CBs recorded. */ - spin_lock_irqsave_rcu_node(snp, flags); + raw_spin_lock_irqsave_rcu_node(snp, flags); snp_seq = snp->srcu_have_cbs[idx]; if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) { if (snp == snp_leaf && snp_seq == s) snp->srcu_data_have_cbs[idx] |= sdp->grpmask; - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); if (snp == snp_leaf && snp_seq != s) { srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0); return; @@ -1129,11 +1101,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, sgsne = snp->srcu_gp_seq_needed_exp; if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s))) WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave_ssp_contention(ssp, &flags); + raw_spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load @@ -1154,13 +1126,17 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, // it isn't. And it does not have to be. After all, it // can only be executed during early boot when there is only // the one boot CPU running with interrupts still disabled. + // + // Use an irq_work here to avoid acquiring runqueue lock with + // srcu rcu_node::lock held. BPF instrument could introduce the + // opposite dependency, hence we need to break the possible + // locking dependency here. if (likely(srcu_init_done)) - queue_delayed_work(rcu_gp_wq, &sup->work, - !!srcu_get_delay(ssp)); + irq_work_queue(&sup->irq_work); else if (list_empty(&sup->work.work.entry)) list_add(&sup->work.work.entry, &srcu_boot_list); } - spin_unlock_irqrestore_rcu_node(sup, flags); + raw_spin_unlock_irqrestore_rcu_node(sup, flags); } /* @@ -1172,9 +1148,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { unsigned long curdelay; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); curdelay = !srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); for (;;) { if (srcu_readers_active_idx_check(ssp, idx)) @@ -1285,12 +1261,12 @@ static bool srcu_should_expedite(struct srcu_struct *ssp) return false; /* If the local srcu_data structure has callbacks, not idle. */ sdp = raw_cpu_ptr(ssp->sda); - spin_lock_irqsave_rcu_node(sdp, flags); + raw_spin_lock_irqsave_rcu_node(sdp, flags); if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); return false; /* Callbacks already present, so not idle. */ } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); /* * No local callbacks, so probabilistically probe global state. @@ -1350,7 +1326,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); else sdp = raw_cpu_ptr(ssp->sda); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (rhp) rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); /* @@ -1410,7 +1386,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, sdp->srcu_gp_seq_needed_exp = s; needexp = true; } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); /* Ensure that snp node tree is fully initialized before traversing it */ if (ss_state < SRCU_SIZE_WAIT_BARRIER) @@ -1522,7 +1498,7 @@ static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) /* * Make sure that later code is ordered after the SRCU grace - * period. This pairs with the spin_lock_irq_rcu_node() + * period. This pairs with the raw_spin_lock_irq_rcu_node() * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed * because the current CPU might have been totally uninvolved with * (and thus unordered against) that grace period. @@ -1701,7 +1677,7 @@ static void srcu_barrier_cb(struct rcu_head *rhp) */ static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) { - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); @@ -1710,7 +1686,7 @@ static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) debug_rcu_head_unqueue(&sdp->srcu_barrier_head); atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt); } - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); } /** @@ -1761,7 +1737,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp) bool needcb = false; struct srcu_data *sdp = container_of(rhp, struct srcu_data, srcu_ec_head); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (sdp->srcu_ec_state == SRCU_EC_IDLE) { WARN_ON_ONCE(1); } else if (sdp->srcu_ec_state == SRCU_EC_PENDING) { @@ -1771,7 +1747,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp) sdp->srcu_ec_state = SRCU_EC_PENDING; needcb = true; } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); // If needed, requeue ourselves as an expedited SRCU callback. if (needcb) __call_srcu(sdp->ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false); @@ -1795,7 +1771,7 @@ void srcu_expedite_current(struct srcu_struct *ssp) migrate_disable(); sdp = this_cpu_ptr(ssp->sda); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (sdp->srcu_ec_state == SRCU_EC_IDLE) { sdp->srcu_ec_state = SRCU_EC_PENDING; needcb = true; @@ -1804,7 +1780,7 @@ void srcu_expedite_current(struct srcu_struct *ssp) } else { WARN_ON_ONCE(sdp->srcu_ec_state != SRCU_EC_REPOST); } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); // If needed, queue an expedited SRCU callback. if (needcb) __call_srcu(ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false); @@ -1848,17 +1824,17 @@ static void srcu_advance_state(struct srcu_struct *ssp) */ idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; } idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) srcu_gp_start(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (idx != SRCU_STATE_IDLE) { mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* Someone else started the grace period. */ @@ -1872,10 +1848,10 @@ static void srcu_advance_state(struct srcu_struct *ssp) return; /* readers present, retry later. */ } srcu_flip(ssp); - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2); ssp->srcu_sup->srcu_n_exp_nodelay = 0; - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); } if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) { @@ -1913,7 +1889,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); @@ -1924,7 +1900,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) */ if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); return; /* Someone else on the job or nothing to do. */ } @@ -1932,7 +1908,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) sdp->srcu_cblist_invoking = true; rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); len = ready_cbs.len; - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); rhp = rcu_cblist_dequeue(&ready_cbs); for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { debug_rcu_head_unqueue(rhp); @@ -1947,11 +1923,11 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Update counts, accelerate new callbacks, and if needed, * schedule another round of callback invocation. */ - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); rcu_segcblist_add_len(&sdp->srcu_cblist, -len); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); /* An SRCU barrier or callbacks from previous nesting work pending */ if (more) srcu_schedule_cbs_sdp(sdp, 0); @@ -1965,7 +1941,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) { bool pushgp = true; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ @@ -1975,7 +1951,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) /* Outstanding request and no GP. Start one. */ srcu_gp_start(ssp); } - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (pushgp) queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay); @@ -1995,9 +1971,9 @@ static void process_srcu(struct work_struct *work) ssp = sup->srcu_ssp; srcu_advance_state(ssp); - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); curdelay = srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (curdelay) { WRITE_ONCE(sup->reschedule_count, 0); } else { @@ -2015,6 +1991,23 @@ static void process_srcu(struct work_struct *work) srcu_reschedule(ssp, curdelay); } +static void srcu_irq_work(struct irq_work *work) +{ + struct srcu_struct *ssp; + struct srcu_usage *sup; + unsigned long delay; + unsigned long flags; + + sup = container_of(work, struct srcu_usage, irq_work); + ssp = sup->srcu_ssp; + + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + delay = srcu_get_delay(ssp); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + + queue_delayed_work(rcu_gp_wq, &sup->work, !!delay); +} + void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags, unsigned long *gp_seq) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9d3a666ffde1..c9efb17cc255 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1118,7 +1118,7 @@ int proc_do_large_bitmap(const struct ctl_table *table, int dir, unsigned long bitmap_len = table->maxlen; unsigned long *bitmap = *(unsigned long **) table->data; unsigned long *tmp_bitmap = NULL; - char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; + char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c = 0; if (!bitmap || !bitmap_len || !left || (*ppos && SYSCTL_KERN_TO_USER(dir))) { *lenp = 0; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d5230b759a2d..655db2e82513 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -22,6 +22,39 @@ static struct task_struct *trigger_kthread; static struct llist_head trigger_data_free_list; static DEFINE_MUTEX(trigger_data_kthread_mutex); +static int trigger_kthread_fn(void *ignore); + +static void trigger_create_kthread_locked(void) +{ + lockdep_assert_held(&trigger_data_kthread_mutex); + + if (!trigger_kthread) { + struct task_struct *kthread; + + kthread = kthread_create(trigger_kthread_fn, NULL, + "trigger_data_free"); + if (!IS_ERR(kthread)) + WRITE_ONCE(trigger_kthread, kthread); + } +} + +static void trigger_data_free_queued_locked(void) +{ + struct event_trigger_data *data, *tmp; + struct llist_node *llnodes; + + lockdep_assert_held(&trigger_data_kthread_mutex); + + llnodes = llist_del_all(&trigger_data_free_list); + if (!llnodes) + return; + + tracepoint_synchronize_unregister(); + + llist_for_each_entry_safe(data, tmp, llnodes, llist) + kfree(data); +} + /* Bulk garbage collection of event_trigger_data elements */ static int trigger_kthread_fn(void *ignore) { @@ -56,30 +89,50 @@ void trigger_data_free(struct event_trigger_data *data) if (data->cmd_ops->set_filter) data->cmd_ops->set_filter(NULL, data, NULL); + /* + * Boot-time trigger registration can fail before kthread creation + * works. Keep the deferred-free semantics during boot and let late + * init start the kthread to drain the list. + */ + if (system_state == SYSTEM_BOOTING && !trigger_kthread) { + llist_add(&data->llist, &trigger_data_free_list); + return; + } + if (unlikely(!trigger_kthread)) { guard(mutex)(&trigger_data_kthread_mutex); + + trigger_create_kthread_locked(); /* Check again after taking mutex */ if (!trigger_kthread) { - struct task_struct *kthread; - - kthread = kthread_create(trigger_kthread_fn, NULL, - "trigger_data_free"); - if (!IS_ERR(kthread)) - WRITE_ONCE(trigger_kthread, kthread); + llist_add(&data->llist, &trigger_data_free_list); + /* Drain the queued frees synchronously if creation failed. */ + trigger_data_free_queued_locked(); + return; } } - if (!trigger_kthread) { - /* Do it the slow way */ - tracepoint_synchronize_unregister(); - kfree(data); - return; - } - llist_add(&data->llist, &trigger_data_free_list); wake_up_process(trigger_kthread); } +static int __init trigger_data_free_init(void) +{ + guard(mutex)(&trigger_data_kthread_mutex); + + if (llist_empty(&trigger_data_free_list)) + return 0; + + trigger_create_kthread_locked(); + if (trigger_kthread) + wake_up_process(trigger_kthread); + else + trigger_data_free_queued_locked(); + + return 0; +} +late_initcall(trigger_data_free_init); + static inline void data_ops_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index dee610e465b9..be6cf0bb3c03 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -2073,8 +2073,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy) if (!osnoise_has_registered_instances()) return; - guard(mutex)(&interface_lock); guard(cpus_read_lock)(); + guard(mutex)(&interface_lock); if (!cpu_online(cpu)) return; @@ -2237,11 +2237,11 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, if (running) stop_per_cpu_kthreads(); - mutex_lock(&interface_lock); /* * avoid CPU hotplug operations that might read options. */ cpus_read_lock(); + mutex_lock(&interface_lock); retval = cnt; @@ -2257,8 +2257,8 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, clear_bit(option, &osnoise_options); } - cpus_read_unlock(); mutex_unlock(&interface_lock); + cpus_read_unlock(); if (running) start_per_cpu_kthreads(); @@ -2345,16 +2345,16 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, if (running) stop_per_cpu_kthreads(); - mutex_lock(&interface_lock); /* * osnoise_cpumask is read by CPU hotplug operations. */ cpus_read_lock(); + mutex_lock(&interface_lock); cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); - cpus_read_unlock(); mutex_unlock(&interface_lock); + cpus_read_unlock(); if (running) start_per_cpu_kthreads(); diff --git a/lib/bug.c b/lib/bug.c index 623c467a8b76..aab9e6a40c5f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -173,10 +173,8 @@ struct bug_entry *find_bug(unsigned long bugaddr) return module_find_bug(bugaddr); } -__diag_push(); -__diag_ignore(GCC, all, "-Wsuggest-attribute=format", - "Not a valid __printf() conversion candidate."); -static void __warn_printf(const char *fmt, struct pt_regs *regs) +static __printf(1, 0) +void __warn_printf(const char *fmt, struct pt_regs *regs) { if (!fmt) return; @@ -195,7 +193,6 @@ static void __warn_printf(const char *fmt, struct pt_regs *regs) printk("%s", fmt); } -__diag_pop(); static enum bug_trap_type __report_bug(struct bug_entry *bug, unsigned long bugaddr, struct pt_regs *regs) { diff --git a/mm/damon/core.c b/mm/damon/core.c index c1d1091d307e..3e1890d64d06 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1252,6 +1252,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) { int err; + dst->maybe_corrupted = true; if (!is_power_of_2(src->min_region_sz)) return -EINVAL; @@ -1277,6 +1278,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) dst->addr_unit = src->addr_unit; dst->min_region_sz = src->min_region_sz; + dst->maybe_corrupted = false; return 0; } @@ -2678,6 +2680,8 @@ static void kdamond_call(struct damon_ctx *ctx, bool cancel) complete(&control->completion); else if (control->canceled && control->dealloc_on_cancel) kfree(control); + if (!cancel && ctx->maybe_corrupted) + break; } mutex_lock(&ctx->call_controls_lock); @@ -2707,6 +2711,8 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) kdamond_usleep(min_wait_time); kdamond_call(ctx, false); + if (ctx->maybe_corrupted) + return -EINVAL; damos_walk_cancel(ctx); } return -EBUSY; @@ -2790,6 +2796,8 @@ static int kdamond_fn(void *data) * kdamond_merge_regions() if possible, to reduce overhead */ kdamond_call(ctx, false); + if (ctx->maybe_corrupted) + break; if (!list_empty(&ctx->schemes)) kdamond_apply_schemes(ctx); else diff --git a/mm/damon/stat.c b/mm/damon/stat.c index 25fb44ccf99d..cf2c5a541eee 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -145,12 +145,59 @@ static int damon_stat_damon_call_fn(void *data) return 0; } +struct damon_stat_system_ram_range_walk_arg { + bool walked; + struct resource res; +}; + +static int damon_stat_system_ram_walk_fn(struct resource *res, void *arg) +{ + struct damon_stat_system_ram_range_walk_arg *a = arg; + + if (!a->walked) { + a->walked = true; + a->res.start = res->start; + } + a->res.end = res->end; + return 0; +} + +static unsigned long damon_stat_res_to_core_addr(resource_size_t ra, + unsigned long addr_unit) +{ + /* + * Use div_u64() for avoiding linking errors related with __udivdi3, + * __aeabi_uldivmod, or similar problems. This should also improve the + * performance optimization (read div_u64() comment for the detail). + */ + if (sizeof(ra) == 8 && sizeof(addr_unit) == 4) + return div_u64(ra, addr_unit); + return ra / addr_unit; +} + +static int damon_stat_set_monitoring_region(struct damon_target *t, + unsigned long addr_unit, unsigned long min_region_sz) +{ + struct damon_addr_range addr_range; + struct damon_stat_system_ram_range_walk_arg arg = {}; + + walk_system_ram_res(0, -1, &arg, damon_stat_system_ram_walk_fn); + if (!arg.walked) + return -EINVAL; + addr_range.start = damon_stat_res_to_core_addr( + arg.res.start, addr_unit); + addr_range.end = damon_stat_res_to_core_addr( + arg.res.end + 1, addr_unit); + if (addr_range.end <= addr_range.start) + return -EINVAL; + return damon_set_regions(t, &addr_range, 1, min_region_sz); +} + static struct damon_ctx *damon_stat_build_ctx(void) { struct damon_ctx *ctx; struct damon_attrs attrs; struct damon_target *target; - unsigned long start = 0, end = 0; ctx = damon_new_ctx(); if (!ctx) @@ -180,8 +227,8 @@ static struct damon_ctx *damon_stat_build_ctx(void) if (!target) goto free_out; damon_add_target(ctx, target); - if (damon_set_region_biggest_system_ram_default(target, &start, &end, - ctx->min_region_sz)) + if (damon_stat_set_monitoring_region(target, ctx->addr_unit, + ctx->min_region_sz)) goto free_out; return ctx; free_out: diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 576d1ddd736b..6a44a2f3d8fc 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1524,8 +1524,10 @@ static int damon_sysfs_commit_input(void *data) if (IS_ERR(param_ctx)) return PTR_ERR(param_ctx); test_ctx = damon_sysfs_new_test_ctx(kdamond->damon_ctx); - if (!test_ctx) + if (!test_ctx) { + damon_destroy_ctx(param_ctx); return -ENOMEM; + } err = damon_commit_ctx(test_ctx, param_ctx); if (err) goto out; @@ -1618,9 +1620,12 @@ static int damon_sysfs_repeat_call_fn(void *data) if (!mutex_trylock(&damon_sysfs_lock)) return 0; + if (sysfs_kdamond->contexts->nr != 1) + goto out; damon_sysfs_upd_tuned_intervals(sysfs_kdamond); damon_sysfs_upd_schemes_stats(sysfs_kdamond); damon_sysfs_upd_schemes_effective_quotas(sysfs_kdamond); +out: mutex_unlock(&damon_sysfs_lock); return 0; } @@ -1747,6 +1752,9 @@ static int damon_sysfs_update_schemes_tried_regions( static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd, struct damon_sysfs_kdamond *kdamond) { + if (cmd != DAMON_SYSFS_CMD_OFF && kdamond->contexts->nr != 1) + return -EINVAL; + switch (cmd) { case DAMON_SYSFS_CMD_ON: return damon_sysfs_turn_damon_on(kdamond); @@ -778,7 +778,7 @@ dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map, struct page *page = hmm_pfn_to_page(pfns[idx]); phys_addr_t paddr = hmm_pfn_to_phys(pfns[idx]); size_t offset = idx * map->dma_entry_size; - unsigned long attrs = 0; + unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT; dma_addr_t dma_addr; int ret; @@ -871,7 +871,7 @@ bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx) struct dma_iova_state *state = &map->state; dma_addr_t *dma_addrs = map->dma_list; unsigned long *pfns = map->pfn_list; - unsigned long attrs = 0; + unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT; if ((pfns[idx] & valid_dma) != valid_dma) return false; diff --git a/mm/memory.c b/mm/memory.c index 2f815a34d924..c65e82c86fed 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6815,11 +6815,16 @@ retry: pudp = pud_offset(p4dp, address); pud = pudp_get(pudp); - if (pud_none(pud)) + if (!pud_present(pud)) goto out; if (pud_leaf(pud)) { lock = pud_lock(mm, pudp); - if (!unlikely(pud_leaf(pud))) { + pud = pudp_get(pudp); + + if (unlikely(!pud_present(pud))) { + spin_unlock(lock); + goto out; + } else if (unlikely(!pud_leaf(pud))) { spin_unlock(lock); goto retry; } @@ -6831,9 +6836,16 @@ retry: pmdp = pmd_offset(pudp, address); pmd = pmdp_get_lockless(pmdp); + if (!pmd_present(pmd)) + goto out; if (pmd_leaf(pmd)) { lock = pmd_lock(mm, pmdp); - if (!unlikely(pmd_leaf(pmd))) { + pmd = pmdp_get(pmdp); + + if (unlikely(!pmd_present(pmd))) { + spin_unlock(lock); + goto out; + } else if (unlikely(!pmd_leaf(pmd))) { spin_unlock(lock); goto retry; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0e5175f1c767..cf92bd6a8226 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -487,7 +487,13 @@ void __mpol_put(struct mempolicy *pol) { if (!atomic_dec_and_test(&pol->refcnt)) return; - kmem_cache_free(policy_cache, pol); + /* + * Required to allow mmap_lock_speculative*() access, see for example + * futex_key_to_node_opt(). All accesses are serialized by mmap_lock, + * however the speculative lock section unbound by the normal lock + * boundaries, requiring RCU freeing. + */ + kfree_rcu(pol, rcu); } EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm"); @@ -1020,7 +1026,7 @@ static int vma_replace_policy(struct vm_area_struct *vma, } old = vma->vm_policy; - vma->vm_policy = new; /* protected by mmap_lock */ + WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */ mpol_put(old); return 0; diff --git a/mm/mseal.c b/mm/mseal.c index 316b5e1dec78..ac58643181f7 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -56,7 +56,6 @@ static int mseal_apply(struct mm_struct *mm, unsigned long start, unsigned long end) { struct vm_area_struct *vma, *prev; - unsigned long curr_start = start; VMA_ITERATOR(vmi, mm, start); /* We know there are no gaps so this will be non-NULL. */ @@ -66,6 +65,7 @@ static int mseal_apply(struct mm_struct *mm, prev = vma; for_each_vma_range(vmi, vma, end) { + const unsigned long curr_start = MAX(vma->vm_start, start); const unsigned long curr_end = MIN(vma->vm_end, end); if (!(vma->vm_flags & VM_SEALED)) { @@ -79,7 +79,6 @@ static int mseal_apply(struct mm_struct *mm, } prev = vma; - curr_start = curr_end; } return 0; diff --git a/mm/pagewalk.c b/mm/pagewalk.c index a94c401ab2cf..4e7bcd975c54 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -97,6 +97,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, struct mm_walk *walk) { + pud_t pudval = pudp_get(pud); pmd_t *pmd; unsigned long next; const struct mm_walk_ops *ops = walk->ops; @@ -105,6 +106,24 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, int err = 0; int depth = real_depth(3); + /* + * For PTE handling, pte_offset_map_lock() takes care of checking + * whether there actually is a page table. But it also has to be + * very careful about concurrent page table reclaim. + * + * Similarly, we have to be careful here - a PUD entry that points + * to a PMD table cannot go away, so we can just walk it. But if + * it's something else, we need to ensure we didn't race something, + * so need to retry. + * + * A pertinent example of this is a PUD refault after PUD split - + * we will need to split again or risk accessing invalid memory. + */ + if (!pud_present(pudval) || pud_leaf(pudval)) { + walk->action = ACTION_AGAIN; + return 0; + } + pmd = pmd_offset(pud, addr); do { again: @@ -218,12 +237,12 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, else if (pud_leaf(*pud) || !pud_present(*pud)) continue; /* Nothing to do. */ - if (pud_none(*pud)) - goto again; - err = walk_pmd_range(pud, addr, next, walk); if (err) break; + + if (walk->action == ACTION_AGAIN) + goto again; } while (pud++, addr = next, addr != end); return err; diff --git a/mm/rmap.c b/mm/rmap.c index 391337282e3f..8f08090d7eb9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -457,6 +457,13 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma) list_del(&avc->same_vma); anon_vma_chain_free(avc); } + + /* + * The anon_vma assigned to this VMA is no longer valid, as we were not + * able to correctly clone AVC state. Avoid inconsistent anon_vma tree + * state by resetting. + */ + vma->anon_vma = NULL; } /** diff --git a/mm/swap_state.c b/mm/swap_state.c index 6d0eef7470be..48aff2c917c0 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -494,6 +494,10 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry, __folio_set_locked(folio); __folio_set_swapbacked(folio); + + if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) + goto failed; + for (;;) { ret = swap_cache_add_folio(folio, entry, &shadow); if (!ret) @@ -514,11 +518,6 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry, goto failed; } - if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) { - swap_cache_del_folio(folio); - goto failed; - } - memcg1_swapin(entry, folio_nr_pages(folio)); if (shadow) workingset_refault(folio, shadow); diff --git a/mm/zswap.c b/mm/zswap.c index e6ec3295bdb0..16b2ef7223e1 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -942,9 +942,15 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) /* zswap entries of length PAGE_SIZE are not compressed. */ if (entry->length == PAGE_SIZE) { + void *dst; + WARN_ON_ONCE(input->length != PAGE_SIZE); - memcpy_from_sglist(kmap_local_folio(folio, 0), input, 0, PAGE_SIZE); + + dst = kmap_local_folio(folio, 0); + memcpy_from_sglist(dst, input, 0, PAGE_SIZE); dlen = PAGE_SIZE; + kunmap_local(dst); + flush_dcache_folio(folio); } else { sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6eb59e9f2aa8..e6393f17576b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -3095,7 +3095,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) * hci_connect_le serializes the connection attempts so only one * connection can be in BT_CONNECT at time. */ - if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { + if (conn->state == BT_CONNECT && READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { switch (hci_skb_event(hdev->sent_cmd)) { case HCI_EV_CONN_COMPLETE: case HCI_EV_LE_CONN_COMPLETE: diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 31308c1de4ec..01f8ceeb1c0c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4126,7 +4126,7 @@ static int hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } - if (hdev->req_status == HCI_REQ_PEND && + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 3166914b0d6c..45d16639874a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -25,11 +25,11 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, { bt_dev_dbg(hdev, "result 0x%2.2x", result); - if (hdev->req_status != HCI_REQ_PEND) + if (READ_ONCE(hdev->req_status) != HCI_REQ_PEND) return; hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; + WRITE_ONCE(hdev->req_status, HCI_REQ_DONE); /* Free the request command so it is not used as response */ kfree_skb(hdev->req_skb); @@ -167,20 +167,20 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hci_cmd_sync_add(&req, opcode, plen, param, event, sk); - hdev->req_status = HCI_REQ_PEND; + WRITE_ONCE(hdev->req_status, HCI_REQ_PEND); err = hci_req_sync_run(&req); if (err < 0) return ERR_PTR(err); err = wait_event_interruptible_timeout(hdev->req_wait_q, - hdev->req_status != HCI_REQ_PEND, + READ_ONCE(hdev->req_status) != HCI_REQ_PEND, timeout); if (err == -ERESTARTSYS) return ERR_PTR(-EINTR); - switch (hdev->req_status) { + switch (READ_ONCE(hdev->req_status)) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); break; @@ -194,7 +194,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, break; } - hdev->req_status = 0; + WRITE_ONCE(hdev->req_status, 0); hdev->req_result = 0; skb = hdev->req_rsp; hdev->req_rsp = NULL; @@ -665,9 +665,9 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { hdev->req_result = err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } @@ -683,12 +683,12 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { /* req_result is __u32 so error must be positive to be properly * propagated. */ hdev->req_result = err < 0 ? -err : err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); wake_up_interruptible(&hdev->req_wait_q); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5deb6c4f1e41..95c65fece39b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -926,16 +926,39 @@ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator) static int l2cap_get_ident(struct l2cap_conn *conn) { + u8 max; + int ident; + /* LE link does not support tools like l2ping so use the full range */ if (conn->hcon->type == LE_LINK) - return ida_alloc_range(&conn->tx_ida, 1, 255, GFP_ATOMIC); - + max = 255; /* Get next available identificator. * 1 - 128 are used by kernel. * 129 - 199 are reserved. * 200 - 254 are used by utilities like l2ping, etc. */ - return ida_alloc_range(&conn->tx_ida, 1, 128, GFP_ATOMIC); + else + max = 128; + + /* Allocate ident using min as last used + 1 (cyclic) */ + ident = ida_alloc_range(&conn->tx_ida, READ_ONCE(conn->tx_ident) + 1, + max, GFP_ATOMIC); + /* Force min 1 to start over */ + if (ident <= 0) { + ident = ida_alloc_range(&conn->tx_ida, 1, max, GFP_ATOMIC); + if (ident <= 0) { + /* If all idents are in use, log an error, this is + * extremely unlikely to happen and would indicate a bug + * in the code that idents are not being freed properly. + */ + BT_ERR("Unable to allocate ident: %d", ident); + return 0; + } + } + + WRITE_ONCE(conn->tx_ident, ident); + + return ident; } static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, @@ -1748,6 +1771,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + disable_delayed_work_sync(&conn->info_timer); + disable_delayed_work_sync(&conn->id_addr_timer); + mutex_lock(&conn->lock); kfree_skb(conn->rx_skb); @@ -1763,8 +1789,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) ida_destroy(&conn->tx_ida); - cancel_delayed_work_sync(&conn->id_addr_timer); - l2cap_unregister_all_users(conn); /* Force the connection to be immediately dropped */ @@ -1783,9 +1807,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_put(chan); } - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) - cancel_delayed_work_sync(&conn->info_timer); - hci_chan_del(conn->hchan); conn->hchan = NULL; @@ -2377,6 +2398,9 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, /* Remote device may have requested smaller PDUs */ pdu_len = min_t(size_t, pdu_len, chan->remote_mps); + if (!pdu_len) + return -EINVAL; + if (len <= pdu_len) { sar = L2CAP_SAR_UNSEGMENTED; sdu_len = 0; @@ -4312,14 +4336,16 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); - if (chan->mode == L2CAP_MODE_ERTM || - chan->mode == L2CAP_MODE_STREAMING) - err = l2cap_ertm_init(chan); + if (chan->state != BT_CONNECTED) { + if (chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_STREAMING) + err = l2cap_ertm_init(chan); - if (err < 0) - l2cap_send_disconn_req(chan, -err); - else - l2cap_chan_ready(chan); + if (err < 0) + l2cap_send_disconn_req(chan, -err); + else + l2cap_chan_ready(chan); + } goto unlock; } @@ -5081,14 +5107,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); - /* Always respond with the same number of scids as in the request */ - rsp_len = cmd_len; - if (num_scid > L2CAP_ECRED_MAX_CID) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } + /* Always respond with the same number of scids as in the request */ + rsp_len = cmd_len; + mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); @@ -6607,6 +6633,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits = l2cap_le_rx_credits(chan); + if (chan->mode != L2CAP_MODE_LE_FLOWCTL && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) + return; + if (chan->rx_credits >= return_credits) return; @@ -6690,6 +6720,11 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (!chan->sdu) { u16 sdu_len; + if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) { + err = -EINVAL; + goto failed; + } + sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 597686790371..71e8c1b45bce 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1698,6 +1698,9 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan) struct sock *sk = chan->data; struct sock *parent; + if (!sk) + return; + lock_sock(sk); parent = bt_sk(sk)->parent; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d52238ce6a9a..e5f9287fb826 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5355,7 +5355,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, * hci_adv_monitors_clear is about to be called which will take care of * freeing the adv_monitor instances. */ - if (status == -ECANCELED && !mgmt_pending_valid(hdev, cmd)) + if (status == -ECANCELED || !mgmt_pending_valid(hdev, cmd)) return; monitor = cmd->user_data; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e7db50165879..584e059de20a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -401,7 +401,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) struct sock *sk; sco_conn_lock(conn); - sk = conn->sk; + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (!sk) @@ -410,11 +410,15 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) BT_DBG("sk %p len %u", sk, skb->len); if (sk->sk_state != BT_CONNECTED) - goto drop; + goto drop_put; - if (!sock_queue_rcv_skb(sk, skb)) + if (!sock_queue_rcv_skb(sk, skb)) { + sock_put(sk); return; + } +drop_put: + sock_put(sk); drop: kfree_skb(skb); } diff --git a/net/can/af_can.c b/net/can/af_can.c index f70e2ba0aadc..7bc86b176b4d 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -469,7 +469,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, rcv->can_id = can_id; rcv->mask = mask; - rcv->matches = 0; + atomic_long_set(&rcv->matches, 0); rcv->func = func; rcv->data = data; rcv->ident = ident; @@ -573,7 +573,7 @@ EXPORT_SYMBOL(can_rx_unregister); static inline void deliver(struct sk_buff *skb, struct receiver *rcv) { rcv->func(skb, rcv->data); - rcv->matches++; + atomic_long_inc(&rcv->matches); } static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb) diff --git a/net/can/af_can.h b/net/can/af_can.h index 22f3352c77fe..87887014f562 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -52,7 +52,7 @@ struct receiver { struct hlist_node list; canid_t can_id; canid_t mask; - unsigned long matches; + atomic_long_t matches; void (*func)(struct sk_buff *skb, void *data); void *data; char *ident; diff --git a/net/can/gw.c b/net/can/gw.c index 8ee4d67a07d3..0ec99f68aa45 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -375,10 +375,10 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, return; if (from <= to) { - for (i = crc8->from_idx; i <= crc8->to_idx; i++) + for (i = from; i <= to; i++) crc = crc8->crctab[crc ^ cf->data[i]]; } else { - for (i = crc8->from_idx; i >= crc8->to_idx; i--) + for (i = from; i >= to; i--) crc = crc8->crctab[crc ^ cf->data[i]]; } @@ -397,7 +397,7 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, break; } - cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; + cf->data[res] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_pos(struct canfd_frame *cf, diff --git a/net/can/isotp.c b/net/can/isotp.c index da3b72e7afcc..2770f43f4951 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1248,12 +1248,6 @@ static int isotp_release(struct socket *sock) so->ifindex = 0; so->bound = 0; - if (so->rx.buf != so->rx.sbuf) - kfree(so->rx.buf); - - if (so->tx.buf != so->tx.sbuf) - kfree(so->tx.buf); - sock_orphan(sk); sock->sk = NULL; @@ -1622,6 +1616,21 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static void isotp_sock_destruct(struct sock *sk) +{ + struct isotp_sock *so = isotp_sk(sk); + + /* do the standard CAN sock destruct work */ + can_sock_destruct(sk); + + /* free potential extended PDU buffers */ + if (so->rx.buf != so->rx.sbuf) + kfree(so->rx.buf); + + if (so->tx.buf != so->tx.sbuf) + kfree(so->tx.buf); +} + static int isotp_init(struct sock *sk) { struct isotp_sock *so = isotp_sk(sk); @@ -1666,6 +1675,9 @@ static int isotp_init(struct sock *sk) list_add_tail(&so->notifier, &isotp_notifier_list); spin_unlock(&isotp_notifier_lock); + /* re-assign default can_sock_destruct() reference */ + sk->sk_destruct = isotp_sock_destruct; + return 0; } diff --git a/net/can/proc.c b/net/can/proc.c index 0938bf7dd646..de4d05ae3459 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -196,7 +196,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, " %-5s %03x %08x %pK %pK %8ld %s\n"; seq_printf(m, fmt, DNAME(dev), r->can_id, r->mask, - r->func, r->data, r->matches, r->ident); + r->func, r->data, atomic_long_read(&r->matches), + r->ident); } } diff --git a/net/core/dev.c b/net/core/dev.c index 14a83f2035b9..fc5557062414 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3769,6 +3769,22 @@ static netdev_features_t dflt_features_check(struct sk_buff *skb, return vlan_features_check(skb, features); } +static bool skb_gso_has_extension_hdr(const struct sk_buff *skb) +{ + if (!skb->encapsulation) + return ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) != sizeof(struct ipv6hdr)); + else + return (!skb_inner_network_header_was_set(skb) || + ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + inner_ip_hdr(skb)->version == 6)) && + skb_inner_network_header_len(skb) != sizeof(struct ipv6hdr))); +} + static netdev_features_t gso_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -3816,11 +3832,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * so neither does TSO that depends on it. */ if (features & NETIF_F_IPV6_CSUM && - (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || - (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && - vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && - skb_transport_header_was_set(skb) && - skb_network_header_len(skb) != sizeof(struct ipv6hdr)) + skb_gso_has_extension_hdr(skb)) features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); return features; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dad4b1054955..fae8034efbff 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -629,6 +629,9 @@ int rtnl_link_register(struct rtnl_link_ops *ops) unlock: mutex_unlock(&link_ops_mutex); + if (err) + cleanup_srcu_struct(&ops->srcu); + return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); @@ -707,11 +710,14 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) goto out; ops = master_dev->rtnl_link_ops; - if (!ops || !ops->get_slave_size) + if (!ops) + goto out; + size += nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_SLAVE_KIND */ + if (!ops->get_slave_size) goto out; /* IFLA_INFO_SLAVE_DATA + nested data */ - size = nla_total_size(sizeof(struct nlattr)) + - ops->get_slave_size(master_dev, dev); + size += nla_total_size(sizeof(struct nlattr)) + + ops->get_slave_size(master_dev, dev); out: rcu_read_unlock(); @@ -1267,6 +1273,21 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) return size; } +static size_t rtnl_dev_parent_size(const struct net_device *dev) +{ + size_t size = 0; + + /* IFLA_PARENT_DEV_NAME */ + if (dev->dev.parent) + size += nla_total_size(strlen(dev_name(dev->dev.parent)) + 1); + + /* IFLA_PARENT_DEV_BUS_NAME */ + if (dev->dev.parent && dev->dev.parent->bus) + size += nla_total_size(strlen(dev->dev.parent->bus->name) + 1); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1328,6 +1349,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */ + nla_total_size(2) /* IFLA_HEADROOM */ + nla_total_size(2) /* IFLA_TAILROOM */ + + rtnl_dev_parent_size(dev) + 0; if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2c922afadb8f..6dfc0bcdef65 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -235,10 +235,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5dfac6ce1110..e961936b6be7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -154,20 +154,6 @@ bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) } EXPORT_SYMBOL(inet_sk_get_local_port_range); -static bool inet_use_bhash2_on_bind(const struct sock *sk) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) - return false; - - if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) - return true; - } -#endif - return sk->sk_rcv_saddr != htonl(INADDR_ANY); -} - static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) @@ -259,7 +245,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, * checks separately because their spinlocks have to be acquired/released * independently of each other, to prevent possible deadlocks */ - if (inet_use_bhash2_on_bind(sk)) + if (inet_use_hash2_on_bind(sk)) return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok); @@ -376,7 +362,7 @@ other_parity_scan: head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); - if (inet_use_bhash2_on_bind(sk)) { + if (inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) goto next_port; } @@ -562,7 +548,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) check_bind_conflict = false; } - if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { + if (check_bind_conflict && inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) goto fail_unlock; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b60fad393e18..cb99a3c27053 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -287,7 +287,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } else { hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); - if (hslot->count > 10) { + if (inet_use_hash2_on_bind(sk) && hslot->count > 10) { int exist; unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e55f139e05d..f4e23b543585 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2862,7 +2862,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) fib6_add_gc_list(rt); } else { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } spin_unlock_bh(&table->tb6_lock); @@ -4840,7 +4840,7 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); - fib6_remove_gc_list(f6i); + fib6_may_remove_gc_list(net, f6i); } else { fib6_set_expires(f6i, expires); fib6_add_gc_list(f6i); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e75da98f5283..9f75313734f8 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -271,10 +271,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9058e71241dc..dd26657b6a4a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1133,7 +1133,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); - fib6_remove_gc_list(iter); + fib6_may_remove_gc_list(info->nl_net, iter); } else { fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); @@ -2348,6 +2348,17 @@ static void fib6_flush_trees(struct net *net) /* * Garbage collection */ +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now) +{ + bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires; + int old_more = gc_args->more; + + rt6_age_exceptions(rt, gc_args, now); + + if (!may_expire && old_more == gc_args->more) + fib6_remove_gc_list(rt); +} static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { @@ -2370,7 +2381,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Note, that clones are aged out * only if they are not in use now. */ - rt6_age_exceptions(rt, gc_args, now); + fib6_age_exceptions(rt, gc_args, now); return 0; } diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 4ad8b2032f1f..5561bd9cea81 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par) pr_debug("unknown flags %X\n", rtinfo->invflags); return -EINVAL; } + if (rtinfo->addrnr > IP6T_RT_HOPS) { + pr_debug("too many addresses specified\n"); + return -EINVAL; + } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) || diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 08cd86f49bf9..cb521700cee7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1033,7 +1033,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } else { fib6_set_expires(rt, jiffies + HZ * lifetime); fib6_add_gc_list(rt); diff --git a/net/key/af_key.c b/net/key/af_key.c index 0756bac62f7c..72ac2ace419d 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3518,7 +3518,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress * static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, - uint32_t reqid, uint8_t family, + uint32_t reqid, sa_family_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; @@ -3583,12 +3583,17 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { - /* old locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->old_family); - /* new locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->new_family); + int pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->old_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->new_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; } size += sizeof(struct sadb_msg) + size_pol; diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index a7552a46d6ac..4f39bf7c843f 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int timeout) { const struct nf_conntrack_helper *helper; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct iphdr *iph = ip_hdr(skb); struct rtable *rt = skb_rtable(skb); @@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->expectfn = NULL; exp->flags = NF_CT_EXPECT_PERMANENT; exp->class = NF_CT_EXPECT_CLASS_DEFAULT; - exp->helper = NULL; - + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif nf_ct_expect_related(exp, 0); nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 81baf2082604..9df159448b89 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; + lockdep_nfct_expect_lock_held(); + rcu_read_lock(); notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index cfc2daa3fc7f..24d0576d84b7 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct net *net = nf_ct_exp_net(exp); struct nf_conntrack_net *cnet; + lockdep_nfct_expect_lock_held(); WARN_ON(!master_help); WARN_ON(timer_pending(&exp->timeout)); @@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, const struct net *net) { return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - net_eq(net, nf_ct_net(i->master)) && - nf_ct_zone_equal_any(i->master, zone); + net_eq(net, read_pnet(&i->net)) && + nf_ct_exp_zone_equal_any(i, zone); } bool nf_ct_remove_expect(struct nf_conntrack_expect *exp) { + lockdep_nfct_expect_lock_held(); + if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net, struct nf_conntrack_expect *i, *exp = NULL; unsigned int h; + lockdep_nfct_expect_lock_held(); + if (!cnet->expect_count) return NULL; @@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); +/* This function can only be used from packet path, where accessing + * master's helper is safe, because the packet holds a reference on + * the conntrack object. Never use it from control plane. + */ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, u_int8_t family, const union nf_inet_addr *saddr, const union nf_inet_addr *daddr, u_int8_t proto, const __be16 *src, const __be16 *dst) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn *ct = exp->master; + struct net *net = read_pnet(&ct->ct_net); + struct nf_conn_help *help; int len; if (family == AF_INET) @@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, exp->flags = 0; exp->class = class; exp->expectfn = NULL; - exp->helper = NULL; + + help = nfct_help(ct); + if (help) + helper = rcu_dereference(help->helper); + + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif exp->tuple.src.l3num = family; exp->tuple.dst.protonum = proto; @@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect, unsigned int h; int ret = 0; + lockdep_nfct_expect_lock_held(); + if (!master_help) { ret = -ESHUTDOWN; goto out; @@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, nf_ct_expect_insert(expect); - spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); + spin_unlock_bh(&nf_conntrack_expect_lock); + return 0; out: spin_unlock_bh(&nf_conntrack_expect_lock); @@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; struct nf_conntrack_helper *helper; + struct net *net = seq_file_net(s); struct hlist_node *n = v; char *delim = ""; expect = hlist_entry(n, struct nf_conntrack_expect, hnode); + if (!net_eq(nf_ct_exp_net(expect), net)) + return 0; + if (expect->timeout.function) seq_printf(s, "%ld ", timer_pending(&expect->timeout) ? (long)(expect->timeout.expires - jiffies)/HZ : 0); @@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_USERSPACE) seq_printf(s, "%sUSERSPACE", delim); - helper = rcu_dereference(nfct_help(expect->master)->helper); + helper = rcu_dereference(expect->helper); if (helper) { seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); if (helper->expect_policy[expect->class].name[0]) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a2a0e22ccee1..3f5c50455b71 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = &nf_conntrack_helper_h245; + rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - exp->helper = nf_conntrack_helper_ras; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ceb48c3ca0a4..1b330ba6613b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { - struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; - if (exp->helper == me) - return true; - - this = rcu_dereference_protected(help->helper, + this = rcu_dereference_protected(exp->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } @@ -421,6 +417,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); + + /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as + * last step, this ensures rcu readers of exp->helper are done. + * No need for another synchronize_rcu() here. + */ } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c156574e1273..3f408f3713bb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -910,8 +910,8 @@ struct ctnetlink_filter { }; static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { - [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 }, - [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 }, + [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), + [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), }; static int ctnetlink_parse_filter(const struct nlattr *attr, @@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr, if (ret) return ret; - if (tb[CTA_FILTER_ORIG_FLAGS]) { + if (tb[CTA_FILTER_ORIG_FLAGS]) filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]); - if (filter->orig_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } - if (tb[CTA_FILTER_REPLY_FLAGS]) { + if (tb[CTA_FILTER_REPLY_FLAGS]) filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]); - if (filter->reply_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } return 0; } @@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, - [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, + [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK), [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, @@ -3012,7 +3006,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, { struct nf_conn *master = exp->master; long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; - struct nf_conn_help *help; + struct nf_conntrack_helper *helper; #if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; @@ -3057,15 +3051,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; - help = nfct_help(master); - if (help) { - struct nf_conntrack_helper *helper; - helper = rcu_dereference(help->helper); - if (helper && - nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) - goto nla_put_failure; - } + helper = rcu_dereference(exp->helper); + if (helper && + nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) + goto nla_put_failure; + expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn); if (expfn != NULL && nla_put_string(skb, CTA_EXPECT_FN, expfn->name)) @@ -3358,31 +3349,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb, if (err < 0) return err; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; } } - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb2) { - nf_ct_expect_put(exp); - return -ENOMEM; - } - rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + if (err <= 0) { kfree_skb(skb2); return -ENOMEM; @@ -3394,12 +3391,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb, static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data) { struct nf_conntrack_helper *helper; - const struct nf_conn_help *m_help; const char *name = data; - m_help = nfct_help(exp->master); - - helper = rcu_dereference(m_help->helper); + helper = rcu_dereference(exp->helper); if (!helper) return false; @@ -3432,22 +3426,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb, if (err < 0) return err; + spin_lock_bh(&nf_conntrack_expect_lock); + /* bump usage count to 2 */ exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; } } /* after list removal, usage count == 1 */ - spin_lock_bh(&nf_conntrack_expect_lock); if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); @@ -3534,9 +3532,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { - u_int32_t class = 0; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct nf_conn_help *help; + u32 class = 0; int err; help = nfct_help(ct); @@ -3573,7 +3572,13 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp->class = class; exp->master = ct; - exp->helper = helper; + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif + if (!helper) + helper = rcu_dereference(help->helper); + rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 0c1d086e96cb..b67426c2189b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1385,9 +1385,9 @@ nla_put_failure: } static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { - [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2), + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; @@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; - if (tb[CTA_PROTOINFO_TCP_STATE] && - nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) - return -EINVAL; - spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4ab5ef71d96d..939502ff7c87 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || - nfct_help(exp->master)->helper != nfct_help(ct)->helper || + exp->helper != nfct_help(ct)->helper || exp->class != class) break; #if IS_ENABLED(CONFIG_NF_NAT) @@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; + bool have_rtp_addr = false; hooks = rcu_dereference(nf_nat_sip_hooks); @@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, caddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen, SDP_HDR_CONNECTION, SDP_HDR_MEDIA, - &matchoff, &matchlen, &caddr) > 0) + &matchoff, &matchlen, &caddr) > 0) { caddr_len = matchlen; + memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); + have_rtp_addr = true; + } mediaoff = sdpoff; for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { @@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen, &maddr) > 0) { maddr_len = matchlen; memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); - } else if (caddr_len) + have_rtp_addr = true; + } else if (caddr_len) { memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else { + have_rtp_addr = true; + } else { nf_ct_helper_log(skb, ct, "cannot parse SDP message"); return NF_DROP; } @@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, /* Update session connection and owner addresses */ hooks = rcu_dereference(nf_nat_sip_hooks); - if (hooks && ct->status & IPS_NAT_MASK) + if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr) ret = hooks->sdp_session(skb, protoff, dataoff, dptr, datalen, sdpoff, &rtp_addr); @@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; - exp->helper = helper; + rcu_assign_pointer(exp->helper, helper); exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; hooks = rcu_dereference(nf_nat_sip_hooks); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b35a90955e2e..fcbe54940b2e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log, if (data_len) { struct nlattr *nla; - int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < nla_total_size(data_len)) + nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len); + if (!nla) goto nla_put_failure; - nla = skb_put(inst->skb, nla_total_size(data_len)); - nla->nla_type = NFULA_PAYLOAD; - nla->nla_len = size; - if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) BUG(); } diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 7ff90325c97f..6395982e4d95 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fe8bd497d74a..737c339decd0 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -572,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals) return array; } -#define NFT_ARRAY_EXTRA_SIZE 10240 - /* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider * packed representation coming from userspace for anonymous sets too. */ static u32 nft_array_elems(const struct nft_set *set) { - u32 nelems = atomic_read(&set->nelems); + u32 nelems = atomic_read(&set->nelems) - set->ndeact; /* Adjacent intervals are represented with a single start element in * anonymous sets, use the current element counter as is. @@ -595,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set) return (nelems / 2) + 2; } -static int nft_array_may_resize(const struct nft_set *set) +#define NFT_ARRAY_INITIAL_SIZE 1024 +#define NFT_ARRAY_INITIAL_ANON_SIZE 16 +#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval)) + +static int nft_array_may_resize(const struct nft_set *set, bool flush) { - u32 nelems = nft_array_elems(set), new_max_intervals; + u32 initial_intervals, max_intervals, new_max_intervals, delta; + u32 shrinked_max_intervals, nelems = nft_array_elems(set); struct nft_rbtree *priv = nft_set_priv(set); struct nft_array *array; - if (!priv->array_next) { - array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE); - if (!array) - return -ENOMEM; + if (nft_set_is_anonymous(set)) + initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE; + else + initial_intervals = NFT_ARRAY_INITIAL_SIZE; + + if (priv->array_next) { + max_intervals = priv->array_next->max_intervals; + new_max_intervals = priv->array_next->max_intervals; + } else { + if (priv->array) { + max_intervals = priv->array->max_intervals; + new_max_intervals = priv->array->max_intervals; + } else { + max_intervals = 0; + new_max_intervals = initial_intervals; + } + } - priv->array_next = array; + if (nft_set_is_anonymous(set)) + goto maybe_grow; + + if (flush) { + /* Set flush just started, nelems still report elements.*/ + nelems = 0; + new_max_intervals = NFT_ARRAY_INITIAL_SIZE; + goto realloc_array; } - if (nelems < priv->array_next->max_intervals) - return 0; + if (check_add_overflow(new_max_intervals, new_max_intervals, + &shrinked_max_intervals)) + return -EOVERFLOW; + + shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3); - new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE; - if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) + if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE && + nelems < shrinked_max_intervals) { + new_max_intervals = shrinked_max_intervals; + goto realloc_array; + } +maybe_grow: + if (nelems > new_max_intervals) { + if (nft_set_is_anonymous(set) && + new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) { + new_max_intervals <<= 1; + } else { + delta = new_max_intervals >> 1; + if (check_add_overflow(new_max_intervals, delta, + &new_max_intervals)) + return -EOVERFLOW; + } + } + +realloc_array: + if (WARN_ON_ONCE(nelems > new_max_intervals)) return -ENOMEM; + if (priv->array_next) { + if (max_intervals == new_max_intervals) + return 0; + + if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) + return -ENOMEM; + } else { + array = nft_array_alloc(new_max_intervals); + if (!array) + return -ENOMEM; + + priv->array_next = array; + } + return 0; } @@ -630,7 +688,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_rbtree_maybe_reset_start_cookie(priv, tstamp); - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return -ENOMEM; do { @@ -741,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, nft_rbtree_interval_null(set, this)) priv->start_rbe_cookie = 0; - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return NULL; while (parent != NULL) { @@ -811,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, switch (iter->type) { case NFT_ITER_UPDATE_CLONE: - if (nft_array_may_resize(set) < 0) { + if (nft_array_may_resize(set, true) < 0) { iter->err = -ENOMEM; break; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 43d871525dbc..5f46c4b5720f 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -579,8 +579,7 @@ static int nci_close_device(struct nci_dev *ndev) skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); - /* Flush RX and TX wq */ - flush_workqueue(ndev->rx_wq); + /* Flush TX wq, RX wq flush can't be under the lock */ flush_workqueue(ndev->tx_wq); /* Reset device */ @@ -592,13 +591,13 @@ static int nci_close_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty - * and no works are scheduled. + * rx work may be running but will see that NCI_UP was cleared */ ndev->ops->close(ndev); clear_bit(NCI_INIT, &ndev->flags); - /* Flush cmd wq */ + /* Flush cmd and tx wq */ flush_workqueue(ndev->cmd_wq); timer_delete_sync(&ndev->cmd_timer); @@ -613,6 +612,9 @@ static int nci_close_device(struct nci_dev *ndev) mutex_unlock(&ndev->req_lock); + /* rx_work may take req_lock via nci_deactivate_target */ + flush_workqueue(ndev->rx_wq); + return 0; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 67fbf6e48a30..13052408a132 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2953,6 +2953,8 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) return -EINVAL; + if (key_len != sizeof(struct ovs_key_mpls)) + return -EINVAL; break; case OVS_KEY_ATTR_SCTP: diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6574f9bcdc02..12055af832dc 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -151,11 +151,15 @@ static void vport_netdev_free(struct rcu_head *rcu) void ovs_netdev_detach_dev(struct vport *vport) { ASSERT_RTNL(); - vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(vport->dev); netdev_upper_dev_unlink(vport->dev, netdev_master_upper_dev_get(vport->dev)); dev_set_promiscuity(vport->dev, -1); + + /* paired with smp_mb() in netdev_destroy() */ + smp_wmb(); + + vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; } static void netdev_destroy(struct vport *vport) @@ -174,6 +178,9 @@ static void netdev_destroy(struct vport *vport) rtnl_unlock(); } + /* paired with smp_wmb() in ovs_netdev_detach_dev() */ + smp_mb(); + call_rcu(&vport->rcu, vport_netdev_free); } @@ -189,8 +196,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - netdev_put(vport->dev, &vport->dev_tracker); - vport->dev = NULL; rtnl_unlock(); call_rcu(&vport->rcu, vport_netdev_free); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 72d0935139f0..bb2d88205e5a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3135,6 +3135,7 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + WRITE_ONCE(po->num, 0); packet_cached_dev_reset(po); if (po->prot_hook.dev) { diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index d833e36f7fd4..c1d9b923938d 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -135,9 +135,16 @@ out: sock_put(sk); } +static bool smc_rx_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + /* smc_spd_priv in buf->private is not shareable; disallow cloning. */ + return false; +} + static const struct pipe_buf_operations smc_pipe_ops = { .release = smc_rx_pipe_buf_release, - .get = generic_pipe_buf_get + .get = smc_rx_pipe_buf_get, }; static void smc_rx_spd_release(struct splice_pipe_desc *spd, diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5fe07f110fe8..dd9dda759bbb 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -246,6 +246,7 @@ static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->decrypt_pending); + __skb_queue_purge(&ctx->async_hold); return ctx->async_wait.err; } @@ -2225,7 +2226,6 @@ recv_end: /* Wait for all previously submitted records to be decrypted */ ret = tls_decrypt_async_wait(ctx); - __skb_queue_purge(&ctx->async_hold); if (ret) { if (err >= 0 || err == -EINPROGRESS) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 4ed346e682c7..dc1312ed5a09 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -75,7 +75,10 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) spin_lock_bh(&xfrm_input_afinfo_lock); if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) { - if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo)) + const struct xfrm_input_afinfo *cur; + + cur = rcu_access_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family]); + if (unlikely(cur != afinfo)) err = -EINVAL; else RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL); diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 050a82101ca5..97bc979e55ba 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -901,6 +901,12 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) { iptfs_skb_add_frags(newskb, fragwalk, data, copylen); } else { + if (skb_linearize(newskb)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + /* copy fragment data into newskb */ if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) { @@ -991,6 +997,11 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, iplen = be16_to_cpu(iph->tot_len); iphlen = iph->ihl << 2; + if (iplen < iphlen || iphlen < sizeof(*iph)) { + XFRM_INC_STATS(net, + LINUX_MIB_XFRMINHDRERROR); + goto done; + } protocol = cpu_to_be16(ETH_P_IP); XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos; } else if (iph->version == 0x6) { @@ -2653,9 +2664,6 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) if (!xtfs) return -ENOMEM; - x->mode_data = xtfs; - xtfs->x = x; - xtfs->ra_newskb = NULL; if (xtfs->cfg.reorder_win_size) { xtfs->w_saved = kzalloc_objs(*xtfs->w_saved, @@ -2666,6 +2674,9 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) } } + x->mode_data = xtfs; + xtfs->x = x; + return 0; } diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c index ebf95d48e86c..1856beee0149 100644 --- a/net/xfrm/xfrm_nat_keepalive.c +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -261,7 +261,7 @@ int __net_init xfrm_nat_keepalive_net_init(struct net *net) int xfrm_nat_keepalive_net_fini(struct net *net) { - cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work); + disable_delayed_work_sync(&net->xfrm.nat_keepalive_work); return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e1a522ab1c5..362939aa56cf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4156,7 +4156,7 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) int i; for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { - if (xfrm_policy_afinfo[i] != afinfo) + if (rcu_access_pointer(xfrm_policy_afinfo[i]) != afinfo) continue; RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); break; @@ -4242,7 +4242,7 @@ static int __net_init xfrm_policy_init(struct net *net) net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; htab = &net->xfrm.policy_bydst[dir]; - htab->table = xfrm_hash_alloc(sz); + rcu_assign_pointer(htab->table, xfrm_hash_alloc(sz)); if (!htab->table) goto out_bydst; htab->hmask = hmask; @@ -4269,7 +4269,7 @@ out_bydst: struct xfrm_policy_hash *htab; htab = &net->xfrm.policy_bydst[dir]; - xfrm_hash_free(htab->table, sz); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } xfrm_hash_free(net->xfrm.policy_byidx, sz); out_byidx: @@ -4282,6 +4282,8 @@ static void xfrm_policy_fini(struct net *net) unsigned int sz; int dir; + disable_work_sync(&net->xfrm.policy_hthresh.work); + flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); @@ -4295,8 +4297,8 @@ static void xfrm_policy_fini(struct net *net) htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); - WARN_ON(!hlist_empty(htab->table)); - xfrm_hash_free(htab->table, sz); + WARN_ON(!hlist_empty(rcu_dereference_protected(htab->table, true))); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98b362d51836..1748d374abca 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,7 +53,7 @@ static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); static HLIST_HEAD(xfrm_state_gc_list); static HLIST_HEAD(xfrm_state_dev_gc_list); -static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +static inline bool xfrm_state_hold_rcu(struct xfrm_state *x) { return refcount_inc_not_zero(&x->refcnt); } @@ -870,7 +870,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, task_valid); @@ -891,7 +891,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool struct xfrm_state *x; struct xfrm_dev_offload *xso; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (xso->dev == dev && @@ -931,7 +931,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -973,7 +973,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (!xfrm_state_kern(x) && xso->dev == dev) { @@ -1563,23 +1563,23 @@ found: list_add(&x->km.all, &net->xfrm.state_all); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); XFRM_STATE_INSERT(bydst, &x->bydst, - net->xfrm.state_bydst + h, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, encap_family); XFRM_STATE_INSERT(bysrc, &x->bysrc, - net->xfrm.state_bysrc + h, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); INIT_HLIST_NODE(&x->state_cache); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); XFRM_STATE_INSERT(byspi, &x->byspi, - net->xfrm.state_byspi + h, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); XFRM_STATE_INSERT(byseq, &x->byseq, - net->xfrm.state_byseq + h, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; @@ -1652,7 +1652,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -1703,18 +1703,12 @@ static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 sp struct xfrm_state *x; unsigned int i; - rcu_read_lock(); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) { - if (x->id.spi == spi && x->id.proto == proto) { - if (!xfrm_state_hold_rcu(x)) - continue; - rcu_read_unlock(); + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byspi, net) + i, byspi) { + if (x->id.spi == spi && x->id.proto == proto) return x; - } } } - rcu_read_unlock(); return NULL; } @@ -1730,25 +1724,29 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); - XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h, + XFRM_STATE_INSERT(byseq, &x->byseq, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } @@ -1775,7 +1773,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) u32 cpu_id = xnew->pcpu_num; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && x->if_id == if_id && @@ -1811,7 +1809,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1868,10 +1866,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL_SOFT); list_add(&x->km.all, &net->xfrm.state_all); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); net->xfrm.state_num++; @@ -2091,7 +2091,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (m->reqid) { h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2110,7 +2110,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n } else { h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2264,6 +2264,7 @@ out: err = 0; x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); } @@ -2312,7 +2313,7 @@ void xfrm_state_update_stats(struct net *net) spin_lock_bh(&net->xfrm.xfrm_state_lock); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst) + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) xfrm_dev_state_update_stats(x); } spin_unlock_bh(&net->xfrm.xfrm_state_lock); @@ -2503,7 +2504,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s unsigned int h = xfrm_seq_hash(net, seq); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, byseq) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->pcpu_num == pcpu_num && @@ -2602,12 +2603,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, if (!x0) { x->id.spi = newspi; h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type); + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, + x->xso.type); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; goto unlock; } - xfrm_state_put(x0); spin_unlock_bh(&net->xfrm.xfrm_state_lock); next: @@ -3258,6 +3260,7 @@ EXPORT_SYMBOL(xfrm_init_state); int __net_init xfrm_state_init(struct net *net) { + struct hlist_head *ndst, *nsrc, *nspi, *nseq; unsigned int sz; if (net_eq(net, &init_net)) @@ -3268,18 +3271,25 @@ int __net_init xfrm_state_init(struct net *net) sz = sizeof(struct hlist_head) * 8; - net->xfrm.state_bydst = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bydst) + ndst = xfrm_hash_alloc(sz); + if (!ndst) goto out_bydst; - net->xfrm.state_bysrc = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bysrc) + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + + nsrc = xfrm_hash_alloc(sz); + if (!nsrc) goto out_bysrc; - net->xfrm.state_byspi = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byspi) + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + + nspi = xfrm_hash_alloc(sz); + if (!nspi) goto out_byspi; - net->xfrm.state_byseq = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byseq) + rcu_assign_pointer(net->xfrm.state_byspi, nspi); + + nseq = xfrm_hash_alloc(sz); + if (!nseq) goto out_byseq; + rcu_assign_pointer(net->xfrm.state_byseq, nseq); net->xfrm.state_cache_input = alloc_percpu(struct hlist_head); if (!net->xfrm.state_cache_input) @@ -3295,17 +3305,19 @@ int __net_init xfrm_state_init(struct net *net) return 0; out_state_cache_input: - xfrm_hash_free(net->xfrm.state_byseq, sz); + xfrm_hash_free(nseq, sz); out_byseq: - xfrm_hash_free(net->xfrm.state_byspi, sz); + xfrm_hash_free(nspi, sz); out_byspi: - xfrm_hash_free(net->xfrm.state_bysrc, sz); + xfrm_hash_free(nsrc, sz); out_bysrc: - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(ndst, sz); out_bydst: return -ENOMEM; } +#define xfrm_state_deref_netexit(table) \ + rcu_dereference_protected((table), true /* netns is going away */) void xfrm_state_fini(struct net *net) { unsigned int sz; @@ -3318,17 +3330,17 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); for (i = 0; i <= net->xfrm.state_hmask; i++) { - WARN_ON(!hlist_empty(net->xfrm.state_byseq + i)); - WARN_ON(!hlist_empty(net->xfrm.state_byspi + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bydst + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byseq) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byspi) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bysrc) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bydst) + i)); } sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); - xfrm_hash_free(net->xfrm.state_byseq, sz); - xfrm_hash_free(net->xfrm.state_byspi, sz); - xfrm_hash_free(net->xfrm.state_bysrc, sz); - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byseq), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byspi), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bysrc), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bydst), sz); free_percpu(net->xfrm.state_cache_input); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 403b5ecac2c5..1656b487f833 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -35,6 +35,15 @@ #endif #include <linux/unaligned.h> +static struct sock *xfrm_net_nlsk(const struct net *net, const struct sk_buff *skb) +{ + /* get the source of this request, see netlink_unicast_kernel */ + const struct sock *sk = NETLINK_CB(skb).sk; + + /* sk is refcounted, the netns stays alive and nlsk with it */ + return rcu_dereference_protected(net->xfrm.nlsk, sk->sk_net_refcnt); +} + static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type, struct netlink_ext_ack *extack) { @@ -1727,7 +1736,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1787,7 +1796,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1807,7 +1816,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1850,6 +1859,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); if (pcpu_num >= num_possible_cpus()) { err = -EINVAL; + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto out_noput; } } @@ -1897,7 +1907,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -2542,7 +2552,7 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(r_skb, r_nlh); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, portid); } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2608,7 +2618,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } } else { @@ -2781,7 +2791,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -3001,8 +3011,10 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_SA_PCPU]) { x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); err = -EINVAL; - if (x->pcpu_num >= num_possible_cpus()) + if (x->pcpu_num >= num_possible_cpus()) { + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto free_state; + } } err = verify_newpolicy_info(&ua->policy, extack); @@ -3483,7 +3495,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + err = netlink_dump_start(xfrm_net_nlsk(net, skb), skb, nlh, &c); goto err; } @@ -3673,7 +3685,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) } if (x->if_id) l += nla_total_size(sizeof(x->if_id)); - if (x->pcpu_num) + if (x->pcpu_num != UINT_MAX) l += nla_total_size(sizeof(x->pcpu_num)); /* Must count x->lastused as it may become non-zero behind our back. */ diff --git a/rust/kernel/regulator.rs b/rust/kernel/regulator.rs index 4f7837c7e53a..41e730cedc81 100644 --- a/rust/kernel/regulator.rs +++ b/rust/kernel/regulator.rs @@ -23,7 +23,10 @@ use crate::{ prelude::*, }; -use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull}; +use core::{ + marker::PhantomData, + mem::ManuallyDrop, // +}; mod private { pub trait Sealed {} @@ -229,15 +232,17 @@ pub fn devm_enable_optional(dev: &Device<Bound>, name: &CStr) -> Result { /// /// # Invariants /// -/// - `inner` is a non-null wrapper over a pointer to a `struct -/// regulator` obtained from [`regulator_get()`]. +/// - `inner` is a pointer obtained from a successful call to +/// [`regulator_get()`]. It is treated as an opaque token that may only be +/// accessed using C API methods (e.g., it may be `NULL` if the C API returns +/// `NULL`). /// /// [`regulator_get()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_get pub struct Regulator<State> where State: RegulatorState, { - inner: NonNull<bindings::regulator>, + inner: *mut bindings::regulator, _phantom: PhantomData<State>, } @@ -249,7 +254,7 @@ impl<T: RegulatorState> Regulator<T> { // SAFETY: Safe as per the type invariants of `Regulator`. to_result(unsafe { bindings::regulator_set_voltage( - self.inner.as_ptr(), + self.inner, min_voltage.as_microvolts(), max_voltage.as_microvolts(), ) @@ -259,7 +264,7 @@ impl<T: RegulatorState> Regulator<T> { /// Gets the current voltage of the regulator. pub fn get_voltage(&self) -> Result<Voltage> { // SAFETY: Safe as per the type invariants of `Regulator`. - let voltage = unsafe { bindings::regulator_get_voltage(self.inner.as_ptr()) }; + let voltage = unsafe { bindings::regulator_get_voltage(self.inner) }; to_result(voltage).map(|()| Voltage::from_microvolts(voltage)) } @@ -270,10 +275,8 @@ impl<T: RegulatorState> Regulator<T> { // received from the C code. from_err_ptr(unsafe { bindings::regulator_get(dev.as_raw(), name.as_char_ptr()) })?; - // SAFETY: We can safely trust `inner` to be a pointer to a valid - // regulator if `ERR_PTR` was not returned. - let inner = unsafe { NonNull::new_unchecked(inner) }; - + // INVARIANT: `inner` is a pointer obtained from `regulator_get()`, and + // the call was successful. Ok(Self { inner, _phantom: PhantomData, @@ -282,12 +285,12 @@ impl<T: RegulatorState> Regulator<T> { fn enable_internal(&self) -> Result { // SAFETY: Safe as per the type invariants of `Regulator`. - to_result(unsafe { bindings::regulator_enable(self.inner.as_ptr()) }) + to_result(unsafe { bindings::regulator_enable(self.inner) }) } fn disable_internal(&self) -> Result { // SAFETY: Safe as per the type invariants of `Regulator`. - to_result(unsafe { bindings::regulator_disable(self.inner.as_ptr()) }) + to_result(unsafe { bindings::regulator_disable(self.inner) }) } } @@ -349,7 +352,7 @@ impl<T: IsEnabled> Regulator<T> { /// Checks if the regulator is enabled. pub fn is_enabled(&self) -> bool { // SAFETY: Safe as per the type invariants of `Regulator`. - unsafe { bindings::regulator_is_enabled(self.inner.as_ptr()) != 0 } + unsafe { bindings::regulator_is_enabled(self.inner) != 0 } } } @@ -359,11 +362,11 @@ impl<T: RegulatorState> Drop for Regulator<T> { // SAFETY: By the type invariants, we know that `self` owns a // reference on the enabled refcount, so it is safe to relinquish it // now. - unsafe { bindings::regulator_disable(self.inner.as_ptr()) }; + unsafe { bindings::regulator_disable(self.inner) }; } // SAFETY: By the type invariants, we know that `self` owns a reference, // so it is safe to relinquish it now. - unsafe { bindings::regulator_put(self.inner.as_ptr()) }; + unsafe { bindings::regulator_put(self.inner) }; } } diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index e7af02f98208..9f21088c0855 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -299,7 +299,7 @@ out_unset: /* clang-format on */ -#define LANDLOCK_ABI_LAST 7 +#define LANDLOCK_ABI_LAST 8 #define XSTR(s) #s #define STR(s) XSTR(s) @@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp) /* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */ supported_restrict_flags &= ~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; - + __attribute__((fallthrough)); + case 7: /* Must be printed for any ABI < LANDLOCK_ABI_LAST. */ fprintf(stderr, "Hint: You should update the running kernel " diff --git a/scripts/coccinelle/api/kmalloc_objs.cocci b/scripts/coccinelle/api/kmalloc_objs.cocci index db12b7be7247..e9a415b7b6f4 100644 --- a/scripts/coccinelle/api/kmalloc_objs.cocci +++ b/scripts/coccinelle/api/kmalloc_objs.cocci @@ -122,3 +122,14 @@ fresh identifier ALLOC_OBJS = script:python(ALLOC_ARRAY) { alloc_array(ALLOC_ARR - ALLOC(struct_size_t(TYPE, FLEX, COUNT), GFP) + ALLOC_FLEX(TYPE, FLEX, COUNT, GFP) ) + +@drop_gfp_kernel depends on patch && !(file in "tools") && !(file in "samples")@ +identifier ALLOC = {kmalloc_obj,kmalloc_objs,kmalloc_flex, + kzalloc_obj,kzalloc_objs,kzalloc_flex, + kvmalloc_obj,kvmalloc_objs,kvmalloc_flex, + kvzalloc_obj,kvzalloc_objs,kvzalloc_flex}; +@@ + + ALLOC(... +- , GFP_KERNEL + ) diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 735e1de450c6..f08e0863b712 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -151,6 +151,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do if ! "$AWK" -v prefix="$CONFIG_PREFIX" \ -v warnoverride="$WARNOVERRIDE" \ -v strict="$STRICT" \ + -v outfile="$TMP_FILE.new" \ -v builtin="$BUILTIN" \ -v warnredun="$WARNREDUN" ' BEGIN { @@ -195,7 +196,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do # First pass: read merge file, store all lines and index FILENAME == ARGV[1] { - mergefile = FILENAME + mergefile = FILENAME merge_lines[FNR] = $0 merge_total = FNR cfg = get_cfg($0) @@ -212,17 +213,17 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do # Not a config or not in merge file - keep it if (cfg == "" || !(cfg in merge_cfg)) { - print $0 >> ARGV[3] + print $0 >> outfile next } - prev_val = $0 + prev_val = $0 new_val = merge_cfg[cfg] # BUILTIN: do not demote y to m if (builtin == "true" && new_val ~ /=m$/ && prev_val ~ /=y$/) { warn_builtin(cfg, prev_val, new_val) - print $0 >> ARGV[3] + print $0 >> outfile skip_merge[merge_cfg_line[cfg]] = 1 next } @@ -235,7 +236,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do # "=n" is the same as "is not set" if (prev_val ~ /=n$/ && new_val ~ / is not set$/) { - print $0 >> ARGV[3] + print $0 >> outfile next } @@ -246,25 +247,20 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do } } - # output file, skip all lines - FILENAME == ARGV[3] { - nextfile - } - END { # Newline in case base file lacks trailing newline - print "" >> ARGV[3] + print "" >> outfile # Append merge file, skipping lines marked for builtin preservation for (i = 1; i <= merge_total; i++) { if (!(i in skip_merge)) { - print merge_lines[i] >> ARGV[3] + print merge_lines[i] >> outfile } } if (strict_violated) { exit 1 } }' \ - "$ORIG_MERGE_FILE" "$TMP_FILE" "$TMP_FILE.new"; then + "$ORIG_MERGE_FILE" "$TMP_FILE"; then # awk exited non-zero, strict mode was violated STRICT_MODE_VIOLATED=true fi @@ -381,7 +377,7 @@ END { STRICT_MODE_VIOLATED=true fi -if [ "$STRICT" == "true" ] && [ "$STRICT_MODE_VIOLATED" == "true" ]; then +if [ "$STRICT" = "true" ] && [ "$STRICT_MODE_VIOLATED" = "true" ]; then echo "Requested and effective config differ" exit 1 fi diff --git a/security/landlock/domain.c b/security/landlock/domain.c index f5b78d4766cd..f0d83f43afa1 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void) * allocate with GFP_KERNEL_ACCOUNT because it is independent from the * caller. */ - details = - kzalloc_flex(*details, exe_path, path_size); + details = kzalloc_flex(*details, exe_path, path_size); if (!details) return ERR_PTR(-ENOMEM); diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index 319873586385..73018dc8d6c7 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers) { struct landlock_ruleset *new_ruleset; - new_ruleset = - kzalloc_flex(*new_ruleset, access_masks, num_layers, - GFP_KERNEL_ACCOUNT); + new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers, + GFP_KERNEL_ACCOUNT); if (!new_ruleset) return ERR_PTR(-ENOMEM); refcount_set(&new_ruleset->usage, 1); @@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent, if (IS_ERR(new_dom)) return new_dom; - new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy, - GFP_KERNEL_ACCOUNT); + new_dom->hierarchy = + kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT); if (!new_dom->hierarchy) return ERR_PTR(-ENOMEM); diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c index de01aa899751..4d4427ba8d93 100644 --- a/security/landlock/tsync.c +++ b/security/landlock/tsync.c @@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s, return ctx; } +/** + * tsync_works_trim - Put the last tsync_work element + * + * @s: TSYNC works to trim. + * + * Put the last task and decrement the size of @s. + * + * This helper does not cancel a running task, but just reset the last element + * to zero. + */ +static void tsync_works_trim(struct tsync_works *s) +{ + struct tsync_work *ctx; + + if (WARN_ON_ONCE(s->size <= 0)) + return; + + ctx = s->works[s->size - 1]; + + /* + * For consistency, remove the task from ctx so that it does not look like + * we handed it a task_work. + */ + put_task_struct(ctx->task); + *ctx = (typeof(*ctx)){}; + + /* + * Cancel the tsync_works_provide() change to recycle the reserved memory + * for the next thread, if any. This also ensures that cancel_tsync_works() + * and tsync_works_release() do not see any NULL task pointers. + */ + s->size--; +} + /* * tsync_works_grow_by - preallocates space for n more contexts in s * @@ -256,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags) * tsync_works_contains - checks for presence of task in s */ static bool tsync_works_contains_task(const struct tsync_works *s, - struct task_struct *task) + const struct task_struct *task) { size_t i; for (i = 0; i < s->size; i++) if (s->works[i]->task == task) return true; + return false; } @@ -276,7 +311,7 @@ static void tsync_works_release(struct tsync_works *s) size_t i; for (i = 0; i < s->size; i++) { - if (!s->works[i]->task) + if (WARN_ON_ONCE(!s->works[i]->task)) continue; put_task_struct(s->works[i]->task); @@ -284,6 +319,7 @@ static void tsync_works_release(struct tsync_works *s) for (i = 0; i < s->capacity; i++) kfree(s->works[i]); + kfree(s->works); s->works = NULL; s->size = 0; @@ -295,7 +331,7 @@ static void tsync_works_release(struct tsync_works *s) */ static size_t count_additional_threads(const struct tsync_works *works) { - struct task_struct *thread, *caller; + const struct task_struct *caller, *thread; size_t n = 0; caller = current; @@ -334,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works, struct tsync_shared_context *shared_ctx) { int err; - struct task_struct *thread, *caller; + const struct task_struct *caller; + struct task_struct *thread; struct tsync_work *ctx; bool found_more_threads = false; @@ -379,16 +416,14 @@ static bool schedule_task_work(struct tsync_works *works, init_task_work(&ctx->work, restrict_one_thread_callback); err = task_work_add(thread, &ctx->work, TWA_SIGNAL); - if (err) { + if (unlikely(err)) { /* * task_work_add() only fails if the task is about to exit. We * checked that earlier, but it can happen as a race. Resume * without setting an error, as the task is probably gone in the - * next loop iteration. For consistency, remove the task from ctx - * so that it does not look like we handed it a task_work. + * next loop iteration. */ - put_task_struct(ctx->task); - ctx->task = NULL; + tsync_works_trim(works); atomic_dec(&shared_ctx->num_preparing); atomic_dec(&shared_ctx->num_unfinished); @@ -406,12 +441,15 @@ static bool schedule_task_work(struct tsync_works *works, * shared_ctx->num_preparing and shared_ctx->num_unfished and mark the two * completions if needed, as if the task was never scheduled. */ -static void cancel_tsync_works(struct tsync_works *works, +static void cancel_tsync_works(const struct tsync_works *works, struct tsync_shared_context *shared_ctx) { - int i; + size_t i; for (i = 0; i < works->size; i++) { + if (WARN_ON_ONCE(!works->works[i]->task)) + continue; + if (!task_work_cancel(works->works[i]->task, &works->works[i]->work)) continue; @@ -448,6 +486,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, shared_ctx.set_no_new_privs = task_no_new_privs(current); /* + * Serialize concurrent TSYNC operations to prevent deadlocks when + * multiple threads call landlock_restrict_self() simultaneously. + * If the lock is already held, we gracefully yield by restarting the + * syscall. This allows the current thread to process pending + * task_works before retrying. + */ + if (!down_write_trylock(¤t->signal->exec_update_lock)) + return restart_syscall(); + + /* * We schedule a pseudo-signal task_work for each of the calling task's * sibling threads. In the task work, each thread: * @@ -527,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, -ERESTARTNOINTR); /* - * Cancel task works for tasks that did not start running yet, - * and decrement all_prepared and num_unfinished accordingly. + * Opportunistic improvement: try to cancel task + * works for tasks that did not start running + * yet. We do not have a guarantee that it + * cancels any of the enqueued task works + * because task_work_run() might already have + * dequeued them. */ cancel_tsync_works(&works, &shared_ctx); /* - * The remaining task works have started running, so waiting for - * their completion will finish. + * Break the loop with error. The cleanup code + * after the loop unblocks the remaining + * task_works. */ - wait_for_completion(&shared_ctx.all_prepared); + break; } } } while (found_more_threads && !atomic_read(&shared_ctx.preparation_error)); /* - * We now have all sibling threads blocking and in "prepared" state in the - * task work. Ask all threads to commit. + * We now have either (a) all sibling threads blocking and in "prepared" + * state in the task work, or (b) the preparation error is set. Ask all + * threads to commit (or abort). */ complete_all(&shared_ctx.ready_to_commit); @@ -556,6 +610,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, wait_for_completion(&shared_ctx.all_finished); tsync_works_release(&works); - + up_write(¤t->signal->exec_update_lock); return atomic_read(&shared_ctx.preparation_error); } diff --git a/security/security.c b/security/security.c index 67af9228c4e9..a26c1474e2e4 100644 --- a/security/security.c +++ b/security/security.c @@ -61,6 +61,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = { [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM", [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM", [LOCKDOWN_RTAS_ERROR_INJECTION] = "RTAS error injection", + [LOCKDOWN_XEN_USER_ACTIONS] = "Xen guest user action", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index e97721f80f65..8e70da850fac 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -1164,7 +1164,7 @@ static void process_rx_packets(struct fw_iso_context *context, u32 tstamp, size_ struct pkt_desc *desc = s->packet_descs_cursor; unsigned int pkt_header_length; unsigned int packets; - u32 curr_cycle_time; + u32 curr_cycle_time = 0; bool need_hw_irq; int i; diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index ab4b22fcb72e..b6a58852752a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -1017,12 +1017,30 @@ static int alc269_resume(struct hda_codec *codec) return 0; } -#define STARLABS_STARFIGHTER_SHUTUP_DELAY_MS 30 +#define ALC233_STARFIGHTER_SPK_PIN 0x1b +#define ALC233_STARFIGHTER_GPIO2 0x04 -static void starlabs_starfighter_shutup(struct hda_codec *codec) +static void alc233_starfighter_update_amp(struct hda_codec *codec, bool on) { - if (snd_hda_gen_shutup_speakers(codec)) - msleep(STARLABS_STARFIGHTER_SHUTUP_DELAY_MS); + snd_hda_codec_write(codec, ALC233_STARFIGHTER_SPK_PIN, 0, + AC_VERB_SET_EAPD_BTLENABLE, + on ? AC_EAPDBTL_EAPD : 0); + alc_update_gpio_data(codec, ALC233_STARFIGHTER_GPIO2, on); +} + +static void alc233_starfighter_pcm_hook(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream, + int action) +{ + switch (action) { + case HDA_GEN_PCM_ACT_PREPARE: + alc233_starfighter_update_amp(codec, true); + break; + case HDA_GEN_PCM_ACT_CLEANUP: + alc233_starfighter_update_amp(codec, false); + break; + } } static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec, @@ -1031,8 +1049,16 @@ static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec, { struct alc_spec *spec = codec->spec; - if (action == HDA_FIXUP_ACT_PRE_PROBE) - spec->shutup = starlabs_starfighter_shutup; + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gpio_mask |= ALC233_STARFIGHTER_GPIO2; + spec->gpio_dir |= ALC233_STARFIGHTER_GPIO2; + spec->gpio_data &= ~ALC233_STARFIGHTER_GPIO2; + break; + case HDA_FIXUP_ACT_PROBE: + spec->gen.pcm_playback_hook = alc233_starfighter_pcm_hook; + break; + } } static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec, @@ -3699,22 +3725,42 @@ static void alc245_tas2781_spi_hp_fixup_muteled(struct hda_codec *codec, alc_fixup_hp_gpio_led(codec, action, 0x04, 0x0); alc285_fixup_hp_coef_micmute_led(codec, fix, action); } + +static void alc245_hp_spk_mute_led_update(void *private_data, int enabled) +{ + struct hda_codec *codec = private_data; + unsigned int val; + + val = enabled ? 0x08 : 0x04; /* 0x08 led on, 0x04 led off */ + alc_update_coef_idx(codec, 0x0b, 0x0c, val); +} + /* JD2: mute led GPIO3: micmute led */ static void alc245_tas2781_i2c_hp_fixup_muteled(struct hda_codec *codec, const struct hda_fixup *fix, int action) { struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); static const hda_nid_t conn[] = { 0x02 }; switch (action) { case HDA_FIXUP_ACT_PRE_PROBE: + if (!hp_pin) { + spec->gen.vmaster_mute.hook = alc245_hp_spk_mute_led_update; + spec->gen.vmaster_mute_led = 1; + } spec->gen.auto_mute_via_amp = 1; snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); break; + case HDA_FIXUP_ACT_INIT: + if (!hp_pin) + alc245_hp_spk_mute_led_update(codec, !spec->gen.master_mute); + break; } tas2781_fixup_txnw_i2c(codec, fix, action); - alc245_fixup_hp_mute_led_coefbit(codec, fix, action); + if (hp_pin) + alc245_fixup_hp_mute_led_coefbit(codec, fix, action); alc285_fixup_hp_coef_micmute_led(codec, fix, action); } /* @@ -6854,6 +6900,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8735, "HP ProBook 435 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8756, "HP ENVY Laptop 13-ba0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x8760, "HP EliteBook 8{4,5}5 G7", ALC285_FIXUP_HP_BEEP_MICMUTE_LED), SND_PCI_QUIRK(0x103c, 0x876e, "HP ENVY x360 Convertible 13-ay0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), @@ -6867,6 +6914,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87cb, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87cc, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87df, "HP ProBook 430 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ -7098,6 +7146,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), + SND_PCI_QUIRK(0x103c, 0x8dd7, "HP Laptop 15-fd0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), @@ -7177,6 +7226,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1194, "ASUS UM3406KA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + HDA_CODEC_QUIRK(0x1043, 0x1204, "ASUS Strix G16 G615JMR", ALC287_FIXUP_TXNW2781_I2C_ASUS), SND_PCI_QUIRK(0x1043, 0x1204, "ASUS Strix G615JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1214, "ASUS Strix G615LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -7206,6 +7256,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x14e3, "ASUS G513PI/PU/PV", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x14f2, "ASUS VivoBook X515JA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1503, "ASUS G733PY/PZ/PZV/PYV", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1514, "ASUS ROG Flow Z13 GZ302EAC", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA/XJ/XQ/XU/XV/XI", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301VV/VQ/VU/VJ/VA/VC/VE/VVC/VQC/VUC/VJC/VEC/VCC", ALC285_FIXUP_ASUS_HEADSET_MIC), @@ -7575,6 +7626,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), HDA_CODEC_QUIRK(0x17aa, 0x391c, "Lenovo Yoga 7 2-in-1 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), + HDA_CODEC_QUIRK(0x17aa, 0x391d, "Lenovo Yoga 7 2-in-1 16AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b7, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index 3f434994c18d..2edbcab597c8 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -2077,7 +2077,6 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ - { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */ {} }; diff --git a/sound/pci/asihpi/hpimsgx.c b/sound/pci/asihpi/hpimsgx.c index b68e6bfbbfba..ed1c7b774436 100644 --- a/sound/pci/asihpi/hpimsgx.c +++ b/sound/pci/asihpi/hpimsgx.c @@ -581,8 +581,10 @@ static u16 adapter_prepare(u16 adapter) HPI_ADAPTER_OPEN); hm.adapter_index = adapter; hw_entry_point(&hm, &hr); - memcpy(&rESP_HPI_ADAPTER_OPEN[adapter], &hr, - sizeof(rESP_HPI_ADAPTER_OPEN[0])); + memcpy(&rESP_HPI_ADAPTER_OPEN[adapter].h, &hr, + sizeof(rESP_HPI_ADAPTER_OPEN[adapter].h)); + memcpy(&rESP_HPI_ADAPTER_OPEN[adapter].a, &hr.u.ax.info, + sizeof(rESP_HPI_ADAPTER_OPEN[adapter].a)); if (hr.error) return hr.error; diff --git a/sound/soc/amd/acp-config.c b/sound/soc/amd/acp-config.c index 365209ea53f3..1604ed679224 100644 --- a/sound/soc/amd/acp-config.c +++ b/sound/soc/amd/acp-config.c @@ -23,6 +23,16 @@ static int acp_quirk_data; +static const struct dmi_system_id acp70_acpi_flag_override_table[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"), + }, + }, + {} +}; + static const struct config_entry config_table[] = { { .flags = FLAG_AMD_SOF, @@ -186,8 +196,11 @@ int snd_amd_acp_find_config(struct pci_dev *pci) */ if (!pci->revision) return 0; - else if (pci->revision >= ACP_7_0_REV) + else if (pci->revision >= ACP_7_0_REV) { + if (dmi_check_system(acp70_acpi_flag_override_table)) + return 0; return snd_amd_acp_acpi_find_config(pci); + } for (i = 0; i < ARRAY_SIZE(config_table); i++, table++) { if (table->device != device) diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c index c30ccf23005a..6388cd7cb28e 100644 --- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c +++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c @@ -111,6 +111,14 @@ static const struct dmi_system_id soc_sdw_quirk_table[] = { }, .driver_data = (void *)(ASOC_SDW_CODEC_SPKR), }, + { + .callback = soc_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"), + }, + .driver_data = (void *)(ASOC_SDW_ACP_DMIC), + }, {} }; diff --git a/sound/soc/amd/acp/amd-acp70-acpi-match.c b/sound/soc/amd/acp/amd-acp70-acpi-match.c index 7a567ba02292..1ae43df5da6c 100644 --- a/sound/soc/amd/acp/amd-acp70-acpi-match.c +++ b/sound/soc/amd/acp/amd-acp70-acpi-match.c @@ -69,6 +69,28 @@ static const struct snd_soc_acpi_endpoint jack_amp_g1_dmic_endpoints[] = { }, }; +static const struct snd_soc_acpi_endpoint jack_dmic_endpoints[] = { + /* Jack Endpoint */ + { + .num = 0, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + /* DMIC Endpoint */ + { + /* + * rt721 endpoint #2 maps to AIF3 (internal DMIC capture). + * Endpoint #1 is AIF2 amp path and is handled by external amps + * on this platform. + */ + .num = 2, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, +}; + static const struct snd_soc_acpi_adr_device rt712_vb_1_group1_adr[] = { { .adr = 0x000130025D071201ull, @@ -563,6 +585,40 @@ static const struct snd_soc_acpi_link_adr acp70_rt1320_l0_rt722_l1[] = { {} }; +static const struct snd_soc_acpi_adr_device rt721_l1u0_tas2783x2_l1u8b_adr[] = { + { + .adr = 0x000130025D072101ull, + /* + * On this platform speakers are provided by two TAS2783 amps. + * Keep rt721 as UAJ + DMIC only. + */ + .num_endpoints = ARRAY_SIZE(jack_dmic_endpoints), + .endpoints = jack_dmic_endpoints, + .name_prefix = "rt721", + }, + { + .adr = 0x0001380102000001ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "tas2783-1", + }, + { + .adr = 0x00013B0102000001ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "tas2783-2", + }, +}; + +static const struct snd_soc_acpi_link_adr acp70_rt721_l1u0_tas2783x2_l1u8b[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(rt721_l1u0_tas2783x2_l1u8b_adr), + .adr_d = rt721_l1u0_tas2783x2_l1u8b_adr, + }, + {} +}; + struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { { .link_mask = BIT(0) | BIT(1), @@ -650,6 +706,11 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { .machine_check = snd_soc_acpi_amd_sdca_is_device_rt712_vb, .drv_name = "amd_sdw", }, + { + .link_mask = BIT(1), + .links = acp70_rt721_l1u0_tas2783x2_l1u8b, + .drv_name = "amd_sdw", + }, {}, }; EXPORT_SYMBOL(snd_soc_acpi_amd_acp70_sdw_machines); diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 1324543b42d7..6f1c105ca77e 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -717,6 +717,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "PM1503CDA"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "BM1403CDA"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Thin A15 B7VE"), + } + }, {} }; diff --git a/sound/soc/codecs/adau1372.c b/sound/soc/codecs/adau1372.c index fdee689cae53..d7363f9d53bb 100644 --- a/sound/soc/codecs/adau1372.c +++ b/sound/soc/codecs/adau1372.c @@ -762,7 +762,7 @@ static int adau1372_startup(struct snd_pcm_substream *substream, struct snd_soc_ return 0; } -static void adau1372_enable_pll(struct adau1372 *adau1372) +static int adau1372_enable_pll(struct adau1372 *adau1372) { unsigned int val, timeout = 0; int ret; @@ -778,19 +778,26 @@ static void adau1372_enable_pll(struct adau1372 *adau1372) timeout++; } while (!(val & 1) && timeout < 3); - if (ret < 0 || !(val & 1)) + if (ret < 0 || !(val & 1)) { dev_err(adau1372->dev, "Failed to lock PLL\n"); + return ret < 0 ? ret : -ETIMEDOUT; + } + + return 0; } -static void adau1372_set_power(struct adau1372 *adau1372, bool enable) +static int adau1372_set_power(struct adau1372 *adau1372, bool enable) { if (adau1372->enabled == enable) - return; + return 0; if (enable) { unsigned int clk_ctrl = ADAU1372_CLK_CTRL_MCLK_EN; + int ret; - clk_prepare_enable(adau1372->mclk); + ret = clk_prepare_enable(adau1372->mclk); + if (ret) + return ret; if (adau1372->pd_gpio) gpiod_set_value(adau1372->pd_gpio, 0); @@ -804,7 +811,14 @@ static void adau1372_set_power(struct adau1372 *adau1372, bool enable) * accessed. */ if (adau1372->use_pll) { - adau1372_enable_pll(adau1372); + ret = adau1372_enable_pll(adau1372); + if (ret) { + regcache_cache_only(adau1372->regmap, true); + if (adau1372->pd_gpio) + gpiod_set_value(adau1372->pd_gpio, 1); + clk_disable_unprepare(adau1372->mclk); + return ret; + } clk_ctrl |= ADAU1372_CLK_CTRL_CLKSRC; } @@ -829,6 +843,8 @@ static void adau1372_set_power(struct adau1372 *adau1372, bool enable) } adau1372->enabled = enable; + + return 0; } static int adau1372_set_bias_level(struct snd_soc_component *component, @@ -842,11 +858,9 @@ static int adau1372_set_bias_level(struct snd_soc_component *component, case SND_SOC_BIAS_PREPARE: break; case SND_SOC_BIAS_STANDBY: - adau1372_set_power(adau1372, true); - break; + return adau1372_set_power(adau1372, true); case SND_SOC_BIAS_OFF: - adau1372_set_power(adau1372, false); - break; + return adau1372_set_power(adau1372, false); } return 0; diff --git a/sound/soc/codecs/sma1307.c b/sound/soc/codecs/sma1307.c index 4bb59e5c0891..5850bf6e71ca 100644 --- a/sound/soc/codecs/sma1307.c +++ b/sound/soc/codecs/sma1307.c @@ -1759,8 +1759,10 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil sma1307->set.mode_size * 2 * sizeof(int), GFP_KERNEL); if (!sma1307->set.mode_set[i]) { - for (int j = 0; j < i; j++) - kfree(sma1307->set.mode_set[j]); + for (int j = 0; j < i; j++) { + devm_kfree(sma1307->dev, sma1307->set.mode_set[j]); + sma1307->set.mode_set[j] = NULL; + } sma1307->set.status = false; return; } diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index 5798d518d94c..a1d86bd309f4 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -2550,6 +2550,9 @@ static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) int k = i * (cali_data->cali_dat_sz_per_dev + 1); int rc; + if (!data || !cali_data->total_sz) + return; + if (data[k] != i) { dev_err(priv->dev, "%s: no cal-data for dev %d from usr-spc\n", __func__, i); diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index c8db33f78a1b..bc41a1466c70 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -2172,7 +2172,7 @@ static int wcd934x_init_dmic(struct snd_soc_component *comp) u32 def_dmic_rate, dmic_clk_drv; int ret; - ret = wcd_dt_parse_mbhc_data(comp->dev, &wcd->mbhc_cfg); + ret = wcd_dt_parse_micbias_info(&wcd->common); if (ret) return ret; diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 05b4e971a366..a4518fefad69 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -710,6 +710,8 @@ static int imx_card_parse_of(struct imx_card_data *data) link->ops = &imx_aif_ops; } + playback_only = false; + capture_only = false; graph_util_parse_link_direction(np, &playback_only, &capture_only); link->playback_only = playback_only; link->capture_only = capture_only; diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 9e5be0eaa77f..89d694c2cbdd 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1183,9 +1183,9 @@ void graph_util_parse_link_direction(struct device_node *np, bool is_playback_only = of_property_read_bool(np, "playback-only"); bool is_capture_only = of_property_read_bool(np, "capture-only"); - if (np && playback_only) + if (playback_only && is_playback_only) *playback_only = is_playback_only; - if (np && capture_only) + if (capture_only && is_capture_only) *capture_only = is_capture_only; } EXPORT_SYMBOL_GPL(graph_util_parse_link_direction); diff --git a/sound/soc/intel/catpt/device.c b/sound/soc/intel/catpt/device.c index 0638aecba40d..0b3f20e384c7 100644 --- a/sound/soc/intel/catpt/device.c +++ b/sound/soc/intel/catpt/device.c @@ -281,7 +281,15 @@ static int catpt_acpi_probe(struct platform_device *pdev) if (IS_ERR(cdev->pci_ba)) return PTR_ERR(cdev->pci_ba); - /* alloc buffer for storing DRAM context during dx transitions */ + /* + * As per design HOST is responsible for preserving firmware's runtime + * context during D0 -> D3 -> D0 transitions. Addresses used for DMA + * to/from HOST memory shall be outside the reserved range of 0xFFFxxxxx. + */ + ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31)); + if (ret) + return ret; + cdev->dxbuf_vaddr = dmam_alloc_coherent(dev, catpt_dram_size(cdev), &cdev->dxbuf_paddr, GFP_KERNEL); if (!cdev->dxbuf_vaddr) diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c index 008a20a2acbd..677f348909c8 100644 --- a/sound/soc/intel/catpt/dsp.c +++ b/sound/soc/intel/catpt/dsp.c @@ -125,9 +125,6 @@ int catpt_dmac_probe(struct catpt_dev *cdev) dmac->dev = cdev->dev; dmac->irq = cdev->irq; - ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31)); - if (ret) - return ret; /* * Caller is responsible for putting device in D0 to allow * for I/O and memory access before probing DW. diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index e0ed593697ba..dca60ee8e62c 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -216,9 +216,6 @@ static int find_sdca_init_table(struct device *dev, } else if (num_init_writes % sizeof(*raw) != 0) { dev_err(dev, "%pfwP: init table size invalid\n", function_node); return -EINVAL; - } else if ((num_init_writes / sizeof(*raw)) > SDCA_MAX_INIT_COUNT) { - dev_err(dev, "%pfwP: maximum init table size exceeded\n", function_node); - return -EINVAL; } raw = kzalloc(num_init_writes, GFP_KERNEL); @@ -1604,10 +1601,19 @@ static int find_sdca_entities(struct device *dev, struct sdw_slave *sdw, static struct sdca_entity *find_sdca_entity_by_label(struct sdca_function_data *function, const char *entity_label) { + struct sdca_entity *entity = NULL; int i; for (i = 0; i < function->num_entities; i++) { - struct sdca_entity *entity = &function->entities[i]; + entity = &function->entities[i]; + + /* check whole string first*/ + if (!strcmp(entity->label, entity_label)) + return entity; + } + + for (i = 0; i < function->num_entities; i++) { + entity = &function->entities[i]; if (!strncmp(entity->label, entity_label, strlen(entity_label))) return entity; diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index db077e9d5644..c12ffdcfe4e3 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -2950,7 +2950,7 @@ static int sof_ipc4_control_load_bytes(struct snd_sof_dev *sdev, struct snd_sof_ return -EINVAL; } - if (scontrol->priv_size < sizeof(struct sof_abi_hdr)) { + if (scontrol->priv_size && scontrol->priv_size < sizeof(struct sof_abi_hdr)) { dev_err(sdev->dev, "bytes control %s initial data size %zu is insufficient.\n", scontrol->name, scontrol->priv_size); diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 18e2401152c8..35200d801fb7 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -736,7 +736,7 @@ static int sof_parse_token_sets(struct snd_soc_component *scomp, asize = le32_to_cpu(array->size); /* validate asize */ - if (asize < 0) { /* FIXME: A zero-size array makes no sense */ + if (asize < sizeof(*array)) { dev_err(scomp->dev, "error: invalid array size 0x%x\n", asize); return -EINVAL; diff --git a/sound/usb/Kconfig b/sound/usb/Kconfig index 9b890abd96d3..b4588915efa1 100644 --- a/sound/usb/Kconfig +++ b/sound/usb/Kconfig @@ -192,6 +192,7 @@ config SND_USB_AUDIO_QMI tristate "Qualcomm Audio Offload driver" depends on QCOM_QMI_HELPERS && SND_USB_AUDIO && SND_SOC_USB depends on USB_XHCI_HCD && USB_XHCI_SIDEBAND + select AUXILIARY_BUS help Say Y here to enable the Qualcomm USB audio offloading feature. diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 510b68cced33..f161eb29f911 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include <linux/auxiliary_bus.h> diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 049a94079f9e..1f82e9e02d4b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2148,6 +2148,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { /* Device matches */ DEVICE_FLG(0x001f, 0x0b21, /* AB13X USB Audio */ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), + DEVICE_FLG(0x001f, 0x0b23, /* AB17X USB Audio */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x0020, 0x0b21, /* GHW-123P */ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x03f0, 0x654a, /* HP 320 FHD Webcam */ @@ -2429,6 +2431,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_IFACE_DELAY), VENDOR_FLG(0x07fd, /* MOTU */ QUIRK_FLAG_VALIDATE_RATES), + DEVICE_FLG(0x1235, 0x8006, 0), /* Focusrite Scarlett 2i2 1st Gen */ + DEVICE_FLG(0x1235, 0x800a, 0), /* Focusrite Scarlett 2i4 1st Gen */ VENDOR_FLG(0x1235, /* Focusrite Novation */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_SKIP_IFACE_SETUP), diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index da5275d8eda6..6673601246b3 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -740,7 +740,10 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 #define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) -#define MSR_AMD64_SNP_RESV_BIT 19 +#define MSR_AMD64_SNP_RESERVED_BITS19_22 GENMASK_ULL(22, 19) +#define MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT 23 +#define MSR_AMD64_SNP_IBPB_ON_ENTRY BIT_ULL(MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT) +#define MSR_AMD64_SNP_RESV_BIT 24 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) #define MSR_AMD64_SAVIC_CONTROL 0xc0010138 #define MSR_AMD64_SAVIC_EN_BIT 0 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 846a63215ce1..0d4538fa6c31 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -476,6 +476,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) +#define KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM (1 << 10) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index ab2aa97bd8ce..406923bd4846 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -32,7 +32,8 @@ /** * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied * error message. - * @condition: the condition which the compiler should know is false. + * @cond: the condition which the compiler should know is false. + * @msg: build-time error message * * See BUILD_BUG_ON for description. */ @@ -60,6 +61,7 @@ /** * static_assert - check integer constant expression at build time + * @expr: expression to be checked * * static_assert() is a wrapper for the C11 _Static_assert, with a * little macro magic to make the message optional (defaulting to the diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 65500f5db379..80364d4dbebb 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -14,6 +14,10 @@ #include <linux/ioctl.h> #include <asm/kvm.h> +#ifdef __KERNEL__ +#include <linux/kvm_types.h> +#endif + #define KVM_API_VERSION 12 /* @@ -1601,7 +1605,11 @@ struct kvm_stats_desc { __u16 size; __u32 offset; __u32 bucket_size; +#ifdef __KERNEL__ + char name[KVM_STATS_NAME_SIZE]; +#else char name[]; +#endif }; #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index da3aca87457f..31826621eebd 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -187,7 +187,6 @@ done check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"' -check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' diff --git a/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c b/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c index 43275d25b6cb..0f626db3a439 100644 --- a/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c +++ b/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c @@ -4,9 +4,9 @@ #include "../kvm-stat.h" #include "../evsel.h" #include "../env.h" -#include "../../arch/x86/include/uapi/asm/svm.h" -#include "../../arch/x86/include/uapi/asm/vmx.h" -#include "../../arch/x86/include/uapi/asm/kvm.h" +#include "../../../arch/x86/include/uapi/asm/svm.h" +#include "../../../arch/x86/include/uapi/asm/vmx.h" +#include "../../../arch/x86/include/uapi/asm/kvm.h" #include <subcmd/parse-options.h> define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 46bf4dfeebc8..7e39d469111b 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1605,9 +1605,9 @@ bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_gr .metric_or_groups = metric_or_groups, }; - return pmu_metrics_table__for_each_metric(table, - metricgroup__has_metric_or_groups_callback, - &data) + return metricgroup__for_each_metric(table, + metricgroup__has_metric_or_groups_callback, + &data) ? true : false; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b9efb296bba5..7b4629625b1e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1117,7 +1117,7 @@ static int config_attr(struct perf_event_attr *attr, static struct evsel_config_term *add_config_term(enum evsel_term_type type, struct list_head *head_terms, - bool weak) + bool weak, char *str, u64 val) { struct evsel_config_term *t; @@ -1128,8 +1128,62 @@ static struct evsel_config_term *add_config_term(enum evsel_term_type type, INIT_LIST_HEAD(&t->list); t->type = type; t->weak = weak; - list_add_tail(&t->list, head_terms); + switch (type) { + case EVSEL__CONFIG_TERM_PERIOD: + case EVSEL__CONFIG_TERM_FREQ: + case EVSEL__CONFIG_TERM_STACK_USER: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG1: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG2: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG3: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG4: + t->val.val = val; + break; + case EVSEL__CONFIG_TERM_TIME: + t->val.time = val; + break; + case EVSEL__CONFIG_TERM_INHERIT: + t->val.inherit = val; + break; + case EVSEL__CONFIG_TERM_OVERWRITE: + t->val.overwrite = val; + break; + case EVSEL__CONFIG_TERM_MAX_STACK: + t->val.max_stack = val; + break; + case EVSEL__CONFIG_TERM_MAX_EVENTS: + t->val.max_events = val; + break; + case EVSEL__CONFIG_TERM_PERCORE: + t->val.percore = val; + break; + case EVSEL__CONFIG_TERM_AUX_OUTPUT: + t->val.aux_output = val; + break; + case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: + t->val.aux_sample_size = val; + break; + case EVSEL__CONFIG_TERM_CALLGRAPH: + case EVSEL__CONFIG_TERM_BRANCH: + case EVSEL__CONFIG_TERM_DRV_CFG: + case EVSEL__CONFIG_TERM_RATIO_TO_PREV: + case EVSEL__CONFIG_TERM_AUX_ACTION: + if (str) { + t->val.str = strdup(str); + if (!t->val.str) { + zfree(&t); + return NULL; + } + t->free_str = true; + } + break; + default: + t->val.val = val; + break; + } + + list_add_tail(&t->list, head_terms); return t; } @@ -1142,7 +1196,7 @@ static int get_config_terms(const struct parse_events_terms *head_config, struct evsel_config_term *new_term; enum evsel_term_type new_type; bool str_type = false; - u64 val; + u64 val = 0; switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: @@ -1234,20 +1288,15 @@ static int get_config_terms(const struct parse_events_terms *head_config, continue; } - new_term = add_config_term(new_type, head_terms, term->weak); + /* + * Note: Members evsel_config_term::val and + * parse_events_term::val are unions and endianness needs + * to be taken into account when changing such union members. + */ + new_term = add_config_term(new_type, head_terms, term->weak, + str_type ? term->val.str : NULL, val); if (!new_term) return -ENOMEM; - - if (str_type) { - new_term->val.str = strdup(term->val.str); - if (!new_term->val.str) { - zfree(&new_term); - return -ENOMEM; - } - new_term->free_str = true; - } else { - new_term->val.val = val; - } } return 0; } @@ -1277,10 +1326,9 @@ static int add_cfg_chg(const struct perf_pmu *pmu, if (bits) { struct evsel_config_term *new_term; - new_term = add_config_term(new_term_type, head_terms, false); + new_term = add_config_term(new_term_type, head_terms, false, NULL, bits); if (!new_term) return -ENOMEM; - new_term->val.cfg_chg = bits; } return 0; diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 45a3e7ad3dcb..02d6f51d5a06 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -3,6 +3,7 @@ TEST_PROGS := \ dev_addr_lists.sh \ + non_ether_header_ops.sh \ options.sh \ propagation.sh \ refleak.sh \ diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 558e1d0cf565..5d36a22ef080 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,7 +1,9 @@ +CONFIG_BONDING=y CONFIG_DUMMY=y CONFIG_IPV6=y CONFIG_MACVLAN=y CONFIG_NETDEVSIM=m +CONFIG_NET_IPGRE=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y diff --git a/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh new file mode 100755 index 000000000000..948a43576bdc --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 +# +# Reproduce the non-Ethernet header_ops confusion scenario with: +# g0 (gre) -> b0 (bond) -> t0 (team) +# +# Before the fix, direct header_ops inheritance in this stack could call +# callbacks with the wrong net_device context and crash. + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/lib.sh + +trap cleanup_all_ns EXIT + +setup_ns ns1 + +ip -n "$ns1" link add d0 type dummy +ip -n "$ns1" addr add 10.10.10.1/24 dev d0 +ip -n "$ns1" link set d0 up + +ip -n "$ns1" link add g0 type gre local 10.10.10.1 +ip -n "$ns1" link add b0 type bond mode active-backup +ip -n "$ns1" link add t0 type team + +ip -n "$ns1" link set g0 master b0 +ip -n "$ns1" link set b0 master t0 + +ip -n "$ns1" link set g0 up +ip -n "$ns1" link set b0 up +ip -n "$ns1" link set t0 up + +# IPv6 address assignment triggers MLD join reports that call +# dev_hard_header() on t0, exercising the inherited header_ops path. +ip -n "$ns1" -6 addr add 2001:db8:1::1/64 dev t0 nodad +for i in $(seq 1 20); do + ip netns exec "$ns1" ping -6 -I t0 ff02::1 -c1 -W1 &>/dev/null || true +done + +echo "PASS: non-Ethernet header_ops stacking did not crash" +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index dc68371f76a3..6471fa214a9f 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -206,6 +206,7 @@ TEST_GEN_PROGS_s390 += s390/ucontrol_test TEST_GEN_PROGS_s390 += s390/user_operexec TEST_GEN_PROGS_s390 += s390/keyop TEST_GEN_PROGS_s390 += rseq_test +TEST_GEN_PROGS_s390 += s390/irq_routing TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test diff --git a/tools/testing/selftests/kvm/s390/irq_routing.c b/tools/testing/selftests/kvm/s390/irq_routing.c new file mode 100644 index 000000000000..7819a0af19a8 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/irq_routing.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IRQ routing offset tests. + * + * Copyright IBM Corp. 2026 + * + * Authors: + * Janosch Frank <frankja@linux.ibm.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" + +extern char guest_code[]; +asm("guest_code:\n" + "diag %r0,%r0,0\n" + "j .\n"); + +static void test(void) +{ + struct kvm_irq_routing *routing; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_paddr_t mem; + int ret; + + struct kvm_irq_routing_entry ue = { + .type = KVM_IRQ_ROUTING_S390_ADAPTER, + .gsi = 1, + }; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + mem = vm_phy_pages_alloc(vm, 2, 4096 * 42, 0); + + routing = kvm_gsi_routing_create(); + routing->nr = 1; + routing->entries[0] = ue; + routing->entries[0].u.adapter.summary_addr = (uintptr_t)mem; + routing->entries[0].u.adapter.ind_addr = (uintptr_t)mem; + + routing->entries[0].u.adapter.summary_offset = 4096 * 8; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == -1 && errno == EINVAL, "summary offset outside of page\n"); + + routing->entries[0].u.adapter.summary_offset -= 4; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == 0, "summary offset inside of page\n"); + + routing->entries[0].u.adapter.ind_offset = 4096 * 8; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == -1 && errno == EINVAL, "ind offset outside of page\n"); + + routing->entries[0].u.adapter.ind_offset -= 4; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == 0, "ind offset inside of page\n"); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_IRQ_ROUTING)); + + ksft_print_header(); + ksft_set_plan(4); + test(); + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c index 37ef0d2270db..2b9ad4f154f4 100644 --- a/tools/testing/selftests/landlock/tsync_test.c +++ b/tools/testing/selftests/landlock/tsync_test.c @@ -6,9 +6,10 @@ */ #define _GNU_SOURCE +#include <linux/landlock.h> #include <pthread.h> +#include <signal.h> #include <sys/prctl.h> -#include <linux/landlock.h> #include "common.h" @@ -158,4 +159,92 @@ TEST(competing_enablement) EXPECT_EQ(0, close(ruleset_fd)); } +static void signal_nop_handler(int sig) +{ +} + +struct signaler_data { + pthread_t target; + volatile bool stop; +}; + +static void *signaler_thread(void *data) +{ + struct signaler_data *sd = data; + + while (!sd->stop) + pthread_kill(sd->target, SIGUSR1); + + return NULL; +} + +/* + * Number of idle sibling threads. This must be large enough that even on + * machines with many cores, the sibling threads cannot all complete their + * credential preparation in a single parallel wave, otherwise the signaler + * thread has no window to interrupt wait_for_completion_interruptible(). + * 200 threads on a 64-core machine yields ~3 serialized waves, giving the + * tight signal loop enough time to land an interruption. + */ +#define NUM_IDLE_THREADS 200 + +/* + * Exercises the tsync interruption and cancellation paths in tsync.c. + * + * When a signal interrupts the calling thread while it waits for sibling + * threads to finish their credential preparation + * (wait_for_completion_interruptible in landlock_restrict_sibling_threads), + * the kernel sets ERESTARTNOINTR, cancels queued task works that have not + * started yet (cancel_tsync_works), then waits for the remaining works to + * finish. On the error return, syscalls.c aborts the prepared credentials. + * The kernel automatically restarts the syscall, so userspace sees success. + */ +TEST(tsync_interrupt) +{ + size_t i; + pthread_t threads[NUM_IDLE_THREADS]; + pthread_t signaler; + struct signaler_data sd; + struct sigaction sa = {}; + const int ruleset_fd = create_ruleset(_metadata); + + disable_caps(_metadata); + + /* Install a no-op SIGUSR1 handler so the signal does not kill us. */ + sa.sa_handler = signal_nop_handler; + sigemptyset(&sa.sa_mask); + ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL)); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + for (i = 0; i < NUM_IDLE_THREADS; i++) + ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL)); + + /* + * Start a signaler thread that continuously sends SIGUSR1 to the + * calling thread. This maximizes the chance of interrupting + * wait_for_completion_interruptible() in the kernel's tsync path. + */ + sd.target = pthread_self(); + sd.stop = false; + ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd)); + + /* + * The syscall may be interrupted and transparently restarted by the + * kernel (ERESTARTNOINTR). From userspace, it should always succeed. + */ + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, + LANDLOCK_RESTRICT_SELF_TSYNC)); + + sd.stop = true; + ASSERT_EQ(0, pthread_join(signaler, NULL)); + + for (i = 0; i < NUM_IDLE_THREADS; i++) { + ASSERT_EQ(0, pthread_cancel(threads[i])); + ASSERT_EQ(0, pthread_join(threads[i], NULL)); + } + + EXPECT_EQ(0, close(ruleset_fd)); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index c5694cc4ddd2..829f72c8ee07 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -868,6 +868,64 @@ fib6_gc_test() check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 + + # rd6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v rd6)" ]; then + echo "SKIP: rd6 not found." + set +e + cleanup &> /dev/null + return + fi + + setup_ns ns2 + $IP link add veth1 type veth peer veth2 netns $ns2 + $IP link set veth1 up + ip -netns $ns2 link set veth2 up + $IP addr add fe80:dead::1/64 dev veth1 + ip -netns $ns2 addr add fe80:dead::2/64 dev veth2 + + # Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception() + # from removing not-yet-expired exceptions. + ip -netns $ns2 link set veth2 address 00:11:22:33:44:55 + $IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:10::$i \ + via fe80:dead::2 dev veth1 expires $EXPIRE + + ip netns exec $ns2 rd6 -i veth2 \ + -s fe80:dead::2 -d fe80:dead::1 \ + -r 2001:10::$i -t fe80:dead::3 -p ICMP6 + done + + check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l) + + # Promote to permanent routes by "prepend" (w/o NLM_F_EXCL and NLM_F_REPLACE) + for i in $(seq 1 5); do + # -EEXIST, but the temporary route becomes the permanent route. + $IP -6 route append 2001:10::$i \ + via fe80:dead::2 dev veth1 2>/dev/null || true + done + + check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l) + check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l) + + # Trigger GC instead of waiting $GC_WAIT_TIME. + # rt6_nh_dump_exceptions() just skips expired exceptions. + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l) + log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)" + + $IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + $IP link del veth1 + # ra6 is required for the next test. (ipv6toolkit) if [ ! -x "$(command -v ra6)" ]; then echo "SKIP: ra6 not found." @@ -876,9 +934,6 @@ fib6_gc_test() return fi - # Delete dummy_10 and remove all routes - $IP link del dev dummy_10 - # Create a pair of veth devices to send a RA message from one # device to another. $IP link add veth1 type veth peer name veth2 diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 394166f224a4..ffdc6ccc6511 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -29,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate + insert_overlap load_flush_load4 load_flush_load8" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -432,6 +433,30 @@ race_repeat 0 perf_duration 0 " +TYPE_load_flush_load4=" +display reload with flush, 4bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_load_flush_load8=" +display reload with flush, 8bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1997,6 +2022,49 @@ test_bug_insert_overlap() return 0 } +test_bug_load_flush_load4() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 0 255); do + local addelem="add element inet filter test" + local j + + for j in $(seq 0 20); do + echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }" + echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }" + done + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + +test_bug_load_flush_load8() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 1 100); do + echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }" + echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }" + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } |
