/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PAGE_64_H
#define _ASM_X86_PAGE_64_H
#include <asm/page_64_types.h>

#ifndef __ASSEMBLER__
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

#include <linux/kmsan-checks.h>
#include <linux/mmdebug.h>

/* duplicated to the one in bootmem.h */
extern unsigned long max_pfn;
extern unsigned long phys_base;

extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
extern unsigned long direct_map_physmem_end;

static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
        unsigned long y = x - __START_KERNEL_map;

        /* use the carry flag to determine if x was < __START_KERNEL_map */
        x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));

        return x;
}
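
/*
 * Worked example (illustrative only): the single comparison above handles
 * both halves of the kernel address space via the wrap-around of the
 * subtraction:
 *
 *  - Kernel text mapping: x >= __START_KERNEL_map, so y does not wrap and
 *    x > y; the result is y + phys_base, i.e. the offset into the kernel
 *    image plus the physical load address.
 *
 *  - Direct mapping: x < __START_KERNEL_map, so y wraps around and x <= y;
 *    the result is y + (__START_KERNEL_map - PAGE_OFFSET), which reduces
 *    (mod 2^64) to x - PAGE_OFFSET, the usual virt-to-phys rule for the
 *    direct map.
 */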
#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
#else
#define __phys_addr(x) __phys_addr_nodebug(x)
#endif

static inline unsigned long __phys_addr_symbol(unsigned long x)
{
        unsigned long y = x - __START_KERNEL_map;

        /* only check upper bounds since lower bounds will trigger carry */
        VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);

        return y + phys_base;
}
#define __phys_reloc_hide(x) (x)
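
/*
 * Note: together with __phys_reloc_hide(), __phys_addr_symbol() backs the
 * __pa_symbol() helper in <asm/page.h>, which callers use to get the
 * physical address of a kernel-image symbol, e.g. (illustrative):
 *
 *	phys_addr_t p = __pa_symbol(_text);
 */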
void __clear_pages_unrolled(void *page);
KCFI_REFERENCE(__clear_pages_unrolled);

/**
 * clear_pages() - clear a page range using a kernel virtual address.
 * @addr: start address of kernel page range
 * @npages: number of pages
 *
 * Switches between three implementations of page clearing based on CPU
 * capabilities:
 *
 *  - __clear_pages_unrolled(): the oldest, slowest and universally
 *    supported method. Zeroes via 8-byte MOV instructions unrolled 8x
 *    to write a 64-byte cacheline in each loop iteration.
 *
 *  - "REP; STOSQ": really old CPUs had crummy REP implementations.
 *    Vendor CPU setup code sets 'REP_GOOD' on CPUs where REP can be
 *    trusted. The instruction writes 8 bytes per REP iteration, but
 *    CPUs can internally batch these together and do larger writes.
 *
 *  - "REP; STOSB": used on CPUs with "enhanced REP MOVSB/STOSB",
 *    which enumerate 'ERMS' and provide an implementation which,
 *    unlike "REP; STOSQ" above, isn't overly picky about alignment.
 *    The instruction writes 1 byte per REP iteration, with CPUs
 *    internally batching these together into larger writes, and is
 *    generally the fastest of the three.
 *
 * Note that when running as a guest, features exposed by the CPU
 * might be mediated by the hypervisor. So, the STOSQ variant might
 * be in active use on some systems even when the hardware enumerates
 * ERMS.
 *
 * Does absolutely no exception handling.
 */
static inline void clear_pages(void *addr, unsigned int npages)
{
        u64 len = npages * PAGE_SIZE;

        /*
         * Clean up KMSAN metadata for the pages being cleared. The assembly
         * call below clobbers @addr, so perform unpoisoning before it.
         */
        kmsan_unpoison_memory(addr, len);

        /*
         * The inline asm embeds a CALL instruction and usually that is a no-no
         * due to the compiler not knowing that and thus being unable to track
         * callee-clobbered registers.
         *
         * In this case that is fine because the registers clobbered by
         * __clear_pages_unrolled() are part of the inline asm register
         * specification.
         */
        asm volatile(ALTERNATIVE_2("call __clear_pages_unrolled",
                                   "shrq $3, %%rcx; rep stosq", X86_FEATURE_REP_GOOD,
                                   "rep stosb", X86_FEATURE_ERMS)
                     : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
                     : "a" (0)
                     : "cc", "memory");
}
#define clear_pages clear_pages

static inline void clear_page(void *addr)
{
        clear_pages(addr, 1);
}
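
/*
 * Example (illustrative only, not part of this header): a hypothetical
 * caller zeroing a freshly allocated higher-order page. Assumes
 * <linux/gfp.h> and <linux/mm.h> for alloc_pages()/page_address();
 * error handling elided.
 *
 *	struct page *page = alloc_pages(GFP_KERNEL, order);
 *
 *	if (page)
 *		clear_pages(page_address(page), 1U << order);
 */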
void copy_page(void *to, void *from);
KCFI_REFERENCE(copy_page);

/*
 * User space process size. This is the first address outside the user range.
 * There are a few constraints that determine this:
 *
 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 * address, then that syscall will enter the kernel with a
 * non-canonical return address, and SYSRET will explode dangerously.
 * We avoid this particular problem by preventing anything
 * from being mapped at the maximum canonical address.
 *
 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 * CPUs malfunction if they execute code from the highest canonical page.
 * They'll speculate right off the end of the canonical space, and
 * bad things happen. This is worked around in the same way as the
 * Intel problem.
 *
 * With page table isolation enabled, we map the LDT in ... [stay tuned]
 */
static __always_inline unsigned long task_size_max(void)
{
        unsigned long ret;

        alternative_io("movq %[small],%0","movq %[large],%0",
                       X86_FEATURE_LA57,
                       "=r" (ret),
                       [small] "i" ((1ul << 47)-PAGE_SIZE),
                       [large] "i" ((1ul << 56)-PAGE_SIZE));

        return ret;
}
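
/*
 * For reference (values implied by the constants above): with 4-level
 * paging, task_size_max() evaluates to (1UL << 47) - PAGE_SIZE =
 * 0x00007ffffffff000; with LA57 (5-level paging) it evaluates to
 * (1UL << 56) - PAGE_SIZE = 0x00fffffffffff000.
 */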
#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
# define __HAVE_ARCH_GATE_AREA 1
#endif
#endif /* _ASM_X86_PAGE_64_H */