1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#include <cstdint>
#include <arch/x86_64/cpu/sse.hpp>
#include <etc/assembly.hpp>
#include <etc/logging.hpp>
// Size in bytes of the state save area; assigned in sse::init() and returned by sse::size().
std::uint64_t __sse_size{0};
// Non-zero once sse::init() has selected the save-area size and save method.
char __sse_is_initializied{0};
// Non-zero when sse::save()/sse::load() use FXSAVE/FXRSTOR rather than XSAVE/XRSTOR.
char __sse_legacy_save{0};
// Returns the current contents of control register CR4.
std::uint64_t __sse_cr4_read() {
    std::uint64_t cr4_value;
    asm volatile("mov %%cr4, %0"
                 : "=r"(cr4_value));
    return cr4_value;
}
// Loads `val` into control register CR4.
// The "memory" clobber keeps the compiler from moving memory accesses across the write.
void __sse_cr4_write(std::uint64_t val) {
    asm volatile("mov %0, %%cr4"
                 : /* no outputs */
                 : "r"(val)
                 : "memory");
}
// Returns the current contents of control register CR0.
std::uint64_t __sse_cr0_read() {
    std::uint64_t cr0_value;
    asm volatile("mov %%cr0, %0"
                 : "=r"(cr0_value));
    return cr0_value;
}
// Loads `val` into control register CR0.
// The "memory" clobber keeps the compiler from moving memory accesses across the write.
void __sse_cr0_write(std::uint64_t val) {
    asm volatile("mov %0, %%cr0"
                 : /* no outputs */
                 : "r"(val)
                 : "memory");
}
// Writes `val` to extended control register 0 (ECX = 0) via XSETBV.
// XSETBV takes the value split across EDX:EAX, so the two 32-bit halves are passed separately.
void __sse_xsetbv(std::uint64_t val) {
    const std::uint32_t low_half = static_cast<std::uint32_t>(val);
    const std::uint32_t high_half = static_cast<std::uint32_t>(val >> 32);
    asm volatile("xsetbv"
                 : /* no outputs */
                 : "a"(low_half), "d"(high_half), "c"(0)
                 : "memory");
}
// Reads extended control register 0 (ECX = 0) via XGETBV and returns it as one 64-bit value.
std::uint64_t __sse_xgetbv() {
    std::uint32_t low_half;
    std::uint32_t high_half;
    asm volatile("xgetbv"
                 : "=a"(low_half), "=d"(high_half)
                 : "c"(0)
                 : "memory");
    // XGETBV returns the register split across EDX:EAX; stitch the halves back together.
    const std::uint64_t upper = static_cast<std::uint64_t>(high_half) << 32;
    return upper | low_half;
}
// Executes XSAVE on the area at `buf`, using the value returned by __sse_xgetbv()
// as the EDX:EAX component mask.
// NOTE(review): the XSAVE instruction expects a 64-byte aligned area; this helper
// does not check the alignment of `buf` — confirm callers guarantee it.
void __sse_xsave(void* buf) {
    const std::uint64_t feature_mask = __sse_xgetbv();
    const std::uint64_t mask_low = feature_mask & 0xFFFFFFFF;
    const std::uint64_t mask_high = feature_mask >> 32;
    asm volatile("xsave (%0)"
                 :
                 : "r"(buf), "a"(mask_low), "d"(mask_high), "c"(0)
                 : "memory");
}
// Executes XRSTOR on the area at `buf`, using the value returned by __sse_xgetbv()
// as the EDX:EAX component mask.
// NOTE(review): the XRSTOR instruction expects a 64-byte aligned area; this helper
// does not check the alignment of `buf` — confirm callers guarantee it.
void __sse_xrstor(void* buf) {
    const std::uint64_t feature_mask = __sse_xgetbv();
    const std::uint64_t mask_low = feature_mask & 0xFFFFFFFF;
    const std::uint64_t mask_high = feature_mask >> 32;
    asm volatile("xrstor (%0)"
                 :
                 : "r"(buf), "a"(mask_low), "d"(mask_high), "c"(0)
                 : "memory");
}
using namespace arch::x86_64::cpu;
// Enables FPU/SSE state handling on the calling CPU and, when CPUID reports
// XSAVE support, programs XCR0 with the extended-state components to manage.
//
// On the first call it also records, in file-level globals, the save-area size
// and whether FXSAVE (legacy) or XSAVE is used by sse::save()/sse::load().
// The control-register programming below runs on every call.
//
// NOTE: the locals `a`, `b`, `c`, `d` and `sse_control` keep their names because
// the SSE_CHECK_AND_SET macro (defined outside this file) is expanded in this
// scope and may refer to them.
void sse::init() {
    uint32_t a,b,c,d;
    __cpuid(1,0,&a,&b,&c,&d);

    // One-time selection of the save method and save-area size.
    if(!__sse_is_initializied) {
        if(c & SSE_XSAVE_SUPPORT) {
            // CPUID leaf 13, sub-leaf 0: ECX is used as the XSAVE area size.
            __cpuid(13,0,&a,&b,&c,&d);
            __sse_legacy_save = 0;
            __sse_size = c;
        } else {
            // FXSAVE/FXRSTOR operate on a fixed 512-byte area.
            __sse_legacy_save = 1;
            __sse_size = 512;
        }
        __sse_is_initializied = 1;
    }

    std::uint64_t cr4 = __sse_cr4_read();
    cr4 |= DEFAULT_SSE_FLAGS;

    std::uint64_t cr0 = __sse_cr0_read();
    cr0 &= ~(1 << 2);   // clear CR0 bit 2 (EM)
    cr0 |= (1 << 1);    // set CR0 bit 1 (MP)
    __sse_cr0_write(cr0);

    std::uint64_t sse_control = 0;
    __sse_cr4_write(cr4);

    // Without XSAVE there is no XCR0 to program; the legacy setup above is complete.
    __cpuid(1,0,&a,&b,&c,&d);
    if(!(c & SSE_XSAVE_SUPPORT))
        return;

    // The XSAVE enable bit must be set in CR4 before XSETBV is executed.
    cr4 |= SSE_XSAVE_CR4;
    __sse_cr4_write(cr4);

    __cpuid(13,0,&a,&b,&c,&d);
    sse_control |= SSE_CONTROL_DEFAULT;

    // Optional XCR0 components. The masks are bit positions, so they must be
    // built with shifts; the previous `<` comparisons evaluated to 0 or 1 and
    // never selected the intended bits.
    SSE_CHECK_AND_SET((1 << 2));      // XCR0 bit 2: AVX
    SSE_CHECK_AND_SET((1 << 9));      // XCR0 bit 9: PKRU
    SSE_CHECK_AND_SET((0b11 << 3));   // XCR0 bits 3-4: MPX
    SSE_CHECK_AND_SET((0b11 << 17));  // XCR0 bits 17-18: AMX
    SSE_CHECK_AND_SET((0b111 << 5));  // XCR0 bits 5-7: AVX-512
    __sse_xsetbv(sse_control);
}
// Returns the save-area size in bytes recorded by sse::init() (0 before the first init).
std::uint64_t sse::size() {
    const std::uint64_t area_bytes = __sse_size;
    return area_bytes;
}
// Stores the current FPU/SSE (and, with XSAVE, extended) state into `buf`.
// Uses FXSAVE when the legacy path was selected by sse::init(), otherwise XSAVE.
// `buf` must provide at least sse::size() bytes.
// NOTE(review): FXSAVE expects a 16-byte aligned area and XSAVE a 64-byte aligned
// one; alignment is not checked here — confirm callers guarantee it.
void sse::save(std::uint8_t* buf) {
    if(__sse_legacy_save) {
        // FXSAVE writes through `buf`; the "memory" clobber tells the compiler
        // so it does not cache or reorder accesses to the buffer around the asm.
        asm volatile("fxsave (%0)" : : "r"(buf) : "memory");
        return;
    }
    __sse_xsave(buf);
}
// Restores FPU/SSE (and, with XSAVE, extended) state from `buf`.
// Uses FXRSTOR when the legacy path was selected by sse::init(), otherwise XRSTOR.
// `buf` is expected to hold an image in the format written by sse::save().
// NOTE(review): FXRSTOR expects a 16-byte aligned area and XRSTOR a 64-byte aligned
// one; alignment is not checked here — confirm callers guarantee it.
void sse::load(std::uint8_t* buf) {
    if(__sse_legacy_save) {
        // FXRSTOR reads through `buf`; the "memory" clobber forces earlier
        // stores to the buffer to be completed before the asm executes.
        asm volatile("fxrstor (%0)" : : "r"(buf) : "memory");
        return;
    }
    __sse_xrstor(buf);
}
|