// Source path: kernel/src/arch/x86_64/cpu/sse.cpp
// Blob: d36e2455bfb47368020ae8ca5586d50c1e5b69b7

#include <cstdint>
#include <arch/x86_64/cpu/sse.hpp>

#include <etc/assembly.hpp>
#include <etc/logging.hpp>

// Size in bytes of the state save area. Written once by sse::init(): 512 on
// the legacy FXSAVE path, otherwise the `c` output of __cpuid(13, 0, ...).
// Returned to callers by sse::size().
std::uint64_t __sse_size = 0;
// Non-zero once sse::init() has performed its one-time save-area detection.
char __sse_is_initializied = 0;
// Non-zero when sse::save()/sse::load() must use FXSAVE/FXRSTOR rather than
// XSAVE/XRSTOR (i.e. the XSAVE support bit was not reported by CPUID).
char __sse_legacy_save = 0;

// Returns the current contents of the CR4 control register.
std::uint64_t __sse_cr4_read() {
    std::uint64_t cr4_value = 0;
    asm volatile("mov %%cr4, %0"
                 : "=r"(cr4_value));
    return cr4_value;
}

// Loads `val` into the CR4 control register. The "memory" clobber keeps the
// compiler from moving memory accesses across the register update.
void __sse_cr4_write(std::uint64_t val) {
    asm volatile("mov %0, %%cr4"
                 : /* no outputs */
                 : "r"(val)
                 : "memory");
}

// Returns the current contents of the CR0 control register.
std::uint64_t __sse_cr0_read() {
    std::uint64_t cr0_value = 0;
    asm volatile("mov %%cr0, %0"
                 : "=r"(cr0_value));
    return cr0_value;
}

// Loads `val` into the CR0 control register. The "memory" clobber keeps the
// compiler from moving memory accesses across the register update.
void __sse_cr0_write(std::uint64_t val) {
    asm volatile("mov %0, %%cr0"
                 : /* no outputs */
                 : "r"(val)
                 : "memory");
}


// Writes `val` to extended control register 0 (ECX = 0 selects XCR0).
// XSETBV takes the value split across EDX:EAX, so the two 32-bit halves are
// passed separately.
void __sse_xsetbv(std::uint64_t val) {
    const std::uint32_t low_half = static_cast<std::uint32_t>(val);
    const std::uint32_t high_half = static_cast<std::uint32_t>(val >> 32);
    asm volatile("xsetbv"
                 : /* no outputs */
                 : "a"(low_half), "d"(high_half), "c"(0)
                 : "memory");
}

// Reads extended control register 0 (ECX = 0 selects XCR0) and returns it as
// a single 64-bit value assembled from the EDX:EAX pair XGETBV produces.
std::uint64_t __sse_xgetbv() {
    std::uint32_t low_half = 0;
    std::uint32_t high_half = 0;
    asm volatile("xgetbv"
                 : "=a"(low_half), "=d"(high_half)
                 : "c"(0)
                 : "memory");
    const std::uint64_t upper = static_cast<std::uint64_t>(high_half) << 32;
    return upper | low_half;
}

// Saves the processor extended state into `buf` with XSAVE, using the
// current XCR0 value as the requested-feature mask (passed in EDX:EAX).
void __sse_xsave(void* buf) {
    const std::uint64_t feature_mask = __sse_xgetbv();
    const std::uint64_t mask_low = feature_mask & 0xFFFFFFFF;
    const std::uint64_t mask_high = feature_mask >> 32;
    asm volatile("xsave (%0)"
                 : /* no outputs */
                 : "r"(buf), "a"(mask_low), "d"(mask_high), "c"(0)
                 : "memory");
}

// Restores the processor extended state from `buf` with XRSTOR, using the
// current XCR0 value as the requested-feature mask (passed in EDX:EAX).
void __sse_xrstor(void* buf) {
    const std::uint64_t feature_mask = __sse_xgetbv();
    const std::uint64_t mask_low = feature_mask & 0xFFFFFFFF;
    const std::uint64_t mask_high = feature_mask >> 32;
    asm volatile("xrstor (%0)"
                 : /* no outputs */
                 : "r"(buf), "a"(mask_low), "d"(mask_high), "c"(0)
                 : "memory");
}

using namespace arch::x86_64::cpu;

/**
 * Enables SSE on the calling CPU and, when CPUID reports XSAVE support,
 * enables XSAVE and programs XCR0 with the supported state components.
 *
 * On the first call it also decides how state is saved later on:
 *   - XSAVE available:  __sse_legacy_save = 0, __sse_size = `c` output of
 *                       __cpuid(13, 0, ...);
 *   - otherwise:        __sse_legacy_save = 1, __sse_size = 512 (FXSAVE area).
 *
 * The control-register programming below runs on every call.
 *
 * NOTE(review): SSE_XSAVE_SUPPORT, DEFAULT_SSE_FLAGS, SSE_XSAVE_CR4,
 * SSE_CONTROL_DEFAULT, SSE_CHECK_AND_SET and __cpuid are defined outside this
 * file. SSE_CHECK_AND_SET presumably ORs the given mask into `sse_control`
 * when the CPUID leaf 13 output in `a` reports it — confirm against the
 * header. The locals `a`, `b`, `c`, `d` and `sse_control` keep their names
 * because that macro may refer to them.
 */
void sse::init() {
    std::uint32_t a = 0, b = 0, c = 0, d = 0;

    __cpuid(1,0,&a,&b,&c,&d);
    if(!__sse_is_initializied) {
        if(c & SSE_XSAVE_SUPPORT) {
            // XSAVE path: take the save-area size from CPUID leaf 13.
            __cpuid(13,0,&a,&b,&c,&d);
            __sse_legacy_save = 0;
            __sse_size = c;
        } else {
            // Legacy path: FXSAVE/FXRSTOR use a fixed 512-byte area.
            __sse_legacy_save = 1;
            __sse_size = 512;
        }
        __sse_is_initializied = 1;
    }

    std::uint64_t cr4 = __sse_cr4_read();
    cr4 |= DEFAULT_SSE_FLAGS;

    // CR0: clear bit 2 (EM) and set bit 1 (MP).
    std::uint64_t cr0 = __sse_cr0_read();
    cr0 &= ~(1 << 2);
    cr0 |= (1 << 1);
    __sse_cr0_write(cr0);

    __sse_cr4_write(cr4);

    // `c` may have been overwritten by the leaf-13 query above, so leaf 1 is
    // queried again before testing the XSAVE bit.
    __cpuid(1,0,&a,&b,&c,&d);
    if(!(c & SSE_XSAVE_SUPPORT))
        return;

    cr4 |= SSE_XSAVE_CR4;
    __sse_cr4_write(cr4);

    __cpuid(13,0,&a,&b,&c,&d);

    std::uint64_t sse_control = 0;
    sse_control |= SSE_CONTROL_DEFAULT;

    // XCR0 component masks. The multi-bit masks must be built with a shift
    // (`<<`); the earlier comparison (`<`) collapsed them to 0 or 1.
    SSE_CHECK_AND_SET((1 << 2));      // bit 2: AVX
    SSE_CHECK_AND_SET((1 << 9));      // bit 9: PKRU
    SSE_CHECK_AND_SET((0b11 << 3));   // bits 3-4: MPX
    SSE_CHECK_AND_SET((0b11 << 17));  // bits 17-18: AMX
    SSE_CHECK_AND_SET((0b111 << 5));  // bits 5-7: AVX-512

    __sse_xsetbv(sse_control);
}

// Reports the save-area size, in bytes, that sse::init() recorded in
// __sse_size (0 if init() has not stored a value yet).
std::uint64_t sse::size() {
    const std::uint64_t area_bytes = __sse_size;
    return area_bytes;
}

/**
 * Saves the current FPU/SSE (and, on the XSAVE path, extended) state to `buf`.
 *
 * Uses FXSAVE when __sse_legacy_save is set, otherwise __sse_xsave().
 *
 * @param buf Destination save area.
 *            NOTE(review): per the instruction definitions, FXSAVE needs a
 *            16-byte-aligned and XSAVE a 64-byte-aligned buffer; presumably
 *            callers size it with sse::size() — verify at the call sites.
 */
void sse::save(std::uint8_t* buf) {
    if(__sse_legacy_save)
        // FXSAVE writes through `buf`; the "memory" clobber tells the
        // compiler that memory is modified so it does not reorder or cache
        // accesses to the buffer around the instruction.
        asm volatile("fxsave (%0)" : : "r"(buf) : "memory");
    else
        __sse_xsave(buf);
}

/**
 * Restores FPU/SSE (and, on the XSAVE path, extended) state from `buf`.
 *
 * Uses FXRSTOR when __sse_legacy_save is set, otherwise __sse_xrstor().
 *
 * @param buf Source save area.
 *            NOTE(review): per the instruction definitions, FXRSTOR needs a
 *            16-byte-aligned and XRSTOR a 64-byte-aligned buffer; presumably
 *            it was filled by sse::save() — verify at the call sites.
 */
void sse::load(std::uint8_t* buf) {
    if(__sse_legacy_save)
        // FXRSTOR reads through `buf`; the "memory" clobber forces any
        // pending stores to the buffer to be completed before the
        // instruction executes.
        asm volatile("fxrstor (%0)" : : "r"(buf) : "memory");
    else
        __sse_xrstor(buf);
}