Skip to content

Commit

Permalink
Call __builtin_cpu_init() so that the checks work
Browse files Browse the repository at this point in the history
  • Loading branch information
robertdfrench committed Jul 24, 2024
1 parent a39883a commit 8e30fae
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 4 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ argue that two longstanding design decisions in critical open source
software are what made this attack possible: [linking OpenSSH against
systemd][biebl], and the existence of [GNU IFUNC][sourceware].


**Before You Start**: Much of this discussion deals with the intricacies
of dynamic linking on Linux. If you are not already intimately familiar
with this, check out [`dynamic_linking.md`](dynamic_linking.md) for a
quick primer.



## Quick Recap of CVE-2024-3094
Expand Down Expand Up @@ -46,9 +50,6 @@ flowchart TD
G --> D
```

Much of this discussion deals with the intricacies of dynamic linking on
Linux. If you are not already intimately familiar with this, check out
[`dynamic_linking.md`](dynamic_linking.md) for a quick primer.


## Why do Linux Distros modify OpenSSH?
Expand Down
3 changes: 3 additions & 0 deletions code/cpu_demo.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ static void* resolve_cpu_info(void) {
// This is to show that the resolver runs before main.
printf("Running the Resolver\n");

// Initialize the CPU feature checks
__builtin_cpu_init();

// Here we check for CPU features to determine which function should be
// run when the program calls the stub function "print_cpu_info".
if (__builtin_cpu_supports("avx2")) {
Expand Down
1 change: 1 addition & 0 deletions code/speed_demo_always.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ void normal_incrementer() {
// Global Offset Table. When this stub is invoked in the future, the PLT will
// cause the program to jump directly to the selected function.
void increment_counter() {
__builtin_cpu_init ();
if (__builtin_cpu_supports("avx2")) {
avx2_incrementer();
} else {
Expand Down
1 change: 1 addition & 0 deletions code/speed_demo_ifunc.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ void normal_incrementer() {
// Select an "appropriate" incrementer based on CPU features. The actual choice
// doesn't matter in this case, we just need something for the resolver to do.
static void* resolver(void) {
__builtin_cpu_init();
if (__builtin_cpu_supports("avx2")) {
return fancy_incrementer;
} else {
Expand Down
1 change: 1 addition & 0 deletions code/speed_demo_pointer.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ void normal_incrementer() {
// Select an "appropriate" incrementer based on CPU features. The actual choice
// doesn't matter in this case, we just need something for the resolver to do.
void (*resolver(void))(void) {
__builtin_cpu_init();
if (__builtin_cpu_supports("avx2")) {
return fancy_incrementer;
} else {
Expand Down
1 change: 1 addition & 0 deletions code/speed_demo_upfront.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ void increment_counter() {
}

// Detect CPU features once and record the result in the file-level flag
// cpu_has_avx2 (declared outside this hunk).
void detect_cpu_features() {
// Initialize the CPU feature checks before querying them.
__builtin_cpu_init();
// Store whether the running CPU reports AVX2 support.
cpu_has_avx2 = __builtin_cpu_supports("avx2");
}

Expand Down
50 changes: 50 additions & 0 deletions code/vector_add.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <smmintrin.h>
#include <stdio.h>

/*
 * Element-wise addition: c[k] = a[k] + b[k] for every k in [0, n).
 *
 * Four floats are processed per iteration with 128-bit unaligned loads and
 * stores; any elements left over when n is not a multiple of four are added
 * one at a time afterwards.
 *
 * a, b  input arrays holding at least n floats each
 * c     output array with room for at least n floats
 * n     number of elements to add; values <= 0 leave c untouched
 *
 * Prints "Implementation: sse4.2" so the caller can see which variant ran.
 */
void vector_add_sse42(float *a, float *b, float *c, int n) {
    // One index shared by both loops: the scalar tail must resume exactly
    // where the vector loop stopped.
    int i = 0;

    printf("Implementation: sse4.2\n");

    // "n - i >= 4" cannot overflow because i stays within [0, n] whenever the
    // loop runs, and it is simply false for n < 4 (including negative n).
    for (; n - i >= 4; i += 4) {
        __m128 va = _mm_loadu_ps(&a[i]);
        __m128 vb = _mm_loadu_ps(&b[i]);
        __m128 vc = _mm_add_ps(va, vb);
        _mm_storeu_ps(&c[i], vc);
    }

    // Handle the remaining (at most three) elements.
    for (; i < n; i++) {
        c[i] = a[i] + b[i];
    }
}

/*
 * Portable fallback: adds a and b element by element into c, for the first n
 * elements, using ordinary scalar arithmetic.
 *
 * Prints "Implementation: plain" so the caller can see which variant ran.
 */
void vector_add_plain(float *a, float *b, float *c, int n) {
    int idx = 0;

    printf("Implementation: plain\n");

    while (idx < n) {
        c[idx] = a[idx] + b[idx];
        idx++;
    }
}

// vector_add is declared as a GNU indirect function (ifunc): it has no body
// of its own. The function named in the attribute, resolve_vector_add, is
// called to choose the implementation this symbol is bound to.
void vector_add(float*, float*, float*, int)
__attribute__((ifunc("resolve_vector_add")));

/*
 * IFUNC resolver for vector_add.
 *
 * Returns a pointer to the implementation to use: vector_add_sse42 when the
 * running CPU reports SSE4.2 support, vector_add_plain otherwise.
 */
void (*resolve_vector_add(void))(float*, float*, float*, int) {
    // Initialize the CPU feature checks before querying them.
    __builtin_cpu_init();

    return __builtin_cpu_supports("sse4.2") ? vector_add_sse42
                                            : vector_add_plain;
}

/*
 * Demo driver: fills a with 1..16 and b with 16..1, adds them through the
 * ifunc-dispatched vector_add, and prints the 16 sums on one line.
 */
int main() {
    enum { COUNT = 16 };
    float a[COUNT];
    float b[COUNT];
    float c[COUNT];

    // a ascends from 1 to 16 while b descends from 16 to 1.
    for (int k = 0; k < COUNT; k++) {
        a[k] = (float)(k + 1);
        b[k] = (float)(COUNT - k);
    }

    vector_add(a, b, c, COUNT);

    // Print each result followed by a space, then end the line.
    for (const float *p = c; p != c + COUNT; p++) {
        printf("%f ", *p);
    }
    printf("\n");

    return 0;
}

0 comments on commit 8e30fae

Please sign in to comment.