Call __builtin_cpu_init() so that the checks work

robertdfrench · Jul 24, 2024 · 8e30fae · 8e30fae
1 parent a39883a
commit 8e30fae
Show file tree

Hide file tree

Showing 7 changed files with 62 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -15,7 +15,11 @@ argue that two longstanding design decisions in critical open source
 software are what made this attack possible: [linking OpenSSH against
 SystemD][biebl], and the existence of [GNU IFUNC][sourceware].
 
-
+**Before You Start**: Much of this discussion deals with the intricacies
+of dynamic linking on Linux. If you are not already intimately familiar
+with this, check out [`dynamic_linking.md`](dynamic_linking.md) for a
+quick primer.
+
 
 
 ## Quick Recap of CVE-2024-3094
@@ -46,9 +50,6 @@ flowchart TD
     G --> D
 ```
 
-Much of this discussion deals with the intricacies of dynamic linking on
-Linux. If you are not already intimately familiar with this, check out
-[`dynamic_linking.md`](dynamic_linking.md) for a quick primer.
 
 
 ## Why do Linux Distros modify OpenSSH?

diff --git a/code/cpu_demo.c b/code/cpu_demo.c
@@ -44,6 +44,9 @@ static void* resolve_cpu_info(void) {
         // This is to show that the resolver runs before main.
 	printf("Running the Resolver\n");
 
+	// Initialize the CPU feature checks
+	__builtin_cpu_init();
+
         // Here we check for CPU features to determine which function should be
         // run when the program calls the stub function "print_cpu_info".
 	if (__builtin_cpu_supports("avx2")) {

diff --git a/code/speed_demo_always.c b/code/speed_demo_always.c
@@ -27,6 +27,7 @@ void normal_incrementer() {
 // Global Offset Table. When this stub is invoked in the future, the PLT will
 // cause the program to jump directly to the selected function.
 void increment_counter() {
+        __builtin_cpu_init ();
 	if (__builtin_cpu_supports("avx2")) {
 		avx2_incrementer();
 	} else {

diff --git a/code/speed_demo_ifunc.c b/code/speed_demo_ifunc.c
@@ -24,6 +24,7 @@ void normal_incrementer() {
 // Select an "appropriate" incrementer based on CPU features. The actual choice
 // doesn't matter in this case, we just need something for the resolver to do.
 static void* resolver(void) {
+	__builtin_cpu_init();
 	if (__builtin_cpu_supports("avx2")) {
 		return fancy_incrementer;
 	} else {

diff --git a/code/speed_demo_pointer.c b/code/speed_demo_pointer.c
@@ -31,6 +31,7 @@ void normal_incrementer() {
 // Select an "appropriate" incrementer based on CPU features. The actual choice
 // doesn't matter in this case, we just need something for the resolver to do.
 void (*resolver(void))(void) {
+	__builtin_cpu_init();
 	if (__builtin_cpu_supports("avx2")) {
 		return fancy_incrementer;
 	} else {

diff --git a/code/speed_demo_upfront.c b/code/speed_demo_upfront.c
@@ -35,6 +35,7 @@ void increment_counter() {
 }
 
 void detect_cpu_features() {
+	__builtin_cpu_init(); // must run first so the __builtin_cpu_supports check below works
 	cpu_has_avx2 = __builtin_cpu_supports("avx2");
 }
 

diff --git a/code/vector_add.c b/code/vector_add.c
@@ -0,0 +1,50 @@
+#include <smmintrin.h>
+#include <stdio.h>
+
+void vector_add_sse42(float *a, float *b, float *c, int n) { // SIMD variant: c[k] = a[k] + b[k] for k in [0, n)
+	int i; // single index shared by both loops: the scalar tail resumes where the SIMD loop stops
+	printf("Implementation: sse4.2\n");
+	for (i = 0; i <= n - 4; i += 4) { // must not redeclare `int i` here: a shadow leaves the outer index uninitialized
+		__m128 va = _mm_loadu_ps(&a[i]); // unaligned load of four floats
+		__m128 vb = _mm_loadu_ps(&b[i]);
+		__m128 vc = _mm_add_ps(va, vb); // four additions at once
+		_mm_storeu_ps(&c[i], vc);
+	}
+	// Handle the remaining 0-3 elements one at a time
+	for (; i < n; i++) {
+		c[i] = a[i] + b[i];
+	}
+}
+
+void vector_add_plain(float *a, float *b, float *c, int n) { // scalar variant: c[i] = a[i] + b[i] for i in [0, n)
+	printf("Implementation: plain\n"); // announce which implementation is running
+	for (int i = 0; i < n; i++) {
+		c[i] = a[i] + b[i];
+	}
+}
+
+void vector_add(float*, float*, float*, int) // entry point that main calls; it has no body of its own
+	__attribute__((ifunc("resolve_vector_add"))); // bound to whichever function resolve_vector_add returns
+
+void (*resolve_vector_add(void))(float*, float*, float*, int) { // returns a pointer to the chosen implementation
+	__builtin_cpu_init(); // initialize the CPU feature checks before querying them
+	if (__builtin_cpu_supports("sse4.2")) {
+		return vector_add_sse42;
+	} else {
+		return vector_add_plain;
+	}
+}
+
+int main() {
+	float a[16] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}; // ascending 1..16
+	float b[16] = {16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; // descending 16..1, so every a[i] + b[i] is 17
+	float c[16];
+	vector_add(a, b, c, 16); // goes through the ifunc symbol, i.e. the implementation resolve_vector_add selected
+
+	for (int i = 0; i < 16; i++) {
+		printf("%f ", c[i]);
+	}
+	printf("\n");
+
+	return 0;
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -35,6 +35,7 @@ void increment_counter() { @@
     }
    void detect_cpu_features() {
+    	__builtin_cpu_init(); // must run first so the __builtin_cpu_supports check below works
    	cpu_has_avx2 = __builtin_cpu_supports("avx2");
    }
@@ Expand Down @@