
General performance and readability optimisation pass #16

Status: Closed · wants to merge 3 commits (showing changes from 2 commits)
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16)
# Set the project name
project(ichida-algo)

set(CMAKE_C_FLAGS "-O3 -march=native -ffast-math -funroll-loops -Wall -Wextra")
set(CMAKE_C_FLAGS "-O3 -march=native -ffast-math -mfma -funroll-loops -Wall -Wextra")
set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED True)
set(CMAKE_VERBOSE_MAKEFILE ON)
10 changes: 5 additions & 5 deletions include/matrix.h
@@ -3,15 +3,15 @@
typedef struct {
int rows;
int cols;
float* data;
float* data __attribute__((aligned(32)));
} matrix;

matrix* new_matrix(int rows, int cols);

void matrix_mul(const matrix* a, const matrix* b, const matrix* result);
void matrix_mul(const float* inputs, const float* weights, float* __restrict__ results, int res_rows, int w_cols);

void matrix_add(matrix* a, const matrix* b);
void matrix_add_inplace_1d(const float* src, float* __restrict__ dest, int rows);

void relu(matrix* a);
void relu(float* dest, int rows);

void softmax(matrix* a);
void softmax(float* dest, int rows);
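matrix.c is not included in this diff excerpt, so the implementations behind the new raw-pointer signatures are not shown. As a rough editor's sketch only (not the PR's actual code, and assuming row-major weight storage with the caller sizing all buffers), implementations consistent with these declarations might look like:

```c
#include <math.h>

// Sketch: results = W * inputs for a column vector, with W stored row-major
// as a (res_rows x w_cols) matrix.
void matrix_mul(const float* inputs, const float* weights, float* __restrict__ results,
                int res_rows, int w_cols) {
    for (int i = 0; i < res_rows; i++) {
        float sum = 0.0f;
        for (int j = 0; j < w_cols; j++) {
            sum += weights[i * w_cols + j] * inputs[j];
        }
        results[i] = sum;
    }
}

// Sketch: dest[i] += src[i] over a column vector of length rows.
void matrix_add_inplace_1d(const float* src, float* __restrict__ dest, int rows) {
    for (int i = 0; i < rows; i++) {
        dest[i] += src[i];
    }
}

// Sketch: element-wise ReLU.
void relu(float* dest, int rows) {
    for (int i = 0; i < rows; i++) {
        if (dest[i] < 0.0f) {
            dest[i] = 0.0f;
        }
    }
}

// Sketch: numerically stable softmax over a column vector.
void softmax(float* dest, int rows) {
    float max = dest[0];
    for (int i = 1; i < rows; i++) {
        if (dest[i] > max) {
            max = dest[i];
        }
    }
    float sum = 0.0f;
    for (int i = 0; i < rows; i++) {
        dest[i] = expf(dest[i] - max);
        sum += dest[i];
    }
    for (int i = 0; i < rows; i++) {
        dest[i] /= sum;
    }
}
```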
22 changes: 22 additions & 0 deletions script/convertbin.py
@@ -0,0 +1,22 @@
import os
import struct

def read_and_convert_file(file_path, output_file):
    with open(file_path, 'r') as file:
        content = file.read().strip()
        float_values = [float(x) for x in content.split(',')]

    with open(output_file, 'wb') as bin_file:
        for value in float_values:
            bin_file.write(struct.pack('f', value))

if __name__ == "__main__":
    directory = "./txttensors"  # Replace with the path to your directory

    for i in range(1, 53):  # Assuming files are named 01out.txt to 52out.txt
        file_name = f"{i:02d}out.txt"
        file_path = os.path.join(directory, file_name)
        output_file = os.path.join(directory, f"{i:02d}out.bin")

        read_and_convert_file(file_path, output_file)
        print(f"Binary file saved to {output_file}")
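The script writes each tensor (225 values for this model) as consecutive native-endian 32-bit floats with no header, which is the layout main.c expects when it freads a tensor into its slot. As a quick sanity check, one of the generated blobs can be read back in C as sketched below; this is an editor's illustration, and the file path is only an example:

```c
#include <stdio.h>
#include <stdlib.h>

#define TENSOR_SIZE 225

// Sketch: read one converted tensor back and print its first few values.
int main(void) {
    float tensor[TENSOR_SIZE];
    FILE* file = fopen("./txttensors/01out.bin", "rb");
    if (file == NULL) {
        perror("fopen");
        return EXIT_FAILURE;
    }
    size_t count = fread(tensor, sizeof(float), TENSOR_SIZE, file);
    fclose(file);
    if (count != TENSOR_SIZE) {
        fprintf(stderr, "expected %d floats, got %zu\n", TENSOR_SIZE, count);
        return EXIT_FAILURE;
    }
    printf("%f %f %f\n", tensor[0], tensor[1], tensor[2]);
    return EXIT_SUCCESS;
}
```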
152 changes: 84 additions & 68 deletions src/main.c
@@ -1,18 +1,25 @@
#include "../include/matrix.h"

#define _DEFAULT_SOURCE // Exposes d_type/DT_REG from <dirent.h> on glibc
#include <dirent.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#define NUM_LAYERS 7
#define TENSOR_SIZE 225

typedef uint8_t u8;
typedef float f32;

matrix* weights[NUM_LAYERS];
matrix* biases[NUM_LAYERS];

char letters[52] = {'A', 'a', 'B', 'b', 'C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'H', 'h', 'I', 'i',
'J', 'j', 'K', 'k', 'L', 'l', 'M', 'm', 'N', 'n', 'O', 'o', 'P', 'p', 'Q', 'q', 'R', 'r',
'S', 's', 'T', 't', 'U', 'u', 'V', 'v', 'W', 'w', 'X', 'x', 'Y', 'y', 'Z', 'z'};
static char letters[52] = {'A', 'a', 'B', 'b', 'C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'H', 'h', 'I', 'i',
'J', 'j', 'K', 'k', 'L', 'l', 'M', 'm', 'N', 'n', 'O', 'o', 'P', 'p', 'Q', 'q', 'R', 'r',
'S', 's', 'T', 't', 'U', 'u', 'V', 'v', 'W', 'w', 'X', 'x', 'Y', 'y', 'Z', 'z'};

void process_weights_str(char* line, int layer) {
char* token;
@@ -65,45 +72,25 @@ void read_model(const char* file_name) {
fclose(file);
}

void read_tensor(matrix* a, const char* fileName) {
FILE* file = fopen(fileName, "r");
char* line = NULL;
size_t len = 0;

getline(&line, &len, file);
char* token;
float value;
const char* delimiter = ",";
token = strtok(line, delimiter);

for (int i = 0; i < 225; i++) {
value = strtof(token, NULL);
(a->data)[i] = value;
token = strtok(NULL, delimiter);
}
free(line);
fclose(file);
}

void propagate_fwd(const matrix* weights, const matrix* input_layer, matrix* output_layer, const matrix* biases) {
matrix_mul(weights, input_layer, output_layer);
matrix_add(output_layer, biases);
void propagate_fwd(const matrix* weights, const matrix* inputs, matrix* outputs, const matrix* biases) {
matrix_mul(inputs->data, weights->data, outputs->data, outputs->rows, weights->cols);
matrix_add_inplace_1d(biases->data, outputs->data, outputs->rows);
}

// Get result from output layer
int get_max(matrix* a) {
u8 get_max(float* src, int rows) {
int idx = 0;
float res = (a->data)[0];
for (int i = 0; i < a->rows; i++) {
if (res < (a->data)[i]) {
res = (a->data)[i];
float res = src[0];
for (int i = 0; i < rows; i++) {
if (res < src[i]) {
res = src[i];
idx = i;
}
}
return idx;
}

int infer(matrix* input) {
u8 infer(matrix* input) {
matrix* mdl_layers[NUM_LAYERS];
mdl_layers[0] = new_matrix(98, 1);
mdl_layers[1] = new_matrix(65, 1);
@@ -114,36 +101,54 @@ int infer(matrix* input) {
mdl_layers[6] = new_matrix(52, 1);

propagate_fwd(weights[0], input, mdl_layers[0], biases[0]);
relu(mdl_layers[0]);
relu(mdl_layers[0]->data, mdl_layers[0]->rows);

propagate_fwd(weights[1], mdl_layers[0], mdl_layers[1], biases[1]);
relu(mdl_layers[1]);
relu(mdl_layers[1]->data, mdl_layers[1]->rows);

propagate_fwd(weights[2], mdl_layers[1], mdl_layers[2], biases[2]);
relu(mdl_layers[2]);
relu(mdl_layers[2]->data, mdl_layers[2]->rows);

propagate_fwd(weights[3], mdl_layers[2], mdl_layers[3], biases[3]);
relu(mdl_layers[3]);
relu(mdl_layers[3]->data, mdl_layers[3]->rows);

propagate_fwd(weights[4], mdl_layers[3], mdl_layers[4], biases[4]);
relu(mdl_layers[4]);
relu(mdl_layers[4]->data, mdl_layers[4]->rows);

propagate_fwd(weights[5], mdl_layers[4], mdl_layers[5], biases[5]);
relu(mdl_layers[5]);
relu(mdl_layers[5]->data, mdl_layers[5]->rows);

propagate_fwd(weights[6], mdl_layers[5], mdl_layers[6], biases[6]);
softmax(mdl_layers[6]);
softmax(mdl_layers[6]->data, mdl_layers[6]->rows);

return get_max(mdl_layers[6]);
return get_max(mdl_layers[6]->data, mdl_layers[6]->rows);
}

int file_count(const char* dir_path) {
struct dirent* entry;
DIR* dir = opendir(dir_path);

// Count inputs
int num_inputs = 0;
while ((entry = readdir(dir)) != NULL) {
if (entry->d_type == DT_REG)
num_inputs++;
}

closedir(dir);
return num_inputs;
}

int main(int argc, char* argv[]) {
if (argc < 3) {
printf("Not enough arguments.");
printf("Not enough arguments. Usage: speed_cpu <path_to_model.txt> <tensors_dir/>");
return EXIT_FAILURE;
}

// Start timing
struct timeval stop, start;
gettimeofday(&start, NULL);

// TODO: find a way to load static weights and biases
// Load model (ideally this memory would be initialised at compile time to improve speed; a sketch of that idea follows this hunk)
// Dimensions of target model are hardcoded
weights[0] = new_matrix(98, 225);
weights[1] = new_matrix(65, 98);
weights[2] = new_matrix(50, 65);
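One possible direction for the compile-time-loading TODO above, purely an editor's sketch and not part of this PR: generate a C header from the trained weights ahead of time and link the arrays in as constants. The header name, array names, and placeholder contents below are illustrative assumptions.

```c
/* model_weights.h -- hypothetical generated header (e.g. emitted by a script
   similar to convertbin.py). Names and contents are assumptions, not the PR's code. */
#ifndef MODEL_WEIGHTS_H
#define MODEL_WEIGHTS_H

// Layer 0: 98 x 225 weights and 98 biases, row-major. A real generator would
// emit the trained values here instead of the zero placeholders.
static const float layer0_weights[98 * 225] = {0.0f /* , ... generated values */};
static const float layer0_biases[98] = {0.0f /* , ... generated values */};

// ... one pair of arrays per remaining layer ...

#endif
```

main.c could then copy these arrays into the matrix structs (or point at them) instead of parsing the text model at startup.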
@@ -162,48 +167,59 @@ int main(int argc, char* argv[]) {

read_model(argv[1]);

// Run program
// Holding place for input tensors
matrix* input = new_matrix(225, 1);

// ---------------------------------------------------------------------------------- Read from dir

const char* directory_path = argv[2];
struct dirent* entry;
DIR* dir = opendir(directory_path);
int input_count = file_count(directory_path);
printf("Number of input tensors: %d\n", input_count);

matrix* input = new_matrix(225, 1);
// One result slot per input tensor; tensor slots (and results) are 0-indexed, file numbering starts at 1
u8* results = (u8*)malloc(input_count * sizeof(u8));

// Read and process inputs
char* file_name = (char*)malloc((100) * sizeof(char));
char* file_num_str = (char*)malloc((100) * sizeof(char));
__attribute__((aligned(32))) f32* tensors = (f32*)malloc(sizeof(f32) * TENSOR_SIZE * input_count);

int file_num;
int size = 0;
while ((entry = readdir(dir)) != NULL) {
if (entry->d_type == DT_REG) {
size++;
}
}
char* file_path = (char*)malloc((256) * sizeof(char));
char* file_num_str = (char*)malloc((50) * sizeof(char));

int* results = (int*)malloc((size + 1) * sizeof(int));
dir = opendir(directory_path);
// Read all tensors into tensors arr
DIR* dir = opendir(directory_path);
struct dirent* entry;
while ((entry = readdir(dir)) != NULL) {
if (entry->d_type == DT_REG) {
// Get input number
strcpy(file_num_str, entry->d_name);
file_num_str[strlen(entry->d_name) - 7] = '\0';
file_num = atoi(entry->d_name);
strcpy(file_name, directory_path);
strcat(file_name, "/");
strcat(file_name, entry->d_name);
read_tensor(input, file_name);
results[file_num] = infer(input);
int file_num = atoi(entry->d_name);

// Get full path to file
strcpy(file_path, directory_path);
strcat(file_path, "/");
strcat(file_path, entry->d_name);

// Read tensor into full array
FILE* file = fopen(file_path, "rb");

// Offset into correct sector of array
fread(tensors + ((file_num - 1) * TENSOR_SIZE), sizeof(f32), TENSOR_SIZE, file);
Review comment (Collaborator): The current method for reading the input using a binary blob is causing issues with the CI tests. Please create a separate function specifically for reading the input using a binary blob. I recommend creating a corresponding target in the Makefile for this new method of reading input.

Reply (Contributor, author): I would rather change the CI, since we definitely aren't sticking with txt for long.

Reply (Collaborator): The CI works with any inputs located in the ./tensors folder. To ensure compatibility with the CI, we need to decide whether to convert all files in ./tensors to binary blobs.
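For reference, the kind of helper the reviewer asks for could look roughly like the sketch below. This is an editor's illustration only; the function name and error handling are assumptions, and the suggested Makefile target is not shown.

```c
// Sketch: read one TENSOR_SIZE-float binary tensor into its slot in the
// tensors array. Returns 0 on success, -1 on failure.
static int read_tensor_bin(f32* tensors, int file_num, const char* file_path) {
    FILE* file = fopen(file_path, "rb");
    if (file == NULL)
        return -1;
    size_t count = fread(tensors + ((file_num - 1) * TENSOR_SIZE), sizeof(f32), TENSOR_SIZE, file);
    fclose(file);
    return count == TENSOR_SIZE ? 0 : -1;
}
```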

fclose(file);
}
}

free(file_name);
free(file_path);
free(file_num_str);
closedir(dir);

for (int i = 0; i < input_count; i++) {
input->data = tensors + (i * TENSOR_SIZE);
results[i] = infer(input);
}

// Write to csv file
FILE* csv_file = fopen("results.csv", "w+");
fprintf(csv_file, "image_number, guess\n");
for (int i = 1; i <= size; i++) {
for (int i = 0; i < input_count; i++) {
fprintf(csv_file, "%d, %c\n", i, letters[results[i]]);
}
fclose(csv_file);