Skip to content

Commit

Permalink
Merge pull request #29 from nhatdongdang/feat/add-arg
Browse files — browse the repository at this point in the history
Add exe arg
  • Loading branch information
johnathanchann authored Jul 5, 2024
2 parents 31e4844 + 3adde2f commit 9759b85
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 8 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ build:
cp build/speed_gpu ./

run: build
./speed_gpu ./weights_and_biases.txt ./tensors
./speed_gpu ./weights_and_biases.txt ./tensors 100000

test: build
./speed_gpu ./weights_and_biases.txt ./tensors
./speed_gpu ./weights_and_biases.txt ./tensors 100000
mv ./results.csv ./test
python3 ./test/verify_csv.py
11 changes: 5 additions & 6 deletions src/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,7 @@ __global__ void infer(matrix** d_inputs, int* d_results, matrix** d_weights, mat
int num_threads = blockDim.x * gridDim.x;
int thread_idx = (blockIdx.x * blockDim.x + threadIdx.x);

if (thread_idx > it_per_input)
return;
if (thread_idx > it_per_input) return;

matrix* input = d_inputs[in_num];

Expand Down Expand Up @@ -156,9 +155,8 @@ __global__ void infer(matrix** d_inputs, int* d_results, matrix** d_weights, mat
#define IT_PER_IN 1000000

int main(int argc, char* argv[]) {

if (argc < 3) {
printf("Not enough arguments.");
if (argc < 4) {
printf("Not enough arguments. Usage: speed_cpu <path_to_model.txt> <tensors_dir/> <number_of_inferences>\n");
return EXIT_FAILURE;
}

Expand Down Expand Up @@ -236,8 +234,9 @@ int main(int argc, char* argv[]) {

cudaMemset(d_results, 0, sizeof(int) * input_count);

int iter_per_in = atoi(argv[3]);
for (int i = 0; i < input_count; i++) {
infer<<<108, 69>>>(d_inputs, d_results, d_weights, d_biases, IT_PER_IN, i);
infer<<<108, 69>>>(d_inputs, d_results, d_weights, d_biases, iter_per_in, i);
}

cudaDeviceSynchronize();
Expand Down

0 comments on commit 9759b85

Please sign in to comment.