Rename gpt2 backward
gordicaleksa committed Jun 22, 2024
1 parent c6e21c7 commit 71f10e7
Showing 3 changed files with 6 additions and 6 deletions.
profile_gpt2.cu (2 changes: 1 addition & 1 deletion)
@@ -55,7 +55,7 @@ int main(int argc, char *argv[]) {
 // do a training step
 gpt2_forward(&model, x, y, B, T);
 gpt2_zero_grad(&model);
-gpt2_backward(&model, x, true);
+gpt2_backward_and_reduce(&model, x, true);
 gpt2_update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.0f, 1.f, 1, &multi_gpu_config);
 cudaCheck(cudaDeviceSynchronize()); // finish all CUDA work to get correct precise timings

test_gpt2.cu (6 changes: 3 additions & 3 deletions)
@@ -212,7 +212,7 @@ int main(int argc, char *argv[]) {
 clock_gettime(CLOCK_MONOTONIC, &start);
 gpt2_forward(&model, x, y, B, T);
 gpt2_zero_grad(&model);
-gpt2_backward(&model, x, true);
+gpt2_backward_and_reduce(&model, x, true);
 clock_gettime(CLOCK_MONOTONIC, &end);
 double time_elapsed_s = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) / 1e9;

@@ -328,7 +328,7 @@ int main(int argc, char *argv[]) {
 dataloader_next_batch(&loader);
 gpt2_forward(&model, loader.inputs, loader.targets, B, T);
 gpt2_zero_grad(&model);
-gpt2_backward(&model, loader.inputs, true);
+gpt2_backward_and_reduce(&model, loader.inputs, true);
 gpt2_update(&model, 1e-4f, 0.9f, 0.95f, 1e-8f, 0.0f, 1.0f, step+11, &multi_gpu_config);
 losses[step] = model.mean_loss;
 tokens[step] = loader.inputs[0];
@@ -343,7 +343,7 @@ int main(int argc, char *argv[]) {
 dataloader_next_batch(&loader);
 gpt2_forward(&model, loader.inputs, loader.targets, B, T);
 gpt2_zero_grad(&model);
-gpt2_backward(&model, loader.inputs, true);
+gpt2_backward_and_reduce(&model, loader.inputs, true);
 gpt2_update(&model, 1e-4f, 0.9f, 0.95f, 1e-8f, 0.0f, 1.0f, step+11, &multi_gpu_config);

 if(loader.inputs[0] != tokens[step]) {
train_gpt2.cu (4 changes: 2 additions & 2 deletions)
@@ -722,7 +722,7 @@ void gpt2_zero_grad(GPT2 *model) {
 cudaCheck(cudaDeviceSynchronize());
 }

-void gpt2_backward(GPT2 *model, int* inputs, bool last_step) {
+void gpt2_backward_and_reduce(GPT2 *model, int* inputs, bool last_step) {
 NVTX_RANGE_FN();
 // double check we forwarded previously, with targets
 if (model->mean_loss == -1.0f) {
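Note: the new name describes what the function already does. Judging from the `last_step` parameter and the allreduce mentioned in the comments of the next hunk, the backward pass also takes care of reducing gradients across GPUs, and that reduce only needs to fire on the final gradient-accumulation micro-step. A minimal sketch of that shape, with the reduce left as a placeholder since this diff does not show the function body:

void gpt2_backward_and_reduce(GPT2 *model, int* inputs, bool last_step) {
    // ... backward pass: every parameter's gradient accumulates with += ...
    if (last_step) {
        // hypothetical placeholder: synchronize gradients across GPUs here,
        // e.g. an all-reduce over the gradient buffer
    }
}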
@@ -1702,7 +1702,7 @@ int main(int argc, char *argv[]) {
 gpt2_forward(&model, train_loader.inputs, train_loader.targets, B, T, grad_accum_steps);
 lossf += model.mean_loss; // the mean_loss was normalized by grad_accum_steps inside gpt2_forward
 // backward pass. all model params accumulate gradients with += inside this inner loop
-gpt2_backward(&model, train_loader.inputs, micro_step == grad_accum_steps - 1);
+gpt2_backward_and_reduce(&model, train_loader.inputs, micro_step == grad_accum_steps - 1);
 }
 // override the mean loss, accounting for the gradient accumulation loop
 // this is esp important to do here in multigpu update below, where model.mean_loss gets allreduced
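For context, the call sites above all follow one pattern. A condensed sketch of the gradient-accumulation loop in train_gpt2.cu, assembled from the hunk above (the loop header and the final mean_loss override are assumptions based on the surrounding comments, not lines shown in this diff):

float lossf = 0.0f;
for (int micro_step = 0; micro_step < grad_accum_steps; micro_step++) {
    dataloader_next_batch(&train_loader); // assumed: fetch the next micro-batch
    gpt2_forward(&model, train_loader.inputs, train_loader.targets, B, T, grad_accum_steps);
    lossf += model.mean_loss; // mean_loss is pre-normalized by grad_accum_steps
    // gradients accumulate with += across micro-steps; only the last call
    // passes last_step == true, which is when the reduce has to happen
    gpt2_backward_and_reduce(&model, train_loader.inputs, micro_step == grad_accum_steps - 1);
}
model.mean_loss = lossf; // override with the mean over all micro-steps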
