Skip to content

Commit

Permalink
Make gpt_conf.py defaults match defaults of train.py
Browse files Browse the repository at this point in the history
We need this to ensure that line 545 doesn't override these defaults again when finetuning
  • Loading branch information
gkielian authored Sep 17, 2024
1 parent c9cc034 commit de054e7
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions gpt_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import json
@dataclass
class GPTConfig:
block_size: int = 1024
block_size: int = 256
vocab_size: int = 50304 # GPT-2 vocab_size of 50257, padded up to nearest multiple of 64 for efficiency
n_layer: int = 12
n_head: int = 12
n_kv_group: int = 12
n_embd: int = 768
n_layer: int = 6
n_head: int = 6
n_kv_group: int = 6
n_embd: int = 384

# Steering Vectors
## Where to intercept
Expand Down Expand Up @@ -156,7 +156,7 @@ class GPTConfig:
fire_outermost_sigma: bool = False

# Structuring Options, remember to compile the model
use_post_ln: bool = True
use_post_ln: bool = False

# Layernorm Alternatives and Options
norm_variant_attn: str = "rmsnorm"
Expand Down

0 comments on commit de054e7

Please sign in to comment.