Skip to content

Commit

Permalink
expose a few more vit hparams
Browse files Browse the repository at this point in the history
  • Loading branch information
lucidrains committed Jun 20, 2024
1 parent 6dcc12b commit 2f95258
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ from titok_pytorch import TiTokTokenizer
images = torch.randn(2, 3, 256, 256)

titok = TiTokTokenizer(
- dim = 512,
+ dim = 1024,
+ patch_size = 32,
num_latent_tokens = 32, # they claim only 32 tokens needed
- codebook_size = 8192 # codebook size 8192
+ codebook_size = 4096 # codebook size 4096
)

loss = titok(images)
Expand All @@ -42,8 +43,6 @@ assert recon_images.shape == images.shape
## Todo

- [ ] add multi-resolution patches
- [ ] add lfq
- [ ] support video

## Citations

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "titok-pytorch"
- version = "0.0.4"
+ version = "0.0.5"
description = "TiTok - Pytorch"
authors = [
{ name = "Phil Wang", email = "lucidrains@gmail.com" }
Expand Down
8 changes: 8 additions & 0 deletions titok_pytorch/titok.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ def __init__(
channels = 3,
num_latent_tokens = 32,
enc_depth = 6,
+ enc_heads = 8,
+ enc_dim_head = 64,
dec_depth = 6,
+ dec_heads = 8,
+ dec_dim_head = 64,
codebook_size = 8192,
enc_kwargs: dict = dict(),
dec_kwargs: dict = dict(),
Expand Down Expand Up @@ -84,6 +88,8 @@ def __init__(
self.encoder = Encoder(
dim = dim,
depth = enc_depth,
+ heads = enc_heads,
+ attn_dim_head = enc_dim_head,
**enc_kwargs
)

Expand All @@ -97,6 +103,8 @@ def __init__(
self.decoder = Encoder(
dim = dim,
depth = dec_depth,
+ heads = dec_heads,
+ attn_dim_head = dec_dim_head,
**dec_kwargs
)

Expand Down

0 comments on commit 2f95258

Please sign in to comment.