
Commit f0261da
address #282
lucidrains committed Oct 28, 2024
1 parent eeed503 commit f0261da
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'x-transformers',
   packages = find_packages(exclude=['examples']),
-  version = '1.40.9',
+  version = '1.40.10',
   license='MIT',
   description = 'X-Transformers - Pytorch',
   author = 'Phil Wang',
2 changes: 1 addition & 1 deletion x_transformers/x_transformers.py
@@ -1019,7 +1019,7 @@ def __init__(
         self.qk_norm_q_scale = self.qk_norm_k_scale = 1
         if qk_norm and qk_norm_dim_scale:
             self.qk_norm_q_scale = nn.Parameter(torch.ones(heads, 1, dim_head))
-            self.qk_norm_k_scale = nn.Parameter(torch.ones(heads, 1, dim_head))
+            self.qk_norm_k_scale = nn.Parameter(torch.ones(kv_heads, 1, dim_head))

         assert (not qk_norm) or divisible_by(dim_head, qk_norm_groups), 'dimension per attention head must be divisible by the qk norm groups'
         assert not (qk_norm and (dim_head // qk_norm_groups) <= 2), 'the group dimension may be too small (2 was too small in my tests, but 4 still works, surprisingly)'
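Why the one-line change matters: when grouped-query attention is used (kv_heads < heads), the key tensor carries only kv_heads heads, so a QK-norm scale parameter shaped (heads, 1, dim_head) no longer broadcasts against it. Below is a minimal sketch of the mismatch and the fix; the tensor names and sizes are illustrative, not taken from the repository.

import torch
import torch.nn as nn
import torch.nn.functional as F

heads, kv_heads, dim_head = 8, 2, 64   # grouped-query attention: fewer kv heads
batch, seq = 1, 16

q = torch.randn(batch, heads, seq, dim_head)
k = torch.randn(batch, kv_heads, seq, dim_head)

# per-dimension scales learned under qk_norm_dim_scale, as in __init__ above
qk_norm_q_scale = nn.Parameter(torch.ones(heads, 1, dim_head))
qk_norm_k_scale = nn.Parameter(torch.ones(kv_heads, 1, dim_head))  # the fix

q = F.normalize(q, dim = -1) * qk_norm_q_scale  # (1, heads, 1, dim_head) broadcasts
k = F.normalize(k, dim = -1) * qk_norm_k_scale  # (1, kv_heads, 1, dim_head) broadcasts

# the pre-fix parameter, sized by `heads`, fails to broadcast against k
old_k_scale = torch.ones(heads, 1, dim_head)
try:
    F.normalize(k, dim = -1) * old_k_scale
except RuntimeError as e:
    print('pre-fix shape mismatch:', e)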
