diff --git a/src/open_clip/model_configs/ViT-B-32-256.json b/src/open_clip/model_configs/ViT-B-32-256.json
new file mode 100644
index 000000000..80a2597d8
--- /dev/null
+++ b/src/open_clip/model_configs/ViT-B-32-256.json
@@ -0,0 +1,16 @@
+{
+    "embed_dim": 512,
+    "vision_cfg": {
+        "image_size": 256,
+        "layers": 12,
+        "width": 768,
+        "patch_size": 32
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "width": 512,
+        "heads": 8,
+        "layers": 12
+    }
+}
diff --git a/src/open_clip/pretrained.py b/src/open_clip/pretrained.py
index 728755a2e..1735cad40 100644
--- a/src/open_clip/pretrained.py
+++ b/src/open_clip/pretrained.py
@@ -113,6 +113,10 @@ def _pcfg(url='', hf_hub='', mean=None, std=None):
         "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e32-46683a32.pt"),
 )
 
+_VITB32_256 = dict(
+    datacomp_s34b_b86k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K/'),
+)
+
 _VITB16 = dict(
     openai=_pcfg(
         "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt"),
@@ -237,6 +241,7 @@ def _pcfg(url='', hf_hub='', mean=None, std=None):
     "RN50x16": _RN50x16,
     "RN50x64": _RN50x64,
     "ViT-B-32": _VITB32,
+    "ViT-B-32-256": _VITB32_256,
     "ViT-B-32-quickgelu": _VITB32_quickgelu,
     "ViT-B-16": _VITB16,
     "ViT-B-16-plus-240": _VITB16_PLUS_240,
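
For reference, a minimal usage sketch of the config and checkpoint this patch registers. The model name `ViT-B-32-256` and pretrained tag `datacomp_s34b_b86k` come directly from the registry entry above; the image path and caption candidates are placeholders, and the rest follows the standard open_clip inference pattern rather than anything specific to this change.

```python
import torch
from PIL import Image
import open_clip

# Load the 256x256 ViT-B/32 with the DataComp checkpoint added in this patch.
model, _, preprocess = open_clip.create_model_and_transforms(
    'ViT-B-32-256', pretrained='datacomp_s34b_b86k')
tokenizer = open_clip.get_tokenizer('ViT-B-32-256')
model.eval()

# 'example.jpg' is a placeholder; preprocess resizes/crops to 256x256
# per the image_size in the new model config.
image = preprocess(Image.open('example.jpg')).unsqueeze(0)
text = tokenizer(['a diagram', 'a dog', 'a cat'])

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    # Normalize embeddings before computing cosine-similarity logits.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
```

Note on the configuration: with `patch_size` 32 and `image_size` 256, the vision tower operates on an 8x8 grid of 64 patches, versus the 7x7 grid (49 patches) of the stock 224-pixel ViT-B-32; the text tower is unchanged.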