From 5f62bd2dd863a6ee7199e84231ad958cb50e70ac Mon Sep 17 00:00:00 2001
From: Mehdi Cherti
Date: Wed, 13 Sep 2023 14:57:24 +0200
Subject: [PATCH 1/2] add laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K

---
 src/open_clip/pretrained.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/open_clip/pretrained.py b/src/open_clip/pretrained.py
index 1465a2325..49439e4aa 100644
--- a/src/open_clip/pretrained.py
+++ b/src/open_clip/pretrained.py
@@ -100,6 +100,8 @@ def _pcfg(url='', hf_hub='', mean=None, std=None):
     commonpool_s_text_s13m_b4k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-CommonPool.S.text-s13M-b4K/'),
     commonpool_s_basic_s13m_b4k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-CommonPool.S.basic-s13M-b4K/'),
     commonpool_s_s13m_b4k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-CommonPool.S-s13M-b4K/'),
+    # DataComp other models
+    datacomp_s34b_b86k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K'),
 )
 
 _VITB32_quickgelu = dict(

From 0d66509e3f18249a33103708251a61ab5ef5270f Mon Sep 17 00:00:00 2001
From: Mehdi Cherti
Date: Sat, 30 Sep 2023 07:57:41 +0200
Subject: [PATCH 2/2] add vitb32 256x256 datacomp 34B samples seen model

---
 src/open_clip/model_configs/ViT-B-32-256.json | 16 ++++++++++++++++
 src/open_clip/pretrained.py                   |  7 +++++--
 2 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 src/open_clip/model_configs/ViT-B-32-256.json

diff --git a/src/open_clip/model_configs/ViT-B-32-256.json b/src/open_clip/model_configs/ViT-B-32-256.json
new file mode 100644
index 000000000..80a2597d8
--- /dev/null
+++ b/src/open_clip/model_configs/ViT-B-32-256.json
@@ -0,0 +1,16 @@
+{
+    "embed_dim": 512,
+    "vision_cfg": {
+        "image_size": 256,
+        "layers": 12,
+        "width": 768,
+        "patch_size": 32
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "width": 512,
+        "heads": 8,
+        "layers": 12
+    }
+}
diff --git a/src/open_clip/pretrained.py b/src/open_clip/pretrained.py
index 8debbdfc0..1735cad40 100644
--- a/src/open_clip/pretrained.py
+++ b/src/open_clip/pretrained.py
@@ -102,8 +102,6 @@ def _pcfg(url='', hf_hub='', mean=None, std=None):
     commonpool_s_text_s13m_b4k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-CommonPool.S.text-s13M-b4K/'),
     commonpool_s_basic_s13m_b4k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-CommonPool.S.basic-s13M-b4K/'),
     commonpool_s_s13m_b4k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-CommonPool.S-s13M-b4K/'),
-    # DataComp other models
-    datacomp_s34b_b86k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K'),
 )
 
 _VITB32_quickgelu = dict(
@@ -115,6 +113,10 @@ def _pcfg(url='', hf_hub='', mean=None, std=None):
         "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e32-46683a32.pt"),
 )
 
+_VITB32_256 = dict(
+    datacomp_s34b_b86k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K/'),
+)
+
 _VITB16 = dict(
     openai=_pcfg(
         "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt"),
@@ -239,6 +241,7 @@ def _pcfg(url='', hf_hub='', mean=None, std=None):
     "RN50x16": _RN50x16,
     "RN50x64": _RN50x64,
     "ViT-B-32": _VITB32,
+    "ViT-B-32-256": _VITB32_256,
     "ViT-B-32-quickgelu": _VITB32_quickgelu,
     "ViT-B-16": _VITB16,
     "ViT-B-16-plus-240": _VITB16_PLUS_240,