diff --git a/src/open_clip/model_configs/ViTamin-L-384.json b/src/open_clip/model_configs/ViTamin-L-384.json
new file mode 100644
index 000000000..1278d8393
--- /dev/null
+++ b/src/open_clip/model_configs/ViTamin-L-384.json
@@ -0,0 +1,20 @@
+{
+    "embed_dim": 768,
+    "vision_cfg": {
+        "timm_model_name": "vitamin_large_384",
+        "timm_model_pretrained": false,
+        "timm_pool": "",
+        "timm_proj": "linear",
+        "timm_drop": 0.0,
+        "timm_drop_path": 0.1,
+        "image_size": 384
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "width": 768,
+        "heads": 12,
+        "layers": 12
+    },
+    "custom_text": true
+}
diff --git a/src/open_clip/model_configs/ViTamin-L2-384.json b/src/open_clip/model_configs/ViTamin-L2-384.json
new file mode 100644
index 000000000..cc0faaae7
--- /dev/null
+++ b/src/open_clip/model_configs/ViTamin-L2-384.json
@@ -0,0 +1,20 @@
+{
+    "embed_dim": 1024,
+    "vision_cfg": {
+        "timm_model_name": "vitamin_large2_384",
+        "timm_model_pretrained": false,
+        "timm_pool": "",
+        "timm_proj": "linear",
+        "timm_drop": 0.0,
+        "timm_drop_path": 0.1,
+        "image_size": 384
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "width": 1024,
+        "heads": 16,
+        "layers": 24
+    },
+    "custom_text": true
+}
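
Minimal usage sketch (not part of the diff itself): open_clip resolves model names from the config filenames, so the two new files should make "ViTamin-L-384" and "ViTamin-L2-384" available to create_model_and_transforms. This assumes an installed timm version that registers the vitamin_large_384 / vitamin_large2_384 architectures; since timm_model_pretrained is false, weights come from open_clip checkpoints (or stay random) rather than timm.

    import open_clip

    # Model name matches the new config filename minus ".json".
    # pretrained is omitted here, so the model is randomly initialized;
    # pass a released ViTamin checkpoint tag to load weights.
    model, _, preprocess = open_clip.create_model_and_transforms("ViTamin-L-384")
    tokenizer = open_clip.get_tokenizer("ViTamin-L-384")

    # The L2 variant works the same way, just with the wider text tower
    # (width 1024, 24 layers) defined in ViTamin-L2-384.json.
    model_l2, _, preprocess_l2 = open_clip.create_model_and_transforms("ViTamin-L2-384")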