[chore] bump to 0.2.0 (#22)

gudgud96 · Oct 17, 2023 · 47dbd5c
1 parent 3b34195
commit 47dbd5c
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -4,7 +4,8 @@ A lightweight library of Frechet Audio Distance calculation.
 
 Currently, we support embeddings from:
 - `VGGish` by [S. Hershey et al.](https://arxiv.org/abs/1812.08466)
-- `PANN` by [Kong et al.](https://arxiv.org/abs/1912.10211).
+- `PANN` by [Kong et al.](https://arxiv.org/abs/1912.10211)
+- `CLAP` by [Wu et al.](https://arxiv.org/abs/2211.06687)
 
 ### Installation
 
@@ -18,21 +19,35 @@ from frechet_audio_distance import FrechetAudioDistance
 # to use `vggish`
 frechet = FrechetAudioDistance(
     model_name="vggish",
+    sample_rate=16000,
     use_pca=False, 
     use_activation=False,
     verbose=False
 )
 # to use `PANN`
 frechet = FrechetAudioDistance(
     model_name="pann",
+    sample_rate=16000,
     use_pca=False, 
     use_activation=False,
     verbose=False
 )
+# to use `CLAP`
+frechet = FrechetAudioDistance(
+    model_name="clap",
+    sample_rate=48000,
+    submodel_name="630k-audioset",  # for CLAP only
+    verbose=False,
+    enable_fusion=False,            # for CLAP only
+)
 fad_score = frechet.score("/path/to/background/set", "/path/to/eval/set", dtype="float32")
 
 ```
 
+You can also have a look at [this notebook](https://github.com/gudgud96/frechet-audio-distance/blob/main/test/test_all.ipynb) for a better understanding of how each model is used.
+
+### Save pre-computed embeddings
+
 When computing the Frechet Audio Distance, you can choose to save the embeddings for future use. 
 
 This capability not only ensures consistency across evaluations but can also significantly reduce computation time, especially if you're evaluating multiple times using the same dataset.

diff --git a/frechet_audio_distance/fad.py b/frechet_audio_distance/fad.py
@@ -19,11 +19,8 @@
 
 from .models.pann import Cnn14, Cnn14_8k, Cnn14_16k
 
-# SAMPLE_RATE = 16000
-
 
 def load_audio_task(fname, sample_rate, dtype="float32"):
-    # print("LOAD AUDIO TASK")
     if dtype not in ['float64', 'float32', 'int32', 'int16']:
         raise ValueError(f"dtype not supported: {dtype}")
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "frechet_audio_distance"
-version = "0.1.2"
+version = "0.2.0"
 authors = [
   { name="Hao Hao Tan", email="helloharry66@gmail.com" },
 ]
@@ -18,13 +18,16 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-  'numpy',
+  'numpy==1.23.4',
   'torch',
-  'scipy',
+  'scipy==1.10.1',
   'tqdm',
   'soundfile',
   'resampy',
-  'torchlibrosa'
+  'torchlibrosa',
+  'laion_clap',
+  'transformers<=4.30.2',
+  'torchaudio',
 ]
 
 [project.urls]