Merge pull request #35 from airwzz999/main

Add How to run on macOS
X-LANCE · Aug 10, 2024 · 7aab4c1 · 7aab4c1
2 parents a710c55 + a2fd6ec
commit 7aab4c1
Show file tree

Hide file tree

Showing 17 changed files with 132 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,5 @@ gfpgan
 enhance_face_test.py
 checkpoints
 tmp
-train_stage2.py
+train_stage2.py
+espnet
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/AniTalker.iml b/.idea/AniTalker.iml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/README.md b/README.md
@@ -95,6 +95,10 @@ ckpts/
 
 [Explanation of Parameters for demo.py](md_docs/config.md)
 
+## Run the demo on macOS
+
+[How to run on macOS](md_docs/run_on_macOS.md)
+
 
 ### Main Inference Scripts (Hubert, Better Result 💪) - Recommended
 

diff --git a/assets/models_huggingface.png b/assets/models_huggingface.png
diff --git a/assets/results_run_on_macOS.png b/assets/results_run_on_macOS.png
diff --git a/code/demo.py b/code/demo.py
@@ -205,7 +205,7 @@ def main(args):
             padding = np.tile(pose_obj[-1, :], (frame_end - pose_obj.shape[0], 1))
             pose_obj = np.vstack((pose_obj, padding))
 
-        pose_signal = torch.Tensor(pose_obj).unsqueeze(0).to(args.device) / 90 # 90 is for normalization here
+        pose_signal = torch.Tensor(pose_obj).unsqueeze(0).to(args.device)/ 90 # 90 is for normalization here
     else:
         yaw_signal = torch.zeros(1, frame_end, 1).to(args.device) + args.pose_yaw
         pitch_signal = torch.zeros(1, frame_end, 1).to(args.device) + args.pose_pitch
@@ -292,4 +292,13 @@ def main(args):
 
     args = parser.parse_args()
 
+    # macOS Config
+    # Check if MPS is available
+    if torch.backends.mps.is_available():
+        args.device = torch.device("mps")
+        print("MPS backend is available.")
+    # else:
+    #     args.device = torch.device("cpu")
+    #     print("MPS backend is not available. Using CPU instead.")
+
     main(args)
diff --git a/code/diffusion/base.py b/code/diffusion/base.py
@@ -960,7 +960,14 @@ def _extract_into_tensor(arr, timesteps, broadcast_shape):
                             dimension equal to the length of timesteps.
     :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims.
     """
-    res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float()
+
+    if th.backends.mps.is_available():
+        arr = arr.astype(np.float32)
+        # Convert the numpy array to a tensor and then move to the device
+        res = th.from_numpy(arr).to(device=timesteps.device)[timesteps]
+    else:
+        res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float()
+
     while len(res.shape) < len(broadcast_shape):
         res = res[..., None]
     return res.expand(broadcast_shape)

diff --git a/code/networks/styledecoder.py b/code/networks/styledecoder.py
@@ -1,4 +1,6 @@
 import math
+# import os
+# os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
 import torch
 from torch import nn
 from torch.nn import functional as F

diff --git a/md_docs/run_on_macOS.md b/md_docs/run_on_macOS.md
@@ -0,0 +1,50 @@
+Successfully run on a MacBook Pro M3 Max (128GB + 8TB), macOS Sonoma 14.6.1
+
+# 1. Project Download
+
+```
+git clone https://github.com/X-LANCE/AniTalker.git  
+```
+
+# 2. Dependencies Installation
+
+```
+conda create -n anitalker python==3.9.0 -c conda-forge 
+conda activate anitalker 
+conda install pytorch torchvision torchaudio -c pytorch 
+# install espnet 
+git clone https://github.com/espnet/espnet.git 
+cd espnet 
+pip install -e . 
+# install python_speech_features 
+pip install python_speech_features 
+conda install libffi
+pip install -r requirements_macOS.txt 
+```
+
+# 3. Assets Download
+
+```
+# Model  
+cd AniTalker 
+mkdir ckpts 
+# Go to https://huggingface.co/taocode/anitalker_ckpts/tree/main
+# and download all six models into ~/AniTalker/ckpts/
+
+# npy: go to https://huggingface.co/datasets/taocode/anitalker_hubert_feature_samples/blob/main/monalisa.npy
+# and download monalisa.npy into ~/AniTalker/test_demos/audios_hubert/
+```
+![](../assets/models_huggingface.png)
+# 4. Run
+
+```
+PYTORCH_ENABLE_MPS_FALLBACK=1 python ./code/demo.py --infer_type 'hubert_audio_only' --stage1_checkpoint_path 'ckpts/stage1.ckpt' --stage2_checkpoint_path 'ckpts/stage2_audio_only_hubert.ckpt' --test_image_path 'test_demos/portraits/monalisa.jpg' --test_audio_path 'test_demos/audios/monalisa.wav' --test_hubert_path 'test_demos/audios_hubert/monalisa.npy' --result_path 'outputs/monalisa_hubert/'
+```
+![](../assets/results_run_on_macOS.png)
+
+# 5. Change log
+
+- dependencies: requirements.txt
+- use mps instead of cuda
+- change float64 to float32
+- PYTORCH_ENABLE_MPS_FALLBACK=1
diff --git a/outputs/monalisa_hubert/monalisa-monalisa.mp4 b/outputs/monalisa_hubert/monalisa-monalisa.mp4
diff --git a/requirements.txt b/requirements.txt
@@ -2,7 +2,7 @@ pytorch-lightning==1.6.5
 torchmetrics==0.5.0
 torch==1.8.0
 torchvision
-scipy==1.5.4
+scipy
 numpy==1.19.5
 tqdm
 espnet==202301

diff --git a/requirements_macOS.txt b/requirements_macOS.txt
@@ -0,0 +1,7 @@
+pytorch-lightning==1.6.5
+torchmetrics==0.5.0
+scipy
+numpy==1.19.5
+tqdm
+moviepy
+transformers==4.19.2