Skip to content

Commit

Permalink
Merge branch 'main' into export_wordlist_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
JimmyZhang12 committed May 1, 2024
2 parents c743937 + f658b6f commit 7678ff4
Show file tree
Hide file tree
Showing 40 changed files with 4,278 additions and 417 deletions.
149 changes: 144 additions & 5 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ jobs:
apt-get update && apt-get install libsox-fmt-all -y && \
popd
# AMMO installation
pip install nvidia-ammo~=0.9.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
# PyTorch Lightning version
python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"
Expand Down Expand Up @@ -220,7 +223,26 @@ jobs:
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"


# CI job: runs `python -m tests.setup` inside the per-run NeMo container image
# (tagged with github.run_id) and saves its output under /home/TestData/nlp,
# which lives on the host directory mounted via the --volume container option.
# Starts only after the cicd-test-container-setup job. The final step calls the
# cancel-workflow action when an earlier step has failed.
L0_Setup_Test_Data_And_Models:
needs: [cicd-test-container-setup]
runs-on: self-hosted-azure
container:
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
options:
# --user 0:128
--device=/dev/nvidia0
--gpus all
--shm-size=8g
--env TRANSFORMERS_OFFLINE=0
--env HYDRA_FULL_ERROR=1
--volume /mnt/datadrive/TestData:/home/TestData
steps:
- name: Checkout repository
uses: actions/checkout@v4
- run: |
python -m tests.setup --save_dir /home/TestData/nlp
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

## - name: L2: Multimodal Imagen Train

Expand All @@ -243,10 +265,9 @@ jobs:
uses: actions/checkout@v4
- run: |
CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
--input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf \
--output_path=/home/TestData/nlp/megatron_llama/llama-ci-hf/llama_ci.nemo \
--input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \
--output_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
--precision=16
rm -f /home/TestData/nlp/megatron_llama/llama-ci-hf/llama_ci.nemo
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

Expand Down Expand Up @@ -322,6 +343,124 @@ jobs:
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

# CI job: runs megatron_llama_quantization.py on llama_ci.nemo with
# quantization.algorithm=null, writes the result to ci_baseline on the mounted
# TestData volume, then deletes that output. The final step calls the
# cancel-workflow action when an earlier step has failed.
# NOTE(review): this job reads /home/TestData/nlp/megatron_llama/llama_ci.nemo
# but `needs` lists only cicd-test-container-setup -- confirm the checkpoint is
# guaranteed to exist on the volume before this job starts.
L2_PTQ_Llama2_Export_Only:
needs: [cicd-test-container-setup]
runs-on: self-hosted-azure
container:
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
options:
# --user 0:128
--device=/dev/nvidia0
--gpus all
--shm-size=8g
--env TRANSFORMERS_OFFLINE=0
--env HYDRA_FULL_ERROR=1
--volume /mnt/datadrive/TestData:/home/TestData
steps:
- name: Checkout repository
uses: actions/checkout@v4
- run: |
python examples/nlp/language_modeling/megatron_llama_quantization.py \
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
quantization.algorithm=null \
model_save=/home/TestData/nlp/megatron_llama/ci_baseline
rm -rf /home/TestData/nlp/megatron_llama/ci_baseline
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

# CI job: runs megatron_llama_quantization.py on llama_ci.nemo with
# quantization.algorithm=fp8, using test.json as the calibration dataset
# (num_calib_size=8, inference batch size 2). Tensor model parallel size,
# trainer devices and export inference tensor parallel are all set to 2.
# The output ci_fp8.qnemo is written to the mounted TestData volume and then
# deleted. The final step calls the cancel-workflow action when an earlier
# step has failed.
# NOTE(review): reads llama_ci.nemo from the shared volume while `needs` lists
# only cicd-test-container-setup -- confirm the checkpoint exists beforehand.
L2_PTQ_Llama2_FP8:
needs: [cicd-test-container-setup]
runs-on: self-hosted-azure
container:
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
options:
# --user 0:128
--device=/dev/nvidia0
--gpus all
--shm-size=8g
--env TRANSFORMERS_OFFLINE=0
--env HYDRA_FULL_ERROR=1
--volume /mnt/datadrive/TestData:/home/TestData
steps:
- name: Checkout repository
uses: actions/checkout@v4
- run: |
python examples/nlp/language_modeling/megatron_llama_quantization.py \
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
tensor_model_parallel_size=2 \
trainer.devices=2 \
quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
quantization.algorithm=fp8 \
quantization.num_calib_size=8 \
inference.batch_size=2 \
export.inference_tensor_parallel=2 \
model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

# CI job: runs megatron_llama_quantization.py on llama_ci.nemo with
# quantization.algorithm=int8_sq, using test.json as the calibration dataset
# (num_calib_size=8, inference batch size 2). No parallelism overrides are
# passed here. The output ci_int8_sq.qnemo is written to the mounted TestData
# volume and then deleted. The final step calls the cancel-workflow action
# when an earlier step has failed.
# NOTE(review): reads llama_ci.nemo from the shared volume while `needs` lists
# only cicd-test-container-setup -- confirm the checkpoint exists beforehand.
L2_PTQ_Llama2_INT8_SQ:
needs: [cicd-test-container-setup]
runs-on: self-hosted-azure
container:
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
options:
# --user 0:128
--device=/dev/nvidia0
--gpus all
--shm-size=8g
--env TRANSFORMERS_OFFLINE=0
--env HYDRA_FULL_ERROR=1
--volume /mnt/datadrive/TestData:/home/TestData
steps:
- name: Checkout repository
uses: actions/checkout@v4
- run: |
python examples/nlp/language_modeling/megatron_llama_quantization.py \
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
quantization.algorithm=int8_sq \
quantization.num_calib_size=8 \
inference.batch_size=2 \
model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

# CI job: runs megatron_llama_quantization.py on llama_ci.nemo with
# quantization.algorithm=int4_awq, using test.json as the calibration dataset
# (num_calib_size=8, inference batch size 2). Tensor model parallel size and
# trainer devices are both set to 1. The output ci_int4_awq.qnemo is written
# to the mounted TestData volume and then deleted. The final step calls the
# cancel-workflow action when an earlier step has failed.
# NOTE(review): reads llama_ci.nemo from the shared volume while `needs` lists
# only cicd-test-container-setup -- confirm the checkpoint exists beforehand.
L2_PTQ_Llama2_INT4_AWQ:
needs: [cicd-test-container-setup]
runs-on: self-hosted-azure
container:
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
options:
# --user 0:128
--device=/dev/nvidia0
--gpus all
--shm-size=8g
--env TRANSFORMERS_OFFLINE=0
--env HYDRA_FULL_ERROR=1
--volume /mnt/datadrive/TestData:/home/TestData
steps:
- name: Checkout repository
uses: actions/checkout@v4
- run: |
python examples/nlp/language_modeling/megatron_llama_quantization.py \
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
tensor_model_parallel_size=1 \
trainer.devices=1 \
quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
quantization.algorithm=int4_awq \
quantization.num_calib_size=8 \
inference.batch_size=2 \
model_save=/home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo
rm -rf /home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

# L2: ASR dev run
ASR_dev_run_Speech_to_Text:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -4664,7 +4803,7 @@ jobs:
--volume /mnt/datadrive/TestData:/home/TestData
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
- run: |
rm -rf /home/TestData/nlp/megatron_ir/working_dir
Expand Down
25 changes: 25 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,31 @@ Latest News

</details>

<details open>
<summary><b>Speech Recognition</b></summary>
<details>
<summary><a href="https://developer.nvidia.com/blog/new-standard-for-speech-recognition-and-translation-from-the-nvidia-nemo-canary-model/">New Standard for Speech Recognition and Translation from the NVIDIA NeMo Canary Model</a> (2024/04/18) </summary>

The NeMo team just released Canary, a multilingual model that transcribes speech in English, Spanish, German, and French with punctuation and capitalization. Canary also provides bi-directional translation between English and the three other supported languages.
<br><br>
</details>

<details>
<summary><a href="https://developer.nvidia.com/blog/pushing-the-boundaries-of-speech-recognition-with-nemo-parakeet-asr-models/">Pushing the Boundaries of Speech Recognition with NVIDIA NeMo Parakeet ASR Models</a> (2024/04/18) </summary>

NVIDIA NeMo, an end-to-end platform for the development of multimodal generative AI models at scale anywhere—on any cloud and on-premises—released the Parakeet family of automatic speech recognition (ASR) models. These state-of-the-art ASR models, developed in collaboration with Suno.ai, transcribe spoken English with exceptional accuracy.
<br><br>
</details>

<details>
<summary><a href="https://developer.nvidia.com/blog/turbocharge-asr-accuracy-and-speed-with-nvidia-nemo-parakeet-tdt/">Turbocharge ASR Accuracy and Speed with NVIDIA NeMo Parakeet-TDT</a> (2024/04/18) </summary>

NVIDIA NeMo, an end-to-end platform for developing multimodal generative AI models at scale anywhere—on any cloud and on-premises—recently released Parakeet-TDT. This new addition to the NeMo ASR Parakeet model family boasts better accuracy and 64% greater speed over the previously best model, Parakeet-RNNT-1.1B.
<br><br>
</details>

</details>




Expand Down
18 changes: 17 additions & 1 deletion examples/audio_tasks/audio_to_audio_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import json
import os
import tempfile
from collections import defaultdict
from dataclasses import dataclass, field, is_dataclass
from typing import List, Optional

Expand Down Expand Up @@ -101,6 +102,9 @@ class AudioEvaluationConfig(process_audio.ProcessConfig):
# Metrics to calculate
metrics: List[str] = field(default_factory=lambda: ['sdr', 'estoi'])

# Return metric values for each example
return_values_per_example: bool = False


def get_evaluation_dataloader(config):
"""Prepare a dataloader for evaluation.
Expand Down Expand Up @@ -174,6 +178,9 @@ def main(cfg: AudioEvaluationConfig):
# Setup metrics
metrics = get_metrics(cfg)

if cfg.return_values_per_example and cfg.batch_size > 1:
raise ValueError('return_example_values is only supported for batch_size=1.')

# Processing
if not cfg.only_score_manifest:
# Process audio using the configured model and save in the output directory
Expand Down Expand Up @@ -236,6 +243,10 @@ def main(cfg: AudioEvaluationConfig):

num_files += 1

if cfg.max_utts is not None and num_files >= cfg.max_utts:
logging.info('Reached max_utts: %s', cfg.max_utts)
break

# Prepare dataloader
config = {
'manifest_filepath': temporary_manifest_filepath,
Expand All @@ -249,6 +260,8 @@ def main(cfg: AudioEvaluationConfig):
}
temporary_dataloader = get_evaluation_dataloader(config)

metrics_value_per_example = defaultdict(list)

# Calculate metrics
for eval_batch in tqdm(temporary_dataloader, desc='Evaluating'):
processed_signal, processed_length, target_signal, target_length = eval_batch
Expand All @@ -257,7 +270,9 @@ def main(cfg: AudioEvaluationConfig):
raise RuntimeError(f'Length mismatch.')

for name, metric in metrics.items():
metric.update(preds=processed_signal, target=target_signal, input_length=target_length)
value = metric(preds=processed_signal, target=target_signal, input_length=target_length)
if cfg.return_values_per_example:
metrics_value_per_example[name].append(value.item())

# Convert to a dictionary with name: value
metrics_value = {name: metric.compute().item() for name, metric in metrics.items()}
Expand All @@ -277,6 +292,7 @@ def main(cfg: AudioEvaluationConfig):
# Inject the metric name and score into the config, and return the entire config
with open_dict(cfg):
cfg.metrics_value = metrics_value
cfg.metrics_value_per_example = dict(metrics_value_per_example)

return cfg

Expand Down
1 change: 0 additions & 1 deletion examples/audio_tasks/conf/beamforming.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ model:
_target_: nemo.collections.asr.modules.audio_preprocessing.AudioToSpectrogram
fft_length: 512 # Length of the window and FFT for calculating spectrogram
hop_length: 256 # Hop length for calculating spectrogram
power: null

decoder:
_target_: nemo.collections.asr.modules.audio_preprocessing.SpectrogramToAudio
Expand Down
3 changes: 0 additions & 3 deletions examples/audio_tasks/conf/masking.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# This configuration contains the exemplary values for training a multichannel speech enhancement model with a mask-based beamformer.
#
name: "masking"

model:
Expand Down Expand Up @@ -44,7 +42,6 @@ model:
_target_: nemo.collections.asr.modules.audio_preprocessing.AudioToSpectrogram
fft_length: 512 # Length of the window and FFT for calculating spectrogram
hop_length: 256 # Hop length for calculating spectrogram
power: null

decoder:
_target_: nemo.collections.asr.modules.audio_preprocessing.SpectrogramToAudio
Expand Down
Loading

0 comments on commit 7678ff4

Please sign in to comment.