Merge branch 'main' into export_wordlist_fix

JimmyZhang12 · May 1, 2024 · 7678ff4 · 7678ff4
2 parents c743937 + f658b6f
commit 7678ff4
Show file tree

Hide file tree

Showing 40 changed files with 4,278 additions and 417 deletions.
diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
@@ -132,6 +132,9 @@ jobs:
  apt-get update && apt-get install libsox-fmt-all -y && \
  popd
 
+ # AMMO installation
+ pip install nvidia-ammo~=0.9.0 --extra-index-url https://pypi.nvidia.com --no-cache-dir
+
  # PyTorch Lightning version
  python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"
 
@@ -220,7 +223,26 @@ jobs:
  - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
  if: "failure()"
 
-
+ L0_Setup_Test_Data_And_Models:  # runs `python -m tests.setup` with --save_dir on the mounted TestData volume
+ needs: [cicd-test-container-setup]  # NOTE(review): no job visible in this diff lists this job in `needs`; confirm consumers of /home/TestData/nlp are ordered after it
+ runs-on: self-hosted-azure
+ container:
+ image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}  # image tag is keyed by this workflow run's id
+ options:
+ # --user 0:128
+ --device=/dev/nvidia0
+ --gpus all
+ --shm-size=8g
+ --env TRANSFORMERS_OFFLINE=0
+ --env HYDRA_FULL_ERROR=1
+ --volume /mnt/datadrive/TestData:/home/TestData
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ - run: |
+ python -m tests.setup --save_dir /home/TestData/nlp
+ - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"  # executed only when an earlier step in this job failed
+ if: "failure()"
 
 ## - name: L2: Multimodal Imagen Train
 
@@ -243,10 +265,9 @@ jobs:
  uses: actions/checkout@v4
  - run: |
  CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
- --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf \
- --output_path=/home/TestData/nlp/megatron_llama/llama-ci-hf/llama_ci.nemo \
+ --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \
+ --output_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
  --precision=16
- rm -f /home/TestData/nlp/megatron_llama/llama-ci-hf/llama_ci.nemo
  - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
  if: "failure()"
 
@@ -322,6 +343,124 @@ jobs:
  - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
  if: "failure()"
 
+ L2_PTQ_Llama2_Export_Only:  # runs the Llama quantization example with quantization.algorithm=null, then deletes the saved output
+ needs: [cicd-test-container-setup]  # NOTE(review): reads llama_ci.nemo from the shared volume but only waits for the container build; confirm the job that writes that checkpoint is ordered first
+ runs-on: self-hosted-azure
+ container:
+ image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}  # image tag is keyed by this workflow run's id
+ options:
+ # --user 0:128
+ --device=/dev/nvidia0
+ --gpus all
+ --shm-size=8g
+ --env TRANSFORMERS_OFFLINE=0
+ --env HYDRA_FULL_ERROR=1
+ --volume /mnt/datadrive/TestData:/home/TestData
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ - run: |
+ python examples/nlp/language_modeling/megatron_llama_quantization.py \
+ model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+ quantization.algorithm=null \
+ model_save=/home/TestData/nlp/megatron_llama/ci_baseline
+
+ rm -rf /home/TestData/nlp/megatron_llama/ci_baseline
+ - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"  # executed only when an earlier step in this job failed
+ if: "failure()"
+
+ L2_PTQ_Llama2_FP8:  # runs the Llama quantization example with quantization.algorithm=fp8 on 2 devices, then deletes the saved output
+ needs: [cicd-test-container-setup]  # NOTE(review): reads llama_ci.nemo from the shared volume but only waits for the container build; confirm the job that writes that checkpoint is ordered first
+ runs-on: self-hosted-azure  # NOTE(review): the script is given trainer.devices=2; confirm this runner exposes at least two GPUs
+ container:
+ image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}  # image tag is keyed by this workflow run's id
+ options:
+ # --user 0:128
+ --device=/dev/nvidia0
+ --gpus all
+ --shm-size=8g
+ --env TRANSFORMERS_OFFLINE=0
+ --env HYDRA_FULL_ERROR=1
+ --volume /mnt/datadrive/TestData:/home/TestData
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ - run: |
+ python examples/nlp/language_modeling/megatron_llama_quantization.py \
+ model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+ tensor_model_parallel_size=2 \
+ trainer.devices=2 \
+ quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
+ quantization.algorithm=fp8 \
+ quantization.num_calib_size=8 \
+ inference.batch_size=2 \
+ export.inference_tensor_parallel=2 \
+ model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
+
+ rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
+ - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"  # executed only when an earlier step in this job failed
+ if: "failure()"
+
+ L2_PTQ_Llama2_INT8_SQ:  # runs the Llama quantization example with quantization.algorithm=int8_sq, then deletes the saved output
+ needs: [cicd-test-container-setup]  # NOTE(review): reads llama_ci.nemo from the shared volume but only waits for the container build; confirm the job that writes that checkpoint is ordered first
+ runs-on: self-hosted-azure
+ container:
+ image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}  # image tag is keyed by this workflow run's id
+ options:
+ # --user 0:128
+ --device=/dev/nvidia0
+ --gpus all
+ --shm-size=8g
+ --env TRANSFORMERS_OFFLINE=0
+ --env HYDRA_FULL_ERROR=1
+ --volume /mnt/datadrive/TestData:/home/TestData
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ - run: |
+ python examples/nlp/language_modeling/megatron_llama_quantization.py \
+ model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+ quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
+ quantization.algorithm=int8_sq \
+ quantization.num_calib_size=8 \
+ inference.batch_size=2 \
+ model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
+
+ rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
+ - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"  # executed only when an earlier step in this job failed
+ if: "failure()"
+
+ L2_PTQ_Llama2_INT4_AWQ:  # runs the Llama quantization example with quantization.algorithm=int4_awq on 1 device, then deletes the saved output
+ needs: [cicd-test-container-setup]  # NOTE(review): reads llama_ci.nemo from the shared volume but only waits for the container build; confirm the job that writes that checkpoint is ordered first
+ runs-on: self-hosted-azure
+ container:
+ image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}  # image tag is keyed by this workflow run's id
+ options:
+ # --user 0:128
+ --device=/dev/nvidia0
+ --gpus all
+ --shm-size=8g
+ --env TRANSFORMERS_OFFLINE=0
+ --env HYDRA_FULL_ERROR=1
+ --volume /mnt/datadrive/TestData:/home/TestData
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ - run: |
+ python examples/nlp/language_modeling/megatron_llama_quantization.py \
+ model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+ tensor_model_parallel_size=1 \
+ trainer.devices=1 \
+ quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
+ quantization.algorithm=int4_awq \
+ quantization.num_calib_size=8 \
+ inference.batch_size=2 \
+ model_save=/home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo
+
+ rm -rf /home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo
+ - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"  # executed only when an earlier step in this job failed
+ if: "failure()"
+
  # L2: ASR dev run
  ASR_dev_run_Speech_to_Text:
  needs: [cicd-test-container-setup]
@@ -4664,7 +4803,7 @@ jobs:
  --volume /mnt/datadrive/TestData:/home/TestData
  steps:
  - name: Checkout repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
  - run: |
  rm -rf /home/TestData/nlp/megatron_ir/working_dir
 

diff --git a/README.rst b/README.rst
@@ -77,6 +77,31 @@ Latest News
 
  </details>
 
+ <details open>
+ <summary><b>Speech Recognition</b></summary>
+ <details>
+ <summary><a href="https://developer.nvidia.com/blog/new-standard-for-speech-recognition-and-translation-from-the-nvidia-nemo-canary-model/">New Standard for Speech Recognition and Translation from the NVIDIA NeMo Canary Model</a> (2024/04/18) </summary>
+
+ The NeMo team just released Canary, a multilingual model that transcribes speech in English, Spanish, German, and French with punctuation and capitalization. Canary also provides bi-directional translation between English and the three other supported languages.
+ <br><br>
+ </details>
+
+ <details>
+ <summary><a href="https://developer.nvidia.com/blog/pushing-the-boundaries-of-speech-recognition-with-nemo-parakeet-asr-models/">Pushing the Boundaries of Speech Recognition with NVIDIA NeMo Parakeet ASR Models</a> (2024/04/18) </summary>
+
+ NVIDIA NeMo, an end-to-end platform for the development of multimodal generative AI models at scale anywhere—on any cloud and on-premises—released the Parakeet family of automatic speech recognition (ASR) models. These state-of-the-art ASR models, developed in collaboration with Suno.ai, transcribe spoken English with exceptional accuracy.
+ <br><br>
+ </details>
+
+ <details>
+ <summary><a href="https://developer.nvidia.com/blog/turbocharge-asr-accuracy-and-speed-with-nvidia-nemo-parakeet-tdt/">Turbocharge ASR Accuracy and Speed with NVIDIA NeMo Parakeet-TDT</a> (2024/04/18) </summary>
+
+ NVIDIA NeMo, an end-to-end platform for developing multimodal generative AI models at scale anywhere—on any cloud and on-premises—recently released Parakeet-TDT. This new addition to the NeMo ASR Parakeet model family boasts better accuracy and 64% greater speed than the previous best model, Parakeet-RNNT-1.1B.
+ <br><br>
+ </details>
+
+ </details>
+
 
 
 

diff --git a/examples/audio_tasks/audio_to_audio_eval.py b/examples/audio_tasks/audio_to_audio_eval.py
@@ -61,6 +61,7 @@
 import json
 import os
 import tempfile
+from collections import defaultdict
 from dataclasses import dataclass, field, is_dataclass
 from typing import List, Optional
 
@@ -101,6 +102,9 @@ class AudioEvaluationConfig(process_audio.ProcessConfig):
  # Metrics to calculate
  metrics: List[str] = field(default_factory=lambda: ['sdr', 'estoi'])
 
+ # Return metric values for each example
+ return_values_per_example: bool = False
+
 
 def get_evaluation_dataloader(config):
  """Prepare a dataloader for evaluation.
@@ -174,6 +178,9 @@ def main(cfg: AudioEvaluationConfig):
  # Setup metrics
  metrics = get_metrics(cfg)
 
+ if cfg.return_values_per_example and cfg.batch_size > 1:
+ raise ValueError('return_example_values is only supported for batch_size=1.')
+
  # Processing
  if not cfg.only_score_manifest:
  # Process audio using the configured model and save in the output directory
@@ -236,6 +243,10 @@ def main(cfg: AudioEvaluationConfig):
 
  num_files += 1
 
+ if cfg.max_utts is not None and num_files >= cfg.max_utts:
+ logging.info('Reached max_utts: %s', cfg.max_utts)
+ break
+
  # Prepare dataloader
  config = {
  'manifest_filepath': temporary_manifest_filepath,
@@ -249,6 +260,8 @@ def main(cfg: AudioEvaluationConfig):
  }
  temporary_dataloader = get_evaluation_dataloader(config)
 
+ metrics_value_per_example = defaultdict(list)
+
  # Calculate metrics
  for eval_batch in tqdm(temporary_dataloader, desc='Evaluating'):
  processed_signal, processed_length, target_signal, target_length = eval_batch
@@ -257,7 +270,9 @@ def main(cfg: AudioEvaluationConfig):
  raise RuntimeError(f'Length mismatch.')
 
  for name, metric in metrics.items():
- metric.update(preds=processed_signal, target=target_signal, input_length=target_length)
+ value = metric(preds=processed_signal, target=target_signal, input_length=target_length)
+ if cfg.return_values_per_example:
+ metrics_value_per_example[name].append(value.item())
 
  # Convert to a dictionary with name: value
  metrics_value = {name: metric.compute().item() for name, metric in metrics.items()}
@@ -277,6 +292,7 @@ def main(cfg: AudioEvaluationConfig):
  # Inject the metric name and score into the config, and return the entire config
  with open_dict(cfg):
  cfg.metrics_value = metrics_value
+ cfg.metrics_value_per_example = dict(metrics_value_per_example)
 
  return cfg
 

diff --git a/examples/audio_tasks/conf/beamforming.yaml b/examples/audio_tasks/conf/beamforming.yaml
@@ -44,7 +44,6 @@ model:
  _target_: nemo.collections.asr.modules.audio_preprocessing.AudioToSpectrogram
  fft_length: 512 # Length of the window and FFT for calculating spectrogram
  hop_length: 256 # Hop length for calculating spectrogram
- power: null
 
  decoder:
  _target_: nemo.collections.asr.modules.audio_preprocessing.SpectrogramToAudio

diff --git a/examples/audio_tasks/conf/masking.yaml b/examples/audio_tasks/conf/masking.yaml
@@ -1,5 +1,3 @@
-# This configuration contains the exemplary values for training a multichannel speech enhancement model with a mask-based beamformer.
-#
 name: "masking"
 
 model:
@@ -44,7 +42,6 @@ model:
  _target_: nemo.collections.asr.modules.audio_preprocessing.AudioToSpectrogram
  fft_length: 512 # Length of the window and FFT for calculating spectrogram
  hop_length: 256 # Hop length for calculating spectrogram
- power: null
 
  decoder:
  _target_: nemo.collections.asr.modules.audio_preprocessing.SpectrogramToAudio