Merge remote-tracking branch 'origin/main' into gkaushik/gh200-hotfix-main
gagank1 committed Dec 18, 2024
2 parents ee2041b + 6f34fad commit 10148a5
Showing 23 changed files with 1,269 additions and 489 deletions.
2 changes: 1 addition & 1 deletion CODEOWNERS
@@ -93,4 +93,4 @@ sub-packages/bionemo-geneformer @jstjohn @malcolmgreaves @skothenhill-nv

sub-packages/bionemo-scdl @jstjohn @malcolmgreaves @polinabinder1 @skothenhill-nv

sub-packages/bionemo-noodles @skothenhill-nv @malcolmgreaves @jstjohn @edawson
sub-packages/bionemo-noodles @skothenhill-nv @malcolmgreaves @jstjohn @edawson @cspades
3 changes: 2 additions & 1 deletion Dockerfile
@@ -85,7 +85,8 @@ COPY --from=ghcr.io/astral-sh/uv:0.4.25 /uv /usr/local/bin/uv
ENV UV_LINK_MODE=copy \
UV_COMPILE_BYTECODE=1 \
UV_PYTHON_DOWNLOADS=never \
UV_SYSTEM_PYTHON=true
UV_SYSTEM_PYTHON=true \
UV_NO_CACHE=1

# Install the bionemo-geometric requirements ahead of copying over the rest of the repo, so that we can cache their
# installation. These involve building some torch extensions, so they can take a while to install.
10 changes: 8 additions & 2 deletions ci/scripts/run_pytest.sh
@@ -19,6 +19,8 @@ set -xueo pipefail
export PYTHONDONTWRITEBYTECODE=1
# NOTE: if a non-nvidia user wants to run the test suite, just run `export BIONEMO_DATA_SOURCE=ngc` prior to this call.
export BIONEMO_DATA_SOURCE="${BIONEMO_DATA_SOURCE:-pbss}"
# Enable flexible GPU memory management to reduce the risk of fragmentation-related CUDA OOMs
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
source "$(dirname "$0")/utils.sh"

if ! set_bionemo_home; then
@@ -27,12 +29,16 @@
fi

python -m coverage erase

error=false
for dir in docs/ ./sub-packages/bionemo-*/; do
echo "Running pytest in $dir"
python -m coverage run --parallel-mode --source=bionemo \
-m pytest -v --nbval-lax --durations=0 --durations-min=60.0 --ignore-glob='*docs/docs/user-guide/examples/bionemo-esm2/mutant-design.ipynb' "$dir"

-m pytest -v --nbval-lax --durations=0 --durations-min=60.0 "$dir" || error=true
done

python -m coverage combine
python -m coverage report --show-missing

if [ "$error" = true ]; then
exit 1
fi
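
The `PYTORCH_CUDA_ALLOC_CONF` export above opts PyTorch's caching allocator into expandable segments, which can grow in place rather than fragmenting GPU memory over a long test run. As a minimal sketch (not part of this commit), the same setting can be applied from Python, provided it happens before the first CUDA allocation:

```python
import os

# Must be set before PyTorch makes its first CUDA allocation;
# setting it before importing torch is the safest ordering.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch

if torch.cuda.is_available():
    # The first CUDA tensor triggers allocator initialization with the
    # expandable-segments policy configured above.
    x = torch.empty(1024, 1024, device="cuda")
    print(f"allocated: {torch.cuda.memory_allocated()} bytes")
```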
10 changes: 5 additions & 5 deletions docs/docs/user-guide/examples/bionemo-esm2/finetune.md
@@ -230,22 +230,22 @@ We download a CSV example dataset of artificial sequences for this inference example
mkdir -p $WORKDIR/esm2_finetune_tutorial
# download sample data CSV for inference
DATA_PATH=$(download_bionemo_data esm2/testdata_esm2_infer:2.0 --source ngc)
RESULTS_PATH=$WORKDIR/esm2_finetune_tutorial/inference_results.pt
DATA_PATH=$(download_bionemo_data esm2/testdata_esm2_infer:2.0)
RESULTS_PATH=$WORKDIR/esm2_finetune_tutorial/
infer_esm2 --checkpoint-path <finetune checkpoint path> \
--data-path $DATA_PATH \
--results-path $RESULTS_PATH \
--config-class ESM2FineTuneSeqConfig
```

This will create a result `.pt` file under `$WORKDIR/esm2_finetune_tutorial/inference_results.pt` which can be loaded via PyTorch library in python environment:
This will create a result `.pt` file under `$WORKDIR/esm2_finetune_tutorial/predictions__rank_0.pt`, which can be loaded with the PyTorch library in a Python environment:

```python
import torch
# Set the path to results file e.g. /workspace/bionemo2/esm2_finetune_tutorial/inference_results.pt
# results_path = /workspace/bionemo2/esm2_finetune_tutorial/inference_results.pt
# Set the path to results file e.g. /workspace/bionemo2/esm2_finetune_tutorial/predictions__rank_0.pt
# results_path = /workspace/bionemo2/esm2_finetune_tutorial/predictions__rank_0.pt
results = torch.load(results_path)
# results is a python dict which includes the following result tensors for this example:
```
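
Which tensors appear in that dictionary depends on the `--include-*` flags passed to `infer_esm2`. As a minimal sketch (the path below assumes the tutorial's `$WORKDIR` layout), the contents can be inspected like this:

```python
import torch

# Path written by infer_esm2; rank 0 of a single-device run.
results_path = "esm2_finetune_tutorial/predictions__rank_0.pt"
results = torch.load(results_path)

# Print every tensor the run produced; which keys appear depends on the
# --include-* flags passed to infer_esm2 (e.g. --include-embeddings).
for key, val in results.items():
    if val is not None:
        print(f"{key}\t{tuple(val.shape)}")
```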
65 changes: 51 additions & 14 deletions docs/docs/user-guide/examples/bionemo-esm2/inference.ipynb
@@ -152,7 +152,7 @@
"source": [
"from bionemo.core.data.load import load\n",
"\n",
"checkpoint_path = load(\"esm2/650m:2.0\", source=\"ngc\")\n",
"checkpoint_path = load(\"esm2/650m:2.0\")\n",
"print(checkpoint_path)"
]
},
@@ -238,21 +238,24 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2024-11-25 21:18:43 - faiss.loader - INFO - Loading faiss with AVX512 support.\n",
"2024-11-25 21:18:43 - faiss.loader - INFO - Successfully loaded faiss with AVX512 support.\n",
"[NeMo W 2024-11-25 21:18:43 nemo_logging:361] /usr/local/lib/python3.10/dist-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work\n",
"2024-12-16 20:19:23 - faiss.loader - INFO - Loading faiss with AVX512 support.\n",
"2024-12-16 20:19:23 - faiss.loader - INFO - Successfully loaded faiss with AVX512 support.\n",
"[NeMo W 2024-12-16 20:19:24 nemo_logging:361] /usr/local/lib/python3.10/dist-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work\n",
" warn(\"Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work\", RuntimeWarning)\n",
" \n",
"[NeMo W 2024-12-16 20:19:24 nemo_logging:361] /usr/local/lib/python3.10/dist-packages/pyannote/core/notebook.py:134: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.\n",
" cm = get_cmap(\"Set1\")\n",
" \n",
"usage: infer_esm2 [-h] --checkpoint-path CHECKPOINT_PATH --data-path DATA_PATH\n",
" --results-path RESULTS_PATH\n",
" [--precision {fp16,bf16,fp32,bf16-mixed,fp32-mixed,16-mixed,fp16-mixed,16,32}]\n",
" [--num-gpus NUM_GPUS] [--num-nodes NUM_NODES]\n",
" [--micro-batch-size MICRO_BATCH_SIZE]\n",
" [--pipeline-model-parallel-size PIPELINE_MODEL_PARALLEL_SIZE]\n",
" [--tensor-model-parallel-size TENSOR_MODEL_PARALLEL_SIZE]\n",
" [--include-hiddens] [--include-input-ids]\n",
" [--include-embeddings] [--include-logits]\n",
" [--config-class CONFIG_CLASS]\n",
" [--prediction-interval {epoch,batch}] [--include-hiddens]\n",
" [--include-input-ids] [--include-embeddings]\n",
" [--include-logits] [--config-class CONFIG_CLASS]\n",
"\n",
"Infer ESM2.\n",
"\n",
@@ -264,7 +267,7 @@
" Path to the CSV file containing sequences and label\n",
" columns\n",
" --results-path RESULTS_PATH\n",
" Path to the results file.\n",
" Path to the results directory.\n",
" --precision {fp16,bf16,fp32,bf16-mixed,fp32-mixed,16-mixed,fp16-mixed,16,32}\n",
" Precision type to use for training.\n",
" --num-gpus NUM_GPUS Number of GPUs to use for training. Default is 1.\n",
@@ -277,6 +280,8 @@
" Pipeline model parallel size. Default is 1.\n",
" --tensor-model-parallel-size TENSOR_MODEL_PARALLEL_SIZE\n",
" Tensor model parallel size. Default is 1.\n",
" --prediction-interval {epoch,batch}\n",
" Intervals to write DDP predictions into disk\n",
" --include-hiddens Include hiddens in output of inference\n",
" --include-input-ids Include input_ids in output of inference\n",
" --include-embeddings Include embeddings in output of inference\n",
@@ -327,12 +332,12 @@
"source": [
"%%capture --no-display --no-stderr cell_output\n",
"\n",
"results_path = os.path.join(work_dir, \"inference_results.pt\")\n",
"\n",
"! infer_esm2 --checkpoint-path {checkpoint_path} \\\n",
" --data-path {data_path} \\\n",
" --results-path {results_path} \\\n",
" --precision \"fp32\" \\\n",
" --results-path {work_dir} \\\n",
" --micro-batch-size 3 \\\n",
" --num-gpus 1 \\\n",
" --precision \"bf16-mixed\" \\\n",
" --include-hiddens \\\n",
" --include-embeddings \\\n",
" --include-logits \\\n",
@@ -350,7 +355,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The bash command in previous step creates the `inference_results.pt` file under the work directory of this notebook (defined above) to stores the results. The `.pt` file containes a dictionary of `{'result_key': torch.Tensor}` that be loaded with PyTorch:"
"Inference predictions are stored into `.pt` files for each device. Since we only used one device to run the inference (`--num-gpus 1`) in the previous step, the results were written to `{work_dir}/predictions__rank_0.pt` under the work directory of this notebook (defined above). The `.pt` file containes a dictionary of `{'result_key': torch.Tensor}` that be loaded with PyTorch:"
]
},
{
@@ -371,7 +376,7 @@
],
"source": [
"import torch\n",
"results = torch.load(results_path)\n",
"results = torch.load(f\"{work_dir}/predictions__rank_0.pt\")\n",
"\n",
"for key, val in results.items():\n",
" if val is not None:\n",
@@ -472,6 +477,38 @@
"mask = torch.isin(input_ids, torch.tensor(extra_indices))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## DDP Inference Support\n",
"\n",
"Although this tutorial is utilizing one devive to run the inference, distributed inference is supported for ESM2 in BioNeMo Framework. One can simply set the the `--num-gpus n` to run distributed inference on `n` devices. The output predictions will be written into `predictions__rank_<0...n-1>.pt` under the `--results-path` provided. Moreover, by optionally including input token IDs with `--include-input-ids` we can snure 1:1 mapping between input sequences and output predictions."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following snippet can be used to load and collate the predictions into a single dictionary.\n",
"\n",
"\n",
"```python\n",
"import glob\n",
"from bionemo.llm.lightning import batch_collator\n",
"\n",
"collated_preditions = batch_collator([torch.load(path) for path in glob.glob(f\"{work_dir}/predictions__rank_*.pt\")])\n",
"for key, val in collated_preditions.items():\n",
" if val is not None:\n",
" print(f'{key}\\t{val.shape}')\n",
"\n",
"# token_logits\ttorch.Size([1024, 10, 128])\n",
"# hidden_states\ttorch.Size([10, 1024, 1280])\n",
"# input_ids torch.Size([10, 1024])\n",
"# embeddings\ttorch.Size([10, 1280])\n",
"```"
]
},
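{
"cell_type": "markdown",
"metadata": {},
"source": [
"With `--include-input-ids`, each rank's `.pt` file also carries the token IDs of the sequences it processed, so collated predictions can be matched back to their input sequences. The sketch below makes one assumption not established in this notebook: that the public HuggingFace tokenizer `facebook/esm2_t33_650M_UR50D` matches the vocabulary used by `infer_esm2`, which is worth verifying before relying on it.\n",
"\n",
"```python\n",
"from transformers import AutoTokenizer\n",
"\n",
"# Assumption: this public ESM-2 tokenizer matches the one used at inference time.\n",
"tokenizer = AutoTokenizer.from_pretrained(\"facebook/esm2_t33_650M_UR50D\")\n",
"\n",
"for ids, emb in zip(collated_predictions[\"input_ids\"], collated_predictions[\"embeddings\"]):\n",
"    # Drop padding and other special tokens, then recover the residue string.\n",
"    seq = tokenizer.decode(ids, skip_special_tokens=True).replace(\" \", \"\")\n",
"    print(seq[:20], tuple(emb.shape))\n",
"```"
]
},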
{
"cell_type": "markdown",
"metadata": {},
90 changes: 46 additions & 44 deletions docs/docs/user-guide/examples/bionemo-esm2/mutant-design.ipynb

Large diffs are not rendered by default.
