-
Notifications
You must be signed in to change notification settings - Fork 29
Install
Simple dependency installation instructions in the README
For installing a specific PyTorch version on a machine that has only CPU:
pip3 install torch==1.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip3 install torch-scatter==latest+cpu -f https://pytorch-geometric.com/whl/torch-1.6.0.html
pip3 install torch-sparse==latest+cpu -f https://pytorch-geometric.com/whl/torch-1.6.0.html
pip3 install torch-cluster==latest+cpu -f https://pytorch-geometric.com/whl/torch-1.6.0.html
pip3 install torch-spline-conv==latest+cpu -f https://pytorch-geometric.com/whl/torch-1.6.0.html
pip3 install torch-geometric
For installing on a machine that has GPUs and PyTorch already installed:
pip install --no-index torch-scatter -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html
pip install --no-index torch-sparse -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html
pip install --no-index torch-cluster -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html
pip install --no-index torch-spline-conv -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html
pip install torch-geometric
where ${CUDA} and ${TORCH} should be replaced by your specific CUDA version (cpu, cu92, cu101, cu102, cu110) and PyTorch version (1.4.0, 1.5.0, 1.6.0, 1.7.0)
- Get an interactive job from a queue with GPUs, e.g.,
gpu_p100
salloc -A ccsd -p gpu_p100 -N 1 -n1 -c1 -G1 --mem=0G -t 01:00:00 /bin/bash
- Set up a python virtual environment called
name-env
and save it in directory subdir/
python3 -m venv subdir/name-env
- Activate the environment
name-env
source subdir/name-env/bin/activate
2.1 load PE-gnu
for MPI
module load PE-gnu/3.0
- Install
torch
packages with CUDA support (you can customize your own versions)
pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
Installation done! Next, time to run the code.
Installation has been done already (non-trivial for IBM/NVIDIA machine) - skip to run instructions.
- MODULE SETTING
module reset
ml PrgEnv-gnu
ml rocm/5.4.3
ml cmake/3.23.2
ml craype-accel-amd-gfx90a
ml amd-mixed/5.4.3
ml cray-mpich/8.1.26
- CREATE CONDA ENVIRONMENT Download Anaconda
wget https://repo.anaconda.com/archive/Anaconda3-2023.09-0-Linux-x86_64.sh
- Create base conda environment
bash Anaconda3-2023.09-0-Linux-x86_64.sh -p conda_frontier_dir
source conda_frontier_dir/bin/activate
Max: higher versions of Python cause issues with linking to MKL libraries
- Create environment for HydraGNN dependencies
conda create -n hydragnn python=3.8
source activate hydragnn
- Uninstall pre-loaded version of MPI
conda uninstall -y mpi
- Install package dependencies
conda install -y ninja
conda install -y astunparse expecttest hypothesis numpy psutil pyyaml requests setuptools
conda install -y typing-extensions sympy filelock networkx jinja2 tqdm
conda install -y -c conda-forge types-dataclasses
conda install -y pyparsing build
conda install -y cython
conda install -y scipy scikit-learn tensorboard
conda install -y -c conda-forge ase
conda install -y -c conda-forge rdkit
pip install mendeleev
pip install pymatgen # Max: conda-forge was spinning all time
pip install jarvis-tools
7. Install PyTorch-Geometric dependencies
Git Checkouts
git clone --recursive git@github.com:rusty1s/pytorch_scatter.git \
&& pushd pytorch_scatter \
&& git checkout 2.1.2-2-gc095c62 \
&& popd ## This includes Ashwin's fix
git clone --recursive git@github.com:rusty1s/pytorch_sparse.git \
&& pushd pytorch_sparse \
&& git checkout 0.6.18-3-g1577470 \
&& popd ## This includes PyTorch 2.2 support
git clone --recursive git@github.com:rusty1s/pytorch_cluster.git \
&& pushd pytorch_cluster \
&& git checkout e0eb0c1143de632786e074dee185f120afe7b852 \
&& popd ## This includes PyTorch 2.2 support
git clone --recursive git@github.com:rusty1s/pytorch_spline_conv.git \
&& pushd pytorch_spline_conv \
&& git checkout 1.2.2-5-gecf8a4a \
&& popd ## This includes PyTorch 2.2 support
git clone --recursive git@github.com:rusty1s/pytorch_geometric.git \
&& pushd pytorch_geometric \
&& git checkout 2.4.0-354-gf4c3e3895 \
&& popd ## This includes PyTorch 2.2 support
- Use the following command to install per each directory
pip install . --verbose
- INSTALL MPI4PY from source
git clone -b 3.1.5 https://github.com/mpi4py/mpi4py.git
CC=cc MPICC=cc pip install . --verbose
- INSTALL ADIOS
ADIOS_DIRPATH = …..
- Checkout from GitHub repository
git clone -b v2.8.3 git@github.com:ornladios/ADIOS2.git
12. Install with CMAKE
mkdir install
mkdir build && cd build
CC=mpicc CXX=mpicxx FC=mpifort \
cmake -DCMAKE_INSTALL_PREFIX=ADIOS_DIRPATH/install \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_TESTING=OFF \
-DADIOS2_USE_MPI=ON \
-DADIOS2_USE_Fortran=ON \
-DADIOS2_BUILD_EXAMPLES_EXPERIMENTAL=OFF \
-DADIOS2_BUILD_TESTING=OFF \
-DADIOS2_USE_HDF5=OFF \
-DADIOS2_USE_SST=OFF \
-DADIOS2_USE_BZip2=OFF \
-DADIOS2_USE_PNG=OFF \
-DADIOS2_USE_DataSpaces=OFF \
-DADIOS2_USE_Python=ON \
-DPython_EXECUTABLE=`which python` \
../ADIOS2
- install build in exportable path for python
make install
- Check that ADIOS binary files for pip have been correctly installed
ls ADIOS_DIRPATH/install/lib/python3.8/site-packages/
- Export path of ADIOS installation (this must be exported every time you submit a job)
export PYTHONPATH=ADIOS_DIRPATH/install/lib/python3.8/site-packages:$PYTHONPATH
Existing limitations that do not allow the use of rocm>=5.7.1
https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html
When a program that uses GPU-aware MPI is linked against ROCm >= 5.5.0 and cray-mpich < 8.1.26, the following error is thrown at run time:
Assertion failed in file ../src/mpid/common/cray/cray_gpu_ops.c at line 188: mpi_errno == MPI_SUCCESS /opt/cray/pe/lib64/libmpi_cray.so.12(MPL_backtrace_show+0x26) [0x7fffed4079ab] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x1fedbf4) [0x7fffece41bf4] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x2444148) [0x7fffed298148] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x1e9be40) [0x7fffeccefe40] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x1cdb68c) [0x7fffecb2f68c] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x1cd959c) [0x7fffecb2d59c] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x468461) [0x7fffeb2bc461] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x1cdd910) [0x7fffecb31910] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x1e9cb93) [0x7fffeccf0b93] /opt/cray/pe/lib64/libmpi_cray.so.12(+0x291c40) [0x7fffeb0e5c40] /opt/cray/pe/lib64/libmpi_cray.so.12(PMPI_Barrier+0x16f) [0x7fffeb0e617f] /autofs/nccs-svm1_home2/hagertnl/Scratch/vadd_hip_mpi/./hip_vadd() [0x20cbe6] /lib64/libc.so.6(__libc_start_main+0xef) [0x7fffe87e729d] /autofs/nccs-svm1_home2/hagertnl/Scratch/vadd_hip_mpi/./hip_vadd() [0x20c94a] MPICH ERROR [Rank 1] [job id 39817.0] [Wed Jul 12 14:06:11 2023] [borg006] - Abort(1): Internal error
Workarounds: • Use cray-mpich >= 8.1.26 • Use ROCm < 5.5.0 • Disable GPU-aware MPI with export MPICH_GPU_SUPPORT_ENABLED=0
- MODULE SETTING
module reset
ml hsi/5.0.2.p5
ml gcc/9.3.0
ml openmpi/4.0.4
ml DefApps
ml cmake/3.22.2
ml git-lfs/2.11.0
ml python/3.7-anaconda3
- CREATE CONDA ENVIRONMENT Download Anaconda
wget https://repo.anaconda.com/archive/Anaconda3-2023.09-0-Linux-x86_64.sh
- Create base conda environment
bash Anaconda3-2023.09-0-Linux-x86_64.sh -p conda_andes_dir
source conda_andes_dir/bin/activate
# Max: higher versions of Python cause issues with linking to MKL libraries
- Create environment for HydraGNN dependencies
conda create -n hydragnn python=3.8
source activate hydragnn
- Uninstall pre-loaded version of MPI
conda uninstall -y mpi
- Install package dependencies
conda install -y nomkl
conda install -y ninja
conda install -y astunparse expecttest hypothesis numpy psutil pyyaml requests setuptools
conda install -y typing-extensions sympy filelock networkx jinja2 tqdm
conda install -y -c conda-forge types-dataclasses
conda install -y pyparsing build
conda install -y cython
conda install -y scipy scikit-learn tensorboard
conda install -y -c conda-forge ase
conda install -y -c conda-forge rdkit
pip install mendeleev
pip install pymatgen # Max: conda-forge was spinning all time
pip install jarvis-tools
7. Install PyTorch-Geometric dependencies
pip install torch-scatter
pip install torch-sparse
pip install torch-cluster
pip install torch-spline-conv
pip install torch-geometric
- Use the following command to install per each directory
pip install . --verbose
- INSTALL MPI4PY from source
git clone -b 3.1.5 https://github.com/mpi4py/mpi4py.git
CC=mpicc CXX=mpicxx FC=mpifort pip install . --verbose
- INSTALL ADIOS
ADIOS_DIRPATH = …..
- Checkout from GitHub repository
git clone -b v2.8.3 git@github.com:ornladios/ADIOS2.git
- Install with CMAKE
mkdir build && cd build
CC=mpicc CXX=mpicxx FC=mpifort \
cmake -DCMAKE_INSTALL_PREFIX=ADIOS_DIRPATH/install \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_TESTING=OFF \
-DADIOS2_USE_MPI=ON \
-DADIOS2_USE_Fortran=ON \
-DADIOS2_BUILD_EXAMPLES_EXPERIMENTAL=OFF \
-DADIOS2_BUILD_TESTING=OFF \
-DADIOS2_USE_HDF5=OFF \
-DADIOS2_USE_SST=OFF \
-DADIOS2_USE_BZip2=OFF \
-DADIOS2_USE_PNG=OFF \
-DADIOS2_USE_DataSpaces=OFF \
-DADIOS2_USE_Python=ON \
-DPython_EXECUTABLE=`which python` \
../ADIOS2
- install build in exportable path for python
make install
- Check that ADIOS binary files for pip have been correctly installed
ls ADIOS_DIRPATH/install/lib/python3.8/site-packages/
- Export path of ADIOS installation (this must be exported every time you submit a job)
export PYTHONPATH=ADIOS_DIRPATH/install/lib/python3.8/site-packages:$PYTHONPATH