From 5d9c75cbf6da0b1f5a2f5d8a60390fa3c17925ab Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Thu, 28 Apr 2022 15:49:38 +0000 Subject: [PATCH] Update NVIDIA signing key for package repos See https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/ for details on the key changes. This commit: - Removes the nvidia-ml repo, which is deprecated and will not be updated - Updates the nvidia_cuda and nvidia_dcgm roles to use the new key and install workflow - Updates roles/requirements.yml to point to an updated version of nvidia.nvidia_driver --- playbooks/nvidia-software/nvidia-ml.yml | 21 -------------- roles/nvidia-ml/defaults/main.yml | 10 ------- roles/nvidia-ml/tasks/main.yml | 30 -------------------- roles/nvidia-ml/tasks/redhat-pre-install.yml | 18 ------------ roles/nvidia-ml/tasks/ubuntu-pre-install.yml | 10 ------- roles/nvidia-ml/vars/main.yml | 2 -- roles/nvidia_cuda/defaults/main.yml | 7 +++-- roles/nvidia_cuda/files/cuda-ubuntu.pin | 3 -- roles/nvidia_cuda/tasks/install-ubuntu.yml | 27 ++++++++---------- roles/nvidia_dcgm/defaults/main.yml | 7 +++-- roles/nvidia_dcgm/files/cuda-ubuntu.pin | 3 -- roles/nvidia_dcgm/tasks/install-ubuntu.yml | 27 ++++++++---------- roles/requirements.yml | 2 +- 13 files changed, 33 insertions(+), 134 deletions(-) delete mode 100644 playbooks/nvidia-software/nvidia-ml.yml delete mode 100644 roles/nvidia-ml/defaults/main.yml delete mode 100644 roles/nvidia-ml/tasks/main.yml delete mode 100644 roles/nvidia-ml/tasks/redhat-pre-install.yml delete mode 100644 roles/nvidia-ml/tasks/ubuntu-pre-install.yml delete mode 100644 roles/nvidia-ml/vars/main.yml delete mode 100644 roles/nvidia_cuda/files/cuda-ubuntu.pin delete mode 100644 roles/nvidia_dcgm/files/cuda-ubuntu.pin diff --git a/playbooks/nvidia-software/nvidia-ml.yml b/playbooks/nvidia-software/nvidia-ml.yml deleted file mode 100644 index 407bf82c8..000000000 --- a/playbooks/nvidia-software/nvidia-ml.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- -- hosts: all - 
become: true - tasks: - - name: Install NVIDIA Machine Learning packages - include_role: - name: nvidia-ml - tags: - - install - -- hosts: all - become: true - tasks: - - name: Remove NVIDIA Machine Learning packages - include_role: - name: nvidia-ml - vars: - nvidia_ml_package_state: absent - tags: - - never - - remove diff --git a/roles/nvidia-ml/defaults/main.yml b/roles/nvidia-ml/defaults/main.yml deleted file mode 100644 index 2a9aadcea..000000000 --- a/roles/nvidia-ml/defaults/main.yml +++ /dev/null @@ -1,10 +0,0 @@ -nvidia_ml_package_state: present -nvidia_cudnn_package_version: '' -nvidia_nccl_package_version: '' -rhel_nvidiaml_gpgkey: "https://developer.download.nvidia.com/compute/machine-learning/repos/{{ rhel_repo_dir }}/7fa2af80.pub" -rhel_nvidiaml_baseurl: "https://developer.download.nvidia.com/compute/machine-learning/repos/{{ rhel_repo_dir }}/" -epel_package: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" -epel_key_url: "https://getfedora.org/static/fedora.gpg" -ubuntu_nvidiaml_gpgkey: "https://developer.download.nvidia.com/compute/machine-learning/repos/{{ ubuntu_repo_dir }}/7fa2af80.pub" -ubuntu_nvidiaml_baseurl: "http://developer.download.nvidia.com/compute/machine-learning/repos/{{ ubuntu_repo_dir }}" -ubuntu_nvidiaml_keyid: "7fa2af80" diff --git a/roles/nvidia-ml/tasks/main.yml b/roles/nvidia-ml/tasks/main.yml deleted file mode 100644 index f2998b378..000000000 --- a/roles/nvidia-ml/tasks/main.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- -- name: ubuntu pre-install tasks - include_tasks: ubuntu-pre-install.yml - when: ansible_distribution == 'Ubuntu' - -- name: redhat family pre-install tasks - include_tasks: redhat-pre-install.yml - when: ansible_os_family == 'RedHat' - -- name: Manage cuDNN and NCCL packages - package: - name: "{{ item }}" - state: "{{ nvidia_ml_package_state }}" - with_items: - - "{{ nvidia_cudnn_package_version | ternary('libcudnn7='+nvidia_cudnn_package_version, 
'libcudnn7') }}" - - "{{ nvidia_cudnn_package_version | ternary('libcudnn7-dev='+nvidia_cudnn_package_version, 'libcudnn7-dev') }}" - - "{{ nvidia_nccl_package_version | ternary('libnccl2='+nvidia_nccl_package_version, 'libnccl2') }}" - - "{{ nvidia_nccl_package_version | ternary('libnccl-dev='+nvidia_nccl_package_version, 'libnccl-dev') }}" - when: ansible_os_family == "Debian" - -- name: Manage cuDNN and NCCL packages - package: - name: "{{ item }}" - state: "{{ nvidia_ml_package_state }}" - with_items: - - "{{ nvidia_cudnn_package_version | ternary('libcudnn7='+nvidia_cudnn_package_version, 'libcudnn7') }}" - - "{{ nvidia_cudnn_package_version | ternary('libcudnn7-devel='+nvidia_cudnn_package_version, 'libcudnn7-devel') }}" - - "{{ nvidia_nccl_package_version | ternary('libnccl='+nvidia_nccl_package_version, 'libnccl') }}" - - "{{ nvidia_nccl_package_version | ternary('libnccl-devel='+nvidia_nccl_package_version, 'libnccl-devel') }}" - when: ansible_os_family == "RedHat" diff --git a/roles/nvidia-ml/tasks/redhat-pre-install.yml b/roles/nvidia-ml/tasks/redhat-pre-install.yml deleted file mode 100644 index 97020db80..000000000 --- a/roles/nvidia-ml/tasks/redhat-pre-install.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -- name: trust GPG key for EPEL - rpm_key: - key: "{{ epel_key_url }}" - state: present - -- name: add epel repo - yum: - name: - - "{{ epel_package }}" - state: present - -- name: add repo - yum_repository: - name: machine-learning - description: NVIDIA Machine Learning YUM Repo - gpgkey: "{{ rhel_nvidiaml_gpgkey }}" - baseurl: "{{ rhel_nvidiaml_baseurl }}" diff --git a/roles/nvidia-ml/tasks/ubuntu-pre-install.yml b/roles/nvidia-ml/tasks/ubuntu-pre-install.yml deleted file mode 100644 index aa8d76c98..000000000 --- a/roles/nvidia-ml/tasks/ubuntu-pre-install.yml +++ /dev/null @@ -1,10 +0,0 @@ ---- -- name: add key - apt_key: - url: "{{ ubuntu_nvidiaml_gpgkey }}" - id: "{{ ubuntu_nvidiaml_keyid }}" - -- name: add repo - apt_repository: - repo: "deb {{ 
ubuntu_nvidiaml_baseurl }} /" - update_cache: yes diff --git a/roles/nvidia-ml/vars/main.yml b/roles/nvidia-ml/vars/main.yml deleted file mode 100644 index 6a48d9568..000000000 --- a/roles/nvidia-ml/vars/main.yml +++ /dev/null @@ -1,2 +0,0 @@ -ubuntu_repo_dir: "{{ ansible_distribution | lower }}{{ ansible_distribution_version | replace('.', '') }}/{{ ansible_architecture }}" -rhel_repo_dir: "rhel{{ ansible_distribution_major_version }}/{{ ansible_architecture }}" diff --git a/roles/nvidia_cuda/defaults/main.yml b/roles/nvidia_cuda/defaults/main.yml index 628eb5675..0a4f19c75 100644 --- a/roles/nvidia_cuda/defaults/main.yml +++ b/roles/nvidia_cuda/defaults/main.yml @@ -21,9 +21,10 @@ cuda_toolkit_add_profile_script: yes epel_package: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" epel_key_url: "https://getfedora.org/static/fedora.gpg" nvidia_driver_rhel_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/" -nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub" +nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/D42D0685.pub" # Ubuntu -nvidia_driver_ubuntu_cuda_repo_gpgkey_url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub" -nvidia_driver_ubuntu_cuda_repo_gpgkey_id: "7fa2af80" +old_nvidia_driver_ubuntu_cuda_repo_gpgkey_id: "7fa2af80" nvidia_driver_ubuntu_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}" +nvidia_driver_ubuntu_cuda_keyring_package: "cuda-keyring_1.0-1_all.deb" +nvidia_driver_ubuntu_cuda_keyring_url: "{{ nvidia_driver_ubuntu_cuda_repo_baseurl }}/{{ nvidia_driver_ubuntu_cuda_keyring_package }}" diff --git a/roles/nvidia_cuda/files/cuda-ubuntu.pin b/roles/nvidia_cuda/files/cuda-ubuntu.pin deleted file mode 
100644 index b050308d8..000000000 --- a/roles/nvidia_cuda/files/cuda-ubuntu.pin +++ /dev/null @@ -1,3 +0,0 @@ -Package: * -Pin: release l=NVIDIA CUDA -Pin-Priority: 600 diff --git a/roles/nvidia_cuda/tasks/install-ubuntu.yml b/roles/nvidia_cuda/tasks/install-ubuntu.yml index 4bd5f7b36..453c20b06 100644 --- a/roles/nvidia_cuda/tasks/install-ubuntu.yml +++ b/roles/nvidia_cuda/tasks/install-ubuntu.yml @@ -4,25 +4,22 @@ repo: ppa:graphics-drivers/ppa state: absent -- name: Ubuntu | add pin file - copy: - src: "cuda-ubuntu.pin" - dest: "/etc/apt/preferences.d/cuda-repository-pin-600" - owner: "root" - group: "root" - mode: "0644" - -- name: Ubuntu | add key +- name: Ubuntu | ensure old key is absent apt_key: - url: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_url }}" - id: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}" + id: "{{ old_nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}" + state: "absent" + +- name: Ubuntu | install CUDA keyring + apt: + deb: "{{ nvidia_driver_ubuntu_cuda_keyring_url }}" + state: "present" environment: "{{ proxy_env if proxy_env is defined else {} }}" -- name: Ubuntu | add CUDA repo - apt_repository: - repo: "deb {{ nvidia_driver_ubuntu_cuda_repo_baseurl }} /" - update_cache: yes +- name: Ubuntu | force apt update + apt: + update_cache: true environment: "{{ proxy_env if proxy_env is defined else {} }}" + changed_when: false - name: Ubuntu | install cuda package: diff --git a/roles/nvidia_dcgm/defaults/main.yml b/roles/nvidia_dcgm/defaults/main.yml index 262a4e35b..691902a3a 100644 --- a/roles/nvidia_dcgm/defaults/main.yml +++ b/roles/nvidia_dcgm/defaults/main.yml @@ -5,9 +5,10 @@ dcgm_pkg_name: "datacenter-gpu-manager" epel_package: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" epel_key_url: "https://getfedora.org/static/fedora.gpg" nvidia_driver_rhel_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/" 
-nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub" +nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/D42D0685.pub" # Ubuntu -nvidia_driver_ubuntu_cuda_repo_gpgkey_url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub" -nvidia_driver_ubuntu_cuda_repo_gpgkey_id: "7fa2af80" +old_nvidia_driver_ubuntu_cuda_repo_gpgkey_id: "7fa2af80" nvidia_driver_ubuntu_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}" +nvidia_driver_ubuntu_cuda_keyring_package: "cuda-keyring_1.0-1_all.deb" +nvidia_driver_ubuntu_cuda_keyring_url: "{{ nvidia_driver_ubuntu_cuda_repo_baseurl }}/{{ nvidia_driver_ubuntu_cuda_keyring_package }}" diff --git a/roles/nvidia_dcgm/files/cuda-ubuntu.pin b/roles/nvidia_dcgm/files/cuda-ubuntu.pin deleted file mode 100644 index b050308d8..000000000 --- a/roles/nvidia_dcgm/files/cuda-ubuntu.pin +++ /dev/null @@ -1,3 +0,0 @@ -Package: * -Pin: release l=NVIDIA CUDA -Pin-Priority: 600 diff --git a/roles/nvidia_dcgm/tasks/install-ubuntu.yml b/roles/nvidia_dcgm/tasks/install-ubuntu.yml index 4155bc4b8..ec9a1255b 100644 --- a/roles/nvidia_dcgm/tasks/install-ubuntu.yml +++ b/roles/nvidia_dcgm/tasks/install-ubuntu.yml @@ -1,23 +1,20 @@ --- -- name: Ubuntu | add pin file - copy: - src: "cuda-ubuntu.pin" - dest: "/etc/apt/preferences.d/cuda-repository-pin-600" - owner: "root" - group: "root" - mode: "0644" - -- name: Ubuntu | add key +- name: Ubuntu | remove old key apt_key: - url: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_url }}" - id: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}" + id: "{{ old_nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}" + state: "absent" + +- name: Ubuntu | install CUDA keyring + apt: + deb: "{{ nvidia_driver_ubuntu_cuda_keyring_url }}" + state: "present" environment: "{{ proxy_env if proxy_env is defined else {} }}" 
-- name: Ubuntu | add CUDA repo - apt_repository: - repo: "deb {{ nvidia_driver_ubuntu_cuda_repo_baseurl }} /" - update_cache: yes +- name: Ubuntu | force apt update + apt: + update_cache: true environment: "{{ proxy_env if proxy_env is defined else {} }}" + changed_when: false - name: Ubuntu | install package apt: diff --git a/roles/requirements.yml b/roles/requirements.yml index 727b34f78..7dfa13ff8 100644 --- a/roles/requirements.yml +++ b/roles/requirements.yml @@ -30,7 +30,7 @@ roles: version: "v5.2.6" - src: nvidia.nvidia_driver - version: "v2.2.0" + version: "v2.2.1" - src: nvidia.nvidia_docker version: "v1.2.4"