Skip to content

Commit

Permalink
Add DRA Integration E2E test
Browse files Browse the repository at this point in the history
Signed-off-by: Vasilis Remmas <vremmas@nvidia.com>
  • Loading branch information
vasrem committed Apr 11, 2024
1 parent 202533c commit 19e6da4
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/kind-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ jobs:
working-directory: ./e2e
run: ./test-default-route1.sh

- name: Test DRA integration
working-directory: ./e2e
run: ./test-dra-integration.sh

- name: Export kind logs
if: always()
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
bin/
e2e/bin/
e2e/yamls/
e2e/repos/

# GOPATH created by the build script
gopath/
Expand Down
1 change: 1 addition & 0 deletions e2e/get_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ curl -Lo ./bin/koko https://github.com/redhat-nfvpe/koko/releases/download/v0.83
chmod +x ./bin/koko
curl -Lo ./bin/jq https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64
chmod +x ./bin/jq
wget -qO- https://get.helm.sh/helm-v3.14.3-linux-amd64.tar.gz | tar xvzf - --strip-components=1 -C ./bin linux-amd64/helm
14 changes: 14 additions & 0 deletions e2e/setup_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,21 @@ nodes:
nodeRegistration:
kubeletExtraArgs:
pod-manifest-path: "/etc/kubernetes/manifests/"
feature-gates: "DynamicResourceAllocation=true,KubeletPodResourcesDynamicResources=true"
- role: worker
# Required by DRA Integration
##
featureGates:
DynamicResourceAllocation: true
runtimeConfig:
"api/alpha": "true"
containerdConfigPatches:
# Enable CDI as described in
# https://github.com/container-orchestrated-devices/container-device-interface#containerd-configuration
- |-
[plugins."io.containerd.grpc.v1.cri"]
enable_cdi = true
##
EOF

# load multus image from container host to kind node
Expand Down
49 changes: 49 additions & 0 deletions e2e/templates/dra-integration.yml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
# Claim template for the "gpu.example.com" resource class. The test Pod at the
# bottom of this file references this template by name.
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaimTemplate
metadata:
name: gpu.example.com
spec:
spec:
resourceClassName: gpu.example.com
---
# Secondary network "dra-net". The resourceName annotation ties this network
# to the gpu.example.com resource. The CNI config uses the "dummy" plugin with
# host-local IPAM on 10.1.2.0/24; CNI_VERSION is substituted when the template
# is rendered.
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
name: dra-net
annotations:
k8s.v1.cni.cncf.io/resourceName: gpu.example.com
spec:
config: '{
"cniVersion": "{{ CNI_VERSION }}",
"plugins": [{
"name": "mynet",
"type": "dummy",
"ipam": {
"type": "host-local",
"subnet": "10.1.2.0/24"
}
}]
}'
---
# Test Pod: attached to default/dra-net and requesting the claim named "gpu",
# which is sourced from the gpu.example.com ResourceClaimTemplate. The
# container runs `export` (printing its exported environment) and then sleeps.
apiVersion: v1
kind: Pod
metadata:
name: dra-integration
labels:
app: dra-integration
annotations:
k8s.v1.cni.cncf.io/networks: default/dra-net
spec:
containers:
- name: ctr0
image: ubuntu:22.04
command: ["bash", "-c"]
args: ["export; sleep 9999"]
resources:
claims:
- name: gpu
resourceClaims:
- name: gpu
source:
resourceClaimTemplateName: gpu.example.com
6 changes: 6 additions & 0 deletions e2e/templates/multus-daemonset-thick.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ spec:
- name: multus-daemon-config
mountPath: /etc/cni/net.d/multus.d
readOnly: true
- name: kubelet-pod-resources
mountPath: /var/lib/kubelet/pod-resources
readOnly: true
env:
- name: MULTUS_NODE_NAME
valueFrom:
Expand Down Expand Up @@ -187,6 +190,9 @@ spec:
- name: cnibin
hostPath:
path: /opt/cni/bin
- name: kubelet-pod-resources
hostPath:
path: /var/lib/kubelet/pod-resources
- name: multus-daemon-config
configMap:
name: multus-daemon-config
Expand Down
59 changes: 59 additions & 0 deletions e2e/test-dra-integration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/sh
# E2E test for the Multus DRA (Dynamic Resource Allocation) integration.
#
# Steps:
#   1. Clone, build and load the dra-example-driver image into the kind cluster.
#   2. Install the driver's Helm chart with overridden controller scheduling.
#   3. Create the resources from yamls/dra-integration.yml and wait for the
#      Pod labelled app=dra-integration to become ready.
#   4. Verify that the driver-injected environment variable is visible in the
#      Pod, then clean up.
#
# Expects to be run from the e2e directory: ./bin is appended to PATH and the
# repos/ and yamls/ paths are relative.
set -o errexit

export PATH=${PATH}:./bin

# This test is using an example implementation of a DRA driver. This driver is mocking GPU resources. In our test we
# don't care about what these resources are. We want to ensure that such a resource is correctly passed in the Pod
# using Multus configurations. A couple of notes:
# - We explicitly don't pin the revision of the dra-example-driver to a specific commit to ensure that the integration
#   continues to work even when the dra-example-driver is updated (which may also indicate API changes on the DRA).
# - The chart and latest image are not published anywhere, therefore we have to build locally. This leads to slower
#   e2e suite runs.
echo "installing dra-example-driver"
repo_path="repos/dra-example-driver"

# Best effort: start from a clean checkout even if a previous run left one behind.
rm -rf "${repo_path}" || true
git clone https://github.com/kubernetes-sigs/dra-example-driver.git "${repo_path}"
"${repo_path}/demo/build-driver.sh"
KIND_CLUSTER_NAME=kind "${repo_path}/demo/scripts/load-driver-image-into-kind.sh"
chart_path=${repo_path}/deployments/helm/dra-example-driver/
overriden_values_path=${chart_path}/overriden_values.yaml

# With the thick plugin, in kind, the primary network on the control plane is not working as expected. The pods are not
# able to communicate with the control plane and the error looks like this:
# failed to list *v1alpha2.PodSchedulingContext: Get "https://10.96.0.1:443/apis/resource.k8s.io/v1alpha2/podschedulingcontexts?limit=500&resourceVersion=0": dial tcp 10.96.0.1:443: connect: no route to host
# We override the values here to schedule the controller on the worker nodes where the network is working as expected.
cat <<EOF >> "${overriden_values_path}"
controller:
  nodeSelector: null
  tolerations: null
EOF

helm install \
  -n dra-example-driver \
  --create-namespace \
  -f "${overriden_values_path}" \
  dra-example-driver \
  "${chart_path}"

echo "installing testing pods"
kubectl create -f yamls/dra-integration.yml
kubectl wait --for=condition=ready -l app=dra-integration --timeout=300s pod

echo "check dra-integration pod for DRA injected environment variable"

# We can validate that the resource is correctly injected by checking an environment variable this dra driver is injecting
# in the Pod.
# https://github.com/kubernetes-sigs/dra-example-driver/blob/be2b8b1db47b8c757440e955ce5ced88c23bfe86/cmd/dra-example-kubeletplugin/cdi.go#L71C20-L71C44
#
# The check runs directly as the `if` condition: under errexit, a failing
# command in a plain assignment would abort the script before the failure
# message below could be printed. The matched line is discarded; only the
# exit status matters.
if kubectl exec dra-integration -- bash -c "echo \$DRA_RESOURCE_DRIVER_NAME | grep gpu.resource.example.com" > /dev/null; then
  echo "dra-integration pod has DRA injected environment variable"
else
  echo "dra-integration pod doesn't have DRA injected environment variable"
  exit 1
fi

echo "cleanup resources"
kubectl delete -f yamls/dra-integration.yml
helm uninstall -n dra-example-driver dra-example-driver

0 comments on commit 19e6da4

Please sign in to comment.