diff --git a/kube/services/argo-events/workflows/configmap.yaml b/kube/services/argo-events/workflows/configmap.yaml index f57ae07d0..c084533fe 100644 --- a/kube/services/argo-events/workflows/configmap.yaml +++ b/kube/services/argo-events/workflows/configmap.yaml @@ -4,119 +4,44 @@ metadata: name: karpenter-templates namespace: argo-events data: - provisioner.yaml: | - apiVersion: karpenter.sh/v1alpha5 - kind: Provisioner + nodeclass.yaml: | + apiVersion: karpenter.k8s.aws/v1beta1 + kind: EC2NodeClass metadata: name: workflow-WORKFLOW_NAME spec: - requirements: - - key: karpenter.sh/capacity-type - operator: In - values: ["on-demand"] - - key: kubernetes.io/arch - operator: In - values: - - amd64 - - key: node.kubernetes.io/instance-type - operator: In - values: - - c6a.large - - c6a.xlarge - - c6a.2xlarge - - c6a.4xlarge - - c6a.8xlarge - - c6a.12xlarge - - c7a.large - - c7a.xlarge - - c7a.2xlarge - - c7a.4xlarge - - c7a.8xlarge - - c7a.12xlarge - - c6i.large - - c6i.xlarge - - c6i.2xlarge - - c6i.4xlarge - - c6i.8xlarge - - c6i.12xlarge - - c7i.large - - c7i.xlarge - - c7i.2xlarge - - c7i.4xlarge - - c7i.8xlarge - - c7i.12xlarge - - m6a.2xlarge - - m6a.4xlarge - - m6a.8xlarge - - m6a.12xlarge - - m6a.16xlarge - - m6a.24xlarge - - m7a.2xlarge - - m7a.4xlarge - - m7a.8xlarge - - m7a.12xlarge - - m7a.16xlarge - - m7a.24xlarge - - m6i.2xlarge - - m6i.4xlarge - - m6i.8xlarge - - m6i.12xlarge - - m6i.16xlarge - - m6i.24xlarge - - m7i.2xlarge - - m7i.4xlarge - - m7i.8xlarge - - m7i.12xlarge - - m7i.16xlarge - - m7i.24xlarge - - r7iz.2xlarge - - r7iz.4xlarge - - r7iz.8xlarge - - r7iz.12xlarge - - r7iz.16xlarge - - r7iz.24xlarge - taints: - - key: role - value: WORKFLOW_NAME - effect: NoSchedule - labels: - role: WORKFLOW_NAME - purpose: workflow - limits: - resources: - cpu: 4000 - providerRef: - name: workflow-WORKFLOW_NAME - # Kill nodes after 2 days to ensure they stay up to date - ttlSecondsUntilExpired: 172800 - ttlSecondsAfterEmpty: 10 - - nodetemplate.yaml: | - 
apiVersion: karpenter.k8s.aws/v1alpha1 - kind: AWSNodeTemplate - metadata: - name: workflow-WORKFLOW_NAME - spec: - amiSelector: - aws::name: EKS-FIPS* - aws::owners: "143731057154" - subnetSelector: - karpenter.sh/discovery: ENVIRONMENT - securityGroupSelector: - karpenter.sh/discovery: ENVIRONMENT-workflow - tags: - Environment: ENVIRONMENT - Name: eks-ENVIRONMENT-workflow-karpenter - karpenter.sh/discovery: ENVIRONMENT - workflowname: WORKFLOW_NAME - gen3username: GEN3_USERNAME - gen3service: argo-workflows - purpose: workflow + amiFamily: AL2 + amiSelectorTerms: + - name: 1-31-EKS-FIPS* + owner: "143731057154" + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + volumeSize: 100Gi + volumeType: gp2 metadataOptions: httpEndpoint: enabled httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: optional + role: eks_ENVIRONMENT_workers_role + securityGroupSelectorTerms: + - tags: + karpenter.sh/discovery: ENVIRONMENT-workflow + subnetSelectorTerms: + - tags: + karpenter.sh/discovery: ENVIRONMENT + tags: + Environment: ENVIRONMENT + Name: eks-ENVIRONMENT-workflow-karpenter + gen3service: argo-workflows + gen3username: GEN3_USERNAME + gen3teamproject: "GEN3_TEAMNAME" + karpenter.sh/discovery: ENVIRONMENT + purpose: workflow + workflowname: WORKFLOW_NAME userData: | MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="BOUNDARY" @@ -133,10 +58,98 @@ data: sysctl -w fs.inotify.max_user_watches=12000 --BOUNDARY-- - blockDeviceMappings: - - deviceName: /dev/xvda - ebs: - volumeSize: 100Gi - volumeType: gp2 - encrypted: true - deleteOnTermination: true + + nodepool.yaml: | + apiVersion: karpenter.sh/v1beta1 + kind: NodePool + metadata: + name: workflow-WORKFLOW_NAME + spec: + disruption: + consolidateAfter: 10s + consolidationPolicy: WhenEmpty + expireAfter: 48h0m0s + limits: + cpu: 4k + template: + metadata: + labels: + purpose: workflow + role: WORKFLOW_NAME + spec: + nodeClassRef: + name: 
workflow-WORKFLOW_NAME + requirements: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - key: kubernetes.io/arch + operator: In + values: + - amd64 + - key: node.kubernetes.io/instance-type + operator: In + values: + - c6a.large + - c6a.xlarge + - c6a.2xlarge + - c6a.4xlarge + - c6a.8xlarge + - c6a.12xlarge + - c7a.large + - c7a.xlarge + - c7a.2xlarge + - c7a.4xlarge + - c7a.8xlarge + - c7a.12xlarge + - c6i.large + - c6i.xlarge + - c6i.2xlarge + - c6i.4xlarge + - c6i.8xlarge + - c6i.12xlarge + - c7i.large + - c7i.xlarge + - c7i.2xlarge + - c7i.4xlarge + - c7i.8xlarge + - c7i.12xlarge + - m6a.2xlarge + - m6a.4xlarge + - m6a.8xlarge + - m6a.12xlarge + - m6a.16xlarge + - m6a.24xlarge + - m7a.2xlarge + - m7a.4xlarge + - m7a.8xlarge + - m7a.12xlarge + - m7a.16xlarge + - m7a.24xlarge + - m6i.2xlarge + - m6i.4xlarge + - m6i.8xlarge + - m6i.12xlarge + - m6i.16xlarge + - m6i.24xlarge + - m7i.2xlarge + - m7i.4xlarge + - m7i.8xlarge + - m7i.12xlarge + - m7i.16xlarge + - m7i.24xlarge + - r7iz.2xlarge + - r7iz.4xlarge + - r7iz.8xlarge + - r7iz.12xlarge + - r7iz.16xlarge + - r7iz.24xlarge + - key: kubernetes.io/os + operator: In + values: + - linux + taints: + - effect: NoSchedule + key: role + value: WORKFLOW_NAME diff --git a/kube/services/argo-events/workflows/sensor-completed.yaml b/kube/services/argo-events/workflows/sensor-completed.yaml index 293c0e119..e483d3297 100644 --- a/kube/services/argo-events/workflows/sensor-completed.yaml +++ b/kube/services/argo-events/workflows/sensor-completed.yaml @@ -51,12 +51,12 @@ spec: args: - "-c" - | - if kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then - kubectl delete awsnodetemplate workflow-$WORKFLOW_NAME + if kubectl get nodepool workflow-$WORKFLOW_NAME >/dev/null 2>&1; then + kubectl delete nodepool workflow-$WORKFLOW_NAME fi - if kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then - kubectl delete provisioners workflow-$WORKFLOW_NAME + if kubectl get 
ec2nodeclass workflow-$WORKFLOW_NAME >/dev/null 2>&1; then + kubectl delete ec2nodeclass workflow-$WORKFLOW_NAME fi env: - name: WORKFLOW_NAME diff --git a/kube/services/argo-events/workflows/sensor-created.yaml b/kube/services/argo-events/workflows/sensor-created.yaml index 9f6de2c83..05da3bc38 100644 --- a/kube/services/argo-events/workflows/sensor-created.yaml +++ b/kube/services/argo-events/workflows/sensor-created.yaml @@ -36,6 +36,10 @@ spec: dependencyName: workflow-created-event dataKey: body.metadata.labels.gen3username dest: spec.template.spec.containers.0.env.1.value + - src: + dependencyName: workflow-created-event + dataKey: body.metadata.labels.gen3teamproject + dest: spec.template.spec.containers.0.env.2.value source: resource: apiVersion: batch/v1 @@ -60,36 +64,38 @@ spec: - "-c" - | #!/bin/bash - if [ -z "$PROVISIONER_TEMPLATE" ]; then - PROVISIONER_TEMPLATE="provisioner.yaml" - fi - if [ -z "$AWSNODETEMPLATE_TEMPLATE" ]; then - AWSNODETEMPLATE_TEMPLATE="nodetemplate.yaml" + if [ -z "$NODEPOOL_TEMPLATE" ]; then + NODEPOOL_TEMPLATE="/manifests/nodepool.yaml" fi + if [ -z "$NODECLASS_TEMPLATE" ]; then + NODECLASS_TEMPLATE="/manifests/nodeclass.yaml" + fi - if ! kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then - sed -e "s/WORKFLOW_NAME/$WORKFLOW_NAME/" -e "s/GEN3_USERNAME/$GEN3_USERNAME/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$AWSNODETEMPLATE_TEMPLATE" | kubectl apply -f - + if ! kubectl get ec2nodeclass workflow-$WORKFLOW_NAME >/dev/null 2>&1; then + sed -e "s/WORKFLOW_NAME/$WORKFLOW_NAME/" -e "s/GEN3_USERNAME/$GEN3_USERNAME/" -e "s/GEN3_TEAMNAME/$GEN3_TEAMNAME/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$NODECLASS_TEMPLATE" | kubectl apply -f - fi - if ! kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then - sed -e "s/WORKFLOW_NAME/$WORKFLOW_NAME/" -e "s/GEN3_USERNAME/$GEN3_USERNAME/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$PROVISIONER_TEMPLATE" | kubectl apply -f - + if ! 
kubectl get nodepool workflow-$WORKFLOW_NAME >/dev/null 2>&1; then + sed -e "s/WORKFLOW_NAME/$WORKFLOW_NAME/" -e "s/GEN3_USERNAME/$GEN3_USERNAME/" -e "s/GEN3_TEAMNAME/$GEN3_TEAMNAME/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$NODEPOOL_TEMPLATE" | kubectl apply -f - fi env: - name: WORKFLOW_NAME value: "" - name: GEN3_USERNAME value: "" + - name: GEN3_TEAMNAME + value: "" - name: ENVIRONMENT valueFrom: configMapKeyRef: name: environment key: environment - - name: PROVISIONER_TEMPLATE - value: /manifests/provisioner.yaml - - name: AWSNODETEMPLATE_TEMPLATE - value: /manifests/nodetemplate.yaml + - name: NODEPOOL_TEMPLATE + value: /manifests/nodepool.yaml + - name: NODECLASS_TEMPLATE + value: /manifests/nodeclass.yaml volumeMounts: - name: karpenter-templates-volume mountPath: /manifests diff --git a/kube/services/argo-events/workflows/sensor-deleted.yaml b/kube/services/argo-events/workflows/sensor-deleted.yaml index c235a820a..0b12cb118 100644 --- a/kube/services/argo-events/workflows/sensor-deleted.yaml +++ b/kube/services/argo-events/workflows/sensor-deleted.yaml @@ -47,12 +47,12 @@ spec: args: - "-c" - | - if kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then - kubectl delete awsnodetemplate workflow-$WORKFLOW_NAME + if kubectl get nodepool workflow-$WORKFLOW_NAME >/dev/null 2>&1; then + kubectl delete nodepool workflow-$WORKFLOW_NAME fi - if kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then - kubectl delete provisioners workflow-$WORKFLOW_NAME + if kubectl get ec2nodeclass workflow-$WORKFLOW_NAME >/dev/null 2>&1; then + kubectl delete ec2nodeclass workflow-$WORKFLOW_NAME fi env: - name: WORKFLOW_NAME diff --git a/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob-va-testing.yaml b/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob-va-testing.yaml deleted file mode 100644 index aaba57b07..000000000 --- a/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob-va-testing.yaml +++ /dev/null @@ 
-1,71 +0,0 @@ -apiVersion: batch/v1 -kind: CronJob -metadata: - name: karpenter-reconciler-cronjob-va-testing - namespace: argo-events -spec: - schedule: "*/5 * * * *" - jobTemplate: - spec: - template: - metadata: - labels: - app: gen3job - spec: - serviceAccount: karpenter-reconciler - volumes: - - name: karpenter-templates-volume - configMap: - name: karpenter-templates - containers: - - name: karpenter-reconciler - image: quay.io/cdis/awshelper - volumeMounts: - - name: karpenter-templates-volume - mountPath: /manifests - env: - - name: PROVISIONER_TEMPLATE - value: /manifests/provisioner.yaml - - name: AWSNODETEMPLATE_TEMPLATE - value: /manifests/nodetemplate.yaml - command: ["/bin/bash"] - args: - - "-c" - - | - #!/bin/bash - if [ -z "$PROVISIONER_TEMPLATE" ]; then - PROVISIONER_TEMPLATE="provisioner.yaml" - fi - - if [ -z "$AWSNODETEMPLATE_TEMPLATE" ]; then - AWSNODETEMPLATE_TEMPLATE="nodetemplate.yaml" - fi - - ENVIRONMENT=$(kubectl -n va-testing get configmap global -o jsonpath="{.data.environment}") - - WORKFLOWS=$(kubectl get workflows -n argo -o=jsonpath='{range .items[*]}{.metadata.name}{" "}{.metadata.labels.gen3username}{"\n"}') - - WORKFLOW_ARRAY=() - - while IFS= read -r line; do - WORKFLOW_ARRAY+=("$line") - done <<< "$WORKFLOWS" - - for workflow in "${WORKFLOW_ARRAY[@]}" - do - echo "Running loop for workflow: $workflow" - workflow_name=$(echo "$workflow" | awk '{print $1}') - workflow_user=$(echo "$workflow" | awk '{print $2}') - - if ! kubectl get awsnodetemplate workflow-$workflow_name >/dev/null 2>&1; then - echo "No awsnodetemplate found for ${workflow_name}, creating one" - sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$AWSNODETEMPLATE_TEMPLATE" | kubectl apply -f - - fi - - if ! 
kubectl get provisioner workflow-$workflow_name >/dev/null 2>&1; then - echo "No provisioner found for ${workflow_name}, creating one" - sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$PROVISIONER_TEMPLATE" | kubectl apply -f - - - fi - done - restartPolicy: OnFailure diff --git a/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob.yaml b/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob.yaml index aef5d6c49..3c6d58768 100644 --- a/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob.yaml +++ b/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob.yaml @@ -24,26 +24,26 @@ spec: - name: karpenter-templates-volume mountPath: /manifests env: - - name: PROVISIONER_TEMPLATE - value: /manifests/provisioner.yaml - - name: AWSNODETEMPLATE_TEMPLATE - value: /manifests/nodetemplate.yaml + - name: NODEPOOL_TEMPLATE + value: /manifests/nodepool.yaml + - name: NODECLASS_TEMPLATE + value: /manifests/nodeclass.yaml command: ["/bin/bash"] args: - "-c" - | #!/bin/bash - if [ -z "$PROVISIONER_TEMPLATE" ]; then - PROVISIONER_TEMPLATE="provisioner.yaml" + if [ -z "$NODEPOOL_TEMPLATE" ]; then + NODEPOOL_TEMPLATE="/manifests/nodepool.yaml" fi - if [ -z "$AWSNODETEMPLATE_TEMPLATE" ]; then - AWSNODETEMPLATE_TEMPLATE="nodetemplate.yaml" + if [ -z "$NODECLASS_TEMPLATE" ]; then + NODECLASS_TEMPLATE="/manifests/nodeclass.yaml" fi ENVIRONMENT=$(kubectl -n default get configmap global -o jsonpath="{.data.environment}") - WORKFLOWS=$(kubectl get workflows -n argo -o=jsonpath='{range .items[*]}{.metadata.name}{" "}{.metadata.labels.gen3username}{"\n"}') + WORKFLOWS=$(kubectl get workflows -n argo -o=jsonpath='{range .items[*]}{.metadata.name}{" "}{.metadata.labels.gen3username}{" "}{.metadata.labels.gen3teamproject}{"\n"}') WORKFLOW_ARRAY=() @@ -57,16 +57,17 @@ spec: do workflow_name=$(echo "$workflow" | awk '{print $1}') workflow_user=$(echo "$workflow" | awk '{print $2}') +
workflow_team=$(echo "$workflow" | awk '{print $3}') if [ ! -z "$workflow_name" ]; then - if ! kubectl get awsnodetemplate workflow-$workflow_name >/dev/null 2>&1; then - echo "No awsnodetemplate found for ${workflow_name}, creating one" - sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$AWSNODETEMPLATE_TEMPLATE" | kubectl apply -f - + if ! kubectl get ec2nodeclass workflow-$workflow_name >/dev/null 2>&1; then + echo "No ec2nodeclass found for ${workflow_name}, creating one" + sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_TEAMNAME/$workflow_team/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$NODECLASS_TEMPLATE" | kubectl apply -f - fi - if ! kubectl get provisioner workflow-$workflow_name >/dev/null 2>&1; then - echo "No provisioner found for ${workflow_name}, creating one" - sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$PROVISIONER_TEMPLATE" | kubectl apply -f - + if ! kubectl get nodepool workflow-$workflow_name >/dev/null 2>&1; then + echo "No nodepool found for ${workflow_name}, creating one" + sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_TEAMNAME/$workflow_team/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$NODEPOOL_TEMPLATE" | kubectl apply -f - fi fi