Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/team project cost tracking #2658

Merged
merged 23 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 125 additions & 112 deletions kube/services/argo-events/workflows/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,119 +4,44 @@ metadata:
name: karpenter-templates
namespace: argo-events
data:
provisioner.yaml: |
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
nodeclass.yaml: |
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass
metadata:
name: workflow-WORKFLOW_NAME
spec:
requirements:
- key: karpenter.sh/capacity-type
operator: In
values: ["on-demand"]
- key: kubernetes.io/arch
operator: In
values:
- amd64
- key: node.kubernetes.io/instance-type
operator: In
values:
- c6a.large
- c6a.xlarge
- c6a.2xlarge
- c6a.4xlarge
- c6a.8xlarge
- c6a.12xlarge
- c7a.large
- c7a.xlarge
- c7a.2xlarge
- c7a.4xlarge
- c7a.8xlarge
- c7a.12xlarge
- c6i.large
- c6i.xlarge
- c6i.2xlarge
- c6i.4xlarge
- c6i.8xlarge
- c6i.12xlarge
- c7i.large
- c7i.xlarge
- c7i.2xlarge
- c7i.4xlarge
- c7i.8xlarge
- c7i.12xlarge
- m6a.2xlarge
- m6a.4xlarge
- m6a.8xlarge
- m6a.12xlarge
- m6a.16xlarge
- m6a.24xlarge
- m7a.2xlarge
- m7a.4xlarge
- m7a.8xlarge
- m7a.12xlarge
- m7a.16xlarge
- m7a.24xlarge
- m6i.2xlarge
- m6i.4xlarge
- m6i.8xlarge
- m6i.12xlarge
- m6i.16xlarge
- m6i.24xlarge
- m7i.2xlarge
- m7i.4xlarge
- m7i.8xlarge
- m7i.12xlarge
- m7i.16xlarge
- m7i.24xlarge
- r7iz.2xlarge
- r7iz.4xlarge
- r7iz.8xlarge
- r7iz.12xlarge
- r7iz.16xlarge
- r7iz.24xlarge
taints:
- key: role
value: WORKFLOW_NAME
effect: NoSchedule
labels:
role: WORKFLOW_NAME
purpose: workflow
limits:
resources:
cpu: 4000
providerRef:
name: workflow-WORKFLOW_NAME
# Kill nodes after 2 days to ensure they stay up to date
ttlSecondsUntilExpired: 172800
ttlSecondsAfterEmpty: 10

nodetemplate.yaml: |
apiVersion: karpenter.k8s.aws/v1alpha1
kind: AWSNodeTemplate
metadata:
name: workflow-WORKFLOW_NAME
spec:
amiSelector:
aws::name: EKS-FIPS*
aws::owners: "143731057154"
subnetSelector:
karpenter.sh/discovery: ENVIRONMENT
securityGroupSelector:
karpenter.sh/discovery: ENVIRONMENT-workflow
tags:
Environment: ENVIRONMENT
Name: eks-ENVIRONMENT-workflow-karpenter
karpenter.sh/discovery: ENVIRONMENT
workflowname: WORKFLOW_NAME
gen3username: GEN3_USERNAME
gen3service: argo-workflows
purpose: workflow
amiFamily: AL2
amiSelectorTerms:
- name: 1-31-EKS-FIPS*
owner: "143731057154"
blockDeviceMappings:
- deviceName: /dev/xvda
ebs:
deleteOnTermination: true
encrypted: true
volumeSize: 100Gi
volumeType: gp2
metadataOptions:
httpEndpoint: enabled
httpProtocolIPv6: disabled
httpPutResponseHopLimit: 2
httpTokens: optional
role: eks_ENVIRONMENT_workers_role
securityGroupSelectorTerms:
- tags:
karpenter.sh/discovery: ENVIRONMENT-workflow
subnetSelectorTerms:
- tags:
karpenter.sh/discovery: ENVIRONMENT
tags:
Environment: ENVIRONMENT
Name: eks-ENVIRONMENT-workflow-karpenter
gen3service: argo-workflows
gen3username: GEN3_USERNAME
gen3teamproject: "GEN3_TEAMNAME"
karpenter.sh/discovery: ENVIRONMENT
purpose: workflow
workflowname: WORKFLOW_NAME
userData: |
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="BOUNDARY"
Expand All @@ -133,10 +58,98 @@ data:
sysctl -w fs.inotify.max_user_watches=12000

--BOUNDARY--
blockDeviceMappings:
- deviceName: /dev/xvda
ebs:
volumeSize: 100Gi
volumeType: gp2
encrypted: true
deleteOnTermination: true

nodepool.yaml: |
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
name: workflow-WORKFLOW_NAME
spec:
disruption:
consolidateAfter: 10s
consolidationPolicy: WhenEmpty
expireAfter: 48h0m0s
limits:
cpu: 4k
template:
metadata:
labels:
purpose: workflow
role: WORKFLOW_NAME
spec:
nodeClassRef:
name: workflow-WORKFLOW_NAME
requirements:
- key: karpenter.sh/capacity-type
operator: In
values:
- on-demand
- key: kubernetes.io/arch
operator: In
values:
- amd64
- key: node.kubernetes.io/instance-type
operator: In
values:
- c6a.large
- c6a.xlarge
- c6a.2xlarge
- c6a.4xlarge
- c6a.8xlarge
- c6a.12xlarge
- c7a.large
- c7a.xlarge
- c7a.2xlarge
- c7a.4xlarge
- c7a.8xlarge
- c7a.12xlarge
- c6i.large
- c6i.xlarge
- c6i.2xlarge
- c6i.4xlarge
- c6i.8xlarge
- c6i.12xlarge
- c7i.large
- c7i.xlarge
- c7i.2xlarge
- c7i.4xlarge
- c7i.8xlarge
- c7i.12xlarge
- m6a.2xlarge
- m6a.4xlarge
- m6a.8xlarge
- m6a.12xlarge
- m6a.16xlarge
- m6a.24xlarge
- m7a.2xlarge
- m7a.4xlarge
- m7a.8xlarge
- m7a.12xlarge
- m7a.16xlarge
- m7a.24xlarge
- m6i.2xlarge
- m6i.4xlarge
- m6i.8xlarge
- m6i.12xlarge
- m6i.16xlarge
- m6i.24xlarge
- m7i.2xlarge
- m7i.4xlarge
- m7i.8xlarge
- m7i.12xlarge
- m7i.16xlarge
- m7i.24xlarge
- r7iz.2xlarge
- r7iz.4xlarge
- r7iz.8xlarge
- r7iz.12xlarge
- r7iz.16xlarge
- r7iz.24xlarge
- key: kubernetes.io/os
operator: In
values:
- linux
taints:
- effect: NoSchedule
key: role
value: WORKFLOW_NAME
8 changes: 4 additions & 4 deletions kube/services/argo-events/workflows/sensor-completed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ spec:
args:
- "-c"
- |
if kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
kubectl delete awsnodetemplate workflow-$WORKFLOW_NAME
if kubectl get nodepool workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
kubectl delete nodepool workflow-$WORKFLOW_NAME
fi
if kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
kubectl delete provisioners workflow-$WORKFLOW_NAME
if kubectl get ec2nodeclass workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
kubectl delete ec2nodeclass workflow-$WORKFLOW_NAME
fi
env:
- name: WORKFLOW_NAME
Expand Down
32 changes: 19 additions & 13 deletions kube/services/argo-events/workflows/sensor-created.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ spec:
dependencyName: workflow-created-event
dataKey: body.metadata.labels.gen3username
dest: spec.template.spec.containers.0.env.1.value
- src:
dependencyName: workflow-created-event
dataKey: body.metadata.labels.gen3teamproject
dest: spec.template.spec.containers.0.env.2.value
source:
resource:
apiVersion: batch/v1
Expand All @@ -60,36 +64,38 @@ spec:
- "-c"
- |
#!/bin/bash
if [ -z "$PROVISIONER_TEMPLATE" ]; then
PROVISIONER_TEMPLATE="provisioner.yaml"
fi

if [ -z "$AWSNODETEMPLATE_TEMPLATE" ]; then
AWSNODETEMPLATE_TEMPLATE="nodetemplate.yaml"
if [ -z "$NODEPOOL_TEMPLATE" ]; then
NODEPOOL_TEMPLATE="/manifests/nodepool.yaml"
fi

if [ -z "$NODECLASS_TEMPLATE" ]; then
NODECLASS_TEMPLATE="/manifests/nodeclass.yaml"
fi

if ! kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
sed -e "s/WORKFLOW_NAME/$WORKFLOW_NAME/" -e "s/GEN3_USERNAME/$GEN3_USERNAME/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$AWSNODETEMPLATE_TEMPLATE" | kubectl apply -f -
if ! kubectl get ec2nodeclass workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
sed -e "s/WORKFLOW_NAME/$WORKFLOW_NAME/" -e "s/GEN3_USERNAME/$GEN3_USERNAME/" -e "s/GEN3_TEAMNAME/$GEN3_TEAMNAME/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$NODECLASS_TEMPLATE" | kubectl apply -f -
fi

if ! kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
sed -e "s/WORKFLOW_NAME/$WORKFLOW_NAME/" -e "s/GEN3_USERNAME/$GEN3_USERNAME/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$PROVISIONER_TEMPLATE" | kubectl apply -f -
if ! kubectl get nodepool workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
sed -e "s/WORKFLOW_NAME/$WORKFLOW_NAME/" -e "s/GEN3_USERNAME/$GEN3_USERNAME/" -e "s/GEN3_TEAMNAME/$GEN3_TEAMNAME/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$NODEPOOL_TEMPLATE" | kubectl apply -f -
fi
env:
- name: WORKFLOW_NAME
value: ""
- name: GEN3_USERNAME
value: ""
- name: GEN3_TEAMNAME
value: ""
- name: ENVIRONMENT
valueFrom:
configMapKeyRef:
name: environment
key: environment
- name: PROVISIONER_TEMPLATE
value: /manifests/provisioner.yaml
- name: AWSNODETEMPLATE_TEMPLATE
value: /manifests/nodetemplate.yaml
- name: NODEPOOL_TEMPLATE
value: /manifests/nodepool.yaml
- name: NODECLASS_TEMPLATE
value: /manifests/nodeclass.yaml
volumeMounts:
- name: karpenter-templates-volume
mountPath: /manifests
Expand Down
8 changes: 4 additions & 4 deletions kube/services/argo-events/workflows/sensor-deleted.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ spec:
args:
- "-c"
- |
if kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
kubectl delete awsnodetemplate workflow-$WORKFLOW_NAME
if kubectl get nodepool workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
kubectl delete nodepool workflow-$WORKFLOW_NAME
fi
if kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
kubectl delete provisioners workflow-$WORKFLOW_NAME
if kubectl get ec2nodeclass workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
kubectl delete ec2nodeclass workflow-$WORKFLOW_NAME
fi
env:
- name: WORKFLOW_NAME
Expand Down
Loading
Loading