From e618b70b87c23fc3a7cfc0e81e7a79f5605d1cfb Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:12:13 -0700 Subject: [PATCH 1/9] Update tag --- modules/apache-airflow/templates/values.yaml | 488 +++++++++++++++++-- 1 file changed, 457 insertions(+), 31 deletions(-) diff --git a/modules/apache-airflow/templates/values.yaml b/modules/apache-airflow/templates/values.yaml index da1c2df8..99c9e395 100644 --- a/modules/apache-airflow/templates/values.yaml +++ b/modules/apache-airflow/templates/values.yaml @@ -25,6 +25,20 @@ fullnameOverride: "" # Provide a name to substitute for the name of the chart nameOverride: "" +# Use standard naming for all resources using airflow.fullname template +# Consider removing this later and default it to true +# to make this chart follow standard naming conventions using the fullname template. +# For now this is an opt-in switch for backwards compatibility to leverage the standard naming convention +# and being able to use fully fullnameOverride and nameOverride in all resources +# For new installations - it is recommended to set it to True to follow standard naming conventions +# For existing installations, this will rename and redeploy your resources with the new names. Be aware that +# this will recreate your deployment/statefulsets along with their persistent volume claims and data storage +# migration may be needed to keep your old data +# +# Note:fernet-key,redis-password and broker-url secrets don't use this logic yet, +# as this may break existing installations due to how they get installed via pre-install hook. +useStandardNaming: false + # Max number of old replicasets to retain. 
Can be overridden by each deployment's revisionHistoryLimit revisionHistoryLimit: ~ @@ -43,21 +57,27 @@ securityContexts: pod: {} containers: {} +# Global container lifecycle hooks for airflow containers +containerLifecycleHooks: {} + # Airflow home directory # Used for mount paths airflowHome: /opt/airflow +# TODO: Update this with out airflow repo/tag # Default airflow repository -- overridden by all the specific images below -defaultAirflowRepository: bfaublesage/airflow +defaultAirflowRepository: ghcr.io/sage-bionetworks-workflows/orca-recipes +# TODO: Update this with out airflow repo/tag # Default airflow tag to deploy -defaultAirflowTag: "2.7.1-python-3.10" +defaultAirflowTag: "development-0.0.1" # Default airflow digest. If specified, it takes precedence over tag defaultAirflowDigest: ~ +# TODO: Update this with out airflow repo/tag # Airflow version (Used to make some decisions based on Airflow Version being deployed) -airflowVersion: "2.7.1" +airflowVersion: "2.9.3" # Images images: @@ -88,23 +108,25 @@ images: pullPolicy: IfNotPresent statsd: repository: quay.io/prometheus/statsd-exporter - tag: v0.22.8 + tag: v0.26.1 pullPolicy: IfNotPresent redis: repository: redis - tag: 7-bullseye + # Redis is limited to 7.2-bookworm due to licencing change + # https://redis.io/blog/redis-adopts-dual-source-available-licensing/ + tag: 7.2-bookworm pullPolicy: IfNotPresent pgbouncer: repository: apache/airflow - tag: airflow-pgbouncer-2023.02.24-1.16.1 + tag: airflow-pgbouncer-2024.01.19-1.21.0 pullPolicy: IfNotPresent pgbouncerExporter: repository: apache/airflow - tag: airflow-pgbouncer-exporter-2023.02.21-0.14.0 + tag: airflow-pgbouncer-exporter-2024.06.18-0.17.0 pullPolicy: IfNotPresent gitSync: repository: registry.k8s.io/git-sync/git-sync - tag: v3.6.3 + tag: v4.1.0 pullPolicy: IfNotPresent # Select certain nodes for airflow pods. 
@@ -114,6 +136,7 @@ nodeSelector: { affinity: {} tolerations: [] topologySpreadConstraints: [] +schedulerName: ~ # Add common labels to all objects and pods defined in this chart. labels: {} @@ -142,6 +165,7 @@ ingress: # The hostnames or hosts configuration for the web Ingress hosts: [] + # # The hostname for the web Ingress (can be templated) # - name: "" # # configs for web Ingress TLS # tls: @@ -185,6 +209,7 @@ ingress: # The hostnames or hosts configuration for the flower Ingress hosts: [] + # # The hostname for the flower Ingress (can be templated) # - name: "" # tls: # # Enable TLS termination for the flower Ingress @@ -225,7 +250,8 @@ airflowLocalSettings: |- UIAlert( 'Usage of a dynamic webserver secret key detected. We recommend a static webserver secret key instead.' ' See the ' + '"https://airflow.apache.org/docs/helm-chart/stable/production-guide.html#webserver-secret-key" ' + 'target="_blank" rel="noopener noreferrer">' 'Helm Chart Production Guide for more details.', category="warning", roles=["Admin"], @@ -253,6 +279,8 @@ allowPodLaunching: true # Environment variables for all airflow containers env: [] +# - name: "" +# value: "" # Volumes for all airflow containers volumes: [] @@ -319,6 +347,11 @@ extraSecrets: {} # '{{ .Release.Name }}-other-secret-name-suffix': # data: | # ... +# 'proxy-config': +# stringData: | +# HTTP_PROXY: http://proxy_user:proxy_password@192.168.0.10:2080 +# HTTPS_PROXY: http://proxy_user:proxy_password@192.168.0.10:2080 +# NO_PROXY: "localhost,127.0.0.1,.svc.cluster.local,kubernetes.default.svc" # Extra ConfigMaps that will be managed by the chart # (You can use them with extraEnv or extraEnvFrom or some of the extraVolumes values). 
@@ -513,6 +546,9 @@ workers: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Create ServiceAccount serviceAccount: # default value is true @@ -569,9 +605,36 @@ workers: # This configuration will be ignored if PGBouncer is not enabled usePgbouncer: true + # Allow HPA (KEDA must be disabled). + hpa: + enabled: false + + # Minimum number of workers created by HPA + minReplicaCount: 0 + + # Maximum number of workers created by HPA + maxReplicaCount: 5 + + # Specifications for which to use to calculate the desired replica count + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 80 + + # Scaling behavior of the target in both Up and Down directions + behavior: {} + persistence: # Enable persistent volumes enabled: true + # This policy determines whether PVCs should be deleted when StatefulSet is scaled down or removed. + persistentVolumeClaimRetentionPolicy: ~ + # persistentVolumeClaimRetentionPolicy: + # whenDeleted: Delete + # whenScaled: Delete # Volume size for worker StatefulSet size: 30Gi # If using a custom storageClass, pass name ref to all statefulSets here @@ -585,6 +648,8 @@ workers: # Detailed default security context for persistence for container level securityContexts: container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} kerberosSidecar: # Enable kerberos sidecar @@ -599,6 +664,20 @@ workers: # Detailed default security context for kerberosSidecar for container level securityContexts: container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + + kerberosInitContainer: + # Enable kerberos init container + enabled: false + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + resources: {} # limits: @@ -613,13 +692,13 @@ workers: # This setting tells kubernetes that its ok to evict # when it wants to scale a node down. 
- safeToEvict: true + safeToEvict: false - # Launch additional containers into worker. + # Launch additional containers into worker (templated). # Note: If used with KubernetesExecutor, you are responsible for signaling sidecars to exit when the main # container finishes so Airflow can continue the worker shutdown process! extraContainers: [] - # Add additional init containers into workers. + # Add additional init containers into workers (templated). extraInitContainers: [] # Mount additional volumes into worker. It can be templated like in the following example: @@ -704,8 +783,34 @@ workers: env: [] + volumeClaimTemplates: [] + # Additional volumeClaimTemplates needed. + # Comment out the above and uncomment the section below to enable it. + # Add more as needed + # Make sure to mount it under extraVolumeMounts. + # volumeClaimTemplates: + # - metadata: + # name: data-volume-1 + # spec: + # storageClassName: "storage-class-1" + # accessModes: + # - "ReadWriteOnce" + # resources: + # requests: + # storage: "10Gi" + # - metadata: + # name: data-volume-2 + # spec: + # storageClassName: "storage-class-2" + # accessModes: + # - "ReadWriteOnce" + # resources: + # requests: + # storage: "20Gi" + # Airflow scheduler settings scheduler: + enabled: true # hostAliases for the scheduler pod hostAliases: [] # - ip: "127.0.0.1" @@ -723,6 +828,15 @@ scheduler: failureThreshold: 5 periodSeconds: 60 command: ~ + + # Wait for at most 1 minute (6*10s) for the scheduler container to startup. 
+ # livenessProbe kicks in after the first successful startupProbe + startupProbe: + failureThreshold: 6 + periodSeconds: 10 + timeoutSeconds: 20 + command: ~ + # Airflow 2.0 allows users to run multiple schedulers, # However this feature is only recommended for MySQL 8+ and Postgres replicas: 1 @@ -753,6 +867,9 @@ scheduler: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Create ServiceAccount serviceAccount: # default value is true @@ -789,9 +906,9 @@ scheduler: # when it wants to scale a node down. safeToEvict: true - # Launch additional containers into scheduler. + # Launch additional containers into scheduler (templated). extraContainers: [] - # Add additional init containers into scheduler. + # Add additional init containers into scheduler (templated). extraInitContainers: [] # Mount additional volumes into scheduler. It can be templated like in the following example: @@ -855,6 +972,8 @@ scheduler: # Detailed default security context for logGroomerSidecar for container level securityContexts: container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} waitForMigrations: # Whether to create init container to wait for db migrations @@ -915,6 +1034,9 @@ createUserJob: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Create ServiceAccount serviceAccount: # default value is true @@ -932,6 +1054,9 @@ createUserJob: # Launch additional containers into user creation job extraContainers: [] + # Add additional init containers into user creation job (templated). + extraInitContainers: [] + # Mount additional volumes into user creation job. It can be templated like in the following example: # extraVolumes: # - name: my-templated-extra-volume @@ -951,6 +1076,7 @@ createUserJob: affinity: {} tolerations: [] topologySpreadConstraints: [] + priorityClassName: ~ # In case you need to disable the helm hooks that create the jobs after install. 
# Disable this if you are using ArgoCD for example useHelmHooks: false @@ -977,10 +1103,12 @@ migrateDatabaseJob: args: - "bash" - "-c" - # The format below is necessary to get `helm lint` happy - - |- + - >- exec \ - airflow {{ semverCompare ">=2.0.0" .Values.airflowVersion | ternary "db upgrade" "upgradedb" }} + + airflow {{ semverCompare ">=2.7.0" .Values.airflowVersion + | ternary "db migrate" (semverCompare ">=2.0.0" .Values.airflowVersion + | ternary "db upgrade" "upgradedb") }} # Annotations on the database migration pod annotations: {} @@ -989,6 +1117,9 @@ migrateDatabaseJob: argocd.argoproj.io/hook: Sync argocd.argoproj.io/hook-delete-policy: HookSucceeded + # Labels specific to migrate database job objects and pods + labels: {} + # When not set, the values defined in the global securityContext will be used securityContext: {} # runAsUser: 50000 @@ -1000,6 +1131,9 @@ migrateDatabaseJob: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Create ServiceAccount serviceAccount: # default value is true @@ -1025,6 +1159,9 @@ migrateDatabaseJob: # Launch additional containers into database migration job extraContainers: [] + # Add additional init containers into migrate database job (templated). + extraInitContainers: [] + # Mount additional volumes into database migration job. It can be templated like in the following example: # extraVolumes: # - name: my-templated-extra-volume @@ -1044,13 +1181,121 @@ migrateDatabaseJob: affinity: {} tolerations: [] topologySpreadConstraints: [] + priorityClassName: ~ # In case you need to disable the helm hooks that create the jobs after install. # Disable this if you are using ArgoCD for example useHelmHooks: false applyCustomEnv: true +# rpcServer support is experimental / dev purpose only and will later be renamed +_rpcServer: + enabled: false + + # Labels specific to workers objects and pods + labels: {} + + # Command to use when running the Airflow rpc server (templated). 
+ command: + - "bash" + # Args to use when running the Airflow rpc server (templated). + args: ["-c", "exec airflow internal-api"] + env: [] + serviceAccount: + # default value is true + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + automountServiceAccountToken: true + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the release name + name: ~ + + # Annotations to add to webserver kubernetes service account. + annotations: {} + service: + type: ClusterIP + ## service annotations + annotations: {} + ports: + - name: rpc-server + port: "{{ .Values.ports._rpcServer }}" + + loadBalancerIP: ~ + ## Limit load balancer source ips to list of CIDRs + # loadBalancerSourceRanges: + # - "10.123.0.0/16" + loadBalancerSourceRanges: [] + + podDisruptionBudget: + enabled: false + + # PDB configuration + config: + # minAvailable and maxUnavailable are mutually exclusive + maxUnavailable: 1 + # minAvailable: 1 + + # Detailed default security contexts for webserver deployments for container and pod level + securityContexts: + pod: {} + container: {} + + waitForMigrations: + # Whether to create init container to wait for db migrations + enabled: true + env: [] + # Detailed default security context for waitForMigrations for container level + securityContexts: + container: {} + + # Launch additional containers into the flower pods. 
+ extraContainers: [] + + # Additional network policies as needed (Deprecated - renamed to `webserver.networkPolicy.ingress.from`) + extraNetworkPolicies: [] + networkPolicy: + ingress: + # Peers for webserver NetworkPolicy ingress + from: [] + # Ports for webserver NetworkPolicy ingress (if `from` is set) + ports: + - port: "{{ .Values.ports._rpcServer }}" + + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + livenessProbe: + initialDelaySeconds: 15 + timeoutSeconds: 5 + failureThreshold: 5 + periodSeconds: 10 + scheme: HTTP + + readinessProbe: + initialDelaySeconds: 15 + timeoutSeconds: 5 + failureThreshold: 5 + periodSeconds: 10 + scheme: HTTP + + # Wait for at most 1 minute (6*10s) for the RPC server container to startup. + # livenessProbe kicks in after the first successful startupProbe + startupProbe: + timeoutSeconds: 20 + failureThreshold: 6 + periodSeconds: 10 + scheme: HTTP + # Airflow webserver settings webserver: + enabled: true + # Add custom annotations to the webserver configmap + configMapAnnotations: {} # hostAliases for the webserver pod hostAliases: [] # - ip: "127.0.0.1" @@ -1074,6 +1319,14 @@ webserver: periodSeconds: 10 scheme: HTTP + # Wait for at most 1 minute (6*10s) for the webserver container to startup. + # livenessProbe kicks in after the first successful startupProbe + startupProbe: + timeoutSeconds: 20 + failureThreshold: 6 + periodSeconds: 10 + scheme: HTTP + # Number of webservers replicas: 1 # Max number of old replicasets to retain @@ -1123,6 +1376,9 @@ webserver: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Additional network policies as needed (Deprecated - renamed to `webserver.networkPolicy.ingress.from`) extraNetworkPolicies: [] networkPolicy: @@ -1151,9 +1407,9 @@ webserver: lastName: user password: admin # This is randomized during install - # Launch additional containers into webserver. 
+ # Launch additional containers into webserver (templated). extraContainers: [] - # Add additional init containers into webserver. + # Add additional init containers into webserver (templated). extraInitContainers: [] # Mount additional volumes into webserver. It can be templated like in the following example: @@ -1304,9 +1560,15 @@ triggerer: securityContexts: pod: {} container: {} + + # container level lifecycle hooks + containerLifecycleHooks: {} + persistence: # Enable persistent volumes enabled: true + # This policy determines whether PVCs should be deleted when StatefulSet is scaled down or removed. + persistentVolumeClaimRetentionPolicy: ~ # Volume size for triggerer StatefulSet size: 30Gi # If using a custom storageClass, pass name ref to all statefulSets here @@ -1333,9 +1595,9 @@ triggerer: # when it wants to scale a node down. safeToEvict: true - # Launch additional containers into triggerer. + # Launch additional containers into triggerer (templated). extraContainers: [] - # Add additional init containers into triggerers. + # Add additional init containers into triggerers (templated). extraInitContainers: [] # Mount additional volumes into triggerer. It can be templated like in the following example: @@ -1400,6 +1662,9 @@ triggerer: securityContexts: container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + waitForMigrations: # Whether to create init container to wait for db migrations enabled: true @@ -1410,6 +1675,44 @@ triggerer: env: [] + # Allow KEDA autoscaling. + keda: + enabled: false + namespaceLabels: {} + + # How often KEDA polls the airflow DB to report new scale requests to the HPA + pollingInterval: 5 + + # How many seconds KEDA will wait before scaling to zero. 
+ # Note that HPA has a separate cooldown period for scale-downs + cooldownPeriod: 30 + + # Minimum number of triggerers created by keda + minReplicaCount: 0 + + # Maximum number of triggerers created by keda + maxReplicaCount: 10 + + # Specify HPA related options + advanced: {} + # horizontalPodAutoscalerConfig: + # behavior: + # scaleDown: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Percent + # value: 100 + # periodSeconds: 15 + + # Query to use for KEDA autoscaling. Must return a single integer. + query: >- + SELECT ceil(COUNT(*)::decimal / {{ .Values.config.triggerer.default_capacity }}) + FROM trigger + + # Whether to use PGBouncer to connect to the database or not when it is enabled + # This configuration will be ignored if PGBouncer is not enabled + usePgbouncer: false + # Airflow Dag Processor Config dagProcessor: enabled: false @@ -1463,6 +1766,9 @@ dagProcessor: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + resources: {} # limits: # cpu: 100m @@ -1478,9 +1784,9 @@ dagProcessor: # when it wants to scale a node down. safeToEvict: true - # Launch additional containers into dag processor. + # Launch additional containers into dag processor (templated). extraContainers: [] - # Add additional init containers into dag processors. + # Add additional init containers into dag processors (templated). extraInitContainers: [] # Mount additional volumes into dag processor. It can be templated like in the following example: @@ -1536,11 +1842,16 @@ dagProcessor: # requests: # cpu: 100m # memory: 128Mi + securityContexts: + container: {} waitForMigrations: # Whether to create init container to wait for db migrations enabled: true env: [] + # Detailed default security context for waitForMigrations for container level + securityContexts: + container: {} env: [] @@ -1549,6 +1860,19 @@ flower: # Enable flower. # If True, and using CeleryExecutor/CeleryKubernetesExecutor, will deploy flower app. 
enabled: false + + livenessProbe: + initialDelaySeconds: 10 + timeoutSeconds: 5 + failureThreshold: 10 + periodSeconds: 5 + + readinessProbe: + initialDelaySeconds: 10 + timeoutSeconds: 5 + failureThreshold: 10 + periodSeconds: 5 + # Max number of old replicasets to retain revisionHistoryLimit: ~ @@ -1592,6 +1916,9 @@ flower: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Create ServiceAccount serviceAccount: # default value is true @@ -1667,7 +1994,10 @@ flower: # StatsD settings statsd: - enabled: false + # Add custom annotations to the statsd configmap + configMapAnnotations: {} + + enabled: true # Max number of old replicasets to retain revisionHistoryLimit: ~ @@ -1705,6 +2035,9 @@ statsd: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Additional network policies as needed extraNetworkPolicies: [] resources: {} @@ -1737,6 +2070,7 @@ statsd: overrideMappings: [] podAnnotations: {} + env: [] # PgBouncer settings pgbouncer: @@ -1750,7 +2084,7 @@ pgbouncer: command: ["pgbouncer", "-u", "nobody", "/etc/pgbouncer/pgbouncer.ini"] # Args to use for PgBouncer(templated). args: ~ - auth_type: md5 + auth_type: scram-sha-256 auth_file: /etc/pgbouncer/users.txt # annotations to be added to the PgBouncer deployment @@ -1861,6 +2195,9 @@ pgbouncer: extraVolumes: [] extraVolumeMounts: [] + # Launch additional containers into pgbouncer. + extraContainers: [] + # Select certain nodes for PgBouncer pods. 
nodeSelector: {} affinity: {} @@ -1876,6 +2213,13 @@ pgbouncer: pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: + preStop: + exec: + # Allow existing queries clients to complete within 120 seconds + command: ["/bin/sh", "-c", "killall -INT pgbouncer && sleep 120"] + metricsExporterSidecar: resources: {} # limits: @@ -1886,10 +2230,31 @@ pgbouncer: # memory: 128Mi sslmode: "disable" + # supply the name of existing secret with PGBouncer connection URI containing + # stats user and password. + # you can load them to a k8s secret like the one below + # apiVersion: v1 + # kind: Secret + # metadata: + # name: pgbouncer-stats-secret + # data: + # connection: postgresql://:@127.0.0.1:6543/pgbouncer? + # type: Opaque + # + # statsSecretName: pgbouncer-stats-secret + # + statsSecretName: ~ + + # Key containing the PGBouncer connection URI, defaults to `connection` if not defined + statsSecretKey: ~ + # Detailed default security context for metricsExporterSidecar for container level securityContexts: container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + livenessProbe: initialDelaySeconds: 10 periodSeconds: 10 @@ -1900,11 +2265,17 @@ pgbouncer: periodSeconds: 10 timeoutSeconds: 1 + # Environment variables to add to pgbouncer container + env: [] + # Configuration for the redis provisioned by the chart redis: enabled: true terminationGracePeriodSeconds: 600 + # Annotations for Redis Statefulset + annotations: {} + # Create ServiceAccount serviceAccount: # default value is true @@ -1929,6 +2300,11 @@ redis: # Annotations to add to redis volumes annotations: {} + # Configuration for empty dir volume (if redis.persistence.enabled == false) + # emptyDirConfig: + # sizeLimit: 1Gi + # medium: Memory + resources: {} # limits: # cpu: 100m @@ -1956,6 +2332,7 @@ redis: affinity: {} tolerations: [] topologySpreadConstraints: [] + priorityClassName: ~ # Set to 0 for backwards-compatiblity uid: 0 @@ -1969,6 +2346,9 @@ redis: 
pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + podAnnotations: {} # Auth secret for a private registry # This is used if pulling airflow images from a private registry @@ -1992,6 +2372,7 @@ elasticsearch: # Or an object representing the connection # Example: # connection: + # scheme: ~ # user: ~ # pass: ~ # host: ~ @@ -2009,6 +2390,8 @@ ports: statsdScrape: 9102 pgbouncer: 6543 pgbouncerScrape: 9127 + # rpcServer support is experimental / dev purpose only and will later be renamed + _rpcServer: 9080 # Define any ResourceQuotas for namespace quotas: {} @@ -2019,7 +2402,7 @@ limits: [] # This runs as a CronJob to cleanup old pods. cleanup: enabled: false - # Run every 15 minutes (templated). + # Run every 60 minutes (templated). schedule: "*/60 * * * *" # To select a random-ish, deterministic starting minute between 3 and 12 inclusive for each release: # '{{- add 3 (regexFind ".$" (adler32sum .Release.Name)) -}}-59/15 * * * *' @@ -2039,6 +2422,7 @@ cleanup: affinity: {} tolerations: [] topologySpreadConstraints: [] + priorityClassName: ~ podAnnotations: {} @@ -2075,8 +2459,12 @@ cleanup: # Detailed default security context for cleanup for container level securityContexts: + pod: {} container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Specify history limit # When set, overwrite the default k8s number of successful and failed CronJob executions that are saved. failedJobsHistoryLimit: ~ @@ -2086,8 +2474,6 @@ cleanup: # Not recommended for production postgresql: enabled: false - image: - tag: "11" auth: enablePostgresUser: true postgresPassword: postgres @@ -2121,13 +2507,13 @@ config: statsd_on: '{{ ternary "True" "False" .Values.statsd.enabled }}' statsd_port: 9125 statsd_prefix: airflow - statsd_host: '{{ printf "%s-statsd" .Release.Name }}' + statsd_host: '{{ printf "%s-statsd" (include "airflow.fullname" .) 
}}' webserver: enable_proxy_fix: 'True' # For Airflow 1.10 rbac: 'True' celery: - flower_url_prefix: '{{ .Values.ingress.flower.path }}' + flower_url_prefix: '{{ ternary "" .Values.ingress.flower.path (eq .Values.ingress.flower.path "/") }}' worker_concurrency: 16 scheduler: standalone_dag_processor: '{{ ternary "True" "False" .Values.dagProcessor.enabled }}' @@ -2135,7 +2521,7 @@ config: statsd_on: '{{ ternary "True" "False" .Values.statsd.enabled }}' statsd_port: 9125 statsd_prefix: airflow - statsd_host: '{{ printf "%s-statsd" .Release.Name }}' + statsd_host: '{{ printf "%s-statsd" (include "airflow.fullname" .) }}' # `run_duration` included for Airflow 1.10 backward compatibility; removed in 2.0. run_duration: 41460 elasticsearch: @@ -2173,6 +2559,8 @@ config: secrets: backend: airflow.providers.amazon.aws.secrets.secrets_manager.SecretsManagerBackend backend_kwargs: '{"connections_prefix": "airflow/connections", "variables_prefix": "airflow/variables", "region_name": "us-east-1"}' + triggerer: + default_capacity: 1000 # yamllint enable rule:line-length # Whether Airflow can launch workers and/or pods in multiple namespaces @@ -2206,6 +2594,9 @@ podTemplate: ~ # Git sync dags: + # Where dags volume will be mounted. Works for both persistence and gitSync. 
+ # If not specified, dags mount path will be set to $AIRFLOW_HOME/dags + mountPath: ~ persistence: # Annotations for dags PVC annotations: {} @@ -2230,6 +2621,8 @@ dags: repo: https://github.com/Sage-Bionetworks-Workflows/orca-recipes branch: main rev: HEAD + # The git revision (branch, tag, or hash) to check out, v4 only + ref: v2-2-stable depth: 1 # the number of consecutive failures allowed before aborting maxFailures: 0 @@ -2244,8 +2637,12 @@ dags: # metadata: # name: git-credentials # data: + # # For git-sync v3 # GIT_SYNC_USERNAME: # GIT_SYNC_PASSWORD: + # # For git-sync v4 + # GITSYNC_USERNAME: + # GITSYNC_PASSWORD: # and specify the name of the secret below # # credentialsSecret: git-credentials @@ -2264,6 +2661,12 @@ dags: # and specify the name of the secret below # sshKeySecret: airflow-ssh-secret # + # Or set sshKeySecret with your key + # sshKey: |- + # -----BEGIN {OPENSSH PRIVATE KEY}----- + # ... + # -----END {OPENSSH PRIVATE KEY}----- + # # If you are using an ssh private key, you can additionally # specify the content of your known_hosts file, example: # @@ -2274,7 +2677,16 @@ dags: # interval between git sync attempts in seconds # high values are more likely to cause DAGs to become out of sync between different components # low values cause more traffic to the remote git repository + # Go-style duration string (e.g. "100ms" or "0.1s" = 100ms). + # For backwards compatibility, wait will be used if it is specified. + period: 5s wait: 600 + # add variables from secret into gitSync containers, such proxy-config + envFrom: ~ + # envFrom: | + # - secretRef: + # name: 'proxy-config' + containerName: git-sync uid: 65533 @@ -2286,6 +2698,9 @@ dags: securityContexts: container: {} + # container level lifecycle hooks + containerLifecycleHooks: {} + # Mount additional volumes into git-sync. 
It can be templated like in the following example: # extraVolumeMounts: # - name: my-templated-extra-volume @@ -2297,6 +2712,11 @@ dags: # - name: "" # value: "" + # Configuration for empty dir volume + # emptyDirConfig: + # sizeLimit: 1Gi + # medium: Memory + resources: {} # limits: # cpu: 100m @@ -2306,6 +2726,11 @@ dags: # memory: 128Mi logs: + # Configuration for empty dir volume (if logs.persistence.enabled == false) + # emptyDirConfig: + # sizeLimit: 1Gi + # medium: Memory + persistence: # Enable persistent volume for storing logs enabled: false @@ -2317,3 +2742,4 @@ logs: storageClassName: gp3 ## the name of an existing PVC to use existingClaim: + From 8c6cc6a6ad44abe562740e3ae25ec4787ee49ac5 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:17:33 -0700 Subject: [PATCH 2/9] Point to updated helm chart --- modules/apache-airflow/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/apache-airflow/main.tf b/modules/apache-airflow/main.tf index 3ed255a5..81c219ec 100644 --- a/modules/apache-airflow/main.tf +++ b/modules/apache-airflow/main.tf @@ -66,7 +66,7 @@ spec: sources: - repoURL: 'https://airflow.apache.org' chart: airflow - targetRevision: 1.11.0 + targetRevision: 1.15.0 helm: releaseName: airflow valueFiles: From 1b72e8c99e1672d72cd6e53a8a9e02a300dc8105 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:02:23 -0700 Subject: [PATCH 3/9] Update tag --- modules/apache-airflow/templates/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/apache-airflow/templates/values.yaml b/modules/apache-airflow/templates/values.yaml index 99c9e395..e47b35a9 100644 --- a/modules/apache-airflow/templates/values.yaml +++ b/modules/apache-airflow/templates/values.yaml @@ -70,7 +70,7 @@ defaultAirflowRepository: ghcr.io/sage-bionetworks-workflows/orca-recipes # TODO: Update this with 
out airflow repo/tag # Default airflow tag to deploy -defaultAirflowTag: "development-0.0.1" +defaultAirflowTag: "development-0.0.2" # Default airflow digest. If specified, it takes precedence over tag defaultAirflowDigest: ~ From d5bfd2d60e75cb49e69c91da21116e9f8dc32844 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:11:46 -0700 Subject: [PATCH 4/9] Leave off `statsd` --- modules/apache-airflow/templates/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/apache-airflow/templates/values.yaml b/modules/apache-airflow/templates/values.yaml index e47b35a9..8f36e50a 100644 --- a/modules/apache-airflow/templates/values.yaml +++ b/modules/apache-airflow/templates/values.yaml @@ -1997,7 +1997,7 @@ statsd: # Add custom annotations to the statsd configmap configMapAnnotations: {} - enabled: true + enabled: false # Max number of old replicasets to retain revisionHistoryLimit: ~ From 8c8c39ea85c58dac7f628c85329f61bd8c823d12 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:16:08 -0700 Subject: [PATCH 5/9] Point to updated airflow tag --- modules/apache-airflow/templates/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/apache-airflow/templates/values.yaml b/modules/apache-airflow/templates/values.yaml index 8f36e50a..99022309 100644 --- a/modules/apache-airflow/templates/values.yaml +++ b/modules/apache-airflow/templates/values.yaml @@ -70,7 +70,7 @@ defaultAirflowRepository: ghcr.io/sage-bionetworks-workflows/orca-recipes # TODO: Update this with out airflow repo/tag # Default airflow tag to deploy -defaultAirflowTag: "development-0.0.2" +defaultAirflowTag: "airflow-2.9.3-v1.0.0" # Default airflow digest. 
If specified, it takes precedence over tag defaultAirflowDigest: ~ From 1d99c45812b79eb659eff7bab5306c5f26122b34 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:32:19 -0700 Subject: [PATCH 6/9] Remove todo comment --- modules/apache-airflow/templates/values.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/apache-airflow/templates/values.yaml b/modules/apache-airflow/templates/values.yaml index 99022309..b4d2609d 100644 --- a/modules/apache-airflow/templates/values.yaml +++ b/modules/apache-airflow/templates/values.yaml @@ -64,18 +64,15 @@ containerLifecycleHooks: {} # Used for mount paths airflowHome: /opt/airflow -# TODO: Update this with out airflow repo/tag # Default airflow repository -- overridden by all the specific images below defaultAirflowRepository: ghcr.io/sage-bionetworks-workflows/orca-recipes -# TODO: Update this with out airflow repo/tag # Default airflow tag to deploy defaultAirflowTag: "airflow-2.9.3-v1.0.0" # Default airflow digest. If specified, it takes precedence over tag defaultAirflowDigest: ~ -# TODO: Update this with out airflow repo/tag # Airflow version (Used to make some decisions based on Airflow Version being deployed) airflowVersion: "2.9.3" From 7d00789ac49c0a647e0dcd450ff139e2e6b82c74 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:34:52 -0700 Subject: [PATCH 7/9] Add notes about building a new image --- modules/apache-airflow/README.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/modules/apache-airflow/README.md b/modules/apache-airflow/README.md index 736303cf..96e4d7df 100644 --- a/modules/apache-airflow/README.md +++ b/modules/apache-airflow/README.md @@ -65,4 +65,20 @@ YAML ## Accessing the web UI An `admin` user is created for airflow via the `airflow-admin-user-secret` secret that is added to the namespace. 
Decode the base64 encoded password/username and use it for -the UI. \ No newline at end of file +the UI. + +## Building a new image for airflow +The deployment of our airflow instance depends on a custom apache airflow image being +created and pushed to a publicly available GHCR URL. The image is created from the +`orca-recipes` git repo: + +1. Update the dockerfile within the orca-recipes repo +2. Build the new image `docker build .` +3. Tag the built image with the tag you want to use `docker tag sha256:... ghcr.io/sage-bionetworks-workflows/orca-recipes:0.0.1` +4. Push to GHCR `docker push ghcr.io/sage-bionetworks-workflows/orca-recipes:0.0.1` (May require an admin of the repo to push this) +5. Update the `values.yaml` file in this `modules/apache-airflow/templates` directory. + +Transitive dependencies may also need to be updated when building a new image for +airflow, for example `py-orca` was updated in this example PR: . +Additionally, this PR covers what was completed in order to update the +requirements/dockerfile: . From 4a8e4ab894365554405d42ab0d4da0ee0bead065 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:40:51 -0700 Subject: [PATCH 8/9] Point to updated orca-recipes airflow image --- modules/apache-airflow/templates/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/apache-airflow/templates/values.yaml b/modules/apache-airflow/templates/values.yaml index b4d2609d..6a8f59e2 100644 --- a/modules/apache-airflow/templates/values.yaml +++ b/modules/apache-airflow/templates/values.yaml @@ -68,7 +68,7 @@ airflowHome: /opt/airflow defaultAirflowRepository: ghcr.io/sage-bionetworks-workflows/orca-recipes # Default airflow tag to deploy -defaultAirflowTag: "airflow-2.9.3-v1.0.0" +defaultAirflowTag: "0.1.0" # Default airflow digest. 
If specified, it takes precedence over tag defaultAirflowDigest: ~ From e438a5cbcb0f7edd34fd0cb99d92075eed7d8ec3 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:54:37 -0700 Subject: [PATCH 9/9] Ensure same zone for deployed nodes --- modules/apache-airflow/templates/values.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/apache-airflow/templates/values.yaml b/modules/apache-airflow/templates/values.yaml index 6a8f59e2..884f32be 100644 --- a/modules/apache-airflow/templates/values.yaml +++ b/modules/apache-airflow/templates/values.yaml @@ -715,7 +715,8 @@ workers: # Select certain nodes for airflow worker pods. nodeSelector: { - spotinst.io/node-lifecycle: "od" + spotinst.io/node-lifecycle: "od", + topology.kubernetes.io/zone: "us-east-1a" } runtimeClassName: ~ priorityClassName: ~ @@ -925,7 +926,8 @@ scheduler: # Select certain nodes for airflow scheduler pods. nodeSelector: { - spotinst.io/node-lifecycle: "od" + spotinst.io/node-lifecycle: "od", + topology.kubernetes.io/zone: "us-east-1a" } affinity: # default scheduler affinity is: @@ -1614,7 +1616,8 @@ triggerer: # Select certain nodes for airflow triggerer pods. nodeSelector: { - spotinst.io/node-lifecycle: "od" + spotinst.io/node-lifecycle: "od", + topology.kubernetes.io/zone: "us-east-1a" } affinity: # default triggerer affinity is: @@ -2324,7 +2327,8 @@ redis: # Select certain nodes for redis pods. nodeSelector: { - spotinst.io/node-lifecycle: "od" + spotinst.io/node-lifecycle: "od", + topology.kubernetes.io/zone: "us-east-1a" } affinity: {} tolerations: []