Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IBCDPE] Split out and start creating items as individual modules #8

Merged
merged 16 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/apache-airflow/data.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# EKS cluster lookup, keyed by var.cluster_name. The kubernetes provider
# configuration in this module reads its endpoint and CA certificate.
data "aws_eks_cluster" "cluster" {
  name = var.cluster_name
}

# Authentication lookup for the same cluster name; the kubernetes provider
# configuration in this module reads its token.
data "aws_eks_cluster_auth" "cluster" {
  name = var.cluster_name
}
101 changes: 101 additions & 0 deletions modules/apache-airflow/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Namespace named "airflow"; the webserver secret and the Helm release in this
# module both target this namespace name.
resource "kubernetes_namespace" "airflow" {
  metadata {
    name = "airflow"
  }
}

# Random 16-character value; its result is stored as the "webserver-secret-key"
# entry of the airflow-webserver-secret Kubernetes secret.
resource "random_password" "airflow" {
  # Special characters are allowed, restricted to the set listed below.
  special          = true
  override_special = "!#$%&*()-_=+[]{}<>:?"
  length           = 16
}

# Kubernetes secret that carries the Airflow webserver secret key.
resource "kubernetes_secret" "airflow_webserver_secret" {
  metadata {
    name = "airflow-webserver-secret"

    # Reference the namespace resource instead of repeating the "airflow"
    # literal: the name stays in sync with the namespace, and Terraform infers
    # the ordering from the reference, so no explicit depends_on is needed.
    namespace = kubernetes_namespace.airflow.metadata[0].name
  }

  data = {
    # Generated value from random_password.airflow.
    "webserver-secret-key" = random_password.airflow.result
  }
}

# Installs the Apache Airflow Helm chart (chart version 1.11.0) into the
# namespace created by kubernetes_namespace.airflow.
#
# TODO: Should a long-term deployment use a managed RDS instance?
# https://github.com/apache/airflow/blob/main/chart/values.yaml#L2321-L2329
resource "helm_release" "airflow" {
  name       = "apache-airflow"
  repository = "https://airflow.apache.org"
  chart      = "airflow"
  version    = "1.11.0"

  # Referencing the namespace resource (rather than the "airflow" literal plus
  # depends_on) gives Terraform the ordering implicitly.
  namespace = kubernetes_namespace.airflow.metadata[0].name

  # https://github.com/hashicorp/terraform-provider-helm/issues/683#issuecomment-830872443
  wait = false

  # NOTE(review): this setting is combined with a LoadBalancer webserver
  # service below — confirm that exposing the configuration is intended.
  set {
    name  = "config.webserver.expose_config"
    value = "true"
  }

  set {
    name  = "config.secrets.backend"
    value = "airflow.providers.amazon.aws.secrets.secrets_manager.SecretsManagerBackend"
  }

  set {
    name  = "webserver.service.type"
    value = "LoadBalancer"
  }

  # Take the secret name from the secret resource so the release is only
  # applied after the secret exists; the plain string literal created no
  # dependency on kubernetes_secret.airflow_webserver_secret.
  set {
    name  = "webserverSecretKeySecretName"
    value = kubernetes_secret.airflow_webserver_secret.metadata[0].name
  }

  set {
    name  = "airflowVersion"
    value = "2.7.1"
  }

  set {
    name  = "defaultAirflowRepository"
    value = "bfaublesage/airflow"
  }

  set {
    name  = "defaultAirflowTag"
    value = "2.7.1-python-3.10"
  }

  # DAGs are pulled by git-sync from the repository below instead of a
  # persistent volume.
  set {
    name  = "dags.persistence.enabled"
    value = "false"
  }

  set {
    name  = "dags.gitSync.enabled"
    value = "true"
  }

  set {
    name  = "dags.gitSync.repo"
    value = "https://github.com/Sage-Bionetworks-Workflows/orca-recipes"
  }

  set {
    name  = "dags.gitSync.subPath"
    value = "dags"
  }

  set {
    name  = "dags.gitSync.branch"
    value = "main"
  }

  # Additional values come from the module's template file, rendered with an
  # empty variable map.
  values = [templatefile("${path.module}/templates/airflow-values.yaml", {})]
}
16 changes: 16 additions & 0 deletions modules/apache-airflow/provider.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# AWS provider pinned to the region given by var.region.
provider "aws" {
  region = var.region
}

# Kubernetes provider: receives the kubeconfig path as well as the endpoint,
# CA certificate and token read from the EKS data sources.
provider "kubernetes" {
  host                   = data.aws_eks_cluster.cluster.endpoint
  token                  = data.aws_eks_cluster_auth.cluster.token
  cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data)
  config_path            = var.kube_config_path
}

# Helm provider: configured with the kubeconfig path only.
# NOTE(review): unlike the kubernetes provider above, no host/token is passed
# here — confirm the kubeconfig at var.kube_config_path targets the same cluster.
provider "helm" {
  kubernetes {
    config_path = var.kube_config_path
  }
}
20 changes: 20 additions & 0 deletions modules/apache-airflow/templates/airflow-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Extra values for the Airflow chart. main.tf renders this file through
# templatefile() with an empty variable map before passing it to the release.
config:
  secrets:
    # JSON string under config.secrets; presumably consumed by the secrets
    # backend selected in main.tf (config.secrets.backend) — verify.
    backend_kwargs: '{"connections_prefix": "airflow/connections", "variables_prefix": "airflow/variables", "region_name": "us-east-1"}'
# The remaining lines are disabled placeholders (not active configuration).
# webserver:
# authenticate: true
# auth_backend: airflow.contrib.auth.backends.google_auth
# web_server_ssl_cert = <path to cert>
# web_server_ssl_key = <path to key>
# web_server_port = 443
# base_url = http://<hostname or IP>:443
# celery:
# ssl_active = True
# ssl_key = <path to key>
# ssl_cert = <path to cert>
# ssl_cacert = <path to cacert>

# service:
# type: LoadBalancer # or another type as needed
# annotations:
# alb.ingress.kubernetes.io/scheme: "internal"
18 changes: 18 additions & 0 deletions modules/apache-airflow/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Input: EKS cluster name used by the data sources in this module.
variable "cluster_name" {
  type        = string
  default     = "dpe-k8"
  description = "Name of K8 cluster"
}

# Input: kubeconfig location handed to the kubernetes and helm providers.
variable "kube_config_path" {
  type        = string
  default     = "~/.kube/config"
  description = "Kube config path"
}

# Input: region handed to the AWS provider.
variable "region" {
  type        = string
  default     = "us-east-1"
  description = "AWS region"
}

3 changes: 3 additions & 0 deletions modules/apache-airflow/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Restricts this module to Terraform CLI versions up to and including 1.5.7.
terraform {
  required_version = "<= 1.5.7"
}
4 changes: 2 additions & 2 deletions modules/internal-k8-infra/data.tf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
data "aws_eks_cluster" "cluster" {
name = "dpe-k8"
name = var.cluster_name
}

data "aws_eks_cluster_auth" "cluster" {
name = "dpe-k8"
name = var.cluster_name
}

data "aws_secretsmanager_secret" "spotinst_token" {
Expand Down
11 changes: 5 additions & 6 deletions modules/internal-k8-infra/main.tf
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a list of questions you'd like spot to answer?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. What terraform modules are required to install the V2 spot ocean components into our EKS cluster?
  2. What dependencies need to be completed and operational before attempting to install any Spot Ocean Terraform resources?
  3. What does removing Ocean's Terraform resources entail? Is everything properly reset to the "before" state once they are removed?

Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
module "kubernetes-controller" {
source = "spotinst/kubernetes-controller/ocean"
version = "0.0.2"
module "ocean-controller" {
source = "spotinst/ocean-controller/spotinst"
version = "0.54.0"

# Credentials
# Credentials.
spotinst_token = data.aws_secretsmanager_secret_version.secret_credentials.secret_string
spotinst_account = var.spotinst_account

# Configuration
# Configuration.
cluster_identifier = var.cluster_name
}


module "ocean-aws-k8s" {
source = "spotinst/ocean-aws-k8s/spotinst"
version = "1.2.0"
Expand Down
1 change: 1 addition & 0 deletions modules/k8s-node-autoscaler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# This module is experimental and a work in progress (WIP)
39 changes: 39 additions & 0 deletions modules/k8s-node-autoscaler/data.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# EKS cluster lookup, keyed by var.cluster_name.
data "aws_eks_cluster" "cluster" {
  name = var.cluster_name
}

# Authentication lookup for the same cluster name.
data "aws_eks_cluster_auth" "cluster" {
  name = var.cluster_name
}

# Secrets Manager secret named "spotinst_token".
data "aws_secretsmanager_secret" "spotinst_token" {
  name = "spotinst_token"
}

# Version of that secret; its secret_string is passed as the Spot token to the
# spotinst provider and to the ocean-controller module.
data "aws_secretsmanager_secret_version" "secret_credentials" {
  secret_id = data.aws_secretsmanager_secret.spotinst_token.id
}

# Subnets of the VPC that the EKS cluster runs in; their IDs are handed to the
# ocean-aws-k8s module as subnet_ids.
#
# The VPC ID is read from the cluster data source rather than hard-coded, so
# the lookup keeps working when the VPC is torn down and recreated under a new
# ID.
data "aws_subnets" "node_subnets" {
  filter {
    name   = "vpc-id"
    values = [data.aws_eks_cluster.cluster.vpc_config[0].vpc_id]
  }
}

# Node groups belonging to the cluster named by var.cluster_name.
data "aws_eks_node_groups" "node_groups" {
  cluster_name = var.cluster_name
}

# Details of one node group of the cluster; its node_role_arn is consumed by
# the ocean-aws-k8s module.
data "aws_eks_node_group" "node_group" {
  cluster_name = var.cluster_name

  # The aws_eks_node_groups data source is a single instance (no count), so it
  # cannot be indexed with [0]; the node group names are exposed through its
  # `names` set, which must be converted to a list before indexing.
  # This selects the first name of that converted list and fails at plan time
  # if the cluster has no node groups.
  node_group_name = tolist(data.aws_eks_node_groups.node_groups.names)[0]
}

# Security group found by its Name tag, "<cluster_name>-node"; its ID is passed
# to the ocean-aws-k8s module as the only entry of security_groups.
data "aws_security_group" "eks_cluster_security_group" {
  tags = {
    Name = "${var.cluster_name}-node"
  }
}
27 changes: 27 additions & 0 deletions modules/k8s-node-autoscaler/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Spot Ocean controller module (spotinst/ocean-controller/spotinst, 0.54.0).
module "ocean-controller" {
  source  = "spotinst/ocean-controller/spotinst"
  version = "0.54.0"

  # Configuration.
  cluster_identifier = var.cluster_name

  # Credentials: account from a variable, token from AWS Secrets Manager.
  spotinst_account = var.spotinst_account
  spotinst_token   = data.aws_secretsmanager_secret_version.secret_credentials.secret_string
}

# Spot Ocean AWS/Kubernetes module (spotinst/ocean-aws-k8s/spotinst, 1.2.0),
# wired to the cluster, subnets and security group looked up in data.tf.
module "ocean-aws-k8s" {
  source  = "spotinst/ocean-aws-k8s/spotinst"
  version = "1.2.0"
  # worker_instance_profile_arn = "arn:aws:iam::766808016710:role/airflow-node-group-eks-node-group-20240517054613935800000001"

  # Configuration
  cluster_name = var.cluster_name
  region       = var.region
  subnet_ids   = data.aws_subnets.node_subnets.ids

  # node_role_arn is a single string attribute, so it is passed through as-is;
  # wrapping it in tolist(...)[0] fails because a string cannot be converted
  # to a list.
  # NOTE(review): the input name suggests an instance profile ARN, while this
  # value is the node group's IAM role ARN — confirm the module accepts it.
  worker_instance_profile_arn = data.aws_eks_node_group.node_group.node_role_arn

  security_groups                  = [data.aws_security_group.eks_cluster_security_group.id]
  is_aggressive_scale_down_enabled = true
  max_scale_down_percentage        = 33
  tags                             = var.tags
}
21 changes: 21 additions & 0 deletions modules/k8s-node-autoscaler/provider.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# AWS provider pinned to the region given by var.region.
provider "aws" {
  region = var.region
}

# Spot provider: account from a variable, token from the Secrets Manager
# secret version looked up in data.tf.
provider "spotinst" {
  token   = data.aws_secretsmanager_secret_version.secret_credentials.secret_string
  account = var.spotinst_account
}

# Kubernetes provider: receives the kubeconfig path as well as the endpoint,
# CA certificate and token read from the EKS data sources.
provider "kubernetes" {
  host                   = data.aws_eks_cluster.cluster.endpoint
  token                  = data.aws_eks_cluster_auth.cluster.token
  cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data)
  config_path            = var.kube_config_path
}

# Helm provider: configured with the kubeconfig path only.
provider "helm" {
  kubernetes {
    config_path = var.kube_config_path
  }
}
31 changes: 31 additions & 0 deletions modules/k8s-node-autoscaler/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Input: EKS cluster name used by the data sources and both Ocean modules.
variable "cluster_name" {
  type        = string
  default     = "dpe-k8"
  description = "Name of K8 cluster"
}

# Input: region handed to the AWS provider and the ocean-aws-k8s module.
variable "region" {
  type        = string
  default     = "us-east-1"
  description = "AWS region"
}

# Input: kubeconfig location handed to the kubernetes and helm providers.
variable "kube_config_path" {
  type        = string
  default     = "~/.kube/config"
  description = "Kube config path"
}

# Input: Spot account identifier used by the spotinst provider and the
# ocean-controller module.
variable "spotinst_account" {
  type        = string
  default     = "act-ac6522b4"
  description = "Spot.io account"
}

# Input: tag map forwarded to the ocean-aws-k8s module.
variable "tags" {
  type        = map(string)
  description = "AWS Resource Tags"
  default = {
    "CostCenter" = "No Program / 000000"
  }
}
9 changes: 9 additions & 0 deletions modules/k8s-node-autoscaler/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Restricts this module to Terraform CLI versions up to and including 1.5.7
# and pins the Spot provider to an exact release.
terraform {
  required_version = "<= 1.5.7"

  required_providers {
    spotinst = {
      version = "1.172.0" # Exact provider version in use
      source  = "spotinst/spotinst"
    }
  }
}
31 changes: 31 additions & 0 deletions spacelift/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Administrative Spacelift stack that tracks the "spacelift" directory of the
# eks-stack repository on the main branch.
resource "spacelift_stack" "root_administrative_stack" {
  name        = "Root Spacelift Administrative Stack"
  description = "Manages other spacelift resources"
  space_id    = "root"

  # Source location.
  repository   = "eks-stack"
  branch       = "main"
  project_root = "spacelift"

  # Behavior: administrative stack, no automatic deploys.
  administrative    = true
  autodeploy        = false
  terraform_version = "1.5.7"

  github_enterprise {
    id        = "sage-bionetworks-workflows-gh"
    namespace = "Sage-Bionetworks-Workflows"
  }
}

# Policies module, loaded from the local modules directory.
module "policies" {
  source = "./modules/policies"
}

# Policy attachments module; explicitly ordered after the policies module.
module "policy-attachments" {
  source     = "./modules/policy-attachments"
  depends_on = [module.policies]
}

# Stacks module, loaded from the local modules directory.
module "stacks" {
  source = "./modules/stacks"
}
20 changes: 20 additions & 0 deletions spacelift/modules/policies/check-estimated-cloud-spend.rego
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is nice - thanks for adding this!

Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package spacelift

# Warn when the change would push the estimated monthly cost (read from the
# first Infracost project in the input) above a fixed threshold.
warn[msg] {
	limit := 100
	estimate := to_number(input.third_party_metadata.infracost.projects[0].breakdown.totalMonthlyCost)
	estimate > limit
	msg := sprintf("monthly cost greater than $%d ($%.2f)", [limit, estimate])
}

# Warn when the estimated monthly cost grows by more than a fixed percentage
# relative to the previous estimate (first Infracost project in the input).
warn[msg] {
	limit := 5

	# Skip the rule when there is no positive previous cost to compare against.
	before := to_number(input.third_party_metadata.infracost.projects[0].pastBreakdown.totalMonthlyCost)
	before > 0

	after := to_number(input.third_party_metadata.infracost.projects[0].breakdown.totalMonthlyCost)
	growth := ((after - before) / before) * 100
	growth > limit

	msg := sprintf("monthly cost increase greater than %d%% (%.2f%%)", [limit, growth])
}
Loading