diff --git a/.terraform.lock.hcl b/.terraform.lock.hcl deleted file mode 100644 index bbd7a8c9..00000000 --- a/.terraform.lock.hcl +++ /dev/null @@ -1,108 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. - -provider "registry.terraform.io/hashicorp/aws" { - version = "5.49.0" - constraints = ">= 3.70.0, >= 4.33.0, >= 4.57.0" - hashes = [ - "h1:Y3xvYjzBIwYSbcnZDcs6moiy30uxRoY5oT2ExQHKG5A=", - "zh:0979b07cdeffb868ea605e4bbc008adc7cccb5f3ba1d3a0b794ea3e8fff20932", - "zh:2121a0a048a1d9419df69f3561e524b7e8a6b74ba0f57bd8948799f12b6ad3a1", - "zh:573362042ba0bd18e98567a4f45d91b09eb0d223513518ba04f16a646a906403", - "zh:57be7a4d6c362be2fa586d270203f4eac1ee239816239a9503b86ebc8fa1fef0", - "zh:5c72ed211d9234edd70eac9d77c3cafc7bbf819d1c28332a6d77acf227c9a23c", - "zh:7786d1a9781f8e8c0079bf58f4ed4aeddec0caf54ad7ddcf43c47936d545a04f", - "zh:82133e7d39787ee91ed41988da71beecc2ecb900b5da94b3f3d77fbc4d4dc722", - "zh:8cdb1c154dead85be8352afd30eaf41c59249de9e7e0a8eb4ab8e625b90a4922", - "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", - "zh:ac215fd1c3bd647ae38868940651b97a53197688daefcd70b3595c84560e5267", - "zh:c45db22356d20e431639061a72e07da5201f4937c1df6b9f03f32019facf3905", - "zh:c9ba90e62db9a4708ed1a4e094849f88ce9d44c52b49f613b30bb3f7523b8d97", - "zh:d2be3607be2209995c80dc1d66086d527de5d470f73509e813254067e8287106", - "zh:e3fa20090f3cebf3911fc7ef122bd8c0505e3330ab7d541fa945fea861205007", - "zh:ef1b9d5c0b6279323f2ecfc322db8083e141984cfe1bb2f33c0f4934fccb69e3", - ] -} - -provider "registry.terraform.io/hashicorp/cloudinit" { - version = "2.3.4" - constraints = ">= 2.0.0" - hashes = [ - "h1:S3j8poSaLbaftlKq2STBkQEkZH253ZLaHhBHBifdpBQ=", - "h1:cVIIhnXweOHavu1uV2bdKScTjLbM1WnKM/25wqYBJWo=", - "zh:09f1f1e1d232da96fbf9513b0fb5263bc2fe9bee85697aa15d40bb93835efbeb", - "zh:381e74b90d7a038c3a8dcdcc2ce8c72d6b86da9f208a27f4b98cabe1a1032773", - "zh:398eb321949e28c4c5f7c52e9b1f922a10d0b2b073b7db04cb69318d24ffc5a9", - "zh:4a425679614a8f0fe440845828794e609b35af17db59134c4f9e56d61e979813", - "zh:4d955d8608ece4984c9f1dacda2a59fdb4ea6b0243872f049b388181aab8c80a", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:a48fbee1d58d55a1f4c92c2f38c83a37c8b2f2701ed1a3c926cefb0801fa446a", - "zh:b748fe6631b16a1dafd35a09377c3bffa89552af584cf95f47568b6cd31fc241", - "zh:d4b931f7a54603fa4692a2ec6e498b95464babd2be072bed5c7c2e140a280d99", - "zh:f1c9337fcfe3a7be39d179eb7986c22a979cfb2c587c05f1b3b83064f41785c5", - "zh:f58fc57edd1ee3250a28943cd84de3e4b744cdb52df0356a53403fc240240636", - "zh:f5f50de0923ff530b03e1bca0ac697534d61bb3e5fc7f60e13becb62229097a9", - ] -} - -provider "registry.terraform.io/hashicorp/null" { - version = "3.2.2" - constraints = ">= 3.0.0" - hashes = [ - "h1:zT1ZbegaAYHwQa+QwIFugArWikRJI9dqohj8xb0GY88=", - "zh:3248aae6a2198f3ec8394218d05bd5e42be59f43a3a7c0b71c66ec0df08b69e7", - "zh:32b1aaa1c3013d33c245493f4a65465eab9436b454d250102729321a44c8ab9a", - "zh:38eff7e470acb48f66380a73a5c7cdd76cc9b9c9ba9a7249c7991488abe22fe3", - "zh:4c2f1faee67af104f5f9e711c4574ff4d298afaa8a420680b0cb55d7bbc65606", - "zh:544b33b757c0b954dbb87db83a5ad921edd61f02f1dc86c6186a5ea86465b546", - "zh:696cf785090e1e8cf1587499516b0494f47413b43cb99877ad97f5d0de3dc539", - "zh:6e301f34757b5d265ae44467d95306d61bef5e41930be1365f5a8dcf80f59452", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:913a929070c819e59e94bb37a2a253c228f83921136ff4a7aa1a178c7cce5422", - "zh:aa9015926cd152425dbf86d1abdbc74bfe0e1ba3d26b3db35051d7b9ca9f72ae", - "zh:bb04798b016e1e1d49bcc76d62c53b56c88c63d6f2dfe38821afef17c416a0e1", - "zh:c23084e1b23577de22603cff752e59128d83cfecc2e6819edadd8cf7a10af11e", - ] -} - -provider "registry.terraform.io/hashicorp/time" { - version = "0.11.1" - constraints = ">= 0.9.0" - hashes = [ - "h1:IkDriv5C9G+kQQ+mP+8QGIahwKgbQcw1/mzh9U6q+ZI=", - "h1:pQGSL9mdgw4qsLndFYsEF93mbsIxyxNoAyIbBqhS3Xo=", - "zh:19a393db736ec4fd024d098d55aefaef07056c37a448ece3b55b3f5f4c2c7e4a", - "zh:227fa1e221de2907f37be78d40c06ca6a6f7b243a1ec33ade014dfaf6d92cd9c", - "zh:29970fecbf4a3ca23bacbb05d6b90cdd33dd379f90059fe39e08289951502d9f", - "zh:65024596f22f10e7dcb5e0e4a75277f275b529daa0bc0daf34ca7901c678ab88", - "zh:694d080cb5e3bf5ef08c7409208d061c135a4f5f4cdc93ea8607860995264b2e", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:b29d15d13e1b3412e6a4e1627d378dbd102659132f7488f64017dd6b6d5216d3", - "zh:bb79f4cae9f8c17c73998edc54aa16c2130a03227f7f4e71fc6ac87e230575ec", - "zh:ceccf80e95929d97f62dcf1bb3c7c7553d5757b2d9e7d222518722fc934f7ad5", - "zh:f40e638336527490e294d9c938ae55919069e6987e85a80506784ba90348792a", - "zh:f99ef33b1629a3b2278201142a3011a8489e66d92da832a5b99e442204de18fb", - "zh:fded14754ea46fdecc62a52cd970126420d4cd190e598cb61190b4724a727edb", - ] -} - -provider "registry.terraform.io/hashicorp/tls" { - version = "4.0.5" - constraints = ">= 3.0.0" - hashes = [ - "h1:e4LBdJoZJNOQXPWgOAG0UuPBVhCStu98PieNlqJTmeU=", - "h1:zeG5RmggBZW/8JWIVrdaeSJa0OG62uFX5HY1eE8SjzY=", - "zh:01cfb11cb74654c003f6d4e32bbef8f5969ee2856394a96d127da4949c65153e", - "zh:0472ea1574026aa1e8ca82bb6df2c40cd0478e9336b7a8a64e652119a2fa4f32", - "zh:1a8ddba2b1550c5d02003ea5d6cdda2eef6870ece86c5619f33edd699c9dc14b", - "zh:1e3bb505c000adb12cdf60af5b08f0ed68bc3955b0d4d4a126db5ca4d429eb4a", - "zh:6636401b2463c25e03e68a6b786acf91a311c78444b1dc4f97c539f9f78de22a", - "zh:76858f9d8b460e7b2a338c477671d07286b0d287fd2d2e3214030ae8f61dd56e", - "zh:a13b69fb43cb8746793b3069c4d897bb18f454290b496f19d03c3387d1c9a2dc", - "zh:a90ca81bb9bb509063b736842250ecff0f886a91baae8de65c8430168001dad9", - "zh:c4de401395936e41234f1956ebadbd2ed9f414e6908f27d578614aaa529870d4", - "zh:c657e121af8fde19964482997f0de2d5173217274f6997e16389e7707ed8ece8", - "zh:d68b07a67fbd604c38ec9733069fbf23441436fecf554de6c75c032f82e1ef19", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - ] -} diff --git a/main.tf b/main.tf index 44bda2dc..b76b6250 100644 --- a/main.tf +++ b/main.tf @@ -1,174 +1,181 @@ -resource "aws_iam_role" "admin_role" { - name = "eks_admin_role" - - assume_role_policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Effect = "Allow" - Principal = { - AWS = "arn:aws:iam::766808016710:root" # Replace YOUR_AWS_ACCOUNT_ID with your actual AWS account ID - } - Action = "sts:AssumeRole" - }, - ] - }) - - tags = var.tags -} - -resource "aws_iam_role_policy_attachment" "admin_policy" { - role = aws_iam_role.admin_role.name - policy_arn = "arn:aws:iam::aws:policy/PowerUserAccess" -} - - -module "vpc" { - source = "terraform-aws-modules/vpc/aws" - - name = "spacelift-created-vpc" - cidr = var.cidr - - azs = var.azs - private_subnets = var.private_subnet_cidrs - public_subnets = var.public_subnet_cidrs - - private_subnet_tags = { - Name = "private" - } - - # When removing the Internet gateway it might have allocated from elastic IP addresses - # Turn off the nat_gateway to force the IP addresses to be removed - # > "Network vpc-0f30cfca319ebc521 has some mapped public address(es). Please unmap those public address(es) before detaching the gateway."" - create_igw = true - enable_nat_gateway = true - enable_vpn_gateway = false - single_nat_gateway = true - - # Disable inbound rules for the default network ACL - default_network_acl_ingress = [ - { - "action" : "deny", - "cidr_block" : "0.0.0.0/0", - "from_port" : 0, - "protocol" : "-1", - "rule_no" : 100, - "to_port" : 0 - }, - { - "action" : "deny", - "from_port" : 0, - "ipv6_cidr_block" : "::/0", - "protocol" : "-1", - "rule_no" : 101, - "to_port" : 0 - } - ] - - tags = merge( - var.tags, - { - Terraform = "true" - Environment = "dev" - } - ) -} - -module "eks" { - source = "terraform-aws-modules/eks/aws" - version = "~> 20.10" - # version = "~> 20.9" - - depends_on = [module.vpc] - - cluster_name = var.cluster_name - cluster_version = var.cluster_version - - cluster_endpoint_public_access = true - - cluster_addons = { - coredns = { - most_recent = true - } - kube-proxy = { - most_recent = true - } - vpc-cni = { - most_recent = true - } - aws-ebs-csi-driver = { - most_recent = true - } - } - - vpc_id = module.vpc.vpc_id - subnet_ids = module.vpc.private_subnets - cluster_security_group_id = module.vpc.default_security_group_id - - - # EKS Managed Node Group(s) - eks_managed_node_group_defaults = { - instance_types = ["m6i.large", "m5.large", "m5n.large", "m5zn.large"] - } - - eks_managed_node_groups = { - one = { - name = var.eks_nodeGroup - desired_size = 0 - min_size = 0 - max_size = 1 - - instance_types = ["t3.large"] - capacity_type = "SPOT" - iam_role_additional_policies = { - AmazonEBSCSIDriverPolicy = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy", - SecretsManagerReadWrite = "arn:aws:iam::aws:policy/SecretsManagerReadWrite" - } - } - # , - # two = { - # name = "seqera" - # desired_size = 1 - # min_size = 0 - # max_size = 10 - - # instance_types = ["t3.large"] - # capacity_type = "SPOT" - # } - } - iam_role_additional_policies = { - AmazonEBSCSIDriverPolicy = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy", - SecretsManagerReadWrite = "arn:aws:iam::aws:policy/SecretsManagerReadWrite" - } - - # Cluster access entry - # To add the current caller identity as an administrator - enable_cluster_creator_admin_permissions = true - authentication_mode = "API" - - - access_entries = { - # One access entry with a policy associated - eks_admin_role = { - kubernetes_groups = [] - principal_arn = "arn:aws:iam::766808016710:role/eks_admin_role" - - policy_associations = { - eks_admin_role = { - policy_arn = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy" - access_scope = { - type = "cluster" - } - } - } - } - # https://docs.aws.amazon.com/eks/latest/userguide/access-policies.html#access-policy-permissions - # TODO: Additional roles that need to be created: - # AmazonEKSAdminViewPolicy? - # AmazonEKSEditPolicy - # AmazonEKSViewPolicy - - } - tags = var.tags -} - +resource "aws_iam_role" "admin_role" { + name = "eks_admin_role" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Principal = { + AWS = "arn:aws:iam::766808016710:root" # Replace YOUR_AWS_ACCOUNT_ID with your actual AWS account ID + } + Action = "sts:AssumeRole" + }, + ] + }) + + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "admin_policy" { + role = aws_iam_role.admin_role.name + policy_arn = "arn:aws:iam::aws:policy/PowerUserAccess" +} + + +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + + name = "spacelift-created-vpc" + cidr = var.cidr + + azs = var.azs + private_subnets = var.private_subnet_cidrs + public_subnets = var.public_subnet_cidrs + + private_subnet_tags = { + Name = "private" + } + + # When removing the Internet gateway it might have allocated from elastic IP addresses + # Turn off the nat_gateway to force the IP addresses to be removed + # > "Network vpc-0f30cfca319ebc521 has some mapped public address(es). Please unmap those public address(es) before detaching the gateway."" + create_igw = true + enable_nat_gateway = true + enable_vpn_gateway = false + single_nat_gateway = true + + manage_default_security_group = true + # default_security_group_egress = [] + # default_security_group_ingress = [] + # Disable inbound rules for the default network ACL + # TODO: Another mechanism is required. Having these rules prevents nodes from joining the cluster + # default_network_acl_ingress = [ + # { + # "action" : "deny", + # "cidr_block" : "0.0.0.0/0", + # "from_port" : 0, + # "protocol" : "-1", + # "rule_no" : 98, + # "to_port" : 0 + # }, + # { + # "action" : "deny", + # "from_port" : 0, + # "ipv6_cidr_block" : "::/0", + # "protocol" : "-1", + # "rule_no" : 99, + # "to_port" : 0 + # } + # ] + + tags = merge( + var.tags, + { + Terraform = "true" + Environment = "dev" + } + ) +} + +module "eks" { + source = "terraform-aws-modules/eks/aws" + version = "~> 20.12" + # version = "~> 20.9" + + depends_on = [module.vpc] + + cluster_name = var.cluster_name + cluster_version = var.cluster_version + + cluster_endpoint_public_access = true + + cluster_addons = { + coredns = { + most_recent = true + } + kube-proxy = { + most_recent = true + } + vpc-cni = { + most_recent = true + } + aws-ebs-csi-driver = { + most_recent = true + } + } + + vpc_id = module.vpc.vpc_id + subnet_ids = module.vpc.private_subnets + control_plane_subnet_ids = module.vpc.intra_subnets + cluster_security_group_id = module.vpc.default_security_group_id + + + # EKS Managed Node Group(s) + eks_managed_node_group_defaults = { + instance_types = ["m6i.large", "m5.large", "m5n.large", "m5zn.large"] + } + + eks_managed_node_groups = { + one = { + name = var.eks_nodeGroup + desired_size = 1 + min_size = 0 + max_size = 2 + + instance_types = ["t3.large"] + capacity_type = "SPOT" + iam_role_additional_policies = { + AmazonEBSCSIDriverPolicy = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy", + SecretsManagerReadWrite = "arn:aws:iam::aws:policy/SecretsManagerReadWrite" + WorkerNodePolicy = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" + } + } + # , + # two = { + # name = "seqera" + # desired_size = 1 + # min_size = 0 + # max_size = 10 + + # instance_types = ["t3.large"] + # capacity_type = "SPOT" + # } + } + iam_role_additional_policies = { + AmazonEBSCSIDriverPolicy = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy", + SecretsManagerReadWrite = "arn:aws:iam::aws:policy/SecretsManagerReadWrite" + WorkerNodePolicy = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" + } + + # Cluster access entry + # To add the current caller identity as an administrator + enable_cluster_creator_admin_permissions = true + authentication_mode = "API" + + + access_entries = { + # One access entry with a policy associated + eks_admin_role = { + kubernetes_groups = [] + principal_arn = aws_iam_role.admin_role.arn + + policy_associations = { + eks_admin_role = { + policy_arn = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy" + access_scope = { + type = "cluster" + } + } + } + } + # https://docs.aws.amazon.com/eks/latest/userguide/access-policies.html#access-policy-permissions + # TODO: Additional roles that need to be created: + # AmazonEKSAdminViewPolicy? + # AmazonEKSEditPolicy + # AmazonEKSViewPolicy + + } + tags = var.tags +} + diff --git a/modules/apache-airflow/data.tf b/modules/apache-airflow/data.tf new file mode 100644 index 00000000..765d5620 --- /dev/null +++ b/modules/apache-airflow/data.tf @@ -0,0 +1,7 @@ +data "aws_eks_cluster" "cluster" { + name = var.cluster_name +} + +data "aws_eks_cluster_auth" "cluster" { + name = var.cluster_name +} \ No newline at end of file diff --git a/modules/apache-airflow/main.tf b/modules/apache-airflow/main.tf new file mode 100644 index 00000000..8c9451a4 --- /dev/null +++ b/modules/apache-airflow/main.tf @@ -0,0 +1,101 @@ +resource "kubernetes_namespace" "airflow" { + metadata { + name = "airflow" + } +} + +resource "random_password" "airflow" { + length = 16 + special = true + override_special = "!#$%&*()-_=+[]{}<>:?" +} + +resource "kubernetes_secret" "airflow_webserver_secret" { + metadata { + name = "airflow-webserver-secret" + namespace = "airflow" + } + + data = { + "webserver-secret-key" = random_password.airflow.result + } + + depends_on = [kubernetes_namespace.airflow] +} + +# TODO: Should a long-term deployment use a managed RDS instance? +# https://github.com/apache/airflow/blob/main/chart/values.yaml#L2321-L2329 +resource "helm_release" "airflow" { + name = "apache-airflow" + repository = "https://airflow.apache.org" + chart = "airflow" + namespace = "airflow" + version = "1.11.0" + depends_on = [kubernetes_namespace.airflow] + + # https://github.com/hashicorp/terraform-provider-helm/issues/683#issuecomment-830872443 + wait = false + + set { + name = "config.webserver.expose_config" + value = "true" + } + + set { + name = "config.secrets.backend" + value = "airflow.providers.amazon.aws.secrets.secrets_manager.SecretsManagerBackend" + } + + set { + name = "webserver.service.type" + value = "LoadBalancer" + } + + set { + name = "webserverSecretKeySecretName" + value = "airflow-webserver-secret" + } + + set { + name = "airflowVersion" + value = "2.7.1" + } + + set { + name = "defaultAirflowRepository" + value = "bfaublesage/airflow" + } + + set { + name = "defaultAirflowTag" + value = "2.7.1-python-3.10" + } + + set { + name = "dags.persistence.enabled" + value = "false" + } + + set { + name = "dags.gitSync.enabled" + value = "true" + } + + set { + name = "dags.gitSync.repo" + value = "https://github.com/Sage-Bionetworks-Workflows/orca-recipes" + } + + set { + name = "dags.gitSync.subPath" + value = "dags" + } + + set { + name = "dags.gitSync.branch" + value = "main" + } + + + values = [templatefile("${path.module}/templates/airflow-values.yaml", {})] +} diff --git a/modules/apache-airflow/provider.tf b/modules/apache-airflow/provider.tf new file mode 100644 index 00000000..b6449817 --- /dev/null +++ b/modules/apache-airflow/provider.tf @@ -0,0 +1,16 @@ +provider "aws" { + region = var.region +} + +provider "kubernetes" { + config_path = var.kube_config_path + host = data.aws_eks_cluster.cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.cluster.token +} + +provider "helm" { + kubernetes { + config_path = var.kube_config_path + } +} diff --git a/modules/apache-airflow/templates/airflow-values.yaml b/modules/apache-airflow/templates/airflow-values.yaml new file mode 100644 index 00000000..c89e8e99 --- /dev/null +++ b/modules/apache-airflow/templates/airflow-values.yaml @@ -0,0 +1,20 @@ +config: + secrets: + backend_kwargs: '{"connections_prefix": "airflow/connections", "variables_prefix": "airflow/variables", "region_name": "us-east-1"}' + # webserver: + # authenticate: true + # auth_backend: airflow.contrib.auth.backends.google_auth + # web_server_ssl_cert = + # web_server_ssl_key = + # web_server_port = 443 + # base_url = http://:443 + # celery: + # ssl_active = True + # ssl_key = + # ssl_cert = + # ssl_cacert = + +# service: +# type: LoadBalancer # or another type as needed +# annotations: +# alb.ingress.kubernetes.io/scheme: "internal" diff --git a/modules/apache-airflow/variables.tf b/modules/apache-airflow/variables.tf new file mode 100644 index 00000000..93adc5a2 --- /dev/null +++ b/modules/apache-airflow/variables.tf @@ -0,0 +1,18 @@ +variable "cluster_name" { + description = "Name of K8 cluster" + type = string + default = "dpe-k8" +} + +variable "kube_config_path" { + description = "Kube config path" + type = string + default = "~/.kube/config" +} + +variable "region" { + description = "AWS region" + type = string + default = "us-east-1" +} + diff --git a/modules/apache-airflow/versions.tf b/modules/apache-airflow/versions.tf new file mode 100644 index 00000000..dce26314 --- /dev/null +++ b/modules/apache-airflow/versions.tf @@ -0,0 +1,3 @@ +terraform { + required_version = "<= 1.5.7" +} diff --git a/modules/internal-k8-infra/data.tf b/modules/internal-k8-infra/data.tf index 8eb511e9..be8854a8 100644 --- a/modules/internal-k8-infra/data.tf +++ b/modules/internal-k8-infra/data.tf @@ -1,9 +1,9 @@ data "aws_eks_cluster" "cluster" { - name = "dpe-k8" + name = var.cluster_name } data "aws_eks_cluster_auth" "cluster" { - name = "dpe-k8" + name = var.cluster_name } data "aws_secretsmanager_secret" "spotinst_token" { @@ -14,12 +14,22 @@ data "aws_secretsmanager_secret_version" "secret_credentials" { secret_id = data.aws_secretsmanager_secret.spotinst_token.id } -# TODO: This should search for the VPC using some other value as ID would change -# on first startup and teardown/restart +data "aws_vpc" "selected" { + filter { + name = "tag:Name" + values = ["spacelift-created-vpc"] + } +} + data "aws_subnets" "node_subnets" { filter { name = "vpc-id" - values = ["vpc-0f30cfca319ebc521"] + values = [data.aws_vpc.selected.id] + } + + filter { + name = "tag:Name" + values = ["private"] } } diff --git a/modules/internal-k8-infra/main.tf b/modules/internal-k8-infra/main.tf index 25ebf955..f72749ed 100644 --- a/modules/internal-k8-infra/main.tf +++ b/modules/internal-k8-infra/main.tf @@ -1,16 +1,15 @@ -module "kubernetes-controller" { - source = "spotinst/kubernetes-controller/ocean" - version = "0.0.2" +module "ocean-controller" { + source = "spotinst/ocean-controller/spotinst" + version = "0.54.0" - # Credentials + # Credentials. spotinst_token = data.aws_secretsmanager_secret_version.secret_credentials.secret_string spotinst_account = var.spotinst_account - # Configuration + # Configuration. cluster_identifier = var.cluster_name } - module "ocean-aws-k8s" { source = "spotinst/ocean-aws-k8s/spotinst" version = "1.2.0" @@ -60,7 +59,7 @@ resource "helm_release" "airflow" { chart = "airflow" namespace = "airflow" version = "1.11.0" - depends_on = [kubernetes_namespace.airflow] + depends_on = [kubernetes_namespace.airflow, module.ocean-controller, module.ocean-aws-k8s] # https://github.com/hashicorp/terraform-provider-helm/issues/683#issuecomment-830872443 wait = false diff --git a/modules/k8s-node-autoscaler/README.md b/modules/k8s-node-autoscaler/README.md new file mode 100644 index 00000000..5dd28bc5 --- /dev/null +++ b/modules/k8s-node-autoscaler/README.md @@ -0,0 +1 @@ +# The use of this module is experimental and is a WIP \ No newline at end of file diff --git a/modules/k8s-node-autoscaler/data.tf b/modules/k8s-node-autoscaler/data.tf new file mode 100644 index 00000000..9912982f --- /dev/null +++ b/modules/k8s-node-autoscaler/data.tf @@ -0,0 +1,39 @@ +data "aws_eks_cluster" "cluster" { + name = var.cluster_name +} + +data "aws_eks_cluster_auth" "cluster" { + name = var.cluster_name +} + +data "aws_secretsmanager_secret" "spotinst_token" { + name = "spotinst_token" +} + +data "aws_secretsmanager_secret_version" "secret_credentials" { + secret_id = data.aws_secretsmanager_secret.spotinst_token.id +} + +# TODO: This should search for the VPC using some other value as ID would change +# on first startup and teardown/restart +data "aws_subnets" "node_subnets" { + filter { + name = "vpc-id" + values = ["vpc-0f30cfca319ebc521"] + } +} + +data "aws_eks_node_groups" "node_groups" { + cluster_name = var.cluster_name +} + +data "aws_eks_node_group" "node_group" { + cluster_name = var.cluster_name + node_group_name = data.aws_eks_node_groups.node_groups[0].id +} + +data "aws_security_group" "eks_cluster_security_group" { + tags = { + Name = "${var.cluster_name}-node" + } +} diff --git a/modules/k8s-node-autoscaler/main.tf b/modules/k8s-node-autoscaler/main.tf new file mode 100644 index 00000000..bdd290a7 --- /dev/null +++ b/modules/k8s-node-autoscaler/main.tf @@ -0,0 +1,27 @@ +module "ocean-controller" { + source = "spotinst/ocean-controller/spotinst" + version = "0.54.0" + + # Credentials. + spotinst_token = data.aws_secretsmanager_secret_version.secret_credentials.secret_string + spotinst_account = var.spotinst_account + + # Configuration. + cluster_identifier = var.cluster_name +} + +module "ocean-aws-k8s" { + source = "spotinst/ocean-aws-k8s/spotinst" + version = "1.2.0" + # worker_instance_profile_arn = "arn:aws:iam::766808016710:role/airflow-node-group-eks-node-group-20240517054613935800000001" + + # Configuration + cluster_name = var.cluster_name + region = var.region + subnet_ids = data.aws_subnets.node_subnets.ids + worker_instance_profile_arn = tolist(data.aws_eks_node_group.node_group.node_role_arn)[0] + security_groups = [data.aws_security_group.eks_cluster_security_group.id] + is_aggressive_scale_down_enabled = true + max_scale_down_percentage = 33 + tags = var.tags +} diff --git a/modules/k8s-node-autoscaler/provider.tf b/modules/k8s-node-autoscaler/provider.tf new file mode 100644 index 00000000..451c9b98 --- /dev/null +++ b/modules/k8s-node-autoscaler/provider.tf @@ -0,0 +1,21 @@ +provider "aws" { + region = var.region +} + +provider "spotinst" { + account = var.spotinst_account + token = data.aws_secretsmanager_secret_version.secret_credentials.secret_string +} + +provider "kubernetes" { + config_path = var.kube_config_path + host = data.aws_eks_cluster.cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.cluster.token +} + +provider "helm" { + kubernetes { + config_path = var.kube_config_path + } +} diff --git a/modules/k8s-node-autoscaler/variables.tf b/modules/k8s-node-autoscaler/variables.tf new file mode 100644 index 00000000..8d1203d2 --- /dev/null +++ b/modules/k8s-node-autoscaler/variables.tf @@ -0,0 +1,31 @@ +variable "cluster_name" { + description = "Name of K8 cluster" + type = string + default = "dpe-k8" +} + +variable "region" { + description = "AWS region" + type = string + default = "us-east-1" +} + +variable "kube_config_path" { + description = "Kube config path" + type = string + default = "~/.kube/config" +} + +variable "spotinst_account" { + description = "Spot.io account" + type = string + default = "act-ac6522b4" +} + +variable "tags" { + description = "AWS Resource Tags" + type = map(string) + default = { + "CostCenter" = "No Program / 000000" + } +} \ No newline at end of file diff --git a/modules/k8s-node-autoscaler/versions.tf b/modules/k8s-node-autoscaler/versions.tf new file mode 100644 index 00000000..5508ca73 --- /dev/null +++ b/modules/k8s-node-autoscaler/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_version = "<= 1.5.7" + required_providers { + spotinst = { + source = "spotinst/spotinst" + version = "1.172.0" # Specify the version you wish to use + } + } +} diff --git a/spacelift/main.tf b/spacelift/main.tf new file mode 100644 index 00000000..dd4a484c --- /dev/null +++ b/spacelift/main.tf @@ -0,0 +1,31 @@ +resource "spacelift_stack" "root_administrative_stack" { + github_enterprise { + namespace = "Sage-Bionetworks-Workflows" + id = "sage-bionetworks-workflows-gh" + } + + administrative = true + autodeploy = false + branch = "main" + description = "Manages other spacelift resources" + name = "Root Spacelift Administrative Stack" + project_root = "spacelift" + repository = "eks-stack" + terraform_version = "1.5.7" + space_id = "root" +} + +module "policies" { + source = "./modules/policies" +} + +module "policy-attachments" { + source = "./modules/policy-attachments" + depends_on = [ + module.policies + ] +} + +module "stacks" { + source = "./modules/stacks" +} \ No newline at end of file diff --git a/spacelift/modules/policies/check-estimated-cloud-spend.rego b/spacelift/modules/policies/check-estimated-cloud-spend.rego new file mode 100644 index 00000000..35cfe9c5 --- /dev/null +++ b/spacelift/modules/policies/check-estimated-cloud-spend.rego @@ -0,0 +1,20 @@ +package spacelift + +# Warn if changes that will cause the monthly cost to go above a certain threshold +warn[sprintf("monthly cost greater than $%d ($%.2f)", [threshold, monthly_cost])] { + threshold := 100 + monthly_cost := to_number(input.third_party_metadata.infracost.projects[0].breakdown.totalMonthlyCost) + monthly_cost > threshold +} + +# Warn if the monthly costs increase more than a certain percentage +warn[sprintf("monthly cost increase greater than %d%% (%.2f%%)", [threshold, percentage_increase])] { + threshold := 5 + previous_cost := to_number(input.third_party_metadata.infracost.projects[0].pastBreakdown.totalMonthlyCost) + previous_cost > 0 + + monthly_cost := to_number(input.third_party_metadata.infracost.projects[0].breakdown.totalMonthlyCost) + percentage_increase := ((monthly_cost - previous_cost) / previous_cost) * 100 + + percentage_increase > threshold +} \ No newline at end of file diff --git a/spacelift/modules/policies/enforce-tags-on-resources.rego b/spacelift/modules/policies/enforce-tags-on-resources.rego new file mode 100644 index 00000000..0ee4b588 --- /dev/null +++ b/spacelift/modules/policies/enforce-tags-on-resources.rego @@ -0,0 +1,17 @@ +package spacelift + +# This example plan policy enforces specific tags are present on your resources +# +# You can read more about plan policies here: +# https://docs.spacelift.io/concepts/policy/terraform-plan-policy + +required_tags := {"CostCenter"} + +deny[sprintf("resource %q does not have all suggested tags (%s)", [resource.address, concat(", ", missing_tags)])] { + resource := input.terraform.resource_changes[_] + tags := resource.change.after.tags_all + + missing_tags := {tag | required_tags[tag]; not tags[tag]} + + count(missing_tags) > 0 +} diff --git a/spacelift/modules/policies/main.tf b/spacelift/modules/policies/main.tf new file mode 100644 index 00000000..54df3933 --- /dev/null +++ b/spacelift/modules/policies/main.tf @@ -0,0 +1,18 @@ +resource "spacelift_policy" "enforce-tags-on-resources" { + name = "Enforce Tags On Resources - cli" + body = file("${path.module}/enforce-tags-on-resources.rego") + type = "PLAN" + labels = ["compliance", "plan", "tagging", "terraform"] + description = "This policy ensures that all Terraform-managed resources adhere to tagging conventions by requiring the presence of specific tags. It denies changes to resources that lack any of these required tags, emphasizing the importance of consistent tagging for resource identification, environment management, and ownership tracking. The policy aids in maintaining order, facilitating cost allocation, security, and governance across the infrastructure." + space_id = "root" +} + + +resource "spacelift_policy" "cloud-spend-estimation" { + name = "Cloud Spend Estimation - cli" + body = file("${path.module}/check-estimated-cloud-spend.rego") + type = "PLAN" + space_id = "root" +} + + diff --git a/spacelift/modules/policies/versions.tf b/spacelift/modules/policies/versions.tf new file mode 100644 index 00000000..ca249ecb --- /dev/null +++ b/spacelift/modules/policies/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + spacelift = { + source = "spacelift-io/spacelift" + version = "1.13.0" + } + } +} diff --git a/spacelift/modules/policy-attachments/main.tf b/spacelift/modules/policy-attachments/main.tf new file mode 100644 index 00000000..6c8cf3d6 --- /dev/null +++ b/spacelift/modules/policy-attachments/main.tf @@ -0,0 +1,21 @@ +resource "spacelift_policy_attachment" "bfauble-enforce-tags-on-resources" { + policy_id = "enforce-tags-on-resources-cli" + # This is the Outside K8s infra stack + stack_id = "bfauble" +} + +resource "spacelift_policy_attachment" "infrastructure-inside-eks-cluster-enforce-tags-on-resources" { + policy_id = "enforce-tags-on-resources-cli" + stack_id = "infrastructure-inside-eks-cluster" +} + +resource "spacelift_policy_attachment" "bfauble-cloud-spend-estimation" { + policy_id = "cloud-spend-estimation-cli" + # This is the Outside K8s infra stack + stack_id = "bfauble" +} + +resource "spacelift_policy_attachment" "infrastructure-inside-eks-cluster-cloud-spend-estimation" { + policy_id = "cloud-spend-estimation-cli" + stack_id = "infrastructure-inside-eks-cluster" +} \ No newline at end of file diff --git a/spacelift/modules/policy-attachments/versions.tf b/spacelift/modules/policy-attachments/versions.tf new file mode 100644 index 00000000..ca249ecb --- /dev/null +++ b/spacelift/modules/policy-attachments/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + spacelift = { + source = "spacelift-io/spacelift" + version = "1.13.0" + } + } +} diff --git a/spacelift/modules/stacks/main.tf b/spacelift/modules/stacks/main.tf new file mode 100644 index 00000000..70318a60 --- /dev/null +++ b/spacelift/modules/stacks/main.tf @@ -0,0 +1,33 @@ +resource "spacelift_stack" "external_dpe_k8s_infra_stack" { + github_enterprise { + namespace = "Sage-Bionetworks-Workflows" + id = "sage-bionetworks-workflows-gh" + } + + administrative = false + autodeploy = false + branch = "main" + description = "Manages outside DPE cluster resources" + name = "Infrastructure (Outside EKS Cluster) - CLI" + project_root = "" + repository = "eks-stack" + terraform_version = "1.5.7" + space_id = "dpe-01HY43JT0KWB83XMT89QF3TA72" +} + +resource "spacelift_stack" "interal_dpe_k8s_infra_stack" { + github_enterprise { + namespace = "Sage-Bionetworks-Workflows" + id = "sage-bionetworks-workflows-gh" + } + + administrative = false + autodeploy = false + branch = "main" + description = "Manages inside DPE cluster resources" + name = "Infrastructure (Inside EKS Cluster) - CLI" + project_root = "modules/internal-k8-infra" + repository = "eks-stack" + terraform_version = "1.5.7" + space_id = "dpe-01HY43JT0KWB83XMT89QF3TA72" +} \ No newline at end of file diff --git a/spacelift/modules/stacks/versions.tf b/spacelift/modules/stacks/versions.tf new file mode 100644 index 00000000..ca249ecb --- /dev/null +++ b/spacelift/modules/stacks/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + spacelift = { + source = "spacelift-io/spacelift" + version = "1.13.0" + } + } +} diff --git a/spacelift/provider.tf b/spacelift/provider.tf new file mode 100644 index 00000000..cb68d081 --- /dev/null +++ b/spacelift/provider.tf @@ -0,0 +1,6 @@ +provider "spacelift" { + api_key_endpoint = "https://sagebionetworks.app.spacelift.io" + # Running from within spacelift does not require these to be set + # api_key_id = "" + # api_key_secret = "" +} diff --git a/spacelift/versions.tf b/spacelift/versions.tf new file mode 100644 index 00000000..ca249ecb --- /dev/null +++ b/spacelift/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + spacelift = { + source = "spacelift-io/spacelift" + version = "1.13.0" + } + } +} diff --git a/stacks/dpe-prod/main.tf b/stacks/dpe-prod/main.tf new file mode 100644 index 00000000..723f30ad --- /dev/null +++ b/stacks/dpe-prod/main.tf @@ -0,0 +1,9 @@ +# Set up the resources and their dependency on one another +# sage-aws-networking +# k8s-node-autoscaler +# sage-aws-eks +# apache-airflow + +# sage-aws-networking <- sage-aws-eks +# sage-aws-eks <- apache-airflow +# sage-aws-eks <- k8s-node-autoscaler \ No newline at end of file diff --git a/variables.tf b/variables.tf index cc9c875e..daced97a 100644 --- a/variables.tf +++ b/variables.tf @@ -7,7 +7,7 @@ variable "cluster_name" { variable "cluster_version" { description = "Version of K8 cluster" type = string - default = "1.29" + default = "1.30" } variable "kube_config_path" {