Website: https://www.cast.ai
- Terraform 0.13+
A module to connect an EKS cluster to CAST AI.
Requires castai/castai
and hashicorp/aws
providers to be configured.
module "castai-eks-cluster" {
source = "castai/eks-cluster/castai"
aws_account_id = var.aws_account_id
aws_cluster_region = var.cluster_region
aws_cluster_name = var.cluster_id
aws_assume_role_arn = module.castai-eks-role-iam.role_arn
autoscaler_policies_json = var.autoscaler_policies_json
// Default node configuration will be used for all CAST provisioned nodes unless specific configuration is requested.
default_node_configuration = module.castai-eks-cluster.castai_node_configurations["default"]
node_configurations = {
default = {
subnets = module.vpc.private_subnets
dns_cluster_ip = "10.100.0.10"
instance_profile_role_arn = var.instance_profile_arn
ssh_public_key = var.ssh_public_key
security_groups = [
module.eks.node_security_group_id,
]
tags = {
"team" : "core"
}
init_script = base64encode(var.init_script)
docker_config = jsonencode({
"insecure-registries" = ["registry.com:5000"],
"max-concurrent-downloads" = 10
})
kubelet_config = jsonencode({
"registryBurst" : 20,
"registryPullQPS" : 10
})
container_runtime = "dockerd"
}
}
node_templates = {
spot_tmpl = {
configuration_id = module.castai-eks-cluster.castai_node_configurations["default"]
should_taint = true
custom_labels = {
custom-label-key-1 = "custom-label-value-1"
custom-label-key-2 = "custom-label-value-2"
}
custom_taints = [
{
key = "custom-taint-key-1"
value = "custom-taint-value-1"
},
{
key = "custom-taint-key-2"
value = "custom-taint-value-2"
}
]
constraints = {
fallback_restore_rate_seconds = 1800
spot = true
use_spot_fallbacks = true
min_cpu = 4
max_cpu = 100
instance_families = {
exclude = ["m5"]
}
compute_optimized_state = "disabled"
storage_optimized_state = "disabled"
is_gpu_only = false
architectures = ["amd64"]
}
}
}
autoscaler_settings = {
enabled = true
node_templates_partial_matching_enabled = false
unschedulable_pods = {
enabled = true
headroom = {
enabled = true
cpu_percentage = 10
memory_percentage = 10
}
headroom_spot = {
enabled = true
cpu_percentage = 10
memory_percentage = 10
}
}
node_downscaler = {
enabled = true
empty_nodes = {
enabled = true
}
evictor = {
aggressive_mode = false
cycle_interval = "5m10s"
dry_run = false
enabled = true
node_grace_period_minutes = 10
scoped_mode = false
}
}
cluster_limits = {
enabled = true
cpu = {
max_cores = 20
min_cores = 1
}
}
}
}
Existing configuration:
module "castai-eks-cluster" {
// ...
subnets = module.vpc.private_subnets
dns_cluster_ip = "10.100.0.10"
instance_profile_role_arn = var.instance_profile_arn
ssh_public_key = var.ssh_public_key
override_security_groups = [
module.eks.node_security_group_id,
]
tags = {
"team" : "core"
}
}
New configuration:
module "castai-eks-cluster" {
// ...
// Default node configuration will be used for all CAST provisioned nodes unless specific configuration is requested.
default_node_configuration = module.castai-eks-cluster.castai_node_configurations["default"]
node_configurations = {
default = {
subnets = module.vpc.private_subnets
dns_cluster_ip = "10.100.0.10"
instance_profile_role_arn = var.instance_profile_arn
ssh_public_key = var.ssh_public_key
security_groups = [
module.eks.node_security_group_id,
]
tags = {
"team" : "core"
}
}
}
}
Existing configuration:
module "castai-eks-cluster" {
// ...
node_templates = {
// ...
}
autoscaler_policies_json = <<-EOT
{
"enabled": true,
"unschedulablePods": {
"enabled": true
},
"spotInstances": {
"enabled": true,
"clouds": ["aws"],
"spotBackups": {
"enabled": true
},
"spotDiversityEnabled": false,
"spotDiversityPriceIncreaseLimitPercent": 20,
"spotInterruptionPredictions": {
"enabled": true,
"type": "AWSRebalanceRecommendations"
}
},
"nodeDownscaler": {
"enabled": true,
"emptyNodes": {
"enabled": true
},
"evictor": {
"aggressiveMode": true,
"cycleInterval": "5m10s",
"dryRun": false,
"enabled": true,
"nodeGracePeriodMinutes": 10,
"scopedMode": false
}
}
}
EOT
}
New configuration:
module "castai-eks-cluster" {
// ...
node_templates = {
default_by_castai = {
name = "default-by-castai"
configuration_id = module.castai-eks-cluster.castai_node_configurations["default"]
is_default = true
should_taint = false
constraints = {
on_demand = true
spot = true
use_spot_fallbacks = true
enable_spot_diversity = false
spot_diversity_price_increase_limit_percent = 20
spot_interruption_predictions_enabled = true
spot_interruption_predictions_type = "aws-rebalance-recommendations"
}
}
}
autoscaler_policies_json = <<-EOT
{
"enabled": true,
"unschedulablePods": {
"enabled": true
},
"nodeDownscaler": {
"enabled": true,
"emptyNodes": {
"enabled": true
},
"evictor": {
"aggressiveMode": true,
"cycleInterval": "5m10s",
"dryRun": false,
"enabled": true,
"nodeGracePeriodMinutes": 10,
"scopedMode": false
}
}
}
EOT
}
Version 7.x.x changes:
- Removed `custom_label` attribute in `castai_node_template` resource. Use `custom_labels` instead.
Old configuration:
module "castai-eks-cluster" {
// ...
node_templates = {
spot_tmpl = {
custom_label = {
key = "custom-label-key-1"
value = "custom-label-value-1"
}
}
}
}
New configuration:
module "castai-eks-cluster" {
// ...
node_templates = {
spot_tmpl = {
custom_labels = {
custom-label-key-1 = "custom-label-value-1"
}
}
}
}
Version 8.x.x changed:
- Removed `compute_optimized` and `storage_optimized` attributes in `castai_node_template` resource, `constraints` object. Use `compute_optimized_state` and `storage_optimized_state` instead.
Old configuration:
module "castai-eks-cluster" {
node_templates = {
spot_tmpl = {
constraints = {
compute_optimized = false
storage_optimized = true
}
}
}
}
New configuration:
module "castai-eks-cluster" {
node_templates = {
spot_tmpl = {
constraints = {
compute_optimized_state = "disabled"
storage_optimized_state = "enabled"
}
}
}
}
Version 9.3.x changed:
- Deprecated `autoscaler_policies_json` attribute. Use `autoscaler_settings` instead.
Old configuration:
module "castai-eks-cluster" {
autoscaler_policies_json = <<-EOT
{
"enabled": true,
"unschedulablePods": {
"enabled": true
},
"nodeDownscaler": {
"enabled": true,
"emptyNodes": {
"enabled": true
},
"evictor": {
"aggressiveMode": false,
"cycleInterval": "5m10s",
"dryRun": false,
"enabled": true,
"nodeGracePeriodMinutes": 10,
"scopedMode": false
}
},
"nodeTemplatesPartialMatchingEnabled": false,
"clusterLimits": {
"cpu": {
"maxCores": 20,
"minCores": 1
},
"enabled": true
}
}
EOT
}
New configuration:
module "castai-eks-cluster" {
autoscaler_settings = {
enabled = true
node_templates_partial_matching_enabled = false
unschedulable_pods = {
enabled = true
}
node_downscaler = {
enabled = true
empty_nodes = {
enabled = true
}
evictor = {
aggressive_mode = false
cycle_interval = "5m10s"
dry_run = false
enabled = true
node_grace_period_minutes = 10
scoped_mode = false
}
}
cluster_limits = {
enabled = true
cpu = {
max_cores = 20
min_cores = 1
}
}
}
}
Usage examples are located in the Terraform provider repo.
terraform-docs markdown table . --output-file README.md
Name | Version |
---|---|
terraform | >= 0.13 |
aws | >= 2.49 |
castai | ~> 7.14 |
helm | >= 2.0.0 |
Name | Version |
---|---|
castai | ~> 7.14 |
helm | >= 2.0.0 |
null | n/a |
No modules.
Name | Description | Type | Default | Required |
---|---|---|---|---|
agent_aws_access_key_id | AWS access key for CAST AI agent to fetch instance details. | string |
"" |
no |
agent_aws_iam_service_account_role_arn | Arn of the role to be used by CAST AI agent to fetch instance details. Only readonly AmazonEC2ReadOnlyAccess is needed. | string |
"" |
no |
agent_aws_secret_access_key | AWS access key secret for CAST AI agent to fetch instance details. | string |
"" |
no |
agent_values | List of YAML formatted string with agent values | list(string) |
[] |
no |
agent_version | Version of castai-agent helm chart. Default latest | string |
null |
no |
api_grpc_addr | CAST AI GRPC API address | string |
"api-grpc.cast.ai:443" |
no |
api_url | URL of alternative CAST AI API to be used during development or testing | string |
"https://api.cast.ai" |
no |
autoscaler_policies_json | Optional json object to override CAST AI cluster autoscaler policies. Deprecated, use autoscaler_settings instead. |
string |
null |
no |
autoscaler_settings | Optional Autoscaler policy definitions to override current autoscaler settings | any |
null |
no |
aws_account_id | ID of AWS account the cluster is located in. | string |
n/a | yes |
aws_assume_role_arn | Arn of the role to be used by CAST AI for IAM access | string |
null |
no |
aws_cluster_name | Name of the cluster to be connected to CAST AI. | string |
n/a | yes |
aws_cluster_region | Region of the cluster to be connected to CAST AI. | string |
n/a | yes |
castai_api_token | Optional CAST AI API token created in console.cast.ai API Access keys section. Used only when wait_for_cluster_ready is set to true |
string |
"" |
no |
castai_components_labels | Optional additional Kubernetes labels for CAST AI pods | map(any) |
{} |
no |
cluster_controller_values | List of YAML formatted string with cluster-controller values | list(string) |
[] |
no |
cluster_controller_version | Version of castai-cluster-controller helm chart. Default latest | string |
null |
no |
default_node_configuration | ID of the default node configuration | string |
"" |
no |
default_node_configuration_name | Name of the default node configuration | string |
"" |
no |
delete_nodes_on_disconnect | Optionally delete CAST AI created nodes when the cluster is destroyed | bool |
false |
no |
egressd_values | List of YAML formatted string with egressd values | list(string) |
[] |
no |
egressd_version | Version of castai-egressd helm chart. Default latest | string |
null |
no |
evictor_ext_values | List of YAML formatted string with evictor-ext values | list(string) |
[] |
no |
evictor_ext_version | Version of castai-evictor-ext chart. Default latest | string |
null |
no |
evictor_values | List of YAML formatted string with evictor values | list(string) |
[] |
no |
evictor_version | Version of castai-evictor chart. Default latest | string |
null |
no |
grpc_url | gRPC endpoint used by pod-pinner | string |
"grpc.cast.ai:443" |
no |
install_egressd | Optional flag for installation of Egressd (Network cost monitoring) (https://docs.cast.ai/docs/network-cost) | bool |
false |
no |
install_security_agent | Optional flag for installation of security agent (https://docs.cast.ai/product-overview/console/security-insights/) | bool |
false |
no |
install_workload_autoscaler | Optional flag for installation of workload autoscaler (https://docs.cast.ai/docs/workload-autoscaling-configuration) | bool |
false |
no |
kvisor_controller_extra_args | Extra arguments for the kvisor controller. Optionally enable kvisor to lint Kubernetes YAML manifests, scan workload images and check if workloads pass CIS Kubernetes Benchmarks as well as NSA, OWASP and PCI recommendations. | map(string) |
{ |
no |
kvisor_values | List of YAML formatted string with kvisor values | list(string) |
[] |
no |
kvisor_version | Version of kvisor chart. Default latest | string |
null |
no |
kvisor_wait | Wait for kvisor chart to finish release | bool |
true |
no |
node_configurations | Map of EKS node configurations to create | any |
{} |
no |
node_templates | Map of node templates to create | any |
{} |
no |
pod_pinner_values | List of YAML formatted string values for pod-pinner helm chart | list(string) |
[] |
no |
pod_pinner_version | Version of pod-pinner helm chart. Default latest | string |
null |
no |
self_managed | Whether CAST AI components' upgrades are managed by a customer; by default upgrades are managed by the CAST AI central system. | bool |
false |
no |
spot_handler_values | List of YAML formatted string with spot-handler values | list(string) |
[] |
no |
spot_handler_version | Version of castai-spot-handler helm chart. Default latest | string |
null |
no |
wait_for_cluster_ready | Wait for cluster to be ready before finishing the module execution; this option requires castai_api_token to be set |
bool |
false |
no |
workload_autoscaler_values | List of YAML formatted string with cluster-workload-autoscaler values | list(string) |
[] |
no |
workload_autoscaler_version | Version of castai-workload-autoscaler helm chart. Default latest | string |
null |
no |
workload_scaling_policies | Map of workload scaling policies to create | any |
{} |
no |
Name | Description |
---|---|
castai_node_configurations | Map of node configurations ids by name |
castai_node_templates | Map of node templates by name |
cluster_id | CAST AI cluster id, which can be used for accessing cluster data using API |