8.1 Root module
The root directory of the Terraform infrastructure consists of the main module, which calls other submodules that deploy specific infrastructure settings or tools.
This provides an overview of the whole deployment in one place. At first, the necessary cluster infrastructure is deployed, such as the vpc
and the eks
cluster itself. Afterward the custom tools to be run on EKS are deployed, such as airflow
, mlflow
, and jupyterhub
.
The following takes a more detailed look at the call of the module airflow
, yet a lot of it also applies to the other modules.
The module imports are structured in three parts. First, general information about the module is given, such as the name
of the module, or the cluster_name
, as well as more specific variables needed for specific Terraform calls in the module, like cluster_endpoint
.
Terraform does not provide the functionality to activate or deactivate a module by itself. As this is a useful feature, a custom workaround is proposed by setting the count of a module as follows: count = var.deploy_airflow ? 1 : 0
. This will set the count of the module to 0
or 1
, depending on the var.deploy_airflow
variable. This functionality is proposed for all custom modules.
Secondly, as Airflow needs access to an RDS database, the RDS module is called. Therefore, the relevant information needs to be passed to create the RDS database with the correct settings, like vpc_id
, rds_engine
, or storage_type
.
Third, variable values for the Airflow Helm chart are passed to the module. Using Helm makes the deployment of Airflow very easy. Since there are customizations to the deployment, such as a connection to the Airflow DAG repository on GitHub, it is necessary to specify this information beforehand and to integrate it into the deployment.
# Local values shared by the module calls in this root module.
# Names are derived from var.name_prefix; the remaining entries either pin a
# fixed setting or pass an input variable through unchanged.
locals {
  # Resource names derived from the common prefix.
  cluster_name          = "${var.name_prefix}-eks"
  vpc_name              = "${var.name_prefix}-vpc"
  mlflow_s3_bucket_name = "${var.name_prefix}-mlflow-bucket"

  # Ports taken from the input variables.
  port_airflow = var.port_airflow
  port_mlflow  = var.port_mlflow

  # Fixed settings.
  force_destroy_s3_bucket = true
  storage_type            = "gp2"
  max_allocated_storage   = var.max_allocated_storage

  # Git-related values taken from the input variables.
  airflow_github_ssh = var.airflow_github_ssh
  git_username       = var.git_username
  git_token          = var.git_token
  git_repository_url = var.git_repository_url
  git_branch         = var.git_branch
}
# Data source describing the AWS identity Terraform runs as.
# NOTE(review): not referenced in the visible part of this file — confirm it is used elsewhere.
data "aws_caller_identity" "current" {}
# INFRASTRUCTURE

# Calls the VPC submodule; its outputs (vpc_id, private_subnets, ...) are
# referenced by the other module calls in this file.
module "vpc" {
  source       = "./infrastructure/vpc"
  cluster_name = local.cluster_name
  vpc_name     = local.vpc_name
}
# Calls the EKS submodule, wiring in the network outputs of module.vpc.
module "eks" {
  source              = "./infrastructure/eks"
  cluster_name        = local.cluster_name
  eks_cluster_version = "1.23"

  # Network settings taken from the VPC module outputs.
  vpc_id                = module.vpc.vpc_id
  private_subnets       = module.vpc.private_subnets
  security_group_id_one = [module.vpc.worker_group_mgmt_one_id]
  security_group_id_two = [module.vpc.worker_group_mgmt_two_id]

  # Explicit ordering: the VPC module is applied before this one.
  depends_on = [
    module.vpc
  ]
}
# CUSTOM TOOLS

# Calls the Airflow submodule. The arguments fall into three groups:
# general module information, RDS settings, and Helm chart values.
module "airflow" {
  # Toggle: one instance when var.deploy_airflow is true, none otherwise.
  count            = var.deploy_airflow ? 1 : 0
  source           = "./modules/airflow"
  name             = "airflow"
  cluster_name     = local.cluster_name
  cluster_endpoint = module.eks.cluster_endpoint

  # RDS
  vpc_id                      = module.vpc.vpc_id
  private_subnets             = module.vpc.private_subnets
  private_subnets_cidr_blocks = module.vpc.private_subnets_cidr_blocks
  rds_port                    = local.port_airflow
  rds_name                    = "airflow"
  rds_engine                  = "postgres"
  rds_engine_version          = "13.3"
  rds_instance_class          = "db.t3.micro"
  storage_type                = local.storage_type
  max_allocated_storage       = local.max_allocated_storage

  # HELM
  helm_chart_repository = "https://airflow-helm.github.io/charts"
  helm_chart_name       = "airflow"
  helm_chart_version    = "8.6.1"
  git_username          = local.git_username
  git_token             = local.git_token
  git_repository_url    = local.git_repository_url
  git_branch            = local.git_branch

  # Explicit ordering: the EKS module is applied before this one.
  depends_on = [
    module.eks
  ]
}
# Calls the MLflow submodule with its S3 bucket settings and RDS settings.
module "mlflow" {
  # Toggle: one instance when var.deploy_mlflow is true, none otherwise.
  count                 = var.deploy_mlflow ? 1 : 0
  source                = "./modules/mlflow"
  name                  = "mlflow"
  mlflow_s3_bucket_name = local.mlflow_s3_bucket_name
  s3_force_destroy      = local.force_destroy_s3_bucket

  # RDS
  vpc_id                      = module.vpc.vpc_id
  private_subnets             = module.vpc.private_subnets
  private_subnets_cidr_blocks = module.vpc.private_subnets_cidr_blocks
  rds_port                    = local.port_mlflow
  rds_name                    = "mlflow"
  rds_engine                  = "mysql"
  rds_engine_version          = "8.0.30"
  rds_instance_class          = "db.t3.micro"
  storage_type                = local.storage_type
  max_allocated_storage       = local.max_allocated_storage

  # Explicit ordering: the EKS module is applied before this one.
  depends_on = [
    module.eks
  ]
}
# Calls the JupyterHub submodule with general module information and Helm chart values.
module "jupyterhub" {
  # Toggle: one instance when var.deploy_jupyterhub is true, none otherwise.
  count            = var.deploy_jupyterhub ? 1 : 0
  source           = "./modules/jupyterhub"
  name             = "jupyterhub"
  cluster_name     = local.cluster_name
  cluster_endpoint = module.eks.cluster_endpoint

  # HELM
  helm_chart_repository = "https://jupyterhub.github.io/helm-chart/"
  helm_chart_name       = "jupyterhub"
  helm_chart_version    = "2.0.0"

  # Explicit ordering: the EKS module is applied before this one.
  depends_on = [
    module.eks
  ]
}