8.1 Root module

The root directory of the Terraform infrastructure contains the main (root) module, which calls submodules that each deploy a specific piece of infrastructure or a specific tool. This provides an overview of the whole deployment in one place. First, the necessary cluster infrastructure is deployed, such as the VPC and the EKS cluster itself. Afterward, the custom tools that run on EKS are deployed, such as Airflow, MLflow, and JupyterHub.

The following looks in more detail at the call of the airflow module, though much of it also applies to the other modules. The module's input arguments are structured in three parts. First, general information about the module is given, such as the name of the module or the cluster_name, as well as more specific variables needed for particular Terraform calls in the module, like cluster_endpoint. Terraform does not provide a dedicated switch to activate or deactivate a module. As this is a useful feature, a workaround is used: the count of the module is set as count = var.deploy_airflow ? 1 : 0. This sets the count of the module to 1 or 0, depending on whether var.deploy_airflow is true or false. The same mechanism is used for all custom modules.

Second, as Airflow needs access to an RDS database, the RDS module is called. It is therefore necessary to pass the information required to create the RDS instance with the correct settings, such as vpc_id, rds_engine, or storage_type.

Third, variable values for the Airflow Helm chart are passed to the module. Using Helm makes the deployment of Airflow very easy. Since the deployment is customized, for example with a connection to the Airflow DAG repository on GitHub, it is necessary to specify this information beforehand and to integrate it into the deployment.

# Values shared by the module calls in this root module: names derived from
# the common prefix, fixed settings, and input variables re-exposed as locals.
locals {
  # Resource names derived from var.name_prefix.
  cluster_name          = "${var.name_prefix}-eks"
  vpc_name              = "${var.name_prefix}-vpc"
  mlflow_s3_bucket_name = "${var.name_prefix}-mlflow-bucket"

  # Fixed settings: storage_type is passed to the airflow and mlflow modules,
  # force_destroy_s3_bucket to the mlflow module as s3_force_destroy.
  storage_type            = "gp2"
  force_destroy_s3_bucket = true

  # Ports and storage limit, used as RDS inputs of the airflow/mlflow modules.
  port_airflow          = var.port_airflow
  port_mlflow           = var.port_mlflow
  max_allocated_storage = var.max_allocated_storage

  # Git settings. The git_* values are passed to the airflow module;
  # airflow_github_ssh is not referenced in the visible part of this file.
  airflow_github_ssh = var.airflow_github_ssh
  git_username       = var.git_username
  git_token          = var.git_token
  git_repository_url = var.git_repository_url
  git_branch         = var.git_branch
}

# Looks up the identity of the AWS credentials Terraform is running with.
# NOTE(review): not referenced in the visible part of this configuration —
# confirm it is used elsewhere in the root module.
data "aws_caller_identity" "current" {}


# INFRASTRUCTURE
# Network layer. Its outputs (VPC id, private subnets and their CIDR blocks,
# worker security group ids) are consumed by the eks, airflow and mlflow modules.
module "vpc" {
  source = "./infrastructure/vpc"

  vpc_name     = local.vpc_name
  cluster_name = local.cluster_name
}

# EKS cluster, wired to the network created by module.vpc.
module "eks" {
  source = "./infrastructure/eks"

  cluster_name        = local.cluster_name
  eks_cluster_version = "1.23"

  # Networking inputs taken from the VPC module's outputs.
  vpc_id                = module.vpc.vpc_id
  private_subnets       = module.vpc.private_subnets
  security_group_id_one = [module.vpc.worker_group_mgmt_one_id]
  security_group_id_two = [module.vpc.worker_group_mgmt_two_id]

  # Explicitly wait for the whole VPC module, not only the outputs used above.
  depends_on = [module.vpc]
}

# CUSTOM TOOLS
# Airflow: general settings, RDS database settings, and Helm chart settings.
module "airflow" {
  source = "./modules/airflow"

  # On/off switch: one instance when var.deploy_airflow is true, none otherwise.
  count = var.deploy_airflow ? 1 : 0

  # General
  name             = "airflow"
  cluster_name     = local.cluster_name
  cluster_endpoint = module.eks.cluster_endpoint

  # RDS: database engine and sizing
  rds_name              = "airflow"
  rds_engine            = "postgres"
  rds_engine_version    = "13.3"
  rds_instance_class    = "db.t3.micro"
  rds_port              = local.port_airflow
  storage_type          = local.storage_type
  max_allocated_storage = local.max_allocated_storage

  # RDS: network placement, taken from the VPC module's outputs
  vpc_id                      = module.vpc.vpc_id
  private_subnets             = module.vpc.private_subnets
  private_subnets_cidr_blocks = module.vpc.private_subnets_cidr_blocks

  # HELM: chart to install
  helm_chart_repository = "https://airflow-helm.github.io/charts"
  helm_chart_name       = "airflow"
  helm_chart_version    = "8.6.1"

  # HELM: Git repository settings handed to the chart deployment
  git_repository_url = local.git_repository_url
  git_branch         = local.git_branch
  git_username       = local.git_username
  git_token          = local.git_token

  # Deploy only after the EKS module has been fully applied.
  depends_on = [module.eks]
}


# MLflow: S3 bucket settings and RDS database settings.
module "mlflow" {
  source = "./modules/mlflow"

  # On/off switch: one instance when var.deploy_mlflow is true, none otherwise.
  count = var.deploy_mlflow ? 1 : 0

  # General
  name = "mlflow"

  # S3 bucket
  mlflow_s3_bucket_name = local.mlflow_s3_bucket_name
  s3_force_destroy      = local.force_destroy_s3_bucket

  # RDS: database engine and sizing
  rds_name              = "mlflow"
  rds_engine            = "mysql"
  rds_engine_version    = "8.0.30"
  rds_instance_class    = "db.t3.micro"
  rds_port              = local.port_mlflow
  storage_type          = local.storage_type
  max_allocated_storage = local.max_allocated_storage

  # RDS: network placement, taken from the VPC module's outputs
  vpc_id                      = module.vpc.vpc_id
  private_subnets             = module.vpc.private_subnets
  private_subnets_cidr_blocks = module.vpc.private_subnets_cidr_blocks

  # Deploy only after the EKS module has been fully applied.
  depends_on = [module.eks]
}


# JupyterHub: general settings and Helm chart settings.
module "jupyterhub" {
  source = "./modules/jupyterhub"

  # On/off switch: one instance when var.deploy_jupyterhub is true, none otherwise.
  count = var.deploy_jupyterhub ? 1 : 0

  # General
  name             = "jupyterhub"
  cluster_name     = local.cluster_name
  cluster_endpoint = module.eks.cluster_endpoint

  # HELM: chart to install
  helm_chart_repository = "https://jupyterhub.github.io/helm-chart/"
  helm_chart_name       = "jupyterhub"
  helm_chart_version    = "2.0.0"

  # Deploy only after the EKS module has been fully applied.
  depends_on = [module.eks]
}