diff --git a/.gitignore b/.gitignore index 6f744e3..77ce3d5 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,12 @@ CLAUDE.md devbox.d/ devbox.lock .devbox/ -bash.exe.stackdump \ No newline at end of file +bash.exe.stackdump + +# OpenTofu +.tofu/configs/*.env +.tofu/scripts/*.config +.tofu/platforms/**/.terraform/ +.tofu/platforms/**/terraform.tfstate* +.tofu/platforms/**/tfplan +# Do NOT ignore .terraform.lock.hcl — commit it so provider versions stay pinned across machines diff --git a/.tofu/configs/aks.env.example b/.tofu/configs/aks.env.example new file mode 100644 index 0000000..003a029 --- /dev/null +++ b/.tofu/configs/aks.env.example @@ -0,0 +1,9 @@ +# Azure AKS credentials — copy to aks.env and fill in values +# NEVER commit aks.env to git! + +# Required +AZURE_TENANT_ID=your-azure-tenant-id +AZURE_SUBSCRIPTION_ID=your-azure-subscription-id + +# Optional — defaults to cluster name if not set +ARM_RESOURCE_GROUP= diff --git a/.tofu/configs/eks.env.example b/.tofu/configs/eks.env.example new file mode 100644 index 0000000..8e5885a --- /dev/null +++ b/.tofu/configs/eks.env.example @@ -0,0 +1,10 @@ +# AWS EKS credentials — copy to eks.env and fill in values +# NEVER commit eks.env to git! + +# Required — AWS CLI profile or access key +AWS_PROFILE=default +AWS_REGION=eu-west-1 + +# Optional — override with explicit keys instead of profile +# AWS_ACCESS_KEY_ID= +# AWS_SECRET_ACCESS_KEY= diff --git a/.tofu/configs/gke.env.example b/.tofu/configs/gke.env.example new file mode 100644 index 0000000..2da5828 --- /dev/null +++ b/.tofu/configs/gke.env.example @@ -0,0 +1,9 @@ +# GCP GKE credentials — copy to gke.env and fill in values +# NEVER commit gke.env to git! 
+ +# Required +GCP_PROJECT_ID=your-gcp-project-id +GCP_REGION=europe-west4 + +# Optional — path to service account JSON key (if not using gcloud auth) +# GOOGLE_APPLICATION_CREDENTIALS=/path/to/sa-key.json diff --git a/.tofu/configs/upc.env.example b/.tofu/configs/upc.env.example new file mode 100644 index 0000000..697f3c8 --- /dev/null +++ b/.tofu/configs/upc.env.example @@ -0,0 +1,8 @@ +# UpCloud credentials — copy to upc.env and fill in values +# NEVER commit upc.env to git! + +# Required +UPCLOUD_TOKEN=your-upcloud-api-token + +# Optional — set after cluster creation for kubeconfig retrieval +UPCLOUD_CLUSTER_ID= diff --git a/.tofu/platforms/aks/dev/main.tf b/.tofu/platforms/aks/dev/main.tf new file mode 100644 index 0000000..d19d1b2 --- /dev/null +++ b/.tofu/platforms/aks/dev/main.tf @@ -0,0 +1,18 @@ +module "cluster" { + source = "../modules/cluster" + + prefix = "clst-dev" + location = "norwayeast" + resource_group_name = "clst-dev-rg" + + # AKS — small dev nodes + aks_node_vm_size = "Standard_B2s" + aks_node_count = 2 + + enable_delete_lock = false + + tags = { + Environment = "dev" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/aks/dev/outputs.tf b/.tofu/platforms/aks/dev/outputs.tf new file mode 100644 index 0000000..ee1f8de --- /dev/null +++ b/.tofu/platforms/aks/dev/outputs.tf @@ -0,0 +1,26 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" { + value = module.cluster.cluster_name +} + +output "resource_group_name" { + value = module.cluster.resource_group_name +} + +output "kubernetes_version" { + value = module.cluster.kubernetes_version +} + +output "location" { + value = module.cluster.location +} + +output "oidc_issuer_url" { + value = module.cluster.oidc_issuer_url +} + +output "kubeconfig" { + value = module.cluster.kubeconfig + sensitive = true +} diff --git a/.tofu/platforms/aks/dev/providers.tf b/.tofu/platforms/aks/dev/providers.tf new file mode 100644 index 0000000..f24c50e --- 
/dev/null +++ b/.tofu/platforms/aks/dev/providers.tf @@ -0,0 +1,17 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + } +} + +provider "azurerm" { + features {} + # Credentials via environment variables: + # ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET + # Or: az login (CLI session; azurerm 4.x still requires ARM_SUBSCRIPTION_ID) +} diff --git a/.tofu/platforms/aks/modules/cluster/main.tf b/.tofu/platforms/aks/modules/cluster/main.tf new file mode 100644 index 0000000..7459bf9 --- /dev/null +++ b/.tofu/platforms/aks/modules/cluster/main.tf @@ -0,0 +1,72 @@ +# Current Azure/Entra ID context (tenant, subscription, client) — not yet referenced by this module +data "azurerm_client_config" "current" {} + +# ─── Resource Group ─────────────────────────────────────────────────── + +resource "azurerm_resource_group" "main" { + name = var.resource_group_name + location = var.location + tags = var.tags +} + +resource "azurerm_management_lock" "main" { + count = var.enable_delete_lock ? 
1 : 0 + name = "${var.prefix}-delete-lock" + scope = azurerm_resource_group.main.id + lock_level = "CanNotDelete" + notes = "Prevents accidental deletion of production resources" +} + +# ─── Networking ─────────────────────────────────────────────────────── + +resource "azurerm_virtual_network" "main" { + name = "${var.prefix}-vnet" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + address_space = [var.vnet_address_space] + tags = var.tags +} + +# AKS nodes subnet +resource "azurerm_subnet" "aks" { + name = "${var.prefix}-aks-subnet" + resource_group_name = azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = [var.aks_subnet_cidr] +} + +# ─── AKS Cluster ────────────────────────────────────────────────────── + +resource "azurerm_kubernetes_cluster" "main" { + name = "${var.prefix}-aks" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + dns_prefix = replace(var.prefix, "-", "") + kubernetes_version = var.aks_kubernetes_version + tags = var.tags + + default_node_pool { + name = "system" + node_count = var.aks_node_count + vm_size = var.aks_node_vm_size + vnet_subnet_id = azurerm_subnet.aks.id + node_labels = { + prefix = var.prefix + role = "worker" + env = lookup(var.tags, "Environment", "dev") + } + } + + identity { + type = "SystemAssigned" + } + + network_profile { + network_plugin = "azure" + network_policy = "azure" + } + + # Enable Workload Identity for keyless Azure service access (MSI) + oidc_issuer_enabled = true + workload_identity_enabled = true +} diff --git a/.tofu/platforms/aks/modules/cluster/outputs.tf b/.tofu/platforms/aks/modules/cluster/outputs.tf new file mode 100644 index 0000000..42f52cc --- /dev/null +++ b/.tofu/platforms/aks/modules/cluster/outputs.tf @@ -0,0 +1,32 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" { + 
description = "AKS cluster name" + value = azurerm_kubernetes_cluster.main.name +} + +output "resource_group_name" { + description = "Resource group name" + value = azurerm_resource_group.main.name +} + +output "kubernetes_version" { + description = "Kubernetes version" + value = azurerm_kubernetes_cluster.main.kubernetes_version +} + +output "location" { + description = "Azure region" + value = azurerm_resource_group.main.location +} + +output "oidc_issuer_url" { + description = "AKS OIDC issuer URL (for workload identity federation)" + value = azurerm_kubernetes_cluster.main.oidc_issuer_url +} + +output "kubeconfig" { + description = "Kubeconfig for the AKS cluster" + value = azurerm_kubernetes_cluster.main.kube_config_raw + sensitive = true +} diff --git a/.tofu/platforms/aks/modules/cluster/providers.tf b/.tofu/platforms/aks/modules/cluster/providers.tf new file mode 100644 index 0000000..0be1288 --- /dev/null +++ b/.tofu/platforms/aks/modules/cluster/providers.tf @@ -0,0 +1,18 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + azuread = { + source = "hashicorp/azuread" + version = "~> 3.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} diff --git a/.tofu/platforms/aks/modules/cluster/variables.tf b/.tofu/platforms/aks/modules/cluster/variables.tf new file mode 100644 index 0000000..0f676a2 --- /dev/null +++ b/.tofu/platforms/aks/modules/cluster/variables.tf @@ -0,0 +1,56 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +variable "prefix" { + description = "Prefix for resource names" + type = string +} + +variable "location" { + description = "Azure region (e.g., norwayeast, westeurope, northeurope)" + type = string +} + +variable "resource_group_name" { + description = "Name of the Azure Resource Group to create" + type = string +} + +variable "vnet_address_space" { + description = "Address space for the 
virtual network" + type = string + default = "10.100.0.0/16" +} + +variable "aks_subnet_cidr" { + description = "CIDR block for the AKS node subnet" + type = string + default = "10.100.0.0/22" +} + +variable "aks_node_vm_size" { + description = "VM size for AKS worker nodes (e.g., Standard_B2s, Standard_D4s_v3)" + type = string +} + +variable "aks_node_count" { + description = "Number of AKS worker nodes" + type = number +} + +variable "aks_kubernetes_version" { + description = "Kubernetes version for AKS (null = latest stable)" + type = string + default = null +} + +variable "enable_delete_lock" { + description = "Protect the resource group from accidental deletion" + type = bool + default = false +} + +variable "tags" { + description = "Tags applied to all resources" + type = map(string) + default = {} +} diff --git a/.tofu/platforms/aks/prod/main.tf b/.tofu/platforms/aks/prod/main.tf new file mode 100644 index 0000000..d461a06 --- /dev/null +++ b/.tofu/platforms/aks/prod/main.tf @@ -0,0 +1,18 @@ +module "cluster" { + source = "../modules/cluster" + + prefix = "clst" + location = "westeurope" + resource_group_name = "clst-prod-rg" + + # AKS — general-purpose nodes for production + aks_node_vm_size = "Standard_D4s_v3" + aks_node_count = 3 + + enable_delete_lock = true + + tags = { + Environment = "prod" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/aks/prod/outputs.tf b/.tofu/platforms/aks/prod/outputs.tf new file mode 100644 index 0000000..ee1f8de --- /dev/null +++ b/.tofu/platforms/aks/prod/outputs.tf @@ -0,0 +1,26 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" { + value = module.cluster.cluster_name +} + +output "resource_group_name" { + value = module.cluster.resource_group_name +} + +output "kubernetes_version" { + value = module.cluster.kubernetes_version +} + +output "location" { + value = module.cluster.location +} + +output "oidc_issuer_url" { + value = module.cluster.oidc_issuer_url +} + 
+output "kubeconfig" { + value = module.cluster.kubeconfig + sensitive = true +} diff --git a/.tofu/platforms/aks/prod/providers.tf b/.tofu/platforms/aks/prod/providers.tf new file mode 100644 index 0000000..f24c50e --- /dev/null +++ b/.tofu/platforms/aks/prod/providers.tf @@ -0,0 +1,17 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + } +} + +provider "azurerm" { + features {} + # Credentials via environment variables: + # ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET + # Or: az login (CLI session; azurerm 4.x still requires ARM_SUBSCRIPTION_ID) +} diff --git a/.tofu/platforms/aks/workload/main.tf b/.tofu/platforms/aks/workload/main.tf new file mode 100644 index 0000000..5441ae1 --- /dev/null +++ b/.tofu/platforms/aks/workload/main.tf @@ -0,0 +1,173 @@ +# ============================================================================= +# Azure Workload Cluster +# ============================================================================= +# A lean AKS cluster for running application workloads. No managed data +# services — those live on the platform cluster. ArgoCD (on the platform +# cluster) deploys apps to this cluster via the app-of-apps pattern. 
+# +# Platform components deployed by deploy-workload.sh: +# nginx-ingress, cert-manager, external-dns, external-secrets, alloy +# +# Usage: +# tofu init && tofu plan && tofu apply +# ./sync-tofu-outputs.sh --env azure-workload +# ./deploy-workload.sh --env azure-workload +# ============================================================================= + +variable "prefix" { + description = "Prefix for resource names (e.g., clst-workload)" + type = string + default = "clst-workload" +} + +variable "location" { + description = "Azure region" + type = string + default = "norwayeast" +} + +variable "resource_group_name" { + description = "Name of the Azure Resource Group to create" + type = string + default = "clst-workload-rg" +} + +variable "vnet_address_space" { + description = "Address space for the virtual network" + type = string + default = "10.110.0.0/16" +} + +variable "aks_subnet_cidr" { + description = "CIDR block for the AKS node subnet" + type = string + default = "10.110.0.0/22" +} + +variable "aks_node_vm_size" { + description = "VM size for AKS worker nodes" + type = string + default = "Standard_B2s" +} + +variable "aks_node_count" { + description = "Number of AKS worker nodes" + type = number + default = 2 +} + +variable "aks_kubernetes_version" { + description = "Kubernetes version for AKS (null = latest stable)" + type = string + default = null +} + +variable "domain" { + description = "Public domain name — must have an existing Azure DNS zone" + type = string +} + +variable "dns_zone_resource_group" { + description = "Resource group containing the Azure DNS zone (defaults to cluster RG)" + type = string + default = "" +} + +variable "tags" { + description = "Tags applied to all resources" + type = map(string) + default = { + Environment = "workload" + ManagedBy = "tofu" + } +} + +# ─── Resource Group ─────────────────────────────────────────────────── + +resource "azurerm_resource_group" "main" { + name = var.resource_group_name + location = 
var.location + tags = var.tags +} + +# ─── Networking ─────────────────────────────────────────────────────── + +resource "azurerm_virtual_network" "main" { + name = "${var.prefix}-vnet" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + address_space = [var.vnet_address_space] + tags = var.tags +} + +resource "azurerm_subnet" "aks" { + name = "${var.prefix}-aks-subnet" + resource_group_name = azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = [var.aks_subnet_cidr] +} + +# ─── AKS Cluster ────────────────────────────────────────────────────── + +resource "azurerm_kubernetes_cluster" "main" { + name = "${var.prefix}-aks" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + dns_prefix = replace(var.prefix, "-", "") + kubernetes_version = var.aks_kubernetes_version + tags = var.tags + + default_node_pool { + name = "system" + node_count = var.aks_node_count + vm_size = var.aks_node_vm_size + vnet_subnet_id = azurerm_subnet.aks.id + node_labels = { + prefix = var.prefix + role = "worker" + env = lookup(var.tags, "Environment", "workload") + } + } + + identity { + type = "SystemAssigned" + } + + network_profile { + network_plugin = "azure" + network_policy = "azure" + } + + oidc_issuer_enabled = true + workload_identity_enabled = true +} + +# ─── External-DNS Workload Identity ────────────────────────────────── +# Allows external-dns to manage Azure DNS records for app ingresses. + +data "azurerm_dns_zone" "main" { + name = var.domain + resource_group_name = var.dns_zone_resource_group != "" ? 
var.dns_zone_resource_group : azurerm_resource_group.main.name +} + +resource "azurerm_user_assigned_identity" "external_dns" { + name = "${var.prefix}-external-dns-identity" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + tags = var.tags +} + +resource "azurerm_role_assignment" "external_dns_dns_contributor" { + scope = data.azurerm_dns_zone.main.id + role_definition_name = "DNS Zone Contributor" + principal_id = azurerm_user_assigned_identity.external_dns.principal_id +} + +resource "azurerm_federated_identity_credential" "external_dns" { + name = "${var.prefix}-external-dns-fedcred" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.external_dns.id + audience = ["api://AzureADTokenExchange"] + issuer = azurerm_kubernetes_cluster.main.oidc_issuer_url + subject = "system:serviceaccount:external-dns:external-dns" +} diff --git a/.tofu/platforms/aks/workload/outputs.tf b/.tofu/platforms/aks/workload/outputs.tf new file mode 100644 index 0000000..45dcc2e --- /dev/null +++ b/.tofu/platforms/aks/workload/outputs.tf @@ -0,0 +1,4 @@ +output "cluster_name" { value = azurerm_kubernetes_cluster.main.name } +output "resource_group_name" { value = azurerm_resource_group.main.name } +output "location" { value = azurerm_resource_group.main.location } +output "external_dns_identity_client_id" { value = azurerm_user_assigned_identity.external_dns.client_id } diff --git a/.tofu/platforms/aks/workload/providers.tf b/.tofu/platforms/aks/workload/providers.tf new file mode 100644 index 0000000..29f7a8f --- /dev/null +++ b/.tofu/platforms/aks/workload/providers.tf @@ -0,0 +1,21 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} + +provider "azurerm" { + features {} + # Credentials via environment variables: + # 
ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET + # Or: az login (CLI session; azurerm 4.x still requires ARM_SUBSCRIPTION_ID) +} diff --git a/.tofu/platforms/eks/dev/main.tf b/.tofu/platforms/eks/dev/main.tf new file mode 100644 index 0000000..dd76736 --- /dev/null +++ b/.tofu/platforms/eks/dev/main.tf @@ -0,0 +1,21 @@ +module "cluster" { + source = "../modules/cluster" + + region = var.region + prefix = "clst-dev" + + # VPC + availability_zones = ["${var.region}a", "${var.region}b"] + + # EKS — small dev nodes + node_instance_type = "t3.medium" + node_count = 2 + node_min_count = 1 + node_max_count = 4 + kubernetes_version = "1.30" + + tags = { + Environment = "dev" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/eks/dev/outputs.tf b/.tofu/platforms/eks/dev/outputs.tf new file mode 100644 index 0000000..83c41b8 --- /dev/null +++ b/.tofu/platforms/eks/dev/outputs.tf @@ -0,0 +1,5 @@ +output "cluster_name" { value = module.cluster.cluster_name } +output "aws_region" { value = module.cluster.aws_region } +output "oidc_issuer_url" { value = module.cluster.oidc_issuer_url } +output "oidc_provider_arn" { value = module.cluster.oidc_provider_arn } +output "vpc_id" { value = module.cluster.vpc_id } diff --git a/.tofu/platforms/eks/dev/providers.tf b/.tofu/platforms/eks/dev/providers.tf new file mode 100644 index 0000000..eebda5f --- /dev/null +++ b/.tofu/platforms/eks/dev/providers.tf @@ -0,0 +1,24 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + tls = { + source = "hashicorp/tls" + version = "~> 4.0" + } + } +} + +# Authentication: set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN +# or configure an AWS profile: export AWS_PROFILE=clst +provider "aws" { + region = var.region +} + +variable "region" { + description = "AWS region for dev environment" + type = string + default = "eu-west-1" +} diff --git a/.tofu/platforms/eks/modules/cluster/main.tf b/.tofu/platforms/eks/modules/cluster/main.tf new file mode 
100644 index 0000000..e0c94e2 --- /dev/null +++ b/.tofu/platforms/eks/modules/cluster/main.tf @@ -0,0 +1,207 @@ +# ─── VPC ────────────────────────────────────────────────────────────── + +resource "aws_vpc" "main" { + cidr_block = var.vpc_cidr + enable_dns_hostnames = true + enable_dns_support = true + + tags = merge(var.tags, { Name = "${var.prefix}-vpc" }) +} + +resource "aws_internet_gateway" "main" { + vpc_id = aws_vpc.main.id + tags = merge(var.tags, { Name = "${var.prefix}-igw" }) +} + +# Public subnets (one per AZ) — for NAT gateways and load balancers +resource "aws_subnet" "public" { + count = length(var.availability_zones) + vpc_id = aws_vpc.main.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index) + availability_zone = var.availability_zones[count.index] + + map_public_ip_on_launch = true + + tags = merge(var.tags, { + Name = "${var.prefix}-public-${count.index + 1}" + "kubernetes.io/cluster/${var.prefix}-eks" = "shared" + "kubernetes.io/role/elb" = "1" + }) +} + +# Private subnets (one per AZ) — for EKS nodes +resource "aws_subnet" "private" { + count = length(var.availability_zones) + vpc_id = aws_vpc.main.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones)) + availability_zone = var.availability_zones[count.index] + + tags = merge(var.tags, { + Name = "${var.prefix}-private-${count.index + 1}" + "kubernetes.io/cluster/${var.prefix}-eks" = "shared" + "kubernetes.io/role/internal-elb" = "1" + }) +} + +# NAT Gateway (single, in first public subnet — use one per AZ for prod HA) +resource "aws_eip" "nat" { + domain = "vpc" + tags = merge(var.tags, { Name = "${var.prefix}-nat-eip" }) +} + +resource "aws_nat_gateway" "main" { + allocation_id = aws_eip.nat.id + subnet_id = aws_subnet.public[0].id + tags = merge(var.tags, { Name = "${var.prefix}-nat" }) + + depends_on = [aws_internet_gateway.main] +} + +resource "aws_route_table" "public" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + 
gateway_id = aws_internet_gateway.main.id + } + + tags = merge(var.tags, { Name = "${var.prefix}-public-rt" }) +} + +resource "aws_route_table_association" "public" { + count = length(var.availability_zones) + subnet_id = aws_subnet.public[count.index].id + route_table_id = aws_route_table.public.id +} + +resource "aws_route_table" "private" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + nat_gateway_id = aws_nat_gateway.main.id + } + + tags = merge(var.tags, { Name = "${var.prefix}-private-rt" }) +} + +resource "aws_route_table_association" "private" { + count = length(var.availability_zones) + subnet_id = aws_subnet.private[count.index].id + route_table_id = aws_route_table.private.id +} + +# ─── EKS Cluster ────────────────────────────────────────────────────── + +resource "aws_iam_role" "eks_cluster" { + name_prefix = "${var.prefix}-eks-cluster-" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "eks.amazonaws.com" } + }] + }) + + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "eks_cluster_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy" + role = aws_iam_role.eks_cluster.name +} + +resource "aws_eks_cluster" "main" { + name = "${var.prefix}-eks" + role_arn = aws_iam_role.eks_cluster.arn + version = var.kubernetes_version + + vpc_config { + subnet_ids = concat(aws_subnet.private[*].id, aws_subnet.public[*].id) + endpoint_private_access = true + endpoint_public_access = true + } + + # Cluster access: accept both EKS access entries (API) and the aws-auth ConfigMap + access_config { + authentication_mode = "API_AND_CONFIG_MAP" + } + + tags = var.tags + + depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy] +} + +# OIDC provider — required for IRSA (IAM Roles for Service Accounts) +data "tls_certificate" "eks" { + url = aws_eks_cluster.main.identity[0].oidc[0].issuer +} + +resource 
"aws_iam_openid_connect_provider" "eks" { + client_id_list = ["sts.amazonaws.com"] + thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint] + url = aws_eks_cluster.main.identity[0].oidc[0].issuer + + tags = var.tags +} + +# EKS Node Group + +resource "aws_iam_role" "eks_nodes" { + name_prefix = "${var.prefix}-eks-nodes-" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "ec2.amazonaws.com" } + }] + }) + + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_iam_role_policy_attachment" "eks_cni_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_eks_node_group" "main" { + cluster_name = aws_eks_cluster.main.name + node_group_name = "${var.prefix}-nodes" + node_role_arn = aws_iam_role.eks_nodes.arn + subnet_ids = aws_subnet.private[*].id + + instance_types = [var.node_instance_type] + + scaling_config { + desired_size = var.node_count + max_size = var.node_max_count + min_size = var.node_min_count + } + + update_config { + max_unavailable = 1 + } + + tags = var.tags + + depends_on = [ + aws_iam_role_policy_attachment.eks_worker_node_policy, + aws_iam_role_policy_attachment.eks_cni_policy, + aws_iam_role_policy_attachment.eks_ecr_readonly, + ] +} diff --git a/.tofu/platforms/eks/modules/cluster/outputs.tf b/.tofu/platforms/eks/modules/cluster/outputs.tf new file mode 100644 index 0000000..180ab9d --- /dev/null +++ b/.tofu/platforms/eks/modules/cluster/outputs.tf @@ -0,0 +1,26 @@ +# ─── Cluster 
───────────────────────────────────────────────────────── + +output "cluster_name" { + description = "EKS cluster name" + value = aws_eks_cluster.main.name +} + +output "aws_region" { + description = "AWS region" + value = var.region +} + +output "oidc_issuer_url" { + description = "EKS OIDC issuer URL (for IRSA)" + value = aws_eks_cluster.main.identity[0].oidc[0].issuer +} + +output "oidc_provider_arn" { + description = "IAM OIDC provider ARN (for IRSA trust policies)" + value = aws_iam_openid_connect_provider.eks.arn +} + +output "vpc_id" { + description = "VPC ID" + value = aws_vpc.main.id +} diff --git a/.tofu/platforms/eks/modules/cluster/providers.tf b/.tofu/platforms/eks/modules/cluster/providers.tf new file mode 100644 index 0000000..dc452e6 --- /dev/null +++ b/.tofu/platforms/eks/modules/cluster/providers.tf @@ -0,0 +1,12 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + tls = { + source = "hashicorp/tls" + version = "~> 4.0" + } + } +} diff --git a/.tofu/platforms/eks/modules/cluster/variables.tf b/.tofu/platforms/eks/modules/cluster/variables.tf new file mode 100644 index 0000000..2eaa979 --- /dev/null +++ b/.tofu/platforms/eks/modules/cluster/variables.tf @@ -0,0 +1,61 @@ +# ─── Region ────────────────────────────────────────────────────────── + +variable "region" { + description = "AWS region (e.g., eu-west-1, us-east-1)" + type = string +} + +variable "prefix" { + description = "Prefix for resource names (e.g., clst-dev)" + type = string +} + +# ─── Networking ─────────────────────────────────────────────────────── + +variable "vpc_cidr" { + description = "VPC CIDR block" + type = string + default = "10.100.0.0/16" +} + +variable "availability_zones" { + description = "List of AZs for subnets (2–3 recommended)" + type = list(string) +} + +# ─── EKS Cluster ───────────────────────────────────────────────────── + +variable "node_instance_type" { + description = "EKS node instance type (e.g., 
t3.medium, m5.xlarge)" + type = string +} + +variable "node_count" { + description = "Desired number of EKS worker nodes" + type = number +} + +variable "node_min_count" { + description = "Minimum number of EKS worker nodes" + type = number + default = 1 +} + +variable "node_max_count" { + description = "Maximum number of EKS worker nodes" + type = number +} + +variable "kubernetes_version" { + description = "Kubernetes version for EKS (e.g., \"1.30\")" + type = string + default = "1.30" +} + +# ─── Tags ───────────────────────────────────────────────────────────── + +variable "tags" { + description = "Tags applied to all resources" + type = map(string) + default = {} +} diff --git a/.tofu/platforms/eks/prod/main.tf b/.tofu/platforms/eks/prod/main.tf new file mode 100644 index 0000000..3d312ad --- /dev/null +++ b/.tofu/platforms/eks/prod/main.tf @@ -0,0 +1,21 @@ +module "cluster" { + source = "../modules/cluster" + + region = var.region + prefix = "clst" + + # VPC + availability_zones = ["${var.region}a", "${var.region}b", "${var.region}c"] + + # EKS — general-purpose nodes for production + node_instance_type = "m5.xlarge" + node_count = 3 + node_min_count = 3 + node_max_count = 6 + kubernetes_version = "1.30" + + tags = { + Environment = "prod" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/eks/prod/outputs.tf b/.tofu/platforms/eks/prod/outputs.tf new file mode 100644 index 0000000..83c41b8 --- /dev/null +++ b/.tofu/platforms/eks/prod/outputs.tf @@ -0,0 +1,5 @@ +output "cluster_name" { value = module.cluster.cluster_name } +output "aws_region" { value = module.cluster.aws_region } +output "oidc_issuer_url" { value = module.cluster.oidc_issuer_url } +output "oidc_provider_arn" { value = module.cluster.oidc_provider_arn } +output "vpc_id" { value = module.cluster.vpc_id } diff --git a/.tofu/platforms/eks/prod/providers.tf b/.tofu/platforms/eks/prod/providers.tf new file mode 100644 index 0000000..51a8880 --- /dev/null +++ 
b/.tofu/platforms/eks/prod/providers.tf @@ -0,0 +1,22 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + tls = { + source = "hashicorp/tls" + version = "~> 4.0" + } + } +} + +provider "aws" { + region = var.region +} + +variable "region" { + description = "AWS region for prod environment" + type = string + default = "eu-west-1" +} diff --git a/.tofu/platforms/eks/workload/main.tf b/.tofu/platforms/eks/workload/main.tf new file mode 100644 index 0000000..0fa3450 --- /dev/null +++ b/.tofu/platforms/eks/workload/main.tf @@ -0,0 +1,339 @@ +# ============================================================================= +# AWS Workload Cluster +# ============================================================================= +# A lean EKS cluster for running application workloads. No managed data +# services — those live on the platform cluster. ArgoCD (on the platform +# cluster) deploys apps to this cluster via the app-of-apps pattern. +# +# Platform components deployed by deploy-workload.sh: +# nginx-ingress, cert-manager, external-dns, external-secrets, alloy +# +# Usage: +# tofu init && tofu plan && tofu apply +# ./sync-tofu-outputs.sh --env aws-workload +# ./deploy-workload.sh --env aws-workload +# ============================================================================= + +variable "prefix" { + description = "Prefix for resource names (e.g., clst-workload)" + type = string + default = "clst-workload" +} + +variable "availability_zones" { + description = "List of AZs for subnets" + type = list(string) + default = ["eu-west-1a", "eu-west-1b"] +} + +variable "vpc_cidr" { + description = "VPC CIDR block" + type = string + default = "10.110.0.0/16" +} + +variable "node_instance_type" { + description = "EKS node instance type" + type = string + default = "t3.medium" +} + +variable "node_count" { + description = "Desired number of EKS worker nodes" + type = number + default = 2 +} + +variable "node_min_count" { + 
description = "Minimum number of EKS worker nodes" + type = number + default = 1 +} + +variable "node_max_count" { + description = "Maximum number of EKS worker nodes" + type = number + default = 4 +} + +variable "kubernetes_version" { + description = "Kubernetes version for EKS" + type = string + default = "1.30" +} + +variable "domain" { + description = "Public domain name — must have an existing Route53 hosted zone" + type = string +} + +variable "tags" { + description = "Tags applied to all resources" + type = map(string) + default = { + Environment = "workload" + ManagedBy = "tofu" + } +} + +# ─── VPC ────────────────────────────────────────────────────────────── + +resource "aws_vpc" "main" { + cidr_block = var.vpc_cidr + enable_dns_hostnames = true + enable_dns_support = true + + tags = merge(var.tags, { Name = "${var.prefix}-vpc" }) +} + +resource "aws_internet_gateway" "main" { + vpc_id = aws_vpc.main.id + tags = merge(var.tags, { Name = "${var.prefix}-igw" }) +} + +resource "aws_subnet" "public" { + count = length(var.availability_zones) + vpc_id = aws_vpc.main.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index) + availability_zone = var.availability_zones[count.index] + + map_public_ip_on_launch = true + + tags = merge(var.tags, { + Name = "${var.prefix}-public-${count.index + 1}" + "kubernetes.io/cluster/${var.prefix}-eks" = "shared" + "kubernetes.io/role/elb" = "1" + }) +} + +resource "aws_subnet" "private" { + count = length(var.availability_zones) + vpc_id = aws_vpc.main.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones)) + availability_zone = var.availability_zones[count.index] + + tags = merge(var.tags, { + Name = "${var.prefix}-private-${count.index + 1}" + "kubernetes.io/cluster/${var.prefix}-eks" = "shared" + "kubernetes.io/role/internal-elb" = "1" + }) +} + +resource "aws_eip" "nat" { + domain = "vpc" + tags = merge(var.tags, { Name = "${var.prefix}-nat-eip" }) +} + +resource "aws_nat_gateway" 
"main" { + allocation_id = aws_eip.nat.id + subnet_id = aws_subnet.public[0].id + tags = merge(var.tags, { Name = "${var.prefix}-nat" }) + + depends_on = [aws_internet_gateway.main] +} + +resource "aws_route_table" "public" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.main.id + } + + tags = merge(var.tags, { Name = "${var.prefix}-public-rt" }) +} + +resource "aws_route_table_association" "public" { + count = length(var.availability_zones) + subnet_id = aws_subnet.public[count.index].id + route_table_id = aws_route_table.public.id +} + +resource "aws_route_table" "private" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + nat_gateway_id = aws_nat_gateway.main.id + } + + tags = merge(var.tags, { Name = "${var.prefix}-private-rt" }) +} + +resource "aws_route_table_association" "private" { + count = length(var.availability_zones) + subnet_id = aws_subnet.private[count.index].id + route_table_id = aws_route_table.private.id +} + +# ─── EKS Cluster ────────────────────────────────────────────────────── + +resource "aws_iam_role" "eks_cluster" { + name_prefix = "${var.prefix}-eks-cluster-" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "eks.amazonaws.com" } + }] + }) + + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "eks_cluster_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy" + role = aws_iam_role.eks_cluster.name +} + +resource "aws_eks_cluster" "main" { + name = "${var.prefix}-eks" + role_arn = aws_iam_role.eks_cluster.arn + version = var.kubernetes_version + + vpc_config { + subnet_ids = concat(aws_subnet.private[*].id, aws_subnet.public[*].id) + endpoint_private_access = true + endpoint_public_access = true + } + + access_config { + authentication_mode = "API_AND_CONFIG_MAP" + } + + tags = var.tags + + depends_on = 
[aws_iam_role_policy_attachment.eks_cluster_policy] +} + +# OIDC provider — required for IRSA +data "tls_certificate" "eks" { + url = aws_eks_cluster.main.identity[0].oidc[0].issuer +} + +resource "aws_iam_openid_connect_provider" "eks" { + client_id_list = ["sts.amazonaws.com"] + thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint] + url = aws_eks_cluster.main.identity[0].oidc[0].issuer + + tags = var.tags +} + +resource "aws_iam_role" "eks_nodes" { + name_prefix = "${var.prefix}-eks-nodes-" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "ec2.amazonaws.com" } + }] + }) + + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_iam_role_policy_attachment" "eks_cni_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_eks_node_group" "main" { + cluster_name = aws_eks_cluster.main.name + node_group_name = "${var.prefix}-nodes" + node_role_arn = aws_iam_role.eks_nodes.arn + subnet_ids = aws_subnet.private[*].id + + instance_types = [var.node_instance_type] + + scaling_config { + desired_size = var.node_count + max_size = var.node_max_count + min_size = var.node_min_count + } + + update_config { + max_unavailable = 1 + } + + tags = var.tags + + depends_on = [ + aws_iam_role_policy_attachment.eks_worker_node_policy, + aws_iam_role_policy_attachment.eks_cni_policy, + aws_iam_role_policy_attachment.eks_ecr_readonly, + ] +} + +# ─── External-DNS IRSA ─────────────────────────────────────────────── +# Allows external-dns to manage 
Route53 records for app ingresses. + +data "aws_route53_zone" "main" { + name = var.domain + private_zone = false +} + +data "aws_iam_policy_document" "external_dns_assume_role" { + statement { + effect = "Allow" + + principals { + type = "Federated" + identifiers = [aws_iam_openid_connect_provider.eks.arn] + } + + actions = ["sts:AssumeRoleWithWebIdentity"] + + condition { + test = "StringEquals" + variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:sub" + values = ["system:serviceaccount:external-dns:external-dns"] + } + + condition { + test = "StringEquals" + variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:aud" + values = ["sts.amazonaws.com"] + } + } +} + +resource "aws_iam_role" "external_dns_irsa" { + name_prefix = "${var.prefix}-external-dns-irsa-" + assume_role_policy = data.aws_iam_policy_document.external_dns_assume_role.json + + tags = var.tags +} + +data "aws_iam_policy_document" "external_dns_route53" { + statement { + effect = "Allow" + actions = ["route53:ChangeResourceRecordSets"] + resources = ["arn:aws:route53:::hostedzone/${data.aws_route53_zone.main.zone_id}"] + } + + statement { + effect = "Allow" + actions = ["route53:ListHostedZones", "route53:ListResourceRecordSets", "route53:ListTagsForResource"] + resources = ["*"] + } +} + +resource "aws_iam_role_policy" "external_dns_route53" { + name_prefix = "${var.prefix}-external-dns-route53-" + role = aws_iam_role.external_dns_irsa.id + policy = data.aws_iam_policy_document.external_dns_route53.json +} diff --git a/.tofu/platforms/eks/workload/outputs.tf b/.tofu/platforms/eks/workload/outputs.tf new file mode 100644 index 0000000..1cafe4a --- /dev/null +++ b/.tofu/platforms/eks/workload/outputs.tf @@ -0,0 +1,3 @@ +output "cluster_name" { value = aws_eks_cluster.main.name } +output "aws_region" { value = var.region } +output "external_dns_irsa_role_arn" { value = aws_iam_role.external_dns_irsa.arn } diff --git 
a/.tofu/platforms/eks/workload/providers.tf b/.tofu/platforms/eks/workload/providers.tf new file mode 100644 index 0000000..68245d6 --- /dev/null +++ b/.tofu/platforms/eks/workload/providers.tf @@ -0,0 +1,24 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + tls = { + source = "hashicorp/tls" + version = "~> 4.0" + } + } +} + +# Authentication: set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN +# or configure an AWS profile: export AWS_PROFILE=clst +provider "aws" { + region = var.region +} + +variable "region" { + description = "AWS region for the workload environment" + type = string + default = "eu-west-1" +} diff --git a/.tofu/platforms/gke/dev/main.tf b/.tofu/platforms/gke/dev/main.tf new file mode 100644 index 0000000..b31da90 --- /dev/null +++ b/.tofu/platforms/gke/dev/main.tf @@ -0,0 +1,17 @@ +module "cluster" { + source = "../modules/cluster" + + project_id = var.project_id + region = var.region + prefix = "clst-dev" + + # GKE — small dev nodes + node_machine_type = "e2-standard-2" + node_count = 2 + deletion_protection = false + + labels = { + environment = "dev" + managed-by = "tofu" + } +} diff --git a/.tofu/platforms/gke/dev/outputs.tf b/.tofu/platforms/gke/dev/outputs.tf new file mode 100644 index 0000000..ae2b507 --- /dev/null +++ b/.tofu/platforms/gke/dev/outputs.tf @@ -0,0 +1,3 @@ +output "cluster_name" { value = module.cluster.cluster_name } +output "project_id" { value = module.cluster.project_id } +output "region" { value = module.cluster.region } diff --git a/.tofu/platforms/gke/dev/providers.tf b/.tofu/platforms/gke/dev/providers.tf new file mode 100644 index 0000000..517d9eb --- /dev/null +++ b/.tofu/platforms/gke/dev/providers.tf @@ -0,0 +1,26 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.0" + } + } +} + +# Authentication: use Application Default Credentials (gcloud auth application-default login) +# or set 
GOOGLE_APPLICATION_CREDENTIALS to a service account key file. +provider "google" { + project = var.project_id + region = var.region +} + +variable "project_id" { + description = "GCP project ID for the dev environment" + type = string +} + +variable "region" { + description = "GCP region" + type = string + default = "europe-west4" +} diff --git a/.tofu/platforms/gke/modules/cluster/main.tf b/.tofu/platforms/gke/modules/cluster/main.tf new file mode 100644 index 0000000..77a2cef --- /dev/null +++ b/.tofu/platforms/gke/modules/cluster/main.tf @@ -0,0 +1,115 @@ +# ─── Required APIs ──────────────────────────────────────────────────── + +resource "google_project_service" "compute" { + project = var.project_id + service = "compute.googleapis.com" + disable_on_destroy = false +} + +resource "google_project_service" "container" { + project = var.project_id + service = "container.googleapis.com" + disable_on_destroy = false +} + +# ─── Networking ─────────────────────────────────────────────────────── + +resource "google_compute_network" "main" { + project = var.project_id + name = "${var.prefix}-vpc" + auto_create_subnetworks = false + + depends_on = [google_project_service.compute] +} + +resource "google_compute_subnetwork" "main" { + project = var.project_id + name = "${var.prefix}-subnet" + ip_cidr_range = "10.100.0.0/22" + region = var.region + network = google_compute_network.main.id + + # Secondary ranges required for GKE VPC-native cluster + secondary_ip_range { + range_name = "pods" + ip_cidr_range = "10.200.0.0/14" # /14 = ~262k pod IPs + } + + secondary_ip_range { + range_name = "services" + ip_cidr_range = "10.204.0.0/20" # /20 = ~4k service IPs + } +} + +# ─── GKE Cluster ────────────────────────────────────────────────────── +# +# Regional cluster (3 control-plane replicas) for HA. +# Workload Identity enabled — allows K8s service accounts to impersonate +# Google Service Accounts for keyless access to GCP services. 
+ +resource "google_container_cluster" "main" { + project = var.project_id + name = "${var.prefix}-gke" + location = var.region # regional cluster + + network = google_compute_network.main.id + subnetwork = google_compute_subnetwork.main.id + + # VPC-native cluster with alias IP ranges + ip_allocation_policy { + cluster_secondary_range_name = "pods" + services_secondary_range_name = "services" + } + + # Workload Identity pool — enables OIDC token projection for pods + workload_identity_config { + workload_pool = "${var.project_id}.svc.id.goog" + } + + # Remove default node pool — we manage our own below + remove_default_node_pool = true + initial_node_count = 1 + + deletion_protection = var.deletion_protection + + dynamic "release_channel" { + for_each = var.kubernetes_version == null ? [1] : [] + content { + channel = "STABLE" + } + } + + resource_labels = var.labels + + depends_on = [google_project_service.container] +} + +resource "google_container_node_pool" "main" { + project = var.project_id + name = "${var.prefix}-nodes" + location = var.region + cluster = google_container_cluster.main.name + node_count = var.node_count + + node_config { + machine_type = var.node_machine_type + + # GKE_METADATA mode is required for Workload Identity + workload_metadata_config { + mode = "GKE_METADATA" + } + + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + ] + + labels = merge(var.labels, { + role = "worker" + }) + } + + management { + auto_repair = true + auto_upgrade = true + } +} diff --git a/.tofu/platforms/gke/modules/cluster/outputs.tf b/.tofu/platforms/gke/modules/cluster/outputs.tf new file mode 100644 index 0000000..f5e93a7 --- /dev/null +++ b/.tofu/platforms/gke/modules/cluster/outputs.tf @@ -0,0 +1,16 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" { + description = "GKE cluster name" + value = google_container_cluster.main.name +} + +output "project_id" { + description = "GCP project ID" 
+ value = var.project_id +} + +output "region" { + description = "GCP region" + value = var.region +} diff --git a/.tofu/platforms/gke/modules/cluster/providers.tf b/.tofu/platforms/gke/modules/cluster/providers.tf new file mode 100644 index 0000000..3138f12 --- /dev/null +++ b/.tofu/platforms/gke/modules/cluster/providers.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.0" + } + } +} diff --git a/.tofu/platforms/gke/modules/cluster/variables.tf b/.tofu/platforms/gke/modules/cluster/variables.tf new file mode 100644 index 0000000..e43917f --- /dev/null +++ b/.tofu/platforms/gke/modules/cluster/variables.tf @@ -0,0 +1,48 @@ +# ─── Project / Region ──────────────────────────────────────────────── + +variable "project_id" { + description = "GCP project ID" + type = string +} + +variable "region" { + description = "GCP region (e.g., europe-west4, europe-west1)" + type = string +} + +variable "prefix" { + description = "Prefix for resource names (e.g., clst-dev)" + type = string +} + +# ─── GKE Cluster ───────────────────────────────────────────────────── + +variable "node_machine_type" { + description = "GKE node machine type (e.g., e2-standard-2, e2-standard-4)" + type = string +} + +variable "node_count" { + description = "Number of nodes per zone (regional cluster spawns nodes in each zone)" + type = number +} + +variable "kubernetes_version" { + description = "GKE Kubernetes version channel (null = STABLE release channel)" + type = string + default = null +} + +variable "deletion_protection" { + description = "Prevent cluster deletion (set true for production)" + type = bool + default = false +} + +# ─── Labels ────────────────────────────────────────────────────────── + +variable "labels" { + description = "Labels applied to all resources" + type = map(string) + default = {} +} diff --git a/.tofu/platforms/gke/prod/main.tf b/.tofu/platforms/gke/prod/main.tf new file mode 100644 index 
0000000..2b1a841 --- /dev/null +++ b/.tofu/platforms/gke/prod/main.tf @@ -0,0 +1,17 @@ +module "cluster" { + source = "../modules/cluster" + + project_id = var.project_id + region = var.region + prefix = "clst" + + # GKE — general-purpose nodes for production + node_machine_type = "e2-standard-4" + node_count = 3 + deletion_protection = true + + labels = { + environment = "prod" + managed-by = "tofu" + } +} diff --git a/.tofu/platforms/gke/prod/outputs.tf b/.tofu/platforms/gke/prod/outputs.tf new file mode 100644 index 0000000..ae2b507 --- /dev/null +++ b/.tofu/platforms/gke/prod/outputs.tf @@ -0,0 +1,3 @@ +output "cluster_name" { value = module.cluster.cluster_name } +output "project_id" { value = module.cluster.project_id } +output "region" { value = module.cluster.region } diff --git a/.tofu/platforms/gke/prod/providers.tf b/.tofu/platforms/gke/prod/providers.tf new file mode 100644 index 0000000..f306689 --- /dev/null +++ b/.tofu/platforms/gke/prod/providers.tf @@ -0,0 +1,24 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} + +variable "project_id" { + description = "GCP project ID for the prod environment" + type = string +} + +variable "region" { + description = "GCP region" + type = string + default = "europe-west1" +} diff --git a/.tofu/platforms/gke/workload/main.tf b/.tofu/platforms/gke/workload/main.tf new file mode 100644 index 0000000..832875b --- /dev/null +++ b/.tofu/platforms/gke/workload/main.tf @@ -0,0 +1,194 @@ +# ============================================================================= +# GCP Workload Cluster +# ============================================================================= +# A lean GKE cluster for running application workloads. No managed data +# services — those live on the platform cluster. ArgoCD (on the platform +# cluster) deploys apps to this cluster via the app-of-apps pattern. 
+# +# Platform components deployed by deploy-workload.sh: +# nginx-ingress, cert-manager, external-dns, external-secrets, alloy +# +# Usage: +# tofu init && tofu plan && tofu apply +# ./sync-tofu-outputs.sh --env gcp-workload +# ./deploy-workload.sh --env gcp-workload +# ============================================================================= + +variable "prefix" { + description = "Prefix for resource names (e.g., clst-workload)" + type = string + default = "clst-workload" +} + +variable "node_machine_type" { + description = "GKE node machine type" + type = string + default = "e2-standard-2" +} + +variable "node_count" { + description = "Number of nodes per zone" + type = number + default = 1 +} + +variable "kubernetes_version" { + description = "GKE Kubernetes version (null = STABLE release channel)" + type = string + default = null +} + +variable "deletion_protection" { + description = "Prevent cluster deletion" + type = bool + default = false +} + +variable "labels" { + description = "Labels applied to all resources" + type = map(string) + default = { + environment = "workload" + managed-by = "tofu" + } +} + +# ─── Required APIs ──────────────────────────────────────────────────── + +resource "google_project_service" "compute" { + project = var.project_id + service = "compute.googleapis.com" + disable_on_destroy = false +} + +resource "google_project_service" "container" { + project = var.project_id + service = "container.googleapis.com" + disable_on_destroy = false +} + +resource "google_project_service" "iam" { + project = var.project_id + service = "iam.googleapis.com" + disable_on_destroy = false +} + +resource "google_project_service" "dns" { + project = var.project_id + service = "dns.googleapis.com" + disable_on_destroy = false +} + +# ─── Networking ─────────────────────────────────────────────────────── + +resource "google_compute_network" "main" { + project = var.project_id + name = "${var.prefix}-vpc" + auto_create_subnetworks = false + + 
depends_on = [google_project_service.compute] +} + +resource "google_compute_subnetwork" "main" { + project = var.project_id + name = "${var.prefix}-subnet" + ip_cidr_range = "10.110.0.0/22" + region = var.region + network = google_compute_network.main.id + + secondary_ip_range { + range_name = "pods" + ip_cidr_range = "10.208.0.0/14" # /14-aligned (10.208.0.0-10.211.255.255); 10.210.0.0/14 is not a valid network address + } + + secondary_ip_range { + range_name = "services" + ip_cidr_range = "10.214.0.0/20" + } +} + +# ─── GKE Cluster ────────────────────────────────────────────────────── + +resource "google_container_cluster" "main" { + project = var.project_id + name = "${var.prefix}-gke" + location = var.region + + network = google_compute_network.main.id + subnetwork = google_compute_subnetwork.main.id + + ip_allocation_policy { + cluster_secondary_range_name = "pods" + services_secondary_range_name = "services" + } + + workload_identity_config { + workload_pool = "${var.project_id}.svc.id.goog" + } + + remove_default_node_pool = true + initial_node_count = 1 + + deletion_protection = var.deletion_protection + + dynamic "release_channel" { + for_each = var.kubernetes_version == null ? [1] : [] + content { + channel = "STABLE" + } + } + + resource_labels = var.labels + + depends_on = [google_project_service.container] +} + +resource "google_container_node_pool" "main" { + project = var.project_id + name = "${var.prefix}-nodes" + location = var.region + cluster = google_container_cluster.main.name + node_count = var.node_count + + node_config { + machine_type = var.node_machine_type + + workload_metadata_config { + mode = "GKE_METADATA" + } + + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + ] + + labels = merge(var.labels, { role = "worker" }) + } + + management { + auto_repair = true + auto_upgrade = true + } +} + +# ─── External-DNS Workload Identity ────────────────────────────────── +# Allows external-dns to manage Cloud DNS records for app ingresses.
+ +resource "google_service_account" "external_dns" { + project = var.project_id + account_id = "${var.prefix}-external-dns" + display_name = "External-DNS Service Account (Workload Identity)" + + depends_on = [google_project_service.iam] +} + +resource "google_project_iam_member" "external_dns_dns_admin" { + project = var.project_id + role = "roles/dns.admin" + member = "serviceAccount:${google_service_account.external_dns.email}" +} + +resource "google_service_account_iam_member" "external_dns_workload_identity" { + service_account_id = google_service_account.external_dns.name + role = "roles/iam.workloadIdentityUser" + member = "serviceAccount:${var.project_id}.svc.id.goog[external-dns/external-dns]" +} diff --git a/.tofu/platforms/gke/workload/outputs.tf b/.tofu/platforms/gke/workload/outputs.tf new file mode 100644 index 0000000..a825928 --- /dev/null +++ b/.tofu/platforms/gke/workload/outputs.tf @@ -0,0 +1,4 @@ +output "cluster_name" { value = google_container_cluster.main.name } +output "project_id" { value = var.project_id } +output "region" { value = var.region } +output "external_dns_gsa_email" { value = google_service_account.external_dns.email } diff --git a/.tofu/platforms/gke/workload/providers.tf b/.tofu/platforms/gke/workload/providers.tf new file mode 100644 index 0000000..1503c79 --- /dev/null +++ b/.tofu/platforms/gke/workload/providers.tf @@ -0,0 +1,26 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.0" + } + } +} + +# Authentication: use Application Default Credentials (gcloud auth application-default login) +# or set GOOGLE_APPLICATION_CREDENTIALS to a service account key file. 
+provider "google" { + project = var.project_id + region = var.region +} + +variable "project_id" { + description = "GCP project ID for the workload environment" + type = string +} + +variable "region" { + description = "GCP region" + type = string + default = "europe-west4" +} diff --git a/.tofu/platforms/upc/dev/main.tf b/.tofu/platforms/upc/dev/main.tf new file mode 100644 index 0000000..48da206 --- /dev/null +++ b/.tofu/platforms/upc/dev/main.tf @@ -0,0 +1,14 @@ +module "cluster" { + source = "../modules/cluster" + + prefix = "clst-dev" + zone = "no-svg1" + node_plan = "DEV-1xCPU-2GB" + node_count = 2 + network_cidr = "10.100.0.0/24" + + tags = { + Environment = "dev" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/upc/dev/outputs.tf b/.tofu/platforms/upc/dev/outputs.tf new file mode 100644 index 0000000..f1429e1 --- /dev/null +++ b/.tofu/platforms/upc/dev/outputs.tf @@ -0,0 +1,13 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_id" { + value = module.cluster.cluster_id +} + +output "cluster_name" { + value = module.cluster.cluster_name +} + +output "zone" { + value = module.cluster.zone +} diff --git a/.tofu/platforms/upc/dev/providers.tf b/.tofu/platforms/upc/dev/providers.tf new file mode 100644 index 0000000..2e2be87 --- /dev/null +++ b/.tofu/platforms/upc/dev/providers.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + upcloud = { + source = "UpCloudLtd/upcloud" + version = "~> 5.0" + } + } +} + +provider "upcloud" { + # Auth via environment: UPCLOUD_TOKEN (as set in configs/upc.env.example; TODO confirm the pinned provider release supports token auth) or UPCLOUD_USERNAME / UPCLOUD_PASSWORD +} diff --git a/.tofu/platforms/upc/modules/cluster/main.tf b/.tofu/platforms/upc/modules/cluster/main.tf new file mode 100644 index 0000000..ee71b32 --- /dev/null +++ b/.tofu/platforms/upc/modules/cluster/main.tf @@ -0,0 +1,56 @@ +# Router for the private network +resource "upcloud_router" "kubernetes" { + name = "${var.prefix}-${var.cluster_name}-router" +} + +# Gateway for
internet connectivity +resource "upcloud_gateway" "kubernetes" { + name = "${var.prefix}-${var.cluster_name}-gateway" + zone = var.zone + features = ["nat"] + router { + id = upcloud_router.kubernetes.id + } +} + +# Private network for the Kubernetes cluster +resource "upcloud_network" "kubernetes" { + name = "${var.prefix}-${var.cluster_name}-network" + zone = var.zone + router = upcloud_router.kubernetes.id + + ip_network { + address = var.network_cidr + dhcp = true + dhcp_default_route = true + family = "IPv4" + gateway = cidrhost(var.network_cidr, 1) + } + + depends_on = [upcloud_gateway.kubernetes] +} + +# Kubernetes cluster +resource "upcloud_kubernetes_cluster" "main" { + name = "${var.prefix}-${var.cluster_name}" + zone = var.zone + network = upcloud_network.kubernetes.id + control_plane_ip_filter = var.control_plane_ip_filter + + private_node_groups = true +} + +# Node group for worker nodes +resource "upcloud_kubernetes_node_group" "workers" { + cluster = upcloud_kubernetes_cluster.main.id + name = "${var.prefix}-${var.cluster_name}-workers" + node_count = var.node_count + plan = var.node_plan + anti_affinity = var.node_count > 1 + labels = { + prefix = var.prefix + cluster = var.cluster_name + role = "worker" + env = lookup(var.tags, "Environment", "dev") + } +} diff --git a/.tofu/platforms/upc/modules/cluster/outputs.tf b/.tofu/platforms/upc/modules/cluster/outputs.tf new file mode 100644 index 0000000..d6d4126 --- /dev/null +++ b/.tofu/platforms/upc/modules/cluster/outputs.tf @@ -0,0 +1,31 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_id" { + description = "The ID of the Kubernetes cluster" + value = upcloud_kubernetes_cluster.main.id +} + +output "cluster_name" { + description = "The name of the Kubernetes cluster" + value = upcloud_kubernetes_cluster.main.name +} + +output "network_id" { + description = "The ID of the private network" + value = upcloud_network.kubernetes.id +} + +output "network_cidr" 
{ + description = "The CIDR block of the private network" + value = var.network_cidr +} + +output "kubernetes_version" { + description = "The Kubernetes version of the cluster" + value = upcloud_kubernetes_cluster.main.version +} + +output "zone" { + description = "The zone where the cluster is deployed" + value = var.zone +} diff --git a/.tofu/platforms/upc/modules/cluster/providers.tf b/.tofu/platforms/upc/modules/cluster/providers.tf new file mode 100644 index 0000000..f637d8b --- /dev/null +++ b/.tofu/platforms/upc/modules/cluster/providers.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + upcloud = { + source = "UpCloudLtd/upcloud" + version = "~> 5.0" + } + } +} diff --git a/.tofu/platforms/upc/modules/cluster/variables.tf b/.tofu/platforms/upc/modules/cluster/variables.tf new file mode 100644 index 0000000..f18651d --- /dev/null +++ b/.tofu/platforms/upc/modules/cluster/variables.tf @@ -0,0 +1,44 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +variable "prefix" { + description = "Prefix for resource names" + type = string +} + +variable "cluster_name" { + description = "Name of the Kubernetes cluster" + type = string + default = "main" +} + +variable "zone" { + description = "UpCloud zone" + type = string +} + +variable "node_plan" { + description = "UpCloud server plan for worker nodes" + type = string +} + +variable "node_count" { + description = "Number of worker nodes" + type = number +} + +variable "network_cidr" { + description = "CIDR block for the private network" + type = string + default = "10.100.0.0/24" +} + +variable "control_plane_ip_filter" { + description = "CIDRs allowed to access the K8s API" + type = list(string) + default = ["0.0.0.0/0"] +} + +variable "tags" { + description = "Labels to apply to resources" + type = map(string) +} diff --git a/.tofu/platforms/upc/prod/main.tf b/.tofu/platforms/upc/prod/main.tf new file mode 100644 index 0000000..b12b651 --- /dev/null +++ 
b/.tofu/platforms/upc/prod/main.tf @@ -0,0 +1,16 @@ +module "cluster" { + source = "../modules/cluster" + + prefix = "clst" + zone = "de-fra1" + node_plan = "4xCPU-8GB" + node_count = 3 + network_cidr = "10.100.0.0/24" + + control_plane_ip_filter = ["0.0.0.0/0"] # TODO: restrict to known CIDRs + + tags = { + Environment = "prod" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/upc/prod/outputs.tf b/.tofu/platforms/upc/prod/outputs.tf new file mode 100644 index 0000000..f1429e1 --- /dev/null +++ b/.tofu/platforms/upc/prod/outputs.tf @@ -0,0 +1,13 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_id" { + value = module.cluster.cluster_id +} + +output "cluster_name" { + value = module.cluster.cluster_name +} + +output "zone" { + value = module.cluster.zone +} diff --git a/.tofu/platforms/upc/prod/providers.tf b/.tofu/platforms/upc/prod/providers.tf new file mode 100644 index 0000000..2e2be87 --- /dev/null +++ b/.tofu/platforms/upc/prod/providers.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + upcloud = { + source = "UpCloudLtd/upcloud" + version = "~> 5.0" + } + } +} + +provider "upcloud" { + # Auth via environment: UPCLOUD_TOKEN (as set in configs/upc.env.example; TODO confirm the pinned provider release supports token auth) or UPCLOUD_USERNAME / UPCLOUD_PASSWORD +} diff --git a/.tofu/platforms/upc/workload/main.tf b/.tofu/platforms/upc/workload/main.tf new file mode 100644 index 0000000..8b87e10 --- /dev/null +++ b/.tofu/platforms/upc/workload/main.tf @@ -0,0 +1,116 @@ +# ============================================================================= +# UpCloud Workload Cluster +# ============================================================================= +# A lean UpCloud Kubernetes cluster for running application workloads. No managed data +# services — those live on the platform cluster. ArgoCD (on the platform +# cluster) deploys apps to this cluster via the app-of-apps pattern.
+# +# Platform components deployed by deploy-workload.sh: +# nginx-ingress, cert-manager, external-dns, external-secrets, alloy +# +# Usage: +# tofu init && tofu plan && tofu apply +# ./sync-tofu-outputs.sh --env upcloud-workload +# ./deploy-workload.sh --env upcloud-workload +# ============================================================================= + +variable "prefix" { + description = "Prefix for resource names" + type = string + default = "clst-workload" +} + +variable "zone" { + description = "UpCloud zone" + type = string + default = "fi-hel1" +} + +variable "node_plan" { + description = "UpCloud server plan for worker nodes" + type = string + default = "2xCPU-4GB" +} + +variable "node_count" { + description = "Number of worker nodes" + type = number + default = 2 +} + +variable "network_cidr" { + description = "CIDR block for the private network" + type = string + default = "10.110.0.0/24" +} + +variable "control_plane_ip_filter" { + description = "CIDRs allowed to access the K8s API" + type = list(string) + default = ["0.0.0.0/0"] +} + +variable "tags" { + description = "Labels to apply to resources" + type = map(string) + default = { + Environment = "workload" + ManagedBy = "tofu" + } +} + +# ─── Networking ─────────────────────────────────────────────────────── + +resource "upcloud_router" "kubernetes" { + name = "${var.prefix}-workload-router" +} + +resource "upcloud_gateway" "kubernetes" { + name = "${var.prefix}-workload-gateway" + zone = var.zone + features = ["nat"] + router { + id = upcloud_router.kubernetes.id + } +} + +resource "upcloud_network" "kubernetes" { + name = "${var.prefix}-workload-network" + zone = var.zone + router = upcloud_router.kubernetes.id + + ip_network { + address = var.network_cidr + dhcp = true + dhcp_default_route = true + family = "IPv4" + gateway = cidrhost(var.network_cidr, 1) + } + + depends_on = [upcloud_gateway.kubernetes] +} + +# ─── Kubernetes Cluster ─────────────────────────────────────────────── + +resource
"upcloud_kubernetes_cluster" "main" { + name = "${var.prefix}-workload" + zone = var.zone + network = upcloud_network.kubernetes.id + control_plane_ip_filter = var.control_plane_ip_filter + + private_node_groups = true +} + +resource "upcloud_kubernetes_node_group" "workers" { + cluster = upcloud_kubernetes_cluster.main.id + name = "${var.prefix}-workload-workers" + node_count = var.node_count + plan = var.node_plan + anti_affinity = var.node_count > 1 + labels = { + prefix = var.prefix + cluster = "workload" + role = "worker" + env = lookup(var.tags, "Environment", "workload") + } +} diff --git a/.tofu/platforms/upc/workload/outputs.tf b/.tofu/platforms/upc/workload/outputs.tf new file mode 100644 index 0000000..ba93f7e --- /dev/null +++ b/.tofu/platforms/upc/workload/outputs.tf @@ -0,0 +1,3 @@ +output "cluster_name" { value = upcloud_kubernetes_cluster.main.name } +output "cluster_id" { value = upcloud_kubernetes_cluster.main.id } +output "zone" { value = var.zone } diff --git a/.tofu/platforms/upc/workload/providers.tf b/.tofu/platforms/upc/workload/providers.tf new file mode 100644 index 0000000..2e2be87 --- /dev/null +++ b/.tofu/platforms/upc/workload/providers.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + upcloud = { + source = "UpCloudLtd/upcloud" + version = "~> 5.0" + } + } +} + +provider "upcloud" { + # Auth via environment: UPCLOUD_TOKEN (as set in configs/upc.env.example; TODO confirm the pinned provider release supports token auth) or UPCLOUD_USERNAME / UPCLOUD_PASSWORD +} diff --git a/.tofu/scripts/get-kubeconfig.sh b/.tofu/scripts/get-kubeconfig.sh new file mode 100644 index 0000000..61d588d --- /dev/null +++ b/.tofu/scripts/get-kubeconfig.sh @@ -0,0 +1,66 @@ +#!/bin/bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TOFU_ROOT="$(dirname "$SCRIPT_DIR")" +PROJECT_ROOT="$(dirname "$TOFU_ROOT")" + +CLUSTER="${1:?Usage: $0 <cluster> (e.g., aks-dev, eks-prod)}" +PLATFORM="${CLUSTER%%-*}" +ENV="${CLUSTER#*-}" + +KUBECONFIG_FILE="$PROJECT_ROOT/private/$CLUSTER/kubeconfig" + +if [[ -f
"$KUBECONFIG_FILE" ]]; then + echo "Kubeconfig already exists: $KUBECONFIG_FILE" + echo "" + echo " export KUBECONFIG=$KUBECONFIG_FILE" +else + echo "No cached kubeconfig. Fetching from platform..." + + # Load platform credentials + ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env" + if [[ -f "$ENV_FILE" ]]; then + set -a; source "$ENV_FILE"; set +a + fi + + TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV" + mkdir -p "$(dirname "$KUBECONFIG_FILE")" + + case "$PLATFORM" in + aks) + cd "$TOFU_DIR" + RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "$CLUSTER-rg") + NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER") + az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_FILE" --overwrite-existing + ;; + eks) + cd "$TOFU_DIR" + NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER") + REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}") + aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_FILE" + ;; + gke) + cd "$TOFU_DIR" + NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER") + REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}") + PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}") + # Write straight to the per-cluster file; copying ~/.kube/config would drag every other context and credential along. + KUBECONFIG="$KUBECONFIG_FILE" gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT" + ;; + upc) + cd "$TOFU_DIR" + CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}") + upctl kubernetes config "$CLUSTER_ID" > "$KUBECONFIG_FILE" || { rm -f "$KUBECONFIG_FILE"; exit 1; } # the redirect creates the file even on failure; do not leave an empty "cached" kubeconfig + ;; + *) + echo "Error: unknown platform '$PLATFORM'" + exit 1 + ;; + esac + + chmod 600 "$KUBECONFIG_FILE" + echo "Kubeconfig saved: $KUBECONFIG_FILE" + echo "" + echo " export KUBECONFIG=$KUBECONFIG_FILE" +fi diff --git a/.tofu/scripts/setup-cluster.sh b/.tofu/scripts/setup-cluster.sh new file mode 100644 index 0000000..869dd02 --- /dev/null +++ b/.tofu/scripts/setup-cluster.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
+PROJECT_ROOT="$(dirname "$TOFU_ROOT")"
+
+# ─── Usage ────────────────────────────────────────────────────────────
+# Print help and exit with the status passed as $1 (default 0).
+usage() {
+  cat <<EOF
+Usage: $0 <cluster> [options]
+
+  Provision a Kubernetes cluster using OpenTofu.
+  Mirrors bootstrap.sh convention: cluster = <platform>-<env>
+
+  Clusters: aks-dev | aks-prod | eks-dev | eks-prod
+            gke-dev | gke-prod | upc-dev | upc-prod
+            <platform>-workload (for workload clusters)
+
+  Options:
+    --plan      Plan only, don't apply
+    --destroy   Destroy the cluster (use teardown-cluster.sh instead)
+    --auto      Skip confirmation prompts
+    -h, --help  Show this help
+
+  Examples:
+    $0 aks-dev
+    $0 eks-prod --plan
+    $0 upc-dev --auto
+
+  Prerequisites:
+    - tofu, kubectl, helm installed
+    - Platform credentials in .tofu/configs/<platform>.env
+    - Cluster config in clusters/<cluster>.yaml
+
+  After provisioning, run:
+    ./bootstrap.sh <cluster>
+EOF
+  exit "${1:-0}"
+}
+
+# ─── Parse arguments ──────────────────────────────────────────────────
+CLUSTER=""
+PLAN_ONLY=false
+DESTROY=false
+AUTO_APPROVE=false
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --plan) PLAN_ONLY=true; shift ;;
+    --destroy) DESTROY=true; shift ;;
+    --auto) AUTO_APPROVE=true; shift ;;
+    -h|--help) usage 0 ;;
+    -*) echo "Unknown option: $1"; usage 1 ;;
+    *)
+      if [[ -z "$CLUSTER" ]]; then
+        CLUSTER="$1"
+      else
+        echo "Error: unexpected argument '$1'"
+        usage 1
+      fi
+      shift
+      ;;
+  esac
+done
+
+[[ -z "$CLUSTER" ]] && { echo "Error: <cluster> argument required"; usage 1; }
+
+# ─── Map cluster → platform + env ────────────────────────────────────
+PLATFORM="${CLUSTER%%-*}" # aks-dev → aks
+ENV="${CLUSTER#*-}" # aks-dev → dev
+
+case "$PLATFORM" in
+  aks|eks|gke|upc) ;;
+  *) echo "Error: unknown platform '$PLATFORM'. Expected: aks, eks, gke, upc"; exit 1 ;;
+esac
+
+TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
+if [[ ! -d "$TOFU_DIR" ]]; then
+  echo "Error: tofu directory not found: $TOFU_DIR"
+  echo "Available environments for $PLATFORM:"
+  ls -1 "$TOFU_ROOT/platforms/$PLATFORM/" 2>/dev/null | grep -v modules || echo " (none)"
+  exit 1
+fi
+
+echo "========================================="
+echo " Kubernetes Cluster Setup"
+echo "========================================="
+echo ""
+echo " Cluster: $CLUSTER"
+echo " Platform: $PLATFORM"
+echo " Env: $ENV"
+echo " Tofu dir: $TOFU_DIR"
+echo ""
+
+# ─── Prerequisites ────────────────────────────────────────────────────
+echo "=== Checking Prerequisites ==="
+command -v tofu >/dev/null 2>&1 || { echo "Error: tofu is not installed."; exit 1; }
+command -v kubectl >/dev/null 2>&1 || { echo "Error: kubectl is not installed."; exit 1; }
+command -v helm >/dev/null 2>&1 || { echo "Error: helm is not installed."; exit 1; }
+echo " tofu, kubectl, helm: OK"
+
+# ─── Load platform credentials ────────────────────────────────────────
+ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
+if [[ -f "$ENV_FILE" ]]; then
+  echo " Loading credentials from configs/$PLATFORM.env"
+  set -a
+  # shellcheck disable=SC1090
+  source "$ENV_FILE"
+  set +a
+else
+  echo " Warning: $ENV_FILE not found — using existing environment/CLI auth"
+  echo " Copy configs/$PLATFORM.env.example → configs/$PLATFORM.env to configure"
+fi
+
+# ─── Load cluster config (if exists) ──────────────────────────────────
+CLUSTER_CONFIG="$PROJECT_ROOT/clusters/$CLUSTER.yaml"
+if [[ -f "$CLUSTER_CONFIG" ]]; then
+  echo " Loading cluster config from clusters/$CLUSTER.yaml"
+  if command -v yq >/dev/null 2>&1; then
+    # SECURITY(review): this evals text built from YAML values, so a value containing
+    # quotes or $(...) runs as shell. Only safe while clusters/*.yaml is trusted input.
+    eval "$(yq -r 'to_entries[] | "export CLUSTER_\(.key)=\"\(.value)\""' "$CLUSTER_CONFIG")"
+    echo " Cluster name: ${CLUSTER_clusterName:-$CLUSTER}"
+  else
+    echo " Warning: yq not installed — cluster config not loaded"
+  fi
+else
+  echo " Warning: $CLUSTER_CONFIG not found — using defaults"
+fi
+echo ""
+
+# ─── Run OpenTofu ─────────────────────────────────────────────────────
+cd "$TOFU_DIR"
+
+echo "=== Initializing OpenTofu ==="
+tofu init
+
+echo ""
+if $DESTROY; then
+  echo "=== Planning Destruction ==="
+  if $PLAN_ONLY; then
+    # --plan must stay read-only even with --destroy (e.g. teardown-cluster.sh <cluster> --plan).
+    tofu plan -destroy
+    echo ""
+    echo "=== Plan complete (--plan mode, no changes applied) ==="
+    exit 0
+  fi
+  tofu plan -destroy -out=tfplan
+
+  if ! $AUTO_APPROVE; then
+    echo ""
+    read -rp "DESTROY cluster $CLUSTER? This is irreversible. (yes/no) " REPLY
+    [[ "$REPLY" == "yes" ]] || { echo "Cancelled."; exit 1; }
+  fi
+
+  echo "Destroying infrastructure..."
+  tofu apply tfplan
+  echo ""
+  echo "=== Cluster $CLUSTER Destroyed ==="
+
+elif $PLAN_ONLY; then
+  echo "=== Planning Infrastructure ==="
+  tofu plan
+  echo ""
+  echo "=== Plan complete (--plan mode, no changes applied) ==="
+
+else
+  echo "=== Planning Infrastructure ==="
+  tofu plan -out=tfplan
+
+  if ! $AUTO_APPROVE; then
+    echo ""
+    read -rp "Apply this plan for $CLUSTER? (y/n) " -n 1 REPLY
+    echo
+    [[ "$REPLY" =~ ^[Yy]$ ]] || { echo "Cancelled."; exit 1; }
+  fi
+
+  echo "Applying infrastructure..."
+  tofu apply tfplan
+
+  # ─── Save kubeconfig ──────────────────────────────────────────────
+  # Create the directory and file owner-only from the start, not only after chmod.
+  umask 077
+  KUBECONFIG_DIR="$PROJECT_ROOT/private/$CLUSTER"
+  mkdir -p "$KUBECONFIG_DIR"
+  KUBECONFIG_FILE="$KUBECONFIG_DIR/kubeconfig"
+
+  echo ""
+  echo "=== Saving Kubeconfig ==="
+
+  case "$PLATFORM" in
+    aks)
+      if tofu output -raw kubeconfig > "$KUBECONFIG_FILE" 2>/dev/null; then
+        echo " Saved from tofu output"
+      else
+        echo " Fetching from Azure CLI..."
+        RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}-rg")
+        NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
+        az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_FILE" --overwrite-existing
+      fi
+      ;;
+    eks)
+      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
+      REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
+      aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_FILE"
+      ;;
+    gke)
+      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
+      REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
+      PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
+      # Point gcloud at the target file via KUBECONFIG; copying ~/.kube/config would
+      # drag the credentials of every other cluster in it into this file.
+      KUBECONFIG="$KUBECONFIG_FILE" gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT" 2>/dev/null \
+        || echo " Warning: could not fetch kubeconfig via gcloud"
+      ;;
+    upc)
+      if tofu output -raw kubeconfig > "$KUBECONFIG_FILE" 2>/dev/null; then
+        echo " Saved from tofu output"
+      else
+        CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
+        if [[ -n "$CLUSTER_ID" ]]; then
+          upctl kubernetes config "$CLUSTER_ID" > "$KUBECONFIG_FILE"
+        else
+          echo " Warning: could not determine cluster ID for kubeconfig"
+        fi
+      fi
+      ;;
+  esac
+
+  if [[ -s "$KUBECONFIG_FILE" ]]; then
+    chmod 600 "$KUBECONFIG_FILE"
+    echo " Kubeconfig: $KUBECONFIG_FILE"
+  else
+    # A failed `> "$KUBECONFIG_FILE"` redirect leaves a zero-byte file behind; drop it
+    # so it is not mistaken for a cached kubeconfig later.
+    rm -f "$KUBECONFIG_FILE"
+    echo " Warning: no kubeconfig was written for $CLUSTER"
+  fi
+
+  # ─── Wait for nodes ──────────────────────────────────────────────
+  echo ""
+  echo "=== Waiting for Cluster Nodes ==="
+  export KUBECONFIG="$KUBECONFIG_FILE"
+  if kubectl wait --for=condition=Ready nodes --all --timeout=300s 2>/dev/null; then
+    echo " All nodes ready"
+  else
+    echo " Warning: nodes not ready within timeout — check cluster status"
+  fi
+
+  # ─── Summary ─────────────────────────────────────────────────────
+  echo ""
+  echo "========================================="
+  echo " Cluster $CLUSTER Provisioned"
+  echo "========================================="
+  echo ""
+  echo " Kubeconfig: $KUBECONFIG_FILE"
+  echo ""
+  echo " Next steps:"
+  echo " export KUBECONFIG=$KUBECONFIG_FILE"
+  echo " ./bootstrap.sh $CLUSTER"
+  echo ""
+fi
diff --git a/.tofu/scripts/teardown-cluster.sh b/.tofu/scripts/teardown-cluster.sh
new file mode 100644
index 0000000..5e0b90a
--- /dev/null
+++ b/.tofu/scripts/teardown-cluster.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Delegate to setup-cluster.sh with --destroy flag
+exec "$SCRIPT_DIR/setup-cluster.sh" "$@" --destroy
diff --git a/README.md b/README.md
index 8b300fe..f7ddeef 100644
--- a/README.md
+++ b/README.md
@@ -80,8 +80,23 @@ This repository contains the complete GitOps configuration for our Kubernetes cl
 ```
 .
-├── bootstrap.sh # Cluster initialization script
-├── _app-of-apps.yaml # Root ArgoCD Application (App-of-Apps pattern)
+├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps)
+├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster)
+│
+├── .tofu/ # Infrastructure provisioning (OpenTofu)
+│   ├── platforms/ # Per-platform IaC (one dir per cloud)
+│   │   ├── aks/ # Azure AKS (modules/ + dev/ + prod/ + workload/)
+│   │   ├── eks/ # AWS EKS
+│   │   ├── gke/ # GCP GKE
+│   │   └── upc/ # UpCloud
+│   ├── configs/ # Platform credentials (git-ignored)
+│   │   └── *.env.example # Template for each platform
+│   └── scripts/ # Cluster lifecycle scripts
+│       ├── setup-cluster.sh # Create cluster: ./setup-cluster.sh aks-dev
+│       ├── teardown-cluster.sh
+│       └── get-kubeconfig.sh
+│
+├── clusters/ # Cluster metadata (domain, trustedIPs, etc.)
│ ├── infra/ # Infrastructure ArgoCD Applications (Kustomize multi-cluster) │ ├── base/ # Base ArgoCD Application manifests (one dir per component) diff --git a/docs/GITOPS-ARCHITECTURE.md b/docs/GITOPS-ARCHITECTURE.md index 607f639..19f6dc4 100644 --- a/docs/GITOPS-ARCHITECTURE.md +++ b/docs/GITOPS-ARCHITECTURE.md @@ -115,9 +115,30 @@ This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where ``` launchpad/ -├── bootstrap.sh # Cluster initialization script -├── _app-of-apps-upc-dev.yaml # Root ArgoCD Application (upc-dev cluster) -├── _app-of-apps-upc-prod.yaml # Root ArgoCD Application (upc-prod cluster) +├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps) +├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster) +│ +├── .tofu/ # Infrastructure provisioning (OpenTofu) +│ ├── platforms/ # Per-platform IaC +│ │ ├── aks/ # Azure AKS +│ │ │ ├── modules/cluster/ # Reusable AKS module +│ │ │ ├── dev/ # tofu root for aks-dev +│ │ │ ├── prod/ # tofu root for aks-prod +│ │ │ └── workload/ # workload cluster (no data services) +│ │ ├── eks/ # AWS EKS (same structure) +│ │ ├── gke/ # GCP GKE +│ │ └── upc/ # UpCloud +│ ├── configs/ # Platform credentials (git-ignored) +│ │ └── {platform}.env.example # Template per platform +│ └── scripts/ +│ ├── setup-cluster.sh # ./setup-cluster.sh <cluster> [--plan|--auto] +│ ├── teardown-cluster.sh # ./teardown-cluster.sh <cluster> +│ └── get-kubeconfig.sh # ./get-kubeconfig.sh <cluster> +│ +├── clusters/ # Cluster metadata YAML (domain, IPs, etc.) +│ ├── aks-dev.yaml +│ ├── upc-dev.yaml +│ └── ...
│ ├── infra/ # Infrastructure ArgoCD Applications (Kustomize) │ ├── base/ # Base Application manifests (one dir per component) diff --git a/docs/REFERENCE.md b/docs/REFERENCE.md index f0d1bc4..7bd9e3d 100644 --- a/docs/REFERENCE.md +++ b/docs/REFERENCE.md @@ -72,9 +72,22 @@ Internet ``` launchpad/ -├── bootstrap.sh # Cluster initialization script -├── _app-of-apps-upc-dev.yaml # Root ArgoCD Application (upc-dev) -├── _app-of-apps-upc-prod.yaml # Root ArgoCD Application (upc-prod) +├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps) +├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster) +│ +├── .tofu/ # Infrastructure provisioning (OpenTofu) +│ ├── platforms/ # Per-platform IaC +│ │ ├── aks/ # Azure: modules/cluster/, dev/, prod/, workload/ +│ │ ├── eks/ # AWS: same structure +│ │ ├── gke/ # GCP +│ │ └── upc/ # UpCloud +│ ├── configs/ # Platform credentials (git-ignored) +│ └── scripts/ # setup-cluster.sh, teardown-cluster.sh, get-kubeconfig.sh +│ +├── clusters/ # Cluster metadata YAML +│ ├── aks-dev.yaml +│ ├── upc-dev.yaml +│ └── ... │ ├── infra/ # Infrastructure applications (Kustomize) │ ├── base/ # One subdirectory per component