2026-05-29 15:48:29 +00:00 · 2026-04-28 06:52:06 +00:00 · 2026-04-28 06:52:07 +00:00 · 2026-04-28 06:52:06 +00:00 · 2026-04-28 06:52:07 +00:00 · 2026-04-28 06:52:07 +00:00
63 changed files with 2546 additions and 9 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,11 @@ devbox.d/
 devbox.lock
 .devbox/
 bash.exe.stackdump
 # OpenTofu
 # Local credential/config files copied from the *.example templates — never commit.
 .tofu/configs/*.env
 .tofu/scripts/*.config
 # Per-environment working directories, state files and saved plans.
 .tofu/platforms/**/.terraform/
 .tofu/platforms/**/terraform.tfstate*
 .tofu/platforms/**/tfplan
 # NOTE(review): the dependency lock file is normally committed so provider
 # versions and checksums are reproducible — confirm ignoring it is intended.
 .tofu/platforms/**/.terraform.lock.hcl
--- a/.tofu/configs/aks.env.example
+++ b/.tofu/configs/aks.env.example
@@ -0,0 +1,9 @@
 # Azure AKS credentials — copy to aks.env and fill in values
 # NEVER commit aks.env to git!
 # Required
 AZURE_TENANT_ID=your-azure-tenant-id
 AZURE_SUBSCRIPTION_ID=your-azure-subscription-id
 # Optional — defaults to cluster name if not set
 ARM_RESOURCE_GROUP=
--- a/.tofu/configs/eks.env.example
+++ b/.tofu/configs/eks.env.example
@@ -0,0 +1,10 @@
 # AWS EKS credentials — copy to eks.env and fill in values
 # NEVER commit eks.env to git!
 # Required — AWS CLI profile or access key
 AWS_PROFILE=default
 AWS_REGION=eu-west-1
 # Optional — override with explicit keys instead of profile
 # AWS_ACCESS_KEY_ID=
 # AWS_SECRET_ACCESS_KEY=
--- a/.tofu/configs/gke.env.example
+++ b/.tofu/configs/gke.env.example
@@ -0,0 +1,9 @@
 # GCP GKE credentials — copy to gke.env and fill in values
 # NEVER commit gke.env to git!
 # Required
 GCP_PROJECT_ID=your-gcp-project-id
 GCP_REGION=europe-west4
 # Optional — path to service account JSON key (if not using gcloud auth)
 # GOOGLE_APPLICATION_CREDENTIALS=/path/to/sa-key.json
--- a/.tofu/configs/upc.env.example
+++ b/.tofu/configs/upc.env.example
@@ -0,0 +1,8 @@
 # UpCloud credentials — copy to upc.env and fill in values
 # NEVER commit upc.env to git!
 # Required
 UPCLOUD_TOKEN=your-upcloud-api-token
 # Optional — set after cluster creation for kubeconfig retrieval
 UPCLOUD_CLUSTER_ID=
--- a/.tofu/platforms/aks/dev/main.tf
+++ b/.tofu/platforms/aks/dev/main.tf
@@ -0,0 +1,18 @@
 # Dev AKS environment: instantiates the shared cluster module with two small
 # nodes in norwayeast.
 module "cluster" {
  source = "../modules/cluster"
  prefix              = "clst-dev"
  location            = "norwayeast"
  resource_group_name = "clst-dev-rg"
  # AKS — small dev nodes
  aks_node_vm_size = "Standard_B2s"
  aks_node_count   = 2
  # No CanNotDelete lock is created on the dev resource group.
  enable_delete_lock = false
  tags = {
    Environment = "dev"
    ManagedBy   = "tofu"
  }
 }
--- a/.tofu/platforms/aks/dev/outputs.tf
+++ b/.tofu/platforms/aks/dev/outputs.tf
@@ -0,0 +1,26 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # Each output forwards the same-named output of module.cluster so it is
 # readable from this root configuration.
 output "cluster_name" {
  value = module.cluster.cluster_name
 }
 output "resource_group_name" {
  value = module.cluster.resource_group_name
 }
 output "kubernetes_version" {
  value = module.cluster.kubernetes_version
 }
 output "location" {
  value = module.cluster.location
 }
 output "oidc_issuer_url" {
  value = module.cluster.oidc_issuer_url
 }
 # Marked sensitive so the raw kubeconfig is redacted in CLI output.
 output "kubeconfig" {
  value     = module.cluster.kubeconfig
  sensitive = true
 }
--- a/.tofu/platforms/aks/dev/providers.tf
+++ b/.tofu/platforms/aks/dev/providers.tf
@@ -0,0 +1,17 @@
 # CLI and provider version constraints for the dev AKS root configuration.
 terraform {
  required_version = ">= 1.0"
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      # Pessimistic constraint: any 4.x release, never 5.0 or later.
      version = "~> 4.0"
    }
  }
 }
 # The (empty) features block is mandatory for the azurerm provider.
 provider "azurerm" {
  features {}
  # Credentials via environment variables:
  #   ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET
  # Or: az login (uses your Azure CLI session)
  # NOTE: azurerm 4.x requires the subscription ID to be set explicitly
  # (ARM_SUBSCRIPTION_ID or a subscription_id argument), even with az login.
 }
--- a/.tofu/platforms/aks/modules/cluster/main.tf
+++ b/.tofu/platforms/aks/modules/cluster/main.tf
@@ -0,0 +1,72 @@
 # Current Azure/Entra ID context of the authenticated provider.
 # NOTE(review): no resource or output visible in this module references
 # data.azurerm_client_config.current — confirm it is still needed.
 data "azurerm_client_config" "current" {}
 # ─── Resource Group ───────────────────────────────────────────────────
 # Resource group referenced by the VNet, subnet and AKS cluster in this module.
 resource "azurerm_resource_group" "main" {
  name     = var.resource_group_name
  location = var.location
  tags     = var.tags
 }
 # Optional CanNotDelete lock scoped to the resource group. It is created only
 # when var.enable_delete_lock is true; otherwise count is 0.
 # NOTE(review): the notes text says "production", but the lock applies to
 # whichever environment enables it.
 resource "azurerm_management_lock" "main" {
  count      = var.enable_delete_lock ? 1 : 0
  name       = "${var.prefix}-delete-lock"
  scope      = azurerm_resource_group.main.id
  lock_level = "CanNotDelete"
  notes      = "Prevents accidental deletion of production resources"
 }
 # ─── Networking ───────────────────────────────────────────────────────
 # Single-range virtual network; var.vnet_address_space is wrapped in a list
 # because address_space accepts multiple CIDR blocks.
 resource "azurerm_virtual_network" "main" {
  name                = "${var.prefix}-vnet"
  resource_group_name = azurerm_resource_group.main.name
  location            = azurerm_resource_group.main.location
  address_space       = [var.vnet_address_space]
  tags                = var.tags
 }
 # AKS nodes subnet
 # var.aks_subnet_cidr must lie inside the VNet address space above.
 resource "azurerm_subnet" "aks" {
  name                 = "${var.prefix}-aks-subnet"
  resource_group_name  = azurerm_resource_group.main.name
  virtual_network_name = azurerm_virtual_network.main.name
  address_prefixes     = [var.aks_subnet_cidr]
 }
 # ─── AKS Cluster ──────────────────────────────────────────────────────
 # Managed Kubernetes cluster with a single fixed-size node pool placed in the
 # AKS subnet.
 resource "azurerm_kubernetes_cluster" "main" {
  name                = "${var.prefix}-aks"
  resource_group_name = azurerm_resource_group.main.name
  location            = azurerm_resource_group.main.location
  # Hyphens are stripped from the prefix to form the DNS prefix.
  dns_prefix          = replace(var.prefix, "-", "")
  # May be null (the variable's default); see var.aks_kubernetes_version.
  kubernetes_version  = var.aks_kubernetes_version
  tags                = var.tags
  default_node_pool {
    name           = "system"
    node_count     = var.aks_node_count
    vm_size        = var.aks_node_vm_size
    vnet_subnet_id = azurerm_subnet.aks.id
    node_labels = {
      prefix = var.prefix
      role   = "worker"
      # Falls back to "dev" when var.tags has no "Environment" key.
      env    = lookup(var.tags, "Environment", "dev")
    }
  }
  # Cluster runs under a system-assigned managed identity.
  identity {
    type = "SystemAssigned"
  }
  network_profile {
    network_plugin = "azure"
    network_policy = "azure"
  }
  # Enable Workload Identity for keyless Azure service access (MSI)
  oidc_issuer_enabled       = true
  workload_identity_enabled = true
 }
--- a/.tofu/platforms/aks/modules/cluster/outputs.tf
+++ b/.tofu/platforms/aks/modules/cluster/outputs.tf
@@ -0,0 +1,32 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # Values exported by the cluster module for the calling root configuration.
 output "cluster_name" {
  description = "AKS cluster name"
  value       = azurerm_kubernetes_cluster.main.name
 }
 output "resource_group_name" {
  description = "Resource group name"
  value       = azurerm_resource_group.main.name
 }
 output "kubernetes_version" {
  description = "Kubernetes version"
  value       = azurerm_kubernetes_cluster.main.kubernetes_version
 }
 output "location" {
  description = "Azure region"
  value       = azurerm_resource_group.main.location
 }
 output "oidc_issuer_url" {
  description = "AKS OIDC issuer URL (for workload identity federation)"
  value       = azurerm_kubernetes_cluster.main.oidc_issuer_url
 }
 # Raw kubeconfig contains cluster credentials, hence sensitive.
 output "kubeconfig" {
  description = "Kubeconfig for the AKS cluster"
  value       = azurerm_kubernetes_cluster.main.kube_config_raw
  sensitive   = true
 }
--- a/.tofu/platforms/aks/modules/cluster/providers.tf
+++ b/.tofu/platforms/aks/modules/cluster/providers.tf
@@ -0,0 +1,18 @@
 # Provider requirements for the cluster module. The module declares no
 # provider block; configuration comes from the calling root.
 # NOTE(review): the module's visible main.tf uses only azurerm resources and
 # data sources — confirm azuread and random are still required here.
 terraform {
  required_version = ">= 1.0"
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 4.0"
    }
    azuread = {
      source  = "hashicorp/azuread"
      version = "~> 3.0"
    }
    random = {
      source  = "hashicorp/random"
      version = "~> 3.0"
    }
  }
 }
--- a/.tofu/platforms/aks/modules/cluster/variables.tf
+++ b/.tofu/platforms/aks/modules/cluster/variables.tf
@@ -0,0 +1,56 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # Inputs of the AKS cluster module. Variables without a default (prefix,
 # location, resource_group_name, aks_node_vm_size, aks_node_count) must be
 # supplied by the caller.
 variable "prefix" {
  description = "Prefix for resource names"
  type        = string
 }
 variable "location" {
  description = "Azure region (e.g., norwayeast, westeurope, northeurope)"
  type        = string
 }
 variable "resource_group_name" {
  description = "Name of the Azure Resource Group to create"
  type        = string
 }
 variable "vnet_address_space" {
  description = "Address space for the virtual network"
  type        = string
  default     = "10.100.0.0/16"
 }
 # Must be a sub-range of vnet_address_space; the default is the first /22 of
 # the default VNet range.
 variable "aks_subnet_cidr" {
  description = "CIDR block for the AKS node subnet"
  type        = string
  default     = "10.100.0.0/22"
 }
 variable "aks_node_vm_size" {
  description = "VM size for AKS worker nodes (e.g., Standard_B2s, Standard_D4s_v3)"
  type        = string
 }
 variable "aks_node_count" {
  description = "Number of AKS worker nodes"
  type        = number
 }
 variable "aks_kubernetes_version" {
  description = "Kubernetes version for AKS (null = latest stable)"
  type        = string
  default     = null
 }
 # Drives the count of azurerm_management_lock.main (0 or 1).
 variable "enable_delete_lock" {
  description = "Protect the resource group from accidental deletion"
  type        = bool
  default     = false
 }
 # The "Environment" key, when present, is also used as the node "env" label.
 variable "tags" {
  description = "Tags applied to all resources"
  type        = map(string)
  default     = {}
 }
--- a/.tofu/platforms/aks/prod/main.tf
+++ b/.tofu/platforms/aks/prod/main.tf
@@ -0,0 +1,18 @@
 # Prod AKS environment: instantiates the shared cluster module with three
 # general-purpose nodes in westeurope.
 module "cluster" {
  source = "../modules/cluster"
  prefix              = "clst"
  location            = "westeurope"
  resource_group_name = "clst-prod-rg"
  # AKS — general-purpose nodes for production
  aks_node_vm_size = "Standard_D4s_v3"
  aks_node_count   = 3
  # Creates a CanNotDelete management lock on the resource group.
  enable_delete_lock = true
  tags = {
    Environment = "prod"
    ManagedBy   = "tofu"
  }
 }
--- a/.tofu/platforms/aks/prod/outputs.tf
+++ b/.tofu/platforms/aks/prod/outputs.tf
@@ -0,0 +1,26 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # Each output forwards the same-named output of module.cluster so it is
 # readable from this root configuration.
 output "cluster_name" {
  value = module.cluster.cluster_name
 }
 output "resource_group_name" {
  value = module.cluster.resource_group_name
 }
 output "kubernetes_version" {
  value = module.cluster.kubernetes_version
 }
 output "location" {
  value = module.cluster.location
 }
 output "oidc_issuer_url" {
  value = module.cluster.oidc_issuer_url
 }
 # Marked sensitive so the raw kubeconfig is redacted in CLI output.
 output "kubeconfig" {
  value     = module.cluster.kubeconfig
  sensitive = true
 }
--- a/.tofu/platforms/aks/prod/providers.tf
+++ b/.tofu/platforms/aks/prod/providers.tf
@@ -0,0 +1,17 @@
 # CLI and provider version constraints for the prod AKS root configuration.
 terraform {
  required_version = ">= 1.0"
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      # Pessimistic constraint: any 4.x release, never 5.0 or later.
      version = "~> 4.0"
    }
  }
 }
 # The (empty) features block is mandatory for the azurerm provider.
 provider "azurerm" {
  features {}
  # Credentials via environment variables:
  #   ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET
  # Or: az login (uses your Azure CLI session)
  # NOTE: azurerm 4.x requires the subscription ID to be set explicitly
  # (ARM_SUBSCRIPTION_ID or a subscription_id argument), even with az login.
 }
--- a/.tofu/platforms/aks/workload/main.tf
+++ b/.tofu/platforms/aks/workload/main.tf
@@ -0,0 +1,173 @@
 # =============================================================================
 # Azure Workload Cluster
 # =============================================================================
 # A lean AKS cluster for running application workloads. No managed data
 # services — those live on the platform cluster. ArgoCD (on the platform
 # cluster) deploys apps to this cluster via the app-of-apps pattern.
 #
 # Platform components deployed by deploy-workload.sh:
 #   nginx-ingress, cert-manager, external-dns, external-secrets, alloy
 #
 # Usage:
 #   tofu init && tofu plan && tofu apply
 #   ./sync-tofu-outputs.sh --env azure-workload
 #   ./deploy-workload.sh --env azure-workload
 # =============================================================================
 # Inputs of the Azure workload cluster. Every variable except "domain" has a
 # default, so "domain" is the only value that must be supplied.
 variable "prefix" {
  description = "Prefix for resource names (e.g., clst-workload)"
  type        = string
  default     = "clst-workload"
 }
 variable "location" {
  description = "Azure region"
  type        = string
  default     = "norwayeast"
 }
 variable "resource_group_name" {
  description = "Name of the Azure Resource Group to create"
  type        = string
  default     = "clst-workload-rg"
 }
 # Default range differs from the shared cluster module's 10.100.0.0/16 default.
 variable "vnet_address_space" {
  description = "Address space for the virtual network"
  type        = string
  default     = "10.110.0.0/16"
 }
 # Must be a sub-range of vnet_address_space.
 variable "aks_subnet_cidr" {
  description = "CIDR block for the AKS node subnet"
  type        = string
  default     = "10.110.0.0/22"
 }
 variable "aks_node_vm_size" {
  description = "VM size for AKS worker nodes"
  type        = string
  default     = "Standard_B2s"
 }
 variable "aks_node_count" {
  description = "Number of AKS worker nodes"
  type        = number
  default     = 2
 }
 variable "aks_kubernetes_version" {
  description = "Kubernetes version for AKS (null = latest stable)"
  type        = string
  default     = null
 }
 # Looked up as an existing Azure DNS zone by data.azurerm_dns_zone.main.
 variable "domain" {
  description = "Public domain name — must have an existing Azure DNS zone"
  type        = string
 }
 # Empty string means "use the resource group created by this configuration".
 variable "dns_zone_resource_group" {
  description = "Resource group containing the Azure DNS zone (defaults to cluster RG)"
  type        = string
  default     = ""
 }
 # The "Environment" key is also used as the node "env" label.
 variable "tags" {
  description = "Tags applied to all resources"
  type        = map(string)
  default = {
    Environment = "workload"
    ManagedBy   = "tofu"
  }
 }
 # ─── Resource Group ───────────────────────────────────────────────────
 # Resource group referenced by the network, cluster and identity resources
 # in this configuration.
 resource "azurerm_resource_group" "main" {
  name     = var.resource_group_name
  location = var.location
  tags     = var.tags
 }
 # ─── Networking ───────────────────────────────────────────────────────
 # Single-range virtual network for the workload cluster.
 resource "azurerm_virtual_network" "main" {
  name                = "${var.prefix}-vnet"
  resource_group_name = azurerm_resource_group.main.name
  location            = azurerm_resource_group.main.location
  address_space       = [var.vnet_address_space]
  tags                = var.tags
 }
 # Subnet the AKS node pool is attached to; must lie inside the VNet range.
 resource "azurerm_subnet" "aks" {
  name                 = "${var.prefix}-aks-subnet"
  resource_group_name  = azurerm_resource_group.main.name
  virtual_network_name = azurerm_virtual_network.main.name
  address_prefixes     = [var.aks_subnet_cidr]
 }
 # ─── AKS Cluster ──────────────────────────────────────────────────────
 # Workload AKS cluster with a single fixed-size node pool in the AKS subnet.
 resource "azurerm_kubernetes_cluster" "main" {
  name                = "${var.prefix}-aks"
  resource_group_name = azurerm_resource_group.main.name
  location            = azurerm_resource_group.main.location
  # Hyphens are stripped from the prefix to form the DNS prefix.
  dns_prefix          = replace(var.prefix, "-", "")
  kubernetes_version  = var.aks_kubernetes_version
  tags                = var.tags
  default_node_pool {
    name           = "system"
    node_count     = var.aks_node_count
    vm_size        = var.aks_node_vm_size
    vnet_subnet_id = azurerm_subnet.aks.id
    node_labels = {
      prefix = var.prefix
      role   = "worker"
      # Falls back to "workload" when var.tags has no "Environment" key.
      env    = lookup(var.tags, "Environment", "workload")
    }
  }
  # Cluster runs under a system-assigned managed identity.
  identity {
    type = "SystemAssigned"
  }
  network_profile {
    network_plugin = "azure"
    network_policy = "azure"
  }
  # OIDC issuer + workload identity back the federated credential for
  # external-dns defined further down.
  oidc_issuer_enabled       = true
  workload_identity_enabled = true
 }
 # ─── External-DNS Workload Identity ──────────────────────────────────
 # Allows external-dns to manage Azure DNS records for app ingresses.
 # Looks up the existing DNS zone for var.domain. When
 # var.dns_zone_resource_group is empty, the cluster resource group is used.
 # NOTE(review): that fallback group is created by this same configuration, so
 # a pre-existing zone would normally live elsewhere — confirm the default.
 data "azurerm_dns_zone" "main" {
  name                = var.domain
  resource_group_name = var.dns_zone_resource_group != "" ? var.dns_zone_resource_group : azurerm_resource_group.main.name
 }
 # User-assigned managed identity that external-dns runs as; its client_id is
 # exported as the external_dns_identity_client_id output.
 resource "azurerm_user_assigned_identity" "external_dns" {
  name                = "${var.prefix}-external-dns-identity"
  resource_group_name = azurerm_resource_group.main.name
  location            = azurerm_resource_group.main.location
  tags                = var.tags
 }
 # Grants the external-dns identity "DNS Zone Contributor" on the DNS zone only.
 resource "azurerm_role_assignment" "external_dns_dns_contributor" {
  scope                = data.azurerm_dns_zone.main.id
  role_definition_name = "DNS Zone Contributor"
  principal_id         = azurerm_user_assigned_identity.external_dns.principal_id
  # The identity is created in the same apply. Declaring the principal type and
  # skipping the directory lookup avoids intermittent "PrincipalNotFound"
  # failures while the new principal replicates through Entra ID.
  principal_type                   = "ServicePrincipal"
  skip_service_principal_aad_check = true
 }
 # Federates the external-dns identity with the cluster's OIDC issuer so the
 # Kubernetes service account named in "subject" can obtain Azure tokens.
 resource "azurerm_federated_identity_credential" "external_dns" {
  name                = "${var.prefix}-external-dns-fedcred"
  resource_group_name = azurerm_resource_group.main.name
  parent_id           = azurerm_user_assigned_identity.external_dns.id
  audience            = ["api://AzureADTokenExchange"]
  issuer              = azurerm_kubernetes_cluster.main.oidc_issuer_url
  # Format: system:serviceaccount:<namespace>:<service-account-name>
  subject             = "system:serviceaccount:external-dns:external-dns"
 }
--- a/.tofu/platforms/aks/workload/outputs.tf
+++ b/.tofu/platforms/aks/workload/outputs.tf
@@ -0,0 +1,4 @@
 # Root outputs of the Azure workload cluster configuration.
 output "cluster_name" {
  value = azurerm_kubernetes_cluster.main.name
 }
 output "resource_group_name" {
  value = azurerm_resource_group.main.name
 }
 output "location" {
  value = azurerm_resource_group.main.location
 }
 # Client ID of the managed identity federated with the external-dns service account.
 output "external_dns_identity_client_id" {
  value = azurerm_user_assigned_identity.external_dns.client_id
 }
--- a/.tofu/platforms/aks/workload/providers.tf
+++ b/.tofu/platforms/aks/workload/providers.tf
@@ -0,0 +1,21 @@
 # CLI and provider version constraints for the Azure workload configuration.
 # NOTE(review): the visible main.tf of this configuration declares no random_*
 # resources — confirm the random provider is still required.
 terraform {
  required_version = ">= 1.0"
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 4.0"
    }
    random = {
      source  = "hashicorp/random"
      version = "~> 3.0"
    }
  }
 }
 # The (empty) features block is mandatory for the azurerm provider.
 provider "azurerm" {
  features {}
  # Credentials via environment variables:
  #   ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET
  # Or: az login (uses your Azure CLI session)
  # NOTE: azurerm 4.x requires the subscription ID to be set explicitly
  # (ARM_SUBSCRIPTION_ID or a subscription_id argument), even with az login.
 }
--- a/.tofu/platforms/eks/dev/main.tf
+++ b/.tofu/platforms/eks/dev/main.tf
@@ -0,0 +1,21 @@
 # Dev EKS environment: instantiates the shared cluster module across two
 # availability zones with small nodes.
 module "cluster" {
  source = "../modules/cluster"
  region = var.region
  prefix = "clst-dev"
  # VPC
  # Zone names are built by appending "a"/"b" to the region name.
  availability_zones = ["${var.region}a", "${var.region}b"]
  # EKS — small dev nodes
  node_instance_type = "t3.medium"
  node_count         = 2
  node_min_count     = 1
  node_max_count     = 4
  kubernetes_version = "1.30"
  tags = {
    Environment = "dev"
    ManagedBy   = "tofu"
  }
 }
--- a/.tofu/platforms/eks/dev/outputs.tf
+++ b/.tofu/platforms/eks/dev/outputs.tf
@@ -0,0 +1,5 @@
 # Root outputs of the dev EKS configuration; each forwards the same-named
 # output of module.cluster.
 output "cluster_name" {
  value = module.cluster.cluster_name
 }
 output "aws_region" {
  value = module.cluster.aws_region
 }
 output "oidc_issuer_url" {
  value = module.cluster.oidc_issuer_url
 }
 output "oidc_provider_arn" {
  value = module.cluster.oidc_provider_arn
 }
 output "vpc_id" {
  value = module.cluster.vpc_id
 }
--- a/.tofu/platforms/eks/dev/providers.tf
+++ b/.tofu/platforms/eks/dev/providers.tf
@@ -0,0 +1,24 @@
 # CLI and provider version constraints for the dev EKS root configuration.
 terraform {
  # Same minimum CLI version the AKS configurations declare.
  required_version = ">= 1.0"
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    # Used by the cluster module to read the EKS OIDC issuer certificate.
    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
  }
 }
 # Authentication: set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
 # or configure an AWS profile: export AWS_PROFILE=clst
 provider "aws" {
  region = var.region
 }
 # Also passed to the cluster module and used to derive availability zone names.
 variable "region" {
  description = "AWS region for dev environment"
  type        = string
  default     = "eu-west-1"
 }
--- a/.tofu/platforms/eks/modules/cluster/main.tf
+++ b/.tofu/platforms/eks/modules/cluster/main.tf
@@ -0,0 +1,207 @@
 # ─── VPC ──────────────────────────────────────────────────────────────
 # VPC hosting the cluster, with DNS support and DNS hostnames enabled.
 resource "aws_vpc" "main" {
  cidr_block           = var.vpc_cidr
  enable_dns_hostnames = true
  enable_dns_support   = true
  tags = merge(var.tags, { Name = "${var.prefix}-vpc" })
 }
 # Internet gateway; target of the public route table's default route.
 resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id
  tags   = merge(var.tags, { Name = "${var.prefix}-igw" })
 }
 # Public subnets (one per AZ) — for NAT gateways and load balancers
 # cidrsubnet adds 4 bits to the VPC prefix (a /16 VPC yields /20 subnets);
 # public subnets take network numbers 0..N-1, where N is the AZ count.
 resource "aws_subnet" "public" {
  count             = length(var.availability_zones)
  vpc_id            = aws_vpc.main.id
  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index)
  availability_zone = var.availability_zones[count.index]
  map_public_ip_on_launch = true
  # The cluster tag key must match the EKS cluster name "${var.prefix}-eks".
  tags = merge(var.tags, {
    Name                                        = "${var.prefix}-public-${count.index + 1}"
    "kubernetes.io/cluster/${var.prefix}-eks"   = "shared"
    "kubernetes.io/role/elb"                    = "1"
  })
 }
 # Private subnets (one per AZ) — for EKS nodes
 # Network numbers N..2N-1, directly after the public subnets, so the two
 # sets never overlap.
 resource "aws_subnet" "private" {
  count             = length(var.availability_zones)
  vpc_id            = aws_vpc.main.id
  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones))
  availability_zone = var.availability_zones[count.index]
  tags = merge(var.tags, {
    Name                                        = "${var.prefix}-private-${count.index + 1}"
    "kubernetes.io/cluster/${var.prefix}-eks"   = "shared"
    "kubernetes.io/role/internal-elb"           = "1"
  })
 }
 # NAT Gateway (single, in first public subnet — use one per AZ for prod HA)
 # Elastic IP attached to the NAT gateway.
 resource "aws_eip" "nat" {
  domain = "vpc"
  tags   = merge(var.tags, { Name = "${var.prefix}-nat-eip" })
 }
 # Outbound internet path for the private subnets.
 resource "aws_nat_gateway" "main" {
  allocation_id = aws_eip.nat.id
  subnet_id     = aws_subnet.public[0].id
  tags          = merge(var.tags, { Name = "${var.prefix}-nat" })
  # Explicit ordering: the internet gateway must exist before the NAT gateway.
  depends_on = [aws_internet_gateway.main]
 }
 # Public route table: default route goes straight to the internet gateway.
 resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id
  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }
  tags = merge(var.tags, { Name = "${var.prefix}-public-rt" })
 }
 # One association per public subnet.
 resource "aws_route_table_association" "public" {
  count          = length(var.availability_zones)
  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
 }
 # Private route table: default route goes through the single NAT gateway,
 # shared by every private subnet.
 resource "aws_route_table" "private" {
  vpc_id = aws_vpc.main.id
  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.main.id
  }
  tags = merge(var.tags, { Name = "${var.prefix}-private-rt" })
 }
 # One association per private subnet.
 resource "aws_route_table_association" "private" {
  count          = length(var.availability_zones)
  subnet_id      = aws_subnet.private[count.index].id
  route_table_id = aws_route_table.private.id
 }
 # ─── EKS Cluster ──────────────────────────────────────────────────────
 # IAM role assumed by the EKS service (eks.amazonaws.com) for the control
 # plane. name_prefix lets AWS append a unique suffix to the role name.
 resource "aws_iam_role" "eks_cluster" {
  name_prefix = "${var.prefix}-eks-cluster-"
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "eks.amazonaws.com" }
    }]
  })
  tags = var.tags
 }
 # Attaches the AWS-managed AmazonEKSClusterPolicy to the cluster role.
 resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
  role       = aws_iam_role.eks_cluster.name
 }
 # EKS control plane, attached to both the private and public subnets.
 resource "aws_eks_cluster" "main" {
  name     = "${var.prefix}-eks"
  role_arn = aws_iam_role.eks_cluster.arn
  version  = var.kubernetes_version
  vpc_config {
    subnet_ids              = concat(aws_subnet.private[*].id, aws_subnet.public[*].id)
    # The API endpoint is reachable both from inside the VPC and publicly.
    endpoint_private_access = true
    endpoint_public_access  = true
  }
  # Cluster authentication mode: accept both EKS access entries (API) and the
  # aws-auth ConfigMap. This block does not control the OIDC issuer; the issuer
  # is registered with IAM by aws_iam_openid_connect_provider.eks.
  access_config {
    authentication_mode = "API_AND_CONFIG_MAP"
  }
  tags = var.tags
  # The cluster policy must be attached to the role before the cluster is created.
  depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy]
 }
 # OIDC provider — required for IRSA (IAM Roles for Service Accounts)
 # Reads the certificate chain served by the cluster's OIDC issuer URL.
 data "tls_certificate" "eks" {
  url = aws_eks_cluster.main.identity[0].oidc[0].issuer
 }
 # Registers the cluster's OIDC issuer as an IAM identity provider, using the
 # SHA-1 fingerprint of the first certificate returned above as thumbprint.
 resource "aws_iam_openid_connect_provider" "eks" {
  client_id_list  = ["sts.amazonaws.com"]
  thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint]
  url             = aws_eks_cluster.main.identity[0].oidc[0].issuer
  tags = var.tags
 }
 # EKS Node Group
 # IAM role assumed by the worker node EC2 instances (ec2.amazonaws.com).
 resource "aws_iam_role" "eks_nodes" {
  name_prefix = "${var.prefix}-eks-nodes-"
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "ec2.amazonaws.com" }
    }]
  })
  tags = var.tags
 }
 # AWS-managed policies attached to the node role: worker node, VPC CNI and
 # read-only ECR access.
 resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
  role       = aws_iam_role.eks_nodes.name
 }
 resource "aws_iam_role_policy_attachment" "eks_cni_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
  role       = aws_iam_role.eks_nodes.name
 }
 resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
  role       = aws_iam_role.eks_nodes.name
 }
 # Managed node group; nodes are placed in the private subnets only.
 resource "aws_eks_node_group" "main" {
  cluster_name    = aws_eks_cluster.main.name
  node_group_name = "${var.prefix}-nodes"
  node_role_arn   = aws_iam_role.eks_nodes.arn
  subnet_ids      = aws_subnet.private[*].id
  instance_types = [var.node_instance_type]
  # NOTE(review): if an autoscaler adjusts the group at runtime, each apply
  # resets desired_size to var.node_count unless it is ignored via lifecycle —
  # confirm which behaviour is wanted.
  scaling_config {
    desired_size = var.node_count
    max_size     = var.node_max_count
    min_size     = var.node_min_count
  }
  # At most one node is taken out of service at a time during updates.
  update_config {
    max_unavailable = 1
  }
  tags = var.tags
  # Node role policies must be attached before nodes are created.
  depends_on = [
    aws_iam_role_policy_attachment.eks_worker_node_policy,
    aws_iam_role_policy_attachment.eks_cni_policy,
    aws_iam_role_policy_attachment.eks_ecr_readonly,
  ]
 }
--- a/.tofu/platforms/eks/modules/cluster/outputs.tf
+++ b/.tofu/platforms/eks/modules/cluster/outputs.tf
@@ -0,0 +1,26 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # Values exported by the cluster module for the calling root configuration.
 output "cluster_name" {
  description = "EKS cluster name"
  value       = aws_eks_cluster.main.name
 }
 # Echoes the region input; the module itself configures no provider.
 output "aws_region" {
  description = "AWS region"
  value       = var.region
 }
 output "oidc_issuer_url" {
  description = "EKS OIDC issuer URL (for IRSA)"
  value       = aws_eks_cluster.main.identity[0].oidc[0].issuer
 }
 output "oidc_provider_arn" {
  description = "IAM OIDC provider ARN (for IRSA trust policies)"
  value       = aws_iam_openid_connect_provider.eks.arn
 }
 output "vpc_id" {
  description = "VPC ID"
  value       = aws_vpc.main.id
 }
--- a/.tofu/platforms/eks/modules/cluster/providers.tf
+++ b/.tofu/platforms/eks/modules/cluster/providers.tf
@@ -0,0 +1,12 @@
 # Provider requirements for the EKS cluster module. The module declares no
 # provider block; configuration comes from the calling root.
 terraform {
  # Same minimum CLI version the AKS module declares.
  required_version = ">= 1.0"
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    # Needed for data.tls_certificate.eks (OIDC issuer thumbprint).
    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
  }
 }
--- a/.tofu/platforms/eks/modules/cluster/variables.tf
+++ b/.tofu/platforms/eks/modules/cluster/variables.tf
@@ -0,0 +1,61 @@
 # ─── Region ──────────────────────────────────────────────────────────
 # Only surfaced through the aws_region output; resources use the region of
 # the provider passed in by the caller.
 variable "region" {
  description = "AWS region (e.g., eu-west-1, us-east-1)"
  type        = string
 }
 variable "prefix" {
  description = "Prefix for resource names (e.g., clst-dev)"
  type        = string
 }
 # ─── Networking ───────────────────────────────────────────────────────
 # Subnets are carved out with 4 extra prefix bits, i.e. 16 equal slices.
 variable "vpc_cidr" {
  description = "VPC CIDR block"
  type        = string
  default     = "10.100.0.0/16"
 }
 # One public and one private subnet are created per listed zone.
 variable "availability_zones" {
  description = "List of AZs for subnets (2–3 recommended)"
  type        = list(string)
 }
 # ─── EKS Cluster ─────────────────────────────────────────────────────
 variable "node_instance_type" {
  description = "EKS node instance type (e.g., t3.medium, m5.xlarge)"
  type        = string
 }
 # Used as desired_size; expected to lie between node_min_count and
 # node_max_count.
 variable "node_count" {
  description = "Desired number of EKS worker nodes"
  type        = number
 }
 variable "node_min_count" {
  description = "Minimum number of EKS worker nodes"
  type        = number
  default     = 1
 }
 variable "node_max_count" {
  description = "Maximum number of EKS worker nodes"
  type        = number
 }
 variable "kubernetes_version" {
  description = "Kubernetes version for EKS (e.g., \"1.30\")"
  type        = string
  default     = "1.30"
 }
 # ─── Tags ─────────────────────────────────────────────────────────────
 # Merged with a per-resource Name tag on the networking resources.
 variable "tags" {
  description = "Tags applied to all resources"
  type        = map(string)
  default     = {}
 }
--- a/.tofu/platforms/eks/prod/main.tf
+++ b/.tofu/platforms/eks/prod/main.tf
@@ -0,0 +1,21 @@
 # Prod EKS environment: instantiates the shared cluster module across three
 # availability zones with general-purpose nodes.
 module "cluster" {
  source = "../modules/cluster"
  region = var.region
  prefix = "clst"
  # VPC
  # Zone names are built by appending "a"/"b"/"c" to the region name.
  availability_zones = ["${var.region}a", "${var.region}b", "${var.region}c"]
  # EKS — general-purpose nodes for production
  node_instance_type = "m5.xlarge"
  node_count         = 3
  node_min_count     = 3
  node_max_count     = 6
  kubernetes_version = "1.30"
  tags = {
    Environment = "prod"
    ManagedBy   = "tofu"
  }
 }
--- a/.tofu/platforms/eks/prod/outputs.tf
+++ b/.tofu/platforms/eks/prod/outputs.tf
@@ -0,0 +1,5 @@
 # Root outputs of the prod EKS configuration; each forwards the same-named
 # output of module.cluster.
 output "cluster_name" {
  value = module.cluster.cluster_name
 }
 output "aws_region" {
  value = module.cluster.aws_region
 }
 output "oidc_issuer_url" {
  value = module.cluster.oidc_issuer_url
 }
 output "oidc_provider_arn" {
  value = module.cluster.oidc_provider_arn
 }
 output "vpc_id" {
  value = module.cluster.vpc_id
 }
--- a/.tofu/platforms/eks/prod/providers.tf
+++ b/.tofu/platforms/eks/prod/providers.tf
@@ -0,0 +1,22 @@
 # CLI and provider version constraints for the prod EKS root configuration.
 terraform {
  # Same minimum CLI version the AKS configurations declare.
  required_version = ">= 1.0"
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    # Used by the cluster module to read the EKS OIDC issuer certificate.
    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
  }
 }
 # Credentials come from the standard AWS environment variables or profile.
 provider "aws" {
  region = var.region
 }
 # Also passed to the cluster module and used to derive availability zone names.
 variable "region" {
  description = "AWS region for prod environment"
  type        = string
  default     = "eu-west-1"
 }
--- a/.tofu/platforms/eks/workload/main.tf
+++ b/.tofu/platforms/eks/workload/main.tf
@@ -0,0 +1,339 @@
 # =============================================================================
 # AWS Workload Cluster
 # =============================================================================
 # A lean EKS cluster for running application workloads. No managed data
 # services — those live on the platform cluster. ArgoCD (on the platform
 # cluster) deploys apps to this cluster via the app-of-apps pattern.
 #
 # Platform components deployed by deploy-workload.sh:
 #   nginx-ingress, cert-manager, external-dns, external-secrets, alloy
 #
 # Usage:
 #   tofu init && tofu plan && tofu apply
 #   ./sync-tofu-outputs.sh --env aws-workload
 #   ./deploy-workload.sh --env aws-workload
 # =============================================================================
 # ─── Inputs ───────────────────────────────────────────────────────────
 # Naming
 variable "prefix" {
  type        = string
  default     = "clst-workload"
  description = "Prefix for resource names (e.g., clst-workload)"
 }
 # Network layout
 variable "availability_zones" {
  type        = list(string)
  default     = ["eu-west-1a", "eu-west-1b"]
  description = "List of AZs for subnets"
 }
 variable "vpc_cidr" {
  type        = string
  default     = "10.110.0.0/16"
  description = "VPC CIDR block"
 }
 # Node group sizing
 variable "node_instance_type" {
  type        = string
  default     = "t3.medium"
  description = "EKS node instance type"
 }
 variable "node_count" {
  type        = number
  default     = 2
  description = "Desired number of EKS worker nodes"
 }
 variable "node_min_count" {
  type        = number
  default     = 1
  description = "Minimum number of EKS worker nodes"
 }
 variable "node_max_count" {
  type        = number
  default     = 4
  description = "Maximum number of EKS worker nodes"
 }
 variable "kubernetes_version" {
  type        = string
  default     = "1.30"
  description = "Kubernetes version for EKS"
 }
 # DNS — the only input without a default; it is looked up as a Route53 zone.
 variable "domain" {
  type        = string
  description = "Public domain name — must have an existing Route53 hosted zone"
 }
 variable "tags" {
  type        = map(string)
  description = "Tags applied to all resources"
  default = {
    ManagedBy   = "tofu"
    Environment = "workload"
  }
 }
 # ─── VPC ──────────────────────────────────────────────────────────────
 # One VPC with a public and a private subnet per entry in
 # var.availability_zones, an internet gateway for the public side and a
 # single NAT gateway for the private side.
 resource "aws_vpc" "main" {
  cidr_block           = var.vpc_cidr
  enable_dns_hostnames = true
  enable_dns_support   = true
  tags = merge(var.tags, { Name = "${var.prefix}-vpc" })
 }
 resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id
  tags   = merge(var.tags, { Name = "${var.prefix}-igw" })
 }
 # Public subnets: subnet index == AZ index. cidrsubnet() extends the VPC
 # prefix by 4 bits, so the default /16 is split into /20 blocks.
 resource "aws_subnet" "public" {
  count             = length(var.availability_zones)
  vpc_id            = aws_vpc.main.id
  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index)
  availability_zone = var.availability_zones[count.index]
  map_public_ip_on_launch = true
  tags = merge(var.tags, {
    Name                                        = "${var.prefix}-public-${count.index + 1}"
    "kubernetes.io/cluster/${var.prefix}-eks"   = "shared"
    "kubernetes.io/role/elb"                    = "1"
  })
 }
 # Private subnets: same split, but the subnet index is offset by the number
 # of AZs so the blocks do not collide with the public ones.
 resource "aws_subnet" "private" {
  count             = length(var.availability_zones)
  vpc_id            = aws_vpc.main.id
  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones))
  availability_zone = var.availability_zones[count.index]
  tags = merge(var.tags, {
    Name                                        = "${var.prefix}-private-${count.index + 1}"
    "kubernetes.io/cluster/${var.prefix}-eks"   = "shared"
    "kubernetes.io/role/internal-elb"           = "1"
  })
 }
 # Elastic IP used by the NAT gateway below.
 resource "aws_eip" "nat" {
  domain = "vpc"
  tags   = merge(var.tags, { Name = "${var.prefix}-nat-eip" })
 }
 # A single NAT gateway, placed in the first public subnet; every private
 # subnet routes through it (see the private route table).
 resource "aws_nat_gateway" "main" {
  allocation_id = aws_eip.nat.id
  subnet_id     = aws_subnet.public[0].id
  tags          = merge(var.tags, { Name = "${var.prefix}-nat" })
  depends_on = [aws_internet_gateway.main]
 }
 # Public route table: default route via the internet gateway.
 resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id
  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }
  tags = merge(var.tags, { Name = "${var.prefix}-public-rt" })
 }
 resource "aws_route_table_association" "public" {
  count          = length(var.availability_zones)
  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
 }
 # Private route table: default route via the NAT gateway.
 resource "aws_route_table" "private" {
  vpc_id = aws_vpc.main.id
  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.main.id
  }
  tags = merge(var.tags, { Name = "${var.prefix}-private-rt" })
 }
 resource "aws_route_table_association" "private" {
  count          = length(var.availability_zones)
  subnet_id      = aws_subnet.private[count.index].id
  route_table_id = aws_route_table.private.id
 }
 # ─── EKS Cluster ──────────────────────────────────────────────────────
 # IAM role assumable by the EKS service (eks.amazonaws.com).
 resource "aws_iam_role" "eks_cluster" {
  name_prefix = "${var.prefix}-eks-cluster-"
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "eks.amazonaws.com" }
    }]
  })
  tags = var.tags
 }
 resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
  role       = aws_iam_role.eks_cluster.name
 }
 # The cluster is named "<prefix>-eks", matching the kubernetes.io/cluster/...
 # tag key placed on the subnets. It is attached to both the private and the
 # public subnets, with private and public endpoint access both enabled.
 resource "aws_eks_cluster" "main" {
  name     = "${var.prefix}-eks"
  role_arn = aws_iam_role.eks_cluster.arn
  version  = var.kubernetes_version
  vpc_config {
    subnet_ids              = concat(aws_subnet.private[*].id, aws_subnet.public[*].id)
    endpoint_private_access = true
    endpoint_public_access  = true
  }
  access_config {
    authentication_mode = "API_AND_CONFIG_MAP"
  }
  tags = var.tags
  # The cluster policy must be attached to the role before the cluster is created.
  depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy]
 }
 # OIDC provider — required for IRSA
 # Reads the TLS certificate served at the cluster's OIDC issuer URL.
 data "tls_certificate" "eks" {
  url = aws_eks_cluster.main.identity[0].oidc[0].issuer
 }
 # Registers the issuer with IAM; the thumbprint is the SHA-1 fingerprint of
 # the first certificate returned by the data source above.
 resource "aws_iam_openid_connect_provider" "eks" {
  client_id_list  = ["sts.amazonaws.com"]
  thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint]
  url             = aws_eks_cluster.main.identity[0].oidc[0].issuer
  tags = var.tags
 }
 # Worker-node IAM role: assumable by EC2 instances.
 resource "aws_iam_role" "eks_nodes" {
  name_prefix = "${var.prefix}-eks-nodes-"
  tags        = var.tags
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect    = "Allow"
      Principal = { Service = "ec2.amazonaws.com" }
      Action    = "sts:AssumeRole"
    }]
  })
 }
 # Managed policies attached to the node role.
 resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
 }
 resource "aws_iam_role_policy_attachment" "eks_cni_policy" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
 }
 resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
 }
 # Managed node group, placed in the private subnets only.
 resource "aws_eks_node_group" "main" {
  node_group_name = "${var.prefix}-nodes"
  cluster_name    = aws_eks_cluster.main.name
  node_role_arn   = aws_iam_role.eks_nodes.arn
  instance_types  = [var.node_instance_type]
  subnet_ids      = aws_subnet.private[*].id
  tags            = var.tags
  scaling_config {
    min_size     = var.node_min_count
    desired_size = var.node_count
    max_size     = var.node_max_count
  }
  update_config {
    max_unavailable = 1
  }
  # All three node policies must be attached before the node group is created.
  depends_on = [
    aws_iam_role_policy_attachment.eks_worker_node_policy,
    aws_iam_role_policy_attachment.eks_cni_policy,
    aws_iam_role_policy_attachment.eks_ecr_readonly,
  ]
 }
 # ─── External-DNS IRSA ───────────────────────────────────────────────
 # Allows external-dns to manage Route53 records for app ingresses.
 # Looks up the existing public hosted zone for var.domain (not created here).
 data "aws_route53_zone" "main" {
  name         = var.domain
  private_zone = false
 }
 # Trust policy: web-identity federation through the cluster's OIDC provider,
 # limited to tokens whose subject is
 # system:serviceaccount:external-dns:external-dns and whose audience is
 # sts.amazonaws.com. The condition keys are the provider URL without "https://".
 data "aws_iam_policy_document" "external_dns_assume_role" {
  statement {
    effect = "Allow"
    principals {
      type        = "Federated"
      identifiers = [aws_iam_openid_connect_provider.eks.arn]
    }
    actions = ["sts:AssumeRoleWithWebIdentity"]
    condition {
      test     = "StringEquals"
      variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:sub"
      values   = ["system:serviceaccount:external-dns:external-dns"]
    }
    condition {
      test     = "StringEquals"
      variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:aud"
      values   = ["sts.amazonaws.com"]
    }
  }
 }
 resource "aws_iam_role" "external_dns_irsa" {
  name_prefix        = "${var.prefix}-external-dns-irsa-"
  assume_role_policy = data.aws_iam_policy_document.external_dns_assume_role.json
  tags = var.tags
 }
 # Permissions: record changes are limited to the hosted zone looked up above;
 # the list actions are granted on all resources.
 data "aws_iam_policy_document" "external_dns_route53" {
  statement {
    effect    = "Allow"
    actions   = ["route53:ChangeResourceRecordSets"]
    resources = ["arn:aws:route53:::hostedzone/${data.aws_route53_zone.main.zone_id}"]
  }
  statement {
    effect  = "Allow"
    actions = ["route53:ListHostedZones", "route53:ListResourceRecordSets", "route53:ListTagsForResource"]
    resources = ["*"]
  }
 }
 # Inline policy attaching the Route53 permissions to the IRSA role.
 resource "aws_iam_role_policy" "external_dns_route53" {
  name_prefix = "${var.prefix}-external-dns-route53-"
  role        = aws_iam_role.external_dns_irsa.id
  policy      = data.aws_iam_policy_document.external_dns_route53.json
 }
--- a/.tofu/platforms/eks/workload/outputs.tf
+++ b/.tofu/platforms/eks/workload/outputs.tf
@@ -0,0 +1,3 @@
 # Values exported by the workload environment.
 output "cluster_name" {
  value = aws_eks_cluster.main.name
 }
 output "aws_region" {
  value = var.region
 }
 output "external_dns_irsa_role_arn" {
  value = aws_iam_role.external_dns_irsa.arn
 }
--- a/.tofu/platforms/eks/workload/providers.tf
+++ b/.tofu/platforms/eks/workload/providers.tf
@@ -0,0 +1,24 @@
 # Region input consumed by the AWS provider below.
 variable "region" {
  type        = string
  default     = "eu-west-1"
  description = "AWS region for the workload environment"
 }
 # Provider version constraints for this environment.
 terraform {
  required_providers {
    tls = {
      version = "~> 4.0"
      source  = "hashicorp/tls"
    }
    aws = {
      version = "~> 5.0"
      source  = "hashicorp/aws"
    }
  }
 }
 # Authentication comes from the environment: either export
 # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN,
 # or select an AWS profile, e.g. `export AWS_PROFILE=clst`.
 provider "aws" {
  region = var.region
 }
--- a/.tofu/platforms/gke/dev/main.tf
+++ b/.tofu/platforms/gke/dev/main.tf
@@ -0,0 +1,17 @@
 # Dev GKE environment: instantiates the shared cluster module.
 module "cluster" {
  source = "../modules/cluster"
  prefix     = "clst-dev"
  region     = var.region
  project_id = var.project_id
  # GKE — small dev nodes
  node_count          = 2
  node_machine_type   = "e2-standard-2"
  deletion_protection = false
  labels = {
    managed-by  = "tofu"
    environment = "dev"
  }
 }
--- a/.tofu/platforms/gke/dev/outputs.tf
+++ b/.tofu/platforms/gke/dev/outputs.tf
@@ -0,0 +1,3 @@
 # Re-export the cluster module's outputs at the environment root.
 output "cluster_name" {
  value = module.cluster.cluster_name
 }
 output "project_id" {
  value = module.cluster.project_id
 }
 output "region" {
  value = module.cluster.region
 }
--- a/.tofu/platforms/gke/dev/providers.tf
+++ b/.tofu/platforms/gke/dev/providers.tf
@@ -0,0 +1,26 @@
 # Inputs consumed by the Google provider below.
 variable "project_id" {
  type        = string
  description = "GCP project ID for the dev environment"
 }
 variable "region" {
  type        = string
  default     = "europe-west4"
  description = "GCP region"
 }
 # Provider version constraint for this environment.
 terraform {
  required_providers {
    google = {
      version = "~> 6.0"
      source  = "hashicorp/google"
    }
  }
 }
 # Authentication: Application Default Credentials
 # (`gcloud auth application-default login`), or point
 # GOOGLE_APPLICATION_CREDENTIALS at a service account key file.
 provider "google" {
  region  = var.region
  project = var.project_id
 }
--- a/.tofu/platforms/gke/modules/cluster/main.tf
+++ b/.tofu/platforms/gke/modules/cluster/main.tf
@@ -0,0 +1,115 @@
 # ─── Required APIs ────────────────────────────────────────────────────
 # Enables the Compute and Container APIs on the project.
 # disable_on_destroy = false leaves them enabled when this stack is destroyed.
 resource "google_project_service" "compute" {
  project            = var.project_id
  service            = "compute.googleapis.com"
  disable_on_destroy = false
 }
 resource "google_project_service" "container" {
  project            = var.project_id
  service            = "container.googleapis.com"
  disable_on_destroy = false
 }
 # ─── Networking ───────────────────────────────────────────────────────
 # Custom-mode VPC (no auto-created subnets); waits for the Compute API.
 resource "google_compute_network" "main" {
  project                 = var.project_id
  name                    = "${var.prefix}-vpc"
  auto_create_subnetworks = false
  depends_on = [google_project_service.compute]
 }
 # Single regional subnet. All ranges are hard-coded, so every environment
 # that instantiates this module gets the same addressing.
 resource "google_compute_subnetwork" "main" {
  project       = var.project_id
  name          = "${var.prefix}-subnet"
  ip_cidr_range = "10.100.0.0/22"
  region        = var.region
  network       = google_compute_network.main.id
  # Secondary ranges required for GKE VPC-native cluster
  # The range names are referenced by the cluster's ip_allocation_policy.
  secondary_ip_range {
    range_name    = "pods"
    ip_cidr_range = "10.200.0.0/14" # /14 = ~262k pod IPs
  }
  secondary_ip_range {
    range_name    = "services"
    ip_cidr_range = "10.204.0.0/20" # /20 = ~4k service IPs
  }
 }
 # ─── GKE Cluster ──────────────────────────────────────────────────────
 #
 # Regional cluster (3 control-plane replicas) for HA.
 # Workload Identity enabled — allows K8s service accounts to impersonate
 # Google Service Accounts for keyless access to GCP services.
 resource "google_container_cluster" "main" {
  project  = var.project_id
  name     = "${var.prefix}-gke"
  location = var.region # regional cluster
  network    = google_compute_network.main.id
  subnetwork = google_compute_subnetwork.main.id
  # VPC-native cluster with alias IP ranges
  ip_allocation_policy {
    cluster_secondary_range_name  = "pods"
    services_secondary_range_name = "services"
  }
  # Workload Identity pool — enables OIDC token projection for pods
  workload_identity_config {
    workload_pool = "${var.project_id}.svc.id.goog"
  }
  # Remove default node pool — we manage our own below
  remove_default_node_pool = true
  initial_node_count       = 1
  deletion_protection = var.deletion_protection
  # Version selection:
  #   kubernetes_version == null → enroll in the STABLE release channel and
  #                                let GKE choose the version.
  #   kubernetes_version set     → no release_channel block; the value is
  #                                applied as the minimum control-plane version.
  # Previously the variable only switched the release channel off and was
  # never passed to the cluster. A null value leaves the argument unset.
  min_master_version = var.kubernetes_version
  dynamic "release_channel" {
    for_each = var.kubernetes_version == null ? [1] : []
    content {
      channel = "STABLE"
    }
  }
  resource_labels = var.labels
  # The Container API must be enabled before the cluster can be created.
  depends_on = [google_project_service.container]
 }
 # Worker node pool attached to the cluster above (replaces the removed default pool).
 resource "google_container_node_pool" "main" {
  name       = "${var.prefix}-nodes"
  project    = var.project_id
  cluster    = google_container_cluster.main.name
  location   = var.region
  node_count = var.node_count
  management {
    auto_upgrade = true
    auto_repair  = true
  }
  node_config {
    machine_type = var.node_machine_type
    labels       = merge(var.labels, { role = "worker" })
    oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
    # GKE_METADATA mode is required for Workload Identity
    workload_metadata_config {
      mode = "GKE_METADATA"
    }
  }
 }
--- a/.tofu/platforms/gke/modules/cluster/outputs.tf
+++ b/.tofu/platforms/gke/modules/cluster/outputs.tf
@@ -0,0 +1,16 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # The cluster name comes from the created resource; project and region are
 # passed through from the module inputs.
 output "cluster_name" {
  value       = google_container_cluster.main.name
  description = "GKE cluster name"
 }
 output "project_id" {
  value       = var.project_id
  description = "GCP project ID"
 }
 output "region" {
  value       = var.region
  description = "GCP region"
 }
--- a/.tofu/platforms/gke/modules/cluster/providers.tf
+++ b/.tofu/platforms/gke/modules/cluster/providers.tf
@@ -0,0 +1,8 @@
 # Provider requirement for the module. Only the version constraint is
 # declared here; there is no provider block in this file.
 terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 6.0"
    }
  }
 }
--- a/.tofu/platforms/gke/modules/cluster/variables.tf
+++ b/.tofu/platforms/gke/modules/cluster/variables.tf
@@ -0,0 +1,48 @@
 # ─── Project / Region ────────────────────────────────────────────────
 # All three are required (no defaults).
 variable "project_id" {
  type        = string
  description = "GCP project ID"
 }
 variable "region" {
  type        = string
  description = "GCP region (e.g., europe-west4, europe-west1)"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for resource names (e.g., clst-dev)"
 }
 # ─── GKE Cluster ─────────────────────────────────────────────────────
 variable "node_machine_type" {
  type        = string
  description = "GKE node machine type (e.g., e2-standard-2, e2-standard-4)"
 }
 variable "node_count" {
  type        = number
  description = "Number of nodes per zone (regional cluster spawns nodes in each zone)"
 }
 variable "kubernetes_version" {
  type        = string
  default     = null
  description = "GKE Kubernetes version channel (null = STABLE release channel)"
 }
 variable "deletion_protection" {
  type        = bool
  default     = false
  description = "Prevent cluster deletion (set true for production)"
 }
 # ─── Labels ──────────────────────────────────────────────────────────
 variable "labels" {
  type        = map(string)
  default     = {}
  description = "Labels applied to all resources"
 }
--- a/.tofu/platforms/gke/prod/main.tf
+++ b/.tofu/platforms/gke/prod/main.tf
@@ -0,0 +1,17 @@
 # Production GKE environment: instantiates the shared cluster module.
 module "cluster" {
  source = "../modules/cluster"
  prefix     = "clst"
  region     = var.region
  project_id = var.project_id
  # GKE — general-purpose nodes for production
  node_count          = 3
  node_machine_type   = "e2-standard-4"
  deletion_protection = true
  labels = {
    managed-by  = "tofu"
    environment = "prod"
  }
 }
--- a/.tofu/platforms/gke/prod/outputs.tf
+++ b/.tofu/platforms/gke/prod/outputs.tf
@@ -0,0 +1,3 @@
 # Re-export the cluster module's outputs at the environment root.
 output "cluster_name" {
  value = module.cluster.cluster_name
 }
 output "project_id" {
  value = module.cluster.project_id
 }
 output "region" {
  value = module.cluster.region
 }
--- a/.tofu/platforms/gke/prod/providers.tf
+++ b/.tofu/platforms/gke/prod/providers.tf
@@ -0,0 +1,24 @@
 # Inputs consumed by the Google provider below.
 variable "project_id" {
  type        = string
  description = "GCP project ID for the prod environment"
 }
 variable "region" {
  type        = string
  default     = "europe-west1"
  description = "GCP region"
 }
 # Provider version constraint for this environment.
 terraform {
  required_providers {
    google = {
      version = "~> 6.0"
      source  = "hashicorp/google"
    }
  }
 }
 provider "google" {
  region  = var.region
  project = var.project_id
 }
--- a/.tofu/platforms/gke/workload/main.tf
+++ b/.tofu/platforms/gke/workload/main.tf
@@ -0,0 +1,194 @@
 # =============================================================================
 # GCP Workload Cluster
 # =============================================================================
 # A lean GKE cluster for running application workloads. No managed data
 # services — those live on the platform cluster. ArgoCD (on the platform
 # cluster) deploys apps to this cluster via the app-of-apps pattern.
 #
 # Platform components deployed by deploy-workload.sh:
 #   nginx-ingress, cert-manager, external-dns, external-secrets, alloy
 #
 # Usage:
 #   tofu init && tofu plan && tofu apply
 #   ./sync-tofu-outputs.sh --env gcp-workload
 #   ./deploy-workload.sh --env gcp-workload
 # =============================================================================
 # ─── Inputs ───────────────────────────────────────────────────────────
 variable "prefix" {
  type        = string
  default     = "clst-workload"
  description = "Prefix for resource names (e.g., clst-workload)"
 }
 # Node pool sizing
 variable "node_machine_type" {
  type        = string
  default     = "e2-standard-2"
  description = "GKE node machine type"
 }
 variable "node_count" {
  type        = number
  default     = 1
  description = "Number of nodes per zone"
 }
 # Cluster behaviour
 variable "kubernetes_version" {
  type        = string
  default     = null
  description = "GKE Kubernetes version (null = STABLE release channel)"
 }
 variable "deletion_protection" {
  type        = bool
  default     = false
  description = "Prevent cluster deletion"
 }
 variable "labels" {
  type        = map(string)
  description = "Labels applied to all resources"
  default = {
    managed-by  = "tofu"
    environment = "workload"
  }
 }
 # ─── Required APIs ────────────────────────────────────────────────────
 # Four project services are enabled; none is disabled again on destroy.
 resource "google_project_service" "compute" {
  service            = "compute.googleapis.com"
  project            = var.project_id
  disable_on_destroy = false
 }
 resource "google_project_service" "container" {
  service            = "container.googleapis.com"
  project            = var.project_id
  disable_on_destroy = false
 }
 resource "google_project_service" "iam" {
  service            = "iam.googleapis.com"
  project            = var.project_id
  disable_on_destroy = false
 }
 resource "google_project_service" "dns" {
  service            = "dns.googleapis.com"
  project            = var.project_id
  disable_on_destroy = false
 }
 # ─── Networking ───────────────────────────────────────────────────────
 # Custom-mode VPC; creation waits for the Compute API.
 resource "google_compute_network" "main" {
  name                    = "${var.prefix}-vpc"
  project                 = var.project_id
  auto_create_subnetworks = false
  depends_on = [google_project_service.compute]
 }
 # Regional subnet for the workload cluster. The two secondary ranges are
 # referenced by name from the cluster's ip_allocation_policy.
 resource "google_compute_subnetwork" "main" {
  project       = var.project_id
  name          = "${var.prefix}-subnet"
  ip_cidr_range = "10.110.0.0/22"
  region        = var.region
  network       = google_compute_network.main.id
  secondary_ip_range {
    range_name = "pods"
    # A /14 must start on a multiple of 4 in the second octet. The previous
    # value 10.210.0.0/14 had host bits set and is not a valid network
    # address. 10.208.0.0/14 (10.208.0.0–10.211.255.255) is the aligned block
    # that contains it and does not overlap the services range below.
    ip_cidr_range = "10.208.0.0/14"
  }
  secondary_ip_range {
    range_name    = "services"
    ip_cidr_range = "10.214.0.0/20"
  }
 }
 # ─── GKE Cluster ──────────────────────────────────────────────────────
 # Regional, VPC-native cluster with Workload Identity. The default node pool
 # is removed; a separately managed pool is attached instead.
 resource "google_container_cluster" "main" {
  project  = var.project_id
  name     = "${var.prefix}-gke"
  location = var.region
  network    = google_compute_network.main.id
  subnetwork = google_compute_subnetwork.main.id
  # Pod and service IPs come from the subnet's named secondary ranges.
  ip_allocation_policy {
    cluster_secondary_range_name  = "pods"
    services_secondary_range_name = "services"
  }
  workload_identity_config {
    workload_pool = "${var.project_id}.svc.id.goog"
  }
  remove_default_node_pool = true
  initial_node_count       = 1
  deletion_protection = var.deletion_protection
  # Version selection:
  #   kubernetes_version == null → enroll in the STABLE release channel.
  #   kubernetes_version set     → no release_channel block; the value is
  #                                applied as the minimum control-plane version.
  # Previously the variable only switched the release channel off and was
  # never passed to the cluster. A null value leaves the argument unset.
  min_master_version = var.kubernetes_version
  dynamic "release_channel" {
    for_each = var.kubernetes_version == null ? [1] : []
    content {
      channel = "STABLE"
    }
  }
  resource_labels = var.labels
  # The Container API must be enabled before the cluster can be created.
  depends_on = [google_project_service.container]
 }
 # Worker node pool attached to the workload cluster.
 resource "google_container_node_pool" "main" {
  name       = "${var.prefix}-nodes"
  project    = var.project_id
  cluster    = google_container_cluster.main.name
  location   = var.region
  node_count = var.node_count
  management {
    auto_upgrade = true
    auto_repair  = true
  }
  node_config {
    machine_type = var.node_machine_type
    labels       = merge(var.labels, { role = "worker" })
    oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
    workload_metadata_config {
      mode = "GKE_METADATA"
    }
  }
 }
 # ─── External-DNS Workload Identity ──────────────────────────────────
 # Allows external-dns to manage Cloud DNS records for app ingresses.
 # Google service account for external-dns; waits for the IAM API to be enabled.
 resource "google_service_account" "external_dns" {
  project      = var.project_id
  account_id   = "${var.prefix}-external-dns"
  display_name = "External-DNS Service Account (Workload Identity)"
  depends_on = [google_project_service.iam]
 }
 # Grants the service account roles/dns.admin at project level.
 # NOTE(review): nothing in this file depends on google_project_service.dns —
 # confirm whether an explicit ordering is needed.
 resource "google_project_iam_member" "external_dns_dns_admin" {
  project = var.project_id
  role    = "roles/dns.admin"
  member  = "serviceAccount:${google_service_account.external_dns.email}"
 }
 # Grants roles/iam.workloadIdentityUser on the service account to the
 # workload-pool member [external-dns/external-dns].
 resource "google_service_account_iam_member" "external_dns_workload_identity" {
  service_account_id = google_service_account.external_dns.name
  role               = "roles/iam.workloadIdentityUser"
  member             = "serviceAccount:${var.project_id}.svc.id.goog[external-dns/external-dns]"
 }
--- a/.tofu/platforms/gke/workload/outputs.tf
+++ b/.tofu/platforms/gke/workload/outputs.tf
@@ -0,0 +1,4 @@
 # Values exported by the workload environment.
 output "cluster_name" {
  value = google_container_cluster.main.name
 }
 output "project_id" {
  value = var.project_id
 }
 output "region" {
  value = var.region
 }
 output "external_dns_gsa_email" {
  value = google_service_account.external_dns.email
 }
--- a/.tofu/platforms/gke/workload/providers.tf
+++ b/.tofu/platforms/gke/workload/providers.tf
@@ -0,0 +1,26 @@
 # Inputs consumed by the Google provider below.
 variable "project_id" {
  type        = string
  description = "GCP project ID for the workload environment"
 }
 variable "region" {
  type        = string
  default     = "europe-west4"
  description = "GCP region"
 }
 # Provider version constraint for this environment.
 terraform {
  required_providers {
    google = {
      version = "~> 6.0"
      source  = "hashicorp/google"
    }
  }
 }
 # Authentication: Application Default Credentials
 # (`gcloud auth application-default login`), or point
 # GOOGLE_APPLICATION_CREDENTIALS at a service account key file.
 provider "google" {
  region  = var.region
  project = var.project_id
 }
--- a/.tofu/platforms/upc/dev/main.tf
+++ b/.tofu/platforms/upc/dev/main.tf
@@ -0,0 +1,14 @@
 # Dev UpCloud environment: instantiates the shared cluster module.
 module "cluster" {
  source = "../modules/cluster"
  prefix = "clst-dev"
  zone   = "no-svg1"
  # Private network
  network_cidr = "10.100.0.0/24"
  # Worker nodes
  node_count = 2
  node_plan  = "DEV-1xCPU-2GB"
  tags = {
    ManagedBy   = "tofu"
    Environment = "dev"
  }
 }
--- a/.tofu/platforms/upc/dev/outputs.tf
+++ b/.tofu/platforms/upc/dev/outputs.tf
@@ -0,0 +1,13 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # Re-export the cluster module's outputs at the environment root.
 output "cluster_id"   { value = module.cluster.cluster_id }
 output "cluster_name" { value = module.cluster.cluster_name }
 output "zone"         { value = module.cluster.zone }
--- a/.tofu/platforms/upc/dev/providers.tf
+++ b/.tofu/platforms/upc/dev/providers.tf
@@ -0,0 +1,14 @@
 # Tool and provider version constraints for the dev environment.
 terraform {
  required_version = ">= 1.0"
  required_providers {
    upcloud = {
      source  = "UpCloudLtd/upcloud"
      version = "~> 5.0"
    }
  }
 }
 provider "upcloud" {
  # No credentials are configured here; they come from environment variables:
  # UPCLOUD_USERNAME, UPCLOUD_PASSWORD
  # NOTE(review): .tofu/configs/upc.env.example supplies UPCLOUD_TOKEN rather
  # than a username/password pair — confirm the provider version in use reads
  # the token, and align this comment with the env file.
 }
--- a/.tofu/platforms/upc/modules/cluster/main.tf
+++ b/.tofu/platforms/upc/modules/cluster/main.tf
@@ -0,0 +1,56 @@
 # Router for the private network
 # All resource names in this module follow "<prefix>-<cluster_name>[-suffix]".
 resource "upcloud_router" "kubernetes" {
  name = "${var.prefix}-${var.cluster_name}-router"
 }
 # Gateway for internet connectivity
 # NAT feature only, attached to the router above.
 resource "upcloud_gateway" "kubernetes" {
  name     = "${var.prefix}-${var.cluster_name}-gateway"
  zone     = var.zone
  features = ["nat"]
  router {
    id = upcloud_router.kubernetes.id
  }
 }
 # Private network for the Kubernetes cluster
 # DHCP is enabled and advertises a default route; the gateway address is the
 # first host of var.network_cidr.
 resource "upcloud_network" "kubernetes" {
  name   = "${var.prefix}-${var.cluster_name}-network"
  zone   = var.zone
  router = upcloud_router.kubernetes.id
  ip_network {
    address            = var.network_cidr
    dhcp               = true
    dhcp_default_route = true
    family             = "IPv4"
    gateway            = cidrhost(var.network_cidr, 1)
  }
  # Explicit ordering: the NAT gateway is created before the network.
  depends_on = [upcloud_gateway.kubernetes]
 }
 # Kubernetes cluster
 # API access is limited to var.control_plane_ip_filter; node groups are private.
 resource "upcloud_kubernetes_cluster" "main" {
  name                    = "${var.prefix}-${var.cluster_name}"
  zone                    = var.zone
  network                 = upcloud_network.kubernetes.id
  control_plane_ip_filter = var.control_plane_ip_filter
  private_node_groups = true
 }
 # Node group for worker nodes
 # anti_affinity is enabled only when more than one node is requested.
 resource "upcloud_kubernetes_node_group" "workers" {
  cluster       = upcloud_kubernetes_cluster.main.id
  name          = "${var.prefix}-${var.cluster_name}-workers"
  node_count    = var.node_count
  plan          = var.node_plan
  anti_affinity = var.node_count > 1
  labels = {
    prefix  = var.prefix
    cluster = var.cluster_name
    role    = "worker"
    # Falls back to "dev" when the tags map has no "Environment" key.
    env     = lookup(var.tags, "Environment", "dev")
  }
 }
--- a/.tofu/platforms/upc/modules/cluster/outputs.tf
+++ b/.tofu/platforms/upc/modules/cluster/outputs.tf
@@ -0,0 +1,31 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # IDs, name and version come from the created resources; CIDR and zone are
 # passed through from the module inputs.
 output "cluster_id" {
  value       = upcloud_kubernetes_cluster.main.id
  description = "The ID of the Kubernetes cluster"
 }
 output "cluster_name" {
  value       = upcloud_kubernetes_cluster.main.name
  description = "The name of the Kubernetes cluster"
 }
 output "network_id" {
  value       = upcloud_network.kubernetes.id
  description = "The ID of the private network"
 }
 output "network_cidr" {
  value       = var.network_cidr
  description = "The CIDR block of the private network"
 }
 output "kubernetes_version" {
  value       = upcloud_kubernetes_cluster.main.version
  description = "The Kubernetes version of the cluster"
 }
 output "zone" {
  value       = var.zone
  description = "The zone where the cluster is deployed"
 }
--- a/.tofu/platforms/upc/modules/cluster/providers.tf
+++ b/.tofu/platforms/upc/modules/cluster/providers.tf
@@ -0,0 +1,8 @@
 # Provider requirement for the module. Only the version constraint is
 # declared here; there is no provider block in this file.
 terraform {
  required_providers {
    upcloud = {
      source  = "UpCloudLtd/upcloud"
      version = "~> 5.0"
    }
  }
 }
--- a/.tofu/platforms/upc/modules/cluster/variables.tf
+++ b/.tofu/platforms/upc/modules/cluster/variables.tf
@@ -0,0 +1,44 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # Naming
 variable "prefix" {
  type        = string
  description = "Prefix for resource names"
 }
 variable "cluster_name" {
  type        = string
  default     = "main"
  description = "Name of the Kubernetes cluster"
 }
 # Placement
 variable "zone" {
  type        = string
  description = "UpCloud zone"
 }
 # Worker nodes
 variable "node_plan" {
  type        = string
  description = "UpCloud server plan for worker nodes"
 }
 variable "node_count" {
  type        = number
  description = "Number of worker nodes"
 }
 # Network and API access
 variable "network_cidr" {
  type        = string
  default     = "10.100.0.0/24"
  description = "CIDR block for the private network"
 }
 variable "control_plane_ip_filter" {
  type        = list(string)
  default     = ["0.0.0.0/0"]
  description = "CIDRs allowed to access the K8s API"
 }
 # Key/value metadata for the cluster. The module reads it with
 # lookup(var.tags, "Environment", "dev"), so an empty map is valid input.
 # Defaults to {} to match the other platform modules' tags/labels variables;
 # callers that already pass a map are unaffected.
 variable "tags" {
  description = "Labels to apply to resources"
  type        = map(string)
  default     = {}
 }
--- a/.tofu/platforms/upc/prod/main.tf
+++ b/.tofu/platforms/upc/prod/main.tf
@@ -0,0 +1,16 @@
 # Production UpCloud environment: instantiates the shared cluster module.
 module "cluster" {
  source = "../modules/cluster"
  prefix = "clst"
  zone   = "de-fra1"
  # Private network and API access
  network_cidr            = "10.100.0.0/24"
  control_plane_ip_filter = ["0.0.0.0/0"] # TODO: restrict to known CIDRs
  # Worker nodes
  node_count = 3
  node_plan  = "4xCPU-8GB"
  tags = {
    ManagedBy   = "tofu"
    Environment = "prod"
  }
 }
--- a/.tofu/platforms/upc/prod/outputs.tf
+++ b/.tofu/platforms/upc/prod/outputs.tf
@@ -0,0 +1,13 @@
 # ─── Cluster ─────────────────────────────────────────────────────────
 # Re-export the cluster module's outputs at the environment root.
 output "cluster_id"   { value = module.cluster.cluster_id }
 output "cluster_name" { value = module.cluster.cluster_name }
 output "zone"         { value = module.cluster.zone }
--- a/.tofu/platforms/upc/prod/providers.tf
+++ b/.tofu/platforms/upc/prod/providers.tf
@@ -0,0 +1,14 @@
 # Tool and provider version constraints for the prod environment.
 terraform {
  required_version = ">= 1.0"
  required_providers {
    upcloud = {
      source  = "UpCloudLtd/upcloud"
      version = "~> 5.0"
    }
  }
 }
 provider "upcloud" {
  # No credentials are configured here; they come from environment variables:
  # UPCLOUD_USERNAME, UPCLOUD_PASSWORD
  # NOTE(review): .tofu/configs/upc.env.example supplies UPCLOUD_TOKEN rather
  # than a username/password pair — confirm the provider version in use reads
  # the token, and align this comment with the env file.
 }
--- a/.tofu/platforms/upc/workload/main.tf
+++ b/.tofu/platforms/upc/workload/main.tf
@@ -0,0 +1,116 @@
 # =============================================================================
 # UpCloud Workload Cluster
 # =============================================================================
 # A lean UKS cluster for running application workloads. No managed data
 # services — those live on the platform cluster. ArgoCD (on the platform
 # cluster) deploys apps to this cluster via the app-of-apps pattern.
 #
 # Platform components deployed by deploy-workload.sh:
 #   nginx-ingress, cert-manager, external-dns, external-secrets, alloy
 #
 # Usage:
 #   tofu init && tofu plan && tofu apply
 #   ./sync-tofu-outputs.sh --env upcloud-workload
 #   ./deploy-workload.sh --env upcloud-workload
 # =============================================================================
 # Prefix shared by every resource name in this root. Each resource appends its
 # own "-workload…" suffix (e.g. "${var.prefix}-workload-router"), so the prefix
 # must not itself end in "workload": the former default "clst-workload"
 # produced names such as "clst-workload-workload-router".
 variable "prefix" {
  description = "Prefix for resource names"
  type        = string
  default     = "clst"
 }
 # Zone used for the gateway, the network and the cluster.
 variable "zone" {
  description = "UpCloud zone"
  type        = string
  default     = "fi-hel1"
 }
 # Server plan passed to the worker node group.
 variable "node_plan" {
  description = "UpCloud server plan for worker nodes"
  type        = string
  default     = "2xCPU-4GB"
 }
 # Worker count; any value above 1 also switches on anti_affinity for the
 # worker node group.
 variable "node_count" {
  description = "Number of worker nodes"
  type        = number
  default     = 2
 }
 # Private network range; its first host address (cidrhost(..., 1)) is used as
 # the network's gateway address.
 variable "network_cidr" {
  description = "CIDR block for the private network"
  type        = string
  default     = "10.110.0.0/24"
 }
 # NOTE(review): the default ["0.0.0.0/0"] admits every IPv4 address to the
 # Kubernetes API; override it with known CIDRs.
 variable "control_plane_ip_filter" {
  description = "CIDRs allowed to access the K8s API"
  type        = list(string)
  default     = ["0.0.0.0/0"]
 }
 # In this file only the "Environment" key is read, to fill the node group's
 # "env" label.
 variable "tags" {
  description = "Labels to apply to resources"
  type        = map(string)
  default = {
    Environment = "workload"
    ManagedBy   = "tofu"
  }
 }
 # ─── Networking ───────────────────────────────────────────────────────
 # Router that both the gateway and the private network attach to.
 resource "upcloud_router" "kubernetes" {
  name = "${var.prefix}-workload-router"
 }
 # Gateway with the "nat" feature, attached to the router above.
 resource "upcloud_gateway" "kubernetes" {
  name     = "${var.prefix}-workload-gateway"
  zone     = var.zone
  features = ["nat"]
  router {
    id = upcloud_router.kubernetes.id
  }
 }
 # Private IPv4 network for the cluster. DHCP is enabled and advertises a
 # default route; the gateway address is the first host of var.network_cidr.
 resource "upcloud_network" "kubernetes" {
  name   = "${var.prefix}-workload-network"
  zone   = var.zone
  router = upcloud_router.kubernetes.id
  ip_network {
    address            = var.network_cidr
    dhcp               = true
    dhcp_default_route = true
    family             = "IPv4"
    gateway            = cidrhost(var.network_cidr, 1)
  }
  # Explicit ordering: the network references only the router, so without this
  # nothing would force the gateway to be created first.
  depends_on = [upcloud_gateway.kubernetes]
 }
 # ─── Kubernetes Cluster ───────────────────────────────────────────────
 # Cluster attached to the private network defined in this file; API access is
 # limited to var.control_plane_ip_filter.
 resource "upcloud_kubernetes_cluster" "main" {
  name                    = "${var.prefix}-workload"
  zone                    = var.zone
  network                 = upcloud_network.kubernetes.id
  control_plane_ip_filter = var.control_plane_ip_filter
  # NOTE(review): presumably keeps worker nodes without public addresses, which
  # would be why the NAT gateway exists — confirm against the provider docs.
  private_node_groups = true
 }
 # Single worker node group sized by var.node_count / var.node_plan.
 resource "upcloud_kubernetes_node_group" "workers" {
  cluster       = upcloud_kubernetes_cluster.main.id
  name          = "${var.prefix}-workload-workers"
  node_count    = var.node_count
  plan          = var.node_plan
  # Anti-affinity is only requested when there is more than one node.
  anti_affinity = var.node_count > 1
  labels = {
    prefix  = var.prefix
    cluster = "workload"
    role    = "worker"
    # Falls back to "workload" when var.tags has no "Environment" key.
    env     = lookup(var.tags, "Environment", "workload")
  }
 }
--- a/.tofu/platforms/upc/workload/outputs.tf
+++ b/.tofu/platforms/upc/workload/outputs.tf
@@ -0,0 +1,3 @@
 # Identifiers of the workload cluster. The lifecycle scripts read cluster_name
 # and cluster_id with `tofu output -raw <name>`.
 output "cluster_name" {
  description = "Name of the workload Kubernetes cluster"
  value       = upcloud_kubernetes_cluster.main.name
 }
 output "cluster_id" {
  description = "ID of the workload Kubernetes cluster"
  value       = upcloud_kubernetes_cluster.main.id
 }
 output "zone" {
  description = "UpCloud zone the workload cluster is deployed in"
  value       = var.zone
 }
--- a/.tofu/platforms/upc/workload/providers.tf
+++ b/.tofu/platforms/upc/workload/providers.tf
@@ -0,0 +1,14 @@
 # Toolchain constraints: core version 1.0 or newer, and the 5.x series of the
 # UpCloudLtd/upcloud provider.
 terraform {
  required_version = ">= 1.0"
  required_providers {
    upcloud = {
      source  = "UpCloudLtd/upcloud"
      version = "~> 5.0"
    }
  }
 }
 provider "upcloud" {
  # No credentials are configured here; they come from environment variables.
  # configs/upc.env.example supplies UPCLOUD_TOKEN; the alternative is the
  # UPCLOUD_USERNAME / UPCLOUD_PASSWORD pair.
  # NOTE(review): confirm the provider release resolved by "~> 5.0" accepts
  # UPCLOUD_TOKEN.
 }
--- a/.tofu/scripts/get-kubeconfig.sh
+++ b/.tofu/scripts/get-kubeconfig.sh
@@ -0,0 +1,66 @@
 #!/bin/bash
 # Print the location of a cluster's kubeconfig, fetching it from the platform
 # first when no usable cached copy exists.
 #
 # Usage: get-kubeconfig.sh <cluster>      (e.g. aks-dev, eks-prod)
 #
 # <cluster> is split at its first "-" into <platform> (aks|eks|gke|upc) and
 # <env>. The kubeconfig is cached at <project>/private/<cluster>/kubeconfig.
 # Cluster coordinates come from `tofu output`; when an output is unavailable a
 # conventional name or an environment variable is used instead.
 #
 # Exits non-zero when the platform is unknown, the tofu directory is missing,
 # the cluster cannot be identified, or the platform CLI fails.
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
 PROJECT_ROOT="$(dirname "$TOFU_ROOT")"
 CLUSTER="${1:?Usage: $0 <cluster> (e.g., aks-dev, eks-prod)}"
 PLATFORM="${CLUSTER%%-*}"
 ENV="${CLUSTER#*-}"
 KUBECONFIG_DIR="$PROJECT_ROOT/private/$CLUSTER"
 KUBECONFIG_FILE="$KUBECONFIG_DIR/kubeconfig"
 # -s rather than -f: an empty file left behind by an earlier failed fetch is
 # not a usable cache and must not short-circuit the download.
 if [[ -s "$KUBECONFIG_FILE" ]]; then
  echo "Kubeconfig already exists: $KUBECONFIG_FILE"
  echo ""
  echo "  export KUBECONFIG=$KUBECONFIG_FILE"
  exit 0
 fi
 echo "No cached kubeconfig. Fetching from platform..."
 # Validate the request before creating anything on disk.
 case "$PLATFORM" in
  aks|eks|gke|upc) ;;
  *)
    echo "Error: unknown platform '$PLATFORM'" >&2
    exit 1
    ;;
 esac
 TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
 if [[ ! -d "$TOFU_DIR" ]]; then
  echo "Error: tofu directory not found: $TOFU_DIR" >&2
  exit 1
 fi
 # Load platform credentials (set -a exports everything the file assigns).
 ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
 if [[ -f "$ENV_FILE" ]]; then
  set -a
  # shellcheck disable=SC1090
  source "$ENV_FILE"
  set +a
 fi
 # Stage the download in a scratch directory (mktemp -d creates it owner-only)
 # and move it into place only once it is complete, so a failed or interrupted
 # fetch never leaves a partial kubeconfig at the cached path.
 mkdir -p "$KUBECONFIG_DIR"
 SCRATCH_DIR="$(mktemp -d "$KUBECONFIG_DIR/.kubeconfig.XXXXXX")"
 trap 'rm -rf "$SCRATCH_DIR"' EXIT
 SCRATCH_FILE="$SCRATCH_DIR/kubeconfig"
 cd "$TOFU_DIR"
 case "$PLATFORM" in
  aks)
    RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "$CLUSTER-rg")
    NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
    az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$SCRATCH_FILE" --overwrite-existing
    ;;
  eks)
    NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
    REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
    aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$SCRATCH_FILE"
    ;;
  gke)
    NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
    REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
    PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
    # gcloud writes to the file named by KUBECONFIG. Pointing it at the scratch
    # file stores only this cluster's credentials, instead of copying the whole
    # ~/.kube/config (and every unrelated credential in it) into the project.
    KUBECONFIG="$SCRATCH_FILE" gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT"
    ;;
  upc)
    CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
    if [[ -z "$CLUSTER_ID" ]]; then
      echo "Error: cluster ID unknown — no cluster_id tofu output and UPCLOUD_CLUSTER_ID is not set" >&2
      exit 1
    fi
    upctl kubernetes config "$CLUSTER_ID" > "$SCRATCH_FILE"
    ;;
 esac
 if [[ ! -s "$SCRATCH_FILE" ]]; then
  echo "Error: no kubeconfig was produced for $CLUSTER" >&2
  exit 1
 fi
 # Restrict permissions before the file becomes visible at its final path.
 chmod 600 "$SCRATCH_FILE"
 mv -f "$SCRATCH_FILE" "$KUBECONFIG_FILE"
 echo "Kubeconfig saved: $KUBECONFIG_FILE"
 echo ""
 echo "  export KUBECONFIG=$KUBECONFIG_FILE"
--- a/.tofu/scripts/setup-cluster.sh
+++ b/.tofu/scripts/setup-cluster.sh
@@ -0,0 +1,246 @@
 #!/bin/bash
 # Provision (or, with --destroy, tear down) one Kubernetes cluster with OpenTofu.
 # -e: stop at the first failing command; -u: unset variables are errors;
 # pipefail: a pipeline fails when any of its stages fails.
 set -euo pipefail
 # Absolute path of the directory holding this script.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Parent of the scripts directory (.tofu/ in the repository layout).
 TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
 # Parent of TOFU_ROOT, i.e. the repository root.
 PROJECT_ROOT="$(dirname "$TOFU_ROOT")"
 # ─── Usage ────────────────────────────────────────────────────────────
 # Print the help text and terminate the script.
 #   $1  exit status to terminate with (0 when omitted)
 usage() {
  local status="${1:-0}"
  cat <<EOF
 Usage: $0 <cluster> [options]
  Provision a Kubernetes cluster using OpenTofu.
  Mirrors bootstrap.sh convention: cluster = <platform>-<env>
  Clusters:   aks-dev | aks-prod | eks-dev | eks-prod
              gke-dev | gke-prod | upc-dev | upc-prod
              <platform>-workload (for workload clusters)
  Options:
    --plan        Plan only, don't apply
    --destroy     Destroy the cluster (use teardown-cluster.sh instead)
    --auto        Skip confirmation prompts
    -h, --help    Show this help
  Examples:
    $0 aks-dev
    $0 eks-prod --plan
    $0 upc-dev --auto
  Prerequisites:
    - tofu, kubectl, helm installed
    - Platform credentials in .tofu/configs/<platform>.env
    - Cluster config in clusters/<cluster>.yaml
  After provisioning, run:
    ./bootstrap.sh <cluster>
 EOF
  exit "$status"
 }
 # ─── Parse arguments ──────────────────────────────────────────────────
 # Defaults; each recognised flag flips exactly one of them.
 CLUSTER=""
 PLAN_ONLY=false
 DESTROY=false
 AUTO_APPROVE=false
 # Consume the arguments left to right, so flags may come before or after
 # the <cluster> word.
 while (( $# > 0 )); do
  arg="$1"
  shift
  case "$arg" in
    --plan)
      PLAN_ONLY=true
      ;;
    --destroy)
      DESTROY=true
      ;;
    --auto)
      AUTO_APPROVE=true
      ;;
    -h|--help)
      usage 0
      ;;
    -*)
      echo "Unknown option: $arg"
      usage 1
      ;;
    *)
      # The first bare word names the cluster; a second one is an error.
      if [[ -n "$CLUSTER" ]]; then
        echo "Error: unexpected argument '$arg'"
        usage 1
      fi
      CLUSTER="$arg"
      ;;
  esac
 done
 if [[ -z "$CLUSTER" ]]; then
  echo "Error: <cluster> argument required"
  usage 1
 fi
 # ─── Map cluster → platform + env ────────────────────────────────────
 # Split at the first "-": everything before it is the platform, everything
 # after it the environment (aks-dev → aks / dev).
 PLATFORM="${CLUSTER%%-*}"
 ENV="${CLUSTER#*-}"
 case "$PLATFORM" in
  aks|eks|gke|upc)
    ;;
  *)
    echo "Error: unknown platform '$PLATFORM'. Expected: aks, eks, gke, upc"
    exit 1
    ;;
 esac
 TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
 if [[ ! -d "$TOFU_DIR" ]]; then
  printf '%s\n' \
    "Error: tofu directory not found: $TOFU_DIR" \
    "Available environments for $PLATFORM:"
  # List sibling environments, hiding the shared modules directory.
  ls -1 "$TOFU_ROOT/platforms/$PLATFORM/" 2>/dev/null | grep -v modules || echo "  (none)"
  exit 1
 fi
 printf '%s\n' \
  "=========================================" \
  "  Kubernetes Cluster Setup" \
  "=========================================" \
  "" \
  "  Cluster:   $CLUSTER" \
  "  Platform:  $PLATFORM" \
  "  Env:       $ENV" \
  "  Tofu dir:  $TOFU_DIR" \
  ""
 # ─── Prerequisites ────────────────────────────────────────────────────
 echo "=== Checking Prerequisites ==="
 # Stop at the first required tool that is not on PATH.
 for tool in tofu kubectl helm; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "Error: $tool is not installed."
    exit 1
  fi
 done
 echo "  tofu, kubectl, helm: OK"
 # ─── Load platform credentials ────────────────────────────────────────
 # Optional file: when it is absent the script continues with whatever
 # credentials the environment / platform CLI already provides.
 ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
 if [[ -f "$ENV_FILE" ]]; then
  echo "  Loading credentials from configs/$PLATFORM.env"
  # set -a exports every variable the sourced file assigns, so child
  # processes (tofu and the platform CLIs) can see them.
  set -a
  # shellcheck disable=SC1090
  source "$ENV_FILE"
  set +a
 else
  echo "  Warning: $ENV_FILE not found — using existing environment/CLI auth"
  echo "  Copy configs/$PLATFORM.env.example → configs/$PLATFORM.env to configure"
 fi
 # ─── Load cluster config (if exists) ──────────────────────────────────
 # Optional: each top-level key of clusters/<cluster>.yaml is exported as
 # CLUSTER_<key>. Within this script only CLUSTER_clusterName is read later.
 CLUSTER_CONFIG="$PROJECT_ROOT/clusters/$CLUSTER.yaml"
 if [[ -f "$CLUSTER_CONFIG" ]]; then
  echo "  Loading cluster config from clusters/$CLUSTER.yaml"
  if command -v yq >/dev/null 2>&1; then
    # NOTE(review): this eval runs shell text assembled from the YAML file.
    # A value containing `"`, `$` or a backtick is interpreted by the shell,
    # and a key that is not a valid identifier (e.g. with "-") makes `export`
    # fail, which aborts the script under `set -e`. Keep cluster configs
    # trusted, or read only the keys that are needed.
    eval "$(yq -r 'to_entries[] | "export CLUSTER_\(.key)=\"\(.value)\""' "$CLUSTER_CONFIG")"
    echo "  Cluster name: ${CLUSTER_clusterName:-$CLUSTER}"
  else
    echo "  Warning: yq not installed — cluster config not loaded"
  fi
 else
  echo "  Warning: $CLUSTER_CONFIG not found — using defaults"
 fi
 echo ""
 # ─── Run OpenTofu ─────────────────────────────────────────────────────
 # Modes, selected by the flags parsed earlier:
 #   --destroy --plan   preview the destruction only (nothing is applied)
 #   --destroy          plan + confirm ("yes") + destroy
 #   --plan             preview the changes only
 #   (default)          plan + confirm (y) + apply, then save the kubeconfig
 #                      and wait for the nodes to become Ready
 # --auto skips the confirmation prompt in the two applying modes.
 cd "$TOFU_DIR"
 echo "=== Initializing OpenTofu ==="
 tofu init
 echo ""
 if $DESTROY && $PLAN_ONLY; then
  # --plan must never apply anything, including when combined with --destroy
  # (e.g. `teardown-cluster.sh <cluster> --plan`).
  echo "=== Planning Destruction ==="
  tofu plan -destroy
  echo ""
  echo "=== Plan complete (--plan mode, no changes applied) ==="
 elif $DESTROY; then
  echo "=== Planning Destruction ==="
  tofu plan -destroy -out=tfplan
  if ! $AUTO_APPROVE; then
    echo ""
    read -rp "DESTROY cluster $CLUSTER? This is irreversible. (yes/no) " REPLY
    [[ "$REPLY" == "yes" ]] || { echo "Cancelled."; exit 1; }
  fi
  echo "Destroying infrastructure..."
  tofu apply tfplan
  echo ""
  echo "=== Cluster $CLUSTER Destroyed ==="
 elif $PLAN_ONLY; then
  echo "=== Planning Infrastructure ==="
  tofu plan
  echo ""
  echo "=== Plan complete (--plan mode, no changes applied) ==="
 else
  echo "=== Planning Infrastructure ==="
  tofu plan -out=tfplan
  if ! $AUTO_APPROVE; then
    echo ""
    read -rp "Apply this plan for $CLUSTER? (y/n) " -n 1 REPLY
    echo
    [[ "$REPLY" =~ ^[Yy]$ ]] || { echo "Cancelled."; exit 1; }
  fi
  echo "Applying infrastructure..."
  tofu apply tfplan
  # ─── Save kubeconfig ──────────────────────────────────────────────
  KUBECONFIG_DIR="$PROJECT_ROOT/private/$CLUSTER"
  mkdir -p "$KUBECONFIG_DIR"
  KUBECONFIG_FILE="$KUBECONFIG_DIR/kubeconfig"
  # Stage the kubeconfig in a scratch directory (mktemp -d creates it
  # owner-only) and publish it only when it is non-empty. A failed
  # `tofu output … > file` would otherwise leave an empty kubeconfig behind
  # that later steps and get-kubeconfig.sh mistake for a valid one.
  KUBECONFIG_SCRATCH_DIR="$(mktemp -d "$KUBECONFIG_DIR/.kubeconfig.XXXXXX")"
  trap 'rm -rf "$KUBECONFIG_SCRATCH_DIR"' EXIT
  KUBECONFIG_SCRATCH="$KUBECONFIG_SCRATCH_DIR/kubeconfig"
  echo ""
  echo "=== Saving Kubeconfig ==="
  case "$PLATFORM" in
    aks)
      if tofu output -raw kubeconfig > "$KUBECONFIG_SCRATCH" 2>/dev/null; then
        echo "  Saved from tofu output"
      else
        # The failed redirect still created the file; drop it before retrying.
        rm -f "$KUBECONFIG_SCRATCH"
        echo "  Fetching from Azure CLI..."
        RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}-rg")
        NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
        az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_SCRATCH" --overwrite-existing
      fi
      ;;
    eks)
      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
      REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
      aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_SCRATCH"
      ;;
    gke)
      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
      REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
      PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
      # gcloud writes to the file named by KUBECONFIG. Pointing it at the
      # scratch file stores only this cluster's credentials, instead of
      # copying the whole ~/.kube/config (and every unrelated credential in
      # it) into the project. Best-effort: a failure only warns.
      if ! KUBECONFIG="$KUBECONFIG_SCRATCH" gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT" 2>/dev/null; then
        echo "  Warning: could not fetch kubeconfig via gcloud"
      fi
      ;;
    upc)
      if tofu output -raw kubeconfig > "$KUBECONFIG_SCRATCH" 2>/dev/null; then
        echo "  Saved from tofu output"
      else
        # The failed redirect still created the file; drop it before retrying.
        rm -f "$KUBECONFIG_SCRATCH"
        CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
        if [[ -n "$CLUSTER_ID" ]]; then
          upctl kubernetes config "$CLUSTER_ID" > "$KUBECONFIG_SCRATCH"
        else
          echo "  Warning: could not determine cluster ID for kubeconfig"
        fi
      fi
      ;;
  esac
  if [[ -s "$KUBECONFIG_SCRATCH" ]]; then
    # Restrict permissions before the file appears at its final path.
    chmod 600 "$KUBECONFIG_SCRATCH"
    mv -f "$KUBECONFIG_SCRATCH" "$KUBECONFIG_FILE"
    echo "  Kubeconfig: $KUBECONFIG_FILE"
  else
    echo "  Warning: no kubeconfig was retrieved for $CLUSTER"
  fi
  # ─── Wait for nodes ──────────────────────────────────────────────
  echo ""
  echo "=== Waiting for Cluster Nodes ==="
  export KUBECONFIG="$KUBECONFIG_FILE"
  # Best-effort: a timeout (or an unusable kubeconfig) only produces a warning.
  if kubectl wait --for=condition=Ready nodes --all --timeout=300s 2>/dev/null; then
    echo "  All nodes ready"
  else
    echo "  Warning: nodes not ready within timeout — check cluster status"
  fi
  # ─── Summary ─────────────────────────────────────────────────────
  echo ""
  echo "========================================="
  echo "  Cluster $CLUSTER Provisioned"
  echo "========================================="
  echo ""
  echo "  Kubeconfig: $KUBECONFIG_FILE"
  echo ""
  echo "  Next steps:"
  echo "    export KUBECONFIG=$KUBECONFIG_FILE"
  echo "    ./bootstrap.sh $CLUSTER"
  echo ""
 fi
--- a/.tofu/scripts/teardown-cluster.sh
+++ b/.tofu/scripts/teardown-cluster.sh
@@ -0,0 +1,7 @@
 #!/bin/bash
 # Tear down a cluster. Thin wrapper: every argument is forwarded unchanged to
 # setup-cluster.sh (in the same directory) with --destroy appended.
 #
 # Usage: teardown-cluster.sh <cluster> [options accepted by setup-cluster.sh]
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Delegate to setup-cluster.sh with --destroy flag
 # exec replaces this process, so the exit status is setup-cluster.sh's own.
 exec "$SCRIPT_DIR/setup-cluster.sh" "$@" --destroy
--- a/README.md
+++ b/README.md
@@ -80,8 +80,23 @@ This repository contains the complete GitOps configuration for our Kubernetes cl
 ```
 .
-├── bootstrap.sh                # Cluster initialization script
+├── bootstrap.sh                # Cluster initialization (ArgoCD + GitOps)
-├── _app-of-apps.yaml          # Root ArgoCD Application (App-of-Apps pattern)
+├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster)
 │
 ├── .tofu/                     # Infrastructure provisioning (OpenTofu)
 │   ├── platforms/             # Per-platform IaC (one dir per cloud)
 │   │   ├── aks/               # Azure AKS (modules/ + dev/ + prod/ + workload/)
 │   │   ├── eks/               # AWS EKS
 │   │   ├── gke/               # GCP GKE
 │   │   └── upc/               # UpCloud
 │   ├── configs/               # Platform credentials (git-ignored)
 │   │   └── *.env.example      # Template for each platform
 │   └── scripts/               # Cluster lifecycle scripts
 │       ├── setup-cluster.sh   # Create cluster: ./setup-cluster.sh aks-dev
 │       ├── teardown-cluster.sh
 │       └── get-kubeconfig.sh
 │
 ├── clusters/                  # Cluster metadata (domain, trustedIPs, etc.)
 │
 ├── infra/                     # Infrastructure ArgoCD Applications (Kustomize multi-cluster)
 │   ├── base/                  # Base ArgoCD Application manifests (one dir per component)
--- a/docs/GITOPS-ARCHITECTURE.md
+++ b/docs/GITOPS-ARCHITECTURE.md
@@ -115,9 +115,30 @@ This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where
 ```
 launchpad/
-├── bootstrap.sh                      # Cluster initialization script
+├── bootstrap.sh                      # Cluster initialization (ArgoCD + GitOps)
-├── _app-of-apps-upc-dev.yaml        # Root ArgoCD Application (upc-dev cluster)
+├── _app-of-apps-{cluster}.yaml      # Root ArgoCD Application (per cluster)
-├── _app-of-apps-upc-prod.yaml       # Root ArgoCD Application (upc-prod cluster)
+│
 ├── .tofu/                            # Infrastructure provisioning (OpenTofu)
 │   ├── platforms/                    # Per-platform IaC
 │   │   ├── aks/                      # Azure AKS
 │   │   │   ├── modules/cluster/     # Reusable AKS module
 │   │   │   ├── dev/                 # tofu root for aks-dev
 │   │   │   ├── prod/                # tofu root for aks-prod
 │   │   │   └── workload/            # workload cluster (no data services)
 │   │   ├── eks/                      # AWS EKS (same structure)
 │   │   ├── gke/                      # GCP GKE
 │   │   └── upc/                      # UpCloud
 │   ├── configs/                      # Platform credentials (git-ignored)
 │   │   └── {platform}.env.example   # Template per platform
 │   └── scripts/
 │       ├── setup-cluster.sh          # ./setup-cluster.sh <cluster> [--plan|--auto]
 │       ├── teardown-cluster.sh       # ./teardown-cluster.sh <cluster>
 │       └── get-kubeconfig.sh         # ./get-kubeconfig.sh <cluster>
 │
 ├── clusters/                         # Cluster metadata YAML (domain, IPs, etc.)
 │   ├── aks-dev.yaml
 │   ├── upc-dev.yaml
 │   └── ...
 │
 ├── infra/                            # Infrastructure ArgoCD Applications (Kustomize)
 │   ├── base/                         # Base Application manifests (one dir per component)
--- a/docs/REFERENCE.md
+++ b/docs/REFERENCE.md
@@ -72,9 +72,22 @@ Internet
 ```
 launchpad/
-├── bootstrap.sh                   # Cluster initialization script
+├── bootstrap.sh                   # Cluster initialization (ArgoCD + GitOps)
-├── _app-of-apps-upc-dev.yaml     # Root ArgoCD Application (upc-dev)
+├── _app-of-apps-{cluster}.yaml   # Root ArgoCD Application (per cluster)
-├── _app-of-apps-upc-prod.yaml    # Root ArgoCD Application (upc-prod)
+│
 ├── .tofu/                         # Infrastructure provisioning (OpenTofu)
 │   ├── platforms/                 # Per-platform IaC
 │   │   ├── aks/                   # Azure: modules/cluster/, dev/, prod/, workload/
 │   │   ├── eks/                   # AWS: same structure
 │   │   ├── gke/                   # GCP
 │   │   └── upc/                   # UpCloud
 │   ├── configs/                   # Platform credentials (git-ignored)
 │   └── scripts/                   # setup-cluster.sh, teardown-cluster.sh, get-kubeconfig.sh
 │
 ├── clusters/                      # Cluster metadata YAML
 │   ├── aks-dev.yaml
 │   ├── upc-dev.yaml
 │   └── ...
 │
 ├── infra/                         # Infrastructure applications (Kustomize)
 │   ├── base/                      # One subdirectory per component