diff --git a/.gitignore b/.gitignore index 6f744e3..ea99964 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,11 @@ CLAUDE.md devbox.d/ devbox.lock .devbox/ -bash.exe.stackdump \ No newline at end of file +bash.exe.stackdump + +# OpenTofu +.tofu/configs/*.env +.tofu/platforms/**/.terraform/ +.tofu/platforms/**/terraform.tfstate* +.tofu/platforms/**/tfplan +# .terraform.lock.hcl is deliberately NOT ignored: commit it so provider versions stay pinned across machines diff --git a/.tofu/configs/aks.env.example b/.tofu/configs/aks.env.example new file mode 100644 index 0000000..003a029 --- /dev/null +++ b/.tofu/configs/aks.env.example @@ -0,0 +1,9 @@ +# Azure AKS credentials — copy to aks.env and fill in values +# NEVER commit aks.env to git! + +# Required +AZURE_TENANT_ID=your-azure-tenant-id +AZURE_SUBSCRIPTION_ID=your-azure-subscription-id + +# Optional — defaults to cluster name if not set +ARM_RESOURCE_GROUP= diff --git a/.tofu/configs/eks.env.example b/.tofu/configs/eks.env.example new file mode 100644 index 0000000..8e5885a --- /dev/null +++ b/.tofu/configs/eks.env.example @@ -0,0 +1,10 @@ +# AWS EKS credentials — copy to eks.env and fill in values +# NEVER commit eks.env to git! + +# Required — AWS CLI profile or access key +AWS_PROFILE=default +AWS_REGION=eu-west-1 + +# Optional — override with explicit keys instead of profile +# AWS_ACCESS_KEY_ID= +# AWS_SECRET_ACCESS_KEY= diff --git a/.tofu/configs/gke.env.example b/.tofu/configs/gke.env.example new file mode 100644 index 0000000..2da5828 --- /dev/null +++ b/.tofu/configs/gke.env.example @@ -0,0 +1,9 @@ +# GCP GKE credentials — copy to gke.env and fill in values +# NEVER commit gke.env to git!
+ +# Required +GCP_PROJECT_ID=your-gcp-project-id +GCP_REGION=europe-west4 + +# Optional — path to service account JSON key (if not using gcloud auth) +# GOOGLE_APPLICATION_CREDENTIALS=/path/to/sa-key.json diff --git a/.tofu/configs/upc.env.example b/.tofu/configs/upc.env.example new file mode 100644 index 0000000..697f3c8 --- /dev/null +++ b/.tofu/configs/upc.env.example @@ -0,0 +1,8 @@ +# UpCloud credentials — copy to upc.env and fill in values +# NEVER commit upc.env to git! + +# Required +UPCLOUD_TOKEN=your-upcloud-api-token + +# Optional — set after cluster creation for kubeconfig retrieval +UPCLOUD_CLUSTER_ID= diff --git a/.tofu/platforms/aks/dev/main.tf b/.tofu/platforms/aks/dev/main.tf new file mode 100644 index 0000000..c1abdf1 --- /dev/null +++ b/.tofu/platforms/aks/dev/main.tf @@ -0,0 +1,37 @@ +module "cluster" { + source = "../modules/cluster" + + prefix = "devhub-dev" + location = "norwayeast" + resource_group_name = "devhub-dev-rg" + + # AKS — small dev nodes + aks_node_vm_size = "Standard_B2s" + aks_node_count = 2 + + # PostgreSQL — burstable tier for dev + pg_sku_name = "B_Standard_B1ms" + pg_version = "16" + pg_storage_mb = 32768 + pg_backup_retention_days = 7 + pg_ha_mode = "Disabled" + + # Redis — Basic C0 (250 MB) for dev + redis_sku_name = "Basic" + redis_family = "C" + redis_capacity = 0 + + # Blob storage — locally-redundant for dev + storage_replication = "LRS" + + # DNS — must match an existing Azure DNS zone + domain = "dev.example.com" # TODO: set to your actual domain + # dns_zone_resource_group = "my-dns-rg" # uncomment if DNS zone is in a separate RG + + enable_delete_lock = false + + tags = { + Environment = "dev" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/aks/dev/outputs.tf b/.tofu/platforms/aks/dev/outputs.tf new file mode 100644 index 0000000..0dd3101 --- /dev/null +++ b/.tofu/platforms/aks/dev/outputs.tf @@ -0,0 +1,91 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" 
{ + value = module.cluster.cluster_name +} + +output "resource_group_name" { + value = module.cluster.resource_group_name +} + +output "kubernetes_version" { + value = module.cluster.kubernetes_version +} + +output "location" { + value = module.cluster.location +} + +# ─── PostgreSQL ─────────────────────────────────────────────────────── + +output "pg_host" { + value = module.cluster.pg_host +} + +output "pg_port" { + value = module.cluster.pg_port +} + +output "pg_admin_login" { + value = module.cluster.pg_admin_login +} + +output "pg_admin_password" { + value = module.cluster.pg_admin_password + sensitive = true +} + +output "pg_keycloak_password" { + value = module.cluster.pg_keycloak_password + sensitive = true +} + +output "pg_gitlab_password" { + value = module.cluster.pg_gitlab_password + sensitive = true +} + +# ─── Redis ──────────────────────────────────────────────────────────── + +output "redis_host" { + value = module.cluster.redis_host +} + +output "redis_port" { + value = module.cluster.redis_port +} + +output "redis_password" { + value = module.cluster.redis_password + sensitive = true +} + +# ─── Blob Storage ───────────────────────────────────────────────────── + +output "storage_account_name" { + value = module.cluster.storage_account_name +} + +output "storage_primary_access_key" { + value = module.cluster.storage_primary_access_key + sensitive = true +} + +output "gitlab_identity_client_id" { + value = module.cluster.gitlab_identity_client_id +} + +# Re-export the external-dns identity from the cluster module so it is visible in this root module's `tofu output` +output "external_dns_identity_client_id" { + value = module.cluster.external_dns_identity_client_id +} + +# ─── Entra ID ──────────────────────────────────────────────────────── + +output "entra_tenant_id" { + value = module.cluster.entra_tenant_id +} + +output "entra_keycloak_client_id" { + value = module.cluster.entra_keycloak_client_id +} + +output "entra_keycloak_client_secret" { + value = module.cluster.entra_keycloak_client_secret + sensitive = true +} diff --git a/.tofu/platforms/aks/dev/providers.tf b/.tofu/platforms/aks/dev/providers.tf new file mode 100644 index 0000000..e5f22fb --- /dev/null
+++ b/.tofu/platforms/aks/dev/providers.tf @@ -0,0 +1,25 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + azuread = { + source = "hashicorp/azuread" + version = "~> 3.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} + +provider "azurerm" { + features {} + # Credentials via environment variables: + # ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET + # Or: az login (uses your Azure CLI session) +} diff --git a/.tofu/platforms/aks/modules/cluster/main.tf b/.tofu/platforms/aks/modules/cluster/main.tf new file mode 100644 index 0000000..f01488d --- /dev/null +++ b/.tofu/platforms/aks/modules/cluster/main.tf @@ -0,0 +1,378 @@ +# Current Azure/Entra ID context — provides tenant_id used in outputs +data "azurerm_client_config" "current" {} + +# ─── Resource Group ─────────────────────────────────────────────────── + +resource "azurerm_resource_group" "main" { + name = var.resource_group_name + location = var.location + tags = var.tags +} + +resource "azurerm_management_lock" "main" { + count = var.enable_delete_lock ? 
1 : 0 + name = "${var.prefix}-delete-lock" + scope = azurerm_resource_group.main.id + lock_level = "CanNotDelete" + notes = "Prevents accidental deletion of production resources" +} + +# ─── Networking ─────────────────────────────────────────────────────── + +resource "azurerm_virtual_network" "main" { + name = "${var.prefix}-vnet" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + address_space = [var.vnet_address_space] + tags = var.tags +} + +# AKS nodes subnet +resource "azurerm_subnet" "aks" { + name = "${var.prefix}-aks-subnet" + resource_group_name = azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = [var.aks_subnet_cidr] +} + +# PostgreSQL Flexible Server requires a delegated subnet +resource "azurerm_subnet" "postgresql" { + name = "${var.prefix}-pg-subnet" + resource_group_name = azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = [var.pg_subnet_cidr] + + delegation { + name = "postgresql-delegation" + service_delegation { + name = "Microsoft.DBforPostgreSQL/flexibleServers" + actions = ["Microsoft.Network/virtualNetworks/subnets/join/action"] + } + } +} + +# ─── AKS Cluster ────────────────────────────────────────────────────── + +resource "azurerm_kubernetes_cluster" "main" { + name = "${var.prefix}-aks" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + dns_prefix = replace(var.prefix, "-", "") + kubernetes_version = var.aks_kubernetes_version + tags = var.tags + + default_node_pool { + name = "system" + node_count = var.aks_node_count + vm_size = var.aks_node_vm_size + vnet_subnet_id = azurerm_subnet.aks.id + node_labels = { + prefix = var.prefix + role = "worker" + env = lookup(var.tags, "Environment", "dev") + } + } + + identity { + type = "SystemAssigned" + } + + network_profile { + network_plugin = "azure" + 
network_policy = "azure" + } + + # Enable Workload Identity for keyless Azure service access (MSI) + oidc_issuer_enabled = true + workload_identity_enabled = true +} + +# ─── PostgreSQL Flexible Server ─────────────────────────────────────── + +# Private DNS zone: required for VNet-integrated Flexible Server +resource "azurerm_private_dns_zone" "postgresql" { + name = "${replace(var.prefix, "-", "")}.postgres.database.azure.com" + resource_group_name = azurerm_resource_group.main.name + tags = var.tags +} + +resource "azurerm_private_dns_zone_virtual_network_link" "postgresql" { + name = "${var.prefix}-pg-dns-link" + private_dns_zone_name = azurerm_private_dns_zone.postgresql.name + resource_group_name = azurerm_resource_group.main.name + virtual_network_id = azurerm_virtual_network.main.id +} + +resource "random_password" "pg_admin" { + length = 32 + special = false +} + +# Passwords for application DB users. +# NOTE: The users themselves must be created post-provision — Azure PostgreSQL +# Flexible Server has no Terraform resource for local user creation. Use +# setup scripts to run: CREATE USER keycloak/gitlab WITH PASSWORD '...'; +resource "random_password" "pg_keycloak" { + length = 32 + special = false +} + +resource "random_password" "pg_gitlab" { + length = 32 + special = false +} + +resource "azurerm_postgresql_flexible_server" "main" { + name = "${var.prefix}-postgresql" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + version = var.pg_version + sku_name = var.pg_sku_name + storage_mb = var.pg_storage_mb + backup_retention_days = var.pg_backup_retention_days + administrator_login = "pgadmin" + administrator_password = random_password.pg_admin.result + delegated_subnet_id = azurerm_subnet.postgresql.id + private_dns_zone_id = azurerm_private_dns_zone.postgresql.id + tags = var.tags + + dynamic "high_availability" { + for_each = var.pg_ha_mode != "Disabled" ? 
[1] : [] + content { + mode = var.pg_ha_mode + standby_availability_zone = var.pg_standby_zone + } + } + + depends_on = [azurerm_private_dns_zone_virtual_network_link.postgresql] +} + +resource "azurerm_postgresql_flexible_server_database" "keycloak" { + name = "keycloak" + server_id = azurerm_postgresql_flexible_server.main.id + collation = "en_US.utf8" + charset = "UTF8" +} + +resource "azurerm_postgresql_flexible_server_database" "gitlab" { + name = "gitlabhq_production" + server_id = azurerm_postgresql_flexible_server.main.id + collation = "en_US.utf8" + charset = "UTF8" +} + +# ─── Azure Cache for Redis ──────────────────────────────────────────── + +resource "azurerm_redis_cache" "main" { + name = "${var.prefix}-redis" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + sku_name = var.redis_sku_name + family = var.redis_family + capacity = var.redis_capacity + non_ssl_port_enabled = false + minimum_tls_version = "1.2" + tags = var.tags +} + +# ─── Blob Storage ───────────────────────────────────────────────────── + +locals { + # Storage account name: 3-24 chars, lowercase alphanumeric only + storage_account_name = substr(replace(lower(var.prefix), "-", ""), 0, 19) +} + +resource "azurerm_storage_account" "main" { + name = "${local.storage_account_name}store" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + account_tier = "Standard" + account_replication_type = var.storage_replication + min_tls_version = "TLS1_2" + + # Enable hierarchical namespace for better performance (optional, uncomment for Premium) + # is_hns_enabled = false + + tags = var.tags +} + +# GitLab storage containers — native Azure Blob (provider: AzureRM) +# No S3 shim required: GitLab CE Helm chart supports Azure Blob natively +resource "azurerm_storage_container" "gitlab_artifacts" { + name = "gitlab-artifacts" + storage_account_id = azurerm_storage_account.main.id + 
container_access_type = "private" +} + +resource "azurerm_storage_container" "gitlab_uploads" { + name = "gitlab-uploads" + storage_account_id = azurerm_storage_account.main.id + container_access_type = "private" +} + +resource "azurerm_storage_container" "gitlab_packages" { + name = "gitlab-packages" + storage_account_id = azurerm_storage_account.main.id + container_access_type = "private" +} + +resource "azurerm_storage_container" "gitlab_lfs" { + name = "gitlab-lfs" + storage_account_id = azurerm_storage_account.main.id + container_access_type = "private" +} + +resource "azurerm_storage_container" "gitlab_registry" { + name = "gitlab-registry" + storage_account_id = azurerm_storage_account.main.id + container_access_type = "private" +} + +resource "azurerm_storage_container" "gitlab_backups" { + name = "gitlab-backups" + storage_account_id = azurerm_storage_account.main.id + container_access_type = "private" +} + +# ─── Entra ID Identity Provider for Keycloak ───────────────────────── +# +# Keycloak federates with Entra ID — users authenticate via "Sign in with +# Microsoft" through Keycloak, which remains the single OIDC issuer for all +# services. This keeps the auth layer portable across clouds (UpCloud, GCP, AWS). +# +# Three App Roles are defined (devops-admins, developers, viewers). Assign +# Entra ID users or security groups to these roles in the Azure portal or +# via the azuread_app_role_assignment resource. +# +# The redirect URI is set to a placeholder here; setup-keycloak.sh updates it +# to the real domain (https://keycloak.{domain}/realms/devops/broker/entra/endpoint) +# using `az ad app update` after the domain is known.
+ +resource "azuread_application" "keycloak_idp" { + display_name = "${var.prefix}-keycloak-idp" + + web { + redirect_uris = ["https://placeholder.invalid/realms/devops/broker/entra/endpoint"] + } + + required_resource_access { + resource_app_id = "00000003-0000-0000-c000-000000000000" # Microsoft Graph + resource_access { + id = "37f7f235-527c-4136-accd-4a02d197296e" # openid + type = "Scope" + } + resource_access { + id = "64a6cdd6-aab1-4aaf-94b8-3cc8405e90d0" # email + type = "Scope" + } + resource_access { + id = "14dad69e-099b-42c9-810b-d002981feec1" # profile + type = "Scope" + } + } + + # App Roles map to Keycloak groups via setup-keycloak.sh IdP mappers. + # Assign Entra ID users/groups to these roles in the Azure portal. + # Each id must be a well-formed UUID (8-4-4-4-12 hex digits); keep the values fixed once applied. + app_role { + allowed_member_types = ["User"] + description = "Full access to DevOps platform administration" + display_name = "DevOps Admins" + enabled = true + id = "a1b2c3d4-0001-4000-8000-000000000001" # stable GUID + value = "devops-admins" + } + + app_role { + allowed_member_types = ["User"] + description = "Developer access to DevOps platform services" + display_name = "Developers" + enabled = true + id = "a1b2c3d4-0002-4000-8000-000000000002" # stable GUID + value = "developers" + } + + app_role { + allowed_member_types = ["User"] + description = "Read-only access to DevOps platform services" + display_name = "Viewers" + enabled = true + id = "a1b2c3d4-0003-4000-8000-000000000003" # stable GUID + value = "viewers" + } + + # setup-keycloak.sh replaces the placeholder redirect URI out of band; + # ignore it here so later applies do not reset it to the placeholder. + lifecycle { + ignore_changes = [web[0].redirect_uris] + } +} + +resource "azuread_service_principal" "keycloak_idp" { + client_id = azuread_application.keycloak_idp.client_id + app_role_assignment_required = var.entra_require_assignment +} + +resource "azuread_application_password" "keycloak_idp" { + application_id = azuread_application.keycloak_idp.id + display_name = "keycloak-idp-secret" + end_date = "2099-01-01T00:00:00Z" +} + +# ─── Workload Identity for GitLab ───────────────────────────────────── +# +# Allows GitLab pods (webservice, sidekiq) to access Blob
Storage without +# a storage account key. The K8s service account "gitlab" in the "gitlab" +# namespace exchanges its projected OIDC token for an Azure AD token. +# +# AKS must have oidc_issuer_enabled and workload_identity_enabled (set above). + +resource "azurerm_user_assigned_identity" "gitlab" { + name = "${var.prefix}-gitlab-identity" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + tags = var.tags +} + +# Grant the identity read/write access to all blob containers in the storage account +resource "azurerm_role_assignment" "gitlab_storage" { + scope = azurerm_storage_account.main.id + role_definition_name = "Storage Blob Data Contributor" + principal_id = azurerm_user_assigned_identity.gitlab.principal_id +} + +# Federated credential: trusts tokens from the AKS OIDC issuer for the +# "gitlab" service account in the "gitlab" namespace. +# The GitLab Helm chart creates this SA when global.serviceAccount.enabled=true. +resource "azurerm_federated_identity_credential" "gitlab" { + name = "${var.prefix}-gitlab-fedcred" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.gitlab.id + audience = ["api://AzureADTokenExchange"] + issuer = azurerm_kubernetes_cluster.main.oidc_issuer_url + subject = "system:serviceaccount:gitlab:gitlab" +} + +# ─── External-DNS Workload Identity ────────────────────────────────── +# Allows external-dns to manage Azure DNS records for the cluster's domain. +# The K8s service account "external-dns/external-dns" uses the federated credential. + +data "azurerm_dns_zone" "main" { + name = var.domain + resource_group_name = var.dns_zone_resource_group != "" ? 
var.dns_zone_resource_group : azurerm_resource_group.main.name +} + +resource "azurerm_user_assigned_identity" "external_dns" { + name = "${var.prefix}-external-dns-identity" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + tags = var.tags +} + +resource "azurerm_role_assignment" "external_dns_dns_contributor" { + scope = data.azurerm_dns_zone.main.id + role_definition_name = "DNS Zone Contributor" + principal_id = azurerm_user_assigned_identity.external_dns.principal_id +} + +resource "azurerm_federated_identity_credential" "external_dns" { + name = "${var.prefix}-external-dns-fedcred" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.external_dns.id + audience = ["api://AzureADTokenExchange"] + issuer = azurerm_kubernetes_cluster.main.oidc_issuer_url + subject = "system:serviceaccount:external-dns:external-dns" +} diff --git a/.tofu/platforms/aks/modules/cluster/outputs.tf b/.tofu/platforms/aks/modules/cluster/outputs.tf new file mode 100644 index 0000000..a7c784d --- /dev/null +++ b/.tofu/platforms/aks/modules/cluster/outputs.tf @@ -0,0 +1,117 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" { + description = "AKS cluster name" + value = azurerm_kubernetes_cluster.main.name +} + +output "resource_group_name" { + description = "Resource group name" + value = azurerm_resource_group.main.name +} + +output "kubernetes_version" { + description = "Kubernetes version" + value = azurerm_kubernetes_cluster.main.kubernetes_version +} + +output "location" { + description = "Azure region" + value = azurerm_resource_group.main.location +} + +# ─── PostgreSQL ─────────────────────────────────────────────────────── + +output "pg_host" { + description = "PostgreSQL Flexible Server FQDN (private, reachable from AKS)" + value = azurerm_postgresql_flexible_server.main.fqdn +} + +output "pg_port" { + description = 
"PostgreSQL port" + value = 5432 +} + +output "pg_admin_login" { + description = "PostgreSQL administrator login" + value = azurerm_postgresql_flexible_server.main.administrator_login +} + +output "pg_admin_password" { + description = "PostgreSQL administrator password" + value = random_password.pg_admin.result + sensitive = true +} + +output "pg_keycloak_password" { + description = "Pre-generated password for keycloak DB user — create user post-provision" + value = random_password.pg_keycloak.result + sensitive = true +} + +output "pg_gitlab_password" { + description = "Pre-generated password for gitlab DB user — create user post-provision" + value = random_password.pg_gitlab.result + sensitive = true +} + +# ─── Redis ──────────────────────────────────────────────────────────── + +output "redis_host" { + description = "Azure Cache for Redis hostname" + value = azurerm_redis_cache.main.hostname +} + +output "redis_port" { + description = "Redis SSL port (6380)" + value = azurerm_redis_cache.main.ssl_port +} + +output "redis_password" { + description = "Redis primary access key" + value = azurerm_redis_cache.main.primary_access_key + sensitive = true +} + +# ─── Blob Storage ───────────────────────────────────────────────────── + +output "storage_account_name" { + description = "Azure Storage Account name" + value = azurerm_storage_account.main.name +} + +output "storage_primary_access_key" { + description = "Storage Account primary access key (used for registry; main GitLab storage uses managed identity)" + value = azurerm_storage_account.main.primary_access_key + sensitive = true +} + +# ─── Entra ID ──────────────────────────────────────────────────────── + +output "entra_tenant_id" { + description = "Entra ID tenant ID — used in Keycloak IdP OIDC endpoint URLs" + value = data.azurerm_client_config.current.tenant_id +} + +output "entra_keycloak_client_id" { + description = "App Registration client ID for the Keycloak IdP" + value = 
azuread_application.keycloak_idp.client_id +} + +output "entra_keycloak_client_secret" { + description = "App Registration client secret for the Keycloak IdP" + value = azuread_application_password.keycloak_idp.value + sensitive = true +} + +# ─── Workload Identity ──────────────────────────────────────────────── + +output "gitlab_identity_client_id" { + description = "Client ID of the GitLab managed identity — annotate the K8s service account with this value" + value = azurerm_user_assigned_identity.gitlab.client_id +} + +output "external_dns_identity_client_id" { + description = "Client ID of the external-dns managed identity — written to config.yaml by sync-tofu-outputs.sh" + value = azurerm_user_assigned_identity.external_dns.client_id +} diff --git a/.tofu/platforms/aks/modules/cluster/providers.tf b/.tofu/platforms/aks/modules/cluster/providers.tf new file mode 100644 index 0000000..0be1288 --- /dev/null +++ b/.tofu/platforms/aks/modules/cluster/providers.tf @@ -0,0 +1,18 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + azuread = { + source = "hashicorp/azuread" + version = "~> 3.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} diff --git a/.tofu/platforms/aks/modules/cluster/variables.tf b/.tofu/platforms/aks/modules/cluster/variables.tf new file mode 100644 index 0000000..17e8991 --- /dev/null +++ b/.tofu/platforms/aks/modules/cluster/variables.tf @@ -0,0 +1,145 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +variable "prefix" { + description = "Prefix for resource names" + type = string +} + +variable "location" { + description = "Azure region (e.g., norwayeast, westeurope, northeurope)" + type = string +} + +variable "resource_group_name" { + description = "Name of the Azure Resource Group to create" + type = string +} + +variable "vnet_address_space" { + description = "Address space for the virtual 
network" + type = string + default = "10.100.0.0/16" +} + +variable "aks_subnet_cidr" { + description = "CIDR block for the AKS node subnet" + type = string + default = "10.100.0.0/22" +} + +variable "pg_subnet_cidr" { + description = "CIDR block for the PostgreSQL delegated subnet" + type = string + default = "10.100.4.0/24" +} + +variable "aks_node_vm_size" { + description = "VM size for AKS worker nodes (e.g., Standard_B2s, Standard_D4s_v3)" + type = string +} + +variable "aks_node_count" { + description = "Number of AKS worker nodes" + type = number +} + +variable "aks_kubernetes_version" { + description = "Kubernetes version for AKS (null = latest stable)" + type = string + default = null +} + +variable "enable_delete_lock" { + description = "Protect the resource group from accidental deletion" + type = bool + default = false +} + +variable "tags" { + description = "Tags applied to all resources" + type = map(string) + default = {} +} + +# ─── PostgreSQL Flexible Server ─────────────────────────────────────── + +variable "pg_sku_name" { + description = "PostgreSQL Flexible Server SKU (e.g., B_Standard_B1ms, GP_Standard_D2s_v3)" + type = string +} + +variable "pg_version" { + description = "PostgreSQL major version" + type = string + default = "16" +} + +variable "pg_storage_mb" { + description = "PostgreSQL storage in MB (minimum 32768)" + type = number + default = 32768 +} + +variable "pg_backup_retention_days" { + description = "PostgreSQL backup retention in days (7-35)" + type = number + default = 7 +} + +variable "pg_ha_mode" { + description = "PostgreSQL high availability mode: Disabled or ZoneRedundant" + type = string + default = "Disabled" +} + +variable "pg_standby_zone" { + description = "Availability zone for PostgreSQL standby replica (used when pg_ha_mode = ZoneRedundant)" + type = string + default = "2" +} + +# ─── Azure Cache for Redis ──────────────────────────────────────────── + +variable "redis_sku_name" { + description = "Azure Cache for 
Redis SKU: Basic, Standard, or Premium" + type = string +} + +variable "redis_family" { + description = "Redis family: C (Basic/Standard) or P (Premium)" + type = string +} + +variable "redis_capacity" { + description = "Redis cache size (0-6, meaning depends on SKU/family)" + type = number +} + +# ─── Blob Storage ───────────────────────────────────────────────────── + +variable "storage_replication" { + description = "Storage account replication type (LRS, ZRS, GRS, RAGRS)" + type = string + default = "LRS" +} + +# ─── DNS ───────────────────────────────────────────────────────────── + +variable "domain" { + description = "Public domain name for the cluster (e.g., dev.example.com) — must have an existing Azure DNS zone" + type = string +} + +variable "dns_zone_resource_group" { + description = "Resource group containing the Azure DNS zone (defaults to the cluster resource group)" + type = string + default = "" +} + +# ─── Entra ID (Azure AD) ────────────────────────────────────────────── + +variable "entra_require_assignment" { + description = "Require explicit App Role assignment before users can authenticate via Entra ID" + type = bool + default = false # Set true for production to restrict access to assigned users only +} diff --git a/.tofu/platforms/aks/prod/main.tf b/.tofu/platforms/aks/prod/main.tf new file mode 100644 index 0000000..df5a107 --- /dev/null +++ b/.tofu/platforms/aks/prod/main.tf @@ -0,0 +1,40 @@ +module "cluster" { + source = "../modules/cluster" + + prefix = "devhub" + location = "westeurope" + resource_group_name = "devhub-prod-rg" + + # AKS — general-purpose nodes for production + aks_node_vm_size = "Standard_D4s_v3" + aks_node_count = 3 + + # PostgreSQL — general-purpose tier for production + pg_sku_name = "GP_Standard_D2s_v3" + pg_version = "16" + pg_storage_mb = 102400 # 100 GB + pg_backup_retention_days = 14 + pg_ha_mode = "ZoneRedundant" + pg_standby_zone = "2" + + # Redis — Standard C1 (1 GB) with replication for production + 
redis_sku_name = "Standard" + redis_family = "C" + redis_capacity = 1 + + # Blob storage — geo-redundant for production + storage_replication = "GRS" + + # DNS — must match an existing Azure DNS zone + domain = "example.com" # TODO: set to your actual domain + # dns_zone_resource_group = "my-dns-rg" # uncomment if DNS zone is in a separate RG + + enable_delete_lock = true + + # api_server_authorized_ip_ranges = ["0.0.0.0/0"] # TODO: restrict to known CIDRs + + tags = { + Environment = "prod" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/aks/prod/outputs.tf b/.tofu/platforms/aks/prod/outputs.tf new file mode 100644 index 0000000..0dd3101 --- /dev/null +++ b/.tofu/platforms/aks/prod/outputs.tf @@ -0,0 +1,91 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" { + value = module.cluster.cluster_name +} + +output "resource_group_name" { + value = module.cluster.resource_group_name +} + +output "kubernetes_version" { + value = module.cluster.kubernetes_version +} + +output "location" { + value = module.cluster.location +} + +# ─── PostgreSQL ─────────────────────────────────────────────────────── + +output "pg_host" { + value = module.cluster.pg_host +} + +output "pg_port" { + value = module.cluster.pg_port +} + +output "pg_admin_login" { + value = module.cluster.pg_admin_login +} + +output "pg_admin_password" { + value = module.cluster.pg_admin_password + sensitive = true +} + +output "pg_keycloak_password" { + value = module.cluster.pg_keycloak_password + sensitive = true +} + +output "pg_gitlab_password" { + value = module.cluster.pg_gitlab_password + sensitive = true +} + +# ─── Redis ──────────────────────────────────────────────────────────── + +output "redis_host" { + value = module.cluster.redis_host +} + +output "redis_port" { + value = module.cluster.redis_port +} + +output "redis_password" { + value = module.cluster.redis_password + sensitive = true +} + +# ─── Blob Storage 
───────────────────────────────────────────────────── + +output "storage_account_name" { + value = module.cluster.storage_account_name +} + +output "storage_primary_access_key" { + value = module.cluster.storage_primary_access_key + sensitive = true +} + +output "gitlab_identity_client_id" { + value = module.cluster.gitlab_identity_client_id +} + +# Re-export the external-dns identity from the cluster module so it is visible in this root module's `tofu output` +output "external_dns_identity_client_id" { + value = module.cluster.external_dns_identity_client_id +} + +# ─── Entra ID ──────────────────────────────────────────────────────── + +output "entra_tenant_id" { + value = module.cluster.entra_tenant_id +} + +output "entra_keycloak_client_id" { + value = module.cluster.entra_keycloak_client_id +} + +output "entra_keycloak_client_secret" { + value = module.cluster.entra_keycloak_client_secret + sensitive = true +} diff --git a/.tofu/platforms/aks/prod/providers.tf b/.tofu/platforms/aks/prod/providers.tf new file mode 100644 index 0000000..e5f22fb --- /dev/null +++ b/.tofu/platforms/aks/prod/providers.tf @@ -0,0 +1,25 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + azuread = { + source = "hashicorp/azuread" + version = "~> 3.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} + +provider "azurerm" { + features {} + # Credentials via environment variables: + # ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET + # Or: az login (uses your Azure CLI session) +} diff --git a/.tofu/platforms/aks/workload/main.tf b/.tofu/platforms/aks/workload/main.tf new file mode 100644 index 0000000..b9e5b26 --- /dev/null +++ b/.tofu/platforms/aks/workload/main.tf @@ -0,0 +1,173 @@ +# ============================================================================= +# Azure Workload Cluster +# ============================================================================= +# A lean AKS cluster for running application workloads. No managed data +# services — those live on the platform cluster.
ArgoCD (on the platform +# cluster) deploys apps to this cluster via the app-of-apps pattern. +# +# Platform components deployed by deploy-workload.sh: +# nginx-ingress, cert-manager, external-dns, external-secrets, alloy +# +# Usage: +# tofu init && tofu plan && tofu apply +# ./sync-tofu-outputs.sh --env azure-workload +# ./deploy-workload.sh --env azure-workload +# ============================================================================= + +variable "prefix" { + description = "Prefix for resource names (e.g., devhub-workload)" + type = string + default = "devhub-workload" +} + +variable "location" { + description = "Azure region" + type = string + default = "norwayeast" +} + +variable "resource_group_name" { + description = "Name of the Azure Resource Group to create" + type = string + default = "devhub-workload-rg" +} + +variable "vnet_address_space" { + description = "Address space for the virtual network" + type = string + default = "10.110.0.0/16" +} + +variable "aks_subnet_cidr" { + description = "CIDR block for the AKS node subnet" + type = string + default = "10.110.0.0/22" +} + +variable "aks_node_vm_size" { + description = "VM size for AKS worker nodes" + type = string + default = "Standard_B2s" +} + +variable "aks_node_count" { + description = "Number of AKS worker nodes" + type = number + default = 2 +} + +variable "aks_kubernetes_version" { + description = "Kubernetes version for AKS (null = latest stable)" + type = string + default = null +} + +variable "domain" { + description = "Public domain name — must have an existing Azure DNS zone" + type = string +} + +variable "dns_zone_resource_group" { + description = "Resource group containing the Azure DNS zone (defaults to cluster RG)" + type = string + default = "" +} + +variable "tags" { + description = "Tags applied to all resources" + type = map(string) + default = { + Environment = "workload" + ManagedBy = "tofu" + } +} + +# ─── Resource Group ─────────────────────────────────────────────────── 
+ +resource "azurerm_resource_group" "main" { + name = var.resource_group_name + location = var.location + tags = var.tags +} + +# ─── Networking ─────────────────────────────────────────────────────── + +resource "azurerm_virtual_network" "main" { + name = "${var.prefix}-vnet" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + address_space = [var.vnet_address_space] + tags = var.tags +} + +resource "azurerm_subnet" "aks" { + name = "${var.prefix}-aks-subnet" + resource_group_name = azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = [var.aks_subnet_cidr] +} + +# ─── AKS Cluster ────────────────────────────────────────────────────── + +resource "azurerm_kubernetes_cluster" "main" { + name = "${var.prefix}-aks" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + dns_prefix = replace(var.prefix, "-", "") + kubernetes_version = var.aks_kubernetes_version + tags = var.tags + + default_node_pool { + name = "system" + node_count = var.aks_node_count + vm_size = var.aks_node_vm_size + vnet_subnet_id = azurerm_subnet.aks.id + node_labels = { + prefix = var.prefix + role = "worker" + env = lookup(var.tags, "Environment", "workload") + } + } + + identity { + type = "SystemAssigned" + } + + network_profile { + network_plugin = "azure" + network_policy = "azure" + } + + oidc_issuer_enabled = true + workload_identity_enabled = true +} + +# ─── External-DNS Workload Identity ────────────────────────────────── +# Allows external-dns to manage Azure DNS records for app ingresses. + +data "azurerm_dns_zone" "main" { + name = var.domain + resource_group_name = var.dns_zone_resource_group != "" ? 
var.dns_zone_resource_group : azurerm_resource_group.main.name +} + +resource "azurerm_user_assigned_identity" "external_dns" { + name = "${var.prefix}-external-dns-identity" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + tags = var.tags +} + +resource "azurerm_role_assignment" "external_dns_dns_contributor" { + scope = data.azurerm_dns_zone.main.id + role_definition_name = "DNS Zone Contributor" + principal_id = azurerm_user_assigned_identity.external_dns.principal_id +} + +resource "azurerm_federated_identity_credential" "external_dns" { + name = "${var.prefix}-external-dns-fedcred" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.external_dns.id + audience = ["api://AzureADTokenExchange"] + issuer = azurerm_kubernetes_cluster.main.oidc_issuer_url + subject = "system:serviceaccount:external-dns:external-dns" +} diff --git a/.tofu/platforms/aks/workload/outputs.tf b/.tofu/platforms/aks/workload/outputs.tf new file mode 100644 index 0000000..45dcc2e --- /dev/null +++ b/.tofu/platforms/aks/workload/outputs.tf @@ -0,0 +1,4 @@ +output "cluster_name" { value = azurerm_kubernetes_cluster.main.name } +output "resource_group_name" { value = azurerm_resource_group.main.name } +output "location" { value = azurerm_resource_group.main.location } +output "external_dns_identity_client_id" { value = azurerm_user_assigned_identity.external_dns.client_id } diff --git a/.tofu/platforms/aks/workload/providers.tf b/.tofu/platforms/aks/workload/providers.tf new file mode 100644 index 0000000..29f7a8f --- /dev/null +++ b/.tofu/platforms/aks/workload/providers.tf @@ -0,0 +1,21 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} + +provider "azurerm" { + features {} + # Credentials via environment variables: + # 
ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET + # Or: az login (uses your Azure CLI session) +} diff --git a/.tofu/platforms/eks/dev/main.tf b/.tofu/platforms/eks/dev/main.tf new file mode 100644 index 0000000..983d624 --- /dev/null +++ b/.tofu/platforms/eks/dev/main.tf @@ -0,0 +1,39 @@ +module "cluster" { + source = "../modules/cluster" + + region = var.region + prefix = "devhub-dev" + + # VPC + availability_zones = ["${var.region}a", "${var.region}b"] + + # EKS — small dev nodes + node_instance_type = "t3.medium" + node_count = 2 + node_min_count = 1 + node_max_count = 4 + kubernetes_version = "1.30" + + # RDS — small burstable tier for dev + rds_instance_class = "db.t3.micro" + rds_allocated_storage = 20 + rds_multi_az = false + + # ElastiCache — small single node for dev + redis_node_type = "cache.t3.micro" + redis_num_cache_clusters = 1 + redis_automatic_failover = false + + # DNS — must match an existing Route53 hosted zone + domain = "dev.example.com" # TODO: set to your actual domain + + # Cognito — domain prefix must be globally unique + cognito_domain_prefix = "devhub-dev-devhub" # TODO: customize to avoid conflicts + + enable_deletion_protection = false + + tags = { + Environment = "dev" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/eks/dev/outputs.tf b/.tofu/platforms/eks/dev/outputs.tf new file mode 100644 index 0000000..e7657c5 --- /dev/null +++ b/.tofu/platforms/eks/dev/outputs.tf @@ -0,0 +1,30 @@ +# Re-exports the cluster module's outputs at the root. HCL has no ';' separator, so outputs that also set `sensitive` use multi-line blocks. +output "cluster_name" { value = module.cluster.cluster_name } +output "aws_region" { value = module.cluster.aws_region } +output "pg_host" { value = module.cluster.pg_host } +output "pg_port" { value = module.cluster.pg_port } +output "pg_admin_login" { value = module.cluster.pg_admin_login } +output "pg_admin_password" { + value = module.cluster.pg_admin_password + sensitive = true +} +output "pg_keycloak_password" { + value = module.cluster.pg_keycloak_password + sensitive = true +} +output "pg_gitlab_password" { + value = 
module.cluster.pg_gitlab_password + sensitive = true +} +output "redis_host" { value = module.cluster.redis_host } +output "redis_port" { value = module.cluster.redis_port } +output "gitlab_s3_bucket_prefix" { value = module.cluster.gitlab_s3_bucket_prefix } +output "gitlab_irsa_role_arn" { value = module.cluster.gitlab_irsa_role_arn } +output "cognito_user_pool_id" { value = module.cluster.cognito_user_pool_id } +output "cognito_issuer_url" { value = module.cluster.cognito_issuer_url } +output "cognito_hosted_ui_domain" { value = module.cluster.cognito_hosted_ui_domain } +output "cognito_client_id" { value = module.cluster.cognito_client_id } +output "cognito_client_secret" { + value = module.cluster.cognito_client_secret + sensitive = true +} diff --git a/.tofu/platforms/eks/dev/providers.tf b/.tofu/platforms/eks/dev/providers.tf new file mode 100644 index 0000000..7613256 --- /dev/null +++ b/.tofu/platforms/eks/dev/providers.tf @@ -0,0 +1,28 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + tls = { + source = "hashicorp/tls" + version = "~> 4.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} + +# Authentication: set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN +# or configure an AWS profile: export AWS_PROFILE=devhub +provider "aws" { + region = var.region +} + +variable "region" { + description = "AWS region for dev environment" + type = string + default = "eu-west-1" +} diff --git a/.tofu/platforms/eks/modules/cluster/main.tf b/.tofu/platforms/eks/modules/cluster/main.tf new file mode 100644 index 0000000..187a15f --- /dev/null +++ b/.tofu/platforms/eks/modules/cluster/main.tf @@ -0,0 +1,703 @@ +# ─── VPC ────────────────────────────────────────────────────────────── + +resource "aws_vpc" "main" { + cidr_block = var.vpc_cidr + enable_dns_hostnames = true + enable_dns_support = true + + tags = merge(var.tags, { Name = "${var.prefix}-vpc" }) +} + +resource 
"aws_internet_gateway" "main" { + vpc_id = aws_vpc.main.id + tags = merge(var.tags, { Name = "${var.prefix}-igw" }) +} + +# Public subnets (one per AZ) — for NAT gateways and load balancers +resource "aws_subnet" "public" { + count = length(var.availability_zones) + vpc_id = aws_vpc.main.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index) + availability_zone = var.availability_zones[count.index] + + map_public_ip_on_launch = true + + tags = merge(var.tags, { + Name = "${var.prefix}-public-${count.index + 1}" + "kubernetes.io/cluster/${var.prefix}-eks" = "shared" + "kubernetes.io/role/elb" = "1" + }) +} + +# Private subnets (one per AZ) — for EKS nodes, RDS, ElastiCache +resource "aws_subnet" "private" { + count = length(var.availability_zones) + vpc_id = aws_vpc.main.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones)) + availability_zone = var.availability_zones[count.index] + + tags = merge(var.tags, { + Name = "${var.prefix}-private-${count.index + 1}" + "kubernetes.io/cluster/${var.prefix}-eks" = "shared" + "kubernetes.io/role/internal-elb" = "1" + }) +} + +# NAT Gateway (single, in first public subnet — use one per AZ for prod HA) +resource "aws_eip" "nat" { + domain = "vpc" + tags = merge(var.tags, { Name = "${var.prefix}-nat-eip" }) +} + +resource "aws_nat_gateway" "main" { + allocation_id = aws_eip.nat.id + subnet_id = aws_subnet.public[0].id + tags = merge(var.tags, { Name = "${var.prefix}-nat" }) + + depends_on = [aws_internet_gateway.main] +} + +resource "aws_route_table" "public" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.main.id + } + + tags = merge(var.tags, { Name = "${var.prefix}-public-rt" }) +} + +resource "aws_route_table_association" "public" { + count = length(var.availability_zones) + subnet_id = aws_subnet.public[count.index].id + route_table_id = aws_route_table.public.id +} + +resource "aws_route_table" "private" { + vpc_id = 
aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + nat_gateway_id = aws_nat_gateway.main.id + } + + tags = merge(var.tags, { Name = "${var.prefix}-private-rt" }) +} + +resource "aws_route_table_association" "private" { + count = length(var.availability_zones) + subnet_id = aws_subnet.private[count.index].id + route_table_id = aws_route_table.private.id +} + +# ─── Security Groups ────────────────────────────────────────────────── + +resource "aws_security_group" "rds" { + name_prefix = "${var.prefix}-rds-" + description = "Allow PostgreSQL access from within VPC" + vpc_id = aws_vpc.main.id + + ingress { + description = "PostgreSQL from VPC" + from_port = 5432 + to_port = 5432 + protocol = "tcp" + cidr_blocks = [var.vpc_cidr] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = merge(var.tags, { Name = "${var.prefix}-rds-sg" }) + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_security_group" "redis" { + name_prefix = "${var.prefix}-redis-" + description = "Allow Redis access from within VPC" + vpc_id = aws_vpc.main.id + + ingress { + description = "Redis from VPC" + from_port = 6379 + to_port = 6379 + protocol = "tcp" + cidr_blocks = [var.vpc_cidr] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = merge(var.tags, { Name = "${var.prefix}-redis-sg" }) + + lifecycle { + create_before_destroy = true + } +} + +# ─── EKS Cluster ────────────────────────────────────────────────────── + +resource "aws_iam_role" "eks_cluster" { + name_prefix = "${var.prefix}-eks-cluster-" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "eks.amazonaws.com" } + }] + }) + + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "eks_cluster_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy" + role = 
aws_iam_role.eks_cluster.name +} + +resource "aws_eks_cluster" "main" { + name = "${var.prefix}-eks" + role_arn = aws_iam_role.eks_cluster.arn + version = var.kubernetes_version + + vpc_config { + subnet_ids = concat(aws_subnet.private[*].id, aws_subnet.public[*].id) + endpoint_private_access = true + endpoint_public_access = true + } + + # Cluster access: accept both EKS access entries (API) and the aws-auth ConfigMap. This does not configure OIDC; the issuer used for IRSA is read from the cluster's identity below. + access_config { + authentication_mode = "API_AND_CONFIG_MAP" + } + + tags = var.tags + + depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy] +} + +# OIDC provider — required for IRSA (IAM Roles for Service Accounts) +data "tls_certificate" "eks" { + url = aws_eks_cluster.main.identity[0].oidc[0].issuer +} + +resource "aws_iam_openid_connect_provider" "eks" { + client_id_list = ["sts.amazonaws.com"] + thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint] + url = aws_eks_cluster.main.identity[0].oidc[0].issuer + + tags = var.tags +} + +# EKS Node Group + +resource "aws_iam_role" "eks_nodes" { + name_prefix = "${var.prefix}-eks-nodes-" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "ec2.amazonaws.com" } + }] + }) + + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_iam_role_policy_attachment" "eks_cni_policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + role = aws_iam_role.eks_nodes.name +} + +resource "aws_eks_node_group" "main" { + cluster_name = aws_eks_cluster.main.name + node_group_name = "${var.prefix}-nodes" + node_role_arn = aws_iam_role.eks_nodes.arn + 
subnet_ids = aws_subnet.private[*].id + + instance_types = [var.node_instance_type] + + scaling_config { + desired_size = var.node_count + max_size = var.node_max_count + min_size = var.node_min_count + } + + update_config { + max_unavailable = 1 + } + + tags = var.tags + + depends_on = [ + aws_iam_role_policy_attachment.eks_worker_node_policy, + aws_iam_role_policy_attachment.eks_cni_policy, + aws_iam_role_policy_attachment.eks_ecr_readonly, + ] +} + +# ─── RDS PostgreSQL ──────────────────────────────────────────────────── +# +# NOTE: This module does not create the application DB users; only their passwords are generated here. +# Create users post-provision via psql (substitute the pg_host output for <pg_host>): +# kubectl run pg-init --rm -it --image=postgres:16 -- psql -h <pg_host> -U pgadmin + +resource "aws_db_subnet_group" "main" { + name_prefix = "${var.prefix}-pg-" + subnet_ids = aws_subnet.private[*].id + + tags = merge(var.tags, { Name = "${var.prefix}-pg-subnet-group" }) +} + +resource "random_password" "pg_admin" { + length = 32 + special = false +} + +resource "random_password" "pg_keycloak" { + length = 32 + special = false +} + +resource "random_password" "pg_gitlab" { + length = 32 + special = false +} + +resource "aws_db_instance" "main" { + identifier = "${var.prefix}-postgresql" + engine = "postgres" + engine_version = "16" + instance_class = var.rds_instance_class + allocated_storage = var.rds_allocated_storage + storage_type = "gp3" + storage_encrypted = true + + db_name = "postgres" + username = "pgadmin" + password = random_password.pg_admin.result + + db_subnet_group_name = aws_db_subnet_group.main.name + vpc_security_group_ids = [aws_security_group.rds.id] + + multi_az = var.rds_multi_az + deletion_protection = var.enable_deletion_protection + skip_final_snapshot = !var.enable_deletion_protection # final snapshot is skipped whenever deletion protection is off + + tags = var.tags +} + +# ─── ElastiCache Redis ──────────────────────────────────────────────── +# +# In-VPC Redis with no TLS/auth — security via security group (VPC-only access). 
+# For production, consider enabling transit_encryption_enabled + auth_token. + +resource "aws_elasticache_subnet_group" "main" { + name = "${var.prefix}-redis-subnets" # this resource requires a fixed `name`; it has no `name_prefix` argument + subnet_ids = aws_subnet.private[*].id + + tags = var.tags +} + +resource "aws_elasticache_replication_group" "main" { + replication_group_id = "${var.prefix}-redis" + description = "Redis for DevHub ${var.prefix}" + node_type = var.redis_node_type + num_cache_clusters = var.redis_num_cache_clusters + automatic_failover_enabled = var.redis_automatic_failover + engine_version = "7.0" + port = 6379 + + subnet_group_name = aws_elasticache_subnet_group.main.name + security_group_ids = [aws_security_group.redis.id] + + at_rest_encryption_enabled = true + + tags = var.tags +} + +# ─── S3 Buckets (GitLab Object Storage) ────────────────────────────── +# +# GitLab supports S3 natively — no shim needed. +# IRSA (IAM Role for Service Accounts) provides keyless access. +# NOTE: S3 bucket names are globally unique. Adjust var.prefix if conflicts arise. 
+ +locals { + s3_bucket_prefix = "${var.prefix}-gitlab" +} + +resource "aws_s3_bucket" "gitlab_artifacts" { + bucket = "${local.s3_bucket_prefix}-artifacts" + force_destroy = true + tags = var.tags +} + +resource "aws_s3_bucket" "gitlab_uploads" { + bucket = "${local.s3_bucket_prefix}-uploads" + force_destroy = true + tags = var.tags +} + +resource "aws_s3_bucket" "gitlab_packages" { + bucket = "${local.s3_bucket_prefix}-packages" + force_destroy = true + tags = var.tags +} + +resource "aws_s3_bucket" "gitlab_lfs" { + bucket = "${local.s3_bucket_prefix}-lfs" + force_destroy = true + tags = var.tags +} + +resource "aws_s3_bucket" "gitlab_registry" { + bucket = "${local.s3_bucket_prefix}-registry" + force_destroy = true + tags = var.tags +} + +resource "aws_s3_bucket" "gitlab_backups" { + bucket = "${local.s3_bucket_prefix}-backups" + force_destroy = true + tags = var.tags +} + +# Block public access on all GitLab buckets +resource "aws_s3_bucket_public_access_block" "gitlab_artifacts" { + bucket = aws_s3_bucket.gitlab_artifacts.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +resource "aws_s3_bucket_public_access_block" "gitlab_uploads" { + bucket = aws_s3_bucket.gitlab_uploads.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +resource "aws_s3_bucket_public_access_block" "gitlab_packages" { + bucket = aws_s3_bucket.gitlab_packages.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +resource "aws_s3_bucket_public_access_block" "gitlab_lfs" { + bucket = aws_s3_bucket.gitlab_lfs.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +resource "aws_s3_bucket_public_access_block" "gitlab_registry" { + bucket = aws_s3_bucket.gitlab_registry.id + block_public_acls = true + 
block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +resource "aws_s3_bucket_public_access_block" "gitlab_backups" { + bucket = aws_s3_bucket.gitlab_backups.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +# Server-side encryption for all buckets +resource "aws_s3_bucket_server_side_encryption_configuration" "gitlab_artifacts" { + bucket = aws_s3_bucket.gitlab_artifacts.id + rule { apply_server_side_encryption_by_default { sse_algorithm = "AES256" } } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "gitlab_uploads" { + bucket = aws_s3_bucket.gitlab_uploads.id + rule { apply_server_side_encryption_by_default { sse_algorithm = "AES256" } } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "gitlab_packages" { + bucket = aws_s3_bucket.gitlab_packages.id + rule { apply_server_side_encryption_by_default { sse_algorithm = "AES256" } } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "gitlab_lfs" { + bucket = aws_s3_bucket.gitlab_lfs.id + rule { apply_server_side_encryption_by_default { sse_algorithm = "AES256" } } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "gitlab_registry" { + bucket = aws_s3_bucket.gitlab_registry.id + rule { apply_server_side_encryption_by_default { sse_algorithm = "AES256" } } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "gitlab_backups" { + bucket = aws_s3_bucket.gitlab_backups.id + rule { apply_server_side_encryption_by_default { sse_algorithm = "AES256" } } +} + +# ─── Cognito User Pool (IdP for Keycloak) ──────────────────────────── +# +# Keycloak federates with Cognito — users authenticate via "Sign in with AWS" +# through Keycloak, which remains the single OIDC issuer for all services. +# +# Three Cognito groups map to Keycloak groups via setup-keycloak.sh IdP mappers. 
+# The token's `cognito:groups` claim is an array — assign users to Cognito groups +# in the AWS console or via `aws cognito-idp admin-add-user-to-group`. +# +# NOTE: var.cognito_domain_prefix must be globally unique across ALL AWS accounts. + +resource "aws_cognito_user_pool" "main" { + name = "${var.prefix}-devhub" + + username_attributes = ["email"] + auto_verified_attributes = ["email"] + + admin_create_user_config { + allow_admin_create_user_only = true # admins create users; disable for self-signup + } + + password_policy { + minimum_length = 12 + require_lowercase = true + require_numbers = true + require_symbols = false + require_uppercase = true + temporary_password_validity_days = 7 + } + + schema { + attribute_data_type = "String" + name = "email" + required = true + mutable = true + string_attribute_constraints { + min_length = 5 + max_length = 254 + } + } + + tags = var.tags +} + +resource "aws_cognito_user_pool_client" "keycloak_idp" { + name = "${var.prefix}-keycloak-idp" + user_pool_id = aws_cognito_user_pool.main.id + + generate_secret = true + prevent_user_existence_errors = "ENABLED" + allowed_oauth_flows_user_pool_client = true + allowed_oauth_flows = ["code"] + allowed_oauth_scopes = ["openid", "email", "profile"] + supported_identity_providers = ["COGNITO"] + + # Placeholder redirect URI — updated by setup-keycloak.sh via AWS CLI + callback_urls = ["https://placeholder.invalid/realms/devops/broker/aws-cognito/endpoint"] + + refresh_token_validity = 30 + access_token_validity = 60 + id_token_validity = 60 + + token_validity_units { + refresh_token = "days" + access_token = "minutes" + id_token = "minutes" + } +} + +resource "aws_cognito_user_pool_domain" "main" { + # Must be globally unique. Customize var.cognito_domain_prefix to avoid conflicts. + domain = var.cognito_domain_prefix + user_pool_id = aws_cognito_user_pool.main.id +} + +# Cognito groups — assign users to these groups to grant platform access. 
+# The token's `cognito:groups` claim maps to Keycloak groups via IdP mappers. +resource "aws_cognito_user_group" "devops_admins" { + name = "devops-admins" + user_pool_id = aws_cognito_user_pool.main.id + description = "Full access to DevOps platform administration" +} + +resource "aws_cognito_user_group" "developers" { + name = "developers" + user_pool_id = aws_cognito_user_pool.main.id + description = "Developer access to DevOps platform services" +} + +resource "aws_cognito_user_group" "viewers" { + name = "viewers" + user_pool_id = aws_cognito_user_pool.main.id + description = "Read-only access to DevOps platform services" +} + +# ─── IRSA for GitLab ───────────────────────────────────────────────── +# +# Allows GitLab pods (webservice, sidekiq) to access S3 without explicit +# AWS credentials. The K8s service account "gitlab" in the "gitlab" namespace +# exchanges its projected OIDC token for temporary AWS credentials. + +data "aws_iam_policy_document" "gitlab_assume_role" { + statement { + effect = "Allow" + + principals { + type = "Federated" + identifiers = [aws_iam_openid_connect_provider.eks.arn] + } + + actions = ["sts:AssumeRoleWithWebIdentity"] + + condition { + test = "StringEquals" + variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:sub" + values = ["system:serviceaccount:gitlab:gitlab"] + } + + condition { + test = "StringEquals" + variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:aud" + values = ["sts.amazonaws.com"] + } + } +} + +resource "aws_iam_role" "gitlab_irsa" { + name_prefix = "${var.prefix}-gitlab-irsa-" + assume_role_policy = data.aws_iam_policy_document.gitlab_assume_role.json + + tags = var.tags +} + +data "aws_iam_policy_document" "gitlab_s3" { + statement { + effect = "Allow" + actions = [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListMultipartUploadParts", + "s3:AbortMultipartUpload", + ] + resources = [ + "${aws_s3_bucket.gitlab_artifacts.arn}/*", + 
"${aws_s3_bucket.gitlab_uploads.arn}/*", + "${aws_s3_bucket.gitlab_packages.arn}/*", + "${aws_s3_bucket.gitlab_lfs.arn}/*", + "${aws_s3_bucket.gitlab_registry.arn}/*", + "${aws_s3_bucket.gitlab_backups.arn}/*", + ] + } + + statement { + effect = "Allow" + actions = ["s3:ListBucket"] + resources = [ + aws_s3_bucket.gitlab_artifacts.arn, + aws_s3_bucket.gitlab_uploads.arn, + aws_s3_bucket.gitlab_packages.arn, + aws_s3_bucket.gitlab_lfs.arn, + aws_s3_bucket.gitlab_registry.arn, + aws_s3_bucket.gitlab_backups.arn, + ] + } +} + +resource "aws_iam_role_policy" "gitlab_s3" { + name_prefix = "${var.prefix}-gitlab-s3-" + role = aws_iam_role.gitlab_irsa.id + policy = data.aws_iam_policy_document.gitlab_s3.json +} + +# ─── External-DNS IRSA ─────────────────────────────────────────────── +# Allows external-dns to manage Route53 records for the cluster's domain. +# The K8s service account "external-dns/external-dns" assumes this role via IRSA. + +data "aws_route53_zone" "main" { + name = var.domain + private_zone = false +} + +data "aws_iam_policy_document" "external_dns_assume_role" { + statement { + effect = "Allow" + + principals { + type = "Federated" + identifiers = [aws_iam_openid_connect_provider.eks.arn] + } + + actions = ["sts:AssumeRoleWithWebIdentity"] + + condition { + test = "StringEquals" + variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:sub" + values = ["system:serviceaccount:external-dns:external-dns"] + } + + condition { + test = "StringEquals" + variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:aud" + values = ["sts.amazonaws.com"] + } + } +} + +resource "aws_iam_role" "external_dns_irsa" { + name_prefix = "${var.prefix}-external-dns-irsa-" + assume_role_policy = data.aws_iam_policy_document.external_dns_assume_role.json + + tags = var.tags +} + +data "aws_iam_policy_document" "external_dns_route53" { + statement { + effect = "Allow" + actions = ["route53:ChangeResourceRecordSets"] + resources = 
["arn:aws:route53:::hostedzone/${data.aws_route53_zone.main.zone_id}"] + } + + statement { + effect = "Allow" + actions = ["route53:ListHostedZones", "route53:ListResourceRecordSets", "route53:ListTagsForResource"] + resources = ["*"] + } +} + +resource "aws_iam_role_policy" "external_dns_route53" { + name_prefix = "${var.prefix}-external-dns-route53-" + role = aws_iam_role.external_dns_irsa.id + policy = data.aws_iam_policy_document.external_dns_route53.json +} diff --git a/.tofu/platforms/eks/modules/cluster/outputs.tf b/.tofu/platforms/eks/modules/cluster/outputs.tf new file mode 100644 index 0000000..bbcafda --- /dev/null +++ b/.tofu/platforms/eks/modules/cluster/outputs.tf @@ -0,0 +1,110 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" { + description = "EKS cluster name" + value = aws_eks_cluster.main.name +} + +output "aws_region" { + description = "AWS region" + value = var.region +} + +# ─── PostgreSQL ─────────────────────────────────────────────────────── + +output "pg_host" { + description = "RDS PostgreSQL endpoint (private, reachable from EKS)" + value = aws_db_instance.main.address +} + +output "pg_port" { + description = "PostgreSQL port" + value = aws_db_instance.main.port +} + +output "pg_admin_login" { + description = "RDS administrator login" + value = aws_db_instance.main.username +} + +output "pg_admin_password" { + description = "RDS administrator password" + value = random_password.pg_admin.result + sensitive = true +} + +output "pg_keycloak_password" { + description = "Pre-generated password for keycloak DB user — create user post-provision" + value = random_password.pg_keycloak.result + sensitive = true +} + +output "pg_gitlab_password" { + description = "Pre-generated password for gitlab DB user — create user post-provision" + value = random_password.pg_gitlab.result + sensitive = true +} + +# ─── Redis ──────────────────────────────────────────────────────────── + +output "redis_host" { 
+ description = "ElastiCache Redis primary endpoint" + value = aws_elasticache_replication_group.main.primary_endpoint_address +} + +output "redis_port" { + description = "ElastiCache Redis port" + value = aws_elasticache_replication_group.main.port +} + +# ─── S3 ────────────────────────────────────────────────────────────── + +output "gitlab_s3_bucket_prefix" { + description = "S3 bucket name prefix — buckets are {prefix}-artifacts, {prefix}-uploads, etc." + value = local.s3_bucket_prefix +} + +output "aws_region_output" { + description = "AWS region (for S3 connection config)" + value = var.region +} + +# ─── IRSA ──────────────────────────────────────────────────────────── + +output "gitlab_irsa_role_arn" { + description = "IAM Role ARN for GitLab IRSA — annotate the K8s service account with this value" + value = aws_iam_role.gitlab_irsa.arn +} + +output "external_dns_irsa_role_arn" { + description = "IAM Role ARN for external-dns IRSA — written to config.yaml by sync-tofu-outputs.sh" + value = aws_iam_role.external_dns_irsa.arn +} + +# ─── Cognito ───────────────────────────────────────────────────────── + +output "cognito_user_pool_id" { + description = "Cognito User Pool ID" + value = aws_cognito_user_pool.main.id +} + +output "cognito_issuer_url" { + description = "Cognito OIDC issuer URL — used in Keycloak IdP config" + value = "https://cognito-idp.${var.region}.amazonaws.com/${aws_cognito_user_pool.main.id}" +} + +output "cognito_hosted_ui_domain" { + description = "Cognito hosted UI domain (for auth/token endpoints)" + value = "${aws_cognito_user_pool_domain.main.domain}.auth.${var.region}.amazoncognito.com" +} + +output "cognito_client_id" { + description = "Cognito app client ID for the Keycloak IdP" + value = aws_cognito_user_pool_client.keycloak_idp.id +} + +output "cognito_client_secret" { + description = "Cognito app client secret for the Keycloak IdP" + value = aws_cognito_user_pool_client.keycloak_idp.client_secret + sensitive = true +} diff 
--git a/.tofu/platforms/eks/modules/cluster/providers.tf b/.tofu/platforms/eks/modules/cluster/providers.tf
new file mode 100644
index 0000000..443c9eb
--- /dev/null
+++ b/.tofu/platforms/eks/modules/cluster/providers.tf
@@ -0,0 +1,16 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+    tls = {
+      source  = "hashicorp/tls"
+      version = "~> 4.0"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.0"
+    }
+  }
+}
diff --git a/.tofu/platforms/eks/modules/cluster/variables.tf b/.tofu/platforms/eks/modules/cluster/variables.tf
new file mode 100644
index 0000000..0fb6012
--- /dev/null
+++ b/.tofu/platforms/eks/modules/cluster/variables.tf
@@ -0,0 +1,119 @@
+# ─── Region ──────────────────────────────────────────────────────────
+
+variable "region" {
+  description = "AWS region (e.g., eu-west-1, us-east-1)"
+  type        = string
+}
+
+variable "prefix" {
+  description = "Prefix for resource names (e.g., devhub-dev)"
+  type        = string
+}
+
+# ─── Networking ───────────────────────────────────────────────────────
+
+variable "vpc_cidr" {
+  description = "VPC CIDR block"
+  type        = string
+  default     = "10.100.0.0/16"
+}
+
+variable "availability_zones" {
+  description = "List of AZs for subnets (2–3 recommended)"
+  type        = list(string)
+}
+
+# ─── EKS Cluster ─────────────────────────────────────────────────────
+
+variable "node_instance_type" {
+  description = "EKS node instance type (e.g., t3.medium, m5.xlarge)"
+  type        = string
+}
+
+variable "node_count" {
+  description = "Desired number of EKS worker nodes"
+  type        = number
+}
+
+variable "node_min_count" {
+  description = "Minimum number of EKS worker nodes"
+  type        = number
+  default     = 1
+}
+
+variable "node_max_count" {
+  description = "Maximum number of EKS worker nodes"
+  type        = number
+}
+
+variable "kubernetes_version" {
+  description = "Kubernetes version for EKS (e.g., \"1.30\")"
+  type        = string
+  default     = "1.30"
+}
+
+variable "enable_deletion_protection" {
+  description = "Enable deletion protection on stateful resources (RDS)"
+  type        = bool
+  default     = false
+}
+
+# ─── RDS (PostgreSQL) ────────────────────────────────────────────────
+
+variable "rds_instance_class" {
+  description = "RDS instance class (e.g., db.t3.micro, db.r5.large)"
+  type        = string
+}
+
+variable "rds_allocated_storage" {
+  description = "RDS allocated storage in GB"
+  type        = number
+  default     = 20
+}
+
+variable "rds_multi_az" {
+  description = "Enable RDS Multi-AZ deployment"
+  type        = bool
+  default     = false
+}
+
+# ─── ElastiCache (Redis) ──────────────────────────────────────────────
+
+variable "redis_node_type" {
+  description = "ElastiCache node type (e.g., cache.t3.micro, cache.r5.large)"
+  type        = string
+}
+
+variable "redis_num_cache_clusters" {
+  description = "Number of Redis cache clusters (1 = single, 2 = primary+replica)"
+  type        = number
+  default     = 1
+}
+
+variable "redis_automatic_failover" {
+  description = "Enable automatic Redis failover (requires num_cache_clusters >= 2)"
+  type        = bool
+  default     = false
+}
+
+# ─── DNS ─────────────────────────────────────────────────────────────
+
+variable "domain" {
+  description = "Public domain name for the cluster (e.g., dev.example.com) — must have an existing Route53 hosted zone"
+  type        = string
+}
+
+# ─── Cognito (IdP for Keycloak) ───────────────────────────────────────
+
+variable "cognito_domain_prefix" {
+  description = "Cognito hosted UI domain prefix — must be globally unique across all AWS accounts"
+  type        = string
+}
+
+# ─── Tags ─────────────────────────────────────────────────────────────
+
+variable "tags" {
+  description = "Tags applied to all resources"
+  type        = map(string)
+  default     = {}
+}
diff --git a/.tofu/platforms/eks/prod/main.tf b/.tofu/platforms/eks/prod/main.tf
new file mode 100644
index 0000000..5701524
--- /dev/null
+++ b/.tofu/platforms/eks/prod/main.tf
@@ -0,0 +1,39 @@
+module "cluster" {
+  source = "../modules/cluster"
+
+  region = var.region
+  prefix = "devhub"
+
+  # VPC
+  availability_zones = ["${var.region}a", "${var.region}b", "${var.region}c"]
+
+  # EKS — general-purpose nodes for production
+  node_instance_type = "m5.xlarge"
+  node_count         = 3
+  node_min_count     = 3
+  node_max_count     = 6
+  kubernetes_version = "1.30"
+
+  # RDS — larger instance with Multi-AZ for production
+  rds_instance_class    = "db.r5.large"
+  rds_allocated_storage = 100
+  rds_multi_az          = true
+
+  # ElastiCache — larger instance with replica for production
+  redis_node_type          = "cache.r5.large"
+  redis_num_cache_clusters = 2
+  redis_automatic_failover = true
+
+  # DNS — must match an existing Route53 hosted zone
+  domain = "example.com" # TODO: set to your actual domain
+
+  # Cognito — domain prefix must be globally unique
+  cognito_domain_prefix = "devhub-prod-devhub" # TODO: customize to avoid conflicts
+
+  enable_deletion_protection = true
+
+  tags = {
+    Environment = "prod"
+    ManagedBy   = "tofu"
+  }
+}
diff --git a/.tofu/platforms/eks/prod/outputs.tf b/.tofu/platforms/eks/prod/outputs.tf
new file mode 100644
index 0000000..e7657c5
--- /dev/null
+++ b/.tofu/platforms/eks/prod/outputs.tf
@@ -0,0 +1,40 @@
+# Pass-through of the cluster module's outputs for the prod environment.
+output "cluster_name" { value = module.cluster.cluster_name }
+# The module exports the region under the name "aws_region_output".
+output "aws_region" { value = module.cluster.aws_region_output }
+output "pg_host" { value = module.cluster.pg_host }
+output "pg_port" { value = module.cluster.pg_port }
+output "pg_admin_login" { value = module.cluster.pg_admin_login }
+
+# Sensitive values use multi-line blocks: HCL permits only one argument in a
+# single-line block and does not accept ";" as an argument separator.
+output "pg_admin_password" {
+  value     = module.cluster.pg_admin_password
+  sensitive = true
+}
+
+output "pg_keycloak_password" {
+  value     = module.cluster.pg_keycloak_password
+  sensitive = true
+}
+
+output "pg_gitlab_password" {
+  value     = module.cluster.pg_gitlab_password
+  sensitive = true
+}
+
+output "redis_host" { value = module.cluster.redis_host }
+output "redis_port" { value = module.cluster.redis_port }
+output "gitlab_s3_bucket_prefix" { value = module.cluster.gitlab_s3_bucket_prefix }
+output "gitlab_irsa_role_arn" { value = module.cluster.gitlab_irsa_role_arn }
+# Re-exported at the root: the module describes it as consumed by sync-tofu-outputs.sh.
+output "external_dns_irsa_role_arn" { value = module.cluster.external_dns_irsa_role_arn }
+output "cognito_user_pool_id" { value = module.cluster.cognito_user_pool_id }
+output "cognito_issuer_url" { value = module.cluster.cognito_issuer_url }
+output "cognito_hosted_ui_domain" { value = module.cluster.cognito_hosted_ui_domain }
+output "cognito_client_id" { value = module.cluster.cognito_client_id }
+
+output "cognito_client_secret" {
+  value     = module.cluster.cognito_client_secret
+  sensitive = true
+}
diff --git a/.tofu/platforms/eks/prod/providers.tf b/.tofu/platforms/eks/prod/providers.tf
new file mode 100644
index 0000000..edb21a4
--- /dev/null
+++ b/.tofu/platforms/eks/prod/providers.tf
@@ -0,0 +1,26 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+    tls = {
+      source  = "hashicorp/tls"
+      version = "~> 4.0"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.0"
+    }
+  }
+}
+
+provider "aws" {
+  region = var.region
+}
+
+variable "region" {
+  description = "AWS region for prod environment"
+  type        = string
+  default     = "eu-west-1"
+}
diff --git a/.tofu/platforms/eks/workload/main.tf b/.tofu/platforms/eks/workload/main.tf
new file mode 100644
index 0000000..93f5014
--- /dev/null
+++ b/.tofu/platforms/eks/workload/main.tf
@@ -0,0 +1,339 @@
+# =============================================================================
+# AWS Workload Cluster
+# =============================================================================
+# A lean EKS cluster for running application workloads. No managed data
+# services — those live on the platform cluster. ArgoCD (on the platform
+# cluster) deploys apps to this cluster via the app-of-apps pattern.
+#
+# Platform components deployed by deploy-workload.sh:
+#   nginx-ingress, cert-manager, external-dns, external-secrets, alloy
+#
+# Usage:
+#   tofu init && tofu plan && tofu apply
+#   ./sync-tofu-outputs.sh --env aws-workload
+#   ./deploy-workload.sh --env aws-workload
+# =============================================================================
+
+variable "prefix" {
+  description = "Prefix for resource names (e.g., devhub-workload)"
+  type        = string
+  default     = "devhub-workload"
+}
+
+variable "availability_zones" {
+  description = "List of AZs for subnets"
+  type        = list(string)
+  default     = ["eu-west-1a", "eu-west-1b"] # NOTE(review): tied to the eu-west-1 default of var.region; override both together
+}
+
+variable "vpc_cidr" {
+  description = "VPC CIDR block"
+  type        = string
+  default     = "10.110.0.0/16"
+}
+
+variable "node_instance_type" {
+  description = "EKS node instance type"
+  type        = string
+  default     = "t3.medium"
+}
+
+variable "node_count" {
+  description = "Desired number of EKS worker nodes"
+  type        = number
+  default     = 2
+}
+
+variable "node_min_count" {
+  description = "Minimum number of EKS worker nodes"
+  type        = number
+  default     = 1
+}
+
+variable "node_max_count" {
+  description = "Maximum number of EKS worker nodes"
+  type        = number
+  default     = 4
+}
+
+variable "kubernetes_version" {
+  description = "Kubernetes version for EKS"
+  type        = string
+  default     = "1.30"
+}
+
+variable "domain" { # no default: must be supplied by the caller
+  description = "Public domain name — must have an existing Route53 hosted zone"
+  type        = string
+}
+
+variable "tags" {
+  description = "Tags applied to all resources"
+  type        = map(string)
+  default = {
+    Environment = "workload"
+    ManagedBy   = "tofu"
+  }
+}
+
+# ─── VPC ──────────────────────────────────────────────────────────────
+
+resource "aws_vpc" "main" {
+  cidr_block           = var.vpc_cidr
+  enable_dns_hostnames = true
+  enable_dns_support   = true
+
+  tags = merge(var.tags, { Name = "${var.prefix}-vpc" }) # common tags plus a per-resource Name
+}
+
+resource "aws_internet_gateway" "main" {
+  vpc_id = aws_vpc.main.id
+  tags   = merge(var.tags, { Name = "${var.prefix}-igw" })
+}
+
+resource "aws_subnet" "public" {
+  count             = length(var.availability_zones) # one public subnet per AZ
+  vpc_id            = aws_vpc.main.id
+  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index) # 4 extra prefix bits: /20 blocks for a /16 VPC
+  availability_zone = var.availability_zones[count.index]
+
+  map_public_ip_on_launch = true
+
+  tags = merge(var.tags, {
+    Name                                      = "${var.prefix}-public-${count.index + 1}"
+    "kubernetes.io/cluster/${var.prefix}-eks" = "shared" # key must match the aws_eks_cluster name "${var.prefix}-eks"
+    "kubernetes.io/role/elb"                  = "1"
+  })
+}
+
+resource "aws_subnet" "private" {
+  count             = length(var.availability_zones) # one private subnet per AZ
+  vpc_id            = aws_vpc.main.id
+  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones)) # indexes continue after the public blocks
+  availability_zone = var.availability_zones[count.index]
+
+  tags = merge(var.tags, {
+    Name                                      = "${var.prefix}-private-${count.index + 1}"
+    "kubernetes.io/cluster/${var.prefix}-eks" = "shared"
+    "kubernetes.io/role/internal-elb"         = "1"
+  })
+}
+
+resource "aws_eip" "nat" {
+  domain = "vpc"
+  tags   = merge(var.tags, { Name = "${var.prefix}-nat-eip" })
+}
+
+resource "aws_nat_gateway" "main" {
+  allocation_id = aws_eip.nat.id
+  subnet_id     = aws_subnet.public[0].id # single NAT gateway, placed in the first public subnet
+  tags          = merge(var.tags, { Name = "${var.prefix}-nat" })
+
+  depends_on = [aws_internet_gateway.main]
+}
+
+resource "aws_route_table" "public" {
+  vpc_id = aws_vpc.main.id
+
+  route {
+    cidr_block = "0.0.0.0/0"
+    gateway_id = aws_internet_gateway.main.id
+  }
+
+  tags = merge(var.tags, { Name = "${var.prefix}-public-rt" })
+}
+
+resource "aws_route_table_association" "public" {
+  count          = length(var.availability_zones)
+  subnet_id      = aws_subnet.public[count.index].id
+  route_table_id = aws_route_table.public.id
+}
+
+resource "aws_route_table" "private" {
+  vpc_id = aws_vpc.main.id
+
+  route {
+    cidr_block     = "0.0.0.0/0"
+    nat_gateway_id = aws_nat_gateway.main.id # every private subnet egresses through the one NAT gateway
+  }
+
+  tags = merge(var.tags, { Name = "${var.prefix}-private-rt" })
+}
+
+resource "aws_route_table_association" "private" {
+  count          = length(var.availability_zones)
+  subnet_id      = aws_subnet.private[count.index].id
+  route_table_id = aws_route_table.private.id
+}
+
+# ─── EKS Cluster ──────────────────────────────────────────────────────
+
+resource "aws_iam_role" "eks_cluster" {
+  name_prefix = "${var.prefix}-eks-cluster-"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Action    = "sts:AssumeRole"
+      Effect    = "Allow"
+      Principal = { Service = "eks.amazonaws.com" }
+    }]
+  })
+
+  tags = var.tags
+}
+
+resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
+  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
+  role       = aws_iam_role.eks_cluster.name
+}
+
+resource "aws_eks_cluster" "main" {
+  name     = "${var.prefix}-eks"
+  role_arn = aws_iam_role.eks_cluster.arn
+  version  = var.kubernetes_version
+
+  vpc_config {
+    subnet_ids              = concat(aws_subnet.private[*].id, aws_subnet.public[*].id)
+    endpoint_private_access = true
+    endpoint_public_access  = true # NOTE(review): no public_access_cidrs is set here — confirm an unrestricted public endpoint is intended
+  }
+
+  access_config {
+    authentication_mode = "API_AND_CONFIG_MAP"
+  }
+
+  tags = var.tags
+
+  depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy]
+}
+
+# OIDC provider — required for IRSA
+data "tls_certificate" "eks" {
+  url = aws_eks_cluster.main.identity[0].oidc[0].issuer
+}
+
+resource "aws_iam_openid_connect_provider" "eks" {
+  client_id_list  = ["sts.amazonaws.com"]
+  thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint]
+  url             = aws_eks_cluster.main.identity[0].oidc[0].issuer
+
+  tags = var.tags
+}
+
+resource "aws_iam_role" "eks_nodes" {
+  name_prefix = "${var.prefix}-eks-nodes-"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Action    = "sts:AssumeRole"
+      Effect    = "Allow"
+      Principal = { Service = "ec2.amazonaws.com" }
+    }]
+  })
+
+  tags = var.tags
+}
+
+resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" {
+  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
+  role       = aws_iam_role.eks_nodes.name
+}
+
+resource "aws_iam_role_policy_attachment" "eks_cni_policy" {
+  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
+  role       = aws_iam_role.eks_nodes.name
+}
+
+resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" {
+  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
+  role       = aws_iam_role.eks_nodes.name
+}
+
+resource "aws_eks_node_group" "main" {
+  cluster_name    = aws_eks_cluster.main.name
+  node_group_name = "${var.prefix}-nodes"
+  node_role_arn   = aws_iam_role.eks_nodes.arn
+  subnet_ids      = aws_subnet.private[*].id # worker nodes are placed in the private subnets only
+
+  instance_types = [var.node_instance_type]
+
+  scaling_config {
+    desired_size = var.node_count
+    max_size     = var.node_max_count
+    min_size     = var.node_min_count
+  }
+
+  update_config {
+    max_unavailable = 1
+  }
+
+  tags = var.tags
+
+  depends_on = [ # node IAM policies must be attached before the node group is created
+    aws_iam_role_policy_attachment.eks_worker_node_policy,
+    aws_iam_role_policy_attachment.eks_cni_policy,
+    aws_iam_role_policy_attachment.eks_ecr_readonly,
+  ]
+}
+
+# ─── External-DNS IRSA ───────────────────────────────────────────────
+# Allows external-dns to manage Route53 records for app ingresses.
+
+data "aws_route53_zone" "main" {
+  name         = var.domain
+  private_zone = false # look up the public hosted zone for var.domain
+}
+
+data "aws_iam_policy_document" "external_dns_assume_role" {
+  statement {
+    effect = "Allow"
+
+    principals {
+      type        = "Federated"
+      identifiers = [aws_iam_openid_connect_provider.eks.arn]
+    }
+
+    actions = ["sts:AssumeRoleWithWebIdentity"]
+
+    condition {
+      test     = "StringEquals"
+      variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:sub" # condition key is the issuer host/path without the scheme
+      values   = ["system:serviceaccount:external-dns:external-dns"] # only the external-dns service account in the external-dns namespace
+    }
+
+    condition {
+      test     = "StringEquals"
+      variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:aud"
+      values   = ["sts.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role" "external_dns_irsa" {
+  name_prefix        = "${var.prefix}-external-dns-irsa-"
+  assume_role_policy = data.aws_iam_policy_document.external_dns_assume_role.json
+
+  tags = var.tags
+}
+
+data "aws_iam_policy_document" "external_dns_route53" {
+  statement {
+    effect    = "Allow"
+    actions   = ["route53:ChangeResourceRecordSets"]
+    resources = ["arn:aws:route53:::hostedzone/${data.aws_route53_zone.main.zone_id}"] # record changes limited to the zone found for var.domain
+  }
+
+  statement {
+    effect    = "Allow"
+    actions   = ["route53:ListHostedZones", "route53:ListResourceRecordSets", "route53:ListTagsForResource"]
+    resources = ["*"] # list/read actions are granted on all resources
+  }
+}
+
+resource "aws_iam_role_policy" "external_dns_route53" {
+  name_prefix = "${var.prefix}-external-dns-route53-"
+  role        = aws_iam_role.external_dns_irsa.id
+  policy      = data.aws_iam_policy_document.external_dns_route53.json
+}
diff --git a/.tofu/platforms/eks/workload/outputs.tf b/.tofu/platforms/eks/workload/outputs.tf
new file mode 100644
index 0000000..1cafe4a
--- /dev/null
+++ b/.tofu/platforms/eks/workload/outputs.tf
@@ -0,0 +1,3 @@
+output "cluster_name" { value = aws_eks_cluster.main.name }
+output "aws_region" { value = var.region }
+output "external_dns_irsa_role_arn" { value = aws_iam_role.external_dns_irsa.arn }
diff --git a/.tofu/platforms/eks/workload/providers.tf
b/.tofu/platforms/eks/workload/providers.tf
new file mode 100644
index 0000000..c237265
--- /dev/null
+++ b/.tofu/platforms/eks/workload/providers.tf
@@ -0,0 +1,24 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+    tls = {
+      source  = "hashicorp/tls"
+      version = "~> 4.0"
+    }
+  }
+}
+
+# Authentication: set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
+# or configure an AWS profile: export AWS_PROFILE=devhub
+provider "aws" {
+  region = var.region
+}
+
+variable "region" {
+  description = "AWS region for the workload environment"
+  type        = string
+  default     = "eu-west-1"
+}
diff --git a/.tofu/platforms/gke/dev/main.tf b/.tofu/platforms/gke/dev/main.tf
new file mode 100644
index 0000000..57c417f
--- /dev/null
+++ b/.tofu/platforms/gke/dev/main.tf
@@ -0,0 +1,31 @@
+module "cluster" {
+  source = "../modules/cluster"
+
+  project_id = var.project_id
+  region     = var.region
+  prefix     = "devhub-dev"
+
+  # GKE — small dev nodes
+  node_machine_type = "e2-standard-2"
+  node_count        = 2
+
+  # Cloud SQL — small burstable tier for dev
+  pg_tier                = "db-g1-small"
+  pg_disk_size_gb        = 20
+  pg_availability_type   = "ZONAL"
+  pg_deletion_protection = false
+
+  # Memorystore Redis — BASIC (no HA) for dev
+  redis_tier           = "BASIC"
+  redis_memory_size_gb = 1
+
+  # GCS — STANDARD storage for dev
+  gcs_storage_class = "STANDARD"
+
+  deletion_protection = false
+
+  labels = {
+    environment = "dev"
+    managed-by  = "tofu"
+  }
+}
diff --git a/.tofu/platforms/gke/dev/outputs.tf b/.tofu/platforms/gke/dev/outputs.tf
new file mode 100644
index 0000000..ceeafaf
--- /dev/null
+++ b/.tofu/platforms/gke/dev/outputs.tf
@@ -0,0 +1,37 @@
+# Pass-through of the cluster module's outputs for the dev environment.
+output "cluster_name" { value = module.cluster.cluster_name }
+output "project_id" { value = module.cluster.project_id }
+output "region" { value = module.cluster.region }
+output "pg_host" { value = module.cluster.pg_host }
+output "pg_port" { value = module.cluster.pg_port }
+output "pg_admin_login" { value = module.cluster.pg_admin_login }
+
+# Sensitive values use multi-line blocks: HCL permits only one argument in a
+# single-line block and does not accept ";" as an argument separator.
+output "pg_admin_password" {
+  value     = module.cluster.pg_admin_password
+  sensitive = true
+}
+
+output "pg_keycloak_password" {
+  value     = module.cluster.pg_keycloak_password
+  sensitive = true
+}
+
+output "pg_gitlab_password" {
+  value     = module.cluster.pg_gitlab_password
+  sensitive = true
+}
+
+output "redis_host" { value = module.cluster.redis_host }
+output "redis_port" { value = module.cluster.redis_port }
+
+output "redis_auth_string" {
+  value     = module.cluster.redis_auth_string
+  sensitive = true
+}
+
+output "gitlab_gcs_bucket_prefix" { value = module.cluster.gitlab_gcs_bucket_prefix }
+output "gitlab_gsa_email" { value = module.cluster.gitlab_gsa_email }
+# Re-exported at the root: the module describes it as consumed by sync-tofu-outputs.sh.
+output "external_dns_gsa_email" { value = module.cluster.external_dns_gsa_email }
diff --git a/.tofu/platforms/gke/dev/providers.tf b/.tofu/platforms/gke/dev/providers.tf
new file mode 100644
index 0000000..89fb1f4
--- /dev/null
+++ b/.tofu/platforms/gke/dev/providers.tf
@@ -0,0 +1,30 @@
+terraform {
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 6.0"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.0"
+    }
+  }
+}
+
+# Authentication: use Application Default Credentials (gcloud auth application-default login)
+# or set GOOGLE_APPLICATION_CREDENTIALS to a service account key file.
+provider "google" {
+  project = var.project_id
+  region  = var.region
+}
+
+variable "project_id" {
+  description = "GCP project ID for the dev environment"
+  type        = string
+}
+
+variable "region" {
+  description = "GCP region"
+  type        = string
+  default     = "europe-west4"
+}
diff --git a/.tofu/platforms/gke/modules/cluster/main.tf b/.tofu/platforms/gke/modules/cluster/main.tf
new file mode 100644
index 0000000..4e303e3
--- /dev/null
+++ b/.tofu/platforms/gke/modules/cluster/main.tf
@@ -0,0 +1,445 @@
+# ─── Required APIs ────────────────────────────────────────────────────
+
+resource "google_project_service" "compute" {
+  project            = var.project_id
+  service            = "compute.googleapis.com"
+  disable_on_destroy = false
+}
+
+resource "google_project_service" "container" {
+  project            = var.project_id
+  service            = "container.googleapis.com"
+  disable_on_destroy = false
+}
+
+resource "google_project_service" "sqladmin" {
+  project            = var.project_id
+  service            = "sqladmin.googleapis.com"
+  disable_on_destroy = false
+}
+
+resource "google_project_service" "servicenetworking" {
+  project            = var.project_id
+  service            = "servicenetworking.googleapis.com"
+  disable_on_destroy = false
+}
+
+resource "google_project_service" "redis" {
+  project            = var.project_id
+  service            = "redis.googleapis.com"
+  disable_on_destroy = false
+}
+
+resource "google_project_service" "iam" {
+  project            = var.project_id
+  service            = "iam.googleapis.com"
+  disable_on_destroy = false
+}
+
+# ─── Networking ───────────────────────────────────────────────────────
+
+resource "google_compute_network" "main" {
+  project                 = var.project_id
+  name                    = "${var.prefix}-vpc"
+  auto_create_subnetworks = false
+
+  depends_on = [google_project_service.compute]
+}
+
+resource "google_compute_subnetwork" "main" {
+  project       = var.project_id
+  name          = "${var.prefix}-subnet"
+  ip_cidr_range = "10.100.0.0/22"
+  region        = var.region
+  network       = google_compute_network.main.id
+
+  # Secondary ranges required for GKE VPC-native cluster
+  secondary_ip_range {
+    range_name    = "pods"
+    ip_cidr_range = "10.200.0.0/14" # /14 = ~262k pod IPs
+  }
+
+  secondary_ip_range {
+    range_name    = "services"
+    ip_cidr_range = "10.204.0.0/20" # /20 = ~4k service IPs
+  }
+}
+
+# Private IP range for Cloud SQL VPC peering
+resource "google_compute_global_address" "private_ip_range" {
+  project       = var.project_id
+  name          = "${var.prefix}-private-ip-range"
+  purpose       = "VPC_PEERING"
+  address_type  = "INTERNAL"
+  prefix_length = 20
+  network       = google_compute_network.main.id
+
+  depends_on = [google_project_service.compute]
+}
+
+resource "google_service_networking_connection" "private_vpc_connection" {
+  network                 = google_compute_network.main.id
+  service                 = "servicenetworking.googleapis.com"
+  reserved_peering_ranges = [google_compute_global_address.private_ip_range.name]
+
+  depends_on = [google_project_service.servicenetworking]
+}
+
+# ─── GKE Cluster ──────────────────────────────────────────────────────
+#
+# Regional cluster (3 control-plane replicas) for HA.
+# Workload Identity enabled — allows K8s service accounts to impersonate
+# Google Service Accounts for keyless GCS access.
+
+resource "google_container_cluster" "main" {
+  project  = var.project_id
+  name     = "${var.prefix}-gke"
+  location = var.region # regional cluster
+
+  network    = google_compute_network.main.id
+  subnetwork = google_compute_subnetwork.main.id
+
+  # VPC-native cluster with alias IP ranges
+  ip_allocation_policy {
+    cluster_secondary_range_name  = "pods"
+    services_secondary_range_name = "services"
+  }
+
+  # Workload Identity pool — enables OIDC token projection for pods
+  workload_identity_config {
+    workload_pool = "${var.project_id}.svc.id.goog"
+  }
+
+  # Remove default node pool — we manage our own below
+  remove_default_node_pool = true
+  initial_node_count       = 1
+
+  deletion_protection = var.deletion_protection
+
+  # Pin the control-plane version when var.kubernetes_version is set; when it
+  # is null the STABLE release channel below selects the version instead.
+  min_master_version = var.kubernetes_version
+
+  dynamic "release_channel" {
+    for_each = var.kubernetes_version == null ? [1] : []
+    content {
+      channel = "STABLE"
+    }
+  }
+
+  resource_labels = var.labels
+
+  depends_on = [google_project_service.container]
+}
+
+resource "google_container_node_pool" "main" {
+  project    = var.project_id
+  name       = "${var.prefix}-nodes"
+  location   = var.region
+  cluster    = google_container_cluster.main.name
+  node_count = var.node_count
+
+  node_config {
+    machine_type = var.node_machine_type
+
+    # GKE_METADATA mode is required for Workload Identity
+    workload_metadata_config {
+      mode = "GKE_METADATA"
+    }
+
+    oauth_scopes = [
+      "https://www.googleapis.com/auth/cloud-platform",
+    ]
+
+    labels = merge(var.labels, {
+      role = "worker"
+    })
+  }
+
+  management {
+    auto_repair  = true
+    auto_upgrade = true
+  }
+}
+
+# ─── Cloud SQL PostgreSQL ─────────────────────────────────────────────
+#
+# Private IP only — reachable from GKE via VPC peering.
+# NOTE: Only the "pgadmin" user is managed by this module (google_sql_user below).
+# The application users (keycloak, gitlab) must be created post-provision via psql.
+# Use: kubectl run pg-init --rm -it --image=postgres:16 -- psql -h <pg_host> -U pgadmin
+
+resource "random_password" "pg_admin" {
+  length  = 32
+  special = false
+}
+
+resource "random_password" "pg_keycloak" { # not attached to any google_sql_user in this file
+  length  = 32
+  special = false
+}
+
+resource "random_password" "pg_gitlab" { # not attached to any google_sql_user in this file
+  length  = 32
+  special = false
+}
+
+resource "google_sql_database_instance" "main" {
+  project          = var.project_id
+  name             = "${var.prefix}-postgresql"
+  region           = var.region
+  database_version = var.pg_database_version
+
+  settings {
+    tier              = var.pg_tier
+    availability_type = var.pg_availability_type
+    disk_size         = var.pg_disk_size_gb
+    disk_autoresize   = true
+
+    ip_configuration {
+      ipv4_enabled                                  = false # private IP only
+      private_network                               = google_compute_network.main.id
+      enable_private_path_for_google_cloud_services = true
+    }
+
+    backup_configuration {
+      enabled = var.pg_backup_enabled
+    }
+
+    database_flags {
+      name  = "max_connections"
+      value = "200" # fixed for every environment; not exposed as a variable
+    }
+  }
+
+  deletion_protection = var.pg_deletion_protection
+
+  depends_on = [google_service_networking_connection.private_vpc_connection] # the peering connection must exist before the private-IP instance
+}
+
+resource "google_sql_user" "pg_admin" {
+  project  = var.project_id
+  name     = "pgadmin"
+  instance = google_sql_database_instance.main.name
+  password = random_password.pg_admin.result
+}
+
+resource "google_sql_database" "keycloak" {
+  project  = var.project_id
+  name     = "keycloak"
+  instance = google_sql_database_instance.main.name
+}
+
+resource "google_sql_database" "gitlab" {
+  project  = var.project_id
+  name     = "gitlabhq_production"
+  instance = google_sql_database_instance.main.name
+}
+
+# ─── Cloud Memorystore (Redis) ────────────────────────────────────────
+#
+# Private IP within VPC. Auth enabled (password via AUTH command).
+# The auth_string is output and must be stored in a K8s secret for GitLab.
+
+resource "google_redis_instance" "main" {
+  project        = var.project_id
+  name           = "${var.prefix}-redis"
+  region         = var.region
+  tier           = var.redis_tier
+  memory_size_gb = var.redis_memory_size_gb
+
+  authorized_network = google_compute_network.main.id
+
+  # Redis AUTH password — keyless access is not supported by Memorystore
+  auth_enabled = true
+
+  labels = var.labels
+
+  depends_on = [google_project_service.redis]
+}
+
+# ─── GCS Buckets (GitLab Object Storage) ─────────────────────────────
+#
+# GitLab supports GCS natively via the Fog/Google provider.
+# Workload Identity is used for keyless access — no access key required.
+# NOTE: GCS bucket names are globally unique. If "${prefix}-gitlab-*" conflicts,
+# adjust var.prefix to include a project-specific component.
+
+locals {
+  gcs_bucket_prefix = "${var.prefix}-gitlab" # shared name prefix for all six buckets below
+}
+
+resource "google_storage_bucket" "gitlab_artifacts" {
+  project       = var.project_id
+  name          = "${local.gcs_bucket_prefix}-artifacts"
+  location      = var.region
+  storage_class = var.gcs_storage_class
+  force_destroy = true
+
+  uniform_bucket_level_access = true
+  labels                      = var.labels
+}
+
+resource "google_storage_bucket" "gitlab_uploads" {
+  project       = var.project_id
+  name          = "${local.gcs_bucket_prefix}-uploads"
+  location      = var.region
+  storage_class = var.gcs_storage_class
+  force_destroy = true
+
+  uniform_bucket_level_access = true
+  labels                      = var.labels
+}
+
+resource "google_storage_bucket" "gitlab_packages" {
+  project       = var.project_id
+  name          = "${local.gcs_bucket_prefix}-packages"
+  location      = var.region
+  storage_class = var.gcs_storage_class
+  force_destroy = true
+
+  uniform_bucket_level_access = true
+  labels                      = var.labels
+}
+
+resource "google_storage_bucket" "gitlab_lfs" {
+  project       = var.project_id
+  name          = "${local.gcs_bucket_prefix}-lfs"
+  location      = var.region
+  storage_class = var.gcs_storage_class
+  force_destroy = true
+
+  uniform_bucket_level_access = true
+  labels                      = var.labels
+}
+
+resource "google_storage_bucket" "gitlab_registry" {
+  project       = var.project_id
+  name          = "${local.gcs_bucket_prefix}-registry"
+  location      = var.region
+  storage_class = var.gcs_storage_class
+  force_destroy = true
+
+  uniform_bucket_level_access = true
+  labels                      = var.labels
+}
+
+resource "google_storage_bucket" "gitlab_backups" {
+  project       = var.project_id
+  name          = "${local.gcs_bucket_prefix}-backups"
+  location      = var.region
+  storage_class = var.gcs_storage_class
+  force_destroy = true # NOTE(review): hard-coded for every environment; a destroy removes the bucket even when it holds backups — confirm for prod
+
+  uniform_bucket_level_access = true
+  labels                      = var.labels
+}
+
+# ─── Google Identity Provider for Keycloak ────────────────────────────
+#
+# Keycloak federates with Google — users authenticate via "Sign in with Google"
+# through Keycloak, which remains the single OIDC issuer for all services.
+#
+# IMPORTANT: The Google OAuth 2.0 client (Web Application type) must be
+# created MANUALLY in Google Cloud Console:
+#   APIs & Services → Credentials → Create OAuth client ID → Web application
+#   Authorized redirect URIs: https://keycloak.<domain>/realms/devops/broker/google/endpoint
+#
+# After creation, fill in k8s/scripts/gcp-{dev,prod}/gcp-idp.env:
+#   GOOGLE_IDP_CLIENT_ID=<client-id>
+#   GOOGLE_IDP_CLIENT_SECRET=<client-secret>
+#
+# Then run: ./setup-keycloak.sh --env gcp-dev idp
+
+# NOTE(review): nothing else in this file references this service; confirm "oauth2.googleapis.com" is an enableable API name.
+resource "google_project_service" "oauth2" {
+  project            = var.project_id
+  service            = "oauth2.googleapis.com"
+  disable_on_destroy = false
+}
+
+# ─── Workload Identity for GitLab ─────────────────────────────────────
+#
+# Allows GitLab pods (webservice, sidekiq) to access GCS buckets without
+# a service account key. The K8s service account "gitlab" in the "gitlab"
+# namespace exchanges its projected OIDC token for a Google token.
+#
+# GKE must have workload_identity_config set (done above).
+
+resource "google_service_account" "gitlab" {
+  project      = var.project_id
+  account_id   = "${var.prefix}-gitlab"
+  display_name = "GitLab Service Account (Workload Identity)"
+
+  depends_on = [google_project_service.iam] # IAM API is enabled by this module before the account is created
+}
+
+# Grant the GSA Object Admin on all GitLab buckets
+resource "google_storage_bucket_iam_member" "gitlab_artifacts" {
+  bucket = google_storage_bucket.gitlab_artifacts.name
+  role   = "roles/storage.objectAdmin" # granted per bucket, not at project level
+  member = "serviceAccount:${google_service_account.gitlab.email}"
+}
+
+resource "google_storage_bucket_iam_member" "gitlab_uploads" {
+  bucket = google_storage_bucket.gitlab_uploads.name
+  role   = "roles/storage.objectAdmin"
+  member = "serviceAccount:${google_service_account.gitlab.email}"
+}
+
+resource "google_storage_bucket_iam_member" "gitlab_packages" {
+  bucket = google_storage_bucket.gitlab_packages.name
+  role   = "roles/storage.objectAdmin"
+  member = "serviceAccount:${google_service_account.gitlab.email}"
+}
+
+resource "google_storage_bucket_iam_member" "gitlab_lfs" {
+  bucket = google_storage_bucket.gitlab_lfs.name
+  role   = "roles/storage.objectAdmin"
+  member = "serviceAccount:${google_service_account.gitlab.email}"
+}
+
+resource "google_storage_bucket_iam_member" "gitlab_registry" {
+  bucket = google_storage_bucket.gitlab_registry.name
+  role   = "roles/storage.objectAdmin"
+  member = "serviceAccount:${google_service_account.gitlab.email}"
+}
+
+resource "google_storage_bucket_iam_member" "gitlab_backups" {
+  bucket = google_storage_bucket.gitlab_backups.name
+  role   = "roles/storage.objectAdmin"
+  member = "serviceAccount:${google_service_account.gitlab.email}"
+}
+
+# Bind the K8s service account "gitlab/gitlab" to the GSA via Workload Identity.
+# The GitLab Helm chart creates the "gitlab" SA when global.serviceAccount.enabled=true.
+resource "google_service_account_iam_member" "gitlab_workload_identity" { + service_account_id = google_service_account.gitlab.name + role = "roles/iam.workloadIdentityUser" + member = "serviceAccount:${var.project_id}.svc.id.goog[gitlab/gitlab]" +} + +# ─── External-DNS Workload Identity ────────────────────────────────── +# Allows external-dns to manage Cloud DNS records for the cluster's domain. +# The K8s service account "external-dns/external-dns" exchanges its OIDC token +# for a Google token via Workload Identity. + +resource "google_service_account" "external_dns" { + project = var.project_id + account_id = "${var.prefix}-external-dns" + display_name = "External-DNS Service Account (Workload Identity)" + + depends_on = [google_project_service.iam] +} + +resource "google_project_iam_member" "external_dns_dns_admin" { + project = var.project_id + role = "roles/dns.admin" + member = "serviceAccount:${google_service_account.external_dns.email}" +} + +resource "google_service_account_iam_member" "external_dns_workload_identity" { + service_account_id = google_service_account.external_dns.name + role = "roles/iam.workloadIdentityUser" + member = "serviceAccount:${var.project_id}.svc.id.goog[external-dns/external-dns]" +} diff --git a/.tofu/platforms/gke/modules/cluster/outputs.tf b/.tofu/platforms/gke/modules/cluster/outputs.tf new file mode 100644 index 0000000..9f42a91 --- /dev/null +++ b/.tofu/platforms/gke/modules/cluster/outputs.tf @@ -0,0 +1,88 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_name" { + description = "GKE cluster name" + value = google_container_cluster.main.name +} + +output "project_id" { + description = "GCP project ID" + value = var.project_id +} + +output "region" { + description = "GCP region" + value = var.region +} + +# ─── PostgreSQL ─────────────────────────────────────────────────────── + +output "pg_host" { + description = "Cloud SQL private IP address (reachable from GKE via VPC)" + 
value = google_sql_database_instance.main.private_ip_address +} + +output "pg_port" { + description = "PostgreSQL port" + value = 5432 +} + +output "pg_admin_login" { + description = "PostgreSQL administrator login" + value = google_sql_user.pg_admin.name +} + +output "pg_admin_password" { + description = "PostgreSQL administrator password" + value = random_password.pg_admin.result + sensitive = true +} + +output "pg_keycloak_password" { + description = "Pre-generated password for keycloak DB user — create user post-provision" + value = random_password.pg_keycloak.result + sensitive = true +} + +output "pg_gitlab_password" { + description = "Pre-generated password for gitlab DB user — create user post-provision" + value = random_password.pg_gitlab.result + sensitive = true +} + +# ─── Redis ──────────────────────────────────────────────────────────── + +output "redis_host" { + description = "Memorystore Redis host (private IP within VPC)" + value = google_redis_instance.main.host +} + +output "redis_port" { + description = "Memorystore Redis port" + value = google_redis_instance.main.port +} + +output "redis_auth_string" { + description = "Memorystore Redis AUTH string — store in gitlab-redis-secret K8s secret" + value = google_redis_instance.main.auth_string + sensitive = true +} + +# ─── GCS ───────────────────────────────────────────────────────────── + +output "gitlab_gcs_bucket_prefix" { + description = "GCS bucket name prefix — buckets are {prefix}-artifacts, {prefix}-uploads, etc." 
+ value = local.gcs_bucket_prefix +} + +# ─── Workload Identity ──────────────────────────────────────────────── + +output "gitlab_gsa_email" { + description = "GitLab Google Service Account email — annotate the K8s service account with this value" + value = google_service_account.gitlab.email +} + +output "external_dns_gsa_email" { + description = "External-DNS Google Service Account email — written to config.yaml by sync-tofu-outputs.sh" + value = google_service_account.external_dns.email +} diff --git a/.tofu/platforms/gke/modules/cluster/providers.tf b/.tofu/platforms/gke/modules/cluster/providers.tf new file mode 100644 index 0000000..38e3e31 --- /dev/null +++ b/.tofu/platforms/gke/modules/cluster/providers.tf @@ -0,0 +1,12 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} diff --git a/.tofu/platforms/gke/modules/cluster/variables.tf b/.tofu/platforms/gke/modules/cluster/variables.tf new file mode 100644 index 0000000..1d43387 --- /dev/null +++ b/.tofu/platforms/gke/modules/cluster/variables.tf @@ -0,0 +1,105 @@ +# ─── Project / Region ──────────────────────────────────────────────── + +variable "project_id" { + description = "GCP project ID" + type = string +} + +variable "region" { + description = "GCP region (e.g., europe-west4, europe-west1)" + type = string +} + +variable "prefix" { + description = "Prefix for resource names (e.g., devhub-dev)" + type = string +} + +# ─── GKE Cluster ───────────────────────────────────────────────────── + +variable "node_machine_type" { + description = "GKE node machine type (e.g., e2-standard-2, e2-standard-4)" + type = string +} + +variable "node_count" { + description = "Number of nodes per zone (regional cluster spawns nodes in each zone)" + type = number +} + +variable "kubernetes_version" { + description = "GKE Kubernetes version channel (null = STABLE release channel)" + type = string 
+ default = null +} + +variable "deletion_protection" { + description = "Prevent cluster deletion (set true for production)" + type = bool + default = false +} + +# ─── Cloud SQL (PostgreSQL) ─────────────────────────────────────────── + +variable "pg_database_version" { + description = "PostgreSQL version (e.g., POSTGRES_16)" + type = string + default = "POSTGRES_16" +} + +variable "pg_tier" { + description = "Cloud SQL machine tier (e.g., db-g1-small, db-n1-standard-2)" + type = string +} + +variable "pg_disk_size_gb" { + description = "Cloud SQL disk size in GB" + type = number + default = 20 +} + +variable "pg_availability_type" { + description = "Cloud SQL availability: ZONAL or REGIONAL (REGIONAL = HA)" + type = string + default = "ZONAL" +} + +variable "pg_backup_enabled" { + description = "Enable automated Cloud SQL backups" + type = bool + default = true +} + +variable "pg_deletion_protection" { + description = "Prevent Cloud SQL instance deletion" + type = bool + default = false +} + +# ─── Cloud Memorystore (Redis) ──────────────────────────────────────── + +variable "redis_tier" { + description = "Memorystore Redis tier: BASIC or STANDARD_HA" + type = string +} + +variable "redis_memory_size_gb" { + description = "Redis memory size in GB" + type = number +} + +# ─── GCS (Object Storage) ──────────────────────────────────────────── + +variable "gcs_storage_class" { + description = "GCS storage class: STANDARD, NEARLINE, COLDLINE, ARCHIVE" + type = string + default = "STANDARD" +} + +# ─── Labels ────────────────────────────────────────────────────────── + +variable "labels" { + description = "Labels applied to all resources" + type = map(string) + default = {} +} diff --git a/.tofu/platforms/gke/prod/main.tf b/.tofu/platforms/gke/prod/main.tf new file mode 100644 index 0000000..1514d2c --- /dev/null +++ b/.tofu/platforms/gke/prod/main.tf @@ -0,0 +1,31 @@ +module "cluster" { + source = "../modules/cluster" + + project_id = var.project_id + region = 
var.region + prefix = "devhub" + + # GKE — general-purpose nodes for production + node_machine_type = "e2-standard-4" + node_count = 3 + deletion_protection = true + + # Cloud SQL — general-purpose HA tier for production + pg_tier = "db-n1-standard-2" + pg_disk_size_gb = 100 + pg_availability_type = "REGIONAL" # HA with hot standby + pg_backup_enabled = true + pg_deletion_protection = true + + # Memorystore Redis — STANDARD_HA for production + redis_tier = "STANDARD_HA" + redis_memory_size_gb = 4 + + # GCS — STANDARD storage (geo-redundant via multi-region location if needed) + gcs_storage_class = "STANDARD" + + labels = { + environment = "prod" + managed-by = "tofu" + } +} diff --git a/.tofu/platforms/gke/prod/outputs.tf b/.tofu/platforms/gke/prod/outputs.tf new file mode 100644 index 0000000..ceeafaf --- /dev/null +++ b/.tofu/platforms/gke/prod/outputs.tf @@ -0,0 +1,28 @@ +# Re-exports the cluster module's outputs. HCL has no ";" separator, so outputs with more than one argument use multi-line blocks. +output "cluster_name" { value = module.cluster.cluster_name } +output "project_id" { value = module.cluster.project_id } +output "region" { value = module.cluster.region } +output "pg_host" { value = module.cluster.pg_host } +output "pg_port" { value = module.cluster.pg_port } +output "pg_admin_login" { value = module.cluster.pg_admin_login } +output "pg_admin_password" { + value = module.cluster.pg_admin_password + sensitive = true +} +output "pg_keycloak_password" { + value = module.cluster.pg_keycloak_password + sensitive = true +} +output "pg_gitlab_password" { + value = module.cluster.pg_gitlab_password + sensitive = true +} +output "redis_host" { value = module.cluster.redis_host } +output "redis_port" { value = module.cluster.redis_port } +output "redis_auth_string" { + value = module.cluster.redis_auth_string + sensitive = true +} +output "gitlab_gcs_bucket_prefix" { value = module.cluster.gitlab_gcs_bucket_prefix } +output "gitlab_gsa_email" { value = module.cluster.gitlab_gsa_email } +output "external_dns_gsa_email" { value = module.cluster.external_dns_gsa_email } diff --git a/.tofu/platforms/gke/prod/providers.tf b/.tofu/platforms/gke/prod/providers.tf new file mode 100644 index 
0000000..fcaa0f2 --- /dev/null +++ b/.tofu/platforms/gke/prod/providers.tf @@ -0,0 +1,28 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} + +variable "project_id" { + description = "GCP project ID for the prod environment" + type = string +} + +variable "region" { + description = "GCP region" + type = string + default = "europe-west1" +} diff --git a/.tofu/platforms/gke/workload/main.tf b/.tofu/platforms/gke/workload/main.tf new file mode 100644 index 0000000..82b779f --- /dev/null +++ b/.tofu/platforms/gke/workload/main.tf @@ -0,0 +1,194 @@ +# ============================================================================= +# GCP Workload Cluster +# ============================================================================= +# A lean GKE cluster for running application workloads. No managed data +# services — those live on the platform cluster. ArgoCD (on the platform +# cluster) deploys apps to this cluster via the app-of-apps pattern. 
+# +# Platform components deployed by deploy-workload.sh: +# nginx-ingress, cert-manager, external-dns, external-secrets, alloy +# +# Usage: +# tofu init && tofu plan && tofu apply +# ./sync-tofu-outputs.sh --env gcp-workload +# ./deploy-workload.sh --env gcp-workload +# ============================================================================= + +variable "prefix" { + description = "Prefix for resource names (e.g., devhub-workload)" + type = string + default = "devhub-workload" +} + +variable "node_machine_type" { + description = "GKE node machine type" + type = string + default = "e2-standard-2" +} + +variable "node_count" { + description = "Number of nodes per zone" + type = number + default = 1 +} + +variable "kubernetes_version" { + description = "GKE Kubernetes version (null = STABLE release channel)" + type = string + default = null +} + +variable "deletion_protection" { + description = "Prevent cluster deletion" + type = bool + default = false +} + +variable "labels" { + description = "Labels applied to all resources" + type = map(string) + default = { + environment = "workload" + managed-by = "tofu" + } +} + +# ─── Required APIs ──────────────────────────────────────────────────── + +resource "google_project_service" "compute" { + project = var.project_id + service = "compute.googleapis.com" + disable_on_destroy = false +} + +resource "google_project_service" "container" { + project = var.project_id + service = "container.googleapis.com" + disable_on_destroy = false +} + +resource "google_project_service" "iam" { + project = var.project_id + service = "iam.googleapis.com" + disable_on_destroy = false +} + +resource "google_project_service" "dns" { + project = var.project_id + service = "dns.googleapis.com" + disable_on_destroy = false +} + +# ─── Networking ─────────────────────────────────────────────────────── + +resource "google_compute_network" "main" { + project = var.project_id + name = "${var.prefix}-vpc" + auto_create_subnetworks = false + + 
depends_on = [google_project_service.compute] +} + +resource "google_compute_subnetwork" "main" { + project = var.project_id + name = "${var.prefix}-subnet" + ip_cidr_range = "10.110.0.0/22" + region = var.region + network = google_compute_network.main.id + + secondary_ip_range { + range_name = "pods" + ip_cidr_range = "10.208.0.0/14" # network address must be /14-aligned (10.210.0.0 is not); covers 10.208–10.211 + } + + secondary_ip_range { + range_name = "services" + ip_cidr_range = "10.214.0.0/20" + } +} + +# ─── GKE Cluster ────────────────────────────────────────────────────── + +resource "google_container_cluster" "main" { + project = var.project_id + name = "${var.prefix}-gke" + location = var.region + + network = google_compute_network.main.id + subnetwork = google_compute_subnetwork.main.id + + ip_allocation_policy { + cluster_secondary_range_name = "pods" + services_secondary_range_name = "services" + } + + workload_identity_config { + workload_pool = "${var.project_id}.svc.id.goog" + } + + remove_default_node_pool = true + initial_node_count = 1 + + deletion_protection = var.deletion_protection + + dynamic "release_channel" { + for_each = var.kubernetes_version == null ? [1] : [] + content { + channel = "STABLE" + } + } + + resource_labels = var.labels + + depends_on = [google_project_service.container] +} + +resource "google_container_node_pool" "main" { + project = var.project_id + name = "${var.prefix}-nodes" + location = var.region + cluster = google_container_cluster.main.name + node_count = var.node_count + + node_config { + machine_type = var.node_machine_type + + workload_metadata_config { + mode = "GKE_METADATA" + } + + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + ] + + labels = merge(var.labels, { role = "worker" }) + } + + management { + auto_repair = true + auto_upgrade = true + } +} + +# ─── External-DNS Workload Identity ────────────────────────────────── +# Allows external-dns to manage Cloud DNS records for app ingresses. 
+ +resource "google_service_account" "external_dns" { + project = var.project_id + account_id = "${var.prefix}-external-dns" + display_name = "External-DNS Service Account (Workload Identity)" + + depends_on = [google_project_service.iam] +} + +resource "google_project_iam_member" "external_dns_dns_admin" { + project = var.project_id + role = "roles/dns.admin" + member = "serviceAccount:${google_service_account.external_dns.email}" +} + +resource "google_service_account_iam_member" "external_dns_workload_identity" { + service_account_id = google_service_account.external_dns.name + role = "roles/iam.workloadIdentityUser" + member = "serviceAccount:${var.project_id}.svc.id.goog[external-dns/external-dns]" +} diff --git a/.tofu/platforms/gke/workload/outputs.tf b/.tofu/platforms/gke/workload/outputs.tf new file mode 100644 index 0000000..a825928 --- /dev/null +++ b/.tofu/platforms/gke/workload/outputs.tf @@ -0,0 +1,4 @@ +output "cluster_name" { value = google_container_cluster.main.name } +output "project_id" { value = var.project_id } +output "region" { value = var.region } +output "external_dns_gsa_email" { value = google_service_account.external_dns.email } diff --git a/.tofu/platforms/gke/workload/providers.tf b/.tofu/platforms/gke/workload/providers.tf new file mode 100644 index 0000000..1503c79 --- /dev/null +++ b/.tofu/platforms/gke/workload/providers.tf @@ -0,0 +1,26 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.0" + } + } +} + +# Authentication: use Application Default Credentials (gcloud auth application-default login) +# or set GOOGLE_APPLICATION_CREDENTIALS to a service account key file. 
+provider "google" { + project = var.project_id + region = var.region +} + +variable "project_id" { + description = "GCP project ID for the workload environment" + type = string +} + +variable "region" { + description = "GCP region" + type = string + default = "europe-west4" +} diff --git a/.tofu/platforms/upc/dev/main.tf b/.tofu/platforms/upc/dev/main.tf new file mode 100644 index 0000000..704e82c --- /dev/null +++ b/.tofu/platforms/upc/dev/main.tf @@ -0,0 +1,22 @@ +module "cluster" { + source = "../modules/cluster" + + prefix = "devhub-dev" + zone = "no-svg1" + node_plan = "DEV-1xCPU-2GB" + node_count = 2 + network_cidr = "10.100.0.0/24" + + # Data services — smallest plans for dev + pg_plan = "1x1xCPU-2GB-25GB" + pg_version = "16" + valkey_plan = "1x1xCPU-2GB" + objstore_region = "europe-1" + + termination_protection = false + + tags = { + Environment = "dev" + ManagedBy = "tofu" + } +} diff --git a/.tofu/platforms/upc/dev/outputs.tf b/.tofu/platforms/upc/dev/outputs.tf new file mode 100644 index 0000000..cdf97bb --- /dev/null +++ b/.tofu/platforms/upc/dev/outputs.tf @@ -0,0 +1,67 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_id" { + value = module.cluster.cluster_id +} + +output "cluster_name" { + value = module.cluster.cluster_name +} + +output "zone" { + value = module.cluster.zone +} + +# ─── PostgreSQL ────────────────────────────────────────────────────── + +output "pg_host" { + value = module.cluster.pg_host +} + +output "pg_port" { + value = module.cluster.pg_port +} + +output "pg_keycloak_password" { + value = module.cluster.pg_keycloak_password + sensitive = true +} + +output "pg_gitlab_password" { + value = module.cluster.pg_gitlab_password + sensitive = true +} + +# ─── Valkey ────────────────────────────────────────────────────────── + +output "valkey_host" { + value = module.cluster.valkey_host +} + +output "valkey_port" { + value = module.cluster.valkey_port +} + +output "valkey_password" { + value 
= module.cluster.valkey_password + sensitive = true +} + +# ─── Object Storage ───────────────────────────────────────────────── + +output "s3_endpoint" { + value = module.cluster.s3_endpoint +} + +output "s3_region" { + value = module.cluster.s3_region +} + +output "s3_access_key" { + value = module.cluster.s3_access_key +} + +output "s3_secret_key" { + value = module.cluster.s3_secret_key + sensitive = true +} diff --git a/.tofu/platforms/upc/dev/providers.tf b/.tofu/platforms/upc/dev/providers.tf new file mode 100644 index 0000000..2e2be87 --- /dev/null +++ b/.tofu/platforms/upc/dev/providers.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + upcloud = { + source = "UpCloudLtd/upcloud" + version = "~> 5.0" + } + } +} + +provider "upcloud" { + # Credentials come from the environment: UPCLOUD_TOKEN (see .tofu/configs/upc.env.example); UPCLOUD_USERNAME / UPCLOUD_PASSWORD presumably still work as well — TODO confirm for the pinned provider version +} diff --git a/.tofu/platforms/upc/modules/cluster/main.tf b/.tofu/platforms/upc/modules/cluster/main.tf new file mode 100644 index 0000000..1c1b8de --- /dev/null +++ b/.tofu/platforms/upc/modules/cluster/main.tf @@ -0,0 +1,205 @@ +# Router for the private network +resource "upcloud_router" "kubernetes" { + name = "${var.prefix}-${var.cluster_name}-router" +} + +# Gateway for internet connectivity +resource "upcloud_gateway" "kubernetes" { + name = "${var.prefix}-${var.cluster_name}-gateway" + zone = var.zone + features = ["nat"] + router { + id = upcloud_router.kubernetes.id + } +} + +# Private network for the Kubernetes cluster +resource "upcloud_network" "kubernetes" { + name = "${var.prefix}-${var.cluster_name}-network" + zone = var.zone + router = upcloud_router.kubernetes.id + + ip_network { + address = var.network_cidr + dhcp = true + dhcp_default_route = true + family = "IPv4" + gateway = cidrhost(var.network_cidr, 1) + } + + depends_on = [upcloud_gateway.kubernetes] +} + +# Kubernetes cluster +resource "upcloud_kubernetes_cluster" "main" { + name = "${var.prefix}-${var.cluster_name}" + zone = var.zone + 
network = upcloud_network.kubernetes.id + control_plane_ip_filter = var.control_plane_ip_filter + + private_node_groups = true +} + +# Node group for worker nodes +resource "upcloud_kubernetes_node_group" "workers" { + cluster = upcloud_kubernetes_cluster.main.id + name = "${var.prefix}-${var.cluster_name}-workers" + node_count = var.node_count + plan = var.node_plan + anti_affinity = var.node_count > 1 + labels = { + prefix = var.prefix + cluster = var.cluster_name + role = "worker" + env = lookup(var.tags, "Environment", "dev") + } +} + +# ─── Managed PostgreSQL ────────────────────────────────────────────── + +resource "upcloud_managed_database_postgresql" "main" { + name = "${var.prefix}-postgresql" + plan = var.pg_plan + title = "${var.prefix} PostgreSQL" + zone = var.zone + + termination_protection = var.termination_protection + + network { + family = "IPv4" + name = "pg-private" + type = "private" + uuid = upcloud_network.kubernetes.id + } + + properties { + public_access = false + version = var.pg_version + } + + labels = var.tags +} + +resource "upcloud_managed_database_logical_database" "keycloak" { + service = upcloud_managed_database_postgresql.main.id + name = "keycloak" +} + +resource "upcloud_managed_database_logical_database" "gitlab" { + service = upcloud_managed_database_postgresql.main.id + name = "gitlabhq_production" +} + +resource "upcloud_managed_database_user" "keycloak" { + service = upcloud_managed_database_postgresql.main.id + username = "keycloak" +} + +resource "upcloud_managed_database_user" "gitlab" { + service = upcloud_managed_database_postgresql.main.id + username = "gitlab" +} + +# ─── Managed Valkey ────────────────────────────────────────────────── + +resource "upcloud_managed_database_valkey" "main" { + name = "${var.prefix}-valkey" + plan = var.valkey_plan + title = "${var.prefix} Valkey" + zone = var.zone + + termination_protection = var.termination_protection + + network { + family = "IPv4" + name = "valkey-private" + type = 
"private" + uuid = upcloud_network.kubernetes.id + } + + properties { + public_access = false + } + + labels = var.tags +} + +# ─── Managed Object Storage ───────────────────────────────────────── + +resource "upcloud_managed_object_storage" "main" { + name = "${var.prefix}-objsto" + region = var.objstore_region + configured_status = "started" + + network { + family = "IPv4" + name = "objsto-private" + type = "private" + uuid = upcloud_network.kubernetes.id + } + + labels = var.tags +} + +resource "upcloud_managed_object_storage_user" "gitlab" { + service_uuid = upcloud_managed_object_storage.main.id + username = "${var.prefix}-gitlab" +} + +resource "upcloud_managed_object_storage_user_access_key" "gitlab" { + service_uuid = upcloud_managed_object_storage.main.id + username = upcloud_managed_object_storage_user.gitlab.username + status = "Active" +} + +resource "upcloud_managed_object_storage_policy" "gitlab" { + service_uuid = upcloud_managed_object_storage.main.id + name = "gitlab-full-access" + description = "Full S3 access for GitLab" + document = urlencode(jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = ["s3:*"] + Resource = "*" + } + ] + })) +} + +resource "upcloud_managed_object_storage_user_policy" "gitlab" { + service_uuid = upcloud_managed_object_storage.main.id + username = upcloud_managed_object_storage_user.gitlab.username + name = upcloud_managed_object_storage_policy.gitlab.name +} + +resource "upcloud_managed_object_storage_bucket" "gitlab_artifacts" { + service_uuid = upcloud_managed_object_storage.main.id + name = "${var.prefix}-gitlab-artifacts" +} + +resource "upcloud_managed_object_storage_bucket" "gitlab_uploads" { + service_uuid = upcloud_managed_object_storage.main.id + name = "${var.prefix}-gitlab-uploads" +} + +resource "upcloud_managed_object_storage_bucket" "gitlab_packages" { + service_uuid = upcloud_managed_object_storage.main.id + name = "${var.prefix}-gitlab-packages" +} + +resource 
"upcloud_managed_object_storage_bucket" "gitlab_lfs" { + service_uuid = upcloud_managed_object_storage.main.id + name = "${var.prefix}-gitlab-lfs" +} + +resource "upcloud_managed_object_storage_bucket" "gitlab_registry" { + service_uuid = upcloud_managed_object_storage.main.id + name = "${var.prefix}-gitlab-registry" +} + +resource "upcloud_managed_object_storage_bucket" "gitlab_backups" { + service_uuid = upcloud_managed_object_storage.main.id + name = "${var.prefix}-gitlab-backups" +} diff --git a/.tofu/platforms/upc/modules/cluster/outputs.tf b/.tofu/platforms/upc/modules/cluster/outputs.tf new file mode 100644 index 0000000..0831972 --- /dev/null +++ b/.tofu/platforms/upc/modules/cluster/outputs.tf @@ -0,0 +1,99 @@ +# ─── Cluster ───────────────────────────────────────────────────────── + +output "cluster_id" { + description = "The ID of the Kubernetes cluster" + value = upcloud_kubernetes_cluster.main.id +} + +output "cluster_name" { + description = "The name of the Kubernetes cluster" + value = upcloud_kubernetes_cluster.main.name +} + +output "network_id" { + description = "The ID of the private network" + value = upcloud_network.kubernetes.id +} + +output "network_cidr" { + description = "The CIDR block of the private network" + value = var.network_cidr +} + +output "kubernetes_version" { + description = "The Kubernetes version of the cluster" + value = upcloud_kubernetes_cluster.main.version +} + +output "zone" { + description = "The zone where the cluster is deployed" + value = var.zone +} + +# ─── PostgreSQL ────────────────────────────────────────────────────── + +output "pg_host" { + description = "PostgreSQL private hostname" + value = upcloud_managed_database_postgresql.main.service_host +} + +output "pg_port" { + description = "PostgreSQL port" + value = upcloud_managed_database_postgresql.main.service_port +} + +output "pg_keycloak_password" { + description = "PostgreSQL password for keycloak user" + value = 
upcloud_managed_database_user.keycloak.password + sensitive = true +} + +output "pg_gitlab_password" { + description = "PostgreSQL password for gitlab user" + value = upcloud_managed_database_user.gitlab.password + sensitive = true +} + +# ─── Valkey ────────────────────────────────────────────────────────── + +output "valkey_host" { + description = "Valkey private hostname" + value = upcloud_managed_database_valkey.main.service_host +} + +output "valkey_port" { + description = "Valkey port" + value = upcloud_managed_database_valkey.main.service_port +} + +output "valkey_password" { + description = "Valkey default user password" + value = upcloud_managed_database_valkey.main.service_password + sensitive = true +} + +# ─── Object Storage ───────────────────────────────────────────────── + +output "s3_endpoint" { + description = "S3-compatible endpoint — the public one if it exists, otherwise the first available endpoint (this module attaches only a private network)" + value = try( + [for ep in upcloud_managed_object_storage.main.endpoint : "https://${ep.domain_name}" if ep.type == "public"][0], + [for ep in upcloud_managed_object_storage.main.endpoint : "https://${ep.domain_name}"][0] + ) +} + +output "s3_region" { + description = "Object storage region" + value = var.objstore_region +} + +output "s3_access_key" { + description = "S3 access key ID" + value = upcloud_managed_object_storage_user_access_key.gitlab.access_key_id +} + +output "s3_secret_key" { + description = "S3 secret access key" + value = upcloud_managed_object_storage_user_access_key.gitlab.secret_access_key + sensitive = true +} diff --git a/.tofu/platforms/upc/modules/cluster/providers.tf b/.tofu/platforms/upc/modules/cluster/providers.tf new file mode 100644 index 0000000..f637d8b --- /dev/null +++ b/.tofu/platforms/upc/modules/cluster/providers.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + upcloud = { + source = "UpCloudLtd/upcloud" + version = "~> 5.0" + } + } +} diff --git a/.tofu/platforms/upc/modules/cluster/variables.tf b/.tofu/platforms/upc/modules/cluster/variables.tf new file mode 100644 index 0000000..afc9c94 --- /dev/null +++ b/.tofu/platforms/upc/modules/cluster/variables.tf 
@@ -0,0 +1,73 @@
+# ─── Cluster ─────────────────────────────────────────────────────────
+
+variable "prefix" {
+  description = "Prefix for resource names"
+  type        = string
+}
+
+variable "cluster_name" {
+  description = "Name of the Kubernetes cluster"
+  type        = string
+  default     = "main"
+}
+
+variable "zone" {
+  description = "UpCloud zone"
+  type        = string
+}
+
+variable "node_plan" {
+  description = "UpCloud server plan for worker nodes"
+  type        = string
+}
+
+variable "node_count" {
+  description = "Number of worker nodes"
+  type        = number
+}
+
+variable "network_cidr" {
+  description = "CIDR block for the private network"
+  type        = string
+  default     = "10.100.0.0/24"
+}
+
+variable "control_plane_ip_filter" {
+  description = "CIDRs allowed to access the K8s API"
+  type        = list(string)
+  default     = ["0.0.0.0/0"] # matches every IPv4 address unless the caller narrows it
+}
+
+variable "termination_protection" {
+  description = "Protect managed databases from accidental deletion"
+  type        = bool
+  default     = false # off unless the caller enables it
+}
+
+variable "tags" {
+  description = "Labels to apply to resources"
+  type        = map(string) # no default: every caller must pass a map
+}
+
+# ─── Managed Data Services ───────────────────────────────────────────
+
+variable "pg_plan" {
+  description = "UpCloud Managed PostgreSQL plan"
+  type        = string
+}
+
+variable "pg_version" {
+  description = "PostgreSQL major version"
+  type        = string # a string, not a number: callers pass e.g. "16"
+  default     = "16"
+}
+
+variable "valkey_plan" {
+  description = "UpCloud Managed Valkey plan"
+  type        = string
+}
+
+variable "objstore_region" {
+  description = "UpCloud Managed Object Storage region"
+  type        = string
+}
diff --git a/.tofu/platforms/upc/prod/main.tf b/.tofu/platforms/upc/prod/main.tf
new file mode 100644
index 0000000..fd9390a
--- /dev/null
+++ b/.tofu/platforms/upc/prod/main.tf
@@ -0,0 +1,24 @@
+module "cluster" {
+  source = "../modules/cluster" # shared UpCloud module; this root only supplies the prod values
+
+  prefix       = "devhub"
+  zone         = "de-fra1"
+  node_plan    = "4xCPU-8GB"
+  node_count   = 3
+  network_cidr = "10.100.0.0/24"
+
+  # Data services — production-grade plans
+  pg_plan         = "2x2xCPU-4GB-100GB"
+  pg_version      = "16"
+  valkey_plan     = "1x1xCPU-2GB"
+  objstore_region = "europe-1"
+
+  termination_protection = true
+
+  control_plane_ip_filter = ["0.0.0.0/0"] # TODO: restrict to known CIDRs
+
+  tags = {
+    Environment = "prod"
+    ManagedBy   = "tofu"
+  }
+}
diff --git a/.tofu/platforms/upc/prod/outputs.tf b/.tofu/platforms/upc/prod/outputs.tf
new file mode 100644
index 0000000..cdf97bb
--- /dev/null
+++ b/.tofu/platforms/upc/prod/outputs.tf
@@ -0,0 +1,67 @@
+# ─── Cluster ─────────────────────────────────────────────────────────
+
+output "cluster_id" {
+  value = module.cluster.cluster_id # every output in this file re-exports the module output of the same name
+}
+
+output "cluster_name" {
+  value = module.cluster.cluster_name
+}
+
+output "zone" {
+  value = module.cluster.zone
+}
+
+# ─── PostgreSQL ──────────────────────────────────────────────────────
+
+output "pg_host" {
+  value = module.cluster.pg_host
+}
+
+output "pg_port" {
+  value = module.cluster.pg_port
+}
+
+output "pg_keycloak_password" {
+  value     = module.cluster.pg_keycloak_password
+  sensitive = true
+}
+
+output "pg_gitlab_password" {
+  value     = module.cluster.pg_gitlab_password
+  sensitive = true
+}
+
+# ─── Valkey ──────────────────────────────────────────────────────────
+
+output "valkey_host" {
+  value = module.cluster.valkey_host
+}
+
+output "valkey_port" {
+  value = module.cluster.valkey_port
+}
+
+output "valkey_password" {
+  value     = module.cluster.valkey_password
+  sensitive = true
+}
+
+# ─── Object Storage ─────────────────────────────────────────────────
+
+output "s3_endpoint" {
+  value = module.cluster.s3_endpoint
+}
+
+output "s3_region" {
+  value = module.cluster.s3_region
+}
+
+output "s3_access_key" {
+  value = module.cluster.s3_access_key
+}
+
+output "s3_secret_key" {
+  value     = module.cluster.s3_secret_key
+  sensitive = true
+}
diff --git a/.tofu/platforms/upc/prod/providers.tf b/.tofu/platforms/upc/prod/providers.tf
new file mode 100644
index 0000000..2e2be87
--- /dev/null
+++ b/.tofu/platforms/upc/prod/providers.tf
@@ -0,0 +1,14 @@
+terraform {
+  required_version = ">=
1.0"
+
+  required_providers {
+    upcloud = {
+      source  = "UpCloudLtd/upcloud"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "upcloud" {
+  # Credentials come from the environment: UPCLOUD_USERNAME and UPCLOUD_PASSWORD. NOTE(review): configs/upc.env.example only defines UPCLOUD_TOKEN — confirm the resolved provider release accepts token auth.
+}
diff --git a/.tofu/platforms/upc/workload/main.tf b/.tofu/platforms/upc/workload/main.tf
new file mode 100644
index 0000000..eaabef1
--- /dev/null
+++ b/.tofu/platforms/upc/workload/main.tf
@@ -0,0 +1,116 @@
+# =============================================================================
+# UpCloud Workload Cluster
+# =============================================================================
+# A lean UCS cluster for running application workloads. No managed data
+# services — those live on the platform cluster. ArgoCD (on the platform
+# cluster) deploys apps to this cluster via the app-of-apps pattern.
+#
+# Platform components deployed by deploy-workload.sh:
+#   nginx-ingress, cert-manager, external-dns, external-secrets, alloy
+#
+# Usage:
+#   tofu init && tofu plan && tofu apply
+#   ./sync-tofu-outputs.sh --env upcloud-workload
+#   ./deploy-workload.sh --env upcloud-workload
+# =============================================================================
+
+variable "prefix" {
+  description = "Prefix for resource names"
+  type        = string
+  default     = "devhub-workload" # NOTE(review): resource names below append "-workload" again (default renders "devhub-workload-workload-router") — confirm intended
+}
+
+variable "zone" {
+  description = "UpCloud zone"
+  type        = string
+  default     = "fi-hel1"
+}
+
+variable "node_plan" {
+  description = "UpCloud server plan for worker nodes"
+  type        = string
+  default     = "2xCPU-4GB"
+}
+
+variable "node_count" {
+  description = "Number of worker nodes"
+  type        = number
+  default     = 2
+}
+
+variable "network_cidr" {
+  description = "CIDR block for the private network"
+  type        = string
+  default     = "10.110.0.0/24"
+}
+
+variable "control_plane_ip_filter" {
+  description = "CIDRs allowed to access the K8s API"
+  type        = list(string)
+  default     = ["0.0.0.0/0"] # matches every IPv4 address unless overridden
+}
+
+variable "tags" {
+  description = "Labels to apply to resources"
+  type        = map(string)
+  default = {
+    Environment = "workload" # read below for the node-group "env" label
+    ManagedBy   = "tofu"
+  }
+}
+
+# ─── Networking ───────────────────────────────────────────────────────
+
+resource "upcloud_router" "kubernetes" {
+  name = "${var.prefix}-workload-router"
+}
+
+resource "upcloud_gateway" "kubernetes" {
+  name     = "${var.prefix}-workload-gateway"
+  zone     = var.zone
+  features = ["nat"]
+  router {
+    id = upcloud_router.kubernetes.id
+  }
+}
+
+resource "upcloud_network" "kubernetes" {
+  name   = "${var.prefix}-workload-network"
+  zone   = var.zone
+  router = upcloud_router.kubernetes.id
+
+  ip_network {
+    address            = var.network_cidr
+    dhcp               = true
+    dhcp_default_route = true
+    family             = "IPv4"
+    gateway            = cidrhost(var.network_cidr, 1) # host number 1 of the CIDR
+  }
+
+  depends_on = [upcloud_gateway.kubernetes] # explicit ordering: the gateway is created before this network
+}
+
+# ─── Kubernetes Cluster ───────────────────────────────────────────────
+
+resource "upcloud_kubernetes_cluster" "main" {
+  name                    = "${var.prefix}-workload"
+  zone                    = var.zone
+  network                 = upcloud_network.kubernetes.id
+  control_plane_ip_filter = var.control_plane_ip_filter
+
+  private_node_groups = true
+}
+
+resource "upcloud_kubernetes_node_group" "workers" {
+  cluster       = upcloud_kubernetes_cluster.main.id
+  name          = "${var.prefix}-workload-workers"
+  node_count    = var.node_count
+  plan          = var.node_plan
+  anti_affinity = var.node_count > 1 # only enabled when there is more than one node
+  labels = {
+    prefix  = var.prefix
+    cluster = "workload"
+    role    = "worker"
+    env     = lookup(var.tags, "Environment", "workload") # falls back to "workload" when the tag is absent
+  }
+}
diff --git a/.tofu/platforms/upc/workload/outputs.tf b/.tofu/platforms/upc/workload/outputs.tf
new file mode 100644
index 0000000..ba93f7e
--- /dev/null
+++ b/.tofu/platforms/upc/workload/outputs.tf
@@ -0,0 +1,3 @@
+output "cluster_name" { value = upcloud_kubernetes_cluster.main.name }
+output "cluster_id" { value = upcloud_kubernetes_cluster.main.id }
+output "zone" { value = var.zone }
diff --git a/.tofu/platforms/upc/workload/providers.tf b/.tofu/platforms/upc/workload/providers.tf
new file mode 100644
index 0000000..2e2be87
--- /dev/null
+++
b/.tofu/platforms/upc/workload/providers.tf
@@ -0,0 +1,14 @@
+terraform {
+  required_version = ">= 1.0"
+
+  required_providers {
+    upcloud = {
+      source  = "UpCloudLtd/upcloud"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "upcloud" {
+  # Credentials come from the environment: UPCLOUD_USERNAME and UPCLOUD_PASSWORD. NOTE(review): configs/upc.env.example only defines UPCLOUD_TOKEN — confirm the resolved provider release accepts token auth.
+}
diff --git a/.tofu/scripts/get-kubeconfig.sh b/.tofu/scripts/get-kubeconfig.sh
new file mode 100755
index 0000000..61d588d
--- /dev/null
+++ b/.tofu/scripts/get-kubeconfig.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# Print the kubeconfig path for a cluster, fetching the file first when no
+# cached copy exists.
+#
+# Usage: get-kubeconfig.sh <cluster>    (e.g. aks-dev, eks-prod)
+#
+# <cluster> is <platform>-<env>. The file is cached at
+# private/<cluster>/kubeconfig under the project root. On a cache miss it is
+# fetched with the platform CLI (az, aws, gcloud or upctl); names and regions
+# come from tofu outputs, with fallbacks from .tofu/configs/<platform>.env.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
+PROJECT_ROOT="$(dirname "$TOFU_ROOT")"
+
+CLUSTER="${1:?Usage: $0 <cluster> (e.g., aks-dev, eks-prod)}"
+PLATFORM="${CLUSTER%%-*}"
+ENV="${CLUSTER#*-}"
+
+KUBECONFIG_FILE="$PROJECT_ROOT/private/$CLUSTER/kubeconfig"
+
+# -s rather than -f: an empty file (e.g. left by an interrupted fetch) is not a usable cache.
+if [[ -s "$KUBECONFIG_FILE" ]]; then
+  echo "Kubeconfig already exists: $KUBECONFIG_FILE"
+  echo ""
+  echo " export KUBECONFIG=$KUBECONFIG_FILE"
+else
+  echo "No cached kubeconfig. Fetching from platform..."
+
+  # Load platform credentials
+  ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
+  if [[ -f "$ENV_FILE" ]]; then
+    set -a; source "$ENV_FILE"; set +a
+  fi
+
+  TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
+  # The kubeconfig carries cluster credentials: create it (and its directory) owner-only.
+  umask 077
+  mkdir -p "$(dirname "$KUBECONFIG_FILE")"
+
+  case "$PLATFORM" in
+    aks)
+      cd "$TOFU_DIR"
+      RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "$CLUSTER-rg")
+      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
+      az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_FILE" --overwrite-existing
+      ;;
+    eks)
+      cd "$TOFU_DIR"
+      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
+      REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
+      aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_FILE"
+      ;;
+    gke)
+      cd "$TOFU_DIR"
+      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
+      REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
+      PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
+      # gcloud writes to the file named by KUBECONFIG. Targeting the cluster file
+      # directly avoids copying ~/.kube/config, which would drag every other
+      # context and credential of the user into it.
+      KUBECONFIG="$KUBECONFIG_FILE" gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT"
+      ;;
+    upc)
+      cd "$TOFU_DIR"
+      CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
+      if [[ -z "$CLUSTER_ID" ]]; then
+        echo "Error: no cluster ID — run 'tofu apply' first or set UPCLOUD_CLUSTER_ID" >&2
+        exit 1
+      fi
+      # Fetch into a temp file so a failed upctl call cannot leave an empty
+      # kubeconfig behind.
+      upctl kubernetes config "$CLUSTER_ID" > "$KUBECONFIG_FILE.tmp" || { rm -f "$KUBECONFIG_FILE.tmp"; exit 1; }
+      mv "$KUBECONFIG_FILE.tmp" "$KUBECONFIG_FILE"
+      ;;
+    *)
+      echo "Error: unknown platform '$PLATFORM'"
+      exit 1
+      ;;
+  esac
+
+  chmod 600 "$KUBECONFIG_FILE"
+  echo "Kubeconfig saved: $KUBECONFIG_FILE"
+  echo ""
+  echo " export KUBECONFIG=$KUBECONFIG_FILE"
+fi
diff --git a/.tofu/scripts/setup-cluster.sh b/.tofu/scripts/setup-cluster.sh
new file mode 100755
index 0000000..869dd02
--- /dev/null
+++ b/.tofu/scripts/setup-cluster.sh
@@ -0,0 +1,292 @@
+#!/bin/bash
+# Provision (or, with --destroy, tear down) a Kubernetes cluster with OpenTofu.
+#
+# Flow: load platform credentials and cluster config, run tofu init/plan/apply
+# in .tofu/platforms/<platform>/<env>, then save the kubeconfig to
+# private/<cluster>/kubeconfig and wait for the nodes. Run with --help for usage.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
+PROJECT_ROOT="$(dirname "$TOFU_ROOT")"
+
+# ─── Usage ────────────────────────────────────────────────────────────
+# Print the help text and exit with the given status (default 0).
+usage() {
+  cat <<EOF
+Usage: $0 <cluster> [options]
+
+  Provision a Kubernetes cluster using OpenTofu.
+  Mirrors bootstrap.sh convention: cluster = <platform>-<env>
+
+  Clusters: aks-dev | aks-prod | eks-dev | eks-prod
+            gke-dev | gke-prod | upc-dev | upc-prod
+            <platform>-workload (for workload clusters)
+
+  Options:
+    --plan       Plan only, don't apply (with --destroy: preview the destruction)
+    --destroy    Destroy the cluster (use teardown-cluster.sh instead)
+    --auto       Skip confirmation prompts
+    -h, --help   Show this help
+
+  Examples:
+    $0 aks-dev
+    $0 eks-prod --plan
+    $0 upc-dev --auto
+
+  Prerequisites:
+    - tofu, kubectl, helm installed
+    - Platform credentials in .tofu/configs/<platform>.env
+    - Cluster config in clusters/<cluster>.yaml
+
+  After provisioning, run:
+    ./bootstrap.sh <cluster>
+EOF
+  exit "${1:-0}"
+}
+
+# Write the stack's "kubeconfig" output to $KUBECONFIG_FILE. Returns non-zero when
+# the output is missing or empty. Uses a temp file so a failed lookup never
+# leaves an empty kubeconfig behind.
+save_kubeconfig_from_output() {
+  local tmp="$KUBECONFIG_FILE.tmp"
+  if tofu output -raw kubeconfig > "$tmp" 2>/dev/null && [[ -s "$tmp" ]]; then
+    mv "$tmp" "$KUBECONFIG_FILE"
+    return 0
+  fi
+  rm -f "$tmp"
+  return 1
+}
+
+# ─── Parse arguments ──────────────────────────────────────────────────
+CLUSTER=""
+PLAN_ONLY=false
+DESTROY=false
+AUTO_APPROVE=false
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --plan) PLAN_ONLY=true; shift ;;
+    --destroy) DESTROY=true; shift ;;
+    --auto) AUTO_APPROVE=true; shift ;;
+    -h|--help) usage 0 ;;
+    -*) echo "Unknown option: $1"; usage 1 ;;
+    *)
+      if [[ -z "$CLUSTER" ]]; then
+        CLUSTER="$1"
+      else
+        echo "Error: unexpected argument '$1'"
+        usage 1
+      fi
+      shift
+      ;;
+  esac
+done
+
+[[ -z "$CLUSTER" ]] && { echo "Error: <cluster> argument required"; usage 1; }
+
+# ─── Map cluster → platform + env ────────────────────────────────────
+PLATFORM="${CLUSTER%%-*}" # aks-dev → aks
+ENV="${CLUSTER#*-}" # aks-dev → dev
+
+case "$PLATFORM" in
+  aks|eks|gke|upc) ;;
+  *) echo "Error: unknown platform '$PLATFORM'. Expected: aks, eks, gke, upc"; exit 1 ;;
+esac
+
+TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
+if [[ ! -d "$TOFU_DIR" ]]; then
+  echo "Error: tofu directory not found: $TOFU_DIR"
+  echo "Available environments for $PLATFORM:"
+  ls -1 "$TOFU_ROOT/platforms/$PLATFORM/" 2>/dev/null | grep -v modules || echo " (none)"
+  exit 1
+fi
+
+echo "========================================="
+echo " Kubernetes Cluster Setup"
+echo "========================================="
+echo ""
+echo " Cluster: $CLUSTER"
+echo " Platform: $PLATFORM"
+echo " Env: $ENV"
+echo " Tofu dir: $TOFU_DIR"
+echo ""
+
+# ─── Prerequisites ────────────────────────────────────────────────────
+echo "=== Checking Prerequisites ==="
+command -v tofu >/dev/null 2>&1 || { echo "Error: tofu is not installed."; exit 1; }
+command -v kubectl >/dev/null 2>&1 || { echo "Error: kubectl is not installed."; exit 1; }
+command -v helm >/dev/null 2>&1 || { echo "Error: helm is not installed."; exit 1; }
+echo " tofu, kubectl, helm: OK"
+
+# ─── Load platform credentials ────────────────────────────────────────
+ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
+if [[ -f "$ENV_FILE" ]]; then
+  echo " Loading credentials from configs/$PLATFORM.env"
+  set -a
+  # shellcheck disable=SC1090
+  source "$ENV_FILE"
+  set +a
+else
+  echo " Warning: $ENV_FILE not found — using existing environment/CLI auth"
+  echo " Copy configs/$PLATFORM.env.example → configs/$PLATFORM.env to configure"
+fi
+
+# ─── Load cluster config (if exists) ──────────────────────────────────
+CLUSTER_CONFIG="$PROJECT_ROOT/clusters/$CLUSTER.yaml"
+if [[ -f "$CLUSTER_CONFIG" ]]; then
+  echo " Loading cluster config from clusters/$CLUSTER.yaml"
+  if command -v yq >/dev/null 2>&1; then
+    # Export each top-level key as CLUSTER_<key>. The values are assigned
+    # literally instead of being passed through eval, so quotes, "$" or
+    # backticks in the YAML can neither break the script nor run commands.
+    # Lines whose key is not a valid shell identifier are skipped.
+    while IFS= read -r cfg_line; do
+      cfg_key="${cfg_line%%=*}"
+      [[ "$cfg_line" == *=* && "$cfg_key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue
+      export "CLUSTER_${cfg_key}=${cfg_line#*=}"
+    done < <(yq -r 'to_entries[] | "\(.key)=\(.value)"' "$CLUSTER_CONFIG")
+    echo " Cluster name: ${CLUSTER_clusterName:-$CLUSTER}"
+  else
+    echo " Warning: yq not installed — cluster config not loaded"
+  fi
+else
+  echo " Warning: $CLUSTER_CONFIG not found — using defaults"
+fi
+echo ""
+
+# ─── Run OpenTofu ─────────────────────────────────────────────────────
+cd "$TOFU_DIR"
+
+echo "=== Initializing OpenTofu ==="
+tofu init
+
+echo ""
+if $DESTROY; then
+  echo "=== Planning Destruction ==="
+  if $PLAN_ONLY; then
+    # --plan must never change anything, also when combined with --destroy
+    # (e.g. "teardown-cluster.sh <cluster> --plan").
+    tofu plan -destroy
+    echo ""
+    echo "=== Plan complete (--plan mode, no changes applied) ==="
+    exit 0
+  fi
+  tofu plan -destroy -out=tfplan
+
+  if ! $AUTO_APPROVE; then
+    echo ""
+    read -rp "DESTROY cluster $CLUSTER? This is irreversible. (yes/no) " REPLY
+    [[ "$REPLY" == "yes" ]] || { echo "Cancelled."; exit 1; }
+  fi
+
+  echo "Destroying infrastructure..."
+  tofu apply tfplan
+  echo ""
+  echo "=== Cluster $CLUSTER Destroyed ==="
+
+elif $PLAN_ONLY; then
+  echo "=== Planning Infrastructure ==="
+  tofu plan
+  echo ""
+  echo "=== Plan complete (--plan mode, no changes applied) ==="
+
+else
+  echo "=== Planning Infrastructure ==="
+  tofu plan -out=tfplan
+
+  if ! $AUTO_APPROVE; then
+    echo ""
+    read -rp "Apply this plan for $CLUSTER? (y/n) " -n 1 REPLY
+    echo
+    [[ "$REPLY" =~ ^[Yy]$ ]] || { echo "Cancelled."; exit 1; }
+  fi
+
+  echo "Applying infrastructure..."
+  tofu apply tfplan
+
+  # ─── Save kubeconfig ──────────────────────────────────────────────
+  KUBECONFIG_DIR="$PROJECT_ROOT/private/$CLUSTER"
+  # The kubeconfig carries cluster credentials: create it (and its directory) owner-only.
+  umask 077
+  mkdir -p "$KUBECONFIG_DIR"
+  KUBECONFIG_FILE="$KUBECONFIG_DIR/kubeconfig"
+
+  echo ""
+  echo "=== Saving Kubeconfig ==="
+
+  case "$PLATFORM" in
+    aks)
+      if save_kubeconfig_from_output; then
+        echo " Saved from tofu output"
+      else
+        echo " Fetching from Azure CLI..."
+        RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}-rg")
+        NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
+        az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_FILE" --overwrite-existing
+      fi
+      ;;
+    eks)
+      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
+      REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
+      aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_FILE"
+      ;;
+    gke)
+      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
+      REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
+      PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
+      # gcloud writes to the file named by KUBECONFIG. Targeting the cluster file
+      # directly avoids copying ~/.kube/config, which would drag every other
+      # context and credential of the user into it.
+      KUBECONFIG="$KUBECONFIG_FILE" gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT" 2>/dev/null \
+        || echo " Warning: could not fetch kubeconfig via gcloud"
+      ;;
+    upc)
+      if save_kubeconfig_from_output; then
+        echo " Saved from tofu output"
+      else
+        CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
+        if [[ -n "$CLUSTER_ID" ]]; then
+          # Fetch into a temp file so a failed upctl call cannot leave an empty
+          # kubeconfig behind.
+          upctl kubernetes config "$CLUSTER_ID" > "$KUBECONFIG_FILE.tmp" || { rm -f "$KUBECONFIG_FILE.tmp"; exit 1; }
+          mv "$KUBECONFIG_FILE.tmp" "$KUBECONFIG_FILE"
+        else
+          echo " Warning: could not determine cluster ID for kubeconfig"
+        fi
+      fi
+      ;;
+  esac
+
+  if [[ -s "$KUBECONFIG_FILE" ]]; then
+    chmod 600 "$KUBECONFIG_FILE"
+    echo " Kubeconfig: $KUBECONFIG_FILE"
+  fi
+
+  # ─── Wait for nodes ──────────────────────────────────────────────
+  echo ""
+  echo "=== Waiting for Cluster Nodes ==="
+  if [[ -s "$KUBECONFIG_FILE" ]]; then
+    export KUBECONFIG="$KUBECONFIG_FILE"
+    if kubectl wait --for=condition=Ready nodes --all --timeout=300s 2>/dev/null; then
+      echo " All nodes ready"
+    else
+      echo " Warning: nodes not ready within timeout — check cluster status"
+    fi
+  else
+    echo " Warning: no kubeconfig was saved — skipping node readiness check"
+  fi
+
+  # ─── Summary ─────────────────────────────────────────────────────
+  echo ""
+  echo "========================================="
+  echo " Cluster $CLUSTER Provisioned"
+  echo "========================================="
+  echo ""
+  echo " Kubeconfig: $KUBECONFIG_FILE"
+  echo ""
+  echo " Next steps:"
+  echo " export KUBECONFIG=$KUBECONFIG_FILE"
+  echo " ./bootstrap.sh $CLUSTER"
+  echo ""
+fi
diff --git a/.tofu/scripts/teardown-cluster.sh b/.tofu/scripts/teardown-cluster.sh
new file mode 100755
index 0000000..5e0b90a
--- /dev/null
+++ b/.tofu/scripts/teardown-cluster.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Destroy a cluster: thin wrapper that forwards every argument to
+# setup-cluster.sh and appends --destroy. Add --plan to preview only.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Delegate to setup-cluster.sh with --destroy flag
+exec "$SCRIPT_DIR/setup-cluster.sh" "$@" --destroy
diff --git a/README.md b/README.md
index 198cd80..4178d57 100644
--- a/README.md
+++ b/README.md
@@ -80,8 +80,23 @@ This repository contains the complete GitOps configuration for our Kubernetes cl
 
 ```
 .
-├── bootstrap.sh                    # Cluster initialization script
-├── _app-of-apps.yaml               # Root ArgoCD Application (App-of-Apps pattern)
+├── bootstrap.sh                    # Cluster initialization (ArgoCD + GitOps)
+├── _app-of-apps-{cluster}.yaml     # Root ArgoCD Application (per cluster)
+│
+├── .tofu/                          # Infrastructure provisioning (OpenTofu)
+│   ├── platforms/                  # Per-platform IaC (one dir per cloud)
+│   │   ├── aks/                    # Azure AKS (modules/ + dev/ + prod/ + workload/)
+│   │   ├── eks/                    # AWS EKS
+│   │   ├── gke/                    # GCP GKE
+│   │   └── upc/                    # UpCloud
+│   ├── configs/                    # Platform credentials (git-ignored)
+│   │   └── *.env.example           # Template for each platform
+│   └── scripts/                    # Cluster lifecycle scripts
+│       ├── setup-cluster.sh        # Create cluster: ./setup-cluster.sh aks-dev
+│       ├── teardown-cluster.sh
+│       └── get-kubeconfig.sh
+│
+├── clusters/                       # Cluster metadata (domain, trustedIPs, etc.)
│ ├── infra/ # Infrastructure ArgoCD Applications (Kustomize multi-cluster) │ ├── base/ # Base ArgoCD Application manifests (one dir per component) diff --git a/docs/GITOPS-ARCHITECTURE.md b/docs/GITOPS-ARCHITECTURE.md index 607f639..19f6dc4 100644 --- a/docs/GITOPS-ARCHITECTURE.md +++ b/docs/GITOPS-ARCHITECTURE.md @@ -115,9 +115,30 @@ This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where ``` launchpad/ -├── bootstrap.sh # Cluster initialization script -├── _app-of-apps-upc-dev.yaml # Root ArgoCD Application (upc-dev cluster) -├── _app-of-apps-upc-prod.yaml # Root ArgoCD Application (upc-prod cluster) +├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps) +├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster) +│ +├── .tofu/ # Infrastructure provisioning (OpenTofu) +│ ├── platforms/ # Per-platform IaC +│ │ ├── aks/ # Azure AKS +│ │ │ ├── modules/cluster/ # Reusable AKS module +│ │ │ ├── dev/ # tofu root for aks-dev +│ │ │ ├── prod/ # tofu root for aks-prod +│ │ │ └── workload/ # workload cluster (no data services) +│ │ ├── eks/ # AWS EKS (same structure) +│ │ ├── gke/ # GCP GKE +│ │ └── upc/ # UpCloud +│ ├── configs/ # Platform credentials (git-ignored) +│ │ └── {platform}.env.example # Template per platform +│ └── scripts/ +│ ├── setup-cluster.sh # ./setup-cluster.sh <cluster> [--plan|--auto] +│ ├── teardown-cluster.sh # ./teardown-cluster.sh <cluster> +│ └── get-kubeconfig.sh # ./get-kubeconfig.sh <cluster> +│ +├── clusters/ # Cluster metadata YAML (domain, IPs, etc.) +│ ├── aks-dev.yaml +│ ├── upc-dev.yaml +│ └── ... 
│ ├── infra/ # Infrastructure ArgoCD Applications (Kustomize) │ ├── base/ # Base Application manifests (one dir per component) diff --git a/docs/REFERENCE.md b/docs/REFERENCE.md index 524a943..efc0935 100644 --- a/docs/REFERENCE.md +++ b/docs/REFERENCE.md @@ -72,9 +72,22 @@ Internet ``` launchpad/ -├── bootstrap.sh # Cluster initialization script -├── _app-of-apps-upc-dev.yaml # Root ArgoCD Application (upc-dev) -├── _app-of-apps-upc-prod.yaml # Root ArgoCD Application (upc-prod) +├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps) +├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster) +│ +├── .tofu/ # Infrastructure provisioning (OpenTofu) +│ ├── platforms/ # Per-platform IaC +│ │ ├── aks/ # Azure: modules/cluster/, dev/, prod/, workload/ +│ │ ├── eks/ # AWS: same structure +│ │ ├── gke/ # GCP +│ │ └── upc/ # UpCloud +│ ├── configs/ # Platform credentials (git-ignored) +│ └── scripts/ # setup-cluster.sh, teardown-cluster.sh, get-kubeconfig.sh +│ +├── clusters/ # Cluster metadata YAML +│ ├── aks-dev.yaml +│ ├── upc-dev.yaml +│ └── ... │ ├── infra/ # Infrastructure applications (Kustomize) │ ├── base/ # One subdirectory per component