24 Commits

Author SHA1 Message Date
5879c84a05 Merge branch 'feature/multi-cloud' of https://git.forteapps.net/Forte/launchpad into feature/multi-cloud
All checks were successful
AI Code Review / ai-review (pull_request) Has been skipped
2026-04-24 10:48:08 +02:00
c7cbfc712e overlays 2026-04-24 10:48:03 +02:00
ddccdacd6d Merge branch 'main' into feature/multi-cloud
All checks were successful
AI Code Review / ai-review (pull_request) Has been skipped
2026-04-24 08:24:34 +00:00
a89f2f30ce details 2026-04-22 22:26:57 +02:00
9a7e03b794 Merge branch 'feature/cloud-agnostic' into feature/multi-cloud 2026-04-22 22:06:31 +02:00
f1dd61cece sync 2026-04-22 21:56:43 +02:00
acc9bb1a85 sync 2026-04-22 21:53:44 +02:00
c8c2dedea5 rename 2026-04-22 21:48:02 +02:00
a471f11740 repo url 2026-04-22 14:45:23 +02:00
92ddc22322 azure>aks 2026-04-22 14:42:02 +02:00
7d2fb8bc0c azure>aks 2026-04-22 14:41:42 +02:00
79f9c62012 azure>aks 2026-04-22 14:35:59 +02:00
dea54e469e repo url 2026-04-22 14:34:20 +02:00
333acdea26 multi-cloud overlays
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 6s
2026-04-22 14:30:13 +02:00
03d526208b Merge branch 'main' into feature/cloud-agnostic
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 7s
2026-04-22 12:08:08 +00:00
458f7b23ad Merge branch 'main' into feature/multi-cloud
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 28s
2026-04-22 11:55:05 +00:00
41c8b85bf8 Merge branch 'main' into feature/multi-cloud
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 26s
2026-04-22 11:52:22 +00:00
c3f723333b Merge branch 'feature/cloud-agnostic' of ssh://git.forteapps.net:2222/Forte/launchpad into feature/cloud-agnostic
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 1m3s
2026-04-22 13:43:09 +02:00
4144b1c1ac token 2026-04-22 13:39:43 +02:00
16eadbe181 Merge remote-tracking branch 'origin/main' into feature/cloud-agnostic 2026-04-22 13:38:55 +02:00
4e6a84785a token
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 28s
2026-04-22 13:37:32 +02:00
e0bdaab422 multi-cloud + mcp
Some checks failed
AI Code Review / ai-review (pull_request) Failing after 2s
2026-04-22 13:34:48 +02:00
230ea7ebeb Merge branch 'main' into feature/cloud-agnostic
Some checks failed
AI Code Review / ai-review (pull_request) Failing after 3s
2026-04-22 11:33:03 +00:00
cab0866e14 multi-cloud no mcp 2026-04-22 13:31:09 +02:00
166 changed files with 2456 additions and 3354 deletions

2
.gitattributes vendored
View File

@@ -1,2 +0,0 @@
# Force LF line endings for shell scripts
*.sh text eol=lf

8
.gitignore vendored
View File

@@ -16,11 +16,3 @@ devbox.d/
devbox.lock
.devbox/
bash.exe.stackdump
# OpenTofu
.tofu/configs/*.env
.tofu/scripts/*.config
.tofu/platforms/**/.terraform/
.tofu/platforms/**/terraform.tfstate*
.tofu/platforms/**/tfplan
.tofu/platforms/**/.terraform.lock.hcl

View File

@@ -1,9 +0,0 @@
# Azure AKS credentials — copy to aks.env and fill in values
# NEVER commit aks.env to git!
# Required
AZURE_TENANT_ID=your-azure-tenant-id
AZURE_SUBSCRIPTION_ID=your-azure-subscription-id
# Optional — defaults to cluster name if not set
ARM_RESOURCE_GROUP=

View File

@@ -1,10 +0,0 @@
# AWS EKS credentials — copy to eks.env and fill in values
# NEVER commit eks.env to git!
# Required — AWS CLI profile or access key
AWS_PROFILE=default
AWS_REGION=eu-west-1
# Optional — override with explicit keys instead of profile
# AWS_ACCESS_KEY_ID=
# AWS_SECRET_ACCESS_KEY=

View File

@@ -1,9 +0,0 @@
# GCP GKE credentials — copy to gke.env and fill in values
# NEVER commit gke.env to git!
# Required
GCP_PROJECT_ID=your-gcp-project-id
GCP_REGION=europe-west4
# Optional — path to service account JSON key (if not using gcloud auth)
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/sa-key.json

View File

@@ -1,8 +0,0 @@
# UpCloud credentials — copy to upc.env and fill in values
# NEVER commit upc.env to git!
# Required
UPCLOUD_TOKEN=your-upcloud-api-token
# Optional — set after cluster creation for kubeconfig retrieval
UPCLOUD_CLUSTER_ID=

View File

@@ -1,18 +0,0 @@
# Dev environment: instantiates the shared cluster module with a small
# two-node AKS pool and the resource-group delete lock switched off.
module "cluster" {
  source = "../modules/cluster"

  # Naming and placement
  prefix              = "clst-dev"
  location            = "norwayeast"
  resource_group_name = "clst-dev-rg"

  # AKS node pool sizing (small dev nodes)
  aks_node_vm_size = "Standard_B2s"
  aks_node_count   = 2

  # No management lock on the dev resource group
  enable_delete_lock = false

  tags = {
    Environment = "dev"
    ManagedBy   = "tofu"
  }
}

View File

@@ -1,26 +0,0 @@
# Root-level outputs for this environment. Every value is forwarded
# unchanged from the cluster module.

output "cluster_name" {
  value = module.cluster.cluster_name
}

output "resource_group_name" {
  value = module.cluster.resource_group_name
}

output "kubernetes_version" {
  value = module.cluster.kubernetes_version
}

output "location" {
  value = module.cluster.location
}

output "oidc_issuer_url" {
  value = module.cluster.oidc_issuer_url
}

# Flagged sensitive so the kubeconfig is redacted in CLI output.
output "kubeconfig" {
  value     = module.cluster.kubeconfig
  sensitive = true
}

View File

@@ -1,17 +0,0 @@
# Tool and provider version constraints for this environment.
terraform {
  required_version = ">= 1.0"

  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 4.0"
    }
  }
}

# azurerm provider with default feature settings. No credentials are
# configured here; supply them through the environment variables
# ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID and ARM_CLIENT_SECRET,
# or run `az login` to reuse the Azure CLI session.
provider "azurerm" {
  features {}
}

View File

@@ -1,72 +0,0 @@
# Context (tenant, subscription, client) of the identity the azurerm
# provider is authenticated as.
# NOTE(review): nothing in the module files visible here references this
# data source — confirm whether it is still needed.
data "azurerm_client_config" "current" {}

# ─── Resource Group ───────────────────────────────────────────────────

# Resource group that holds every resource created by this module.
resource "azurerm_resource_group" "main" {
  name     = var.resource_group_name
  location = var.location
  tags     = var.tags
}

# Optional CanNotDelete lock on the resource group; created only when
# var.enable_delete_lock is true (count is 1), otherwise skipped (count 0).
resource "azurerm_management_lock" "main" {
  count = var.enable_delete_lock ? 1 : 0

  name       = "${var.prefix}-delete-lock"
  scope      = azurerm_resource_group.main.id
  lock_level = "CanNotDelete"
  notes      = "Prevents accidental deletion of production resources"
}
# ─── Networking ───────────────────────────────────────────────────────

# Virtual network for the cluster, placed in the module's resource group
# and region, with a single address space taken from var.vnet_address_space.
resource "azurerm_virtual_network" "main" {
  name                = "${var.prefix}-vnet"
  location            = azurerm_resource_group.main.location
  resource_group_name = azurerm_resource_group.main.name
  address_space       = [var.vnet_address_space]
  tags                = var.tags
}

# Subnet used by the AKS node pool (referenced as vnet_subnet_id there).
resource "azurerm_subnet" "aks" {
  name                 = "${var.prefix}-aks-subnet"
  virtual_network_name = azurerm_virtual_network.main.name
  resource_group_name  = azurerm_resource_group.main.name
  address_prefixes     = [var.aks_subnet_cidr]
}
# ─── AKS Cluster ──────────────────────────────────────────────────────

# The AKS cluster itself: one default node pool in the AKS subnet, a
# system-assigned identity, Azure CNI networking, and OIDC / workload
# identity enabled.
resource "azurerm_kubernetes_cluster" "main" {
  name                = "${var.prefix}-aks"
  location            = azurerm_resource_group.main.location
  resource_group_name = azurerm_resource_group.main.name

  # The DNS prefix is the resource prefix with all hyphens removed.
  dns_prefix = replace(var.prefix, "-", "")

  kubernetes_version = var.aks_kubernetes_version
  tags               = var.tags

  default_node_pool {
    name           = "system"
    vm_size        = var.aks_node_vm_size
    node_count     = var.aks_node_count
    vnet_subnet_id = azurerm_subnet.aks.id

    node_labels = {
      prefix = var.prefix
      role   = "worker"
      # Falls back to "dev" when var.tags carries no "Environment" key.
      env = lookup(var.tags, "Environment", "dev")
    }
  }

  identity {
    type = "SystemAssigned"
  }

  network_profile {
    network_plugin = "azure"
    network_policy = "azure"
  }

  # OIDC issuer + workload identity: lets pods reach Azure services
  # without stored credentials (federated managed identities).
  oidc_issuer_enabled       = true
  workload_identity_enabled = true
}

View File

@@ -1,32 +0,0 @@
# Values exported by the cluster module for use by the calling environment.

output "cluster_name" {
  description = "AKS cluster name"
  value       = azurerm_kubernetes_cluster.main.name
}

output "resource_group_name" {
  description = "Resource group name"
  value       = azurerm_resource_group.main.name
}

output "kubernetes_version" {
  description = "Kubernetes version"
  value       = azurerm_kubernetes_cluster.main.kubernetes_version
}

output "location" {
  description = "Azure region"
  value       = azurerm_resource_group.main.location
}

output "oidc_issuer_url" {
  description = "AKS OIDC issuer URL (for workload identity federation)"
  value       = azurerm_kubernetes_cluster.main.oidc_issuer_url
}

# Raw kubeconfig; sensitive so it is redacted in CLI output.
output "kubeconfig" {
  description = "Kubeconfig for the AKS cluster"
  sensitive   = true
  value       = azurerm_kubernetes_cluster.main.kube_config_raw
}

View File

@@ -1,18 +0,0 @@
# Version constraints for the cluster module. The provider itself is
# configured by the calling environment, not here.
# NOTE(review): the module files visible in this diff only use azurerm
# resources; azuread and random look unused — confirm before pruning.
terraform {
  required_version = ">= 1.0"

  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 4.0"
    }

    azuread = {
      source  = "hashicorp/azuread"
      version = "~> 3.0"
    }

    random = {
      source  = "hashicorp/random"
      version = "~> 3.0"
    }
  }
}

View File

@@ -1,56 +0,0 @@
# Inputs of the cluster module. Variables without a default must be set
# by the calling environment.

# ─── Naming and placement (required) ─────────────────────────────────

variable "prefix" {
  type        = string
  description = "Prefix for resource names"
}

variable "location" {
  type        = string
  description = "Azure region (e.g., norwayeast, westeurope, northeurope)"
}

variable "resource_group_name" {
  type        = string
  description = "Name of the Azure Resource Group to create"
}

# ─── Networking (defaults provided) ──────────────────────────────────

variable "vnet_address_space" {
  type        = string
  description = "Address space for the virtual network"
  default     = "10.100.0.0/16"
}

variable "aks_subnet_cidr" {
  type        = string
  description = "CIDR block for the AKS node subnet"
  default     = "10.100.0.0/22"
}

# ─── AKS node pool ───────────────────────────────────────────────────

variable "aks_node_vm_size" {
  type        = string
  description = "VM size for AKS worker nodes (e.g., Standard_B2s, Standard_D4s_v3)"
}

variable "aks_node_count" {
  type        = number
  description = "Number of AKS worker nodes"
}

variable "aks_kubernetes_version" {
  type        = string
  description = "Kubernetes version for AKS (null = latest stable)"
  default     = null
}

# ─── Protection and tagging ──────────────────────────────────────────

variable "enable_delete_lock" {
  type        = bool
  description = "Protect the resource group from accidental deletion"
  default     = false
}

variable "tags" {
  type        = map(string)
  description = "Tags applied to all resources"
  default     = {}
}

View File

@@ -1,18 +0,0 @@
# Prod environment: instantiates the shared cluster module with three
# general-purpose nodes and the resource-group delete lock switched on.
module "cluster" {
  source = "../modules/cluster"

  # Naming and placement
  prefix              = "clst"
  location            = "westeurope"
  resource_group_name = "clst-prod-rg"

  # AKS node pool sizing (general-purpose nodes for production)
  aks_node_vm_size = "Standard_D4s_v3"
  aks_node_count   = 3

  # Guard the production resource group with a delete lock
  enable_delete_lock = true

  tags = {
    Environment = "prod"
    ManagedBy   = "tofu"
  }
}

View File

@@ -1,26 +0,0 @@
# Root-level outputs for the prod environment. Every value is forwarded
# unchanged from the cluster module.

output "cluster_name" {
  value = module.cluster.cluster_name
}

output "resource_group_name" {
  value = module.cluster.resource_group_name
}

output "kubernetes_version" {
  value = module.cluster.kubernetes_version
}

output "location" {
  value = module.cluster.location
}

output "oidc_issuer_url" {
  value = module.cluster.oidc_issuer_url
}

# Flagged sensitive so the kubeconfig is redacted in CLI output.
output "kubeconfig" {
  value     = module.cluster.kubeconfig
  sensitive = true
}

View File

@@ -1,17 +0,0 @@
# Tool and provider version constraints for the prod environment.
terraform {
  required_version = ">= 1.0"

  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 4.0"
    }
  }
}

# azurerm provider with default feature settings. No credentials are
# configured here; supply them through the environment variables
# ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID and ARM_CLIENT_SECRET,
# or run `az login` to reuse the Azure CLI session.
provider "azurerm" {
  features {}
}

View File

@@ -1,173 +0,0 @@
# =============================================================================
# Azure Workload Cluster
# =============================================================================
# A lean AKS cluster for running application workloads. No managed data
# services — those live on the platform cluster. ArgoCD (on the platform
# cluster) deploys apps to this cluster via the app-of-apps pattern.
#
# Platform components deployed by deploy-workload.sh:
# nginx-ingress, cert-manager, external-dns, external-secrets, alloy
#
# Usage:
# tofu init && tofu plan && tofu apply
# ./sync-tofu-outputs.sh --env azure-workload
# ./deploy-workload.sh --env azure-workload
# =============================================================================
# Inputs of the Azure workload cluster. Everything has a default except
# var.domain, which must be supplied.

# ─── Naming and placement ────────────────────────────────────────────

variable "prefix" {
  type        = string
  description = "Prefix for resource names (e.g., clst-workload)"
  default     = "clst-workload"
}

variable "location" {
  type        = string
  description = "Azure region"
  default     = "norwayeast"
}

variable "resource_group_name" {
  type        = string
  description = "Name of the Azure Resource Group to create"
  default     = "clst-workload-rg"
}

# ─── Networking ──────────────────────────────────────────────────────

variable "vnet_address_space" {
  type        = string
  description = "Address space for the virtual network"
  default     = "10.110.0.0/16"
}

variable "aks_subnet_cidr" {
  type        = string
  description = "CIDR block for the AKS node subnet"
  default     = "10.110.0.0/22"
}

# ─── AKS node pool ───────────────────────────────────────────────────

variable "aks_node_vm_size" {
  type        = string
  description = "VM size for AKS worker nodes"
  default     = "Standard_B2s"
}

variable "aks_node_count" {
  type        = number
  description = "Number of AKS worker nodes"
  default     = 2
}

variable "aks_kubernetes_version" {
  type        = string
  description = "Kubernetes version for AKS (null = latest stable)"
  default     = null
}

# ─── DNS ─────────────────────────────────────────────────────────────

# Required: no default.
variable "domain" {
  type        = string
  description = "Public domain name — must have an existing Azure DNS zone"
}

# An empty string means "use the cluster's own resource group" (see the
# azurerm_dns_zone data source).
variable "dns_zone_resource_group" {
  type        = string
  description = "Resource group containing the Azure DNS zone (defaults to cluster RG)"
  default     = ""
}

# ─── Tags ────────────────────────────────────────────────────────────

variable "tags" {
  type        = map(string)
  description = "Tags applied to all resources"

  default = {
    Environment = "workload"
    ManagedBy   = "tofu"
  }
}
# ─── Resource Group ───────────────────────────────────────────────────

# Resource group that holds the workload cluster and its identities.
resource "azurerm_resource_group" "main" {
  name     = var.resource_group_name
  location = var.location
  tags     = var.tags
}

# ─── Networking ───────────────────────────────────────────────────────

# Virtual network with a single address space from var.vnet_address_space.
resource "azurerm_virtual_network" "main" {
  name                = "${var.prefix}-vnet"
  location            = azurerm_resource_group.main.location
  resource_group_name = azurerm_resource_group.main.name
  address_space       = [var.vnet_address_space]
  tags                = var.tags
}

# Subnet used by the AKS node pool.
resource "azurerm_subnet" "aks" {
  name                 = "${var.prefix}-aks-subnet"
  virtual_network_name = azurerm_virtual_network.main.name
  resource_group_name  = azurerm_resource_group.main.name
  address_prefixes     = [var.aks_subnet_cidr]
}
# ─── AKS Cluster ──────────────────────────────────────────────────────

# Workload AKS cluster: one default node pool in the AKS subnet, a
# system-assigned identity, Azure CNI networking, and OIDC / workload
# identity enabled (the external-dns federated credential uses the issuer).
resource "azurerm_kubernetes_cluster" "main" {
  name                = "${var.prefix}-aks"
  location            = azurerm_resource_group.main.location
  resource_group_name = azurerm_resource_group.main.name

  # The DNS prefix is the resource prefix with all hyphens removed.
  dns_prefix = replace(var.prefix, "-", "")

  kubernetes_version = var.aks_kubernetes_version
  tags               = var.tags

  default_node_pool {
    name           = "system"
    vm_size        = var.aks_node_vm_size
    node_count     = var.aks_node_count
    vnet_subnet_id = azurerm_subnet.aks.id

    node_labels = {
      prefix = var.prefix
      role   = "worker"
      # Falls back to "workload" when var.tags has no "Environment" key.
      env = lookup(var.tags, "Environment", "workload")
    }
  }

  identity {
    type = "SystemAssigned"
  }

  network_profile {
    network_plugin = "azure"
    network_policy = "azure"
  }

  oidc_issuer_enabled       = true
  workload_identity_enabled = true
}
# ─── External-DNS Workload Identity ──────────────────────────────────
# Gives external-dns the rights to manage records in the Azure DNS zone
# for app ingresses, via a user-assigned identity federated with the
# cluster's OIDC issuer.

# Existing DNS zone for var.domain. It is looked up in
# var.dns_zone_resource_group when that is non-empty, otherwise in the
# cluster's own resource group.
data "azurerm_dns_zone" "main" {
  name                = var.domain
  resource_group_name = var.dns_zone_resource_group != "" ? var.dns_zone_resource_group : azurerm_resource_group.main.name
}

# Identity that external-dns runs as.
resource "azurerm_user_assigned_identity" "external_dns" {
  name                = "${var.prefix}-external-dns-identity"
  location            = azurerm_resource_group.main.location
  resource_group_name = azurerm_resource_group.main.name
  tags                = var.tags
}

# Grant that identity "DNS Zone Contributor", scoped to the zone only.
resource "azurerm_role_assignment" "external_dns_dns_contributor" {
  principal_id         = azurerm_user_assigned_identity.external_dns.principal_id
  role_definition_name = "DNS Zone Contributor"
  scope                = data.azurerm_dns_zone.main.id
}

# Trust tokens issued by the cluster for the external-dns service account
# (namespace external-dns, name external-dns).
resource "azurerm_federated_identity_credential" "external_dns" {
  name                = "${var.prefix}-external-dns-fedcred"
  resource_group_name = azurerm_resource_group.main.name
  parent_id           = azurerm_user_assigned_identity.external_dns.id

  issuer   = azurerm_kubernetes_cluster.main.oidc_issuer_url
  subject  = "system:serviceaccount:external-dns:external-dns"
  audience = ["api://AzureADTokenExchange"]
}

View File

@@ -1,4 +0,0 @@
# Outputs of the Azure workload cluster.

output "cluster_name" {
  value = azurerm_kubernetes_cluster.main.name
}

output "resource_group_name" {
  value = azurerm_resource_group.main.name
}

output "location" {
  value = azurerm_resource_group.main.location
}

# Client ID of the user-assigned identity created for external-dns.
output "external_dns_identity_client_id" {
  value = azurerm_user_assigned_identity.external_dns.client_id
}

View File

@@ -1,21 +0,0 @@
# Tool and provider version constraints for the workload cluster.
# NOTE(review): the workload files visible in this diff only use azurerm
# resources; the random provider looks unused — confirm before pruning.
terraform {
  required_version = ">= 1.0"

  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 4.0"
    }

    random = {
      source  = "hashicorp/random"
      version = "~> 3.0"
    }
  }
}

# azurerm provider with default feature settings. No credentials are
# configured here; supply them through the environment variables
# ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID and ARM_CLIENT_SECRET,
# or run `az login` to reuse the Azure CLI session.
provider "azurerm" {
  features {}
}

View File

@@ -1,21 +0,0 @@
# Dev environment: instantiates the shared EKS cluster module across two
# availability zones with small nodes.
module "cluster" {
  source = "../modules/cluster"

  region = var.region
  prefix = "clst-dev"

  # VPC: zones "a" and "b" of the selected region
  availability_zones = ["${var.region}a", "${var.region}b"]

  # EKS: small dev nodes, scaling between 1 and 4 with 2 desired
  node_instance_type = "t3.medium"
  node_count         = 2
  node_min_count     = 1
  node_max_count     = 4
  kubernetes_version = "1.30"

  tags = {
    Environment = "dev"
    ManagedBy   = "tofu"
  }
}

View File

@@ -1,5 +0,0 @@
# Root-level outputs for this environment, forwarded unchanged from the
# cluster module.

output "cluster_name" {
  value = module.cluster.cluster_name
}

output "aws_region" {
  value = module.cluster.aws_region
}

output "oidc_issuer_url" {
  value = module.cluster.oidc_issuer_url
}

output "oidc_provider_arn" {
  value = module.cluster.oidc_provider_arn
}

output "vpc_id" {
  value = module.cluster.vpc_id
}

View File

@@ -1,24 +0,0 @@
# Provider version constraints for the dev environment.
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }

    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
  }
}

# AWS provider pinned to the environment's region. Credentials come from
# outside this file: either AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY /
# AWS_SESSION_TOKEN, or a named profile (export AWS_PROFILE=clst).
provider "aws" {
  region = var.region
}

variable "region" {
  type        = string
  description = "AWS region for dev environment"
  default     = "eu-west-1"
}

View File

@@ -1,207 +0,0 @@
# ─── VPC ──────────────────────────────────────────────────────────────

# VPC with DNS resolution and DNS hostnames enabled.
resource "aws_vpc" "main" {
  cidr_block           = var.vpc_cidr
  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = merge(var.tags, {
    Name = "${var.prefix}-vpc"
  })
}

resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = merge(var.tags, {
    Name = "${var.prefix}-igw"
  })
}

# Subnet addressing: the VPC CIDR is extended by 4 bits. Public subnets
# take netnums 0..N-1 and private subnets N..2N-1, where N is the number
# of availability zones.

# One public subnet per AZ, for the NAT gateway and load balancers.
resource "aws_subnet" "public" {
  count = length(var.availability_zones)

  vpc_id                  = aws_vpc.main.id
  availability_zone       = var.availability_zones[count.index]
  cidr_block              = cidrsubnet(var.vpc_cidr, 4, count.index)
  map_public_ip_on_launch = true

  tags = merge(var.tags, {
    Name                                      = "${var.prefix}-public-${count.index + 1}"
    "kubernetes.io/cluster/${var.prefix}-eks" = "shared"
    "kubernetes.io/role/elb"                  = "1"
  })
}

# One private subnet per AZ, for the EKS nodes.
resource "aws_subnet" "private" {
  count = length(var.availability_zones)

  vpc_id            = aws_vpc.main.id
  availability_zone = var.availability_zones[count.index]
  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones))

  tags = merge(var.tags, {
    Name                                      = "${var.prefix}-private-${count.index + 1}"
    "kubernetes.io/cluster/${var.prefix}-eks" = "shared"
    "kubernetes.io/role/internal-elb"         = "1"
  })
}
# ─── NAT and routing ─────────────────────────────────────────────────
# A single NAT gateway in the first public subnet serves every private
# subnet. For production HA, use one NAT gateway per AZ instead.

resource "aws_eip" "nat" {
  domain = "vpc"

  tags = merge(var.tags, {
    Name = "${var.prefix}-nat-eip"
  })
}

resource "aws_nat_gateway" "main" {
  subnet_id     = aws_subnet.public[0].id
  allocation_id = aws_eip.nat.id

  tags = merge(var.tags, {
    Name = "${var.prefix}-nat"
  })

  # Create the NAT gateway only after the internet gateway exists.
  depends_on = [aws_internet_gateway.main]
}

# Public route table: default route through the internet gateway.
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }

  tags = merge(var.tags, {
    Name = "${var.prefix}-public-rt"
  })
}

resource "aws_route_table_association" "public" {
  count = length(var.availability_zones)

  route_table_id = aws_route_table.public.id
  subnet_id      = aws_subnet.public[count.index].id
}

# Private route table: default route through the NAT gateway.
resource "aws_route_table" "private" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.main.id
  }

  tags = merge(var.tags, {
    Name = "${var.prefix}-private-rt"
  })
}

resource "aws_route_table_association" "private" {
  count = length(var.availability_zones)

  route_table_id = aws_route_table.private.id
  subnet_id      = aws_subnet.private[count.index].id
}
# ─── EKS Cluster ──────────────────────────────────────────────────────

# IAM role assumed by the EKS service (eks.amazonaws.com).
resource "aws_iam_role" "eks_cluster" {
  name_prefix = "${var.prefix}-eks-cluster-"
  tags        = var.tags

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "eks.amazonaws.com" }
    }]
  })
}

resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
  role       = aws_iam_role.eks_cluster.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
}

# Control plane. It is attached to both the private and the public
# subnets, and its API endpoint is reachable privately and publicly.
resource "aws_eks_cluster" "main" {
  name     = "${var.prefix}-eks"
  version  = var.kubernetes_version
  role_arn = aws_iam_role.eks_cluster.arn
  tags     = var.tags

  vpc_config {
    subnet_ids              = concat(aws_subnet.private[*].id, aws_subnet.public[*].id)
    endpoint_private_access = true
    endpoint_public_access  = true
  }

  # Cluster authentication mode. This does not configure OIDC; IRSA is
  # enabled by the aws_iam_openid_connect_provider resource.
  access_config {
    authentication_mode = "API_AND_CONFIG_MAP"
  }

  # The cluster policy must be attached to the role before creation.
  depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy]
}

# ─── OIDC provider for IRSA (IAM Roles for Service Accounts) ──────────

# Fetches the TLS certificate chain of the cluster's OIDC issuer so its
# fingerprint can be registered with IAM.
data "tls_certificate" "eks" {
  url = aws_eks_cluster.main.identity[0].oidc[0].issuer
}

resource "aws_iam_openid_connect_provider" "eks" {
  url             = aws_eks_cluster.main.identity[0].oidc[0].issuer
  client_id_list  = ["sts.amazonaws.com"]
  thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint]
  tags            = var.tags
}
# ─── EKS Node Group ──────────────────────────────────────────────────

# IAM role assumed by the worker instances (ec2.amazonaws.com).
resource "aws_iam_role" "eks_nodes" {
  name_prefix = "${var.prefix}-eks-nodes-"
  tags        = var.tags

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "ec2.amazonaws.com" }
    }]
  })
}

# AWS-managed policies attached to the node role.
resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
}

resource "aws_iam_role_policy_attachment" "eks_cni_policy" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
}

resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
}

# Managed node group, placed in the private subnets only.
resource "aws_eks_node_group" "main" {
  cluster_name    = aws_eks_cluster.main.name
  node_group_name = "${var.prefix}-nodes"
  node_role_arn   = aws_iam_role.eks_nodes.arn
  subnet_ids      = aws_subnet.private[*].id
  instance_types  = [var.node_instance_type]
  tags            = var.tags

  scaling_config {
    min_size     = var.node_min_count
    desired_size = var.node_count
    max_size     = var.node_max_count
  }

  # At most one node may be unavailable during an update.
  update_config {
    max_unavailable = 1
  }

  # All three node policies must be attached before the group is created.
  depends_on = [
    aws_iam_role_policy_attachment.eks_worker_node_policy,
    aws_iam_role_policy_attachment.eks_cni_policy,
    aws_iam_role_policy_attachment.eks_ecr_readonly,
  ]
}

View File

@@ -1,26 +0,0 @@
# Values exported by the EKS cluster module for the calling environment.

output "cluster_name" {
  description = "EKS cluster name"
  value       = aws_eks_cluster.main.name
}

# Echoes the region input back to the caller.
output "aws_region" {
  description = "AWS region"
  value       = var.region
}

output "oidc_issuer_url" {
  description = "EKS OIDC issuer URL (for IRSA)"
  value       = aws_eks_cluster.main.identity[0].oidc[0].issuer
}

output "oidc_provider_arn" {
  description = "IAM OIDC provider ARN (for IRSA trust policies)"
  value       = aws_iam_openid_connect_provider.eks.arn
}

output "vpc_id" {
  description = "VPC ID"
  value       = aws_vpc.main.id
}

View File

@@ -1,12 +0,0 @@
# Provider version constraints for the EKS cluster module. The providers
# themselves are configured by the calling environment.
terraform {
  required_providers {
    # AWS resources (VPC, IAM, EKS)
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }

    # Used by the tls_certificate data source for the OIDC thumbprint
    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
  }
}

View File

@@ -1,61 +0,0 @@
# ─── Region and naming (required) ────────────────────────────────────

variable "region" {
  type        = string
  description = "AWS region (e.g., eu-west-1, us-east-1)"
}

variable "prefix" {
  type        = string
  description = "Prefix for resource names (e.g., clst-dev)"
}

# ─── Networking ───────────────────────────────────────────────────────

# Subnets are carved out of this block with cidrsubnet().
variable "vpc_cidr" {
  type        = string
  description = "VPC CIDR block"
  default     = "10.100.0.0/16"
}
# Availability zones to spread subnets over. One public and one private
# subnet is created per entry, and the NAT gateway lives in the first
# public subnet, so the list must not be empty. EKS additionally requires
# subnets in at least two different AZs, hence the lower bound of two.
variable "availability_zones" {
  description = "List of AZs for subnets (2-3 recommended)"
  type        = list(string)

  validation {
    condition     = length(var.availability_zones) >= 2
    error_message = "At least two availability zones are required: EKS needs subnets in two or more AZs."
  }
}
# ─── EKS Cluster ─────────────────────────────────────────────────────

variable "node_instance_type" {
  type        = string
  description = "EKS node instance type (e.g., t3.medium, m5.xlarge)"
}

# node_count, node_min_count and node_max_count feed the node group's
# scaling_config as desired_size, min_size and max_size respectively.
variable "node_count" {
  type        = number
  description = "Desired number of EKS worker nodes"
}

variable "node_min_count" {
  type        = number
  description = "Minimum number of EKS worker nodes"
  default     = 1
}

variable "node_max_count" {
  type        = number
  description = "Maximum number of EKS worker nodes"
}

variable "kubernetes_version" {
  type        = string
  description = "Kubernetes version for EKS (e.g., \"1.30\")"
  default     = "1.30"
}

# ─── Tags ─────────────────────────────────────────────────────────────

variable "tags" {
  type        = map(string)
  description = "Tags applied to all resources"
  default     = {}
}

View File

@@ -1,21 +0,0 @@
# Prod environment: instantiates the shared EKS cluster module across
# three availability zones with general-purpose nodes.
module "cluster" {
  source = "../modules/cluster"

  region = var.region
  prefix = "clst"

  # VPC: zones "a", "b" and "c" of the selected region
  availability_zones = ["${var.region}a", "${var.region}b", "${var.region}c"]

  # EKS: general-purpose production nodes, scaling between 3 and 6
  node_instance_type = "m5.xlarge"
  node_count         = 3
  node_min_count     = 3
  node_max_count     = 6
  kubernetes_version = "1.30"

  tags = {
    Environment = "prod"
    ManagedBy   = "tofu"
  }
}

View File

@@ -1,5 +0,0 @@
# Root-level outputs for the prod environment, forwarded unchanged from
# the cluster module.

output "cluster_name" {
  value = module.cluster.cluster_name
}

output "aws_region" {
  value = module.cluster.aws_region
}

output "oidc_issuer_url" {
  value = module.cluster.oidc_issuer_url
}

output "oidc_provider_arn" {
  value = module.cluster.oidc_provider_arn
}

output "vpc_id" {
  value = module.cluster.vpc_id
}

View File

@@ -1,22 +0,0 @@
# Provider version constraints for the prod environment.
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }

    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
  }
}

# AWS provider pinned to the environment's region; no credentials are
# configured in this file.
provider "aws" {
  region = var.region
}

variable "region" {
  type        = string
  description = "AWS region for prod environment"
  default     = "eu-west-1"
}

View File

@@ -1,339 +0,0 @@
# =============================================================================
# AWS Workload Cluster
# =============================================================================
# A lean EKS cluster for running application workloads. No managed data
# services — those live on the platform cluster. ArgoCD (on the platform
# cluster) deploys apps to this cluster via the app-of-apps pattern.
#
# Platform components deployed by deploy-workload.sh:
# nginx-ingress, cert-manager, external-dns, external-secrets, alloy
#
# Usage:
# tofu init && tofu plan && tofu apply
# ./sync-tofu-outputs.sh --env aws-workload
# ./deploy-workload.sh --env aws-workload
# =============================================================================
# Inputs of the AWS workload cluster. Everything has a default except
# var.domain, which must be supplied.

variable "prefix" {
  type        = string
  description = "Prefix for resource names (e.g., clst-workload)"
  default     = "clst-workload"
}

# ─── Networking ──────────────────────────────────────────────────────

# NOTE(review): the default zones are eu-west-1 names; they must be
# overridden together with var.region when deploying to another region.
variable "availability_zones" {
  type        = list(string)
  description = "List of AZs for subnets"
  default     = ["eu-west-1a", "eu-west-1b"]
}

variable "vpc_cidr" {
  type        = string
  description = "VPC CIDR block"
  default     = "10.110.0.0/16"
}

# ─── EKS nodes ───────────────────────────────────────────────────────

variable "node_instance_type" {
  type        = string
  description = "EKS node instance type"
  default     = "t3.medium"
}

variable "node_count" {
  type        = number
  description = "Desired number of EKS worker nodes"
  default     = 2
}

variable "node_min_count" {
  type        = number
  description = "Minimum number of EKS worker nodes"
  default     = 1
}

variable "node_max_count" {
  type        = number
  description = "Maximum number of EKS worker nodes"
  default     = 4
}

variable "kubernetes_version" {
  type        = string
  description = "Kubernetes version for EKS"
  default     = "1.30"
}

# ─── DNS ─────────────────────────────────────────────────────────────

# Required: no default.
variable "domain" {
  type        = string
  description = "Public domain name — must have an existing Route53 hosted zone"
}

# ─── Tags ────────────────────────────────────────────────────────────

variable "tags" {
  type        = map(string)
  description = "Tags applied to all resources"

  default = {
    Environment = "workload"
    ManagedBy   = "tofu"
  }
}
# ─── VPC ──────────────────────────────────────────────────────────────

# VPC with DNS resolution and DNS hostnames enabled.
resource "aws_vpc" "main" {
  cidr_block           = var.vpc_cidr
  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = merge(var.tags, {
    Name = "${var.prefix}-vpc"
  })
}

resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = merge(var.tags, {
    Name = "${var.prefix}-igw"
  })
}

# Subnet addressing: the VPC CIDR is extended by 4 bits. Public subnets
# take netnums 0..N-1 and private subnets N..2N-1, where N is the number
# of availability zones.

# One public subnet per AZ; instances launched here get public IPs.
resource "aws_subnet" "public" {
  count = length(var.availability_zones)

  vpc_id                  = aws_vpc.main.id
  availability_zone       = var.availability_zones[count.index]
  cidr_block              = cidrsubnet(var.vpc_cidr, 4, count.index)
  map_public_ip_on_launch = true

  tags = merge(var.tags, {
    Name                                      = "${var.prefix}-public-${count.index + 1}"
    "kubernetes.io/cluster/${var.prefix}-eks" = "shared"
    "kubernetes.io/role/elb"                  = "1"
  })
}

# One private subnet per AZ; the EKS node group is placed in these.
resource "aws_subnet" "private" {
  count = length(var.availability_zones)

  vpc_id            = aws_vpc.main.id
  availability_zone = var.availability_zones[count.index]
  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones))

  tags = merge(var.tags, {
    Name                                      = "${var.prefix}-private-${count.index + 1}"
    "kubernetes.io/cluster/${var.prefix}-eks" = "shared"
    "kubernetes.io/role/internal-elb"         = "1"
  })
}
# ─── NAT and routing ─────────────────────────────────────────────────
# A single NAT gateway in the first public subnet serves every private
# subnet.

resource "aws_eip" "nat" {
  domain = "vpc"

  tags = merge(var.tags, {
    Name = "${var.prefix}-nat-eip"
  })
}

resource "aws_nat_gateway" "main" {
  subnet_id     = aws_subnet.public[0].id
  allocation_id = aws_eip.nat.id

  tags = merge(var.tags, {
    Name = "${var.prefix}-nat"
  })

  # Create the NAT gateway only after the internet gateway exists.
  depends_on = [aws_internet_gateway.main]
}

# Public route table: default route through the internet gateway.
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }

  tags = merge(var.tags, {
    Name = "${var.prefix}-public-rt"
  })
}

resource "aws_route_table_association" "public" {
  count = length(var.availability_zones)

  route_table_id = aws_route_table.public.id
  subnet_id      = aws_subnet.public[count.index].id
}

# Private route table: default route through the NAT gateway.
resource "aws_route_table" "private" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.main.id
  }

  tags = merge(var.tags, {
    Name = "${var.prefix}-private-rt"
  })
}

resource "aws_route_table_association" "private" {
  count = length(var.availability_zones)

  route_table_id = aws_route_table.private.id
  subnet_id      = aws_subnet.private[count.index].id
}
# ─── EKS Cluster ──────────────────────────────────────────────────────

# IAM role assumed by the EKS service (eks.amazonaws.com).
resource "aws_iam_role" "eks_cluster" {
  name_prefix = "${var.prefix}-eks-cluster-"
  tags        = var.tags

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "eks.amazonaws.com" }
    }]
  })
}

resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
  role       = aws_iam_role.eks_cluster.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
}

# Control plane. It is attached to both the private and the public
# subnets, and its API endpoint is reachable privately and publicly.
resource "aws_eks_cluster" "main" {
  name     = "${var.prefix}-eks"
  version  = var.kubernetes_version
  role_arn = aws_iam_role.eks_cluster.arn
  tags     = var.tags

  vpc_config {
    subnet_ids              = concat(aws_subnet.private[*].id, aws_subnet.public[*].id)
    endpoint_private_access = true
    endpoint_public_access  = true
  }

  # Cluster authentication mode.
  access_config {
    authentication_mode = "API_AND_CONFIG_MAP"
  }

  # The cluster policy must be attached to the role before creation.
  depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy]
}

# ─── OIDC provider for IRSA ──────────────────────────────────────────

# Fetches the TLS certificate chain of the cluster's OIDC issuer so its
# fingerprint can be registered with IAM.
data "tls_certificate" "eks" {
  url = aws_eks_cluster.main.identity[0].oidc[0].issuer
}

resource "aws_iam_openid_connect_provider" "eks" {
  url             = aws_eks_cluster.main.identity[0].oidc[0].issuer
  client_id_list  = ["sts.amazonaws.com"]
  thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint]
  tags            = var.tags
}
# ─── EKS Node Group ──────────────────────────────────────────────────

# IAM role assumed by the worker instances (ec2.amazonaws.com).
resource "aws_iam_role" "eks_nodes" {
  name_prefix = "${var.prefix}-eks-nodes-"
  tags        = var.tags

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "ec2.amazonaws.com" }
    }]
  })
}

# AWS-managed policies attached to the node role.
resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
}

resource "aws_iam_role_policy_attachment" "eks_cni_policy" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
}

resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" {
  role       = aws_iam_role.eks_nodes.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
}

# Managed node group, placed in the private subnets only.
resource "aws_eks_node_group" "main" {
  cluster_name    = aws_eks_cluster.main.name
  node_group_name = "${var.prefix}-nodes"
  node_role_arn   = aws_iam_role.eks_nodes.arn
  subnet_ids      = aws_subnet.private[*].id
  instance_types  = [var.node_instance_type]
  tags            = var.tags

  scaling_config {
    min_size     = var.node_min_count
    desired_size = var.node_count
    max_size     = var.node_max_count
  }

  # At most one node may be unavailable during an update.
  update_config {
    max_unavailable = 1
  }

  # All three node policies must be attached before the group is created.
  depends_on = [
    aws_iam_role_policy_attachment.eks_worker_node_policy,
    aws_iam_role_policy_attachment.eks_cni_policy,
    aws_iam_role_policy_attachment.eks_ecr_readonly,
  ]
}
# ─── External-DNS IRSA ───────────────────────────────────────────────
# Allows external-dns to manage Route53 records for app ingresses.

# Public hosted zone for the cluster domain.
data "aws_route53_zone" "main" {
  name         = var.domain
  private_zone = false
}

# Trust policy: only tokens issued by this cluster's OIDC provider for the
# `external-dns` service account in the `external-dns` namespace, with the STS
# audience, may assume the role.
data "aws_iam_policy_document" "external_dns_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRoleWithWebIdentity"]

    principals {
      type        = "Federated"
      identifiers = [aws_iam_openid_connect_provider.eks.arn]
    }

    # Condition keys are "<issuer host/path>:<claim>", hence the scheme strip.
    condition {
      test     = "StringEquals"
      variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:sub"
      values   = ["system:serviceaccount:external-dns:external-dns"]
    }

    condition {
      test     = "StringEquals"
      variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:aud"
      values   = ["sts.amazonaws.com"]
    }
  }
}

resource "aws_iam_role" "external_dns_irsa" {
  name_prefix        = "${var.prefix}-external-dns-irsa-"
  assume_role_policy = data.aws_iam_policy_document.external_dns_assume_role.json
  tags               = var.tags
}
# Permissions for external-dns: record changes are limited to the hosted zone
# looked up for var.domain; the list/read actions apply to all resources.
data "aws_iam_policy_document" "external_dns_route53" {
  statement {
    effect    = "Allow"
    actions   = ["route53:ChangeResourceRecordSets"]
    resources = ["arn:aws:route53:::hostedzone/${data.aws_route53_zone.main.zone_id}"]
  }

  statement {
    effect = "Allow"
    actions = [
      "route53:ListHostedZones",
      "route53:ListResourceRecordSets",
      "route53:ListTagsForResource",
    ]
    resources = ["*"]
  }
}

# Inline policy attaching the document above to the IRSA role.
resource "aws_iam_role_policy" "external_dns_route53" {
  name_prefix = "${var.prefix}-external-dns-route53-"
  role        = aws_iam_role.external_dns_irsa.id
  policy      = data.aws_iam_policy_document.external_dns_route53.json
}

View File

@@ -1,3 +0,0 @@
# Outputs of the EKS workload environment.
output "cluster_name" {
  value = aws_eks_cluster.main.name
}

output "aws_region" {
  value = var.region
}

# ARN of the IAM role created for external-dns (IRSA).
output "external_dns_irsa_role_arn" {
  value = aws_iam_role.external_dns_irsa.arn
}

View File

@@ -1,24 +0,0 @@
# Provider requirements for the EKS workload environment.
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }

    # Used by the tls_certificate data source for the OIDC thumbprint.
    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
  }
}

# Authentication: set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
# or configure an AWS profile: export AWS_PROFILE=clst
provider "aws" {
  region = var.region
}

variable "region" {
  type        = string
  description = "AWS region for the workload environment"
  default     = "eu-west-1"
}

View File

@@ -1,17 +0,0 @@
# Dev GKE cluster, built from the shared cluster module.
module "cluster" {
  source = "../modules/cluster"

  prefix     = "clst-dev"
  project_id = var.project_id
  region     = var.region

  # GKE — small dev nodes
  node_machine_type = "e2-standard-2"
  node_count        = 2

  # Dev clusters may be deleted freely.
  deletion_protection = false

  labels = {
    environment = "dev"
    managed-by  = "tofu"
  }
}

View File

@@ -1,3 +0,0 @@
# Values re-exported from the cluster module.
output "cluster_name" {
  value = module.cluster.cluster_name
}

output "project_id" {
  value = module.cluster.project_id
}

output "region" {
  value = module.cluster.region
}

View File

@@ -1,26 +0,0 @@
# Provider requirements and inputs for the GKE dev environment.
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 6.0"
    }
  }
}

# Authentication: use Application Default Credentials (gcloud auth application-default login)
# or set GOOGLE_APPLICATION_CREDENTIALS to a service account key file.
provider "google" {
  project = var.project_id
  region  = var.region
}

# No default: the project must be supplied by the caller.
variable "project_id" {
  type        = string
  description = "GCP project ID for the dev environment"
}

variable "region" {
  type        = string
  description = "GCP region"
  default     = "europe-west4"
}

View File

@@ -1,115 +0,0 @@
# ─── Required APIs ────────────────────────────────────────────────────
# Both services are left enabled when this configuration is destroyed
# (disable_on_destroy = false).
resource "google_project_service" "compute" {
  project            = var.project_id
  service            = "compute.googleapis.com"
  disable_on_destroy = false
}

resource "google_project_service" "container" {
  project            = var.project_id
  service            = "container.googleapis.com"
  disable_on_destroy = false
}
# ─── Networking ───────────────────────────────────────────────────────
# Custom-mode VPC: no automatic subnets, only the one declared below.
resource "google_compute_network" "main" {
  project                 = var.project_id
  name                    = "${var.prefix}-vpc"
  auto_create_subnetworks = false

  depends_on = [google_project_service.compute]
}

resource "google_compute_subnetwork" "main" {
  project = var.project_id
  name    = "${var.prefix}-subnet"
  region  = var.region
  network = google_compute_network.main.id

  # Primary range
  ip_cidr_range = "10.100.0.0/22"

  # Secondary ranges required for GKE VPC-native cluster; the cluster's
  # ip_allocation_policy refers to them by range_name.
  secondary_ip_range {
    range_name    = "pods"
    ip_cidr_range = "10.200.0.0/14" # /14 = ~262k pod IPs
  }

  secondary_ip_range {
    range_name    = "services"
    ip_cidr_range = "10.204.0.0/20" # /20 = ~4k service IPs
  }
}
# ─── GKE Cluster ──────────────────────────────────────────────────────
#
# Regional cluster (3 control-plane replicas) for HA.
# Workload Identity enabled — allows K8s service accounts to impersonate
# Google Service Accounts for keyless access to GCP services.
resource "google_container_cluster" "main" {
  project  = var.project_id
  name     = "${var.prefix}-gke"
  location = var.region # regional cluster

  network    = google_compute_network.main.id
  subnetwork = google_compute_subnetwork.main.id

  # VPC-native cluster with alias IP ranges
  ip_allocation_policy {
    cluster_secondary_range_name  = "pods"
    services_secondary_range_name = "services"
  }

  # Workload Identity pool — enables OIDC token projection for pods
  workload_identity_config {
    workload_pool = "${var.project_id}.svc.id.goog"
  }

  # Remove default node pool — we manage our own below
  remove_default_node_pool = true
  initial_node_count       = 1

  deletion_protection = var.deletion_protection

  # Version selection:
  #   kubernetes_version == null → enrol in the STABLE release channel and let
  #                                GKE choose the version (attribute unset).
  #   kubernetes_version set     → no release_channel block; the value is
  #                                applied as the minimum control-plane version.
  # Previously the variable only removed the release channel and was never
  # passed to the cluster, so setting it had no effect on the version.
  min_master_version = var.kubernetes_version

  dynamic "release_channel" {
    for_each = var.kubernetes_version == null ? [1] : []
    content {
      channel = "STABLE"
    }
  }

  resource_labels = var.labels

  depends_on = [google_project_service.container]
}
# Node pool managed separately from the cluster (the default pool is removed).
resource "google_container_node_pool" "main" {
  project  = var.project_id
  name     = "${var.prefix}-nodes"
  location = var.region
  cluster  = google_container_cluster.main.name

  node_count = var.node_count

  management {
    auto_upgrade = true
    auto_repair  = true
  }

  node_config {
    machine_type = var.node_machine_type

    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform",
    ]

    # GKE_METADATA mode is required for Workload Identity
    workload_metadata_config {
      mode = "GKE_METADATA"
    }

    # Caller-supplied labels plus a fixed role marker.
    labels = merge(var.labels, {
      role = "worker"
    })
  }
}

View File

@@ -1,16 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
# project_id and region are passed through from the module inputs.
output "cluster_name" {
  value       = google_container_cluster.main.name
  description = "GKE cluster name"
}

output "project_id" {
  value       = var.project_id
  description = "GCP project ID"
}

output "region" {
  value       = var.region
  description = "GCP region"
}

View File

@@ -1,8 +0,0 @@
# Provider requirement of the GKE cluster module; the provider itself is
# configured by the calling environment.
terraform {
  required_providers {
    google = {
      version = "~> 6.0"
      source  = "hashicorp/google"
    }
  }
}

View File

@@ -1,48 +0,0 @@
# ─── Project / Region ────────────────────────────────────────────────
variable "project_id" {
  type        = string
  description = "GCP project ID"
}

variable "region" {
  type        = string
  description = "GCP region (e.g., europe-west4, europe-west1)"
}

variable "prefix" {
  type        = string
  description = "Prefix for resource names (e.g., clst-dev)"
}

# ─── GKE Cluster ─────────────────────────────────────────────────────
variable "node_machine_type" {
  type        = string
  description = "GKE node machine type (e.g., e2-standard-2, e2-standard-4)"
}

variable "node_count" {
  type        = number
  description = "Number of nodes per zone (regional cluster spawns nodes in each zone)"
}

# When left null the cluster resource adds a STABLE release_channel block.
variable "kubernetes_version" {
  type        = string
  description = "GKE Kubernetes version channel (null = STABLE release channel)"
  default     = null
}

variable "deletion_protection" {
  type        = bool
  description = "Prevent cluster deletion (set true for production)"
  default     = false
}

# ─── Labels ──────────────────────────────────────────────────────────
variable "labels" {
  type        = map(string)
  description = "Labels applied to all resources"
  default     = {}
}

View File

@@ -1,17 +0,0 @@
# Production GKE cluster, built from the shared cluster module.
module "cluster" {
  source = "../modules/cluster"

  prefix     = "clst"
  project_id = var.project_id
  region     = var.region

  # GKE — general-purpose nodes for production
  node_machine_type = "e2-standard-4"
  node_count        = 3

  # Guard against accidental deletion of the production cluster.
  deletion_protection = true

  labels = {
    environment = "prod"
    managed-by  = "tofu"
  }
}

View File

@@ -1,3 +0,0 @@
# Values re-exported from the cluster module.
output "cluster_name" {
  value = module.cluster.cluster_name
}

output "project_id" {
  value = module.cluster.project_id
}

output "region" {
  value = module.cluster.region
}

View File

@@ -1,24 +0,0 @@
# Provider requirements and inputs for the GKE prod environment.
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 6.0"
    }
  }
}

provider "google" {
  project = var.project_id
  region  = var.region
}

# No default: the project must be supplied by the caller.
variable "project_id" {
  type        = string
  description = "GCP project ID for the prod environment"
}

variable "region" {
  type        = string
  description = "GCP region"
  default     = "europe-west1"
}

View File

@@ -1,194 +0,0 @@
# =============================================================================
# GCP Workload Cluster
# =============================================================================
# A lean GKE cluster for running application workloads. No managed data
# services — those live on the platform cluster. ArgoCD (on the platform
# cluster) deploys apps to this cluster via the app-of-apps pattern.
#
# Platform components deployed by deploy-workload.sh:
#   nginx-ingress, cert-manager, external-dns, external-secrets, alloy
#
# Usage:
#   tofu init && tofu plan && tofu apply
#   ./sync-tofu-outputs.sh --env gcp-workload
#   ./deploy-workload.sh --env gcp-workload
# =============================================================================

# Every input below has a default, so only project_id (declared with the
# provider configuration) has to be supplied.
variable "prefix" {
  type        = string
  description = "Prefix for resource names (e.g., clst-workload)"
  default     = "clst-workload"
}

variable "node_machine_type" {
  type        = string
  description = "GKE node machine type"
  default     = "e2-standard-2"
}

variable "node_count" {
  type        = number
  description = "Number of nodes per zone"
  default     = 1
}

variable "kubernetes_version" {
  type        = string
  description = "GKE Kubernetes version (null = STABLE release channel)"
  default     = null
}

variable "deletion_protection" {
  type        = bool
  description = "Prevent cluster deletion"
  default     = false
}

variable "labels" {
  type        = map(string)
  description = "Labels applied to all resources"
  default = {
    environment = "workload"
    managed-by  = "tofu"
  }
}
# ─── Required APIs ────────────────────────────────────────────────────
# All services are left enabled when this configuration is destroyed
# (disable_on_destroy = false).
resource "google_project_service" "compute" {
  project            = var.project_id
  service            = "compute.googleapis.com"
  disable_on_destroy = false
}

resource "google_project_service" "container" {
  project            = var.project_id
  service            = "container.googleapis.com"
  disable_on_destroy = false
}

# The external-dns service account declares a dependency on this service.
resource "google_project_service" "iam" {
  project            = var.project_id
  service            = "iam.googleapis.com"
  disable_on_destroy = false
}

resource "google_project_service" "dns" {
  project            = var.project_id
  service            = "dns.googleapis.com"
  disable_on_destroy = false
}
# ─── Networking ───────────────────────────────────────────────────────
# Custom-mode VPC: subnets are declared explicitly, none are auto-created.
resource "google_compute_network" "main" {
  project                 = var.project_id
  name                    = "${var.prefix}-vpc"
  auto_create_subnetworks = false

  depends_on = [google_project_service.compute]
}
# Subnet for the workload cluster. The secondary ranges are referenced by
# name from the cluster's ip_allocation_policy.
resource "google_compute_subnetwork" "main" {
  project = var.project_id
  name    = "${var.prefix}-subnet"
  region  = var.region
  network = google_compute_network.main.id

  ip_cidr_range = "10.110.0.0/22"

  # A /14 must start on a multiple of 4 in the second octet. The previous value
  # 10.210.0.0/14 had host bits set (210 & 252 = 208) and is therefore not a
  # valid network address. 10.208.0.0/14 spans 10.208.0.0–10.211.255.255 and
  # does not overlap the services range below.
  secondary_ip_range {
    range_name    = "pods"
    ip_cidr_range = "10.208.0.0/14"
  }

  secondary_ip_range {
    range_name    = "services"
    ip_cidr_range = "10.214.0.0/20"
  }
}
# ─── GKE Cluster ──────────────────────────────────────────────────────
# VPC-native cluster with Workload Identity enabled; the default node pool is
# removed and replaced by the separately managed pool.
resource "google_container_cluster" "main" {
  project  = var.project_id
  name     = "${var.prefix}-gke"
  location = var.region

  network    = google_compute_network.main.id
  subnetwork = google_compute_subnetwork.main.id

  # Names of the subnet's secondary ranges.
  ip_allocation_policy {
    cluster_secondary_range_name  = "pods"
    services_secondary_range_name = "services"
  }

  workload_identity_config {
    workload_pool = "${var.project_id}.svc.id.goog"
  }

  remove_default_node_pool = true
  initial_node_count       = 1

  deletion_protection = var.deletion_protection

  # Version selection:
  #   kubernetes_version == null → enrol in the STABLE release channel and let
  #                                GKE choose the version (attribute unset).
  #   kubernetes_version set     → no release_channel block; the value is
  #                                applied as the minimum control-plane version.
  # Previously the variable only removed the release channel and was never
  # passed to the cluster, so setting it had no effect on the version.
  min_master_version = var.kubernetes_version

  dynamic "release_channel" {
    for_each = var.kubernetes_version == null ? [1] : []
    content {
      channel = "STABLE"
    }
  }

  resource_labels = var.labels

  depends_on = [google_project_service.container]
}
# Worker node pool for the workload cluster.
resource "google_container_node_pool" "main" {
  project  = var.project_id
  name     = "${var.prefix}-nodes"
  location = var.region
  cluster  = google_container_cluster.main.name

  node_count = var.node_count

  management {
    auto_upgrade = true
    auto_repair  = true
  }

  node_config {
    machine_type = var.node_machine_type

    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform",
    ]

    workload_metadata_config {
      mode = "GKE_METADATA"
    }

    # Caller-supplied labels plus a fixed role marker.
    labels = merge(var.labels, { role = "worker" })
  }
}
# ─── External-DNS Workload Identity ──────────────────────────────────
# Allows external-dns to manage Cloud DNS records for app ingresses.

# Google service account that external-dns impersonates.
resource "google_service_account" "external_dns" {
  project      = var.project_id
  account_id   = "${var.prefix}-external-dns"
  display_name = "External-DNS Service Account (Workload Identity)"

  depends_on = [google_project_service.iam]
}

# Project-level DNS admin for the service account.
resource "google_project_iam_member" "external_dns_dns_admin" {
  project = var.project_id
  role    = "roles/dns.admin"
  member  = "serviceAccount:${google_service_account.external_dns.email}"
}

# Lets the Kubernetes service account external-dns/external-dns act as the
# Google service account above.
resource "google_service_account_iam_member" "external_dns_workload_identity" {
  service_account_id = google_service_account.external_dns.name
  role               = "roles/iam.workloadIdentityUser"
  member             = "serviceAccount:${var.project_id}.svc.id.goog[external-dns/external-dns]"

  # The member refers to the project's Workload Identity pool, which is
  # configured on the cluster resource. Nothing else orders this binding after
  # the cluster, so declare the dependency explicitly to keep the binding from
  # being applied before the pool exists.
  depends_on = [google_container_cluster.main]
}

View File

@@ -1,4 +0,0 @@
# Outputs of the GKE workload environment.
output "cluster_name" {
  value = google_container_cluster.main.name
}

output "project_id" {
  value = var.project_id
}

output "region" {
  value = var.region
}

# Email of the Google service account created for external-dns.
output "external_dns_gsa_email" {
  value = google_service_account.external_dns.email
}

View File

@@ -1,26 +0,0 @@
# Provider requirements and inputs for the GKE workload environment.
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 6.0"
    }
  }
}

# Authentication: use Application Default Credentials (gcloud auth application-default login)
# or set GOOGLE_APPLICATION_CREDENTIALS to a service account key file.
provider "google" {
  project = var.project_id
  region  = var.region
}

# No default: the project must be supplied by the caller.
variable "project_id" {
  type        = string
  description = "GCP project ID for the workload environment"
}

variable "region" {
  type        = string
  description = "GCP region"
  default     = "europe-west4"
}

View File

@@ -1,14 +0,0 @@
# Dev UpCloud cluster, built from the shared cluster module.
module "cluster" {
  source = "../modules/cluster"

  prefix = "clst-dev"
  zone   = "no-svg1"

  node_plan  = "DEV-1xCPU-2GB"
  node_count = 2

  network_cidr = "10.100.0.0/24"

  tags = {
    Environment = "dev"
    ManagedBy   = "tofu"
  }
}

View File

@@ -1,13 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
# Values re-exported from the cluster module.
output "cluster_id" { value = module.cluster.cluster_id }

output "cluster_name" { value = module.cluster.cluster_name }

output "zone" { value = module.cluster.zone }

View File

@@ -1,14 +0,0 @@
# Tool and provider requirements for the UpCloud dev environment.
terraform {
  required_version = ">= 1.0"

  required_providers {
    upcloud = {
      source  = "UpCloudLtd/upcloud"
      version = "~> 5.0"
    }
  }
}

# No arguments: credentials come from the environment.
provider "upcloud" {
  # Set via environment variables: UPCLOUD_USERNAME, UPCLOUD_PASSWORD
}

View File

@@ -1,56 +0,0 @@
# ─── Networking ───────────────────────────────────────────────────────
# Router for the private network
resource "upcloud_router" "kubernetes" {
  name = "${var.prefix}-${var.cluster_name}-router"
}

# Gateway for internet connectivity (NAT feature), attached to the router
resource "upcloud_gateway" "kubernetes" {
  name     = "${var.prefix}-${var.cluster_name}-gateway"
  zone     = var.zone
  features = ["nat"]

  router {
    id = upcloud_router.kubernetes.id
  }
}

# Private network for the Kubernetes cluster
resource "upcloud_network" "kubernetes" {
  name   = "${var.prefix}-${var.cluster_name}-network"
  zone   = var.zone
  router = upcloud_router.kubernetes.id

  ip_network {
    family  = "IPv4"
    address = var.network_cidr

    # First host address of the CIDR is used as the gateway address.
    gateway = cidrhost(var.network_cidr, 1)

    dhcp               = true
    dhcp_default_route = true
  }

  # Created only after the gateway exists.
  depends_on = [upcloud_gateway.kubernetes]
}
# Kubernetes cluster, attached to the private network above
resource "upcloud_kubernetes_cluster" "main" {
  name    = "${var.prefix}-${var.cluster_name}"
  zone    = var.zone
  network = upcloud_network.kubernetes.id

  private_node_groups     = true
  control_plane_ip_filter = var.control_plane_ip_filter
}

# Node group for worker nodes
resource "upcloud_kubernetes_node_group" "workers" {
  cluster = upcloud_kubernetes_cluster.main.id
  name    = "${var.prefix}-${var.cluster_name}-workers"

  plan       = var.node_plan
  node_count = var.node_count

  # Anti-affinity is only requested when there is more than one node.
  anti_affinity = var.node_count > 1

  labels = {
    role    = "worker"
    prefix  = var.prefix
    cluster = var.cluster_name
    # Falls back to "dev" when the tags map has no Environment key.
    env = lookup(var.tags, "Environment", "dev")
  }
}

View File

@@ -1,31 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
# network_cidr and zone are passed through from the module inputs; the other
# values are read from the created resources.
output "cluster_id" {
  value       = upcloud_kubernetes_cluster.main.id
  description = "The ID of the Kubernetes cluster"
}

output "cluster_name" {
  value       = upcloud_kubernetes_cluster.main.name
  description = "The name of the Kubernetes cluster"
}

output "network_id" {
  value       = upcloud_network.kubernetes.id
  description = "The ID of the private network"
}

output "network_cidr" {
  value       = var.network_cidr
  description = "The CIDR block of the private network"
}

output "kubernetes_version" {
  value       = upcloud_kubernetes_cluster.main.version
  description = "The Kubernetes version of the cluster"
}

output "zone" {
  value       = var.zone
  description = "The zone where the cluster is deployed"
}

View File

@@ -1,8 +0,0 @@
# Provider requirement of the UpCloud cluster module; the provider itself is
# configured by the calling environment.
terraform {
  required_providers {
    upcloud = {
      version = "~> 5.0"
      source  = "UpCloudLtd/upcloud"
    }
  }
}

View File

@@ -1,44 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
# Inputs without a default (prefix, zone, node_plan, node_count, tags) must be
# set by the caller.
variable "prefix" {
  type        = string
  description = "Prefix for resource names"
}

variable "cluster_name" {
  type        = string
  description = "Name of the Kubernetes cluster"
  default     = "main"
}

variable "zone" {
  type        = string
  description = "UpCloud zone"
}

variable "node_plan" {
  type        = string
  description = "UpCloud server plan for worker nodes"
}

variable "node_count" {
  type        = number
  description = "Number of worker nodes"
}

variable "network_cidr" {
  type        = string
  description = "CIDR block for the private network"
  default     = "10.100.0.0/24"
}

# NOTE: the default places no restriction on who can reach the API endpoint.
variable "control_plane_ip_filter" {
  type        = list(string)
  description = "CIDRs allowed to access the K8s API"
  default     = ["0.0.0.0/0"]
}

variable "tags" {
  type        = map(string)
  description = "Labels to apply to resources"
}

View File

@@ -1,16 +0,0 @@
# Production UpCloud cluster, built from the shared cluster module.
module "cluster" {
  source = "../modules/cluster"

  prefix = "clst"
  zone   = "de-fra1"

  node_plan  = "4xCPU-8GB"
  node_count = 3

  network_cidr            = "10.100.0.0/24"
  control_plane_ip_filter = ["0.0.0.0/0"] # TODO: restrict to known CIDRs

  tags = {
    Environment = "prod"
    ManagedBy   = "tofu"
  }
}

View File

@@ -1,13 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
# Values re-exported from the cluster module.
output "cluster_id" { value = module.cluster.cluster_id }

output "cluster_name" { value = module.cluster.cluster_name }

output "zone" { value = module.cluster.zone }

View File

@@ -1,14 +0,0 @@
# Tool and provider requirements for the UpCloud prod environment.
terraform {
  required_version = ">= 1.0"

  required_providers {
    upcloud = {
      source  = "UpCloudLtd/upcloud"
      version = "~> 5.0"
    }
  }
}

# No arguments: credentials come from the environment.
provider "upcloud" {
  # Set via environment variables: UPCLOUD_USERNAME, UPCLOUD_PASSWORD
}

View File

@@ -1,116 +0,0 @@
# =============================================================================
# UpCloud Workload Cluster
# =============================================================================
# A lean UCS cluster for running application workloads. No managed data
# services — those live on the platform cluster. ArgoCD (on the platform
# cluster) deploys apps to this cluster via the app-of-apps pattern.
#
# Platform components deployed by deploy-workload.sh:
#   nginx-ingress, cert-manager, external-dns, external-secrets, alloy
#
# Usage:
#   tofu init && tofu plan && tofu apply
#   ./sync-tofu-outputs.sh --env upcloud-workload
#   ./deploy-workload.sh --env upcloud-workload
# =============================================================================

# Every input has a default, so the configuration applies without any
# variables being set.
variable "prefix" {
  type        = string
  description = "Prefix for resource names"
  default     = "clst-workload"
}

variable "zone" {
  type        = string
  description = "UpCloud zone"
  default     = "fi-hel1"
}

variable "node_plan" {
  type        = string
  description = "UpCloud server plan for worker nodes"
  default     = "2xCPU-4GB"
}

variable "node_count" {
  type        = number
  description = "Number of worker nodes"
  default     = 2
}

variable "network_cidr" {
  type        = string
  description = "CIDR block for the private network"
  default     = "10.110.0.0/24"
}

# NOTE: the default places no restriction on who can reach the API endpoint.
variable "control_plane_ip_filter" {
  type        = list(string)
  description = "CIDRs allowed to access the K8s API"
  default     = ["0.0.0.0/0"]
}

variable "tags" {
  type        = map(string)
  description = "Labels to apply to resources"
  default = {
    Environment = "workload"
    ManagedBy   = "tofu"
  }
}
# ─── Networking ───────────────────────────────────────────────────────
# NOTE: resource names append "-workload-…" to var.prefix; with the default
# prefix "clst-workload" the resulting names contain "workload" twice.
resource "upcloud_router" "kubernetes" {
  name = "${var.prefix}-workload-router"
}

# NAT gateway attached to the router.
resource "upcloud_gateway" "kubernetes" {
  name     = "${var.prefix}-workload-gateway"
  zone     = var.zone
  features = ["nat"]

  router {
    id = upcloud_router.kubernetes.id
  }
}

# Private network for the cluster nodes.
resource "upcloud_network" "kubernetes" {
  name   = "${var.prefix}-workload-network"
  zone   = var.zone
  router = upcloud_router.kubernetes.id

  ip_network {
    family  = "IPv4"
    address = var.network_cidr

    # First host address of the CIDR is used as the gateway address.
    gateway = cidrhost(var.network_cidr, 1)

    dhcp               = true
    dhcp_default_route = true
  }

  # Created only after the gateway exists.
  depends_on = [upcloud_gateway.kubernetes]
}
# ─── Kubernetes Cluster ───────────────────────────────────────────────
resource "upcloud_kubernetes_cluster" "main" {
  name    = "${var.prefix}-workload"
  zone    = var.zone
  network = upcloud_network.kubernetes.id

  private_node_groups     = true
  control_plane_ip_filter = var.control_plane_ip_filter
}

# Worker node group.
resource "upcloud_kubernetes_node_group" "workers" {
  cluster = upcloud_kubernetes_cluster.main.id
  name    = "${var.prefix}-workload-workers"

  plan       = var.node_plan
  node_count = var.node_count

  # Anti-affinity is only requested when there is more than one node.
  anti_affinity = var.node_count > 1

  labels = {
    role    = "worker"
    prefix  = var.prefix
    cluster = "workload"
    # Falls back to "workload" when the tags map has no Environment key.
    env = lookup(var.tags, "Environment", "workload")
  }
}

View File

@@ -1,3 +0,0 @@
# Outputs of the UpCloud workload environment.
output "cluster_name" {
  value = upcloud_kubernetes_cluster.main.name
}

output "cluster_id" {
  value = upcloud_kubernetes_cluster.main.id
}

output "zone" {
  value = var.zone
}

View File

@@ -1,14 +0,0 @@
# Tool and provider requirements for the UpCloud workload environment.
terraform {
  required_version = ">= 1.0"

  required_providers {
    upcloud = {
      source  = "UpCloudLtd/upcloud"
      version = "~> 5.0"
    }
  }
}

# No arguments: credentials come from the environment.
provider "upcloud" {
  # Set via environment variables: UPCLOUD_USERNAME, UPCLOUD_PASSWORD
}

View File

@@ -1,66 +0,0 @@
#!/bin/bash
# get-kubeconfig.sh — locate or fetch the kubeconfig for a cluster.
#
# Usage: get-kubeconfig.sh <cluster>      cluster = <platform>-<env>, e.g. aks-dev
#
# If private/<cluster>/kubeconfig exists and is non-empty its path is printed.
# Otherwise the file is fetched with the platform CLI, taking cluster details
# from tofu outputs where available and from environment/defaults otherwise.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
PROJECT_ROOT="$(dirname "$TOFU_ROOT")"

CLUSTER="${1:?Usage: $0 <cluster> (e.g., aks-dev, eks-prod)}"
PLATFORM="${CLUSTER%%-*}"   # aks-dev → aks
ENV="${CLUSTER#*-}"         # aks-dev → dev
KUBECONFIG_FILE="$PROJECT_ROOT/private/$CLUSTER/kubeconfig"

# -s rather than -f: an empty file left behind by a failed fetch is not a
# usable cache and must be re-fetched.
if [[ -s "$KUBECONFIG_FILE" ]]; then
  echo "Kubeconfig already exists: $KUBECONFIG_FILE"
  echo ""
  echo " export KUBECONFIG=$KUBECONFIG_FILE"
else
  echo "No cached kubeconfig. Fetching from platform..."

  # Reject unknown platforms before creating directories or sourcing files.
  case "$PLATFORM" in
    aks|eks|gke|upc) ;;
    *)
      echo "Error: unknown platform '$PLATFORM'"
      exit 1
      ;;
  esac

  # Load platform credentials
  ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
  if [[ -f "$ENV_FILE" ]]; then
    set -a
    # shellcheck disable=SC1090
    source "$ENV_FILE"
    set +a
  fi

  TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
  if [[ ! -d "$TOFU_DIR" ]]; then
    echo "Error: tofu directory not found: $TOFU_DIR"
    exit 1
  fi

  mkdir -p "$(dirname "$KUBECONFIG_FILE")"
  cd "$TOFU_DIR"

  case "$PLATFORM" in
    aks)
      RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "$CLUSTER-rg")
      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
      az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_FILE" --overwrite-existing
      ;;
    eks)
      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
      REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
      aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_FILE"
      ;;
    gke)
      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
      REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
      PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
      # gcloud writes to the file named by KUBECONFIG. Pointing it at the target
      # keeps ~/.kube/config untouched and avoids copying every unrelated
      # context and credential from the user's config into the per-cluster file.
      KUBECONFIG="$KUBECONFIG_FILE" gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT"
      ;;
    upc)
      CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
      if [[ -z "$CLUSTER_ID" ]]; then
        echo "Error: could not determine UpCloud cluster ID (no tofu output and UPCLOUD_CLUSTER_ID is unset)"
        exit 1
      fi
      # Fetch into a temp file first so that a failing upctl never leaves an
      # empty kubeconfig behind.
      TMP_FILE="$(mktemp "$KUBECONFIG_FILE.XXXXXX")"
      if ! upctl kubernetes config "$CLUSTER_ID" > "$TMP_FILE"; then
        rm -f "$TMP_FILE"
        echo "Error: upctl could not fetch the kubeconfig for cluster '$CLUSTER_ID'"
        exit 1
      fi
      mv "$TMP_FILE" "$KUBECONFIG_FILE"
      ;;
  esac

  chmod 600 "$KUBECONFIG_FILE"
  echo "Kubeconfig saved: $KUBECONFIG_FILE"
  echo ""
  echo " export KUBECONFIG=$KUBECONFIG_FILE"
fi

View File

@@ -1,246 +0,0 @@
#!/bin/bash
# setup-cluster.sh — plan, apply or destroy a cluster with OpenTofu.
# Abort on errors, on unset variables and on failures inside pipelines.
set -euo pipefail
# Directory layout: this script's directory, its parent (the tofu root, which
# holds platforms/ and configs/) and the parent of that (the project root,
# which holds clusters/ and private/).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
PROJECT_ROOT="$(dirname "$TOFU_ROOT")"
# ─── Usage ────────────────────────────────────────────────────────────
# usage [exit_code]
#   Prints the help text to stdout and exits the script with exit_code
#   (0 when omitted). The heredoc below is user-facing output.
usage() {
cat <<EOF
Usage: $0 <cluster> [options]
Provision a Kubernetes cluster using OpenTofu.
Mirrors bootstrap.sh convention: cluster = <platform>-<env>
Clusters: aks-dev | aks-prod | eks-dev | eks-prod
gke-dev | gke-prod | upc-dev | upc-prod
<platform>-workload (for workload clusters)
Options:
--plan Plan only, don't apply
--destroy Destroy the cluster (use teardown-cluster.sh instead)
--auto Skip confirmation prompts
-h, --help Show this help
Examples:
$0 aks-dev
$0 eks-prod --plan
$0 upc-dev --auto
Prerequisites:
- tofu, kubectl, helm installed
- Platform credentials in .tofu/configs/<platform>.env
- Cluster config in clusters/<cluster>.yaml
After provisioning, run:
./bootstrap.sh <cluster>
EOF
# $1 here is the function's own argument, not the script's.
exit "${1:-0}"
}
# ─── Parse arguments ──────────────────────────────────────────────────
# Flags may appear in any position; exactly one positional <cluster> is
# accepted. usage() exits the script, so nothing runs after it is called.
CLUSTER=""
PLAN_ONLY=false
DESTROY=false
AUTO_APPROVE=false

while (( $# > 0 )); do
  arg="$1"
  shift
  case "$arg" in
    --plan)    PLAN_ONLY=true ;;
    --destroy) DESTROY=true ;;
    --auto)    AUTO_APPROVE=true ;;
    -h|--help) usage 0 ;;
    -*)
      echo "Unknown option: $arg"
      usage 1
      ;;
    *)
      # Only the first positional argument is the cluster name.
      if [[ -n "$CLUSTER" ]]; then
        echo "Error: unexpected argument '$arg'"
        usage 1
      fi
      CLUSTER="$arg"
      ;;
  esac
done

if [[ -z "$CLUSTER" ]]; then
  echo "Error: <cluster> argument required"
  usage 1
fi
# ─── Map cluster → platform + env ────────────────────────────────────
# Split at the first '-': everything before it is the platform, everything
# after it is the environment.
PLATFORM="${CLUSTER%%-*}"   # aks-dev → aks
ENV="${CLUSTER#*-}"         # aks-dev → dev

if [[ ! "$PLATFORM" =~ ^(aks|eks|gke|upc)$ ]]; then
  echo "Error: unknown platform '$PLATFORM'. Expected: aks, eks, gke, upc"
  exit 1
fi

TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
[[ -d "$TOFU_DIR" ]] || {
  echo "Error: tofu directory not found: $TOFU_DIR"
  echo "Available environments for $PLATFORM:"
  # List sibling environment directories, hiding the shared modules/ directory.
  ls -1 "$TOFU_ROOT/platforms/$PLATFORM/" 2>/dev/null | grep -v modules || echo " (none)"
  exit 1
}

# Banner summarising what is about to be provisioned.
printf '%s\n' \
  "=========================================" \
  " Kubernetes Cluster Setup" \
  "=========================================" \
  "" \
  " Cluster: $CLUSTER" \
  " Platform: $PLATFORM" \
  " Env: $ENV" \
  " Tofu dir: $TOFU_DIR" \
  ""
# ─── Prerequisites ────────────────────────────────────────────────────
echo "=== Checking Prerequisites ==="
for required_tool in tofu kubectl helm; do
  command -v "$required_tool" >/dev/null 2>&1 || { echo "Error: $required_tool is not installed."; exit 1; }
done
echo " tofu, kubectl, helm: OK"

# ─── Load platform credentials ────────────────────────────────────────
# The env file is sourced with auto-export on, so every assignment in it is
# exported to tofu and the cloud CLIs.
ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
if [[ -f "$ENV_FILE" ]]; then
  echo " Loading credentials from configs/$PLATFORM.env"
  set -a
  # shellcheck disable=SC1090
  source "$ENV_FILE"
  set +a
else
  echo " Warning: $ENV_FILE not found — using existing environment/CLI auth"
  echo " Copy configs/$PLATFORM.env.example → configs/$PLATFORM.env to configure"
fi

# ─── Load cluster config (if exists) ──────────────────────────────────
# Each top-level key of the YAML file is exported as CLUSTER_<key>.
CLUSTER_CONFIG="$PROJECT_ROOT/clusters/$CLUSTER.yaml"
if [[ -f "$CLUSTER_CONFIG" ]]; then
  echo " Loading cluster config from clusters/$CLUSTER.yaml"
  if command -v yq >/dev/null 2>&1; then
    # The values are data, not code: they are parsed as key=value lines and
    # exported literally. The previous `eval` of generated `export` statements
    # executed any $(...), backticks or quotes contained in a YAML value.
    while IFS= read -r cfg_line; do
      # A line without '=' is the continuation of a multi-line value; skip it.
      [[ "$cfg_line" == *=* ]] || continue
      cfg_key="${cfg_line%%=*}"
      cfg_value="${cfg_line#*=}"
      if [[ "$cfg_key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        export "CLUSTER_${cfg_key}=${cfg_value}"
      else
        echo " Warning: skipping config key '$cfg_key' (not usable as a variable name)"
      fi
    done < <(yq -r 'to_entries[] | "\(.key)=\(.value)"' "$CLUSTER_CONFIG")
    echo " Cluster name: ${CLUSTER_clusterName:-$CLUSTER}"
  else
    echo " Warning: yq not installed — cluster config not loaded"
  fi
else
  echo " Warning: $CLUSTER_CONFIG not found — using defaults"
fi
echo ""
# ─── Run OpenTofu ─────────────────────────────────────────────────────
cd "$TOFU_DIR"

KUBECONFIG_DIR="$PROJECT_ROOT/private/$CLUSTER"
KUBECONFIG_FILE="$KUBECONFIG_DIR/kubeconfig"

# save_kubeconfig_from_output
#   Writes the `kubeconfig` tofu output to $KUBECONFIG_FILE. The output is
#   captured in a temp file first, so a missing or empty output never leaves an
#   empty kubeconfig behind. Returns non-zero when the output is unavailable.
save_kubeconfig_from_output() {
  local tmp_file
  tmp_file="$(mktemp "$KUBECONFIG_FILE.XXXXXX")"
  if tofu output -raw kubeconfig > "$tmp_file" 2>/dev/null && [[ -s "$tmp_file" ]]; then
    mv "$tmp_file" "$KUBECONFIG_FILE"
    return 0
  fi
  rm -f "$tmp_file"
  return 1
}

echo "=== Initializing OpenTofu ==="
tofu init
echo ""

if $DESTROY; then
  echo "=== Planning Destruction ==="
  tofu plan -destroy -out=tfplan
  if ! $AUTO_APPROVE; then
    echo ""
    # Destruction requires the full word "yes", not a single keypress.
    read -rp "DESTROY cluster $CLUSTER? This is irreversible. (yes/no) " REPLY
    [[ "$REPLY" == "yes" ]] || { echo "Cancelled."; exit 1; }
  fi
  echo "Destroying infrastructure..."
  tofu apply tfplan
  # The cached kubeconfig now points at a cluster that no longer exists; remove
  # it so get-kubeconfig.sh does not report it as an existing kubeconfig.
  rm -f "$KUBECONFIG_FILE"
  echo ""
  echo "=== Cluster $CLUSTER Destroyed ==="
elif $PLAN_ONLY; then
  echo "=== Planning Infrastructure ==="
  tofu plan
  echo ""
  echo "=== Plan complete (--plan mode, no changes applied) ==="
else
  echo "=== Planning Infrastructure ==="
  tofu plan -out=tfplan
  if ! $AUTO_APPROVE; then
    echo ""
    read -rp "Apply this plan for $CLUSTER? (y/n) " -n 1 REPLY
    echo
    [[ "$REPLY" =~ ^[Yy]$ ]] || { echo "Cancelled."; exit 1; }
  fi
  echo "Applying infrastructure..."
  tofu apply tfplan

  # ─── Save kubeconfig ──────────────────────────────────────────────
  mkdir -p "$KUBECONFIG_DIR"
  echo ""
  echo "=== Saving Kubeconfig ==="
  case "$PLATFORM" in
    aks)
      if save_kubeconfig_from_output; then
        echo " Saved from tofu output"
      else
        echo " Fetching from Azure CLI..."
        RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}-rg")
        NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
        az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_FILE" --overwrite-existing
      fi
      ;;
    eks)
      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
      REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
      aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_FILE"
      ;;
    gke)
      NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
      REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
      PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
      # gcloud writes to the file named by KUBECONFIG. Pointing it at the
      # target keeps ~/.kube/config untouched and avoids copying unrelated
      # contexts and credentials into the per-cluster file.
      if ! KUBECONFIG="$KUBECONFIG_FILE" gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT" 2>/dev/null; then
        echo " Warning: could not fetch kubeconfig via gcloud"
      fi
      ;;
    upc)
      if save_kubeconfig_from_output; then
        echo " Saved from tofu output"
      else
        CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
        if [[ -n "$CLUSTER_ID" ]]; then
          # Same temp-file approach as above: never leave an empty kubeconfig.
          upc_tmp="$(mktemp "$KUBECONFIG_FILE.XXXXXX")"
          if upctl kubernetes config "$CLUSTER_ID" > "$upc_tmp"; then
            mv "$upc_tmp" "$KUBECONFIG_FILE"
          else
            rm -f "$upc_tmp"
            echo " Warning: could not fetch kubeconfig via upctl"
          fi
        else
          echo " Warning: could not determine cluster ID for kubeconfig"
        fi
      fi
      ;;
  esac

  # -s: only a non-empty file counts as a saved kubeconfig.
  if [[ -s "$KUBECONFIG_FILE" ]]; then
    chmod 600 "$KUBECONFIG_FILE"
    echo " Kubeconfig: $KUBECONFIG_FILE"
  fi

  # ─── Wait for nodes ──────────────────────────────────────────────
  echo ""
  echo "=== Waiting for Cluster Nodes ==="
  if [[ -s "$KUBECONFIG_FILE" ]]; then
    export KUBECONFIG="$KUBECONFIG_FILE"
    if kubectl wait --for=condition=Ready nodes --all --timeout=300s 2>/dev/null; then
      echo " All nodes ready"
    else
      echo " Warning: nodes not ready within timeout — check cluster status"
    fi
  else
    # Without a kubeconfig kubectl cannot reach the new cluster; say so
    # instead of reporting a misleading readiness timeout.
    echo " Warning: no kubeconfig was saved — skipping node readiness check"
  fi

  # ─── Summary ─────────────────────────────────────────────────────
  echo ""
  echo "========================================="
  echo " Cluster $CLUSTER Provisioned"
  echo "========================================="
  echo ""
  echo " Kubeconfig: $KUBECONFIG_FILE"
  echo ""
  echo " Next steps:"
  echo " export KUBECONFIG=$KUBECONFIG_FILE"
  echo " ./bootstrap.sh $CLUSTER"
  echo ""
fi

View File

@@ -1,7 +0,0 @@
#!/bin/bash
# teardown-cluster.sh — thin wrapper around setup-cluster.sh.
# Replaces this process with setup-cluster.sh, forwarding every argument and
# appending --destroy.
set -euo pipefail

here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

exec "$here/setup-cluster.sh" "$@" --destroy

View File

@@ -80,44 +80,28 @@ This repository contains the complete GitOps configuration for our Kubernetes cl
```
.
├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps)
├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster)
├── .tofu/ # Infrastructure provisioning (OpenTofu)
│ ├── platforms/ # Per-platform IaC (one dir per cloud)
│ │ ├── aks/ # Azure AKS (modules/ + dev/ + prod/ + workload/)
│ │ ├── eks/ # AWS EKS
│ │ ├── gke/ # GCP GKE
│ │ └── upc/ # UpCloud
│ ├── configs/ # Platform credentials (git-ignored)
│ │ └── *.env.example # Template for each platform
│ └── scripts/ # Cluster lifecycle scripts
│ ├── setup-cluster.sh # Create cluster: ./setup-cluster.sh aks-dev
│ ├── teardown-cluster.sh
│ └── get-kubeconfig.sh
├── clusters/ # Cluster metadata (domain, trustedIPs, etc.)
├── bootstrap.sh # Cluster initialization script
├── _app-of-apps.yaml # Root ArgoCD Application (App-of-Apps pattern)
├── infra/ # Infrastructure ArgoCD Applications (Kustomize multi-cluster)
│ ├── base/ # Base ArgoCD Application manifests (one dir per component)
│ │ ├── kustomization.yaml # Aggregates all component subdirectories
│ │ ├── traefik-application/
│ │ │ ├── kustomization.yaml
│ │ │ └── traefik-application.yaml
│ │ ├── keycloak/
│ │ │ ├── kustomization.yaml
│ │ │ └── keycloak.yaml
│ │ ├── grafana/
│ │ ├── prometheus/
│ │ ├── ... # Each component in its own subdirectory
│ │ └── secrets/
│ ├── base/ # Base ArgoCD Application manifests (EU defaults)
│ │ ├── kustomization.yaml
│ │ ├── traefik-application.yaml
│ │ ├── keycloak.yaml
│ │ ├── grafana.yaml
│ │ ├── gitea.yaml
│ │ ├── gitea-actions.yaml
│ │ ├── tempo.yaml
│ │ ├── renovate.yaml
│ │ ├── ... # All other Application manifests
│ │ └── secrets.yaml
│ ├── overlays/ # Per-cluster overrides (Kustomize)
│ │ ├── upc-dev/ # UpCloud Dev — includes all base components
│ │ ├── upc-prod/ # UpCloud Prod — all components + patches
│ │ ├── aks-dev/ # Azure AKS Dev — selective components only
│ │ ├── aks-prod/ # Azure AKS Prod
│ │ ├── upc-dev/ # UpCloud Dev (uses base as-is)
│ │ ├── upc-prod/ # UpCloud Prod (patches value paths)
│ │ ├── eks-dev/ # AWS EKS Dev
│ │ ├── eks-prod/ # AWS EKS Prod
│ │ ├── aks-dev/ # Azure AKS Dev
│ │ ├── aks-prod/ # Azure AKS Prod
│ │ ├── gke-dev/ # GCP GKE Dev
│ │ └── gke-prod/ # GCP GKE Prod
│ ├── dashboards/ # Grafana dashboard ConfigMaps
@@ -132,18 +116,11 @@ This repository contains the complete GitOps configuration for our Kubernetes cl
│ ├── gke-dev/ # GCP GKE Dev
│ └── gke-prod/ # GCP GKE Prod
├── apps/ # Business Applications (Kustomize, same pattern as infra)
│ ├── base/ # One subdirectory per app
│ │ ├── kustomization.yaml
│ │ ├── musicman/
│ │ ├── mcp10x/
│ │ ├── dot-ai-stack/
│ │ ├── ts-mcp/
│ │ └── argo-mcp/
│ └── overlays/ # Per-cluster: cherry-pick or include all
│ ├── upc-dev/ # All apps
│ ├── upc-prod/ # All apps + patches
│ └── aks-dev/ # Selective apps only
├── apps/ # Business Applications
│ ├── mcp10x.yaml
│ ├── musicman.yaml
│ ├── dot-ai-stack.yaml
│ └── argo-mcp.yaml
├── cluster-resources/ # Cluster-wide Kubernetes resources
│ ├── letsencrypt-issuer.yaml
@@ -378,6 +355,7 @@ kubectl patch application myapp -n argocd \
| **Fluent-Bit** | Log shipping | `monitoring` | DaemonSet |
| **OpenCost** | Cost monitoring | `monitoring` | 1 |
| **Renovate** | Dependency updates | `renovate` | CronJob |
| **Trivy** | Vulnerability scanning | `trivy-system` | 1 |
**Full specs**: [Technical Reference - Infrastructure Components](docs/REFERENCE.md#infrastructure-components)
@@ -395,7 +373,7 @@ kubectl patch application myapp -n argocd \
## 📖 Key Concepts
### App-of-Apps Pattern
`_app-of-apps-{cluster}.yaml` is the root Application that manages all other Applications in `infra/`. Each component in `infra/base/` lives in its own subdirectory (e.g., `infra/base/grafana/`). Overlays can either include **all** components (via `../../base`) or **cherry-pick** specific ones (via `../../base/grafana`, `../../base/prometheus`, etc.). Per-cluster patches swap Helm value file paths. Supported clusters: `upc-dev`, `upc-prod`, `eks-dev`, `eks-prod`, `aks-dev`, `aks-prod`, `gke-dev`, `gke-prod`.
`_app-of-apps-{cluster}.yaml` is the root Application that manages all other Applications in `infra/`. Kustomize overlays in `infra/overlays/{cluster}/` render the base Applications with per-cluster patches (e.g., swapping value file paths). Supported clusters: `upc-dev`, `upc-prod`, `eks-dev`, `eks-prod`, `aks-dev`, `aks-prod`, `gke-dev`, `gke-prod`.
### Multi-Source Pattern
Applications reference both:

View File

@@ -1,6 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- argo-mcp.yaml
- argocdmcp-auth-oidc-sealed.yaml
- argocd-mcp-credentials.yaml

View File

@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- dot-ai-stack.yaml
- dot-ai-secrets.yaml

View File

@@ -1,8 +1,8 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- dot-ai-stack
- mcp10x
- musicman
- ts-mcp
- argo-mcp
- dot-ai-stack.yaml
- mcp10x.yaml
- musicman.yaml
- ts-mcp.yaml
- argo-mcp.yaml

View File

@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- mcp10x.yaml
- forte10x-app-credentials-sealed.yaml

View File

@@ -36,8 +36,13 @@ spec:
automated:
prune: true
selfHeal: true
allowEmpty: false
syncOptions:
- CreateNamespace=true
- Validate=true
- ServerSideApply=false
- Replace=false
retry:
limit: 5
backoff:

View File

@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- musicman.yaml
- musicman-credentials.yaml

View File

@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ts-mcp.yaml
- ts-mcp-secrets-sealed.yaml

View File

@@ -1,13 +0,0 @@
---
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
name: ts-mcp-secrets
namespace: ts-mcp
spec:
encryptedData:
AZURE_CLIENT_SECRET: AgCWj525+NHkZ8XG97hEe4RS0SDC0QIGDXmEvzSlIqJQ9XVZEeKxVuAYmJ+w/HH7zBXD3qlZISeOPKn3FbMEeRukmYK0d5PsH26tRUMPoMzwWCuQkZIQ83uX9Pz/wMiqW8aZFIxpdEiUgVdanxHSFoDRPC1VlSEtV9B9yN2MgXBID5s0oje5BM9ttc4WVRe6+9pMeaOC6u+YUgcfY7xPLetZfC9nQO4zn4jYhoQXfAddwMzNODvQNGPzIv6PXDXJweTwdmtGaxM6eDdcCJI/30bEV9prA5m6UlgTZ/Qp+onU70KdkBA9gM9tMMVUR6j/2sbWzqMP/rVaFLeUH1PjHv15n4EieWyuDyYEfmZNDFXc7O9RIK6P0jCIE+t3myxK2ZQ7cfXprdOSj94au0qP6leat0UUVoc9CFJHHtrNxXYWl7IYVhwvIQCMSgO2qoAXkdW4wKVJAcbJadJjoL2pWxzjaD4GgnUaAxWBANqZI2lD8CED4VfUVMB0ZUYRS/zvy/eqIGlT8WbzwTYFi3YDZRvAUIknxaWEavIG4x52d0FqTmFYY06W53fGYfBrUjJI54GWYyBpKdZTf7b/AlAN0+kwkk6OqsUWwWDqxR7LVCcPhjSIKd/THp+Tbq9z5TiPIHxOO9V60u51f8IoQrEgQfNov7CEGQZ8B9HUGObjNc5MhujzBJasMhrUcd2Ddk6KWk07B7223p/gIEM+81ZWQYUcc29+U/j1dQyRNZy/TC56ywe5DDBJSoGp
template:
metadata:
name: ts-mcp-secrets
namespace: ts-mcp

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../base/musicman

View File

@@ -1,5 +1,4 @@
#!/bin/zsh
# in case of $'\r': command not found error, run command below first
# sed -i 's/\r$//' ./bootstrap.sh
@@ -18,7 +17,7 @@ echo "Bootstrapping cluster: ${clusterName} (${CLUSTER})..."
Bootstrap()
{
ArgoCd
Gitea
# Gitea
}
@@ -28,9 +27,8 @@ Bootstrap()
Gitea()
{
echo "Installing secret..."
kubectl apply -f "secrets/"
kubectl apply -f "private/${CLUSTER}/gitea-repo-main.yaml"
kubectl apply -f "private/${CLUSTER}/main.key"
kubectl apply -f private/gitea-repo-main.yaml
kubectl apply -f private/main.key
}
############################################################
@@ -38,15 +36,10 @@ Gitea()
############################################################
ArgoCd()
{
# Pre-create ConfigMap for repo-server env (must exist before Helm upgrade)
kubectl create namespace argocd --dry-run=client -o yaml | kubectl apply -f -
kubectl apply -f cluster-resources/argocd-repo-server-config.yaml
# install argocd
echo "Installing ArgoCD..."
helm upgrade --install argocd argo-cd \
--repo https://argoproj.github.io/argo-helm \
--version "7.8.0" \
--namespace argocd --create-namespace \
--values infra/values/base/argocd-values.yaml \
--values "infra/values/${CLUSTER}/argocd-values.yaml" \
@@ -56,4 +49,4 @@ ArgoCd()
kubectl apply -f "_app-of-apps-${CLUSTER}.yaml" -n argocd
}
Bootstrap
# Bootstrap

View File

@@ -1,83 +0,0 @@
# CronJob: syncs OIDC client secret from registrar-managed
# argocd-oidc-credentials into argocd-secret (oidc.clientSecret key).
# Runs every 2 min. No-ops if source secret doesn't exist yet
# (safe for fresh deploys before Keycloak is up).
#
# Objects defined here (all in the argocd namespace):
#   ServiceAccount + Role + RoleBinding — grant get/patch on exactly the two
#   secrets involved (argocd-oidc-credentials, argocd-secret).
#   CronJob — runs the sync script below with that ServiceAccount.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: argocd-oidc-sync
  namespace: argocd
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: argocd-oidc-sync
  namespace: argocd
rules:
  # Access is restricted by name to the source and target secrets only.
  - apiGroups: [""]
    resources: ["secrets"]
    resourceNames: ["argocd-oidc-credentials", "argocd-secret"]
    verbs: ["get", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: argocd-oidc-sync
  namespace: argocd
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: argocd-oidc-sync
subjects:
  - kind: ServiceAccount
    name: argocd-oidc-sync
    namespace: argocd
---
apiVersion: batch/v1
kind: CronJob
metadata:
  name: argocd-oidc-sync
  namespace: argocd
spec:
  schedule: "*/2 * * * *"
  # Never run two syncs at once; a slow run simply delays the next one.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          serviceAccountName: argocd-oidc-sync
          restartPolicy: Never
          containers:
            - name: sync
              # NOTE(review): ":latest" is a floating tag — consider pinning a
              # specific kubectl image version for reproducible runs.
              image: bitnami/kubectl:latest
              command: ["/bin/sh", "-c"]
              args:
                - |
                  set -e

                  # Exit gracefully if source secret doesn't exist yet
                  if ! kubectl get secret argocd-oidc-credentials -n argocd >/dev/null 2>&1; then
                    echo "argocd-oidc-credentials not found — skipping (Keycloak not ready yet)"
                    exit 0
                  fi

                  # Read current OIDC client secret
                  NEW_SECRET=$(kubectl get secret argocd-oidc-credentials -n argocd \
                    -o jsonpath='{.data.client-secret}' | base64 -d)

                  # Without pipefail a failed/empty lookup yields an empty string;
                  # never overwrite the live OIDC secret with an empty value.
                  if [ -z "$NEW_SECRET" ]; then
                    echo "argocd-oidc-credentials has no client-secret value — refusing to patch" >&2
                    exit 1
                  fi

                  # Read current value in argocd-secret (if any)
                  CURRENT=$(kubectl get secret argocd-secret -n argocd \
                    -o jsonpath='{.data.oidc\.clientSecret}' 2>/dev/null | base64 -d || echo "")

                  # Only patch if changed
                  if [ "$NEW_SECRET" = "$CURRENT" ]; then
                    echo "oidc.clientSecret already up to date"
                    exit 0
                  fi

                  # Patch the base64 form under .data instead of interpolating the
                  # raw secret into JSON: base64 output contains no quotes or
                  # backslashes, so the patch document is always well-formed.
                  NEW_SECRET_B64=$(printf '%s' "$NEW_SECRET" | base64 | tr -d '\n')
                  kubectl patch secret argocd-secret -n argocd --type merge \
                    -p "{\"data\":{\"oidc.clientSecret\":\"${NEW_SECRET_B64}\"}}"
                  echo "Patched argocd-secret with oidc.clientSecret"

View File

@@ -1,9 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: argocd-repo-server-config
namespace: argocd
data:
# Disable git submodule checkout - submodules (e.g. shared-prompts)
# are not needed for K8s manifest generation
ARGOCD_GIT_MODULES_ENABLED: "false"

View File

@@ -26,6 +26,7 @@ spec:
- monitoring
- secrets
- kyverno
- trivy-system
match:
any:
- resources:

View File

@@ -16,6 +16,7 @@ spec:
- resources:
namespaces:
- kube-system
- trivy-system
- monitoring
- argocd
- cert-manager

View File

@@ -1,12 +1,12 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: k8s-launchpad # → infra/values/aks-dev/argocd-values.yaml (notifications.context.clusterName)
domain: example.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.example.com # → infra/values/aks-dev/argocd-values.yaml (global.domain)
grafanaDomain: grafana.example.com # → infra/values/aks-dev/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.example.com # → infra/values/aks-dev/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.example.com # → infra/values/aks-dev/dot-ai-stack-values.yaml (dot-ai.ingress.host) — create if needed
dotaiUiDomain: kubemcpui.example.com # → infra/values/aks-dev/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host) — create if needed
letsencryptEmail: admin@example.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
clusterName: dev-aks # → infra/values/aks-dev/argocd-values.yaml (notifications.context.clusterName)
domain: example.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.example.com # → infra/values/aks-dev/argocd-values.yaml (global.domain)
grafanaDomain: grafana.example.com # → infra/values/aks-dev/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.example.com # → infra/values/aks-dev/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.example.com # → infra/values/aks-dev/dot-ai-stack-values.yaml (dot-ai.ingress.host) — create if needed
dotaiUiDomain: kubemcpui.example.com # → infra/values/aks-dev/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host) — create if needed
letsencryptEmail: admin@example.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "10.0.0.0/8,168.63.129.16/32" # → infra/values/aks-dev/traefik-values.yaml (ports.*.trustedIPs) — VNet CIDR + Azure health probe
cloudProvider: azure # → determines overlay directory and cloud-specific LB/storage annotations
cloudProvider: azure # → determines overlay directory and cloud-specific LB/storage annotations

View File

@@ -1,32 +0,0 @@
{
"$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.16.0/.schema/devbox.schema.json",
"packages": [
"kubectl@1.33.2",
"kubernetes-helm@3.18.4",
"k9s@0.50.7",
"kubeseal@0.30.0",
"argocd@2.14.11",
"kubecm@0.33.1",
"kubectl-tree@0.4.3",
"kind@0.29.0",
"kustomize@5.7.0",
"kyverno@1.14.3",
"syft@1.29.0",
"grype@0.92.2",
"traefik@3.6.7",
"claude-code@latest",
"go@latest",
"dotnet-sdk@latest",
"opentofu@1.11.6"
],
"shell": {
"init_hook": [
"echo 'Welcome to devbox!' > /dev/null"
],
"scripts": {
"test": [
"echo \"Error: no test specified\" && exit 1"
]
}
}
}

View File

@@ -654,11 +654,21 @@ kubectl create secret generic myapp-credentials \
#### Step 2: Seal the Secret
Get the public certificate (one-time setup):
```bash
# Fetch public cert from cluster
kubeseal --fetch-cert \
--controller-name=sealed-secrets-controller \
--controller-namespace=kube-system \
> pub-cert.pem
```
Seal your secret:
```bash
kubeseal --format=yaml \
--namespace=myapp \
--cert=pub-cert.pem \
< private/myapp-credentials.yaml \
> secrets/myapp-credentials-sealed.yaml
```
@@ -701,7 +711,7 @@ kubectl create secret generic myapp-credentials \
# 2. Seal it
kubeseal --format=yaml \
--namespace=myapp \
--cert=pub-cert.pem \
< private/myapp-credentials.yaml \
> secrets/myapp-credentials-sealed.yaml

View File

@@ -115,51 +115,29 @@ This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where
```
launchpad/
├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps)
├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster)
├── .tofu/ # Infrastructure provisioning (OpenTofu)
│ ├── platforms/ # Per-platform IaC
│ │ ├── aks/ # Azure AKS
│ │ │ ├── modules/cluster/ # Reusable AKS module
│ │ │ ├── dev/ # tofu root for aks-dev
│ │ │ ├── prod/ # tofu root for aks-prod
│ │ │ └── workload/ # workload cluster (no data services)
│ │ ├── eks/ # AWS EKS (same structure)
│ │ ├── gke/ # GCP GKE
│ │ └── upc/ # UpCloud
│ ├── configs/ # Platform credentials (git-ignored)
│ │ └── {platform}.env.example # Template per platform
│ └── scripts/
│ ├── setup-cluster.sh # ./setup-cluster.sh <cluster> [--plan|--auto]
│ ├── teardown-cluster.sh # ./teardown-cluster.sh <cluster>
│ └── get-kubeconfig.sh # ./get-kubeconfig.sh <cluster>
├── clusters/ # Cluster metadata YAML (domain, IPs, etc.)
│ ├── aks-dev.yaml
│ ├── upc-dev.yaml
│ └── ...
├── bootstrap.sh # Cluster initialization script
├── _app-of-apps-upc-dev.yaml # Root ArgoCD Application (upc-dev cluster)
├── _app-of-apps-upc-prod.yaml # Root ArgoCD Application (upc-prod cluster)
├── infra/ # Infrastructure ArgoCD Applications (Kustomize)
│ ├── base/ # Base Application manifests (one dir per component)
│ │ ├── kustomization.yaml # Aggregates all component subdirectories
│ │ ├── traefik-application/
│ │ │ ├── kustomization.yaml
│ │ │ └── traefik-application.yaml
│ │ ├── keycloak/
│ │ │ ├── kustomization.yaml
│ │ │ └── keycloak.yaml
│ │ ├── grafana/
│ │ ├── prometheus/
│ │ ├── ... # Each component in its own subdirectory
│ │ └── secrets/
│ ├── base/ # Base Application manifests (upc-dev defaults)
│ │ ├── kustomization.yaml
│ │ ├── traefik-application.yaml
│ │ ├── keycloak.yaml
│ │ ── grafana.yaml
│ │ ├── gitea.yaml
│ │ ├── gitea-actions.yaml
│ │ ├── tempo.yaml
│ │ ├── renovate.yaml
│ │ ├── ... # All other Application manifests
│ │ ── secrets.yaml
│ ├── overlays/ # Per-cluster Kustomize overrides
│ │ ├── upc-dev/ # UpCloud Dev — includes all (resources: ../../base)
│ │ ├── upc-prod/ # UpCloud Prod — all + patches
│ │ ├── aks-dev/ # Azure AKS Dev — selective components
│ │ ├── aks-prod/ # Azure AKS Prod
│ │ ├── upc-dev/ # UpCloud Dev (uses base as-is)
│ │ ├── upc-prod/ # UpCloud Prod (patches value paths)
│ │ ├── eks-dev/ # AWS EKS Dev
│ │ ├── eks-prod/ # AWS EKS Prod
│ │ ├── aks-dev/ # Azure AKS Dev
│ │ ├── aks-prod/ # Azure AKS Prod
│ │ ├── gke-dev/ # GCP GKE Dev
│ │ └── gke-prod/ # GCP GKE Prod
│ ├── dashboards/ # Grafana dashboard ConfigMaps
@@ -171,17 +149,13 @@ launchpad/
│ └── gcp-{dev,prod}/ # GCP: premium-rwo, L4 LB
├── apps/ # Business Application ArgoCD manifests (Kustomize)
│ ├── base/ # One subdirectory per app
│ ├── base/ # Base app manifests
│ │ ├── kustomization.yaml
│ │ ├── musicman/
│ │ ── mcp10x/
│ │ ├── dot-ai-stack/
│ │ ├── ts-mcp/
│ │ └── argo-mcp/
│ │ ├── dot-ai-stack.yaml
│ │ ── ...
│ └── overlays/
│ ├── upc-dev/ # All apps (resources: ../../base)
── upc-prod/ # All apps + patches
│ └── aks-dev/ # Selective apps only
│ ├── upc-dev/ # Uses base as-is
── upc-prod/ # Patches value paths
├── cluster-resources/ # Cluster-wide Kubernetes resources
│ ├── ...
@@ -197,8 +171,6 @@ launchpad/
**Key Points**:
- `_app-of-apps-upc-dev.yaml` and `_app-of-apps-upc-prod.yaml` are the per-cluster root Applications
- Each component in `base/` has its own subdirectory with a `kustomization.yaml`
- Overlays can include **all** components (`resources: [../../base]`) or **cherry-pick** specific ones (`resources: [../../base/grafana, ../../base/prometheus]`)
- Kustomize overlays in `infra/overlays/` render base Applications with per-cluster patches
- Helm values are split: `values/base/` (shared) + `values/upc-dev/` or `values/upc-prod/` (cluster-specific)
- `apps/` follows the same base/overlays pattern for business applications
@@ -381,30 +353,16 @@ spec:
### Multi-Cluster Pattern
Kustomize overlays enable deploying the same Applications across clusters with different configurations.
Each component in `infra/base/` and `apps/base/` lives in its own subdirectory. Overlays define **which components to include** and optionally **patch** them:
Kustomize overlays enable deploying the same Applications across clusters with different configurations:
```yaml
# Option 1: Include ALL components (full cluster)
# infra/overlays/upc-dev/kustomization.yaml
resources:
- ../../base # Pulls in every component subdirectory
# infra/base/ contains default (upc-dev) Applications
# Helm values are layered: base + cluster-specific
valueFiles:
- $values/infra/values/base/traefik-values.yaml # Shared config
- $values/infra/values/upc-dev/traefik-values.yaml # Cluster-specific
# Option 2: Cherry-pick specific components (lightweight cluster)
# infra/overlays/aks-dev/kustomization.yaml
resources:
- ../../base/traefik-application
- ../../base/grafana
- ../../base/prometheus
- ../../base/loki
# Only listed components are deployed — others are excluded
```
Per-cluster patches swap Helm value file paths:
```yaml
# infra/overlays/upc-prod/kustomization.yaml
# infra/overlays/upc-prod/kustomization.yaml patches the second valueFile
patches:
- target:
kind: Application

View File

@@ -72,45 +72,38 @@ Internet
```
launchpad/
├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps)
├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster)
├── bootstrap.sh # Cluster initialization script
├── _app-of-apps-upc-dev.yaml # Root ArgoCD Application (upc-dev)
├── _app-of-apps-upc-prod.yaml # Root ArgoCD Application (upc-prod)
├── .tofu/ # Infrastructure provisioning (OpenTofu)
│ ├── platforms/ # Per-platform IaC
│ │ ├── aks/ # Azure: modules/cluster/, dev/, prod/, workload/
│ │ ├── eks/ # AWS: same structure
│ │ ├── gke/ # GCP
│ │ └── upc/ # UpCloud
│ ├── configs/ # Platform credentials (git-ignored)
│ └── scripts/ # setup-cluster.sh, teardown-cluster.sh, get-kubeconfig.sh
├── clusters/ # Cluster metadata YAML
│ ├── aks-dev.yaml
│ ├── upc-dev.yaml
── ...
├── infra/ # Infrastructure applications (Kustomize)
│ ├── base/ # One subdirectory per component
│ │ ├── kustomization.yaml # Aggregates all component subdirectories
│ │ ├── traefik-application/
│ │ │ ├── kustomization.yaml
│ │ │ └── traefik-application.yaml
│ │ ├── keycloak/
│ │ │ ├── kustomization.yaml
│ │ │ └── keycloak.yaml
│ │ ├── grafana/
│ │ ├── prometheus/
│ │ ├── loki/
│ │ ├── tempo/
│ │ ├── gitea/
│ │ ├── opencost/
│ │ ├── ... # Each component in own directory
│ │ └── secrets/
│ ├── overlays/ # Per-cluster: include all or cherry-pick
│ │ ├── upc-dev/ # resources: [../../base] (all components)
│ │ ├── upc-prod/ # resources: [../../base] + patches
│ │ ├── aks-dev/ # resources: [../../base/grafana, ...] (selective)
│ │ └── .../ # 8 clusters total
├── infra/ # Infrastructure applications
│ ├── cluster-resources-application.yaml
│ ├── enterprise-apps.yaml
│ ├── traefik-application.yaml
│ ├── cert-manager-application.yaml
├── kyverno.yaml
│ ├── kyverno-policies.yaml
── prometheus.yaml
├── grafana.yaml
│ ├── loki.yaml
│ ├── tempo.yaml
│ ├── fluent-bit.yaml
── trivy.yaml
├── gitea.yaml
│ ├── gitea-actions.yaml
│ ├── sealedsecrets.yaml
│ ├── secrets.yaml
│ ├── renovate.yaml
├── base/ # ArgoCD Application manifests (Kustomize base)
│ │ ├── gitea.yaml
│ │ ├── opencost.yaml
│ │ ├── traefik-application.yaml
│ │ ── keycloak.yaml
│ │ ├── grafana.yaml
│ │ ── ...
│ ├── overlays/
│ │ ── upc-prod/
│ │ └── kustomization.yaml # Patches upc-dev → upc-prod valueFile paths
│ └── values/
│ ├── base/ # Cloud-agnostic Helm values
│ │ ├── gitea-values.yaml
@@ -130,18 +123,11 @@ launchpad/
│ ├── gitea-values.yaml
│ └── opencost-values.yaml
├── apps/ # Business applications (Kustomize)
│ ├── base/ # One subdirectory per app
│ ├── kustomization.yaml
│ ├── musicman/
│ ├── mcp10x/
│ │ ├── dot-ai-stack/
│ │ ├── ts-mcp/
│ │ └── argo-mcp/
│ └── overlays/ # Per-cluster: include all or cherry-pick
│ ├── upc-dev/
│ ├── upc-prod/
│ └── aks-dev/ # Selective apps only
├── apps/ # Business applications
│ ├── mcp10x.yaml
│ ├── musicman.yaml
│ ├── dot-ai-stack.yaml
└── argo-mcp.yaml
├── cluster-resources/ # Cluster-level resources
│ ├── cert-manager-namespace.yaml
@@ -669,70 +655,8 @@ retry:
|---------|-------|---------|
| `application.resourceTrackingMethod` | `annotation` | Track resources via annotations |
| `timeout.reconciliation` | `60s` | Reconciliation interval |
| `admin.enabled` | `false` | Admin login disabled (SSO-only) |
| `url` | `https://argocd.forteapps.net` | External URL for ArgoCD UI |
**Git Submodule Disable**: Set via `configs.params` (NOT `repoServer.env` — that causes strategic merge conflicts with chart's `valueFrom` entries):
```yaml
configs:
params:
"reposerver.enable.git.submodule": "false"
```
This writes to `argocd-cmd-params-cm` ConfigMap, which the chart already reads via `valueFrom`. Submodules (e.g., `shared-prompts`) are not needed for K8s manifest generation.
**Break-Glass Admin Access**: Admin login is disabled (`admin.enabled: false`). The admin password remains in `argocd-secret`. To re-enable temporarily:
```bash
# Enable admin login
kubectl patch cm argocd-cm -n argocd -p '{"data":{"admin.enabled":"true"}}'
# Log in as admin, do what's needed, then disable again
kubectl patch cm argocd-cm -n argocd -p '{"data":{"admin.enabled":"false"}}'
```
ArgoCD picks up ConfigMap changes within the reconciliation timeout (60s). Note: ArgoCD will revert this on next sync — this is intentional (temporary access only).
**OIDC Authentication** (Keycloak):
```yaml
configs:
cm:
oidc.config: |
name: Forte SSO
issuer: https://id.forteapps.net/realms/forte
clientID: argocd
clientSecret: $oidc.clientSecret
requestedScopes: ["openid", "email", "profile"]
rbacConfig:
policy.csv: |
g, ArgoCD Admins, role:admin
g, ArgoCD Viewers, role:readonly
# Deny users not in any declared KC group
policy.default: ""
scopes: '[groups]'
```
**Access Control**: Only users in Keycloak groups `ArgoCD Admins` or `ArgoCD Viewers` can access ArgoCD. Users not in either group are denied (empty `policy.default`). Assign users to groups in Keycloak admin console.
- ArgoCD does NOT add `openid` implicitly — must include in `requestedScopes`
- Do NOT add `groups` as a scope — the KC groups mapper emits the claim regardless
- `$oidc.clientSecret` references the `oidc.clientSecret` key in `argocd-secret`
- OIDC secret is synced by CronJob `argocd-oidc-sync` (see `cluster-resources/argocd-oidc-secret-sync.yaml`)
- The CronJob bridges `argocd-oidc-credentials` (from KC registrar) → `argocd-secret` every 2 min
- Safe for fresh deploys: no-ops if source secret doesn't exist yet
**Ingress** (Traefik + TLS):
```yaml
server:
ingress:
enabled: true
ingressClassName: traefik
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
tls: true
extraArgs:
- --insecure
configs:
params:
"server.insecure": true
```
TLS terminates at Traefik; ArgoCD runs in insecure mode behind the proxy.
| `admin.enabled` | `true` | Enable admin account |
| `git.submodule.enabled` | `false` | Disable git submodule checkout — submodules are not needed for manifest generation |
---
@@ -853,15 +777,6 @@ kubeStateMetrics:
- Loki
- Tempo
**Ingress**: Exposed via Traefik at `https://grafana.forteapps.net` with cert-manager TLS.
**OIDC Authentication** (Keycloak):
- Uses `grafana.ini.auth.generic_oauth` with KC `grafana` client
- Secret `grafana-oidc-credentials` synced by KC registrar, loaded via `envFromSecrets`
- SSO-only mode: `auth.disable_login_form: true` + `auth.generic_oauth.auto_login: true`
- Role mapping via JMESPath on `resource_access.grafana.roles` claim (requires KC client role mapper)
- Roles: KC client roles `Admin`/`Editor` map to Grafana roles; default is `Viewer`
### Loki
**Chart**: `grafana/loki-stack`

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- cert-manager-application.yaml

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- cluster-resources-application.yaml

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- databunker.yaml

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- enterprise-apps.yaml

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- fluent-bit.yaml

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gitea-actions.yaml

View File

@@ -1,8 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gitea.yaml
- gitea-backup-s3-sealed.yaml
- gitea-credentials-sealed.yaml
- gitea-runner-token-sealed.yaml
- gitea-smtp-secret-sealed.yaml

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- grafana-dashboards.yaml

Some files were not shown because too many files have changed in this diff Show More