6 Commits

Author SHA1 Message Date
a471f11740 repo url 2026-04-22 14:45:23 +02:00
333acdea26 multi-cloud overlays
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 6s
2026-04-22 14:30:13 +02:00
458f7b23ad Merge branch 'main' into feature/multi-cloud
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 28s
2026-04-22 11:55:05 +00:00
41c8b85bf8 Merge branch 'main' into feature/multi-cloud
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 26s
2026-04-22 11:52:22 +00:00
4e6a84785a token
All checks were successful
AI Code Review / ai-review (pull_request) Successful in 28s
2026-04-22 13:37:32 +02:00
e0bdaab422 multi-cloud + mcp
Some checks failed
AI Code Review / ai-review (pull_request) Failing after 2s
2026-04-22 13:34:48 +02:00
205 changed files with 2616 additions and 5280 deletions

2
.gitattributes vendored
View File

@@ -1,2 +0,0 @@
# Force LF line endings for shell scripts
*.sh text eol=lf

8
.gitignore vendored
View File

@@ -16,11 +16,3 @@ devbox.d/
devbox.lock
.devbox/
bash.exe.stackdump
# OpenTofu
.tofu/configs/*.env
.tofu/scripts/*.config
.tofu/platforms/**/.terraform/
.tofu/platforms/**/terraform.tfstate*
.tofu/platforms/**/tfplan
.tofu/platforms/**/.terraform.lock.hcl

View File

@@ -1,9 +0,0 @@
# Azure AKS credentials — copy to aks.env and fill in values
# NEVER commit aks.env to git!
# Required
AZURE_TENANT_ID=your-azure-tenant-id
AZURE_SUBSCRIPTION_ID=your-azure-subscription-id
# Optional — defaults to cluster name if not set
ARM_RESOURCE_GROUP=

View File

@@ -1,10 +0,0 @@
# AWS EKS credentials — copy to eks.env and fill in values
# NEVER commit eks.env to git!
# Required — AWS CLI profile or access key
AWS_PROFILE=default
AWS_REGION=eu-west-1
# Optional — override with explicit keys instead of profile
# AWS_ACCESS_KEY_ID=
# AWS_SECRET_ACCESS_KEY=

View File

@@ -1,9 +0,0 @@
# GCP GKE credentials — copy to gke.env and fill in values
# NEVER commit gke.env to git!
# Required
GCP_PROJECT_ID=your-gcp-project-id
GCP_REGION=europe-west4
# Optional — path to service account JSON key (if not using gcloud auth)
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/sa-key.json

View File

@@ -1,8 +0,0 @@
# UpCloud credentials — copy to upc.env and fill in values
# NEVER commit upc.env to git!
# Required
UPCLOUD_TOKEN=your-upcloud-api-token
# Optional — set after cluster creation for kubeconfig retrieval
UPCLOUD_CLUSTER_ID=

View File

@@ -1,18 +0,0 @@
module "cluster" {
source = "../modules/cluster"
prefix = "clst-dev"
location = "norwayeast"
resource_group_name = "clst-dev-rg"
# AKS — small dev nodes
aks_node_vm_size = "Standard_B2s"
aks_node_count = 2
enable_delete_lock = false
tags = {
Environment = "dev"
ManagedBy = "tofu"
}
}

View File

@@ -1,26 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
output "cluster_name" {
value = module.cluster.cluster_name
}
output "resource_group_name" {
value = module.cluster.resource_group_name
}
output "kubernetes_version" {
value = module.cluster.kubernetes_version
}
output "location" {
value = module.cluster.location
}
output "oidc_issuer_url" {
value = module.cluster.oidc_issuer_url
}
output "kubeconfig" {
value = module.cluster.kubeconfig
sensitive = true
}

View File

@@ -1,17 +0,0 @@
terraform {
required_version = ">= 1.0"
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "~> 4.0"
}
}
}
provider "azurerm" {
features {}
# Credentials via environment variables:
# ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET
# Or: az login (uses your Azure CLI session)
}

View File

@@ -1,72 +0,0 @@
# Current Azure/Entra ID context — provides tenant_id used in outputs
data "azurerm_client_config" "current" {}
# ─── Resource Group ───────────────────────────────────────────────────
resource "azurerm_resource_group" "main" {
name = var.resource_group_name
location = var.location
tags = var.tags
}
resource "azurerm_management_lock" "main" {
count = var.enable_delete_lock ? 1 : 0
name = "${var.prefix}-delete-lock"
scope = azurerm_resource_group.main.id
lock_level = "CanNotDelete"
notes = "Prevents accidental deletion of production resources"
}
# ─── Networking ───────────────────────────────────────────────────────
resource "azurerm_virtual_network" "main" {
name = "${var.prefix}-vnet"
resource_group_name = azurerm_resource_group.main.name
location = azurerm_resource_group.main.location
address_space = [var.vnet_address_space]
tags = var.tags
}
# AKS nodes subnet
resource "azurerm_subnet" "aks" {
name = "${var.prefix}-aks-subnet"
resource_group_name = azurerm_resource_group.main.name
virtual_network_name = azurerm_virtual_network.main.name
address_prefixes = [var.aks_subnet_cidr]
}
# ─── AKS Cluster ──────────────────────────────────────────────────────
resource "azurerm_kubernetes_cluster" "main" {
name = "${var.prefix}-aks"
resource_group_name = azurerm_resource_group.main.name
location = azurerm_resource_group.main.location
dns_prefix = replace(var.prefix, "-", "")
kubernetes_version = var.aks_kubernetes_version
tags = var.tags
default_node_pool {
name = "system"
node_count = var.aks_node_count
vm_size = var.aks_node_vm_size
vnet_subnet_id = azurerm_subnet.aks.id
node_labels = {
prefix = var.prefix
role = "worker"
env = lookup(var.tags, "Environment", "dev")
}
}
identity {
type = "SystemAssigned"
}
network_profile {
network_plugin = "azure"
network_policy = "azure"
}
# Enable Workload Identity for keyless Azure service access (MSI)
oidc_issuer_enabled = true
workload_identity_enabled = true
}

View File

@@ -1,32 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
output "cluster_name" {
description = "AKS cluster name"
value = azurerm_kubernetes_cluster.main.name
}
output "resource_group_name" {
description = "Resource group name"
value = azurerm_resource_group.main.name
}
output "kubernetes_version" {
description = "Kubernetes version"
value = azurerm_kubernetes_cluster.main.kubernetes_version
}
output "location" {
description = "Azure region"
value = azurerm_resource_group.main.location
}
output "oidc_issuer_url" {
description = "AKS OIDC issuer URL (for workload identity federation)"
value = azurerm_kubernetes_cluster.main.oidc_issuer_url
}
output "kubeconfig" {
description = "Kubeconfig for the AKS cluster"
value = azurerm_kubernetes_cluster.main.kube_config_raw
sensitive = true
}

View File

@@ -1,18 +0,0 @@
terraform {
required_version = ">= 1.0"
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "~> 4.0"
}
azuread = {
source = "hashicorp/azuread"
version = "~> 3.0"
}
random = {
source = "hashicorp/random"
version = "~> 3.0"
}
}
}

View File

@@ -1,56 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
variable "prefix" {
description = "Prefix for resource names"
type = string
}
variable "location" {
description = "Azure region (e.g., norwayeast, westeurope, northeurope)"
type = string
}
variable "resource_group_name" {
description = "Name of the Azure Resource Group to create"
type = string
}
variable "vnet_address_space" {
description = "Address space for the virtual network"
type = string
default = "10.100.0.0/16"
}
variable "aks_subnet_cidr" {
description = "CIDR block for the AKS node subnet"
type = string
default = "10.100.0.0/22"
}
variable "aks_node_vm_size" {
description = "VM size for AKS worker nodes (e.g., Standard_B2s, Standard_D4s_v3)"
type = string
}
variable "aks_node_count" {
description = "Number of AKS worker nodes"
type = number
}
variable "aks_kubernetes_version" {
description = "Kubernetes version for AKS (null = latest stable)"
type = string
default = null
}
variable "enable_delete_lock" {
description = "Protect the resource group from accidental deletion"
type = bool
default = false
}
variable "tags" {
description = "Tags applied to all resources"
type = map(string)
default = {}
}

View File

@@ -1,18 +0,0 @@
module "cluster" {
source = "../modules/cluster"
prefix = "clst"
location = "westeurope"
resource_group_name = "clst-prod-rg"
# AKS — general-purpose nodes for production
aks_node_vm_size = "Standard_D4s_v3"
aks_node_count = 3
enable_delete_lock = true
tags = {
Environment = "prod"
ManagedBy = "tofu"
}
}

View File

@@ -1,26 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
output "cluster_name" {
value = module.cluster.cluster_name
}
output "resource_group_name" {
value = module.cluster.resource_group_name
}
output "kubernetes_version" {
value = module.cluster.kubernetes_version
}
output "location" {
value = module.cluster.location
}
output "oidc_issuer_url" {
value = module.cluster.oidc_issuer_url
}
output "kubeconfig" {
value = module.cluster.kubeconfig
sensitive = true
}

View File

@@ -1,17 +0,0 @@
terraform {
required_version = ">= 1.0"
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "~> 4.0"
}
}
}
provider "azurerm" {
features {}
# Credentials via environment variables:
# ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET
# Or: az login (uses your Azure CLI session)
}

View File

@@ -1,173 +0,0 @@
# =============================================================================
# Azure Workload Cluster
# =============================================================================
# A lean AKS cluster for running application workloads. No managed data
# services — those live on the platform cluster. ArgoCD (on the platform
# cluster) deploys apps to this cluster via the app-of-apps pattern.
#
# Platform components deployed by deploy-workload.sh:
# nginx-ingress, cert-manager, external-dns, external-secrets, alloy
#
# Usage:
# tofu init && tofu plan && tofu apply
# ./sync-tofu-outputs.sh --env azure-workload
# ./deploy-workload.sh --env azure-workload
# =============================================================================
variable "prefix" {
description = "Prefix for resource names (e.g., clst-workload)"
type = string
default = "clst-workload"
}
variable "location" {
description = "Azure region"
type = string
default = "norwayeast"
}
variable "resource_group_name" {
description = "Name of the Azure Resource Group to create"
type = string
default = "clst-workload-rg"
}
variable "vnet_address_space" {
description = "Address space for the virtual network"
type = string
default = "10.110.0.0/16"
}
variable "aks_subnet_cidr" {
description = "CIDR block for the AKS node subnet"
type = string
default = "10.110.0.0/22"
}
variable "aks_node_vm_size" {
description = "VM size for AKS worker nodes"
type = string
default = "Standard_B2s"
}
variable "aks_node_count" {
description = "Number of AKS worker nodes"
type = number
default = 2
}
variable "aks_kubernetes_version" {
description = "Kubernetes version for AKS (null = latest stable)"
type = string
default = null
}
variable "domain" {
description = "Public domain name — must have an existing Azure DNS zone"
type = string
}
variable "dns_zone_resource_group" {
description = "Resource group containing the Azure DNS zone (defaults to cluster RG)"
type = string
default = ""
}
variable "tags" {
description = "Tags applied to all resources"
type = map(string)
default = {
Environment = "workload"
ManagedBy = "tofu"
}
}
# ─── Resource Group ───────────────────────────────────────────────────
resource "azurerm_resource_group" "main" {
name = var.resource_group_name
location = var.location
tags = var.tags
}
# ─── Networking ───────────────────────────────────────────────────────
resource "azurerm_virtual_network" "main" {
name = "${var.prefix}-vnet"
resource_group_name = azurerm_resource_group.main.name
location = azurerm_resource_group.main.location
address_space = [var.vnet_address_space]
tags = var.tags
}
resource "azurerm_subnet" "aks" {
name = "${var.prefix}-aks-subnet"
resource_group_name = azurerm_resource_group.main.name
virtual_network_name = azurerm_virtual_network.main.name
address_prefixes = [var.aks_subnet_cidr]
}
# ─── AKS Cluster ──────────────────────────────────────────────────────
resource "azurerm_kubernetes_cluster" "main" {
name = "${var.prefix}-aks"
resource_group_name = azurerm_resource_group.main.name
location = azurerm_resource_group.main.location
dns_prefix = replace(var.prefix, "-", "")
kubernetes_version = var.aks_kubernetes_version
tags = var.tags
default_node_pool {
name = "system"
node_count = var.aks_node_count
vm_size = var.aks_node_vm_size
vnet_subnet_id = azurerm_subnet.aks.id
node_labels = {
prefix = var.prefix
role = "worker"
env = lookup(var.tags, "Environment", "workload")
}
}
identity {
type = "SystemAssigned"
}
network_profile {
network_plugin = "azure"
network_policy = "azure"
}
oidc_issuer_enabled = true
workload_identity_enabled = true
}
# ─── External-DNS Workload Identity ──────────────────────────────────
# Allows external-dns to manage Azure DNS records for app ingresses.
data "azurerm_dns_zone" "main" {
name = var.domain
resource_group_name = var.dns_zone_resource_group != "" ? var.dns_zone_resource_group : azurerm_resource_group.main.name
}
resource "azurerm_user_assigned_identity" "external_dns" {
name = "${var.prefix}-external-dns-identity"
resource_group_name = azurerm_resource_group.main.name
location = azurerm_resource_group.main.location
tags = var.tags
}
resource "azurerm_role_assignment" "external_dns_dns_contributor" {
scope = data.azurerm_dns_zone.main.id
role_definition_name = "DNS Zone Contributor"
principal_id = azurerm_user_assigned_identity.external_dns.principal_id
}
resource "azurerm_federated_identity_credential" "external_dns" {
name = "${var.prefix}-external-dns-fedcred"
resource_group_name = azurerm_resource_group.main.name
parent_id = azurerm_user_assigned_identity.external_dns.id
audience = ["api://AzureADTokenExchange"]
issuer = azurerm_kubernetes_cluster.main.oidc_issuer_url
subject = "system:serviceaccount:external-dns:external-dns"
}

View File

@@ -1,4 +0,0 @@
output "cluster_name" { value = azurerm_kubernetes_cluster.main.name }
output "resource_group_name" { value = azurerm_resource_group.main.name }
output "location" { value = azurerm_resource_group.main.location }
output "external_dns_identity_client_id" { value = azurerm_user_assigned_identity.external_dns.client_id }

View File

@@ -1,21 +0,0 @@
terraform {
required_version = ">= 1.0"
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "~> 4.0"
}
random = {
source = "hashicorp/random"
version = "~> 3.0"
}
}
}
provider "azurerm" {
features {}
# Credentials via environment variables:
# ARM_SUBSCRIPTION_ID, ARM_TENANT_ID, ARM_CLIENT_ID, ARM_CLIENT_SECRET
# Or: az login (uses your Azure CLI session)
}

View File

@@ -1,21 +0,0 @@
module "cluster" {
source = "../modules/cluster"
region = var.region
prefix = "clst-dev"
# VPC
availability_zones = ["${var.region}a", "${var.region}b"]
# EKS — small dev nodes
node_instance_type = "t3.medium"
node_count = 2
node_min_count = 1
node_max_count = 4
kubernetes_version = "1.30"
tags = {
Environment = "dev"
ManagedBy = "tofu"
}
}

View File

@@ -1,5 +0,0 @@
output "cluster_name" { value = module.cluster.cluster_name }
output "aws_region" { value = module.cluster.aws_region }
output "oidc_issuer_url" { value = module.cluster.oidc_issuer_url }
output "oidc_provider_arn" { value = module.cluster.oidc_provider_arn }
output "vpc_id" { value = module.cluster.vpc_id }

View File

@@ -1,24 +0,0 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
tls = {
source = "hashicorp/tls"
version = "~> 4.0"
}
}
}
# Authentication: set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
# or configure an AWS profile: export AWS_PROFILE=clst
provider "aws" {
region = var.region
}
variable "region" {
description = "AWS region for dev environment"
type = string
default = "eu-west-1"
}

View File

@@ -1,207 +0,0 @@
# ─── VPC ──────────────────────────────────────────────────────────────
resource "aws_vpc" "main" {
cidr_block = var.vpc_cidr
enable_dns_hostnames = true
enable_dns_support = true
tags = merge(var.tags, { Name = "${var.prefix}-vpc" })
}
resource "aws_internet_gateway" "main" {
vpc_id = aws_vpc.main.id
tags = merge(var.tags, { Name = "${var.prefix}-igw" })
}
# Public subnets (one per AZ) — for NAT gateways and load balancers
resource "aws_subnet" "public" {
count = length(var.availability_zones)
vpc_id = aws_vpc.main.id
cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index)
availability_zone = var.availability_zones[count.index]
map_public_ip_on_launch = true
tags = merge(var.tags, {
Name = "${var.prefix}-public-${count.index + 1}"
"kubernetes.io/cluster/${var.prefix}-eks" = "shared"
"kubernetes.io/role/elb" = "1"
})
}
# Private subnets (one per AZ) — for EKS nodes
resource "aws_subnet" "private" {
count = length(var.availability_zones)
vpc_id = aws_vpc.main.id
cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones))
availability_zone = var.availability_zones[count.index]
tags = merge(var.tags, {
Name = "${var.prefix}-private-${count.index + 1}"
"kubernetes.io/cluster/${var.prefix}-eks" = "shared"
"kubernetes.io/role/internal-elb" = "1"
})
}
# NAT Gateway (single, in first public subnet — use one per AZ for prod HA)
resource "aws_eip" "nat" {
domain = "vpc"
tags = merge(var.tags, { Name = "${var.prefix}-nat-eip" })
}
resource "aws_nat_gateway" "main" {
allocation_id = aws_eip.nat.id
subnet_id = aws_subnet.public[0].id
tags = merge(var.tags, { Name = "${var.prefix}-nat" })
depends_on = [aws_internet_gateway.main]
}
resource "aws_route_table" "public" {
vpc_id = aws_vpc.main.id
route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.main.id
}
tags = merge(var.tags, { Name = "${var.prefix}-public-rt" })
}
resource "aws_route_table_association" "public" {
count = length(var.availability_zones)
subnet_id = aws_subnet.public[count.index].id
route_table_id = aws_route_table.public.id
}
resource "aws_route_table" "private" {
vpc_id = aws_vpc.main.id
route {
cidr_block = "0.0.0.0/0"
nat_gateway_id = aws_nat_gateway.main.id
}
tags = merge(var.tags, { Name = "${var.prefix}-private-rt" })
}
resource "aws_route_table_association" "private" {
count = length(var.availability_zones)
subnet_id = aws_subnet.private[count.index].id
route_table_id = aws_route_table.private.id
}
# ─── EKS Cluster ──────────────────────────────────────────────────────
resource "aws_iam_role" "eks_cluster" {
name_prefix = "${var.prefix}-eks-cluster-"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = { Service = "eks.amazonaws.com" }
}]
})
tags = var.tags
}
resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
role = aws_iam_role.eks_cluster.name
}
resource "aws_eks_cluster" "main" {
name = "${var.prefix}-eks"
role_arn = aws_iam_role.eks_cluster.arn
version = var.kubernetes_version
vpc_config {
subnet_ids = concat(aws_subnet.private[*].id, aws_subnet.public[*].id)
endpoint_private_access = true
endpoint_public_access = true
}
# Enable OIDC issuer for IRSA (IAM Roles for Service Accounts)
access_config {
authentication_mode = "API_AND_CONFIG_MAP"
}
tags = var.tags
depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy]
}
# OIDC provider — required for IRSA (IAM Roles for Service Accounts)
data "tls_certificate" "eks" {
url = aws_eks_cluster.main.identity[0].oidc[0].issuer
}
resource "aws_iam_openid_connect_provider" "eks" {
client_id_list = ["sts.amazonaws.com"]
thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint]
url = aws_eks_cluster.main.identity[0].oidc[0].issuer
tags = var.tags
}
# EKS Node Group
resource "aws_iam_role" "eks_nodes" {
name_prefix = "${var.prefix}-eks-nodes-"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = { Service = "ec2.amazonaws.com" }
}]
})
tags = var.tags
}
resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
role = aws_iam_role.eks_nodes.name
}
resource "aws_iam_role_policy_attachment" "eks_cni_policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
role = aws_iam_role.eks_nodes.name
}
resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
role = aws_iam_role.eks_nodes.name
}
resource "aws_eks_node_group" "main" {
cluster_name = aws_eks_cluster.main.name
node_group_name = "${var.prefix}-nodes"
node_role_arn = aws_iam_role.eks_nodes.arn
subnet_ids = aws_subnet.private[*].id
instance_types = [var.node_instance_type]
scaling_config {
desired_size = var.node_count
max_size = var.node_max_count
min_size = var.node_min_count
}
update_config {
max_unavailable = 1
}
tags = var.tags
depends_on = [
aws_iam_role_policy_attachment.eks_worker_node_policy,
aws_iam_role_policy_attachment.eks_cni_policy,
aws_iam_role_policy_attachment.eks_ecr_readonly,
]
}

View File

@@ -1,26 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
output "cluster_name" {
description = "EKS cluster name"
value = aws_eks_cluster.main.name
}
output "aws_region" {
description = "AWS region"
value = var.region
}
output "oidc_issuer_url" {
description = "EKS OIDC issuer URL (for IRSA)"
value = aws_eks_cluster.main.identity[0].oidc[0].issuer
}
output "oidc_provider_arn" {
description = "IAM OIDC provider ARN (for IRSA trust policies)"
value = aws_iam_openid_connect_provider.eks.arn
}
output "vpc_id" {
description = "VPC ID"
value = aws_vpc.main.id
}

View File

@@ -1,12 +0,0 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
tls = {
source = "hashicorp/tls"
version = "~> 4.0"
}
}
}

View File

@@ -1,61 +0,0 @@
# ─── Region ──────────────────────────────────────────────────────────
variable "region" {
description = "AWS region (e.g., eu-west-1, us-east-1)"
type = string
}
variable "prefix" {
description = "Prefix for resource names (e.g., clst-dev)"
type = string
}
# ─── Networking ───────────────────────────────────────────────────────
variable "vpc_cidr" {
description = "VPC CIDR block"
type = string
default = "10.100.0.0/16"
}
variable "availability_zones" {
description = "List of AZs for subnets (23 recommended)"
type = list(string)
}
# ─── EKS Cluster ─────────────────────────────────────────────────────
variable "node_instance_type" {
description = "EKS node instance type (e.g., t3.medium, m5.xlarge)"
type = string
}
variable "node_count" {
description = "Desired number of EKS worker nodes"
type = number
}
variable "node_min_count" {
description = "Minimum number of EKS worker nodes"
type = number
default = 1
}
variable "node_max_count" {
description = "Maximum number of EKS worker nodes"
type = number
}
variable "kubernetes_version" {
description = "Kubernetes version for EKS (e.g., \"1.30\")"
type = string
default = "1.30"
}
# ─── Tags ─────────────────────────────────────────────────────────────
variable "tags" {
description = "Tags applied to all resources"
type = map(string)
default = {}
}

View File

@@ -1,21 +0,0 @@
module "cluster" {
source = "../modules/cluster"
region = var.region
prefix = "clst"
# VPC
availability_zones = ["${var.region}a", "${var.region}b", "${var.region}c"]
# EKS — general-purpose nodes for production
node_instance_type = "m5.xlarge"
node_count = 3
node_min_count = 3
node_max_count = 6
kubernetes_version = "1.30"
tags = {
Environment = "prod"
ManagedBy = "tofu"
}
}

View File

@@ -1,5 +0,0 @@
output "cluster_name" { value = module.cluster.cluster_name }
output "aws_region" { value = module.cluster.aws_region }
output "oidc_issuer_url" { value = module.cluster.oidc_issuer_url }
output "oidc_provider_arn" { value = module.cluster.oidc_provider_arn }
output "vpc_id" { value = module.cluster.vpc_id }

View File

@@ -1,22 +0,0 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
tls = {
source = "hashicorp/tls"
version = "~> 4.0"
}
}
}
provider "aws" {
region = var.region
}
variable "region" {
description = "AWS region for prod environment"
type = string
default = "eu-west-1"
}

View File

@@ -1,339 +0,0 @@
# =============================================================================
# AWS Workload Cluster
# =============================================================================
# A lean EKS cluster for running application workloads. No managed data
# services — those live on the platform cluster. ArgoCD (on the platform
# cluster) deploys apps to this cluster via the app-of-apps pattern.
#
# Platform components deployed by deploy-workload.sh:
# nginx-ingress, cert-manager, external-dns, external-secrets, alloy
#
# Usage:
# tofu init && tofu plan && tofu apply
# ./sync-tofu-outputs.sh --env aws-workload
# ./deploy-workload.sh --env aws-workload
# =============================================================================
variable "prefix" {
description = "Prefix for resource names (e.g., clst-workload)"
type = string
default = "clst-workload"
}
variable "availability_zones" {
description = "List of AZs for subnets"
type = list(string)
default = ["eu-west-1a", "eu-west-1b"]
}
variable "vpc_cidr" {
description = "VPC CIDR block"
type = string
default = "10.110.0.0/16"
}
variable "node_instance_type" {
description = "EKS node instance type"
type = string
default = "t3.medium"
}
variable "node_count" {
description = "Desired number of EKS worker nodes"
type = number
default = 2
}
variable "node_min_count" {
description = "Minimum number of EKS worker nodes"
type = number
default = 1
}
variable "node_max_count" {
description = "Maximum number of EKS worker nodes"
type = number
default = 4
}
variable "kubernetes_version" {
description = "Kubernetes version for EKS"
type = string
default = "1.30"
}
variable "domain" {
description = "Public domain name — must have an existing Route53 hosted zone"
type = string
}
variable "tags" {
description = "Tags applied to all resources"
type = map(string)
default = {
Environment = "workload"
ManagedBy = "tofu"
}
}
# ─── VPC ──────────────────────────────────────────────────────────────
resource "aws_vpc" "main" {
cidr_block = var.vpc_cidr
enable_dns_hostnames = true
enable_dns_support = true
tags = merge(var.tags, { Name = "${var.prefix}-vpc" })
}
resource "aws_internet_gateway" "main" {
vpc_id = aws_vpc.main.id
tags = merge(var.tags, { Name = "${var.prefix}-igw" })
}
resource "aws_subnet" "public" {
count = length(var.availability_zones)
vpc_id = aws_vpc.main.id
cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index)
availability_zone = var.availability_zones[count.index]
map_public_ip_on_launch = true
tags = merge(var.tags, {
Name = "${var.prefix}-public-${count.index + 1}"
"kubernetes.io/cluster/${var.prefix}-eks" = "shared"
"kubernetes.io/role/elb" = "1"
})
}
resource "aws_subnet" "private" {
count = length(var.availability_zones)
vpc_id = aws_vpc.main.id
cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + length(var.availability_zones))
availability_zone = var.availability_zones[count.index]
tags = merge(var.tags, {
Name = "${var.prefix}-private-${count.index + 1}"
"kubernetes.io/cluster/${var.prefix}-eks" = "shared"
"kubernetes.io/role/internal-elb" = "1"
})
}
resource "aws_eip" "nat" {
domain = "vpc"
tags = merge(var.tags, { Name = "${var.prefix}-nat-eip" })
}
resource "aws_nat_gateway" "main" {
allocation_id = aws_eip.nat.id
subnet_id = aws_subnet.public[0].id
tags = merge(var.tags, { Name = "${var.prefix}-nat" })
depends_on = [aws_internet_gateway.main]
}
resource "aws_route_table" "public" {
vpc_id = aws_vpc.main.id
route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.main.id
}
tags = merge(var.tags, { Name = "${var.prefix}-public-rt" })
}
resource "aws_route_table_association" "public" {
count = length(var.availability_zones)
subnet_id = aws_subnet.public[count.index].id
route_table_id = aws_route_table.public.id
}
resource "aws_route_table" "private" {
vpc_id = aws_vpc.main.id
route {
cidr_block = "0.0.0.0/0"
nat_gateway_id = aws_nat_gateway.main.id
}
tags = merge(var.tags, { Name = "${var.prefix}-private-rt" })
}
resource "aws_route_table_association" "private" {
count = length(var.availability_zones)
subnet_id = aws_subnet.private[count.index].id
route_table_id = aws_route_table.private.id
}
# ─── EKS Cluster ──────────────────────────────────────────────────────
resource "aws_iam_role" "eks_cluster" {
name_prefix = "${var.prefix}-eks-cluster-"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = { Service = "eks.amazonaws.com" }
}]
})
tags = var.tags
}
resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
role = aws_iam_role.eks_cluster.name
}
resource "aws_eks_cluster" "main" {
name = "${var.prefix}-eks"
role_arn = aws_iam_role.eks_cluster.arn
version = var.kubernetes_version
vpc_config {
subnet_ids = concat(aws_subnet.private[*].id, aws_subnet.public[*].id)
endpoint_private_access = true
endpoint_public_access = true
}
access_config {
authentication_mode = "API_AND_CONFIG_MAP"
}
tags = var.tags
depends_on = [aws_iam_role_policy_attachment.eks_cluster_policy]
}
# OIDC provider — required for IRSA
data "tls_certificate" "eks" {
url = aws_eks_cluster.main.identity[0].oidc[0].issuer
}
resource "aws_iam_openid_connect_provider" "eks" {
client_id_list = ["sts.amazonaws.com"]
thumbprint_list = [data.tls_certificate.eks.certificates[0].sha1_fingerprint]
url = aws_eks_cluster.main.identity[0].oidc[0].issuer
tags = var.tags
}
resource "aws_iam_role" "eks_nodes" {
name_prefix = "${var.prefix}-eks-nodes-"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = { Service = "ec2.amazonaws.com" }
}]
})
tags = var.tags
}
resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
role = aws_iam_role.eks_nodes.name
}
resource "aws_iam_role_policy_attachment" "eks_cni_policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
role = aws_iam_role.eks_nodes.name
}
resource "aws_iam_role_policy_attachment" "eks_ecr_readonly" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
role = aws_iam_role.eks_nodes.name
}
resource "aws_eks_node_group" "main" {
cluster_name = aws_eks_cluster.main.name
node_group_name = "${var.prefix}-nodes"
node_role_arn = aws_iam_role.eks_nodes.arn
subnet_ids = aws_subnet.private[*].id
instance_types = [var.node_instance_type]
scaling_config {
desired_size = var.node_count
max_size = var.node_max_count
min_size = var.node_min_count
}
update_config {
max_unavailable = 1
}
tags = var.tags
depends_on = [
aws_iam_role_policy_attachment.eks_worker_node_policy,
aws_iam_role_policy_attachment.eks_cni_policy,
aws_iam_role_policy_attachment.eks_ecr_readonly,
]
}
# ─── External-DNS IRSA ───────────────────────────────────────────────
# Allows external-dns to manage Route53 records for app ingresses.
data "aws_route53_zone" "main" {
name = var.domain
private_zone = false
}
data "aws_iam_policy_document" "external_dns_assume_role" {
statement {
effect = "Allow"
principals {
type = "Federated"
identifiers = [aws_iam_openid_connect_provider.eks.arn]
}
actions = ["sts:AssumeRoleWithWebIdentity"]
condition {
test = "StringEquals"
variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:sub"
values = ["system:serviceaccount:external-dns:external-dns"]
}
condition {
test = "StringEquals"
variable = "${replace(aws_iam_openid_connect_provider.eks.url, "https://", "")}:aud"
values = ["sts.amazonaws.com"]
}
}
}
resource "aws_iam_role" "external_dns_irsa" {
name_prefix = "${var.prefix}-external-dns-irsa-"
assume_role_policy = data.aws_iam_policy_document.external_dns_assume_role.json
tags = var.tags
}
data "aws_iam_policy_document" "external_dns_route53" {
statement {
effect = "Allow"
actions = ["route53:ChangeResourceRecordSets"]
resources = ["arn:aws:route53:::hostedzone/${data.aws_route53_zone.main.zone_id}"]
}
statement {
effect = "Allow"
actions = ["route53:ListHostedZones", "route53:ListResourceRecordSets", "route53:ListTagsForResource"]
resources = ["*"]
}
}
resource "aws_iam_role_policy" "external_dns_route53" {
name_prefix = "${var.prefix}-external-dns-route53-"
role = aws_iam_role.external_dns_irsa.id
policy = data.aws_iam_policy_document.external_dns_route53.json
}

View File

@@ -1,3 +0,0 @@
output "cluster_name" { value = aws_eks_cluster.main.name }
output "aws_region" { value = var.region }
output "external_dns_irsa_role_arn" { value = aws_iam_role.external_dns_irsa.arn }

View File

@@ -1,24 +0,0 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
tls = {
source = "hashicorp/tls"
version = "~> 4.0"
}
}
}
# Authentication: set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
# or configure an AWS profile: export AWS_PROFILE=clst
provider "aws" {
region = var.region
}
variable "region" {
description = "AWS region for the workload environment"
type = string
default = "eu-west-1"
}

View File

@@ -1,17 +0,0 @@
module "cluster" {
source = "../modules/cluster"
project_id = var.project_id
region = var.region
prefix = "clst-dev"
# GKE — small dev nodes
node_machine_type = "e2-standard-2"
node_count = 2
deletion_protection = false
labels = {
environment = "dev"
managed-by = "tofu"
}
}

View File

@@ -1,3 +0,0 @@
output "cluster_name" { value = module.cluster.cluster_name }
output "project_id" { value = module.cluster.project_id }
output "region" { value = module.cluster.region }

View File

@@ -1,26 +0,0 @@
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.0"
}
}
}
# Authentication: use Application Default Credentials (gcloud auth application-default login)
# or set GOOGLE_APPLICATION_CREDENTIALS to a service account key file.
provider "google" {
project = var.project_id
region = var.region
}
variable "project_id" {
description = "GCP project ID for the dev environment"
type = string
}
variable "region" {
description = "GCP region"
type = string
default = "europe-west4"
}

View File

@@ -1,115 +0,0 @@
# ─── Required APIs ────────────────────────────────────────────────────
resource "google_project_service" "compute" {
project = var.project_id
service = "compute.googleapis.com"
disable_on_destroy = false
}
resource "google_project_service" "container" {
project = var.project_id
service = "container.googleapis.com"
disable_on_destroy = false
}
# ─── Networking ───────────────────────────────────────────────────────
resource "google_compute_network" "main" {
project = var.project_id
name = "${var.prefix}-vpc"
auto_create_subnetworks = false
depends_on = [google_project_service.compute]
}
resource "google_compute_subnetwork" "main" {
project = var.project_id
name = "${var.prefix}-subnet"
ip_cidr_range = "10.100.0.0/22"
region = var.region
network = google_compute_network.main.id
# Secondary ranges required for GKE VPC-native cluster
secondary_ip_range {
range_name = "pods"
ip_cidr_range = "10.200.0.0/14" # /14 = ~262k pod IPs
}
secondary_ip_range {
range_name = "services"
ip_cidr_range = "10.204.0.0/20" # /20 = ~4k service IPs
}
}
# ─── GKE Cluster ──────────────────────────────────────────────────────
#
# Regional cluster (3 control-plane replicas) for HA.
# Workload Identity enabled — allows K8s service accounts to impersonate
# Google Service Accounts for keyless access to GCP services.
resource "google_container_cluster" "main" {
project = var.project_id
name = "${var.prefix}-gke"
location = var.region # regional cluster
network = google_compute_network.main.id
subnetwork = google_compute_subnetwork.main.id
# VPC-native cluster with alias IP ranges
ip_allocation_policy {
cluster_secondary_range_name = "pods"
services_secondary_range_name = "services"
}
# Workload Identity pool — enables OIDC token projection for pods
workload_identity_config {
workload_pool = "${var.project_id}.svc.id.goog"
}
# Remove default node pool — we manage our own below
remove_default_node_pool = true
initial_node_count = 1
deletion_protection = var.deletion_protection
dynamic "release_channel" {
for_each = var.kubernetes_version == null ? [1] : []
content {
channel = "STABLE"
}
}
resource_labels = var.labels
depends_on = [google_project_service.container]
}
resource "google_container_node_pool" "main" {
project = var.project_id
name = "${var.prefix}-nodes"
location = var.region
cluster = google_container_cluster.main.name
node_count = var.node_count
node_config {
machine_type = var.node_machine_type
# GKE_METADATA mode is required for Workload Identity
workload_metadata_config {
mode = "GKE_METADATA"
}
oauth_scopes = [
"https://www.googleapis.com/auth/cloud-platform",
]
labels = merge(var.labels, {
role = "worker"
})
}
management {
auto_repair = true
auto_upgrade = true
}
}

View File

@@ -1,16 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
output "cluster_name" {
description = "GKE cluster name"
value = google_container_cluster.main.name
}
output "project_id" {
description = "GCP project ID"
value = var.project_id
}
output "region" {
description = "GCP region"
value = var.region
}

View File

@@ -1,8 +0,0 @@
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.0"
}
}
}

View File

@@ -1,48 +0,0 @@
# ─── Project / Region ────────────────────────────────────────────────
variable "project_id" {
description = "GCP project ID"
type = string
}
variable "region" {
description = "GCP region (e.g., europe-west4, europe-west1)"
type = string
}
variable "prefix" {
description = "Prefix for resource names (e.g., clst-dev)"
type = string
}
# ─── GKE Cluster ─────────────────────────────────────────────────────
variable "node_machine_type" {
description = "GKE node machine type (e.g., e2-standard-2, e2-standard-4)"
type = string
}
variable "node_count" {
description = "Number of nodes per zone (regional cluster spawns nodes in each zone)"
type = number
}
variable "kubernetes_version" {
description = "GKE Kubernetes version channel (null = STABLE release channel)"
type = string
default = null
}
variable "deletion_protection" {
description = "Prevent cluster deletion (set true for production)"
type = bool
default = false
}
# ─── Labels ──────────────────────────────────────────────────────────
variable "labels" {
description = "Labels applied to all resources"
type = map(string)
default = {}
}

View File

@@ -1,17 +0,0 @@
module "cluster" {
source = "../modules/cluster"
project_id = var.project_id
region = var.region
prefix = "clst"
# GKE — general-purpose nodes for production
node_machine_type = "e2-standard-4"
node_count = 3
deletion_protection = true
labels = {
environment = "prod"
managed-by = "tofu"
}
}

View File

@@ -1,3 +0,0 @@
output "cluster_name" { value = module.cluster.cluster_name }
output "project_id" { value = module.cluster.project_id }
output "region" { value = module.cluster.region }

View File

@@ -1,24 +0,0 @@
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.0"
}
}
}
provider "google" {
project = var.project_id
region = var.region
}
variable "project_id" {
description = "GCP project ID for the prod environment"
type = string
}
variable "region" {
description = "GCP region"
type = string
default = "europe-west1"
}

View File

@@ -1,194 +0,0 @@
# =============================================================================
# GCP Workload Cluster
# =============================================================================
# A lean GKE cluster for running application workloads. No managed data
# services — those live on the platform cluster. ArgoCD (on the platform
# cluster) deploys apps to this cluster via the app-of-apps pattern.
#
# Platform components deployed by deploy-workload.sh:
# nginx-ingress, cert-manager, external-dns, external-secrets, alloy
#
# Usage:
# tofu init && tofu plan && tofu apply
# ./sync-tofu-outputs.sh --env gcp-workload
# ./deploy-workload.sh --env gcp-workload
# =============================================================================
variable "prefix" {
description = "Prefix for resource names (e.g., clst-workload)"
type = string
default = "clst-workload"
}
variable "node_machine_type" {
description = "GKE node machine type"
type = string
default = "e2-standard-2"
}
variable "node_count" {
description = "Number of nodes per zone"
type = number
default = 1
}
variable "kubernetes_version" {
description = "GKE Kubernetes version (null = STABLE release channel)"
type = string
default = null
}
variable "deletion_protection" {
description = "Prevent cluster deletion"
type = bool
default = false
}
variable "labels" {
description = "Labels applied to all resources"
type = map(string)
default = {
environment = "workload"
managed-by = "tofu"
}
}
# ─── Required APIs ────────────────────────────────────────────────────
resource "google_project_service" "compute" {
project = var.project_id
service = "compute.googleapis.com"
disable_on_destroy = false
}
resource "google_project_service" "container" {
project = var.project_id
service = "container.googleapis.com"
disable_on_destroy = false
}
resource "google_project_service" "iam" {
project = var.project_id
service = "iam.googleapis.com"
disable_on_destroy = false
}
resource "google_project_service" "dns" {
project = var.project_id
service = "dns.googleapis.com"
disable_on_destroy = false
}
# ─── Networking ───────────────────────────────────────────────────────
resource "google_compute_network" "main" {
project = var.project_id
name = "${var.prefix}-vpc"
auto_create_subnetworks = false
depends_on = [google_project_service.compute]
}
resource "google_compute_subnetwork" "main" {
project = var.project_id
name = "${var.prefix}-subnet"
ip_cidr_range = "10.110.0.0/22"
region = var.region
network = google_compute_network.main.id
secondary_ip_range {
range_name = "pods"
ip_cidr_range = "10.210.0.0/14"
}
secondary_ip_range {
range_name = "services"
ip_cidr_range = "10.214.0.0/20"
}
}
# ─── GKE Cluster ──────────────────────────────────────────────────────
resource "google_container_cluster" "main" {
project = var.project_id
name = "${var.prefix}-gke"
location = var.region
network = google_compute_network.main.id
subnetwork = google_compute_subnetwork.main.id
ip_allocation_policy {
cluster_secondary_range_name = "pods"
services_secondary_range_name = "services"
}
workload_identity_config {
workload_pool = "${var.project_id}.svc.id.goog"
}
remove_default_node_pool = true
initial_node_count = 1
deletion_protection = var.deletion_protection
dynamic "release_channel" {
for_each = var.kubernetes_version == null ? [1] : []
content {
channel = "STABLE"
}
}
resource_labels = var.labels
depends_on = [google_project_service.container]
}
resource "google_container_node_pool" "main" {
project = var.project_id
name = "${var.prefix}-nodes"
location = var.region
cluster = google_container_cluster.main.name
node_count = var.node_count
node_config {
machine_type = var.node_machine_type
workload_metadata_config {
mode = "GKE_METADATA"
}
oauth_scopes = [
"https://www.googleapis.com/auth/cloud-platform",
]
labels = merge(var.labels, { role = "worker" })
}
management {
auto_repair = true
auto_upgrade = true
}
}
# ─── External-DNS Workload Identity ──────────────────────────────────
# Allows external-dns to manage Cloud DNS records for app ingresses.
resource "google_service_account" "external_dns" {
project = var.project_id
account_id = "${var.prefix}-external-dns"
display_name = "External-DNS Service Account (Workload Identity)"
depends_on = [google_project_service.iam]
}
resource "google_project_iam_member" "external_dns_dns_admin" {
project = var.project_id
role = "roles/dns.admin"
member = "serviceAccount:${google_service_account.external_dns.email}"
}
resource "google_service_account_iam_member" "external_dns_workload_identity" {
service_account_id = google_service_account.external_dns.name
role = "roles/iam.workloadIdentityUser"
member = "serviceAccount:${var.project_id}.svc.id.goog[external-dns/external-dns]"
}

View File

@@ -1,4 +0,0 @@
output "cluster_name" { value = google_container_cluster.main.name }
output "project_id" { value = var.project_id }
output "region" { value = var.region }
output "external_dns_gsa_email" { value = google_service_account.external_dns.email }

View File

@@ -1,26 +0,0 @@
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "~> 6.0"
}
}
}
# Authentication: use Application Default Credentials (gcloud auth application-default login)
# or set GOOGLE_APPLICATION_CREDENTIALS to a service account key file.
provider "google" {
project = var.project_id
region = var.region
}
variable "project_id" {
description = "GCP project ID for the workload environment"
type = string
}
variable "region" {
description = "GCP region"
type = string
default = "europe-west4"
}

View File

@@ -1,14 +0,0 @@
module "cluster" {
source = "../modules/cluster"
prefix = "clst-dev"
zone = "no-svg1"
node_plan = "DEV-1xCPU-2GB"
node_count = 2
network_cidr = "10.100.0.0/24"
tags = {
Environment = "dev"
ManagedBy = "tofu"
}
}

View File

@@ -1,13 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
output "cluster_id" {
value = module.cluster.cluster_id
}
output "cluster_name" {
value = module.cluster.cluster_name
}
output "zone" {
value = module.cluster.zone
}

View File

@@ -1,14 +0,0 @@
terraform {
required_version = ">= 1.0"
required_providers {
upcloud = {
source = "UpCloudLtd/upcloud"
version = "~> 5.0"
}
}
}
provider "upcloud" {
# Set via environment variables: UPCLOUD_USERNAME, UPCLOUD_PASSWORD
}

View File

@@ -1,56 +0,0 @@
# Router for the private network
resource "upcloud_router" "kubernetes" {
name = "${var.prefix}-${var.cluster_name}-router"
}
# Gateway for internet connectivity
resource "upcloud_gateway" "kubernetes" {
name = "${var.prefix}-${var.cluster_name}-gateway"
zone = var.zone
features = ["nat"]
router {
id = upcloud_router.kubernetes.id
}
}
# Private network for the Kubernetes cluster
resource "upcloud_network" "kubernetes" {
name = "${var.prefix}-${var.cluster_name}-network"
zone = var.zone
router = upcloud_router.kubernetes.id
ip_network {
address = var.network_cidr
dhcp = true
dhcp_default_route = true
family = "IPv4"
gateway = cidrhost(var.network_cidr, 1)
}
depends_on = [upcloud_gateway.kubernetes]
}
# Kubernetes cluster
resource "upcloud_kubernetes_cluster" "main" {
name = "${var.prefix}-${var.cluster_name}"
zone = var.zone
network = upcloud_network.kubernetes.id
control_plane_ip_filter = var.control_plane_ip_filter
private_node_groups = true
}
# Node group for worker nodes
resource "upcloud_kubernetes_node_group" "workers" {
cluster = upcloud_kubernetes_cluster.main.id
name = "${var.prefix}-${var.cluster_name}-workers"
node_count = var.node_count
plan = var.node_plan
anti_affinity = var.node_count > 1
labels = {
prefix = var.prefix
cluster = var.cluster_name
role = "worker"
env = lookup(var.tags, "Environment", "dev")
}
}

View File

@@ -1,31 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
output "cluster_id" {
description = "The ID of the Kubernetes cluster"
value = upcloud_kubernetes_cluster.main.id
}
output "cluster_name" {
description = "The name of the Kubernetes cluster"
value = upcloud_kubernetes_cluster.main.name
}
output "network_id" {
description = "The ID of the private network"
value = upcloud_network.kubernetes.id
}
output "network_cidr" {
description = "The CIDR block of the private network"
value = var.network_cidr
}
output "kubernetes_version" {
description = "The Kubernetes version of the cluster"
value = upcloud_kubernetes_cluster.main.version
}
output "zone" {
description = "The zone where the cluster is deployed"
value = var.zone
}

View File

@@ -1,8 +0,0 @@
terraform {
required_providers {
upcloud = {
source = "UpCloudLtd/upcloud"
version = "~> 5.0"
}
}
}

View File

@@ -1,44 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
variable "prefix" {
description = "Prefix for resource names"
type = string
}
variable "cluster_name" {
description = "Name of the Kubernetes cluster"
type = string
default = "main"
}
variable "zone" {
description = "UpCloud zone"
type = string
}
variable "node_plan" {
description = "UpCloud server plan for worker nodes"
type = string
}
variable "node_count" {
description = "Number of worker nodes"
type = number
}
variable "network_cidr" {
description = "CIDR block for the private network"
type = string
default = "10.100.0.0/24"
}
variable "control_plane_ip_filter" {
description = "CIDRs allowed to access the K8s API"
type = list(string)
default = ["0.0.0.0/0"]
}
variable "tags" {
description = "Labels to apply to resources"
type = map(string)
}

View File

@@ -1,16 +0,0 @@
module "cluster" {
source = "../modules/cluster"
prefix = "clst"
zone = "de-fra1"
node_plan = "4xCPU-8GB"
node_count = 3
network_cidr = "10.100.0.0/24"
control_plane_ip_filter = ["0.0.0.0/0"] # TODO: restrict to known CIDRs
tags = {
Environment = "prod"
ManagedBy = "tofu"
}
}

View File

@@ -1,13 +0,0 @@
# ─── Cluster ─────────────────────────────────────────────────────────
output "cluster_id" {
value = module.cluster.cluster_id
}
output "cluster_name" {
value = module.cluster.cluster_name
}
output "zone" {
value = module.cluster.zone
}

View File

@@ -1,14 +0,0 @@
terraform {
required_version = ">= 1.0"
required_providers {
upcloud = {
source = "UpCloudLtd/upcloud"
version = "~> 5.0"
}
}
}
provider "upcloud" {
# Set via environment variables: UPCLOUD_USERNAME, UPCLOUD_PASSWORD
}

View File

@@ -1,116 +0,0 @@
# =============================================================================
# UpCloud Workload Cluster
# =============================================================================
# A lean UCS cluster for running application workloads. No managed data
# services — those live on the platform cluster. ArgoCD (on the platform
# cluster) deploys apps to this cluster via the app-of-apps pattern.
#
# Platform components deployed by deploy-workload.sh:
# nginx-ingress, cert-manager, external-dns, external-secrets, alloy
#
# Usage:
# tofu init && tofu plan && tofu apply
# ./sync-tofu-outputs.sh --env upcloud-workload
# ./deploy-workload.sh --env upcloud-workload
# =============================================================================
variable "prefix" {
description = "Prefix for resource names"
type = string
default = "clst-workload"
}
variable "zone" {
description = "UpCloud zone"
type = string
default = "fi-hel1"
}
variable "node_plan" {
description = "UpCloud server plan for worker nodes"
type = string
default = "2xCPU-4GB"
}
variable "node_count" {
description = "Number of worker nodes"
type = number
default = 2
}
variable "network_cidr" {
description = "CIDR block for the private network"
type = string
default = "10.110.0.0/24"
}
variable "control_plane_ip_filter" {
description = "CIDRs allowed to access the K8s API"
type = list(string)
default = ["0.0.0.0/0"]
}
variable "tags" {
description = "Labels to apply to resources"
type = map(string)
default = {
Environment = "workload"
ManagedBy = "tofu"
}
}
# ─── Networking ───────────────────────────────────────────────────────
resource "upcloud_router" "kubernetes" {
name = "${var.prefix}-workload-router"
}
resource "upcloud_gateway" "kubernetes" {
name = "${var.prefix}-workload-gateway"
zone = var.zone
features = ["nat"]
router {
id = upcloud_router.kubernetes.id
}
}
resource "upcloud_network" "kubernetes" {
name = "${var.prefix}-workload-network"
zone = var.zone
router = upcloud_router.kubernetes.id
ip_network {
address = var.network_cidr
dhcp = true
dhcp_default_route = true
family = "IPv4"
gateway = cidrhost(var.network_cidr, 1)
}
depends_on = [upcloud_gateway.kubernetes]
}
# ─── Kubernetes Cluster ───────────────────────────────────────────────
resource "upcloud_kubernetes_cluster" "main" {
name = "${var.prefix}-workload"
zone = var.zone
network = upcloud_network.kubernetes.id
control_plane_ip_filter = var.control_plane_ip_filter
private_node_groups = true
}
resource "upcloud_kubernetes_node_group" "workers" {
cluster = upcloud_kubernetes_cluster.main.id
name = "${var.prefix}-workload-workers"
node_count = var.node_count
plan = var.node_plan
anti_affinity = var.node_count > 1
labels = {
prefix = var.prefix
cluster = "workload"
role = "worker"
env = lookup(var.tags, "Environment", "workload")
}
}

View File

@@ -1,3 +0,0 @@
output "cluster_name" { value = upcloud_kubernetes_cluster.main.name }
output "cluster_id" { value = upcloud_kubernetes_cluster.main.id }
output "zone" { value = var.zone }

View File

@@ -1,14 +0,0 @@
terraform {
required_version = ">= 1.0"
required_providers {
upcloud = {
source = "UpCloudLtd/upcloud"
version = "~> 5.0"
}
}
}
provider "upcloud" {
# Set via environment variables: UPCLOUD_USERNAME, UPCLOUD_PASSWORD
}

View File

@@ -1,66 +0,0 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
PROJECT_ROOT="$(dirname "$TOFU_ROOT")"
CLUSTER="${1:?Usage: $0 <cluster> (e.g., aks-dev, eks-prod)}"
PLATFORM="${CLUSTER%%-*}"
ENV="${CLUSTER#*-}"
KUBECONFIG_FILE="$PROJECT_ROOT/private/$CLUSTER/kubeconfig"
if [[ -f "$KUBECONFIG_FILE" ]]; then
echo "Kubeconfig already exists: $KUBECONFIG_FILE"
echo ""
echo " export KUBECONFIG=$KUBECONFIG_FILE"
else
echo "No cached kubeconfig. Fetching from platform..."
# Load platform credentials
ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
if [[ -f "$ENV_FILE" ]]; then
set -a; source "$ENV_FILE"; set +a
fi
TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
mkdir -p "$(dirname "$KUBECONFIG_FILE")"
case "$PLATFORM" in
aks)
cd "$TOFU_DIR"
RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "$CLUSTER-rg")
NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_FILE" --overwrite-existing
;;
eks)
cd "$TOFU_DIR"
NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_FILE"
;;
gke)
cd "$TOFU_DIR"
NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "$CLUSTER")
REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT"
cp ~/.kube/config "$KUBECONFIG_FILE"
;;
upc)
cd "$TOFU_DIR"
CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
upctl kubernetes config "$CLUSTER_ID" > "$KUBECONFIG_FILE"
;;
*)
echo "Error: unknown platform '$PLATFORM'"
exit 1
;;
esac
chmod 600 "$KUBECONFIG_FILE"
echo "Kubeconfig saved: $KUBECONFIG_FILE"
echo ""
echo " export KUBECONFIG=$KUBECONFIG_FILE"
fi

View File

@@ -1,246 +0,0 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TOFU_ROOT="$(dirname "$SCRIPT_DIR")"
PROJECT_ROOT="$(dirname "$TOFU_ROOT")"
# ─── Usage ────────────────────────────────────────────────────────────
usage() {
cat <<EOF
Usage: $0 <cluster> [options]
Provision a Kubernetes cluster using OpenTofu.
Mirrors bootstrap.sh convention: cluster = <platform>-<env>
Clusters: aks-dev | aks-prod | eks-dev | eks-prod
gke-dev | gke-prod | upc-dev | upc-prod
<platform>-workload (for workload clusters)
Options:
--plan Plan only, don't apply
--destroy Destroy the cluster (use teardown-cluster.sh instead)
--auto Skip confirmation prompts
-h, --help Show this help
Examples:
$0 aks-dev
$0 eks-prod --plan
$0 upc-dev --auto
Prerequisites:
- tofu, kubectl, helm installed
- Platform credentials in .tofu/configs/<platform>.env
- Cluster config in clusters/<cluster>.yaml
After provisioning, run:
./bootstrap.sh <cluster>
EOF
exit "${1:-0}"
}
# ─── Parse arguments ──────────────────────────────────────────────────
CLUSTER=""
PLAN_ONLY=false
DESTROY=false
AUTO_APPROVE=false
while [[ $# -gt 0 ]]; do
case "$1" in
--plan) PLAN_ONLY=true; shift ;;
--destroy) DESTROY=true; shift ;;
--auto) AUTO_APPROVE=true; shift ;;
-h|--help) usage 0 ;;
-*) echo "Unknown option: $1"; usage 1 ;;
*)
if [[ -z "$CLUSTER" ]]; then
CLUSTER="$1"
else
echo "Error: unexpected argument '$1'"
usage 1
fi
shift
;;
esac
done
[[ -z "$CLUSTER" ]] && { echo "Error: <cluster> argument required"; usage 1; }
# ─── Map cluster → platform + env ────────────────────────────────────
PLATFORM="${CLUSTER%%-*}" # aks-dev → aks
ENV="${CLUSTER#*-}" # aks-dev → dev
case "$PLATFORM" in
aks|eks|gke|upc) ;;
*) echo "Error: unknown platform '$PLATFORM'. Expected: aks, eks, gke, upc"; exit 1 ;;
esac
TOFU_DIR="$TOFU_ROOT/platforms/$PLATFORM/$ENV"
if [[ ! -d "$TOFU_DIR" ]]; then
echo "Error: tofu directory not found: $TOFU_DIR"
echo "Available environments for $PLATFORM:"
ls -1 "$TOFU_ROOT/platforms/$PLATFORM/" 2>/dev/null | grep -v modules || echo " (none)"
exit 1
fi
echo "========================================="
echo " Kubernetes Cluster Setup"
echo "========================================="
echo ""
echo " Cluster: $CLUSTER"
echo " Platform: $PLATFORM"
echo " Env: $ENV"
echo " Tofu dir: $TOFU_DIR"
echo ""
# ─── Prerequisites ────────────────────────────────────────────────────
echo "=== Checking Prerequisites ==="
command -v tofu >/dev/null 2>&1 || { echo "Error: tofu is not installed."; exit 1; }
command -v kubectl >/dev/null 2>&1 || { echo "Error: kubectl is not installed."; exit 1; }
command -v helm >/dev/null 2>&1 || { echo "Error: helm is not installed."; exit 1; }
echo " tofu, kubectl, helm: OK"
# ─── Load platform credentials ────────────────────────────────────────
ENV_FILE="$TOFU_ROOT/configs/$PLATFORM.env"
if [[ -f "$ENV_FILE" ]]; then
echo " Loading credentials from configs/$PLATFORM.env"
set -a
# shellcheck disable=SC1090
source "$ENV_FILE"
set +a
else
echo " Warning: $ENV_FILE not found — using existing environment/CLI auth"
echo " Copy configs/$PLATFORM.env.example → configs/$PLATFORM.env to configure"
fi
# ─── Load cluster config (if exists) ──────────────────────────────────
CLUSTER_CONFIG="$PROJECT_ROOT/clusters/$CLUSTER.yaml"
if [[ -f "$CLUSTER_CONFIG" ]]; then
echo " Loading cluster config from clusters/$CLUSTER.yaml"
if command -v yq >/dev/null 2>&1; then
eval "$(yq -r 'to_entries[] | "export CLUSTER_\(.key)=\"\(.value)\""' "$CLUSTER_CONFIG")"
echo " Cluster name: ${CLUSTER_clusterName:-$CLUSTER}"
else
echo " Warning: yq not installed — cluster config not loaded"
fi
else
echo " Warning: $CLUSTER_CONFIG not found — using defaults"
fi
echo ""
# ─── Run OpenTofu ─────────────────────────────────────────────────────
cd "$TOFU_DIR"
echo "=== Initializing OpenTofu ==="
tofu init
echo ""
if $DESTROY; then
echo "=== Planning Destruction ==="
tofu plan -destroy -out=tfplan
if ! $AUTO_APPROVE; then
echo ""
read -rp "DESTROY cluster $CLUSTER? This is irreversible. (yes/no) " REPLY
[[ "$REPLY" == "yes" ]] || { echo "Cancelled."; exit 1; }
fi
echo "Destroying infrastructure..."
tofu apply tfplan
echo ""
echo "=== Cluster $CLUSTER Destroyed ==="
elif $PLAN_ONLY; then
echo "=== Planning Infrastructure ==="
tofu plan
echo ""
echo "=== Plan complete (--plan mode, no changes applied) ==="
else
echo "=== Planning Infrastructure ==="
tofu plan -out=tfplan
if ! $AUTO_APPROVE; then
echo ""
read -rp "Apply this plan for $CLUSTER? (y/n) " -n 1 REPLY
echo
[[ "$REPLY" =~ ^[Yy]$ ]] || { echo "Cancelled."; exit 1; }
fi
echo "Applying infrastructure..."
tofu apply tfplan
# ─── Save kubeconfig ──────────────────────────────────────────────
KUBECONFIG_DIR="$PROJECT_ROOT/private/$CLUSTER"
mkdir -p "$KUBECONFIG_DIR"
KUBECONFIG_FILE="$KUBECONFIG_DIR/kubeconfig"
echo ""
echo "=== Saving Kubeconfig ==="
case "$PLATFORM" in
aks)
if tofu output -raw kubeconfig > "$KUBECONFIG_FILE" 2>/dev/null; then
echo " Saved from tofu output"
else
echo " Fetching from Azure CLI..."
RG=$(tofu output -raw resource_group_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}-rg")
NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
az aks get-credentials --resource-group "$RG" --name "$NAME" --file "$KUBECONFIG_FILE" --overwrite-existing
fi
;;
eks)
NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
REGION=$(tofu output -raw aws_region 2>/dev/null || echo "${AWS_REGION:-eu-west-1}")
aws eks update-kubeconfig --name "$NAME" --region "$REGION" --kubeconfig "$KUBECONFIG_FILE"
;;
gke)
NAME=$(tofu output -raw cluster_name 2>/dev/null || echo "${CLUSTER_clusterName:-$CLUSTER}")
REGION=$(tofu output -raw region 2>/dev/null || echo "${GCP_REGION:-europe-west4}")
PROJECT=$(tofu output -raw project_id 2>/dev/null || echo "${GCP_PROJECT_ID:-}")
gcloud container clusters get-credentials "$NAME" --region "$REGION" --project "$PROJECT" 2>/dev/null \
&& cp ~/.kube/config "$KUBECONFIG_FILE" \
|| echo " Warning: could not fetch kubeconfig via gcloud"
;;
upc)
if tofu output -raw kubeconfig > "$KUBECONFIG_FILE" 2>/dev/null; then
echo " Saved from tofu output"
else
CLUSTER_ID=$(tofu output -raw cluster_id 2>/dev/null || echo "${UPCLOUD_CLUSTER_ID:-}")
if [[ -n "$CLUSTER_ID" ]]; then
upctl kubernetes config "$CLUSTER_ID" > "$KUBECONFIG_FILE"
else
echo " Warning: could not determine cluster ID for kubeconfig"
fi
fi
;;
esac
if [[ -f "$KUBECONFIG_FILE" ]]; then
chmod 600 "$KUBECONFIG_FILE"
echo " Kubeconfig: $KUBECONFIG_FILE"
fi
# ─── Wait for nodes ──────────────────────────────────────────────
echo ""
echo "=== Waiting for Cluster Nodes ==="
export KUBECONFIG="$KUBECONFIG_FILE"
if kubectl wait --for=condition=Ready nodes --all --timeout=300s 2>/dev/null; then
echo " All nodes ready"
else
echo " Warning: nodes not ready within timeout — check cluster status"
fi
# ─── Summary ─────────────────────────────────────────────────────
echo ""
echo "========================================="
echo " Cluster $CLUSTER Provisioned"
echo "========================================="
echo ""
echo " Kubeconfig: $KUBECONFIG_FILE"
echo ""
echo " Next steps:"
echo " export KUBECONFIG=$KUBECONFIG_FILE"
echo " ./bootstrap.sh $CLUSTER"
echo ""
fi

View File

@@ -1,7 +0,0 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Delegate to setup-cluster.sh with --destroy flag
exec "$SCRIPT_DIR/setup-cluster.sh" "$@" --destroy

102
README.md
View File

@@ -1,9 +1,9 @@
# Kubernetes Cluster - GitOps Configuration
> **Kubernetes cluster bootstrapping and GitOps configuration repository** using ArgoCD for multi-cloud Kubernetes (UpCloud, AWS EKS, Azure AKS, GCP GKE)
> **Kubernetes cluster bootstrapping and GitOps configuration repository** using ArgoCD for UpCloud Managed Kubernetes
[![GitOps](https://img.shields.io/badge/GitOps-ArgoCD-blue)](https://argoproj.github.io/cd/)
[![Kubernetes](https://img.shields.io/badge/Kubernetes-Multi--Cloud-orange)]()
[![Kubernetes](https://img.shields.io/badge/Kubernetes-UpCloud-orange)](https://upcloud.com/)
---
@@ -57,7 +57,7 @@ This repository contains the complete GitOps configuration for our Kubernetes cl
### What's Inside
- **Infrastructure Applications**: Traefik, Cert-Manager, Kyverno, Prometheus, Grafana, Loki, Tempo, Sealed Secrets, Homepage (platform dashboard)
- **Infrastructure Applications**: Traefik, Cert-Manager, Kyverno, Prometheus, Grafana, Loki, Tempo, Sealed Secrets
- **Business Applications**: MCP10X, MusicMan, Dot-AI Stack, ArgoCD MCP
- **Policies**: Kyverno security policies for secret management, namespace controls, pod verification
- **Monitoring**: Full observability stack with metrics, logs, traces, and alerting
@@ -80,70 +80,35 @@ This repository contains the complete GitOps configuration for our Kubernetes cl
```
.
├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps)
├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster)
├── .tofu/ # Infrastructure provisioning (OpenTofu)
│ ├── platforms/ # Per-platform IaC (one dir per cloud)
│ │ ├── aks/ # Azure AKS (modules/ + dev/ + prod/ + workload/)
│ │ ├── eks/ # AWS EKS
│ │ ├── gke/ # GCP GKE
│ │ └── upc/ # UpCloud
│ ├── configs/ # Platform credentials (git-ignored)
│ │ └── *.env.example # Template for each platform
│ └── scripts/ # Cluster lifecycle scripts
│ ├── setup-cluster.sh # Create cluster: ./setup-cluster.sh aks-dev
│ ├── teardown-cluster.sh
│ └── get-kubeconfig.sh
├── clusters/ # Cluster metadata (domain, trustedIPs, etc.)
├── bootstrap.sh # Cluster initialization script
├── _app-of-apps.yaml # Root ArgoCD Application (App-of-Apps pattern)
├── infra/ # Infrastructure ArgoCD Applications (Kustomize multi-cluster)
│ ├── base/ # Base ArgoCD Application manifests (one dir per component)
│ │ ├── kustomization.yaml # Aggregates all component subdirectories
│ │ ├── traefik-application/
│ │ ├── kustomization.yaml
│ │ │ └── traefik-application.yaml
│ │ ├── keycloak/
│ │ │ ├── kustomization.yaml
│ │ │ └── keycloak.yaml
│ │ ├── grafana/
│ │ ├── prometheus/
│ │ ── ... # Each component in its own subdirectory
│ └── secrets/
│ ├── overlays/ # Per-cluster overrides (Kustomize)
│ │ ── upc-dev/ # UpCloud Dev — includes all base components
│ │ ├── upc-prod/ # UpCloud Prod — all components + patches
│ │ ├── aks-dev/ # Azure AKS Dev — selective components only
│ │ ├── aks-prod/ # Azure AKS Prod
│ │ ├── eks-dev/ # AWS EKS Dev
│ │ ├── eks-prod/ # AWS EKS Prod
│ │ ├── gke-dev/ # GCP GKE Dev
│ │ └── gke-prod/ # GCP GKE Prod
│ ├── base/ # Base ArgoCD Application manifests (EU defaults)
│ │ ├── kustomization.yaml
│ │ ├── traefik-application.yaml
│ │ ├── keycloak.yaml
│ │ ── grafana.yaml
│ │ ├── gitea.yaml
│ │ ├── gitea-actions.yaml
│ │ ├── tempo.yaml
│ │ ├── renovate.yaml
│ │ ├── ... # All other Application manifests
│ │ ── secrets.yaml
├── overlays/ # Per-cluster overrides
│ ├── upc-dev/ # UpCloud Dev cluster (uses base as-is)
│ │ ── upc-prod/ # UpCloud Prod cluster (patches value paths)
│ ├── dashboards/ # Grafana dashboard ConfigMaps
│ └── values/ # Helm value overrides
│ ├── base/ # Shared cloud-agnostic values
│ ├── upc-dev/ # UpCloud Dev (storage, LB, pricing)
── upc-prod/ # UpCloud Prod
│ ├── eks-dev/ # AWS EKS Dev
│ ├── eks-prod/ # AWS EKS Prod
│ ├── aks-dev/ # Azure AKS Dev
│ ├── aks-prod/ # Azure AKS Prod
│ ├── gke-dev/ # GCP GKE Dev
│ └── gke-prod/ # GCP GKE Prod
│ ├── base/ # Shared values (all clusters)
│ ├── upc-dev/ # UpCloud Dev-specific values
── upc-prod/ # UpCloud Prod-specific values
├── apps/ # Business Applications (Kustomize, same pattern as infra)
│ ├── base/ # One subdirectory per app
│ ├── kustomization.yaml
│ ├── musicman/
│ ├── mcp10x/
│ │ ├── dot-ai-stack/
│ │ ├── ts-mcp/
│ │ └── argo-mcp/
│ └── overlays/ # Per-cluster: cherry-pick or include all
│ ├── upc-dev/ # All apps
│ ├── upc-prod/ # All apps + patches
│ └── aks-dev/ # Selective apps only
├── apps/ # Business Applications
│ ├── mcp10x.yaml
│ ├── musicman.yaml
│ ├── dot-ai-stack.yaml
└── argo-mcp.yaml
├── cluster-resources/ # Cluster-wide Kubernetes resources
│ ├── letsencrypt-issuer.yaml
@@ -378,6 +343,7 @@ kubectl patch application myapp -n argocd \
| **Fluent-Bit** | Log shipping | `monitoring` | DaemonSet |
| **OpenCost** | Cost monitoring | `monitoring` | 1 |
| **Renovate** | Dependency updates | `renovate` | CronJob |
| **Trivy** | Vulnerability scanning | `trivy-system` | 1 |
**Full specs**: [Technical Reference - Infrastructure Components](docs/REFERENCE.md#infrastructure-components)
@@ -395,7 +361,7 @@ kubectl patch application myapp -n argocd \
## 📖 Key Concepts
### App-of-Apps Pattern
`_app-of-apps-{cluster}.yaml` is the root Application that manages all other Applications in `infra/`. Each component in `infra/base/` lives in its own subdirectory (e.g., `infra/base/grafana/`). Overlays can either include **all** components (via `../../base`) or **cherry-pick** specific ones (via `../../base/grafana`, `../../base/prometheus`, etc.). Per-cluster patches swap Helm value file paths. Supported clusters: `upc-dev`, `upc-prod`, `eks-dev`, `eks-prod`, `aks-dev`, `aks-prod`, `gke-dev`, `gke-prod`.
`_app-of-apps.yaml` is the root Application that manages all other Applications in `infra/`. Kustomize overlays in `infra/overlays/{upc-dev,upc-prod}/` render the base Applications with per-cluster patches (e.g., swapping value file paths from `upc-dev` to `upc-prod`).
### Multi-Source Pattern
Applications reference both:
@@ -492,14 +458,16 @@ Documentation lives in `docs/`. To update:
## 📝 Notes
### Current Environment
- **Provider**: Multi-cloud (UpCloud, AWS EKS, Azure AKS, GCP GKE)
- **Active clusters**: UpCloud (upc-dev, upc-prod)
- **Provider**: UpCloud Managed Kubernetes
- **Environment**: Production (internal use only)
- **Clusters**: Multi-cluster (upc-dev, upc-prod) via Kustomize overlays
- **Auth**: Disabled for ArgoCD (internal access)
- **Backup**: Gitea daily backup to S3-compatible storage
- **Backup**: None (cluster rebuildable via GitOps)
### Known Limitations
- No automated backups (yet)
- Secret rotation not automated
- Multi-cluster limited to upc-dev and upc-prod environments
- DNS management is manual
**Future improvements**: See [Operations Runbook - Disaster Recovery](docs/OPERATIONS-RUNBOOK.md#disaster-recovery)
@@ -536,7 +504,7 @@ Internal use only. Not for public distribution.
---
**Last Updated**: 2026-04-22
**Last Updated**: 2026-03-16
**Documentation Version**: 1.0.0
**🚀 Ready to get started? Check out the [Documentation Index](docs/README.md)!**

View File

@@ -1,6 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- argo-mcp.yaml
- argocdmcp-auth-oidc-sealed.yaml
- argocd-mcp-credentials.yaml

View File

@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- dot-ai-stack.yaml
- dot-ai-secrets.yaml

View File

@@ -1,8 +1,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- dot-ai-stack
- mcp10x
- musicman
- ts-mcp
- argo-mcp
- dot-ai-stack.yaml
- mcp10x.yaml
- musicman.yaml
- argo-mcp.yaml

View File

@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- mcp10x.yaml
- forte10x-app-credentials-sealed.yaml

View File

@@ -36,8 +36,13 @@ spec:
automated:
prune: true
selfHeal: true
allowEmpty: false
syncOptions:
- CreateNamespace=true
- Validate=true
- ServerSideApply=false
- Replace=false
retry:
limit: 5
backoff:

View File

@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- musicman.yaml
- musicman-credentials.yaml

View File

@@ -1,5 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ts-mcp.yaml
- ts-mcp-secrets-sealed.yaml

View File

@@ -1,13 +0,0 @@
---
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
name: ts-mcp-secrets
namespace: ts-mcp
spec:
encryptedData:
AZURE_CLIENT_SECRET: AgCWj525+NHkZ8XG97hEe4RS0SDC0QIGDXmEvzSlIqJQ9XVZEeKxVuAYmJ+w/HH7zBXD3qlZISeOPKn3FbMEeRukmYK0d5PsH26tRUMPoMzwWCuQkZIQ83uX9Pz/wMiqW8aZFIxpdEiUgVdanxHSFoDRPC1VlSEtV9B9yN2MgXBID5s0oje5BM9ttc4WVRe6+9pMeaOC6u+YUgcfY7xPLetZfC9nQO4zn4jYhoQXfAddwMzNODvQNGPzIv6PXDXJweTwdmtGaxM6eDdcCJI/30bEV9prA5m6UlgTZ/Qp+onU70KdkBA9gM9tMMVUR6j/2sbWzqMP/rVaFLeUH1PjHv15n4EieWyuDyYEfmZNDFXc7O9RIK6P0jCIE+t3myxK2ZQ7cfXprdOSj94au0qP6leat0UUVoc9CFJHHtrNxXYWl7IYVhwvIQCMSgO2qoAXkdW4wKVJAcbJadJjoL2pWxzjaD4GgnUaAxWBANqZI2lD8CED4VfUVMB0ZUYRS/zvy/eqIGlT8WbzwTYFi3YDZRvAUIknxaWEavIG4x52d0FqTmFYY06W53fGYfBrUjJI54GWYyBpKdZTf7b/AlAN0+kwkk6OqsUWwWDqxR7LVCcPhjSIKd/THp+Tbq9z5TiPIHxOO9V60u51f8IoQrEgQfNov7CEGQZ8B9HUGObjNc5MhujzBJasMhrUcd2Ddk6KWk07B7223p/gIEM+81ZWQYUcc29+U/j1dQyRNZy/TC56ywe5DDBJSoGp
template:
metadata:
name: ts-mcp-secrets
namespace: ts-mcp

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../base/musicman

View File

@@ -1,47 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: dbunk-demo
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "12"
labels:
app.kubernetes.io/name: dbunk-demo
app.kubernetes.io/part-of: apps
app.kubernetes.io/managed-by: argocd
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
project: default
sources:
- repoURL: ssh://git@git.forteapps.net:2222/Forte/forte-helm.git
path: forteapp
targetRevision: HEAD
helm:
valueFiles:
- $values/dbunk-demo/values.yaml
- repoURL: ssh://git@git.forteapps.net:2222/Forte/helm-prod-values.git
targetRevision: HEAD
ref: values
destination:
server: https://kubernetes.default.svc
namespace: dbunk-demo
syncPolicy:
automated:
prune: true
selfHeal: true
allowEmpty: false
syncOptions:
- CreateNamespace=true
- Validate=true
- ServerSideApply=true
retry:
limit: 5
backoff:
duration: 5s
factor: 2
maxDuration: 3m

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- dbunk-demo.yaml

View File

@@ -2,7 +2,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../base
- dbunk-demo
# No patches needed — base already has "upc-dev" paths
# upc-dev is the default/base cluster

View File

@@ -1,5 +1,4 @@
#!/bin/zsh
# in case of $'\r': command not found error, run command below first
# sed -i 's/\r$//' ./bootstrap.sh
@@ -18,7 +17,7 @@ echo "Bootstrapping cluster: ${clusterName} (${CLUSTER})..."
Bootstrap()
{
ArgoCd
Gitea
# Gitea
}
@@ -28,8 +27,8 @@ Bootstrap()
Gitea()
{
echo "Installing secret..."
kubectl apply -f "private/${CLUSTER}/gitea-repo-main.yaml"
kubectl apply -f "private/${CLUSTER}/main.key"
kubectl apply -f private/gitea-repo-main.yaml
kubectl apply -f private/main.key
}
############################################################
@@ -37,15 +36,10 @@ Gitea()
############################################################
ArgoCd()
{
# Pre-create ConfigMap for repo-server env (must exist before Helm upgrade)
kubectl create namespace argocd --dry-run=client -o yaml | kubectl apply -f -
kubectl apply -f cluster-resources/argocd-repo-server-config.yaml
# install argocd
echo "Installing ArgoCD..."
helm upgrade --install argocd argo-cd \
--repo https://argoproj.github.io/argo-helm \
--version "7.8.0" \
--namespace argocd --create-namespace \
--values infra/values/base/argocd-values.yaml \
--values "infra/values/${CLUSTER}/argocd-values.yaml" \
@@ -55,4 +49,4 @@ ArgoCd()
kubectl apply -f "_app-of-apps-${CLUSTER}.yaml" -n argocd
}
Bootstrap
# Bootstrap

View File

@@ -1,83 +0,0 @@
# CronJob: syncs OIDC client secret from registrar-managed
# argocd-oidc-credentials into argocd-secret (oidc.clientSecret key).
# Runs every 2 min. No-ops if source secret doesn't exist yet
# (safe for fresh deploys before Keycloak is up).
apiVersion: v1
kind: ServiceAccount
metadata:
name: argocd-oidc-sync
namespace: argocd
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: argocd-oidc-sync
namespace: argocd
rules:
- apiGroups: [""]
resources: ["secrets"]
resourceNames: ["argocd-oidc-credentials", "argocd-secret"]
verbs: ["get", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: argocd-oidc-sync
namespace: argocd
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: argocd-oidc-sync
subjects:
- kind: ServiceAccount
name: argocd-oidc-sync
namespace: argocd
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: argocd-oidc-sync
namespace: argocd
spec:
schedule: "*/2 * * * *"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
spec:
serviceAccountName: argocd-oidc-sync
restartPolicy: Never
containers:
- name: sync
image: bitnami/kubectl:latest
command: ["/bin/sh", "-c"]
args:
- |
set -e
# Exit gracefully if source secret doesn't exist yet
if ! kubectl get secret argocd-oidc-credentials -n argocd >/dev/null 2>&1; then
echo "argocd-oidc-credentials not found — skipping (Keycloak not ready yet)"
exit 0
fi
# Read current OIDC client secret
NEW_SECRET=$(kubectl get secret argocd-oidc-credentials -n argocd \
-o jsonpath='{.data.client-secret}' | base64 -d)
# Read current value in argocd-secret (if any)
CURRENT=$(kubectl get secret argocd-secret -n argocd \
-o jsonpath='{.data.oidc\.clientSecret}' 2>/dev/null | base64 -d || echo "")
# Only patch if changed
if [ "$NEW_SECRET" = "$CURRENT" ]; then
echo "oidc.clientSecret already up to date"
exit 0
fi
kubectl patch secret argocd-secret -n argocd --type merge \
-p "{\"stringData\":{\"oidc.clientSecret\":\"${NEW_SECRET}\"}}"
echo "Patched argocd-secret with oidc.clientSecret"

View File

@@ -1,9 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: argocd-repo-server-config
namespace: argocd
data:
# Disable git submodule checkout - submodules (e.g. shared-prompts)
# are not needed for K8s manifest generation
ARGOCD_GIT_MODULES_ENABLED: "false"

View File

@@ -1,15 +0,0 @@
---
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
creationTimestamp: null
name: azuredns-config
namespace: cert-manager
spec:
encryptedData:
client-secret: AgBPCix6yTt8gXV2pMfRx6weWtLWUeMa/cBwuaDBZkqZE4CTSDxjQWWK8ul5OtndlEX7+gXV2HpuAmHRhGC8P1z39yWRxIDbKf+AH4JOGSIfu57tnrGvzAwjanKtqeFCEM67Y42Kz1rahCEtdwJj2YIL9oVdCtRPkpAJrAJclqmrCvJZtqQYcDeEn5ONK5Gchxc6x2/J0U0LTjLz/MP74CEKw3CiZ+9VKA4ppvPqjqoE4yyAXSglSnhYTqtMZpFg5sr+at6rAB1ufmtiS70Vfks46OL0/ZJjdS6h5wjIZhl/1DgIFXqc1yoAjuEoMRzMeW9ji1PnjRqas4lU19tuEOf9/Eq65BYOqSIqJ3saG4I/+z013coCGCalo4ghuufmu5pcsi6ywcszz+g20N/PZcVcLbzZbMnKcXsaE007MDLxGY1QD89aribAvsypDUNEuNh7w2n/OAyQdw+nRDGIi8Oz3FoJUmC0HBxin2hstuBnrAXtxdqRX1NU25HfR2s3qt/yQ33TFx4xHA3NAll8Riyg2rvgqAKo9x9rmlcnzML011vlNu5oLKAuCqJlH8W/Nnnh6LNcZJy8Cj+fqXPeWHS4Qk7nEAbYJN9/sNAUg/VzsP0yYejPfjwzoDDaPLvTHROwv9nZ+Lr/U5epHr231jc5+i3x8dLuBtg+aa5PHoS/Ml1a4811w3Bxj3u36q8UPJGnszQXLKCpucynVVstAj4ufhXhhNXJdK/U31Zrc6j3Skw4zgF8Ddv0
template:
metadata:
creationTimestamp: null
name: azuredns-config
namespace: cert-manager

View File

@@ -12,24 +12,10 @@ spec:
privateKeySecretRef:
name: letsencrypt-staging-key
solvers:
- dns01:
azureDNS:
subscriptionID: 1b52bc03-6815-4574-b579-60745dce544d
resourceGroupName: forteapps-domain
hostedZoneName: forteapps.net
environment: AzurePublicCloud
clientID: 3b7a4ebf-894c-4f5d-9b1e-2b61312f8e74
clientSecretSecretRef:
name: azuredns-config
key: client-secret
selector:
dnsNames:
- '*.forteapps.net'
- 'forteapps.net'
# HTTP-01 fallback for non-wildcard certificates
- http01:
ingress:
class: traefik
---
# Production ClusterIssuer for browser-trusted certificates
apiVersion: cert-manager.io/v1
@@ -44,146 +30,6 @@ spec:
privateKeySecretRef:
name: letsencrypt-prod-key
solvers:
# DNS-01 solver for wildcard certificates (*.forteapps.net)
- dns01:
azureDNS:
subscriptionID: 1b52bc03-6815-4574-b579-60745dce544d
resourceGroupName: forteapps-domain
hostedZoneName: forteapps.net
environment: AzurePublicCloud
clientID: 3b7a4ebf-894c-4f5d-9b1e-2b61312f8e74
clientSecretSecretRef:
name: azuredns-config
key: client-secret
selector:
dnsNames:
- '*.forteapps.net'
- 'forteapps.net'
# HTTP-01 fallback for non-wildcard certificates
- http01:
ingress:
class: traefik
# =============================================================================
# CONFIGURATION INSTRUCTIONS FOR AZURE DNS WITH WILDCARD CERTIFICATES
# =============================================================================
#
# PREREQUISITES IN AZURE DNS PORTAL:
# ----------------------------------
# 1. Ensure you have an Azure DNS Zone for "forteapps.net" created in your
# Azure subscription. If not, create it in Azure Portal:
# - Search for "DNS zones" → Create → Zone name: forteapps.net
# - Note the Resource Group where you create it (e.g., "dns-zones-rg")
#
# 2. Configure NS records at your domain registrar to point to Azure DNS:
# - In Azure Portal → DNS zones → forteapps.net
# - Note the 4 NS records shown (e.g., ns1-04.azure-dns.com, etc.)
# - Go to your domain registrar and update the NS records to these values
#
# AUTHENTICATION (Service Principal - Required for UpCloud/non-Azure clusters):
# ----------------------------------------------------------------------------
# Since your cluster runs on UpCloud (not AKS), you must use Service Principal
# authentication. Managed Identity only works with Azure-hosted resources.
#
# =============================================================================
# SETUP: Service Principal for UpCloud Clusters
# =============================================================================
#
# 1. Create Azure AD App Registration:
# az ad sp create-for-rbac --name cert-manager-dns --sdk-auth
# # Save the JSON output - you'll need appId (clientID) and password (clientSecret)
#
# 2. Assign DNS Zone Contributor role:
# az role assignment create \
# --role "DNS Zone Contributor" \
# --assignee <SERVICE_PRINCIPAL_CLIENT_ID> \
# --scope /subscriptions/<SUBSCRIPTION_ID>/resourceGroups/<DNS_RESOURCE_GROUP>/providers/Microsoft.Network/dnszones/forteapps.net
#
# 3. Create Kubernetes secret for the service principal:
# kubectl create secret generic azuredns-config \
# --namespace cert-manager \
# --from-literal=client-secret=YOUR_CLIENT_SECRET
#
# 4. Update the ClusterIssuer above with:
# - subscriptionID: Your Azure subscription ID
# - resourceGroupName: The resource group containing your DNS zone
# - clientID: The Service Principal appId/clientID
# - clientSecretSecretRef: References the secret created in step 3
#
# =============================================================================
# ALTERNATIVE DNS PROVIDERS (for reference):
# =============================================================================
# -----------------------------------------------------------------------------
# Cloudflare (original configuration)
# -----------------------------------------------------------------------------
# Create secret with: kubectl create secret generic cloudflare-api-token-secret \
# --from-literal=api-token=YOUR_CLOUDFLARE_API_TOKEN -n cert-manager
#
# dns01:
# cloudflare:
# email: your-cloudflare-email@example.com
# apiTokenSecretRef:
# name: cloudflare-api-token-secret
# key: api-token
# -----------------------------------------------------------------------------
# AWS Route53
# -----------------------------------------------------------------------------
# Create secret with: kubectl create secret generic route53-credentials \
# --from-literal=secret-access-key=YOUR_SECRET_KEY -n cert-manager
#
# dns01:
# route53:
# region: us-east-1
# hostedZoneID: ZXXXXXXXXXXXXX
# accessKeyID: YOUR_ACCESS_KEY_ID
# secretAccessKeySecretRef:
# name: route53-credentials
# key: secret-access-key
# -----------------------------------------------------------------------------
# Google Cloud DNS
# -----------------------------------------------------------------------------
# Create secret with service account JSON key:
# kubectl create secret generic clouddns-service-account \
# --from-file=service-account.json=path/to/key.json -n cert-manager
#
# dns01:
# cloudDNS:
# project: YOUR_GCP_PROJECT_ID
# hostedZoneName: example-com
# serviceAccountSecretRef:
# name: clouddns-service-account
# key: service-account.json
# -----------------------------------------------------------------------------
# GoDaddy
# -----------------------------------------------------------------------------
# Requires external webhook: https://github.com/snowdrop/godaddy-webhook
#
# dns01:
# webhook:
# groupName: acme.yourcompany.com
# solverName: godaddy
# config:
# apiKeySecretRef:
# name: godaddy-api-credentials
# key: api-key
# apiSecretSecretRef:
# name: godaddy-api-credentials
# key: api-secret
# -----------------------------------------------------------------------------
# Manual/Dynamic DNS (for homelab)
# -----------------------------------------------------------------------------
# Requires RFC2136 provider or external webhook
#
# dns01:
# rfc2136:
# nameserver: your-dns-server.example.com
# tsigKeyName: cert-manager-key
# tsigAlgorithm: HMACSHA256
# tsigSecretSecretRef:
# name: tsig-secret
# key: secret

View File

@@ -0,0 +1,41 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
name: require-labels
annotations:
policies.kyverno.io/title: Require Labels
policies.kyverno.io/category: Best Practices
policies.kyverno.io/minversion: 1.6.0
policies.kyverno.io/severity: medium
policies.kyverno.io/subject: Pod, Label
policies.kyverno.io/description: Define and use labels that identify semantic attributes of your application or Deployment. A common set of labels allows tools to work collaboratively, describing objects in a common manner that all tools can understand. The recommended labels describe applications in a way that can be queried. This policy validates that the label `app.kubernetes.io/name` is specified with some value.
spec:
validationFailureAction: Audit
background: true
rules:
- name: check-for-labels
skipBackgroundRequests: true
exclude:
any:
- resources:
namespaces:
- kube-system
- istio-system
- argocd
- cert-manager
- monitoring
- secrets
- kyverno
- trivy-system
match:
any:
- resources:
kinds:
- Pod
validate:
message: The label `app.kubernetes.io/name` is required.
allowExistingViolations: true
pattern:
metadata:
labels:
app.kubernetes.io/name: "?*"

View File

@@ -16,6 +16,7 @@ spec:
- resources:
namespaces:
- kube-system
- trivy-system
- monitoring
- argocd
- cert-manager

View File

@@ -1,12 +1,10 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: k8s-launchpad # → infra/values/aks-dev/argocd-values.yaml (notifications.context.clusterName)
domain: example.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.example.com # → infra/values/aks-dev/argocd-values.yaml (global.domain)
grafanaDomain: grafana.example.com # → infra/values/aks-dev/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.example.com # → infra/values/aks-dev/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.example.com # → infra/values/aks-dev/dot-ai-stack-values.yaml (dot-ai.ingress.host) — create if needed
dotaiUiDomain: kubemcpui.example.com # → infra/values/aks-dev/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host) — create if needed
letsencryptEmail: admin@example.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "10.0.0.0/8,168.63.129.16/32" # → infra/values/aks-dev/traefik-values.yaml (ports.*.trustedIPs) — VNet CIDR + Azure health probe
cloudProvider: azure # → determines overlay directory and cloud-specific LB/storage annotations
clusterName: dev-fd-aks
domain: forteapps.net
argocdDomain: argocd.127.0.0.1.nip.io
grafanaDomain: grafana.forteapps.net
keycloakDomain: id.forteapps.net
dotaiDomain: kubemcp.forteapps.net
dotaiUiDomain: kubemcpui.forteapps.net
letsencryptEmail: danijels@gmail.com
trustedIPs: "10.0.0.0/8"
cloudProvider: azure

View File

@@ -1,12 +1,10 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: prod-aks # → infra/values/aks-prod/argocd-values.yaml (notifications.context.clusterName)
domain: example.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.example.com # → infra/values/aks-prod/argocd-values.yaml (global.domain)
grafanaDomain: grafana.example.com # → infra/values/aks-prod/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.example.com # → infra/values/aks-prod/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.example.com # → infra/values/aks-prod/dot-ai-stack-values.yaml (dot-ai.ingress.host) — create if needed
dotaiUiDomain: kubemcpui.example.com # → infra/values/aks-prod/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host) — create if needed
letsencryptEmail: admin@example.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "10.0.0.0/8,168.63.129.16/32" # → infra/values/aks-prod/traefik-values.yaml (ports.*.trustedIPs) — VNet CIDR + Azure health probe
cloudProvider: azure # → determines overlay directory and cloud-specific LB/storage annotations
clusterName: prod-fd-aks
domain: fortedigital.com
argocdDomain: argocd.127.0.0.1.nip.io
grafanaDomain: grafana.fortedigital.com
keycloakDomain: id.fortedigital.com
dotaiDomain: kubemcp.fortedigital.com
dotaiUiDomain: kubemcpui.fortedigital.com
letsencryptEmail: danijel.simeunovic@fortedigital.com
trustedIPs: "10.0.0.0/8"
cloudProvider: azure

View File

@@ -1,12 +1,10 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: dev-eks # → infra/values/eks-dev/argocd-values.yaml (notifications.context.clusterName)
domain: example.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.example.com # → infra/values/eks-dev/argocd-values.yaml (global.domain)
grafanaDomain: grafana.example.com # → infra/values/eks-dev/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.example.com # → infra/values/eks-dev/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.example.com # → infra/values/eks-dev/dot-ai-stack-values.yaml (dot-ai.ingress.host) — create if needed
dotaiUiDomain: kubemcpui.example.com # → infra/values/eks-dev/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host) — create if needed
letsencryptEmail: admin@example.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "10.0.0.0/8" # → infra/values/eks-dev/traefik-values.yaml (ports.*.trustedIPs) — VPC CIDR
cloudProvider: eks # → determines overlay directory and cloud-specific LB/storage annotations
clusterName: dev-fd-eks
domain: forteapps.net
argocdDomain: argocd.127.0.0.1.nip.io
grafanaDomain: grafana.forteapps.net
keycloakDomain: id.forteapps.net
dotaiDomain: kubemcp.forteapps.net
dotaiUiDomain: kubemcpui.forteapps.net
letsencryptEmail: danijels@gmail.com
trustedIPs: "10.0.0.0/8"
cloudProvider: aws

View File

@@ -1,12 +1,10 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: prod-eks # → infra/values/eks-prod/argocd-values.yaml (notifications.context.clusterName)
domain: example.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.example.com # → infra/values/eks-prod/argocd-values.yaml (global.domain)
grafanaDomain: grafana.example.com # → infra/values/eks-prod/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.example.com # → infra/values/eks-prod/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.example.com # → infra/values/eks-prod/dot-ai-stack-values.yaml (dot-ai.ingress.host) — create if needed
dotaiUiDomain: kubemcpui.example.com # → infra/values/eks-prod/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host) — create if needed
letsencryptEmail: admin@example.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "10.0.0.0/8" # → infra/values/eks-prod/traefik-values.yaml (ports.*.trustedIPs) — VPC CIDR
cloudProvider: eks # → determines overlay directory and cloud-specific LB/storage annotations
clusterName: prod-fd-eks
domain: fortedigital.com
argocdDomain: argocd.127.0.0.1.nip.io
grafanaDomain: grafana.fortedigital.com
keycloakDomain: id.fortedigital.com
dotaiDomain: kubemcp.fortedigital.com
dotaiUiDomain: kubemcpui.fortedigital.com
letsencryptEmail: danijel.simeunovic@fortedigital.com
trustedIPs: "10.0.0.0/8"
cloudProvider: aws

View File

@@ -1,12 +1,10 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: dev-gke # → infra/values/gke-dev/argocd-values.yaml (notifications.context.clusterName)
domain: example.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.example.com # → infra/values/gke-dev/argocd-values.yaml (global.domain)
grafanaDomain: grafana.example.com # → infra/values/gke-dev/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.example.com # → infra/values/gke-dev/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.example.com # → infra/values/gke-dev/dot-ai-stack-values.yaml (dot-ai.ingress.host) — create if needed
dotaiUiDomain: kubemcpui.example.com # → infra/values/gke-dev/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host) — create if needed
letsencryptEmail: admin@example.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "10.0.0.0/8,35.191.0.0/16,130.211.0.0/22" # → infra/values/gke-dev/traefik-values.yaml (ports.*.trustedIPs) — subnet + GCP health checks
cloudProvider: gke # → determines overlay directory and cloud-specific LB/storage annotations
clusterName: dev-fd-gke
domain: forteapps.net
argocdDomain: argocd.127.0.0.1.nip.io
grafanaDomain: grafana.forteapps.net
keycloakDomain: id.forteapps.net
dotaiDomain: kubemcp.forteapps.net
dotaiUiDomain: kubemcpui.forteapps.net
letsencryptEmail: danijels@gmail.com
trustedIPs: "10.0.0.0/8"
cloudProvider: gcp

View File

@@ -1,12 +1,10 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: prod-gke # → infra/values/gke-prod/argocd-values.yaml (notifications.context.clusterName)
domain: example.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.example.com # → infra/values/gke-prod/argocd-values.yaml (global.domain)
grafanaDomain: grafana.example.com # → infra/values/gke-prod/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.example.com # → infra/values/gke-prod/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.example.com # → infra/values/gke-prod/dot-ai-stack-values.yaml (dot-ai.ingress.host) — create if needed
dotaiUiDomain: kubemcpui.example.com # → infra/values/gke-prod/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host) — create if needed
letsencryptEmail: admin@example.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "10.0.0.0/8,35.191.0.0/16,130.211.0.0/22" # → infra/values/gke-prod/traefik-values.yaml (ports.*.trustedIPs) — subnet + GCP health checks
cloudProvider: gke # → determines overlay directory and cloud-specific LB/storage annotations
clusterName: prod-fd-gke
domain: fortedigital.com
argocdDomain: argocd.127.0.0.1.nip.io
grafanaDomain: grafana.fortedigital.com
keycloakDomain: id.fortedigital.com
dotaiDomain: kubemcp.fortedigital.com
dotaiUiDomain: kubemcpui.fortedigital.com
letsencryptEmail: danijel.simeunovic@fortedigital.com
trustedIPs: "10.0.0.0/8"
cloudProvider: gcp

View File

@@ -1,12 +1,10 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: dev-fd-no-svg1 # → infra/values/upc-dev/argocd-values.yaml (notifications.context.clusterName)
domain: forteapps.net # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.127.0.0.1.nip.io # → infra/values/upc-dev/argocd-values.yaml (global.domain)
grafanaDomain: grafana.forteapps.net # → infra/values/upc-dev/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.forteapps.net # → infra/values/upc-dev/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.forteapps.net # → infra/values/upc-dev/dot-ai-stack-values.yaml (dot-ai.ingress.host)
dotaiUiDomain: kubemcpui.forteapps.net # → infra/values/upc-dev/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host)
letsencryptEmail: danijels@gmail.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "172.16.1.0/24" # → infra/values/upc-dev/traefik-values.yaml (ports.*.trustedIPs)
cloudProvider: upcloud # → determines overlay directory and cloud-specific LB/storage annotations
clusterName: dev-fd-no-svg1
domain: forteapps.net
argocdDomain: argocd.127.0.0.1.nip.io
grafanaDomain: grafana.forteapps.net
keycloakDomain: id.forteapps.net
dotaiDomain: kubemcp.forteapps.net
dotaiUiDomain: kubemcpui.forteapps.net
letsencryptEmail: danijels@gmail.com
trustedIPs: "172.16.1.0/24"
cloudProvider: upcloud

View File

@@ -1,12 +1,10 @@
# Cluster config reference — values must match the corresponding overlay files.
# Read by bootstrap.sh at install time; NOT auto-propagated to ArgoCD value files.
clusterName: prod-fd-no-svg1 # → infra/values/upc-prod/argocd-values.yaml (notifications.context.clusterName)
domain: fortedigital.com # → infra/values/base/gitea-values.yaml, renovate-values.yaml, keycloak-values.yaml (subdomains)
argocdDomain: argocd.127.0.0.1.nip.io # → infra/values/upc-prod/argocd-values.yaml (global.domain)
grafanaDomain: grafana.fortedigital.com # → infra/values/upc-prod/grafana-values.yaml (ingress.hosts)
keycloakDomain: id.fortedigital.com # → infra/values/upc-prod/keycloak-values.yaml (ingress.hostname)
dotaiDomain: kubemcp.fortedigital.com # → infra/values/upc-prod/dot-ai-stack-values.yaml (dot-ai.ingress.host)
dotaiUiDomain: kubemcpui.fortedigital.com # → infra/values/upc-prod/dot-ai-stack-values.yaml (dot-ai-ui.ingress.host)
letsencryptEmail: danijel.simeunovic@fortedigital.com # → cluster-resources/letsencrypt-issuer.yaml (spec.acme.email)
trustedIPs: "172.16.1.0/24" # → infra/values/upc-prod/traefik-values.yaml (ports.*.trustedIPs)
cloudProvider: upcloud # → determines overlay directory and cloud-specific LB/storage annotations
clusterName: prod-fd-no-svg1
domain: fortedigital.com
argocdDomain: argocd.127.0.0.1.nip.io
grafanaDomain: grafana.fortedigital.com
keycloakDomain: id.fortedigital.com
dotaiDomain: kubemcp.fortedigital.com
dotaiUiDomain: kubemcpui.fortedigital.com
letsencryptEmail: danijel.simeunovic@fortedigital.com
trustedIPs: "172.16.1.0/24"
cloudProvider: upcloud

View File

@@ -1,34 +0,0 @@
{
"$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.16.0/.schema/devbox.schema.json",
"packages": [
"kubectl@1.33.2",
"kubernetes-helm@3.18.4",
"k9s@0.50.7",
"kubeseal@0.30.0",
"argocd@2.14.11",
"kubecm@0.33.1",
"kubectl-tree@0.4.3",
"kind@0.29.0",
"kustomize@5.7.0",
"kyverno@1.14.3",
"syft@1.29.0",
"grype@0.92.2",
"traefik@3.6.7",
"claude-code@latest",
"go@latest",
"dotnet-sdk@latest",
"opentofu@1.11.6",
"_1password@latest",
"github-cli@latest"
],
"shell": {
"init_hook": [
"echo 'Welcome to devbox!' > /dev/null"
],
"scripts": {
"test": [
"echo \"Error: no test specified\" && exit 1"
]
}
}
}

View File

@@ -654,11 +654,21 @@ kubectl create secret generic myapp-credentials \
#### Step 2: Seal the Secret
Get the public certificate (one-time setup):
```bash
# Fetch public cert from cluster
kubeseal --fetch-cert \
--controller-name=sealed-secrets-controller \
--controller-namespace=kube-system \
> pub-cert.pem
```
Seal your secret:
```bash
kubeseal --format=yaml \
--namespace=myapp \
--cert=pub-cert.pem \
< private/myapp-credentials.yaml \
> secrets/myapp-credentials-sealed.yaml
```
@@ -701,7 +711,7 @@ kubectl create secret generic myapp-credentials \
# 2. Seal it
kubeseal --format=yaml \
--namespace=myapp \
--cert=pub-cert.pem \
< private/myapp-credentials.yaml \
> secrets/myapp-credentials-sealed.yaml
@@ -952,46 +962,6 @@ User sees application (authenticated)
---
### Accessing Authenticated User Information
The auth sidecar handles all authentication before requests reach your application. Your app never sees unauthenticated traffic — the sidecar returns 401 or redirects to the IdP first.
After successful authentication, the sidecar forwards the request to your application with user identity injected as HTTP headers:
| Header | Description | Available in |
|--------|-------------|-------------|
| `X-Auth-User` | Username or display name | Token, OIDC, MCP |
| `X-Auth-Email` | User email address | OIDC |
| `X-Auth-Subject` | OIDC `sub` claim (stable user ID) | OIDC, MCP |
| `X-Auth-Groups` | Comma-separated group memberships | OIDC (if scope includes `groups`) |
| `X-Auth-Token` | The validated access token | All modes |
**Your application reads these headers — no auth library needed:**
```javascript
// Express.js example
app.get('/profile', (req, res) => {
const user = req.headers['x-auth-user'];
const email = req.headers['x-auth-email'];
res.json({ user, email });
});
```
```python
# Flask example
@app.route('/profile')
def profile():
user = request.headers.get('X-Auth-User')
email = request.headers.get('X-Auth-Email')
return jsonify(user=user, email=email)
```
**Why this is safe**: The Kyverno-generated NetworkPolicy restricts ingress to the sidecar port only. Traffic cannot bypass the sidecar to reach the application port directly, so the `X-Auth-*` headers can be trusted unconditionally.
**Key principle**: Your application is zero-trust-unaware by design. It reads headers and renders UI. All authentication complexity lives in the sidecar and Kyverno policy.
---
### Authentication Configuration Reference
#### Helm Values Schema
@@ -1336,34 +1306,16 @@ stringData:
| Field | Required | Description |
|-------|----------|-------------|
| `clientId` | Yes | Keycloak client ID (must be unique in realm) |
| `name` | Yes | Display name in Keycloak UI |
| `redirectUris` | Yes | Allowed OAuth redirect URLs (supports wildcards like `/*`) |
| `webOrigins` | Yes | Allowed CORS origins |
| `defaultClientScopes` | No | OIDC scopes (default: `["openid", "email", "profile"]`) |
| `protocolMappers` | No | Custom claim mappers for tokens (see examples below) |
| `secret.namespace` | No | Target namespace for credentials (default: `source-namespace` annotation value) |
| `secret.name` | No | Credential Secret name (default: `<clientId>-oidc-credentials`) |
| `secret.keys.clientId` | No | Key name for client ID (default: `client-id`) |
| `secret.keys.clientSecret` | No | Key name for client secret (default: `client-secret`) |
**Protocol Mappers Example**:
```json
"protocolMappers": [
{
"name": "groups",
"protocol": "openid-connect",
"protocolMapper": "oidc-group-membership-mapper",
"config": {
"claim.name": "groups",
"full.path": "false",
"id.token.claim": "true",
"access.token.claim": "true",
"userinfo.token.claim": "true"
}
}
]
```
| `clientId` | Yes | Keycloak client ID |
| `name` | Yes | Display name in Keycloak |
| `redirectUris` | Yes | Allowed redirect URIs |
| `webOrigins` | Yes | Allowed web origins (CORS) |
| `defaultClientScopes` | No | Scopes (default: `["openid", "email", "profile"]`) |
| `protocolMappers` | No | Custom claim mappers (default: `[]`) |
| `secret.namespace` | No | Namespace for the credential Secret (default: source namespace) |
| `secret.name` | No | Name of the credential Secret (default: `<clientId>-oidc-credentials`) |
| `secret.keys.clientId` | No | Key name for client ID in credential Secret (default: `client-id`) |
| `secret.keys.clientSecret` | No | Key name for client secret in credential Secret (default: `client-secret`) |
#### Step 2: Reference the Credential Secret

View File

@@ -12,11 +12,11 @@
## Overview
This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where Git repositories serve as the single source of truth for both infrastructure and application deployments. The cluster setup is **cloud-agnostic**, with ready-to-use configurations for **UpCloud**, **AWS EKS**, **Azure AKS**, and **GCP GKE**.
This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where Git repositories serve as the single source of truth for both infrastructure and application deployments. The cluster is running on **UpCloud Managed Kubernetes** but is designed to be cloud-agnostic.
### Key Characteristics
- **Environment**: Production (internal use only)
- **Cluster Type**: Multi-cloud, multi-cluster via Kustomize overlays (UpCloud, AWS, Azure, GCP)
- **Cluster Type**: Multi-cluster (upc-dev, upc-prod) via Kustomize overlays
- **GitOps Tool**: ArgoCD
- **Deployment Pattern**: App-of-Apps
- **Secret Management**: Sealed Secrets (kubeseal)
@@ -63,7 +63,7 @@ This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where
┌────────────────────────────────┐
│ Kubernetes Clusters │
│ (UpCloud, AWS, Azure, GCP)
│ (UpCloud: upc-dev, upc-prod)
│ │
│ ┌──────────────────────────┐ │
│ │ ArgoCD │ │
@@ -115,73 +115,51 @@ This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where
```
launchpad/
├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps)
├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster)
├── .tofu/ # Infrastructure provisioning (OpenTofu)
│ ├── platforms/ # Per-platform IaC
│ │ ├── aks/ # Azure AKS
│ │ │ ├── modules/cluster/ # Reusable AKS module
│ │ │ ├── dev/ # tofu root for aks-dev
│ │ │ ├── prod/ # tofu root for aks-prod
│ │ │ └── workload/ # workload cluster (no data services)
│ │ ├── eks/ # AWS EKS (same structure)
│ │ ├── gke/ # GCP GKE
│ │ └── upc/ # UpCloud
│ ├── configs/ # Platform credentials (git-ignored)
│ │ └── {platform}.env.example # Template per platform
│ └── scripts/
│ ├── setup-cluster.sh # ./setup-cluster.sh <cluster> [--plan|--auto]
│ ├── teardown-cluster.sh # ./teardown-cluster.sh <cluster>
│ └── get-kubeconfig.sh # ./get-kubeconfig.sh <cluster>
├── clusters/ # Cluster metadata YAML (domain, IPs, etc.)
│ ├── aks-dev.yaml
│ ├── upc-dev.yaml
│ └── ...
├── bootstrap.sh # Cluster initialization script
├── _app-of-apps-upc-dev.yaml # Root ArgoCD Application (upc-dev cluster)
├── _app-of-apps-upc-prod.yaml # Root ArgoCD Application (upc-prod cluster)
├── infra/ # Infrastructure ArgoCD Applications (Kustomize)
│ ├── base/ # Base Application manifests (one dir per component)
│ │ ├── kustomization.yaml # Aggregates all component subdirectories
│ │ ├── traefik-application/
│ │ ├── kustomization.yaml
│ │ │ └── traefik-application.yaml
│ │ ├── keycloak/
│ │ │ ├── kustomization.yaml
│ │ │ └── keycloak.yaml
│ │ ├── grafana/
│ │ ├── prometheus/
│ │ ── ... # Each component in its own subdirectory
│ └── secrets/
│ ├── overlays/ # Per-cluster Kustomize overrides
│ │ ── upc-dev/ # UpCloud Dev — includes all (resources: ../../base)
│ │ ├── upc-prod/ # UpCloud Prod — all + patches
│ │ ├── aks-dev/ # Azure AKS Dev — selective components
│ │ ├── aks-prod/ # Azure AKS Prod
│ │ ├── eks-dev/ # AWS EKS Dev
│ │ ├── eks-prod/ # AWS EKS Prod
│ │ ├── gke-dev/ # GCP GKE Dev
│ │ └── gke-prod/ # GCP GKE Prod
│ ├── base/ # Base Application manifests (upc-dev defaults)
│ │ ├── kustomization.yaml
│ │ ├── traefik-application.yaml
│ │ ├── keycloak.yaml
│ │ ── grafana.yaml
│ │ ├── gitea.yaml
│ │ ├── gitea-actions.yaml
│ │ ├── tempo.yaml
│ │ ├── renovate.yaml
│ │ ├── ... # All other Application manifests
│ │ ── secrets.yaml
├── overlays/ # Per-cluster overrides
│ ├── upc-dev/ # UpCloud Dev (uses base as-is)
│ │ ── upc-prod/ # UpCloud Prod (patches value paths)
│ ├── dashboards/ # Grafana dashboard ConfigMaps
│ └── values/ # Helm value overrides for infra
│ ├── base/ # Cloud-agnostic shared values
├── upc-{dev,prod}/ # UpCloud: storage class, LB, pricing
├── aws-{dev,prod}/ # AWS: gp3, NLB, CUR pricing
├── aks-{dev,prod}/ # Azure: managed-csi-premium, Standard LB
└── gcp-{dev,prod}/ # GCP: premium-rwo, L4 LB
│ ├── base/ # Shared values (all clusters)
│ ├── traefik-values.yaml
│ ├── keycloak-values.yaml
│ ├── grafana-values.yaml
│ ├── prometheus-values.yaml
│ │ ├── gitea-values.yaml
│ │ └── ...
│ ├── upc-dev/ # upc-dev cluster-specific values
│ │ ├── traefik-values.yaml
│ │ ├── keycloak-values.yaml
│ │ └── grafana-values.yaml
│ └── upc-prod/ # upc-prod cluster-specific values
│ ├── traefik-values.yaml
│ ├── keycloak-values.yaml
│ └── grafana-values.yaml
├── apps/ # Business Application ArgoCD manifests (Kustomize)
│ ├── base/ # One subdirectory per app
│ ├── base/ # Base app manifests
│ │ ├── kustomization.yaml
│ │ ├── musicman/
│ │ ── mcp10x/
│ │ ├── dot-ai-stack/
│ │ ├── ts-mcp/
│ │ └── argo-mcp/
│ │ ├── dot-ai-stack.yaml
│ │ ── ...
│ └── overlays/
│ ├── upc-dev/ # All apps (resources: ../../base)
── upc-prod/ # All apps + patches
│ └── aks-dev/ # Selective apps only
│ ├── upc-dev/ # Uses base as-is
── upc-prod/ # Patches value paths
├── cluster-resources/ # Cluster-wide Kubernetes resources
│ ├── ...
@@ -197,8 +175,6 @@ launchpad/
**Key Points**:
- `_app-of-apps-upc-dev.yaml` and `_app-of-apps-upc-prod.yaml` are the per-cluster root Applications
- Each component in `base/` has its own subdirectory with a `kustomization.yaml`
- Overlays can include **all** components (`resources: [../../base]`) or **cherry-pick** specific ones (`resources: [../../base/grafana, ../../base/prometheus]`)
- Kustomize overlays in `infra/overlays/` render base Applications with per-cluster patches
- Helm values are split: `values/base/` (shared) + `values/upc-dev/` or `values/upc-prod/` (cluster-specific)
- `apps/` follows the same base/overlays pattern for business applications
@@ -311,7 +287,7 @@ app-repository/
### The App-of-Apps Pattern
```
_app-of-apps-{cluster}.yaml (Root, per cluster — e.g. upc-dev, eks-prod, gke-dev)
_app-of-apps-{upc-dev,upc-prod}.yaml (Root, per cluster)
├── infrastructure-apps (manages infra/)
│ ├── cluster-resources-application
@@ -381,30 +357,16 @@ spec:
### Multi-Cluster Pattern
Kustomize overlays enable deploying the same Applications across clusters with different configurations.
Each component in `infra/base/` and `apps/base/` lives in its own subdirectory. Overlays define **which components to include** and optionally **patch** them:
Kustomize overlays enable deploying the same Applications across clusters with different configurations:
```yaml
# Option 1: Include ALL components (full cluster)
# infra/overlays/upc-dev/kustomization.yaml
resources:
- ../../base # Pulls in every component subdirectory
# infra/base/ contains default (upc-dev) Applications
# Helm values are layered: base + cluster-specific
valueFiles:
- $values/infra/values/base/traefik-values.yaml # Shared config
- $values/infra/values/upc-dev/traefik-values.yaml # Cluster-specific
# Option 2: Cherry-pick specific components (lightweight cluster)
# infra/overlays/aks-dev/kustomization.yaml
resources:
- ../../base/traefik-application
- ../../base/grafana
- ../../base/prometheus
- ../../base/loki
# Only listed components are deployed — others are excluded
```
Per-cluster patches swap Helm value file paths:
```yaml
# infra/overlays/upc-prod/kustomization.yaml
# infra/overlays/upc-prod/kustomization.yaml patches the second valueFile
patches:
- target:
kind: Application
@@ -415,15 +377,6 @@ patches:
value: $values/infra/values/upc-prod/traefik-values.yaml
```
Cloud-specific values (storage classes, load balancer annotations, cost model) are isolated in per-cluster value files. Base values are fully cloud-agnostic:
| Cloud | Storage Class | Load Balancer | OpenCost Provider |
|-------|--------------|---------------|-------------------|
| **UpCloud** | `upcloud-block-storage-maxiops` | UpCloud LB (ProxyProtocol v2) | Custom pricing |
| **AWS EKS** | `gp3` (EBS CSI) | NLB (ProxyProtocol v2) | AWS CUR |
| **Azure AKS** | `managed-csi-premium` | Standard LB (`externalTrafficPolicy: Local`) | Azure Billing API |
| **GCP GKE** | `premium-rwo` (PD CSI) | L4 passthrough NLB | GCP Cloud Billing |
**Benefits**:
- Single source of truth for Application definitions
- Cluster-specific values isolated per overlay
@@ -705,6 +658,6 @@ Notifications include:
---
**Last Updated**: 2026-04-22
**Last Updated**: 2026-03-16
**Maintained By**: Platform Team
**Questions?**: Contact #platform-support on Slack

View File

@@ -37,7 +37,7 @@ Bootstrap a new cluster from scratch:
#### Prerequisites
1. **Kubernetes cluster running** (UpCloud, AWS EKS, Azure AKS, GCP GKE, or any K8s cluster)
1. **Kubernetes cluster running** (UpCloud or any K8s cluster)
2. **kubectl configured** with admin access
3. **Repositories cloned** locally
@@ -54,13 +54,11 @@ kubectl get nodes
git clone https://git.forteapps.net/Forte/launchpad
cd launchpad
# 2. Run bootstrap script with cluster target
# Available clusters: upc-dev, upc-prod, eks-dev, eks-prod,
# aks-dev, aks-prod, gke-dev, gke-prod
./bootstrap.sh upc-dev
# 2. Set cluster name (optional)
export CLUSTER_NAME="prod-cluster-01"
# Cluster config is loaded from clusters/<cluster>.yaml
# (cloudProvider, trustedIPs, domain, etc.)
# 3. Run bootstrap script
./bootstrap.sh
```
**What Happens:**
@@ -1264,21 +1262,13 @@ spec:
### Backup Strategy
**Current State**: Gitea daily backups to S3-compatible storage
**Current State**: No automated backups
**What Is Backed Up**:
- ✅ Gitea repositories + database: Daily CronJob (`cluster-resources/gitea-backup-cronjob.yaml`) uploads to S3-compatible storage with 7-day retention
- ✅ Git repositories: Full cluster config recoverable from Git
- ⚠️ Secrets: Sealed secrets in Git; unseal keys need safekeeping
**What Is NOT Backed Up**:
- ❌ Cluster state (recreate via GitOps)
- ❌ Other persistent volumes (Prometheus, Loki, Tempo data)
**Per-cloud backup scripts** (manual restore helpers):
- UpCloud/AWS: `scripts/gitea-backup.sh` / `scripts/gitea-backup-eks.sh` (MinIO CLI, S3-compatible)
- Azure: `scripts/gitea-backup-aks.sh` (Azure CLI + Blob Storage)
- GCP: `scripts/gitea-backup-gke.sh` (gsutil + GCS)
**What Needs Backup**:
- ❌ Cluster state (not backed up - recreate via GitOps)
- ❌ Persistent volumes (currently not critical)
- ✅ Git repositories (Gitea provides backup)
- ⚠️ Secrets (sealed secrets in Git, unseal keys need safekeeping)
### Cluster Rebuild
@@ -1380,9 +1370,6 @@ kubectl get pods -n argocd
```bash
# UpCloud: Upgrade via control panel or CLI
# AWS EKS: eksctl upgrade cluster / AWS Console
# Azure AKS: az aks upgrade / Azure Portal
# GCP GKE: gcloud container clusters upgrade / Cloud Console
# After upgrade, verify cluster
kubectl version
@@ -1520,35 +1507,18 @@ git push
### Multi-Cluster Setup
The repository supports multiple clusters across multiple clouds via Kustomize overlays:
The repository supports multiple clusters via Kustomize overlays:
**Active clusters:**
- **upc-dev** (default): `infra/overlays/upc-dev/` — uses base Applications as-is
- **upc-prod**: `infra/overlays/upc-prod/` — patches value file paths from `upc-dev` to `upc-prod`
**Cloud-ready templates (fill in `clusters/*.yaml` before use):**
- **eks-dev** / **eks-prod**: AWS EKS with NLB, gp3 storage, AWS CUR pricing
- **aks-dev** / **aks-prod**: Azure AKS with Standard LB, managed-csi-premium storage
- **gke-dev** / **gke-prod**: GCP GKE with L4 LB, premium-rwo storage
Each cluster has its own:
- Root app-of-apps: `_app-of-apps-{cluster}.yaml`
- Cluster config: `clusters/{cluster}.yaml` (domain, trustedIPs, cloudProvider)
- Kustomize overlay: `infra/overlays/{cluster}/kustomization.yaml`
- Helm value overrides: `infra/values/{cluster}/` (traefik, gitea, opencost)
- Sealed secrets: `secrets/{cluster}/` (as needed)
- Apps overlay: `apps/overlays/{cluster}/`
- Root app-of-apps file: `_app-of-apps-upc-dev.yaml` / `_app-of-apps-upc-prod.yaml`
- Cluster-specific Helm values: `infra/values/upc-dev/` / `infra/values/upc-prod/`
- Sealed secrets: `secrets/upc-dev/` (others as needed)
- Apps overlay: `apps/overlays/upc-dev/` / `apps/overlays/upc-prod/`
Cloud-specific values handled per-cluster:
| Concern | UpCloud | AWS EKS | Azure AKS | GCP GKE |
|---------|---------|---------|-----------|---------|
| **Storage class** | `upcloud-block-storage-maxiops` | `gp3` | `managed-csi-premium` | `premium-rwo` |
| **Load balancer** | UpCloud LB + ProxyProtocol v2 | NLB + ProxyProtocol v2 | Standard LB + `externalTrafficPolicy: Local` | L4 passthrough NLB |
| **Cost monitoring** | Custom pricing | AWS CUR | Azure Billing API | GCP Cloud Billing |
| **Backup storage** | UpCloud S3-compat | AWS S3 (native) | Azure Blob Storage | GCS |
To add a new cluster, create a new overlay directory (e.g., `infra/overlays/eks-staging/`) with patches that swap the value file paths, and a matching `clusters/eks-staging.yaml`.
To add a new cluster, create a new overlay directory (e.g., `infra/overlays/upc-staging/`) with patches that swap the value file paths.
### Blue-Green Deployments
@@ -1691,6 +1661,6 @@ echo "Remember to delete: $SECRET_FILE"
---
**Last Updated**: 2026-04-22
**Last Updated**: 2026-03-16
**Maintained By**: Platform Team
**Emergency Contact**: #platform-support on Slack

View File

@@ -180,7 +180,7 @@ Reference for:
┌──────────────────────────────────────────────────────────────┐
│ Kubernetes Clusters (UpCloud, AWS, Azure, GCP)
│ Kubernetes Clusters (UpCloud: upc-dev, upc-prod)
│ ┌──────────────────────────────────────────────────────┐ │
│ │ Infrastructure: Traefik, Cert-Manager, Kyverno │ │
│ ├──────────────────────────────────────────────────────┤ │
@@ -194,7 +194,7 @@ Reference for:
### Key Technologies
- **GitOps**: ArgoCD
- **Kubernetes**: Multi-cloud (UpCloud, AWS EKS, Azure AKS, GCP GKE)
- **Kubernetes**: UpCloud Managed Kubernetes (multi-cluster: upc-dev, upc-prod)
- **Ingress**: Traefik v2
- **Certificates**: Cert-Manager + Let's Encrypt
- **Policies**: Kyverno
@@ -299,16 +299,11 @@ docs/
## 🔄 Documentation Versions
**Current Version**: 1.0.0
**Last Updated**: 2026-04-22
**Last Updated**: 2026-03-16
**Maintained By**: Platform Team
### Changelog
- **v1.1.0 (2026-04-22)**: Multi-cloud support
- Cloud-agnostic base values (storage, LB, pricing moved to per-cluster overlays)
- Added AWS EKS, Azure AKS, GCP GKE configurations
- Per-cloud backup scripts
- Updated all documentation
- **v1.0.0 (2026-03-16)**: Initial comprehensive documentation release
- GitOps Architecture guide
- Developer Onboarding guide

View File

@@ -20,10 +20,9 @@
| Component | Value |
|-----------|-------|
| **Provider** | Multi-cloud (UpCloud, AWS EKS, Azure AKS, GCP GKE) |
| **Provider** | Multi-cloud (UpCloud, AKS, EKS, GKE) |
| **Environment** | Dev + Production per cloud |
| **Active clusters** | UpCloud (upc-dev, upc-prod) |
| **Cloud-ready templates** | EKS, AKS, GKE (dev + prod each) |
| **Cluster Count** | Multi-cluster (upc-dev/prod, aks-dev/prod, eks-dev/prod, gke-dev/prod) |
| **GitOps Tool** | ArgoCD |
| **Ingress Controller** | Traefik v2 |
| **Certificate Management** | Cert-Manager + Let's Encrypt |
@@ -44,7 +43,7 @@ Internet
[DNS: *.forteapps.net]
[Cloud Load Balancer]
[UpCloud LoadBalancer]
[Traefik Ingress Controller]
@@ -72,45 +71,38 @@ Internet
```
launchpad/
├── bootstrap.sh # Cluster initialization (ArgoCD + GitOps)
├── _app-of-apps-{cluster}.yaml # Root ArgoCD Application (per cluster)
├── bootstrap.sh # Cluster initialization script
├── _app-of-apps-upc-dev.yaml # Root ArgoCD Application (upc-dev)
├── _app-of-apps-upc-prod.yaml # Root ArgoCD Application (upc-prod)
├── .tofu/ # Infrastructure provisioning (OpenTofu)
│ ├── platforms/ # Per-platform IaC
│ ├── aks/ # Azure: modules/cluster/, dev/, prod/, workload/
│ ├── eks/ # AWS: same structure
│ ├── gke/ # GCP
│ └── upc/ # UpCloud
│ ├── configs/ # Platform credentials (git-ignored)
── scripts/ # setup-cluster.sh, teardown-cluster.sh, get-kubeconfig.sh
├── clusters/ # Cluster metadata YAML
│ ├── aks-dev.yaml
│ ├── upc-dev.yaml
── ...
├── infra/ # Infrastructure applications (Kustomize)
│ ├── base/ # One subdirectory per component
│ ├── kustomization.yaml # Aggregates all component subdirectories
│ ├── traefik-application/
├── kustomization.yaml
│ │ │ └── traefik-application.yaml
│ │ ├── keycloak/
│ │ │ ├── kustomization.yaml
│ │ │ └── keycloak.yaml
│ │ ├── grafana/
│ │ ── prometheus/
│ ├── loki/
│ │ ── tempo/
│ │ ├── gitea/
│ │ ├── opencost/
│ │ ├── ... # Each component in own directory
│ │ └── secrets/
│ ├── overlays/ # Per-cluster: include all or cherry-pick
│ │ ├── upc-dev/ # resources: [../../base] (all components)
│ │ ├── upc-prod/ # resources: [../../base] + patches
│ │ ├── aks-dev/ # resources: [../../base/grafana, ...] (selective)
│ │ └── .../ # 8 clusters total
├── infra/ # Infrastructure applications
│ ├── cluster-resources-application.yaml
│ ├── enterprise-apps.yaml
│ ├── traefik-application.yaml
│ ├── cert-manager-application.yaml
├── kyverno.yaml
│ ├── kyverno-policies.yaml
── prometheus.yaml
├── grafana.yaml
│ ├── loki.yaml
│ ├── tempo.yaml
│ ├── fluent-bit.yaml
── trivy.yaml
├── gitea.yaml
│ ├── gitea-actions.yaml
│ ├── sealedsecrets.yaml
│ ├── secrets.yaml
│ ├── renovate.yaml
├── base/ # ArgoCD Application manifests (Kustomize base)
│ │ ├── gitea.yaml
│ │ ├── opencost.yaml
│ │ ├── traefik-application.yaml
│ │ ── keycloak.yaml
│ │ ├── grafana.yaml
│ │ ── ...
│ ├── overlays/
│ │ ── upc-prod/
│ │ └── kustomization.yaml # Patches upc-dev → upc-prod valueFile paths
│ └── values/
│ ├── base/ # Cloud-agnostic Helm values
│ │ ├── gitea-values.yaml
@@ -130,18 +122,11 @@ launchpad/
│ ├── gitea-values.yaml
│ └── opencost-values.yaml
├── apps/ # Business applications (Kustomize)
│ ├── base/ # One subdirectory per app
│ ├── kustomization.yaml
│ ├── musicman/
│ ├── mcp10x/
│ │ ├── dot-ai-stack/
│ │ ├── ts-mcp/
│ │ └── argo-mcp/
│ └── overlays/ # Per-cluster: include all or cherry-pick
│ ├── upc-dev/
│ ├── upc-prod/
│ └── aks-dev/ # Selective apps only
├── apps/ # Business applications
│ ├── mcp10x.yaml
│ ├── musicman.yaml
│ ├── dot-ai-stack.yaml
└── argo-mcp.yaml
├── cluster-resources/ # Cluster-level resources
│ ├── cert-manager-namespace.yaml
@@ -162,30 +147,12 @@ launchpad/
│ └── auth-sidecar-injector.yaml
├── secrets/ # Application secrets (sealed)
│ ├── base/ # All SealedSecrets (shared across clouds)
│ ├── kustomization.yaml
│ ├── argocd-forte-helm-secret-sealed.yaml
│ ├── argocd-mcp-credentials.yaml
│ ├── argocdmcp-auth-oidc-sealed.yaml
│ ├── dot-ai-secrets.yaml
│ │ ├── forte10x-app-credentials-sealed.yaml
│ │ ├── gitea-backup-s3-sealed.yaml
│ │ ├── gitea-credentials-sealed.yaml
│ │ ├── gitea-runner-token-sealed.yaml
│ │ ├── gitea-smtp-secret-sealed.yaml
│ │ ├── keycloak-credentials-sealed.yaml
│ │ ├── musicman-auth-oidc-sealed.yaml
│ │ ├── musicman-credentials.yaml
│ │ └── renovate-env-sealed.yaml
│ └── overlays/ # Per-cloud overlays (reference base)
│ ├── aks-dev/kustomization.yaml
│ ├── aks-prod/kustomization.yaml
│ ├── eks-dev/kustomization.yaml
│ ├── eks-prod/kustomization.yaml
│ ├── gke-dev/kustomization.yaml
│ ├── gke-prod/kustomization.yaml
│ ├── upc-dev/kustomization.yaml
│ └── upc-prod/kustomization.yaml
│ ├── argocd-mcp-credentials.yaml
│ ├── dot-ai-secrets.yaml
│ ├── gitea-credentials-sealed.yaml
│ ├── gitea-runner-token-sealed.yaml
│ ├── mcp10x-credentials-sealed.yaml
└── musicman-credentials.yaml
├── scripts/ # Operational helper scripts
│ ├── gitea-backup.sh # S3 backup helper (list/download)
@@ -663,134 +630,10 @@ retry:
4. 40 seconds
5. 80 seconds (capped at 3 minutes)
### Global Settings (`argocd-cm`)
| Setting | Value | Purpose |
|---------|-------|---------|
| `application.resourceTrackingMethod` | `annotation` | Track resources via annotations |
| `timeout.reconciliation` | `60s` | Reconciliation interval |
| `admin.enabled` | `false` | Admin login disabled (SSO-only) |
| `url` | `https://argocd.forteapps.net` | External URL for ArgoCD UI |
**Git Submodule Disable**: Set via `configs.params` (NOT `repoServer.env` — that causes strategic merge conflicts with chart's `valueFrom` entries):
```yaml
configs:
params:
"reposerver.enable.git.submodule": "false"
```
This writes to `argocd-cmd-params-cm` ConfigMap, which the chart already reads via `valueFrom`. Submodules (e.g., `shared-prompts`) are not needed for K8s manifest generation.
**Break-Glass Admin Access**: Admin login is disabled (`admin.enabled: false`). The admin password remains in `argocd-secret`. To re-enable temporarily:
```bash
# Enable admin login
kubectl patch cm argocd-cm -n argocd -p '{"data":{"admin.enabled":"true"}}'
# Log in as admin, do what's needed, then disable again
kubectl patch cm argocd-cm -n argocd -p '{"data":{"admin.enabled":"false"}}'
```
ArgoCD picks up ConfigMap changes within the reconciliation timeout (60s). Note: ArgoCD will revert this on next sync — this is intentional (temporary access only).
**OIDC Authentication** (Keycloak):
```yaml
configs:
cm:
oidc.config: |
name: Forte SSO
issuer: https://id.forteapps.net/realms/forte
clientID: argocd
clientSecret: $oidc.clientSecret
requestedScopes: ["openid", "email", "profile"]
rbacConfig:
policy.csv: |
g, ArgoCD Admins, role:admin
g, ArgoCD Viewers, role:readonly
# Deny users not in any declared KC group
policy.default: ""
scopes: '[groups]'
```
**Access Control**: Only users in Keycloak groups `ArgoCD Admins` or `ArgoCD Viewers` can access ArgoCD. Users not in either group are denied (empty `policy.default`). Assign users to groups in Keycloak admin console.
- ArgoCD does NOT add `openid` implicitly — must include in `requestedScopes`
- Do NOT add `groups` as a scope — the KC groups mapper emits the claim regardless
- `$oidc.clientSecret` references the `oidc.clientSecret` key in `argocd-secret`
- OIDC secret is synced by CronJob `argocd-oidc-sync` (see `cluster-resources/argocd-oidc-secret-sync.yaml`)
- The CronJob bridges `argocd-oidc-credentials` (from KC registrar) → `argocd-secret` every 2 min
- Safe for fresh deploys: no-ops if source secret doesn't exist yet
**Ingress** (Traefik + TLS):
```yaml
server:
ingress:
enabled: true
ingressClassName: traefik
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
tls: true
extraArgs:
- --insecure
configs:
params:
"server.insecure": true
```
TLS terminates at Traefik; ArgoCD runs in insecure mode behind the proxy.
---
## Infrastructure Components
### Homepage (Platform Dashboard)
**Chart**: `jameswynn/homepage`
**Namespace**: `homepage`
**URL**: `https://start.forteapps.net`
Platform dashboard that auto-discovers deployed apps via Kubernetes service annotations.
**Discovery mechanism**: Services annotated with `gethomepage.dev/enabled: "true"` appear in the dashboard. Apps not deployed = annotations absent = not shown. Fully dynamic per environment.
**Annotated services**:
| Service | Namespace | Group | Widget |
|---------|-----------|-------|--------|
| `gitea-http` | `gitea` | DevOps | `gitea` |
| `argocd-server` | `argocd` | DevOps | `argocd` |
| `keycloak` | `keycloak` | Identity | none |
| `grafana` | `monitoring` | Monitoring | `grafana` |
| `karpor-server` | `karpor` | DevOps | none |
**Adding a new app**: Annotate the app's Service in its Helm values:
```yaml
service:
annotations:
gethomepage.dev/enabled: "true"
gethomepage.dev/name: "My App"
gethomepage.dev/description: "What it does"
gethomepage.dev/group: "GroupName"
gethomepage.dev/icon: "icon-name" # https://github.com/walkxcode/dashboard-icons
gethomepage.dev/href: "https://myapp.forteapps.net"
# Optional live widget:
gethomepage.dev/widget.type: "myapp"
gethomepage.dev/widget.url: "https://myapp.forteapps.net"
# gethomepage.dev/widget.key: "{{HOMEPAGE_VAR_MYAPP_TOKEN}}"
```
**Widget API credentials**: Inject via env vars into the Homepage pod:
```yaml
# In homepage-values.yaml per environment
env:
- name: HOMEPAGE_VAR_GRAFANA_TOKEN
valueFrom:
secretKeyRef:
name: homepage-widget-credentials
key: grafana-token
```
Then reference as `gethomepage.dev/widget.key: "{{HOMEPAGE_VAR_GRAFANA_TOKEN}}"`.
**Values files**:
- `infra/values/base/homepage-values.yaml` — RBAC, kubernetes mode, layout
- `infra/values/{env}/homepage-values.yaml` — hostname per environment
---
### Traefik
**Chart**: `traefik/traefik`
@@ -862,10 +705,6 @@ spec:
**Chart**: `sealed-secrets/sealed-secrets-controller`
**Namespace**: `kube-system`
**Directory Structure**: `secrets/base/` contains all SealedSecrets with a `kustomization.yaml`. Per-cloud overlays in `secrets/overlays/<cloud>/` reference the base via Kustomize. The ArgoCD `secrets` Application points to the active overlay (e.g., `secrets/overlays/upc-dev`), and `infra/overlays/upc-prod` patches the path to `secrets/overlays/upc-prod`.
To add cloud-specific secrets, create a new SealedSecret in the overlay directory and add it to the overlay's `kustomization.yaml`.
**Public Certificate**:
```bash
kubeseal --fetch-cert \
@@ -906,15 +745,6 @@ kubeStateMetrics:
- Loki
- Tempo
**Ingress**: Exposed via Traefik at `https://grafana.forteapps.net` with cert-manager TLS.
**OIDC Authentication** (Keycloak):
- Uses `grafana.ini.auth.generic_oauth` with KC `grafana` client
- Secret `grafana-oidc-credentials` synced by KC registrar, loaded via `envFromSecrets`
- SSO-only mode: `auth.disable_login_form: true` + `auth.generic_oauth.auto_login: true`
- Role mapping via JMESPath on `resource_access.grafana.roles` claim (requires KC client role mapper)
- Roles: KC client roles `Admin`/`Editor` map to Grafana roles; default is `Viewer`
### Loki
**Chart**: `grafana/loki-stack`
@@ -1076,52 +906,6 @@ dind:
- Gitea admin panel (`/admin/runners`) — runners show as Online
- Create test workflow in `.gitea/workflows/test.yml` — job executes
### Vaultwarden
**Chart**: `guerzon/vaultwarden`
**Version**: 0.36.4 (app v1.36.0-alpine)
**Namespace**: `vaultwarden`
**Purpose**: Self-hosted Bitwarden-compatible password manager.
**Configuration**:
```yaml
# infra/overlays/upc-dev/vaultwarden/ + infra/values/
domain: "https://bitwarden.forteapps.net"
ingress:
enabled: true
class: "traefik"
tls: true
tlsSecret: vaultwarden-tls
hostname: bitwarden.forteapps.net
additionalAnnotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
database:
type: postgresql
host: vaultwarden-postgresql # StatefulSet in overlay
existingSecret: prod-db-creds
storage:
data: 5Gi (ReadWriteOnce)
attachments: 5Gi (ReadWriteOnce)
```
**TLS**: cert-manager auto-provisions Let's Encrypt certificate via `letsencrypt-prod` ClusterIssuer (same pattern as Gitea, Grafana, etc).
**SSO**: Keycloak OIDC via `forte` realm (client ID: `vaultwarden`). Self-service client config Secret (`keycloak-client-vaultwarden`) triggers registrar to create KC client and sync credentials to `vaultwarden-oidc-credentials`. PKCE enabled.
**Endpoints**:
- Web UI: `https://bitwarden.forteapps.net`
**Database**: Separate ArgoCD Application `vaultwarden-postgresql` (sync-wave `"0"`) deploys PostgreSQL 16 StatefulSet + SealedSecret before Vaultwarden (wave `"1"`). 2Gi PVC. Chart does NOT include a PostgreSQL subchart — must be provisioned separately.
**Secrets**:
- `prod-db-creds` (SealedSecret) — PostgreSQL credentials (`pgusername`, `pgpassword`) + SMTP credentials
- `vaultwarden-oidc-credentials` (registrar-managed) — OIDC client ID + secret
- `vaultwarden-tls` — auto-managed by cert-manager
### AI Code Review (ai-review)
**Type**: Gitea Actions workflow (`.gitea/workflows/ai-review.yaml`)
@@ -1200,30 +984,6 @@ ignore:
- Check Gitea Actions tab for workflow run status and logs
- Monitor Anthropic usage dashboard for token consumption
### Keycloak Browser Flow (IdP Auto-Redirect)
**File**: `infra/values/base/keycloak-values.yaml` (inside `forte-realm.json`)
The realm uses a custom browser authentication flow (`browser-auto-idp`) that skips the Keycloak login page and redirects directly to the Entra ID identity provider.
**Flow executions**:
| Priority | Authenticator | Requirement | Purpose |
|----------|--------------|-------------|---------|
| 10 | `auth-cookie` | ALTERNATIVE | Reuse existing session (no redirect) |
| 20 | `identity-provider-redirector` | ALTERNATIVE | Auto-redirect to `forte-entra` IdP |
**Key fields in realm JSON**:
- `"browserFlow": "browser-auto-idp"` — overrides the default `browser` flow at realm level
- `"authenticationFlows"` — defines the custom flow with its executions
- `"authenticatorConfig"` — sets `defaultProvider: "forte-entra"` on the redirector
**Why custom flow**: The default KC browser flow shows a username/password form with an IdP button. Since all authentication is via Entra ID, the custom flow eliminates this step. The `auth-cookie` execution preserves session reuse so returning users aren't redirected again.
**Important**: The `forte-entra` identity provider must exist in Keycloak (currently configured manually in the KC admin console). If the IdP alias changes, update the `defaultProvider` value in the realm JSON.
---
### Keycloak Client Registrar
**Type**: CronJob (deployed via Keycloak Helm chart `extraDeploy`)
@@ -1255,18 +1015,9 @@ The realm uses a custom browser authentication flow (`browser-auto-idp`) that sk
**Resources**:
- `ServiceAccount`: `keycloak-client-registrar` (namespace: `keycloak`)
- `ClusterRole`: `keycloak-client-registrar`
- Secrets: `get`, `list`, `create`, `update`, `patch`
- Namespaces: `get`, `list`
- `ClusterRole`: `keycloak-client-registrar` (secrets: get/list/create/update/patch; namespaces: get/list)
- `ClusterRoleBinding`: `keycloak-client-registrar`
- `CronJob`: `keycloak-client-registrar`
- **Schedule**: `*/2 * * * *` (every 2 minutes)
- **Concurrency Policy**: `Forbid` (prevents concurrent runs)
- **Backoff Limit**: 3 retries per job
- **History**: 1 successful job, 3 failed jobs retained
- **Resources**: 50m CPU / 64Mi memory (requests), 200m CPU / 128Mi memory (limits)
**Container**: Alpine 3.20 with `curl` and `jq` installed
**Kyverno Policy**: `keycloak-client-config-cloner` — clones labeled Secrets from app namespaces to `keycloak` namespace (see [Kyverno Policies](#kyverno-policies))
@@ -1346,33 +1097,6 @@ kubectl get secret keycloak-client-<app> -n keycloak -o jsonpath='{.metadata.ann
**See**: [Developer Guide - Adding a New Keycloak Client](DEVELOPER-GUIDE.md#adding-a-new-keycloak-client)
### Karpor
**Chart**: `karpor` from `https://kusionstack.github.io/charts`
**Version**: 0.7.6 (app v0.6.4)
**Namespace**: `karpor`
**Sync Wave**: 1
**Purpose**: Kubernetes visualization and intelligence tool. Provides cross-cluster resource search, compliance checking, and topology visualization. Gives platform engineers a unified view of all cluster resources and their relationships.
**Architecture** (4 components):
- **Server** — main Karpor API/UI (port 7443)
- **Syncer** — syncs cluster state into the search index
- **ElasticSearch** — search backend for resource indexing
- **etcd** — persistent key-value store (10Gi PVC)
**Configuration** (`infra/values/base/karpor-values.yaml`):
- `namespaceEnabled: false` — ArgoCD manages namespace creation
- Default resource limits tuned for small clusters
- ElasticSearch: 2 CPU / 4Gi memory (the heaviest component)
- AI features available but not enabled (requires `server.ai.authToken` + backend config)
**Access**: Port-forward to reach the UI:
```bash
kubectl port-forward svc/karpor-release-server -n karpor 7443:7443
# Open https://localhost:7443
```
### Renovate
**Chart**: `renovate` (OCI: `ghcr.io/renovatebot/charts`)
@@ -1820,23 +1544,7 @@ Forward to Application (localhost:3000)
Application processes request
```
#### Forwarded Headers
After successful authentication, the sidecar injects user identity as HTTP headers before forwarding the request to the application container:
| Header | Description | Auth Modes |
|--------|-------------|------------|
| `X-Auth-User` | Username or display name | Token, OIDC, MCP |
| `X-Auth-Email` | User email address | OIDC |
| `X-Auth-Subject` | OIDC `sub` claim (stable user ID) | OIDC, MCP |
| `X-Auth-Groups` | Comma-separated group memberships | OIDC (if `groups` scope) |
| `X-Auth-Token` | The validated access token | All modes |
These headers are trustworthy because the auto-generated `NetworkPolicy` restricts pod ingress to the sidecar port only — external traffic cannot reach the application container directly, so headers cannot be spoofed.
Applications should read these headers to obtain authenticated user information (e.g. for display, authorisation decisions, or audit logging) instead of implementing their own authentication.
**See**: [Developer Guide - Accessing Authenticated User Information](DEVELOPER-GUIDE.md#accessing-authenticated-user-information) for code examples.
**See**: [Developer Guide - Enabling Authentication](DEVELOPER-GUIDE.md#enabling-authentication-for-applications) for usage examples.
---
@@ -1870,22 +1578,14 @@ Recommended resource allocation:
### Storage Classes
Storage classes are cloud-specific and configured in per-cluster value overrides (`infra/values/{cluster}/gitea-values.yaml`):
| Cloud | Storage Class | Driver |
|-------|--------------|--------|
| **UpCloud** | `upcloud-block-storage-maxiops` | UpCloud CSI |
| **AWS EKS** | `gp3` | EBS CSI |
| **Azure AKS** | `managed-csi-premium` | Azure Disk CSI |
| **GCP GKE** | `premium-rwo` | PD CSI |
Default storage class used: **UpCloud default** (varies by provider)
```yaml
# Example: base values omit storageClass (set in per-cluster overlay)
persistence:
enabled: true
storageClass: "" # Uses default
accessMode: ReadWriteOnce
size: 5Gi
# storageClass set by infra/values/{cluster}/gitea-values.yaml
```
---
@@ -2025,9 +1725,8 @@ To add support for a new cloud (e.g., `oci-dev` for Oracle Cloud):
- `opencost-values.yaml` — pricing model or cloud billing integration
3. **Kustomize overlay**: `infra/overlays/oci-dev/kustomization.yaml` — patch `valueFiles[1]` for each Application
4. **App-of-apps**: `_app-of-apps-oci-dev.yaml` — points to `infra/overlays/oci-dev`
5. **Secrets overlay**: `secrets/overlays/oci-dev/kustomization.yaml` — references `../../base`, add cloud-specific SealedSecrets if needed
6. **Secrets patch**: Add patch to `infra/overlays/oci-dev/kustomization.yaml` to swap secrets path to `secrets/overlays/oci-dev`
7. **Bootstrap**: `./bootstrap.sh oci-dev`
5. **Sealed Secrets**: `secrets/oci-dev/` — TLS certs, credentials, backup S3 config
6. **Bootstrap**: `./bootstrap.sh oci-dev`
---
@@ -2163,6 +1862,6 @@ team: platform
---
**Last Updated**: 2026-04-22
**Last Updated**: 2026-04-16
**Maintained By**: Platform Team
**Version**: 1.0.0

View File

@@ -1,4 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- cert-manager-application.yaml

Some files were not shown because too many files have changed in this diff Show More