diff --git a/README.md b/README.md index d419e33..b560f46 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # Kubernetes Cluster - GitOps Configuration -> **Kubernetes cluster bootstrapping and GitOps configuration repository** using ArgoCD for UpCloud Managed Kubernetes +> **Kubernetes cluster bootstrapping and GitOps configuration repository** using ArgoCD for multi-cloud Kubernetes (UpCloud, AWS EKS, Azure AKS, GCP GKE) [![GitOps](https://img.shields.io/badge/GitOps-ArgoCD-blue)](https://argoproj.github.io/cd/) -[![Kubernetes](https://img.shields.io/badge/Kubernetes-UpCloud-orange)](https://upcloud.com/) +[![Kubernetes](https://img.shields.io/badge/Kubernetes-Multi--Cloud-orange)]() --- @@ -95,14 +95,26 @@ This repository contains the complete GitOps configuration for our Kubernetes cl │ │ ├── renovate.yaml │ │ ├── ... # All other Application manifests │ │ └── secrets.yaml -│ ├── overlays/ # Per-cluster overrides -│ │ ├── upc-dev/ # UpCloud Dev cluster (uses base as-is) -│ │ └── upc-prod/ # UpCloud Prod cluster (patches value paths) +│ ├── overlays/ # Per-cluster overrides (Kustomize) +│ │ ├── upc-dev/ # UpCloud Dev (uses base as-is) +│ │ ├── upc-prod/ # UpCloud Prod (patches value paths) +│ │ ├── eks-dev/ # AWS EKS Dev +│ │ ├── eks-prod/ # AWS EKS Prod +│ │ ├── aks-dev/ # Azure AKS Dev +│ │ ├── aks-prod/ # Azure AKS Prod +│ │ ├── gke-dev/ # GCP GKE Dev +│ │ └── gke-prod/ # GCP GKE Prod │ ├── dashboards/ # Grafana dashboard ConfigMaps │ └── values/ # Helm value overrides -│ ├── base/ # Shared values (all clusters) -│ ├── upc-dev/ # UpCloud Dev-specific values -│ └── upc-prod/ # UpCloud Prod-specific values +│ ├── base/ # Shared cloud-agnostic values +│ ├── upc-dev/ # UpCloud Dev (storage, LB, pricing) +│ ├── upc-prod/ # UpCloud Prod +│ ├── eks-dev/ # AWS EKS Dev +│ ├── eks-prod/ # AWS EKS Prod +│ ├── aks-dev/ # Azure AKS Dev +│ ├── aks-prod/ # Azure AKS Prod +│ ├── gke-dev/ # GCP GKE Dev +│ └── gke-prod/ # GCP GKE Prod │ ├── apps/ # Business Applications │ 
├── mcp10x.yaml @@ -361,7 +373,7 @@ kubectl patch application myapp -n argocd \ ## 📖 Key Concepts ### App-of-Apps Pattern -`_app-of-apps.yaml` is the root Application that manages all other Applications in `infra/`. Kustomize overlays in `infra/overlays/{upc-dev,upc-prod}/` render the base Applications with per-cluster patches (e.g., swapping value file paths from `upc-dev` to `upc-prod`). +`_app-of-apps-{cluster}.yaml` is the root Application that manages all other Applications in `infra/`. Kustomize overlays in `infra/overlays/{cluster}/` render the base Applications with per-cluster patches (e.g., swapping value file paths). Supported clusters: `upc-dev`, `upc-prod`, `eks-dev`, `eks-prod`, `aks-dev`, `aks-prod`, `gke-dev`, `gke-prod`. ### Multi-Source Pattern Applications reference both: @@ -458,16 +470,14 @@ Documentation lives in `docs/`. To update: ## 📝 Notes ### Current Environment -- **Provider**: UpCloud Managed Kubernetes +- **Provider**: Multi-cloud (UpCloud, AWS EKS, Azure AKS, GCP GKE) +- **Active clusters**: UpCloud (upc-dev, upc-prod) - **Environment**: Production (internal use only) -- **Clusters**: Multi-cluster (upc-dev, upc-prod) via Kustomize overlays - **Auth**: Disabled for ArgoCD (internal access) -- **Backup**: None (cluster rebuildable via GitOps) +- **Backup**: Gitea daily backup to S3-compatible storage ### Known Limitations -- No automated backups (yet) - Secret rotation not automated -- Multi-cluster limited to upc-dev and upc-prod environments - DNS management is manual **Future improvements**: See [Operations Runbook - Disaster Recovery](docs/OPERATIONS-RUNBOOK.md#disaster-recovery) @@ -504,7 +514,7 @@ Internal use only. Not for public distribution. --- -**Last Updated**: 2026-03-16 +**Last Updated**: 2026-04-22 **Documentation Version**: 1.0.0 **🚀 Ready to get started? 
Check out the [Documentation Index](docs/README.md)!** diff --git a/clusters/aks-dev.yaml b/clusters/aks-dev.yaml index db86d2b..5a3ace2 100644 --- a/clusters/aks-dev.yaml +++ b/clusters/aks-dev.yaml @@ -1,10 +1,10 @@ -clusterName: dev-fd-aks -domain: forteapps.net -argocdDomain: argocd.127.0.0.1.nip.io -grafanaDomain: grafana.forteapps.net -keycloakDomain: id.forteapps.net -dotaiDomain: kubemcp.forteapps.net -dotaiUiDomain: kubemcpui.forteapps.net -letsencryptEmail: danijels@gmail.com -trustedIPs: "10.0.0.0/8" +clusterName: dev-aks # <- adjust to your AKS cluster name +domain: example.com # <- adjust to your domain +argocdDomain: argocd.example.com +grafanaDomain: grafana.example.com +keycloakDomain: id.example.com +dotaiDomain: kubemcp.example.com +dotaiUiDomain: kubemcpui.example.com +letsencryptEmail: admin@example.com # <- adjust +trustedIPs: "10.0.0.0/8,168.63.129.16/32" # <- VNet CIDR + Azure health probe cloudProvider: azure diff --git a/clusters/aks-prod.yaml b/clusters/aks-prod.yaml index d230b08..0be858e 100644 --- a/clusters/aks-prod.yaml +++ b/clusters/aks-prod.yaml @@ -1,10 +1,10 @@ -clusterName: prod-fd-aks -domain: fortedigital.com -argocdDomain: argocd.127.0.0.1.nip.io -grafanaDomain: grafana.fortedigital.com -keycloakDomain: id.fortedigital.com -dotaiDomain: kubemcp.fortedigital.com -dotaiUiDomain: kubemcpui.fortedigital.com -letsencryptEmail: danijel.simeunovic@fortedigital.com -trustedIPs: "10.0.0.0/8" +clusterName: prod-aks # <- adjust to your AKS cluster name +domain: example.com # <- adjust to your domain +argocdDomain: argocd.example.com +grafanaDomain: grafana.example.com +keycloakDomain: id.example.com +dotaiDomain: kubemcp.example.com +dotaiUiDomain: kubemcpui.example.com +letsencryptEmail: admin@example.com # <- adjust +trustedIPs: "10.0.0.0/8,168.63.129.16/32" # <- VNet CIDR + Azure health probe cloudProvider: azure diff --git a/clusters/eks-dev.yaml b/clusters/eks-dev.yaml index e822b84..55ffa95 100644 --- a/clusters/eks-dev.yaml +++ 
b/clusters/eks-dev.yaml @@ -1,10 +1,10 @@ -clusterName: dev-fd-eks -domain: forteapps.net -argocdDomain: argocd.127.0.0.1.nip.io -grafanaDomain: grafana.forteapps.net -keycloakDomain: id.forteapps.net -dotaiDomain: kubemcp.forteapps.net -dotaiUiDomain: kubemcpui.forteapps.net -letsencryptEmail: danijels@gmail.com -trustedIPs: "10.0.0.0/8" -cloudProvider: aws +clusterName: dev-eks # <- adjust to your EKS cluster name +domain: example.com # <- adjust to your domain +argocdDomain: argocd.example.com +grafanaDomain: grafana.example.com +keycloakDomain: id.example.com +dotaiDomain: kubemcp.example.com +dotaiUiDomain: kubemcpui.example.com +letsencryptEmail: admin@example.com # <- adjust +trustedIPs: "10.0.0.0/8" # <- adjust to your VPC CIDR +cloudProvider: eks diff --git a/clusters/eks-prod.yaml b/clusters/eks-prod.yaml index dd5cc83..8dcfc90 100644 --- a/clusters/eks-prod.yaml +++ b/clusters/eks-prod.yaml @@ -1,10 +1,10 @@ -clusterName: prod-fd-eks -domain: fortedigital.com -argocdDomain: argocd.127.0.0.1.nip.io -grafanaDomain: grafana.fortedigital.com -keycloakDomain: id.fortedigital.com -dotaiDomain: kubemcp.fortedigital.com -dotaiUiDomain: kubemcpui.fortedigital.com -letsencryptEmail: danijel.simeunovic@fortedigital.com -trustedIPs: "10.0.0.0/8" -cloudProvider: aws +clusterName: prod-eks # <- adjust to your EKS cluster name +domain: example.com # <- adjust to your domain +argocdDomain: argocd.example.com +grafanaDomain: grafana.example.com +keycloakDomain: id.example.com +dotaiDomain: kubemcp.example.com +dotaiUiDomain: kubemcpui.example.com +letsencryptEmail: admin@example.com # <- adjust +trustedIPs: "10.0.0.0/8" # <- adjust to your VPC CIDR +cloudProvider: eks diff --git a/clusters/gke-dev.yaml b/clusters/gke-dev.yaml index fd5afd5..2bd2801 100644 --- a/clusters/gke-dev.yaml +++ b/clusters/gke-dev.yaml @@ -1,10 +1,10 @@ -clusterName: dev-fd-gke -domain: forteapps.net -argocdDomain: argocd.127.0.0.1.nip.io -grafanaDomain: grafana.forteapps.net -keycloakDomain: 
id.forteapps.net -dotaiDomain: kubemcp.forteapps.net -dotaiUiDomain: kubemcpui.forteapps.net -letsencryptEmail: danijels@gmail.com -trustedIPs: "10.0.0.0/8" -cloudProvider: gcp +clusterName: dev-gke # <- adjust to your GKE cluster name +domain: example.com # <- adjust to your domain +argocdDomain: argocd.example.com +grafanaDomain: grafana.example.com +keycloakDomain: id.example.com +dotaiDomain: kubemcp.example.com +dotaiUiDomain: kubemcpui.example.com +letsencryptEmail: admin@example.com # <- adjust +trustedIPs: "10.0.0.0/8,35.191.0.0/16,130.211.0.0/22" # <- subnet CIDR + GCP health checks +cloudProvider: gke diff --git a/clusters/gke-prod.yaml b/clusters/gke-prod.yaml index 9c6751d..7d8740f 100644 --- a/clusters/gke-prod.yaml +++ b/clusters/gke-prod.yaml @@ -1,10 +1,10 @@ -clusterName: prod-fd-gke -domain: fortedigital.com -argocdDomain: argocd.127.0.0.1.nip.io -grafanaDomain: grafana.fortedigital.com -keycloakDomain: id.fortedigital.com -dotaiDomain: kubemcp.fortedigital.com -dotaiUiDomain: kubemcpui.fortedigital.com -letsencryptEmail: danijel.simeunovic@fortedigital.com -trustedIPs: "10.0.0.0/8" -cloudProvider: gcp +clusterName: prod-gke # <- adjust to your GKE cluster name +domain: example.com # <- adjust to your domain +argocdDomain: argocd.example.com +grafanaDomain: grafana.example.com +keycloakDomain: id.example.com +dotaiDomain: kubemcp.example.com +dotaiUiDomain: kubemcpui.example.com +letsencryptEmail: admin@example.com # <- adjust +trustedIPs: "10.0.0.0/8,35.191.0.0/16,130.211.0.0/22" # <- subnet CIDR + GCP health checks +cloudProvider: gke diff --git a/docs/GITOPS-ARCHITECTURE.md b/docs/GITOPS-ARCHITECTURE.md index ec9769f..b199cc8 100644 --- a/docs/GITOPS-ARCHITECTURE.md +++ b/docs/GITOPS-ARCHITECTURE.md @@ -12,11 +12,11 @@ ## Overview -This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where Git repositories serve as the single source of truth for both infrastructure and application deployments. 
The cluster is running on **UpCloud Managed Kubernetes** but is designed to be cloud-agnostic. +This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where Git repositories serve as the single source of truth for both infrastructure and application deployments. The cluster setup is **cloud-agnostic**, with ready-to-use configurations for **UpCloud**, **AWS EKS**, **Azure AKS**, and **GCP GKE**. ### Key Characteristics - **Environment**: Production (internal use only) -- **Cluster Type**: Multi-cluster (upc-dev, upc-prod) via Kustomize overlays +- **Cluster Type**: Multi-cloud, multi-cluster via Kustomize overlays (UpCloud, AWS, Azure, GCP) - **GitOps Tool**: ArgoCD - **Deployment Pattern**: App-of-Apps - **Secret Management**: Sealed Secrets (kubeseal) @@ -63,7 +63,7 @@ This Kubernetes cluster uses a **GitOps approach** powered by **ArgoCD**, where ▼ ┌────────────────────────────────┐ │ Kubernetes Clusters │ - │ (UpCloud: upc-dev, upc-prod) │ + │ (UpCloud, AWS, Azure, GCP) │ │ │ │ ┌──────────────────────────┐ │ │ │ ArgoCD │ │ @@ -131,26 +131,22 @@ launchpad/ │ │ ├── renovate.yaml │ │ ├── ... # All other Application manifests │ │ └── secrets.yaml -│ ├── overlays/ # Per-cluster overrides +│ ├── overlays/ # Per-cluster Kustomize overrides │ │ ├── upc-dev/ # UpCloud Dev (uses base as-is) -│ │ └── upc-prod/ # UpCloud Prod (patches value paths) +│ │ ├── upc-prod/ # UpCloud Prod (patches value paths) +│ │ ├── eks-dev/ # AWS EKS Dev +│ │ ├── eks-prod/ # AWS EKS Prod +│ │ ├── aks-dev/ # Azure AKS Dev +│ │ ├── aks-prod/ # Azure AKS Prod +│ │ ├── gke-dev/ # GCP GKE Dev +│ │ └── gke-prod/ # GCP GKE Prod │ ├── dashboards/ # Grafana dashboard ConfigMaps │ └── values/ # Helm value overrides for infra -│ ├── base/ # Shared values (all clusters) -│ │ ├── traefik-values.yaml -│ │ ├── keycloak-values.yaml -│ │ ├── grafana-values.yaml -│ │ ├── prometheus-values.yaml -│ │ ├── gitea-values.yaml -│ │ └── ... 
-│ ├── upc-dev/ # upc-dev cluster-specific values -│ │ ├── traefik-values.yaml -│ │ ├── keycloak-values.yaml -│ │ └── grafana-values.yaml -│ └── upc-prod/ # upc-prod cluster-specific values -│ ├── traefik-values.yaml -│ ├── keycloak-values.yaml -│ └── grafana-values.yaml +│ ├── base/ # Cloud-agnostic shared values +│ ├── upc-{dev,prod}/ # UpCloud: storage class, LB, pricing +│ ├── eks-{dev,prod}/ # AWS: gp3, NLB, CUR pricing +│ ├── aks-{dev,prod}/ # Azure: managed-csi-premium, Standard LB +│ └── gke-{dev,prod}/ # GCP: premium-rwo, L4 LB │ ├── apps/ # Business Application ArgoCD manifests (Kustomize) │ ├── base/ # Base app manifests @@ -287,7 +283,7 @@ app-repository/ ### The App-of-Apps Pattern ``` -_app-of-apps-{upc-dev,upc-prod}.yaml (Root, per cluster) +_app-of-apps-{cluster}.yaml (Root, per cluster — e.g. upc-dev, eks-prod, gke-dev) │ ├── infrastructure-apps (manages infra/) │ ├── cluster-resources-application @@ -377,6 +373,15 @@ patches: value: $values/infra/values/upc-prod/traefik-values.yaml ``` +Cloud-specific values (storage classes, load balancer annotations, cost model) are isolated in per-cluster value files. 
Base values are fully cloud-agnostic: + +| Cloud | Storage Class | Load Balancer | OpenCost Provider | +|-------|--------------|---------------|-------------------| +| **UpCloud** | `upcloud-block-storage-maxiops` | UpCloud LB (ProxyProtocol v2) | Custom pricing | +| **AWS EKS** | `gp3` (EBS CSI) | NLB (ProxyProtocol v2) | AWS CUR | +| **Azure AKS** | `managed-csi-premium` | Standard LB (`externalTrafficPolicy: Local`) | Azure Billing API | +| **GCP GKE** | `premium-rwo` (PD CSI) | L4 passthrough NLB | GCP Cloud Billing | + **Benefits**: - Single source of truth for Application definitions - Cluster-specific values isolated per overlay @@ -658,6 +663,6 @@ Notifications include: --- -**Last Updated**: 2026-03-16 +**Last Updated**: 2026-04-22 **Maintained By**: Platform Team **Questions?**: Contact #platform-support on Slack diff --git a/docs/OPERATIONS-RUNBOOK.md b/docs/OPERATIONS-RUNBOOK.md index a02a239..586a806 100644 --- a/docs/OPERATIONS-RUNBOOK.md +++ b/docs/OPERATIONS-RUNBOOK.md @@ -37,7 +37,7 @@ Bootstrap a new cluster from scratch: #### Prerequisites -1. **Kubernetes cluster running** (UpCloud or any K8s cluster) +1. **Kubernetes cluster running** (UpCloud, AWS EKS, Azure AKS, GCP GKE, or any K8s cluster) 2. **kubectl configured** with admin access 3. **Repositories cloned** locally @@ -54,11 +54,13 @@ kubectl get nodes git clone https://git.forteapps.net/Forte/launchpad cd launchpad -# 2. Set cluster name (optional) -export CLUSTER_NAME="prod-cluster-01" +# 2. Run bootstrap script with cluster target +# Available clusters: upc-dev, upc-prod, eks-dev, eks-prod, +# aks-dev, aks-prod, gke-dev, gke-prod +./bootstrap.sh upc-dev -# 3. Run bootstrap script -./bootstrap.sh +# Cluster config is loaded from clusters/<cluster>.yaml +# (cloudProvider, trustedIPs, domain, etc.) 
``` **What Happens:** @@ -1262,13 +1264,21 @@ spec: ### Backup Strategy -**Current State**: No automated backups +**Current State**: Gitea daily backups to S3-compatible storage -**What Needs Backup**: -- ❌ Cluster state (not backed up - recreate via GitOps) -- ❌ Persistent volumes (currently not critical) -- ✅ Git repositories (Gitea provides backup) -- ⚠️ Secrets (sealed secrets in Git, unseal keys need safekeeping) +**What Is Backed Up**: +- ✅ Gitea repositories + database: Daily CronJob (`cluster-resources/gitea-backup-cronjob.yaml`) uploads to S3-compatible storage with 7-day retention +- ✅ Git repositories: Full cluster config recoverable from Git +- ⚠️ Secrets: Sealed secrets in Git; unseal keys need safekeeping + +**What Is NOT Backed Up**: +- ❌ Cluster state (recreate via GitOps) +- ❌ Other persistent volumes (Prometheus, Loki, Tempo data) + +**Per-cloud backup scripts** (manual restore helpers): +- UpCloud/AWS: `scripts/gitea-backup.sh` / `scripts/gitea-backup-eks.sh` (MinIO CLI, S3-compatible) +- Azure: `scripts/gitea-backup-aks.sh` (Azure CLI + Blob Storage) +- GCP: `scripts/gitea-backup-gke.sh` (gsutil + GCS) ### Cluster Rebuild @@ -1370,6 +1380,9 @@ kubectl get pods -n argocd ```bash # UpCloud: Upgrade via control panel or CLI +# AWS EKS: eksctl upgrade cluster / AWS Console +# Azure AKS: az aks upgrade / Azure Portal +# GCP GKE: gcloud container clusters upgrade / Cloud Console # After upgrade, verify cluster kubectl version @@ -1507,18 +1520,35 @@ git push ### Multi-Cluster Setup -The repository supports multiple clusters via Kustomize overlays: +The repository supports multiple clusters across multiple clouds via Kustomize overlays: +**Active clusters:** - **upc-dev** (default): `infra/overlays/upc-dev/` — uses base Applications as-is - **upc-prod**: `infra/overlays/upc-prod/` — patches value file paths from `upc-dev` to `upc-prod` -Each cluster has its own: -- Root app-of-apps file: `_app-of-apps-upc-dev.yaml` / `_app-of-apps-upc-prod.yaml` -- 
Cluster-specific Helm values: `infra/values/upc-dev/` / `infra/values/upc-prod/` -- Sealed secrets: `secrets/upc-dev/` (others as needed) -- Apps overlay: `apps/overlays/upc-dev/` / `apps/overlays/upc-prod/` +**Cloud-ready templates (fill in `clusters/*.yaml` before use):** +- **eks-dev** / **eks-prod**: AWS EKS with NLB, gp3 storage, AWS CUR pricing +- **aks-dev** / **aks-prod**: Azure AKS with Standard LB, managed-csi-premium storage +- **gke-dev** / **gke-prod**: GCP GKE with L4 LB, premium-rwo storage -To add a new cluster, create a new overlay directory (e.g., `infra/overlays/upc-staging/`) with patches that swap the value file paths. +Each cluster has its own: +- Root app-of-apps: `_app-of-apps-{cluster}.yaml` +- Cluster config: `clusters/{cluster}.yaml` (domain, trustedIPs, cloudProvider) +- Kustomize overlay: `infra/overlays/{cluster}/kustomization.yaml` +- Helm value overrides: `infra/values/{cluster}/` (traefik, gitea, opencost) +- Sealed secrets: `secrets/{cluster}/` (as needed) +- Apps overlay: `apps/overlays/{cluster}/` + +Cloud-specific values handled per-cluster: + +| Concern | UpCloud | AWS EKS | Azure AKS | GCP GKE | +|---------|---------|---------|-----------|---------| +| **Storage class** | `upcloud-block-storage-maxiops` | `gp3` | `managed-csi-premium` | `premium-rwo` | +| **Load balancer** | UpCloud LB + ProxyProtocol v2 | NLB + ProxyProtocol v2 | Standard LB + `externalTrafficPolicy: Local` | L4 passthrough NLB | +| **Cost monitoring** | Custom pricing | AWS CUR | Azure Billing API | GCP Cloud Billing | +| **Backup storage** | UpCloud S3-compat | AWS S3 (native) | Azure Blob Storage | GCS | + +To add a new cluster, create a new overlay directory (e.g., `infra/overlays/eks-staging/`) with patches that swap the value file paths, and a matching `clusters/eks-staging.yaml`. 
### Blue-Green Deployments @@ -1661,6 +1691,6 @@ echo "Remember to delete: $SECRET_FILE" --- -**Last Updated**: 2026-03-16 +**Last Updated**: 2026-04-22 **Maintained By**: Platform Team **Emergency Contact**: #platform-support on Slack diff --git a/docs/README.md b/docs/README.md index 47a394e..6b48500 100644 --- a/docs/README.md +++ b/docs/README.md @@ -180,7 +180,7 @@ Reference for: │ ▼ ┌──────────────────────────────────────────────────────────────┐ -│ Kubernetes Clusters (UpCloud: upc-dev, upc-prod) │ +│ Kubernetes Clusters (UpCloud, AWS, Azure, GCP) │ │ ┌──────────────────────────────────────────────────────┐ │ │ │ Infrastructure: Traefik, Cert-Manager, Kyverno │ │ │ ├──────────────────────────────────────────────────────┤ │ @@ -194,7 +194,7 @@ Reference for: ### Key Technologies - **GitOps**: ArgoCD -- **Kubernetes**: UpCloud Managed Kubernetes (multi-cluster: upc-dev, upc-prod) +- **Kubernetes**: Multi-cloud (UpCloud, AWS EKS, Azure AKS, GCP GKE) - **Ingress**: Traefik v2 - **Certificates**: Cert-Manager + Let's Encrypt - **Policies**: Kyverno @@ -299,11 +299,16 @@ docs/ ## 🔄 Documentation Versions **Current Version**: 1.0.0 -**Last Updated**: 2026-03-16 +**Last Updated**: 2026-04-22 **Maintained By**: Platform Team ### Changelog +- **v1.1.0 (2026-04-22)**: Multi-cloud support + - Cloud-agnostic base values (storage, LB, pricing moved to per-cluster overlays) + - Added AWS EKS, Azure AKS, GCP GKE configurations + - Per-cloud backup scripts + - Updated all documentation - **v1.0.0 (2026-03-16)**: Initial comprehensive documentation release - GitOps Architecture guide - Developer Onboarding guide diff --git a/docs/REFERENCE.md b/docs/REFERENCE.md index 74645c2..ab162f3 100644 --- a/docs/REFERENCE.md +++ b/docs/REFERENCE.md @@ -20,9 +20,10 @@ | Component | Value | |-----------|-------| -| **Provider** | Multi-cloud (UpCloud, AKS, EKS, GKE) | +| **Provider** | Multi-cloud (UpCloud, AWS EKS, Azure AKS, GCP GKE) | | **Environment** | Dev + Production per cloud | 
-| **Cluster Count** | Multi-cluster (upc-dev/prod, aks-dev/prod, eks-dev/prod, gke-dev/prod) | +| **Active clusters** | UpCloud (upc-dev, upc-prod) | +| **Cloud-ready templates** | EKS, AKS, GKE (dev + prod each) | | **GitOps Tool** | ArgoCD | | **Ingress Controller** | Traefik v2 | | **Certificate Management** | Cert-Manager + Let's Encrypt | @@ -43,7 +44,7 @@ Internet [DNS: *.forteapps.net] │ ▼ -[UpCloud LoadBalancer] +[Cloud Load Balancer] │ ▼ [Traefik Ingress Controller] @@ -1578,14 +1579,22 @@ Recommended resource allocation: ### Storage Classes -Default storage class used: **UpCloud default** (varies by provider) +Storage classes are cloud-specific and configured in per-cluster value overrides (`infra/values/{cluster}/gitea-values.yaml`): + +| Cloud | Storage Class | Driver | +|-------|--------------|--------| +| **UpCloud** | `upcloud-block-storage-maxiops` | UpCloud CSI | +| **AWS EKS** | `gp3` | EBS CSI | +| **Azure AKS** | `managed-csi-premium` | Azure Disk CSI | +| **GCP GKE** | `premium-rwo` | PD CSI | ```yaml +# Example: base values omit storageClass (set in per-cluster overlay) persistence: enabled: true - storageClass: "" # Uses default accessMode: ReadWriteOnce size: 5Gi + # storageClass set by infra/values/{cluster}/gitea-values.yaml ``` --- @@ -1862,6 +1871,6 @@ team: platform --- -**Last Updated**: 2026-04-16 +**Last Updated**: 2026-04-22 **Maintained By**: Platform Team **Version**: 1.0.0 diff --git a/infra/overlays/upc-prod/kustomization.yaml b/infra/overlays/upc-prod/kustomization.yaml index 9242d39..164a6b6 100644 --- a/infra/overlays/upc-prod/kustomization.yaml +++ b/infra/overlays/upc-prod/kustomization.yaml @@ -66,3 +66,21 @@ patches: - op: replace path: /spec/source/path value: apps/overlays/upc-prod + +# Gitea: swap upc-dev → upc-prod +- target: + kind: Application + name: gitea + patch: | + - op: replace + path: /spec/sources/0/helm/valueFiles/1 + value: $values/infra/values/upc-prod/gitea-values.yaml + +# OpenCost: swap upc-dev → 
upc-prod +- target: + kind: Application + name: opencost + patch: | + - op: replace + path: /spec/sources/0/helm/valueFiles/1 + value: $values/infra/values/upc-prod/opencost-values.yaml diff --git a/infra/values/base/opencost-values.yaml b/infra/values/base/opencost-values.yaml index 1b97209..dde13fb 100644 --- a/infra/values/base/opencost-values.yaml +++ b/infra/values/base/opencost-values.yaml @@ -10,6 +10,8 @@ opencost: serviceName: prometheus-server namespaceName: monitoring port: 80 + # Cloud-specific pricing is in per-cluster value overrides + # (e.g. infra/values/upc-dev/opencost-values.yaml) ui: enabled: false service: diff --git a/infra/values/upc-dev/gitea-values.yaml b/infra/values/upc-dev/gitea-values.yaml index 151047f..ef1f8eb 100644 --- a/infra/values/upc-dev/gitea-values.yaml +++ b/infra/values/upc-dev/gitea-values.yaml @@ -1,4 +1,4 @@ -# UpCloud-specific: block storage class for Gitea + PostgreSQL +# UpCloud storage class for Gitea and its embedded PostgreSQL persistence: storageClass: upcloud-block-storage-maxiops postgresql: diff --git a/infra/values/upc-dev/opencost-values.yaml b/infra/values/upc-dev/opencost-values.yaml index 51fd0a4..06a7488 100644 --- a/infra/values/upc-dev/opencost-values.yaml +++ b/infra/values/upc-dev/opencost-values.yaml @@ -1,4 +1,4 @@ -# UpCloud-specific: custom pricing model +# UpCloud custom pricing (no native OpenCost integration) opencost: exporter: customPricing: diff --git a/infra/values/upc-prod/gitea-values.yaml b/infra/values/upc-prod/gitea-values.yaml index 151047f..ef1f8eb 100644 --- a/infra/values/upc-prod/gitea-values.yaml +++ b/infra/values/upc-prod/gitea-values.yaml @@ -1,4 +1,4 @@ -# UpCloud-specific: block storage class for Gitea + PostgreSQL +# UpCloud storage class for Gitea and its embedded PostgreSQL persistence: storageClass: upcloud-block-storage-maxiops postgresql: diff --git a/infra/values/upc-prod/opencost-values.yaml b/infra/values/upc-prod/opencost-values.yaml index 51fd0a4..06a7488 100644 
--- a/infra/values/upc-prod/opencost-values.yaml
+++ b/infra/values/upc-prod/opencost-values.yaml
@@ -1,4 +1,4 @@
-# UpCloud-specific: custom pricing model
+# UpCloud custom pricing (no native OpenCost integration)
 opencost:
   exporter:
     customPricing:
diff --git a/scripts/gitea-backup-aks.sh b/scripts/gitea-backup-aks.sh
new file mode 100644
index 0000000..5ab2653
--- /dev/null
+++ b/scripts/gitea-backup-aks.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Gitea backup helper for Azure Blob Storage
+# Uses the gitea-backup-aks secret in the gitea namespace
+# Required secret keys:
+#   AZURE_STORAGE_ACCOUNT — storage account name
+#   AZURE_STORAGE_KEY     — storage account key
+#   AZURE_CONTAINER       — blob container name
+#
+# Usage:
+#   ./scripts/gitea-backup-aks.sh list                  # list all backups
+#   ./scripts/gitea-backup-aks.sh download <filename>   # download a backup
+#   ./scripts/gitea-backup-aks.sh download latest       # download the most recent backup
+
+NAMESPACE="gitea"
+SECRET="gitea-backup-aks"
+IMAGE="mcr.microsoft.com/azure-cli:latest"
+POD_NAME="gitea-backup-helper"
+
+# Best-effort removal of the helper pod; never fails the script.
+cleanup() {
+  kubectl -n "$NAMESPACE" delete pod "$POD_NAME" --ignore-not-found --grace-period=0 > /dev/null 2>&1 || true
+}
+
+# Escape backslashes and double quotes so $1 can be embedded in a JSON string.
+json_escape() {
+  local s=$1
+  s=${s//\\/\\\\}
+  s=${s//\"/\\\"}
+  printf '%s' "$s"
+}
+
+# Start the helper pod running sh -c "$1" with the backup secret exposed as env vars.
+# stderr is left visible so a rejected pod spec or a missing secret is reported.
+start_pod() {
+  local cmd
+  cmd=$(json_escape "$1")
+  kubectl -n "$NAMESPACE" run "$POD_NAME" --restart=Never \
+    --image="$IMAGE" \
+    --overrides="{
+      \"spec\":{\"containers\":[{
+        \"name\":\"$POD_NAME\",
+        \"image\":\"$IMAGE\",
+        \"env\":[{\"name\":\"HOME\",\"value\":\"/tmp\"}],
+        \"command\":[\"sh\",\"-c\",\"$cmd\"],
+        \"envFrom\":[{\"secretRef\":{\"name\":\"$SECRET\"}}]
+      }]}
+    }" > /dev/null
+}
+
+# Run a one-shot command in the helper pod and print its output (pod logs) on stdout.
+az_run() {
+  cleanup
+  start_pod "$1" || return 1
+  if ! kubectl -n "$NAMESPACE" wait --for=jsonpath='{.status.phase}'=Succeeded "pod/$POD_NAME" --timeout=120s > /dev/null; then
+    # Show what the pod printed instead of failing silently.
+    kubectl -n "$NAMESPACE" logs "$POD_NAME" >&2 || true
+    cleanup
+    return 1
+  fi
+  kubectl -n "$NAMESPACE" logs "$POD_NAME"
+  cleanup
+}
+
+case "${1:-help}" in
+  list)
+    trap cleanup EXIT
+    echo "Listing backups..."
+    az_run 'az storage blob list --account-name ${AZURE_STORAGE_ACCOUNT} --account-key ${AZURE_STORAGE_KEY} --container-name ${AZURE_CONTAINER} --output table --query "[].{Name:name, Size:properties.contentLength, Modified:properties.lastModified}"'
+    ;;
+
+  download)
+    FILE="${2:?Usage: $0 download <filename>}"
+    trap cleanup EXIT
+
+    if [ "$FILE" = "latest" ]; then
+      echo "Finding latest backup..."
+      FILE=$(az_run 'az storage blob list --account-name ${AZURE_STORAGE_ACCOUNT} --account-key ${AZURE_STORAGE_KEY} --container-name ${AZURE_CONTAINER} --query "sort_by([], &properties.lastModified)[-1].name" -o tsv' | tr -d '[:space:]')
+      if [ -z "$FILE" ]; then
+        echo "No backups found."
+        exit 1
+      fi
+      echo "Latest: $FILE"
+    fi
+
+    echo "Downloading $FILE..."
+    cleanup
+    start_pod "sleep 300"
+    kubectl -n "$NAMESPACE" wait --for=condition=Ready "pod/$POD_NAME" --timeout=60s > /dev/null
+
+    echo "Saving to ./$FILE ..."
+    # kubectl exec does not spawn a shell, so run the download through sh -c to get
+    # the AZURE_* variables expanded inside the pod. The blob name is passed as $1
+    # so it is never re-parsed as shell code, and --output none keeps az's own
+    # result output out of the downloaded stream.
+    kubectl -n "$NAMESPACE" exec "$POD_NAME" -- sh -c \
+      'az storage blob download --account-name "${AZURE_STORAGE_ACCOUNT}" --account-key "${AZURE_STORAGE_KEY}" --container-name "${AZURE_CONTAINER}" --name "$1" --file /dev/stdout --no-progress --output none' \
+      sh "$FILE" > "./$FILE"
+    cleanup
+
+    echo "Downloaded: ./$FILE"
+    ;;
+
+  *)
+    echo "Gitea backup helper (Azure Blob Storage)"
+    echo ""
+    echo "Usage:"
+    echo "  $0 list                  List all backups in Azure Blob"
+    echo "  $0 download <filename>   Download a specific backup"
+    echo "  $0 download latest       Download the most recent backup"
+    ;;
+esac
diff --git a/scripts/gitea-backup-eks.sh b/scripts/gitea-backup-eks.sh
new file mode 100644
index 0000000..c4c5c02
--- /dev/null
+++ b/scripts/gitea-backup-eks.sh
@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Gitea backup helper for AWS S3
+# Uses the gitea-backup-s3 secret in the gitea namespace
+# (same secret schema: S3_ENDPOINT, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET)
+#
+# For AWS, S3_ENDPOINT is typically https://s3.<region>.amazonaws.com
+#
+# Usage:
+#   ./scripts/gitea-backup-eks.sh list                  # list all backups
+#   ./scripts/gitea-backup-eks.sh download <filename>   # download a backup to current dir
+#   ./scripts/gitea-backup-eks.sh download latest       # download the most recent backup
+
+NAMESPACE="gitea"
+SECRET="gitea-backup-s3"
+IMAGE="minio/mc:latest"
+POD_NAME="gitea-backup-helper"
+ALIAS_CMD='mc alias set s3 ${S3_ENDPOINT} ${AWS_ACCESS_KEY_ID} ${AWS_SECRET_ACCESS_KEY} > /dev/null'
+
+# Best-effort removal of the helper pod; never fails the script.
+cleanup() {
+  kubectl -n "$NAMESPACE" delete pod "$POD_NAME" --ignore-not-found --grace-period=0 > /dev/null 2>&1 || true
+}
+
+# Escape backslashes and double quotes so $1 can be embedded in a JSON string.
+json_escape() {
+  local s=$1
+  s=${s//\\/\\\\}
+  s=${s//\"/\\\"}
+  printf '%s' "$s"
+}
+
+# Start the helper pod running sh -c "$1" with the backup secret exposed as env vars.
+# stderr is left visible so a rejected pod spec or a missing secret is reported.
+start_pod() {
+  local cmd
+  cmd=$(json_escape "$1")
+  kubectl -n "$NAMESPACE" run "$POD_NAME" --restart=Never \
+    --image="$IMAGE" \
+    --overrides="{
+      \"spec\":{\"containers\":[{
+        \"name\":\"$POD_NAME\",
+        \"image\":\"$IMAGE\",
+        \"env\":[{\"name\":\"HOME\",\"value\":\"/tmp\"}],
+        \"command\":[\"sh\",\"-c\",\"$cmd\"],
+        \"envFrom\":[{\"secretRef\":{\"name\":\"$SECRET\"}}]
+      }]}
+    }" > /dev/null
+}
+
+# Configure the mc alias, run a one-shot command in the helper pod and print its output.
+mc_run() {
+  cleanup
+  start_pod "${ALIAS_CMD}; $1" || return 1
+  if ! kubectl -n "$NAMESPACE" wait --for=jsonpath='{.status.phase}'=Succeeded "pod/$POD_NAME" --timeout=120s > /dev/null; then
+    # Show what the pod printed instead of failing silently.
+    kubectl -n "$NAMESPACE" logs "$POD_NAME" >&2 || true
+    cleanup
+    return 1
+  fi
+  kubectl -n "$NAMESPACE" logs "$POD_NAME"
+  cleanup
+}
+
+case "${1:-help}" in
+  list)
+    trap cleanup EXIT
+    echo "Listing backups..."
+    mc_run 'mc ls s3/${S3_BUCKET}/'
+    ;;
+
+  download)
+    FILE="${2:?Usage: $0 download <filename>}"
+    trap cleanup EXIT
+
+    if [ "$FILE" = "latest" ]; then
+      echo "Finding latest backup..."
+      FILE=$(mc_run 'mc ls s3/${S3_BUCKET}/' | sort | tail -1 | awk '{print $NF}' | tr -d '[:space:]')
+      if [ -z "$FILE" ]; then
+        echo "No backups found."
+        exit 1
+      fi
+      echo "Latest: $FILE"
+    fi
+
+    echo "Downloading $FILE..."
+    cleanup
+    start_pod "sleep 300"
+    kubectl -n "$NAMESPACE" wait --for=condition=Ready "pod/$POD_NAME" --timeout=60s > /dev/null
+
+    echo "Saving to ./$FILE ..."
+    # The object name is passed as $1 to the in-pod shell so it is never re-parsed
+    # as shell code (file names with spaces or metacharacters stay intact).
+    kubectl -n "$NAMESPACE" exec "$POD_NAME" -- sh -c "${ALIAS_CMD} && mc cat \"s3/\${S3_BUCKET}/\$1\"" sh "$FILE" > "./$FILE"
+    cleanup
+
+    echo "Downloaded: ./$FILE"
+    ;;
+
+  *)
+    echo "Gitea backup helper (AWS S3)"
+    echo ""
+    echo "Usage:"
+    echo "  $0 list                  List all backups in S3"
+    echo "  $0 download <filename>   Download a specific backup"
+    echo "  $0 download latest       Download the most recent backup"
+    ;;
+esac
diff --git a/scripts/gitea-backup-gke.sh b/scripts/gitea-backup-gke.sh
new file mode 100644
index 0000000..bfed6b6
--- /dev/null
+++ b/scripts/gitea-backup-gke.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Gitea backup helper for Google Cloud Storage
+# Uses the gitea-backup-gcs secret in the gitea namespace
+# Required secret keys:
+#   GCS_BUCKET — bucket name (without gs:// prefix)
+#   GOOGLE_APPLICATION_CREDENTIALS_JSON — service account key JSON
+#     (alternatively, use Workload Identity and omit the key)
+#
+# Usage:
+#   ./scripts/gitea-backup-gke.sh list                  # list all backups
+#   ./scripts/gitea-backup-gke.sh download <filename>   # download a backup
+#   ./scripts/gitea-backup-gke.sh download latest       # download the most recent backup
+
+NAMESPACE="gitea"
+SECRET="gitea-backup-gcs"
+IMAGE="gcr.io/google.com/cloudsdktool/google-cloud-cli:slim"
+POD_NAME="gitea-backup-helper"
+AUTH_CMD='if [ -n "${GOOGLE_APPLICATION_CREDENTIALS_JSON:-}" ]; then echo "${GOOGLE_APPLICATION_CREDENTIALS_JSON}" > /tmp/gcs-key.json && gcloud auth activate-service-account --key-file=/tmp/gcs-key.json > /dev/null 2>&1; fi'
+
+# Best-effort removal of the helper pod; never fails the script.
+cleanup() {
+  kubectl -n "$NAMESPACE" delete pod "$POD_NAME" --ignore-not-found --grace-period=0 > /dev/null 2>&1 || true
+}
+
+# Escape backslashes and double quotes so $1 can be embedded in a JSON string.
+json_escape() {
+  local s=$1
+  s=${s//\\/\\\\}
+  s=${s//\"/\\\"}
+  printf '%s' "$s"
+}
+
+# Start the helper pod running sh -c "$1" with the backup secret exposed as env vars.
+# stderr is left visible so a rejected pod spec or a missing secret is reported.
+start_pod() {
+  local cmd
+  cmd=$(json_escape "$1")
+  kubectl -n "$NAMESPACE" run "$POD_NAME" --restart=Never \
+    --image="$IMAGE" \
+    --overrides="{
+      \"spec\":{\"containers\":[{
+        \"name\":\"$POD_NAME\",
+        \"image\":\"$IMAGE\",
+        \"env\":[{\"name\":\"HOME\",\"value\":\"/tmp\"}],
+        \"command\":[\"sh\",\"-c\",\"$cmd\"],
+        \"envFrom\":[{\"secretRef\":{\"name\":\"$SECRET\"}}]
+      }]}
+    }" > /dev/null
+}
+
+# Authenticate if a key is provided, run a one-shot command in the helper pod and print its output.
+gcs_run() {
+  cleanup
+  start_pod "${AUTH_CMD}; $1" || return 1
+  if ! kubectl -n "$NAMESPACE" wait --for=jsonpath='{.status.phase}'=Succeeded "pod/$POD_NAME" --timeout=120s > /dev/null; then
+    # Show what the pod printed instead of failing silently.
+    kubectl -n "$NAMESPACE" logs "$POD_NAME" >&2 || true
+    cleanup
+    return 1
+  fi
+  kubectl -n "$NAMESPACE" logs "$POD_NAME"
+  cleanup
+}
+
+case "${1:-help}" in
+  list)
+    trap cleanup EXIT
+    echo "Listing backups..."
+    gcs_run 'gsutil ls -l gs://${GCS_BUCKET}/'
+    ;;
+
+  download)
+    FILE="${2:?Usage: $0 download <filename>}"
+    trap cleanup EXIT
+
+    if [ "$FILE" = "latest" ]; then
+      echo "Finding latest backup..."
+      FILE=$(gcs_run 'gsutil ls gs://${GCS_BUCKET}/' | awk 'NF && !/TOTAL/' | sort | tail -1 | xargs -I{} basename {} | tr -d '[:space:]')
+      if [ -z "$FILE" ]; then
+        echo "No backups found."
+        exit 1
+      fi
+      echo "Latest: $FILE"
+    fi
+
+    echo "Downloading $FILE..."
+    cleanup
+    start_pod "sleep 300"
+    kubectl -n "$NAMESPACE" wait --for=condition=Ready "pod/$POD_NAME" --timeout=60s > /dev/null
+
+    echo "Saving to ./$FILE ..."
+    # The object name is passed as $1 to the in-pod shell so it is never re-parsed
+    # as shell code (file names with spaces or metacharacters stay intact).
+    kubectl -n "$NAMESPACE" exec "$POD_NAME" -- sh -c "${AUTH_CMD} && gsutil cat \"gs://\${GCS_BUCKET}/\$1\"" sh "$FILE" > "./$FILE"
+    cleanup
+
+    echo "Downloaded: ./$FILE"
+    ;;
+
+  *)
+    echo "Gitea backup helper (Google Cloud Storage)"
+    echo ""
+    echo "Usage:"
+    echo "  $0 list                  List all backups in GCS"
+    echo "  $0 download <filename>   Download a specific backup"
+    echo "  $0 download latest       Download the most recent backup"
+    ;;
+esac