diff --git a/.github/workflows/build_images.yml b/.github/workflows/build_images.yml index ecb44333..9ab893a4 100644 --- a/.github/workflows/build_images.yml +++ b/.github/workflows/build_images.yml @@ -6,6 +6,11 @@ on: manifest_tag: description: 'Tag for the Docker manifest' required: false + skip_optional_images: + type: boolean + description: 'Skip building documentdb and gateway images' + required: false + default: false push: branches: @@ -53,8 +58,10 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - name: Login to GHCR + if: ${{ !(github.event_name == 'workflow_dispatch' && github.event.inputs.skip_optional_images == 'true' && (matrix.image.name == 'documentdb' || matrix.image.name == 'gateway')) }} run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - name: Build and Push ${{ matrix.arch }} Image + if: ${{ !(github.event_name == 'workflow_dispatch' && github.event.inputs.skip_optional_images == 'true' && (matrix.image.name == 'documentdb' || matrix.image.name == 'gateway')) }} run: | TAG=${{ env.IMAGE_TAG }}-${{ matrix.arch }} docker build \ diff --git a/.github/workflows/release_images.yml b/.github/workflows/release_images.yml index e0a055cc..ff00fdd0 100644 --- a/.github/workflows/release_images.yml +++ b/.github/workflows/release_images.yml @@ -19,7 +19,7 @@ on: chart_version: description: 'Helm chart version to publish' required: false - default: '0.1.0' + default: '0.1.1' permissions: contents: read @@ -31,7 +31,7 @@ jobs: # Optional test jobs - run both E2E and integration tests in parallel if enabled test-e2e: name: E2E Test Images Before Release - if: ${{ inputs.run_tests == true }} + if: ${{ inputs.run_tests == 'true' }} uses: ./.github/workflows/test-E2E.yml with: image_tag: ${{ inputs.image_tag }} @@ -39,7 +39,7 @@ jobs: test-integration: name: Integration Test Images Before Release - if: ${{ inputs.run_tests == true }} + if: ${{ inputs.run_tests == 'true' }} uses: 
./.github/workflows/test-integration.yml with: image_tag: ${{ inputs.image_tag }} @@ -105,11 +105,21 @@ jobs: run: | CHART_VERSION_INPUT="${{ github.event.inputs.chart_version }}" if [[ -z "$CHART_VERSION_INPUT" ]]; then - CHART_VERSION_INPUT="0.1.0" + CHART_VERSION_FROM_FILE=$(awk -F': *' '/^version:/ {print $2; exit}' documentdb-chart/Chart.yaml) + if [[ -z "$CHART_VERSION_FROM_FILE" ]]; then + echo "Failed to determine chart version from documentdb-chart/Chart.yaml" >&2 + exit 1 + fi + CHART_VERSION_INPUT="$CHART_VERSION_FROM_FILE" fi echo "CHART_VERSION=${CHART_VERSION_INPUT}" >> $GITHUB_ENV echo "Using chart version: ${CHART_VERSION_INPUT}" + - name: Update Chart.yaml metadata + run: | + sed -i "s/^version: .*/version: ${CHART_VERSION}/" documentdb-chart/Chart.yaml + sed -i "s/^appVersion: .*/appVersion: \"${CHART_VERSION}\"/" documentdb-chart/Chart.yaml + - name: Package Helm chart run: | helm dependency update operator/documentdb-helm-chart diff --git a/.gitignore b/.gitignore index b6f1d4e8..f53bd114 100644 --- a/.gitignore +++ b/.gitignore @@ -406,4 +406,4 @@ charts/ Chart.lock # Test output -*.out +*.out \ No newline at end of file diff --git a/docs/operator-public-documentation/index.md b/docs/operator-public-documentation/index.md index 23acf501..547c41a6 100644 --- a/docs/operator-public-documentation/index.md +++ b/docs/operator-public-documentation/index.md @@ -7,4 +7,4 @@

If you are not redirected automatically, follow this link to preview/index.md.

- \ No newline at end of file + diff --git a/docs/operator-public-documentation/preview/index.md b/docs/operator-public-documentation/preview/index.md index b3a58795..6114208f 100644 --- a/docs/operator-public-documentation/preview/index.md +++ b/docs/operator-public-documentation/preview/index.md @@ -1,123 +1,40 @@ -# DocumentDB Kubernetes Operator +# DocumentDB Kubernetes Operator - Version 1 -The DocumentDB Kubernetes Operator is an open-source project to run and manage [DocumentDB](https://github.com/microsoft/documentdb) on Kubernetes. `DocumentDB` is the engine powering vCore-based Azure Cosmos DB for MongoDB. It is built on top of PostgreSQL and offers a native implementation of document-oriented NoSQL database, enabling CRUD operations on BSON data types. +Welcome to the DocumentDB Kubernetes Operator documentation for version 1. -As part of a DocumentDB cluster installation, the operator deploys and manages a set of PostgreSQL instance(s), the [DocumentDB Gateway](https://github.com/microsoft/documentdb/tree/main/pg_documentdb_gw), as well as other Kubernetes resources. While PostgreSQL is used as the underlying storage engine, the gateway ensures that you can connect to the DocumentDB cluster using MongoDB-compatible drivers, APIs, and tools. +## Documentation Sections -> **Note:** This project is under active development but not yet recommended for production use. We welcome your feedback and contributions! +### [Advanced Configuration](advanced-configuration/README.md) -## Quickstart +Advanced configuration options for production deployments: -This quickstart guide will walk you through the steps to install the operator, deploy a DocumentDB cluster, access it using `mongosh`, and perform basic operations. 
+- **[TLS Configuration](advanced-configuration/README.md#tls-configuration)** - Comprehensive TLS setup with three modes (SelfSigned, Provided, CertManager) +- **[High Availability](advanced-configuration/README.md#high-availability)** - Multi-instance and HA configurations +- **[Storage Configuration](advanced-configuration/README.md#storage-configuration)** - Persistent storage and volume management +- **[Resource Management](advanced-configuration/README.md#resource-management)** - CPU and memory optimization +- **[Security](advanced-configuration/README.md#security)** - Network policies, RBAC, and secrets management -### Prerequisites +## Quick Links -- [Helm](https://helm.sh/docs/intro/install/) installed. -- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/) installed. -- A local Kubernetes cluster such as [minikube](https://minikube.sigs.k8s.io/docs/start/), or [kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation) installed. You are free to use any other Kubernetes cluster, but that's not a requirement for this quickstart. -- Install [mongosh](https://www.mongodb.com/docs/mongodb-shell/install/) to connect to the DocumentDB cluster. 
+### Getting Started +- [Installation Guide](https://microsoft.github.io/documentdb-kubernetes-operator/v1/#quickstart) +- [Quick Start](https://microsoft.github.io/documentdb-kubernetes-operator/v1/#quickstart) -### Start a local Kubernetes cluster +### TLS Setup +- [TLS Setup Guide](../../../documentdb-playground/tls/README.md) - Complete TLS configuration guide +- [E2E Testing](../../../documentdb-playground/tls/E2E-TESTING.md) - Comprehensive testing procedures -If you are using `minikube`, use the following command: +### Examples +- [Sample Configurations](../../../operator/src/config/samples/) +- [Deployment Examples](../../../operator/src/scripts/deployment-examples/) -```sh -minikube start -``` - -If you are using `kind`, use the following command: - -```sh -kind create cluster -``` - -### Install `cert-manager` - -[cert-manager](https://cert-manager.io/docs/) is used to manage TLS certificates for the DocumentDB cluster. - -> If you already have `cert-manager` installed, you can skip this step. - -```sh -helm repo add jetstack https://charts.jetstack.io -helm repo update -helm install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --set installCRDs=true -``` - -Verify that `cert-manager` is installed correctly: - -```sh -kubectl get pods -n cert-manager -``` - -Output: - -```text -NAMESPACE NAME READY STATUS RESTARTS -cert-manager cert-manager-6794b8d569-d7lwd 1/1 Running 0 -cert-manager cert-manager-cainjector-7f69cd69f7-pd9bc 1/1 Running 0 -cert-manager cert-manager-webhook-6cc5dccc4b-7jmrh 1/1 Running 0 -``` - -### Install `documentdb-operator` using the Helm chart - -> The DocumentDB operator utilizes the [CloudNativePG operator](https://cloudnative-pg.io/docs/) behind the scenes, and installs it in the `cnpg-system` namespace. At this point, it is assumed that the CloudNativePG operator is **not** pre-installed in your cluster. 
- -Use the following command to install the DocumentDB operator: - -```sh -helm install documentdb-operator oci://ghcr.io/microsoft/documentdb-operator --namespace documentdb-operator --create-namespace -``` +## Support -This will install the operator in the `documentdb-operator` namespace. Verify that it is running: +- [GitHub Repository](https://github.com/microsoft/documentdb-kubernetes-operator) +- [Issue Tracker](https://github.com/microsoft/documentdb-kubernetes-operator/issues) +- [Discussions](https://github.com/microsoft/documentdb-kubernetes-operator/discussions) -```sh -kubectl get deployment -n documentdb-operator -``` - -Output: - -```text -NAME READY UP-TO-DATE AVAILABLE AGE -documentdb-operator 1/1 1 1 113s -``` - -You should also see the DocumentDB operator CRDs installed in the cluster: - -```sh -kubectl get crd | grep documentdb -``` - -Output: - -```text -documentdbs.db.microsoft.com -``` - -### Store DocumentDB credentials in K8s Secret - -Before deploying the DocumentDB cluster, create a Kubernetes secret to store the DocumentDB credentials. The sidecar injector plugin will automatically inject these credentials as environment variables into the DocumentDB gateway container. 
- -Create the secret with your desired username and password: - -```sh -cat < +``` + +This single command will: +- ✅ Create AKS cluster with all required addons +- ✅ Install cert-manager and CSI driver +- ✅ Deploy DocumentDB operator +- ✅ Configure and validate both SelfSigned and Provided TLS modes + +**Duration**: ~25-30 minutes + +### TLS Configuration Examples + +#### Example 1: SelfSigned Mode + +```yaml +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-selfsigned + namespace: default +spec: + version: "16" + instances: 3 + storage: + size: 10Gi + tls: + mode: SelfSigned + selfSigned: + issuerName: documentdb-selfsigned-issuer + certificateName: documentdb-gateway-cert +``` + +#### Example 2: Provided Mode (Azure Key Vault) + +```yaml +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-provided + namespace: default +spec: + version: "16" + instances: 3 + storage: + size: 10Gi + tls: + mode: Provided + provided: + secretName: documentdb-tls-akv + secretProviderClass: azure-kv-documentdb +``` + +#### Example 3: CertManager Mode with Let's Encrypt + +```yaml +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-letsencrypt + namespace: default +spec: + version: "16" + instances: 3 + storage: + size: 10Gi + tls: + mode: CertManager + certManager: + issuerRef: + name: letsencrypt-prod + kind: ClusterIssuer + commonName: documentdb.example.com + dnsNames: + - documentdb.example.com + - "*.documentdb.example.com" +``` + +### TLS Status and Monitoring + +Check TLS status of your DocumentDB instance: + +```bash +# Get TLS status +kubectl get documentdb <documentdb-name> -n <namespace> -o jsonpath='{.status.tls}' | jq + +# Example output: +{ + "ready": true, + "mode": "SelfSigned", + "certificateName": "documentdb-gateway-cert", + "secretName": "documentdb-gateway-cert-tls", + "expirationTime": "2025-02-04T10:00:00Z" +} +``` + +### Certificate Rotation + +The operator handles certificate rotation
automatically: + +- **SelfSigned & CertManager**: cert-manager rotates certificates before expiration +- **Provided Mode**: Sync certificates from Azure Key Vault on rotation + +Monitor certificate expiration: + +```bash +# Check certificate expiration +kubectl get certificate -n -o jsonpath='{.status.notAfter}' + +# Check TLS secret +kubectl get secret -n -o jsonpath='{.data.tls\.crt}' | \ + base64 -d | openssl x509 -noout -dates +``` + +### Troubleshooting TLS + +For comprehensive troubleshooting, see the [E2E Testing Guide](../../../../documentdb-playground/tls/E2E-TESTING.md#troubleshooting). + +Common issues: + +1. **Certificate not ready**: Check cert-manager logs and certificate status +2. **Connection failures**: Verify service endpoints and TLS handshake +3. **Azure Key Vault access denied**: Check managed identity and RBAC permissions + +Quick diagnostics: + +```bash +# Check DocumentDB TLS status +kubectl describe documentdb -n + +# Check certificate status +kubectl describe certificate -n + +# Check cert-manager logs +kubectl logs -n cert-manager deployment/cert-manager + +# Test TLS handshake +EXTERNAL_IP=$(kubectl get svc -n -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') +openssl s_client -connect $EXTERNAL_IP:10260 +``` + +--- + +## High Availability + +Configuration for high availability DocumentDB deployments. + +### Multi-Instance Setup + +```yaml +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-ha +spec: + version: "16" + instances: 3 # Number of replicas + storage: + size: 100Gi + storageClass: premium-ssd +``` + +### Recommended Settings + +- **Minimum instances**: 3 for production +- **Storage class**: Use premium SSDs for production +- **Resource requests**: Set appropriate CPU/memory limits + +--- + +## Storage Configuration + +Configure persistent storage for DocumentDB instances. 
+ +### Storage Classes + +```yaml +spec: + storage: + size: 100Gi + storageClass: premium-ssd # Azure Premium SSD + # or: managed-csi-premium + # or: azurefile-premium +``` + +### Volume Expansion + +```bash +# Ensure storage class allows volume expansion +kubectl get storageclass -o jsonpath='{.allowVolumeExpansion}' + +# Patch DocumentDB for larger storage +kubectl patch documentdb -n --type='json' \ + -p='[{"op": "replace", "path": "/spec/storage/size", "value":"200Gi"}]' +``` + +--- + +## Resource Management + +Configure resource requests and limits for optimal performance. + +### Example Configuration + +```yaml +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-resources +spec: + version: "16" + instances: 3 + resources: + limits: + cpu: "4" + memory: "8Gi" + requests: + cpu: "2" + memory: "4Gi" +``` + +### Recommendations + +- **Development**: 1 CPU, 2Gi memory +- **Production**: 2-4 CPUs, 4-8Gi memory +- **High-load**: 4-8 CPUs, 8-16Gi memory + +--- + +## Security + +Security best practices for DocumentDB deployments. + +### Network Policies + +Restrict network access to DocumentDB: + +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: documentdb-access + namespace: default +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: documentdb + policyTypes: + - Ingress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: app-namespace + ports: + - protocol: TCP + port: 10260 +``` + +### RBAC + +The operator requires specific permissions to manage DocumentDB resources. The Helm chart automatically creates the necessary RBAC rules. 
+ +### Secrets Management + +Credentials are automatically stored in Kubernetes secrets: + +```bash +# View credentials (base64 encoded) +kubectl get secret documentdb-credentials -n -o yaml + +# Decode username +kubectl get secret documentdb-credentials -n \ + -o jsonpath='{.data.username}' | base64 -d + +# Decode password +kubectl get secret documentdb-credentials -n \ + -o jsonpath='{.data.password}' | base64 -d +``` + +For production, consider using: +- Azure Key Vault for secrets (via Secrets Store CSI driver) +- HashiCorp Vault integration +- External secrets operator + +--- + +## Additional Resources + +- [Main Documentation](https://microsoft.github.io/documentdb-kubernetes-operator) +- [TLS Setup Guide](../../../../documentdb-playground/tls/README.md) +- [E2E Testing Guide](../../../../documentdb-playground/tls/E2E-TESTING.md) +- [GitHub Repository](https://github.com/microsoft/documentdb-kubernetes-operator) + +--- + +**Last Updated**: November 2025 +**Version**: v1 diff --git a/documentdb-playground/tls/E2E-TESTING.md b/documentdb-playground/tls/E2E-TESTING.md new file mode 100644 index 00000000..9d39178d --- /dev/null +++ b/documentdb-playground/tls/E2E-TESTING.md @@ -0,0 +1,1154 @@ +# End-to-End (E2E) Testing Guide + +This guide provides comprehensive instructions for testing the DocumentDB Kubernetes Operator with TLS support from start to finish. + +## Table of Contents + +- [Overview](#overview) +- [Prerequisites](#prerequisites) +- [Quick E2E Test (Automated)](#quick-e2e-test-automated) +- [Manual Step-by-Step E2E Test](#manual-step-by-step-e2e-test) +- [Testing Individual Components](#testing-individual-components) +- [Validation and Verification](#validation-and-verification) +- [Cleanup](#cleanup) +- [Troubleshooting](#troubleshooting) + +## Overview + +The E2E testing process validates the entire DocumentDB TLS setup, including: + +1. **Infrastructure Setup**: AKS cluster, Azure Key Vault, networking +2. 
**Prerequisites Installation**: cert-manager, Secrets Store CSI driver +3. **Operator Deployment**: DocumentDB operator with Helm +4. **TLS Modes**: + - SelfSigned mode (cert-manager with self-signed issuer) + - Provided mode (Azure Key Vault with CSI driver) +5. **Connectivity Validation**: MongoDB shell connections with TLS +6. **Data Operations**: CRUD operations to verify full functionality + +## Prerequisites + +### Required Tools + +Ensure these tools are installed and accessible in your PATH: + +```bash +# Check required tools +az --version # Azure CLI +kubectl version # Kubernetes CLI +helm version # Helm 3.x +mongosh --version # MongoDB Shell +openssl version # OpenSSL +jq --version # JSON processor (optional but helpful) +``` + +**Installation guides:** +- [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) +- [kubectl](https://kubernetes.io/docs/tasks/tools/) +- [Helm](https://helm.sh/docs/intro/install/) +- [mongosh](https://www.mongodb.com/docs/mongodb-shell/install/) +- [jq](https://stedolan.github.io/jq/download/) + +### Azure Requirements + +- **Azure subscription** with Owner or Contributor permissions +- **Authenticated session**: Run `az login` before starting +- **Subscription ID**: Know your Azure subscription ID + +```bash +# Login to Azure +az login + +# List subscriptions +az account list --output table + +# Set default subscription +az account set --subscription +``` + +### Resource Quotas + +Ensure your subscription has sufficient quota for: +- **AKS cluster**: 2-4 nodes (Standard_D4s_v5 or similar) +- **Public IPs**: 1-2 for LoadBalancer services +- **Storage**: ~50GB for persistent volumes +- **Key Vault**: 1 instance + +## Quick E2E Test (Automated) + +The fastest way to run a complete E2E test is using the automated script. 
+ +### Single Command Test + +```bash +cd documentdb-playground/tls/scripts + +# Run complete E2E test +./create-cluster.sh \ + --suffix e2etest \ + --subscription-id <subscription-id> +``` + +This single command will: +1. ✅ Create AKS cluster with all addons +2. ✅ Install cert-manager and CSI driver +3. ✅ Create Azure Key Vault +4. ✅ Deploy DocumentDB operator +5. ✅ Test SelfSigned TLS mode +6. ✅ Test Provided TLS mode (Azure Key Vault) +7. ✅ Validate connectivity for both modes +8. ✅ Provide connection strings + +**Expected Duration**: ~25-30 minutes + +### Cleanup After Test + +```bash +# Delete all resources +./delete-cluster.sh \ + --suffix e2etest \ + --subscription-id <subscription-id> \ + --all +``` + +**Expected Duration**: ~5-10 minutes + +## Automated E2E Testing with Scripts + +All E2E tests can be fully automated using the provided scripts. This section describes how to use them for comprehensive testing without manual intervention. + +### Understanding the Test Scripts + +The testing suite consists of several scripts that work together: + +1. **`create-cluster.sh`** - Main entry point that orchestrates the entire setup +2. **`gateway-tls-e2e.sh`** - Core E2E script that tests both TLS modes +3. **`setup-selfsigned-gateway-tls.sh`** - Configures SelfSigned TLS mode +4. **`setup-documentdb-akv.sh`** - Sets up Azure Key Vault for certificates +5. **`documentdb-provided-mode-setup.sh`** - Configures Provided TLS mode +6. **`tls-connectivity-check.sh`** - Validates TLS connectivity and certificates +7. **`delete-cluster.sh`** - Cleanup and resource deletion + +### Complete Automated Test Scenarios + +#### Scenario 1: Full End-to-End Test (Recommended) + +This is the complete automated test that validates everything from cluster creation to TLS validation.
+ +```bash +cd documentdb-playground/tls/scripts + +# Set your configuration +export SUFFIX="e2etest-$(date +%H%M)" +export SUBSCRIPTION_ID="" + +# Run complete E2E test +./create-cluster.sh \ + --suffix "$SUFFIX" \ + --subscription-id "$SUBSCRIPTION_ID" +``` + +**What it automates:** +- ✅ Creates AKS cluster with all required addons (cert-manager, CSI driver) +- ✅ Deploys DocumentDB operator via Helm +- ✅ Creates Azure Key Vault with proper RBAC +- ✅ Deploys DocumentDB instance with SelfSigned TLS +- ✅ Validates SelfSigned TLS connectivity with mongosh +- ✅ Reconfigures to Provided TLS mode (Azure Key Vault) +- ✅ Validates Provided TLS connectivity with mongosh +- ✅ Outputs connection strings and status + +**Expected output:** +``` +Running end-to-end gateway TLS validation with: + Resource Group: guanzhou-e2etest-1234-rg + AKS Cluster: guanzhou-e2etest-1234 + Location: eastus2 + Key Vault: ddb-issuer-e2etest-1234 + Namespace: documentdb-preview-ns + DocumentDB: documentdb-preview + +2025-11-04 10:00:00 :: Provision AKS cluster +✅ AKS cluster created successfully +2025-11-04 10:15:00 :: Deploy DocumentDB self-signed mode +✅ DocumentDB deployed with SelfSigned TLS +2025-11-04 10:18:00 :: Validate self-signed connectivity +✅ TLS handshake successful +✅ mongosh connection successful +2025-11-04 10:20:00 :: Prepare Azure Key Vault +✅ Key Vault created with certificate +2025-11-04 10:22:00 :: Switch cluster to provided TLS +✅ DocumentDB reconfigured for Provided TLS +2025-11-04 10:25:00 :: Validate provided-mode connectivity +✅ TLS handshake successful +✅ mongosh connection successful + +End-to-end gateway TLS validation completed successfully. +``` + +**Duration**: ~25-30 minutes + +#### Scenario 2: Test on Existing Cluster + +If you already have an AKS cluster, test TLS setup without recreating infrastructure.
+ +```bash +cd documentdb-playground/tls/scripts + +# Ensure you have kubectl context set to your cluster +kubectl config current-context + +# Run E2E test on existing cluster +./create-cluster.sh \ + --suffix "$SUFFIX" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --skip-cluster +``` + +**What it automates:** +- ✅ Uses existing AKS cluster (no cluster creation) +- ✅ Installs cert-manager if not present +- ✅ Creates Azure Key Vault +- ✅ Deploys DocumentDB operator +- ✅ Tests both SelfSigned and Provided TLS modes +- ✅ Validates connectivity for both modes + +**Duration**: ~10-15 minutes + +#### Scenario 3: Direct E2E Script Usage (Advanced) + +For more control, use the core `gateway-tls-e2e.sh` script directly. + +```bash +cd documentdb-playground/tls/scripts + +./gateway-tls-e2e.sh \ + --suffix "e2etest" \ + --location "eastus2" \ + --resource-group "my-test-rg" \ + --aks-name "my-aks-cluster" \ + --keyvault "my-key-vault" \ + --namespace "test-ns" \ + --docdb-name "test-documentdb" +``` + +**Additional options:** +```bash +# Skip cluster creation (use existing) +./gateway-tls-e2e.sh --suffix test --skip-cluster + +# Custom GitHub registry (for private operator images) +./gateway-tls-e2e.sh \ + --suffix test \ + --github-username "myusername" \ + --github-token "ghp_xxxxxxxxxxxx" +``` + +### Component-Level Automated Tests + +Test individual components in isolation using helper scripts. + +#### Test: SelfSigned TLS Mode Only + +```bash +cd documentdb-playground/tls/scripts + +# Setup and validate SelfSigned TLS +./setup-selfsigned-gateway-tls.sh \ + --namespace "test-ns" \ + --name "test-documentdb" \ + --username "admin" \ + --password "SecurePass123!"
+ +# Validate connectivity +./tls-connectivity-check.sh \ + --mode selfsigned \ + --namespace "test-ns" \ + --docdb-name "test-documentdb" +``` + +**Validates:** +- cert-manager ClusterIssuer creation +- Certificate resource generation +- TLS secret availability +- Gateway pod TLS configuration +- mongosh connectivity with TLS + +**Duration**: ~5 minutes + +#### Test: Provided TLS Mode Only + +```bash +cd documentdb-playground/tls/scripts + +# Setup Azure Key Vault and certificate +./setup-documentdb-akv.sh \ + --resource-group "my-rg" \ + --location "eastus2" \ + --keyvault "my-key-vault" \ + --aks-name "my-aks-cluster" \ + --sni-host "10.0.0.1.sslip.io" + +# Configure DocumentDB for Provided mode +./documentdb-provided-mode-setup.sh \ + --resource-group "my-rg" \ + --aks-name "my-aks-cluster" \ + --keyvault "my-key-vault" \ + --cert-name "documentdb-gateway" \ + --namespace "test-ns" \ + --docdb-name "test-documentdb" \ + --provided-secret "documentdb-tls" \ + --user-assigned-client "" + +# Validate Provided mode connectivity +./tls-connectivity-check.sh \ + --mode provided \ + --namespace "test-ns" \ + --docdb-name "test-documentdb" \ + --provided-secret "documentdb-tls" \ + --sni-host "10.0.0.1.sslip.io" +``` + +**Validates:** +- Azure Key Vault certificate creation +- RBAC role assignments +- SecretProviderClass configuration +- CSI driver secret synchronization +- Gateway pod with provided certificate +- mongosh connectivity with custom certificate + +**Duration**: ~8 minutes + +#### Test: TLS Connectivity Only + +Validate existing TLS setup without making changes. 
+ +```bash +cd documentdb-playground/tls/scripts + +# Check SelfSigned mode +./tls-connectivity-check.sh \ + --mode selfsigned \ + --namespace "my-ns" \ + --docdb-name "my-documentdb" + +# Check Provided mode +./tls-connectivity-check.sh \ + --mode provided \ + --namespace "my-ns" \ + --docdb-name "my-documentdb" \ + --provided-secret "my-tls-secret" \ + --sni-host "example.com" +``` + +**Validates:** +- TLS status readiness +- Service endpoint availability +- Certificate validity +- TLS handshake success +- mongosh connection with TLS + +**Duration**: ~2 minutes + +### Automated Cleanup Tests + +Test cleanup functionality with different retention policies. + +```bash +cd documentdb-playground/tls/scripts + +# Complete cleanup (delete everything) +./delete-cluster.sh \ + --suffix "e2etest" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --all + +# Keep cluster, delete only DocumentDB +./delete-cluster.sh \ + --suffix "e2etest" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --keep-cluster + +# Delete cluster, preserve Key Vault +./delete-cluster.sh \ + --suffix "e2etest" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --keep-keyvault +``` + +### Continuous Integration Test Script + +For CI/CD pipelines, use this automated test script: + +```bash +#!/bin/bash +# ci-automated-test.sh + +set -e + +# Configuration +SUFFIX="ci-$(date +%Y%m%d%H%M%S)" +SUBSCRIPTION_ID="${AZURE_SUBSCRIPTION_ID}" +LOCATION="${AZURE_LOCATION:-eastus2}" + +echo "=== Starting CI E2E Test ===" +echo "Suffix: $SUFFIX" +echo "Subscription: $SUBSCRIPTION_ID" +echo "Location: $LOCATION" +echo "" + +# Navigate to scripts directory +cd "$(dirname "$0")/documentdb-playground/tls/scripts" + +# Run E2E test +echo "=== Running E2E Test ===" +./create-cluster.sh \ + --suffix "$SUFFIX" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --location "$LOCATION" || { + echo "❌ E2E test failed" + exit 1 +} + +echo "" +echo "=== Running Validation ===" + +# Additional validation +export NS="documentdb-preview-ns" +export
DOCDB_NAME="documentdb-preview" + +# Check TLS status +TLS_READY=$(kubectl get documentdb "$DOCDB_NAME" -n "$NS" \ + -o jsonpath='{.status.tls.ready}' 2>/dev/null || echo "false") + +if [ "$TLS_READY" != "true" ]; then + echo "❌ TLS not ready" + exit 1 +fi + +echo "✅ TLS status verified" + +# Check DocumentDB status +DOCDB_STATUS=$(kubectl get documentdb "$DOCDB_NAME" -n "$NS" \ + -o jsonpath='{.status.status}' 2>/dev/null || echo "unknown") + +if [[ ! "$DOCDB_STATUS" =~ "healthy" ]]; then + echo "❌ DocumentDB not healthy: $DOCDB_STATUS" + exit 1 +fi + +echo "✅ DocumentDB status verified" + +# Cleanup +echo "" +echo "=== Cleaning Up ===" +./delete-cluster.sh \ + --suffix "$SUFFIX" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --all || { + echo "⚠️ Cleanup failed (manual cleanup may be required)" +} + +echo "" +echo "=== CI E2E Test Complete ===" +exit 0 +``` + +**Usage in CI/CD:** +```yaml +# Example GitHub Actions workflow +- name: Run E2E Tests + env: + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + AZURE_LOCATION: eastus2 + run: | + az login --service-principal -u ${{ secrets.AZURE_CLIENT_ID }} \ + -p ${{ secrets.AZURE_CLIENT_SECRET }} \ + --tenant ${{ secrets.AZURE_TENANT_ID }} + ./ci-automated-test.sh +``` + +### Automated Test Matrix + +Run multiple test scenarios to ensure compatibility: + +```bash +#!/bin/bash +# test-matrix.sh + +set -e + +SUBSCRIPTION_ID="" +SCENARIOS=( + "eastus2:SelfSigned" + "westus2:Provided" + "centralus:Both" +) + +for scenario in "${SCENARIOS[@]}"; do + LOCATION="${scenario%%:*}" + MODE="${scenario##*:}" + SUFFIX="matrix-$(echo $LOCATION | tr -d ' ')-$(date +%H%M)" + + echo "=== Testing: Location=$LOCATION, Mode=$MODE ===" + + case "$MODE" in + SelfSigned) + ./setup-selfsigned-gateway-tls.sh \ + --namespace "test-$SUFFIX" \ + --name "documentdb-$SUFFIX" + ./tls-connectivity-check.sh \ + --mode selfsigned \ + --namespace "test-$SUFFIX" \ + --docdb-name "documentdb-$SUFFIX" + ;; + Provided) + # Full Provided
mode test + ./create-cluster.sh --suffix "$SUFFIX" --subscription-id "$SUBSCRIPTION_ID" + ;; + Both) + # Full E2E test (both modes) + ./create-cluster.sh --suffix "$SUFFIX" --subscription-id "$SUBSCRIPTION_ID" + ;; + esac + + # Cleanup + ./delete-cluster.sh --suffix "$SUFFIX" --subscription-id "$SUBSCRIPTION_ID" --all + + echo "✅ Test complete: $scenario" + echo "" +done + +echo "=== All test scenarios completed ===" +``` + +### Script Output and Logging + +All scripts provide detailed output: + +- **Timestamps**: Each major step is timestamped +- **Status indicators**: ✅ (success), ❌ (error), ⚠️ (warning) +- **Progress tracking**: Clear indication of current step +- **Error messages**: Detailed error information for debugging +- **Connection strings**: Ready-to-use mongosh connection strings + +**Example output:** +``` +2025-11-04 10:00:00 :: Creating AKS cluster +✅ Resource group created +✅ AKS cluster created +✅ Azure CSI drivers installed +✅ cert-manager installed + +2025-11-04 10:15:00 :: Deploying DocumentDB +✅ Namespace created +✅ Credentials secret created +✅ DocumentDB deployed +⏳ Waiting for TLS readiness... +✅ TLS ready (took 120 seconds) + +2025-11-04 10:18:00 :: Validating connectivity +✅ Service endpoint: 10.0.0.1:10260 +✅ TLS handshake successful +✅ mongosh connection successful + +Connection string: +mongodb://docdbuser:P@ssw0rd123@10.0.0.1:10260/?tls=true&tlsAllowInvalidCertificates=true +``` + +### Debugging Automated Tests + +If automated tests fail, scripts provide debug options: + +```bash +# Enable verbose output +bash -x ./create-cluster.sh --suffix test --subscription-id <subscription-id> + +# Check script exit codes +./create-cluster.sh --suffix test --subscription-id <subscription-id> +echo "Exit code: $?" + +# Capture output for analysis +./create-cluster.sh --suffix test --subscription-id <subscription-id> 2>&1 | tee test-output.log +``` + +--- + +## Manual Step-by-Step E2E Test + +For detailed understanding or debugging, follow these manual steps.
+ +### Step 1: Set Environment Variables + +```bash +# Set your unique identifier +export SUFFIX="e2etest-$(whoami)" +export SUBSCRIPTION_ID="" +export LOCATION="eastus2" +export RG="documentdb-${SUFFIX}-rg" +export AKS_NAME="documentdb-${SUFFIX}-aks" +export KV_NAME="ddb-kv-${SUFFIX}" +export NS="documentdb-e2e-ns" +export DOCDB_NAME="documentdb-e2e" + +# Display configuration +echo "Configuration:" +echo " Suffix: $SUFFIX" +echo " Resource Group: $RG" +echo " AKS Cluster: $AKS_NAME" +echo " Key Vault: $KV_NAME" +echo " Namespace: $NS" +echo " DocumentDB: $DOCDB_NAME" +``` + +### Step 2: Run Comprehensive E2E Script + +The `gateway-tls-e2e.sh` script orchestrates all components: + +```bash +cd documentdb-playground/tls/scripts + +./gateway-tls-e2e.sh \ + --suffix "$SUFFIX" \ + --location "$LOCATION" \ + --resource-group "$RG" \ + --aks-name "$AKS_NAME" \ + --keyvault "$KV_NAME" \ + --namespace "$NS" \ + --docdb-name "$DOCDB_NAME" +``` + +**What it does:** +- Creates all Azure infrastructure +- Installs all prerequisites +- Deploys operator and DocumentDB +- Tests both TLS modes +- Validates connectivity + +### Step 3: Verify Each Component + +After the script completes, verify each component: + +```bash +# 1. Check AKS cluster +az aks show --resource-group "$RG" --name "$AKS_NAME" --output table + +# 2. Verify kubectl context +kubectl config current-context + +# 3. Check cert-manager +kubectl get pods -n cert-manager +kubectl get clusterissuer + +# 4. Check CSI driver +kubectl get pods -n kube-system | grep secrets-store + +# 5. Verify Key Vault +az keyvault show --name "$KV_NAME" --output table + +# 6. Check DocumentDB operator +kubectl get pods -n "$NS" +kubectl get documentdb -n "$NS" + +# 7. 
Verify TLS status +kubectl get documentdb "$DOCDB_NAME" -n "$NS" -o jsonpath='{.status.tls}' | jq +``` + +### Step 4: Test Connectivity + +```bash +# Get DocumentDB service +kubectl get svc -n "$NS" + +# Get external IP (for LoadBalancer) +EXTERNAL_IP=$(kubectl get svc -n "$NS" -l "app.kubernetes.io/name=documentdb" -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') + +# Get credentials +USERNAME=$(kubectl get secret documentdb-credentials -n "$NS" -o jsonpath='{.data.username}' | base64 -d) +PASSWORD=$(kubectl get secret documentdb-credentials -n "$NS" -o jsonpath='{.data.password}' | base64 -d) + +# Connect with mongosh (SelfSigned mode: the client must allow invalid certificates). +# Credentials are passed as options rather than embedded in the URI, because a password +# containing reserved characters such as "@" would otherwise have to be percent-encoded. +mongosh "mongodb://${EXTERNAL_IP}:10260/?tls=true&tlsAllowInvalidCertificates=true" --username "$USERNAME" --password "$PASSWORD" +``` + +### Step 5: Test CRUD Operations + +Once connected via mongosh: + +```javascript +// Create database +use e2etest + +// Insert documents +db.users.insertMany([ + { name: "Alice", email: "alice@example.com", role: "admin" }, + { name: "Bob", email: "bob@example.com", role: "user" }, + { name: "Charlie", email: "charlie@example.com", role: "user" } +]) + +// Read documents +db.users.find() +db.users.find({ role: "user" }) + +// Update document +db.users.updateOne( + { name: "Bob" }, + { $set: { role: "admin" } } +) + +// Verify update +db.users.find({ name: "Bob" }) + +// Delete document +db.users.deleteOne({ name: "Charlie" }) + +// Verify deletion +db.users.count() + +// Clean up +db.users.drop() +``` + +## Testing Individual Components + +You can test individual components separately using the provided scripts. 
+ +### Test 1: SelfSigned TLS Mode Only + +```bash +cd documentdb-playground/tls/scripts + +# Assumes cluster already exists +./setup-selfsigned-gateway-tls.sh \ + --namespace "$NS" \ + --docdb-name "$DOCDB_NAME" +``` + +**Validates:** +- cert-manager ClusterIssuer creation +- Certificate resource creation +- TLS secret generation +- Gateway pod TLS configuration + +### Test 2: Provided TLS Mode (Azure Key Vault) + +```bash +cd documentdb-playground/tls/scripts + +# Setup Azure Key Vault and certificates +./setup-documentdb-akv.sh \ + --suffix "$SUFFIX" \ + --keyvault "$KV_NAME" \ + --resource-group "$RG" \ + --namespace "$NS" + +# Configure DocumentDB for Provided mode +./documentdb-provided-mode-setup.sh \ + --namespace "$NS" \ + --docdb-name "$DOCDB_NAME" \ + --secret-name documentdb-provided-tls +``` + +**Validates:** +- Azure Key Vault certificate creation +- SecretProviderClass configuration +- CSI driver secret synchronization +- Gateway pod with provided certificate + +### Test 3: TLS Connectivity + +```bash +cd documentdb-playground/tls/scripts + +# Run comprehensive connectivity tests +./tls-connectivity-check.sh \ + --namespace "$NS" \ + --docdb-name "$DOCDB_NAME" +``` + +**Validates:** +- TLS handshake +- Certificate validation +- MongoDB protocol over TLS +- Connection string generation + +### Test 4: Full Cluster Creation + +Use the comprehensive cluster creation script: + +```bash +cd documentdb-playground/tls/scripts + +# Creates everything from scratch +./create-cluster.sh \ + --suffix "$SUFFIX" \ + --subscription-id "$SUBSCRIPTION_ID" +``` + +## Validation and Verification + +### Automated Validation Checklist + +Run these commands to verify a successful E2E test: + +```bash +#!/bin/bash + +echo "=== E2E Validation Checklist ===" +echo "" + +# 1. AKS Cluster +echo "1. AKS Cluster Status:" +az aks show --resource-group "$RG" --name "$AKS_NAME" --query "powerState.code" -o tsv +echo "" + +# 2. cert-manager +echo "2. 
cert-manager Pods:" +kubectl get pods -n cert-manager --no-headers | wc -l +echo " Expected: 3 (cert-manager, cainjector, webhook)" +echo "" + +# 3. DocumentDB Operator +echo "3. DocumentDB Operator:" +kubectl get deployment -n "$NS" -l app.kubernetes.io/name=documentdb-operator --no-headers | wc -l +echo " Expected: 1" +echo "" + +# 4. DocumentDB Instance +echo "4. DocumentDB Instance Status:" +kubectl get documentdb "$DOCDB_NAME" -n "$NS" -o jsonpath='{.status.status}' +echo "" +echo " Expected: Cluster in healthy state" +echo "" + +# 5. TLS Configuration +echo "5. TLS Status:" +kubectl get documentdb "$DOCDB_NAME" -n "$NS" -o jsonpath='{.status.tls.ready}' +echo "" +echo " Expected: true" +echo "" + +# 6. Certificates +echo "6. Certificates:" +kubectl get certificates -n "$NS" --no-headers | wc -l +echo " Expected: At least 1" +echo "" + +# 7. TLS Secrets +echo "7. TLS Secrets:" +kubectl get secrets -n "$NS" | grep -c "tls" +echo " Expected: At least 1" +echo "" + +# 8. Service External IP +echo "8. LoadBalancer Service:" +kubectl get svc -n "$NS" -o jsonpath='{.items[?(@.spec.type=="LoadBalancer")].status.loadBalancer.ingress[0].ip}' +echo "" +echo " Expected: IP address" +echo "" + +# 9. Pod Readiness +echo "9. All Pods Ready:" +TOTAL_PODS=$(kubectl get pods -n "$NS" --no-headers | wc -l) +READY_PODS=$(kubectl get pods -n "$NS" --field-selector=status.phase=Running --no-headers | wc -l) +echo " Ready: $READY_PODS / $TOTAL_PODS" +echo "" + +# 10. Azure Key Vault +echo "10. Azure Key Vault:" +az keyvault show --name "$KV_NAME" --query "properties.provisioningState" -o tsv 2>/dev/null || echo "Not found or not accessible" +echo " Expected: Succeeded" +echo "" + +echo "=== Validation Complete ===" +``` + +### Manual Verification Steps + +1. **Check DocumentDB CRD**: + ```bash + kubectl get crd documentdbs.db.microsoft.com -o yaml | grep -A 5 "tls" + ``` + +2. 
**Inspect TLS Configuration**: + ```bash + kubectl get documentdb "$DOCDB_NAME" -n "$NS" -o yaml | grep -A 20 "tls:" + ``` + +3. **Verify Certificate Details**: + ```bash + kubectl get certificate -n "$NS" -o yaml + kubectl describe certificate -n "$NS" + ``` + +4. **Check TLS Secret Contents**: + ```bash + # List keys in TLS secret + kubectl get secret -n "$NS" -o jsonpath='{.data}' | jq 'keys' + + # View certificate + kubectl get secret -n "$NS" -o jsonpath='{.data.tls\.crt}' | base64 -d | openssl x509 -text -noout + ``` + +5. **Test TLS Handshake**: + ```bash + EXTERNAL_IP=$(kubectl get svc -n "$NS" -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') + openssl s_client -connect "$EXTERNAL_IP:10260" -servername "$DOCDB_NAME" + ``` + +## Cleanup + +### Quick Cleanup (All Resources) + +```bash +cd documentdb-playground/tls/scripts + +./delete-cluster.sh \ + --suffix "$SUFFIX" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --all +``` + +### Selective Cleanup + +**Keep cluster, delete DocumentDB only:** +```bash +./delete-cluster.sh \ + --suffix "$SUFFIX" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --keep-cluster +``` + +**Delete cluster, preserve Key Vault:** +```bash +./delete-cluster.sh \ + --suffix "$SUFFIX" \ + --subscription-id "$SUBSCRIPTION_ID" \ + --keep-keyvault +``` + +### Manual Cleanup + +If scripts fail, manually delete resources: + +```bash +# Delete DocumentDB +kubectl delete documentdb "$DOCDB_NAME" -n "$NS" + +# Delete namespace +kubectl delete namespace "$NS" + +# Uninstall operator +helm uninstall documentdb-operator -n "$NS" + +# Uninstall cert-manager +helm uninstall cert-manager -n cert-manager +kubectl delete namespace cert-manager + +# Delete AKS cluster +az aks delete --resource-group "$RG" --name "$AKS_NAME" --yes --no-wait + +# Delete resource group +az group delete --name "$RG" --yes --no-wait + +# Delete Key Vault (if needed) +az keyvault delete --name "$KV_NAME" +az keyvault purge --name "$KV_NAME" +``` + +## Troubleshooting + 
+### Common Issues and Solutions + +#### Issue: AKS Cluster Creation Fails + +**Symptoms:** +- `az aks create` command fails +- Error about quota limits + +**Solutions:** +```bash +# Check quota +az vm list-usage --location "$LOCATION" --output table + +# Try different region +export LOCATION="westus2" + +# Try smaller node size +# Edit script to use Standard_D2s_v5 instead of Standard_D4s_v5 +``` + +#### Issue: cert-manager Not Ready + +**Symptoms:** +- Certificate stuck in "Pending" state +- cert-manager pods not running + +**Solutions:** +```bash +# Check cert-manager pods +kubectl get pods -n cert-manager + +# Check cert-manager logs +kubectl logs -n cert-manager deployment/cert-manager + +# Reinstall cert-manager +helm uninstall cert-manager -n cert-manager +kubectl delete namespace cert-manager +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --set installCRDs=true +``` + +#### Issue: TLS Certificate Not Ready + +**Symptoms:** +- `status.tls.ready` is `false` +- Certificate resource shows errors + +**Solutions:** +```bash +# Check certificate status +kubectl describe certificate -n "$NS" + +# Check cert-manager issuer +kubectl describe clusterissuer + +# Check cert-manager controller logs +kubectl logs -n cert-manager deployment/cert-manager | grep -i error + +# Delete and recreate certificate +kubectl delete certificate -n "$NS" +# Certificate should be recreated automatically by operator +``` + +#### Issue: Cannot Connect with mongosh + +**Symptoms:** +- Connection timeout +- TLS handshake failure + +**Solutions:** +```bash +# Verify service has external IP +kubectl get svc -n "$NS" + +# Check pod logs +kubectl logs -n "$NS" -c gateway-sidecar + +# Test without TLS first +mongosh "mongodb://${USERNAME}:${PASSWORD}@${EXTERNAL_IP}:10260/?tls=false" + +# For self-signed certificates, allow invalid certificates +mongosh 
"mongodb://${USERNAME}:${PASSWORD}@${EXTERNAL_IP}:10260/?tls=true&tlsAllowInvalidCertificates=true" + +# Check TLS handshake +openssl s_client -connect "${EXTERNAL_IP}:10260" +``` + +#### Issue: Azure Key Vault Access Denied + +**Symptoms:** +- CSI driver cannot sync secrets +- "Access denied" errors in pod logs + +**Solutions:** +```bash +# Check managed identity binding +kubectl get azureidentitybinding -n "$NS" + +# Verify Key Vault access policies +az keyvault show --name "$KV_NAME" --query "properties.accessPolicies" + +# Check CSI driver logs +kubectl logs -n kube-system -l app=secrets-store-csi-driver | grep -i error + +# Verify pod identity +kubectl describe pod -n "$NS" | grep -i identity +``` + +### Debug Mode + +Run scripts with debug output: + +```bash +# Enable bash debug mode +bash -x ./create-cluster.sh --suffix test --subscription-id + +# Or set in script +set -x # Enable debug +set +x # Disable debug +``` + +### Getting Help + +If you encounter issues not covered here: + +1. **Check logs**: + ```bash + kubectl logs -n "$NS" --all-containers + ``` + +2. **Describe resources**: + ```bash + kubectl describe documentdb "$DOCDB_NAME" -n "$NS" + kubectl describe certificate -n "$NS" + ``` + +3. **Export configuration**: + ```bash + kubectl get documentdb "$DOCDB_NAME" -n "$NS" -o yaml > documentdb-config.yaml + ``` + +4. 
**Create GitHub issue**: + - Repository: https://github.com/microsoft/documentdb-kubernetes-operator + - Include: logs, configurations, error messages, steps to reproduce + +## E2E Test Matrix + +Test different combinations to ensure compatibility: + +| Test Case | AKS Version | TLS Mode | Storage | Expected Result | +|-----------|-------------|----------|---------|-----------------| +| Basic | 1.28+ | SelfSigned | Default | ✅ Pass | +| Provided | 1.28+ | Provided (AKV) | Default | ✅ Pass | +| CertManager | 1.28+ | CertManager | Default | ✅ Pass | +| Custom Storage | 1.28+ | SelfSigned | Premium | ✅ Pass | +| Multi-Instance | 1.28+ | SelfSigned | Default | ✅ Pass | + +## Continuous Testing + +For CI/CD integration, use the automated script: + +```bash +#!/bin/bash +# ci-e2e-test.sh + +set -e + +SUFFIX="ci-$(date +%Y%m%d%H%M)" +SUBSCRIPTION_ID="$AZURE_SUBSCRIPTION_ID" + +# Create and test +./create-cluster.sh --suffix "$SUFFIX" --subscription-id "$SUBSCRIPTION_ID" + +# Run validation +# (Add validation commands here) + +# Cleanup +./delete-cluster.sh --suffix "$SUFFIX" --subscription-id "$SUBSCRIPTION_ID" --all +``` + +## Additional Resources + +- [TLS Setup Guide](README.md) - Main TLS configuration documentation +- [Manual Provided Mode Setup](MANUAL-PROVIDED-MODE-SETUP.md) - Detailed step-by-step guide for Provided TLS with Azure Key Vault +- [Advanced Configuration](../../docs/operator-public-documentation/v1/advanced-configuration/README.md) - Production configurations +- [Project Documentation](../../docs/operator-public-documentation/index.md) - Full operator documentation +- [GitHub Repository](https://github.com/microsoft/documentdb-kubernetes-operator) - Source code and issues + +--- + +**Last Updated**: November 2025 +**Tested On**: AKS 1.28+, cert-manager 1.13+, DocumentDB Operator 0.1.1+ diff --git a/documentdb-playground/tls/MANUAL-PROVIDED-MODE-SETUP.md b/documentdb-playground/tls/MANUAL-PROVIDED-MODE-SETUP.md new file mode 100644 index 
00000000..5d969eb7 --- /dev/null +++ b/documentdb-playground/tls/MANUAL-PROVIDED-MODE-SETUP.md @@ -0,0 +1,1077 @@ +# Provided TLS Mode: Manual Step-by-Step Guide + +This guide shows how to manually configure and validate Provided TLS mode end-to-end using Azure Key Vault and the Secrets Store CSI Driver with managed identity. + +> **Note**: For automated setup, see [E2E-TESTING.md](E2E-TESTING.md). This guide is for users who want to understand each step in detail or troubleshoot issues. + +## Table of Contents + +- [Overview](#overview) +- [Prerequisites](#prerequisites) +- [Step 0: Azure and AKS Setup](#step-0-azure-and-aks-setup) +- [Step 1: Deploy DocumentDB with LoadBalancer](#step-1-deploy-documentdb-with-loadbalancer) +- [Step 2: Prepare Azure Key Vault](#step-2-prepare-azure-key-vault) +- [Step 3: Create Server Certificate](#step-3-create-server-certificate) +- [Step 4: Configure SecretProviderClass](#step-4-configure-secretproviderclass) +- [Step 5: Switch to Provided TLS Mode](#step-5-switch-to-provided-tls-mode) +- [Step 6: Validate Connectivity](#step-6-validate-connectivity) +- [Troubleshooting](#troubleshooting) +- [Cleanup](#cleanup) + +## Overview + +### What You'll Do + +1. Create or reuse a DocumentDB cluster exposed via LoadBalancer +2. Mint a server certificate in Azure Key Vault for the service's SNI host (`<LoadBalancer-IP>.sslip.io`) +3. Use a SecretProviderClass to sync the AKV certificate into a Kubernetes TLS secret +4. Switch DocumentDB to `spec.tls.gateway.mode: Provided` and point it at that secret +5. 
Connect with mongosh using the provided certificate + +### Architecture + +``` +Azure Key Vault (Certificate) + ↓ +SecretProviderClass (CSI Driver) + ↓ +Kubernetes TLS Secret + ↓ +DocumentDB Gateway (TLS Enabled) + ↓ +mongosh Client (TLS Connection) +``` + +## Prerequisites + +### Required Permissions + +- **Azure subscription** with Owner or Contributor permissions +- Ability to create resource groups, AKS clusters, and Key Vaults +- RBAC permissions to assign roles + +### Required Tools + +Ensure these tools are installed on your machine: + +```bash +# Check versions +az --version # Azure CLI +kubectl version # Kubernetes CLI +helm version # Helm 3.x +mongosh --version # MongoDB Shell +openssl version # OpenSSL +``` + +**Installation guides:** +- [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) +- [kubectl](https://kubernetes.io/docs/tasks/tools/) +- [Helm](https://helm.sh/docs/intro/install/) +- [mongosh](https://www.mongodb.com/docs/mongodb-shell/install/) + +### Login to Azure + +```bash +# Login +az login + +# List subscriptions +az account list --output table + +# Set subscription +az account set --subscription "<subscription-id>" +``` + +## Environment Variables + +Set these variables for your environment: + +```bash +export SUFFIX="$(date +%m%d%H)" +export SUBSCRIPTION_ID="<your-subscription-id>" +export LOCATION="eastus2" +export RG="documentdb-aks-${SUFFIX}-rg" +export AKS_NAME="documentdb-aks-${SUFFIX}" +export KV_NAME="${USER}-AKV-${SUFFIX}" +export NS="documentdb-preview-ns" +export DOCDB_NAME="documentdb-preview" +export CERT_NAME="documentdb-gateway" +export SECRET_NAME="documentdb-provided-tls" + +# Display configuration +echo "Configuration:" +echo " Suffix: $SUFFIX" +echo " Subscription: $SUBSCRIPTION_ID" +echo " Location: $LOCATION" +echo " Resource Group: $RG" +echo " AKS Cluster: $AKS_NAME" +echo " Key Vault: $KV_NAME" +echo " Namespace: $NS" +echo " DocumentDB: $DOCDB_NAME" +echo " Certificate: $CERT_NAME" +echo " Secret: $SECRET_NAME" +``` + +## 
Step 0: Azure and AKS Setup + +### 0.1 Create Resource Group + +```bash +az group create -n "$RG" -l "$LOCATION" +``` + +### 0.2 Create AKS Cluster + +Create AKS with managed identity: + +```bash +az aks create \ + -g "$RG" \ + -n "$AKS_NAME" \ + -l "$LOCATION" \ + --enable-managed-identity \ + --node-count 3 \ + -s Standard_d8s_v5 \ + --generate-ssh-keys +``` + +**Expected duration**: ~10-15 minutes + +### 0.3 Get Cluster Credentials + +```bash +az aks get-credentials -g "$RG" -n "$AKS_NAME" --overwrite-existing +``` + +### 0.4 Verify Cluster Connectivity + +```bash +kubectl config current-context +kubectl cluster-info +kubectl get nodes -o wide +``` + +**Expected output**: All nodes should be in `Ready` state. + +### 0.5 Install cert-manager + +Install cert-manager with CRDs: + +```bash +# Add Helm repository +helm repo add jetstack https://charts.jetstack.io +helm repo update + +# Create namespace +kubectl create namespace cert-manager --dry-run=client -o yaml | kubectl apply -f - + +# Install cert-manager +helm upgrade --install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --set installCRDs=true + +# Verify installation +kubectl -n cert-manager get pods +``` + +**Expected output**: 3 pods running (cert-manager, cainjector, webhook) + +**Troubleshooting**: If you see "Kubernetes cluster unreachable" errors: +```bash +az account set --subscription "$SUBSCRIPTION_ID" +az aks get-credentials -g "$RG" -n "$AKS_NAME" --overwrite-existing +kubectl cluster-info && kubectl get nodes +``` + +### 0.6 Install Secrets Store CSI Driver with Azure Provider + +> **Important**: Do not mix with the AKS managed add-on. If the add-on is enabled, disable it first or skip this Helm installation. 
+ +Check for existing add-on: +```bash +kubectl -n kube-system get ds | grep -E 'aks-secrets-store-provider-azure' && \ + echo "⚠️ AKS add-on detected; disable it or skip Helm" || \ + echo "✅ No add-on detected; proceed with Helm" +``` + +Install the CSI driver with Azure provider: +```bash +# Add Helm repository +helm repo add csi-azure https://azure.github.io/secrets-store-csi-driver-provider-azure/charts +helm repo update + +# Install with secret sync enabled +helm upgrade --install csi-azure-provider \ + csi-azure/csi-secrets-store-provider-azure \ + -n kube-system \ + --set "secrets-store-csi-driver.syncSecret.enabled=true" + +# Verify installation +kubectl -n kube-system get pods -l app=secrets-store-csi-driver +kubectl -n kube-system get pods -l app=csi-secrets-store-provider-azure + +# Wait for pods to be ready +kubectl -n kube-system wait --for=condition=Ready \ + pod -l app=secrets-store-csi-driver --timeout=120s +kubectl -n kube-system wait --for=condition=Ready \ + pod -l app=csi-secrets-store-provider-azure --timeout=120s + +# Verify DaemonSets +kubectl -n kube-system get ds -l app=secrets-store-csi-driver -o wide +kubectl -n kube-system get ds -l app=csi-secrets-store-provider-azure -o wide +``` + +**Important**: The SecretProviderClass `spec.provider` must be `azure` (lowercase), and any pod mounting it must set `volumeAttributes.secretProviderClass` to match the SecretProviderClass name. + +### 0.7 Deploy DocumentDB Operator + +> **Note**: This guide assumes you're using the DocumentDB operator Helm chart from the repository. + +```bash +# Create namespace +kubectl create namespace documentdb-operator --dry-run=client -o yaml | kubectl apply -f - + +# Install operator (adjust image references as needed) +cd /path/to/operator/documentdb-helm-chart +helm upgrade --install documentdb-operator . 
\ + -n documentdb-operator \ + --set documentDbVersion="16" + +# Verify installation +kubectl -n documentdb-operator get pods +``` + +**Why override `documentDbVersion`?** The Helm chart defaults to `0.1.0`, and the operator uses that value when selecting the gateway image. Without this override, the CNPG pod attempts to pull `ghcr.io/microsoft/documentdb/documentdb-local:0.1.0`, which doesn't exist. + +## Step 1: Deploy DocumentDB with LoadBalancer + +### 1.1 Create Namespace and Credentials + +```bash +# Create namespace +kubectl create namespace "$NS" --dry-run=client -o yaml | kubectl apply -f - + +# Create credentials secret +kubectl -n "$NS" create secret generic documentdb-credentials \ + --from-literal=username="docdbuser" \ + --from-literal=password="P@ssw0rd123" \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +### 1.2 Deploy DocumentDB with SelfSigned TLS (Temporary) + +We'll start with SelfSigned mode to get a LoadBalancer IP, then switch to Provided mode. + +Create the DocumentDB manifest: +```bash +cat > /tmp/documentdb-selfsigned.yaml <<'EOF' +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-preview + namespace: documentdb-preview-ns +spec: + version: "16" + instances: 1 + storage: + size: 10Gi + exposeViaService: + serviceType: LoadBalancer + tls: + mode: SelfSigned +EOF + +kubectl apply -f /tmp/documentdb-selfsigned.yaml +``` + +**Explanation**: +- `version: "16"`: PostgreSQL version (DocumentDB is built on PostgreSQL) +- `instances: 1`: Single instance for testing +- `exposeViaService.serviceType: LoadBalancer`: Exposes service externally +- `tls.mode: SelfSigned`: Temporary mode, will switch to Provided later + +### 1.3 Wait for Service and Capture IP + +Monitor service creation: +```bash +kubectl -n "$NS" get svc -w +``` + +Press `Ctrl+C` when the `EXTERNAL-IP` appears, then capture it: +```bash +export SVC_IP=$(kubectl -n "$NS" get svc documentdb-service-"$DOCDB_NAME" \ + -o 
jsonpath='{.status.loadBalancer.ingress[0].ip}') +export SNI_HOST="${SVC_IP}.sslip.io" + +echo "Service IP: $SVC_IP" +echo "SNI Host: $SNI_HOST" +``` + +**What is sslip.io?** It's a DNS service that maps `<IP>.sslip.io` to `<IP>` (for example, `20.1.2.3.sslip.io` resolves to `20.1.2.3`), providing a DNS hostname for the IP address. This is necessary because the certificate in this guide is issued for a DNS name, which clients send as SNI and verify against the certificate's Subject Alternative Name; connecting by bare IP would not match. + +### 1.4 Verify DocumentDB Status + +```bash +kubectl -n "$NS" get documentdb "$DOCDB_NAME" +kubectl -n "$NS" get documentdb "$DOCDB_NAME" -o jsonpath='{.status.tls}' | jq +``` + +**Expected output**: +- Status should show "Cluster in healthy state" +- TLS status should show `ready: true` and `mode: SelfSigned` + +## Step 2: Prepare Azure Key Vault + +### 2.1 Create Key Vault + +Create a Key Vault with RBAC authorization: + +```bash +az keyvault create \ + -g "$RG" \ + -n "$KV_NAME" \ + -l "$LOCATION" \ + --enable-rbac-authorization true +``` + +### 2.2 Assign RBAC Roles + +**For your user account** (to create/import certificates): +```bash +# Get your user object ID +USER_OBJECT_ID=$(az ad signed-in-user show --query id -o tsv) + +# Assign Key Vault Certificates Officer role +az role assignment create \ + --role "Key Vault Certificates Officer" \ + --assignee "$USER_OBJECT_ID" \ + --scope "/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RG/providers/Microsoft.KeyVault/vaults/$KV_NAME" +``` + +**For AKS kubelet identity** (to read secrets): +```bash +# Get kubelet managed identity +KUBELET_OBJECT_ID=$(az aks show \ + -g "$RG" \ + -n "$AKS_NAME" \ + --query identityProfile.kubeletidentity.objectId \ + -o tsv) + +# Assign Key Vault Secrets User role +az role assignment create \ + --role "Key Vault Secrets User" \ + --assignee "$KUBELET_OBJECT_ID" \ + --scope "/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RG/providers/Microsoft.KeyVault/vaults/$KV_NAME" +``` + +**Verify role assignments**: +```bash +az role assignment list --scope "/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RG/providers/Microsoft.KeyVault/vaults/$KV_NAME" 
--output table +``` + +## Step 3: Create Server Certificate + +You have two options for creating a certificate: + +### Option A: Import Existing PFX Certificate + +If you already have a certificate for `$SNI_HOST`: + +```bash +export PFX_PATH=/path/to/cert_${SNI_HOST}.pfx +export PFX_PASSWORD="" + +az keyvault certificate import \ + --vault-name "$KV_NAME" \ + -n "$CERT_NAME" \ + --file "$PFX_PATH" \ + --password "$PFX_PASSWORD" +``` + +**Important**: The certificate's Subject Alternative Name (SAN) must include `$SNI_HOST` (e.g., `20.1.2.3.sslip.io`). If it doesn't, strict hostname verification will fail. + +### Option B: Create Self-Signed Certificate in Key Vault (Recommended for Testing) + +Create a certificate policy with proper SAN configuration: + +```bash +cat > /tmp/akv-cert-policy.json <..cloudapp.azure.com`) and create a certificate for that stable name. + +### Verify Certificate + +```bash +az keyvault certificate show \ + --vault-name "$KV_NAME" \ + -n "$CERT_NAME" \ + --query "{enabled: attributes.enabled, created: attributes.created, expires: attributes.expires}" \ + -o table +``` + +## Step 4: Configure SecretProviderClass + +### 4.1 Create SecretProviderClass + +The SecretProviderClass tells the CSI driver how to sync the certificate from Azure Key Vault to a Kubernetes secret. 
+ +```bash +cat > /tmp/azure-secret-provider-class.yaml < /tmp/busybox-cert-puller.yaml <` +- Subject Alternative Name: `DNS:` +- Validity dates + +## Step 5: Switch to Provided TLS Mode + +### 5.1 Patch DocumentDB Resource + +Update the DocumentDB CR to use Provided TLS mode: + +```bash +kubectl -n "$NS" patch documentdb "$DOCDB_NAME" --type merge -p "$(cat < | grep -A 20 "gateway" +``` + +## Step 6: Validate Connectivity + +### 6.1 Get Credentials + +```bash +export DOCDB_USER=$(kubectl -n "$NS" get secret documentdb-credentials \ + -o jsonpath='{.data.username}' | base64 -d) +export DOCDB_PASS=$(kubectl -n "$NS" get secret documentdb-credentials \ + -o jsonpath='{.data.password}' | base64 -d) + +echo "Username: $DOCDB_USER" +echo "Password: $DOCDB_PASS" +``` + +### 6.2 Build CA File from Server + +For self-signed certificates or ad-hoc testing, extract the certificate chain: + +```bash +openssl s_client -connect "$SNI_HOST:10260" -servername "$SNI_HOST" -showcerts /dev/null | awk '/BEGIN CERTIFICATE/,/END CERTIFICATE/ {print}' > /tmp/ca.crt + +# Verify CA file +openssl x509 -in /tmp/ca.crt -text -noout | grep -E "Subject:|Issuer:|DNS:" +``` + +### 6.3 Test Connection - Strict TLS + +With SAN properly configured, strict TLS should work: + +```bash +mongosh "mongodb://$SNI_HOST:10260/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&replicaSet=rs0" \ + --tlsCAFile /tmp/ca.crt \ + -u "$DOCDB_USER" \ + -p "$DOCDB_PASS" \ + --eval 'db.runCommand({ ping: 1 })' +``` + +**Expected output**: `{ ok: 1 }` + +### 6.4 Test Connection - Relaxed Hostname Verification + +If SAN doesn't match, use relaxed verification temporarily: + +```bash +mongosh "mongodb://$SNI_HOST:10260/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&replicaSet=rs0&tlsAllowInvalidHostnames=true" \ + --tlsCAFile /tmp/ca.crt \ + -u "$DOCDB_USER" \ + -p "$DOCDB_PASS" \ + --eval 'db.runCommand({ ping: 1 })' +``` + +### 6.5 Perform CRUD Operations + +Once connected, test database 
operations: + +```javascript +// Connect interactively +mongosh "mongodb://$SNI_HOST:10260/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&replicaSet=rs0" \ + --tlsCAFile /tmp/ca.crt \ + -u "$DOCDB_USER" \ + -p "$DOCDB_PASS" + +// Inside mongosh: +use testdb + +// Insert +db.products.insertMany([ + { name: "Laptop", price: 999.99, category: "Electronics" }, + { name: "Mouse", price: 29.99, category: "Electronics" }, + { name: "Desk", price: 299.99, category: "Furniture" } +]) + +// Read +db.products.find() +db.products.find({ category: "Electronics" }) + +// Update +db.products.updateOne( + { name: "Laptop" }, + { $set: { price: 899.99 } } +) + +// Verify update +db.products.find({ name: "Laptop" }) + +// Delete +db.products.deleteOne({ name: "Mouse" }) + +// Count +db.products.count() + +// Cleanup +db.products.drop() +``` + +## Troubleshooting + +### Secret Not Created + +**Symptom**: Secret `$SECRET_NAME` doesn't exist after creating cert-puller pod. + +**Diagnosis**: +```bash +# Check puller pod status +kubectl -n "$NS" get pods -l app=cert-puller +kubectl -n "$NS" describe pod -l app=cert-puller + +# Check SecretProviderClass +kubectl -n "$NS" describe secretproviderclass documentdb-azure-tls + +# Check CSI driver logs +kubectl -n kube-system logs -l app=csi-secrets-store-provider-azure --tail=50 +``` + +**Solutions**: +1. Ensure puller pod is running +2. Verify `secretProviderClass` name matches exactly +3. Check pod events for mount errors +4. Verify RBAC permissions on Key Vault + +### Provider Not Found Error + +**Symptom**: Error message `provider not found: provider "azure"` + +**Diagnosis**: +```bash +# Check Azure provider pods +kubectl -n kube-system get pods -l app=csi-secrets-store-provider-azure + +# Check provider registration +kubectl -n kube-system logs -l app=csi-secrets-store-provider-azure | grep -i "provider registered" +``` + +**Solutions**: +1. Ensure Azure provider is installed and running +2. 
Verify `spec.provider` in SecretProviderClass is `azure` (lowercase, exact case) +3. Verify puller pod's `volumeAttributes.secretProviderClass` matches SPC name +4. Restart CSI driver pods if needed: + ```bash + kubectl -n kube-system rollout restart daemonset csi-secrets-store-provider-azure + ``` + +### Key Vault Permission Denied + +**Symptom**: Errors about "Access denied" or "unauthorized" in pod events or logs. + +**Diagnosis**: +```bash +# Check kubelet identity +az aks show -g "$RG" -n "$AKS_NAME" \ + --query identityProfile.kubeletidentity + +# Check role assignments +az role assignment list \ + --scope "/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RG/providers/Microsoft.KeyVault/vaults/$KV_NAME" \ + --output table +``` + +**Solutions**: +1. Verify kubelet managed identity has "Key Vault Secrets User" role +2. Verify your user account has "Key Vault Certificates Officer" role +3. Wait a few minutes for RBAC to propagate +4. Check if RBAC authorization is enabled on Key Vault: + ```bash + az keyvault show -n "$KV_NAME" --query "properties.enableRbacAuthorization" + ``` + +### DocumentDB TLS Not Ready + +**Symptom**: `status.tls.ready` is `false` or TLS status shows errors. + +**Diagnosis**: +```bash +# Check DocumentDB status +kubectl -n "$NS" get documentdb "$DOCDB_NAME" -o yaml | grep -A 20 "status:" + +# Check operator logs +kubectl -n documentdb-operator logs -l app.kubernetes.io/name=documentdb-operator --tail=100 +``` + +**Solutions**: +1. Verify secret type is `kubernetes.io/tls` +2. Verify secret contains both `tls.crt` and `tls.key` +3. Check `status.tls.message` for specific error +4. Restart operator if needed: + ```bash + kubectl -n documentdb-operator rollout restart deployment documentdb-operator + ``` + +### Certificate SAN Mismatch + +**Symptom**: TLS handshake errors or "hostname doesn't match certificate" errors. 
+ +**Diagnosis**: +```bash +# Check certificate SAN +kubectl -n "$NS" get secret "$SECRET_NAME" \ + -o jsonpath='{.data.tls\.crt}' | base64 -d | \ + openssl x509 -text -noout | grep -A 2 "Subject Alternative Name" + +# Compare with SNI host +echo "Expected: DNS:$SNI_HOST" +``` + +**Solutions**: +1. Ensure certificate SAN includes exact `$SNI_HOST` value +2. Recreate certificate with correct SAN if mismatch +3. Temporarily use `&tlsAllowInvalidHostnames=true` in connection string +4. For production, use stable DNS name instead of sslip.io + +### IMDS Multiple Identity Error + +**Symptom**: Error "Multiple user assigned identities exist" in pod events. + +**Solution**: +```bash +# Get kubelet client ID +KUBELET_CLIENT_ID=$(az aks show \ + -g "$RG" \ + -n "$AKS_NAME" \ + --query identityProfile.kubeletidentity.clientId \ + -o tsv | tr -d '\r') + +# Patch SecretProviderClass +kubectl -n "$NS" patch secretproviderclass documentdb-azure-tls \ + --type merge \ + -p "{\"spec\":{\"parameters\":{\"userAssignedIdentityID\":\"$KUBELET_CLIENT_ID\"}}}" + +# Restart puller pod +kubectl -n "$NS" rollout restart deploy cert-puller +``` + +### Connection Timeout + +**Symptom**: mongosh connection times out. + +**Diagnosis**: +```bash +# Check service +kubectl -n "$NS" get svc + +# Check if IP is assigned +echo "Service IP: $SVC_IP" + +# Test network connectivity +curl -k https://$SNI_HOST:10260 || echo "Connection failed" +``` + +**Solutions**: +1. Verify LoadBalancer service has external IP +2. Check firewall rules +3. Verify gateway pod is running +4. 
Check gateway logs: + ```bash + kubectl -n "$NS" logs -c gateway-sidecar + ``` + +## Cleanup + +### Remove Test Resources + +```bash +# Delete cert-puller pod +kubectl -n "$NS" delete deploy cert-puller + +# Delete synced secret +kubectl -n "$NS" delete secret "$SECRET_NAME" + +# Delete SecretProviderClass +kubectl -n "$NS" delete secretproviderclass documentdb-azure-tls + +# Delete DocumentDB instance +kubectl -n "$NS" delete documentdb "$DOCDB_NAME" + +# Delete credentials +kubectl -n "$NS" delete secret documentdb-credentials +``` + +### Remove Infrastructure (Optional) + +```bash +# Delete namespace +kubectl delete namespace "$NS" + +# Delete Key Vault +az keyvault delete --name "$KV_NAME" +az keyvault purge --name "$KV_NAME" + +# Delete AKS cluster +az aks delete --resource-group "$RG" --name "$AKS_NAME" --yes --no-wait + +# Delete resource group (removes everything) +az group delete --name "$RG" --yes --no-wait +``` + +## Advanced Topics + +### Certificate Rotation + +To rotate certificates automatically: + +1. **Update SecretProviderClass** with rotation interval: + ```yaml + spec: + parameters: + rotationPollInterval: "2h" # Check every 2 hours + ``` + +2. **Update certificate in Key Vault** (new version) + +3. **CSI driver syncs automatically** based on poll interval + +4. **Restart pods** that load the secret if hot reload is not supported: + ```bash + kubectl -n "$NS" rollout restart deployment cert-puller + ``` + +### Using User-Assigned Managed Identity + +If using a user-assigned managed identity instead of kubelet identity: + +1. Create user-assigned identity: + ```bash + az identity create -g "$RG" -n my-identity + ``` + +2. Assign Key Vault permissions to the identity + +3. Update SecretProviderClass: + ```yaml + parameters: + userAssignedIdentityID: "" + ``` + +### Using CA-Backed Certificates + +For production with a public or corporate CA: + +1. 
**Create CSR in Key Vault**: + ```bash + az keyvault certificate create \ + --vault-name "$KV_NAME" \ + -n "$CERT_NAME" \ + --policy @policy.json + + az keyvault certificate pending show \ + --vault-name "$KV_NAME" \ + -n "$CERT_NAME" \ + --query csr -o tsv > cert.csr + ``` + +2. **Submit CSR to CA** and get signed certificate + +3. **Merge signed certificate**: + ```bash + az keyvault certificate pending merge \ + --vault-name "$KV_NAME" \ + -n "$CERT_NAME" \ + --file signed-cert.cer + ``` + +### Stable DNS Names + +For production, use stable DNS instead of sslip.io: + +1. **Reserve Public IP**: + ```bash + az network public-ip create \ + -g "$RG" \ + -n my-stable-ip \ + --dns-name my-documentdb \ + --allocation-method Static \ + --sku Standard + ``` + +2. **Get FQDN**: + ```bash + az network public-ip show \ + -g "$RG" \ + -n my-stable-ip \ + --query dnsSettings.fqdn -o tsv + ``` + +3. **Create certificate for FQDN** + +4. **Configure LoadBalancer** to use reserved IP + +## Additional Resources + +- [Main TLS Setup Guide](README.md) - Overview of all TLS modes +- [E2E Testing Guide](E2E-TESTING.md) - Automated testing with scripts +- [Advanced Configuration](../../docs/operator-public-documentation/v1/advanced-configuration/README.md) - Production configurations +- [Azure Key Vault Documentation](https://learn.microsoft.com/en-us/azure/key-vault/) +- [Secrets Store CSI Driver](https://secrets-store-csi-driver.sigs.k8s.io/) +- [Azure Provider](https://azure.github.io/secrets-store-csi-driver-provider-azure/) + +--- + +**Last Updated**: November 2025 +**Tested On**: AKS 1.28+, Secrets Store CSI Driver 1.4+, Azure Key Vault RBAC mode diff --git a/documentdb-playground/tls/README.md b/documentdb-playground/tls/README.md new file mode 100644 index 00000000..6e0d1e94 --- /dev/null +++ b/documentdb-playground/tls/README.md @@ -0,0 +1,487 @@ +# DocumentDB Gateway TLS Setup + +This directory contains automated scripts for setting up DocumentDB with Gateway TLS support on 
Azure Kubernetes Service (AKS). + +## Overview + +The DocumentDB Kubernetes Operator supports multiple TLS modes for gateway components: +- **SelfSigned**: Operator automatically provisions self-signed certificates via cert-manager +- **Provided**: Use externally provided certificates (e.g., from Azure Key Vault) +- **CertManager**: Use cert-manager with custom issuers + +This setup automates the entire process, including: +- AKS cluster creation with all required addons +- cert-manager installation +- Azure Key Vault setup (for Provided mode) +- Secrets Store CSI driver configuration +- DocumentDB operator deployment +- TLS certificate provisioning and validation + +## Documentation + +- 📖 **[E2E-TESTING.md](E2E-TESTING.md)** - Comprehensive automated and manual E2E testing guide +- 📘 **[MANUAL-PROVIDED-MODE-SETUP.md](MANUAL-PROVIDED-MODE-SETUP.md)** - Detailed step-by-step manual guide for Provided TLS mode with Azure Key Vault + +## Quick Start + +### Prerequisites + +Before running the scripts, ensure you have the following tools installed: +- `az` (Azure CLI) - [Installation guide](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) +- `kubectl` - [Installation guide](https://kubernetes.io/docs/tasks/tools/) +- `helm` - [Installation guide](https://helm.sh/docs/intro/install/) +- `mongosh` (MongoDB Shell) - [Installation guide](https://www.mongodb.com/docs/mongodb-shell/install/) +- `openssl` - Usually pre-installed on macOS/Linux + +You must also have: +- An Azure subscription with Owner permissions +- Authenticated Azure CLI session (`az login`) + +### Create Complete TLS-Enabled Cluster + +Run the following command to create a complete AKS cluster with DocumentDB and TLS enabled: + +```bash +./scripts/create-cluster.sh \ + --suffix myusername \ + --subscription-id <subscription-id> +``` + +**What this does:** +1. Creates an AKS cluster with necessary addons (cert-manager, CSI driver) +2. Deploys the DocumentDB operator +3.
Sets up both SelfSigned and Provided TLS modes +4. Validates TLS connectivity +5. Provides connection strings for testing + +**Default Configuration:** +- **Location**: `eastus2` +- **Resource Group**: `guanzhou-<suffix>-rg` +- **AKS Cluster**: `guanzhou-<suffix>` +- **Key Vault**: `ddb-issuer-<suffix>` +- **Namespace**: `documentdb-preview-ns` +- **DocumentDB Name**: `documentdb-preview` + +### Customize Your Setup + +You can override defaults with additional flags: + +```bash +./scripts/create-cluster.sh \ + --suffix myusername \ + --subscription-id <subscription-id> \ + --location westus2 \ + --resource-group my-rg \ + --aks-name my-aks-cluster \ + --keyvault my-keyvault \ + --namespace my-namespace \ + --docdb-name my-documentdb +``` + +### Use Existing Cluster + +If you already have an AKS cluster and want to add DocumentDB with TLS: + +```bash +./scripts/create-cluster.sh \ + --suffix myusername \ + --subscription-id <subscription-id> \ + --skip-cluster +``` + +This will skip cluster creation and install DocumentDB components on the current kubectl context.
+ +### Clean Up Resources + +To delete all resources created by the script: + +```bash +./scripts/delete-cluster.sh \ + --suffix myusername \ + --subscription-id <subscription-id> \ + --all +``` + +**Options:** +- `--all`: Delete everything (AKS cluster, resource group, Key Vault) +- `--keep-cluster`: Delete only DocumentDB resources, keep the cluster +- `--keep-keyvault`: Delete cluster but preserve Key Vault + +### Verify TLS Setup + +After the cluster is created, verify TLS connectivity: + +```bash +# Get DocumentDB status +kubectl get documentdb documentdb-preview -n documentdb-preview-ns + +# Check TLS status specifically +kubectl get documentdb documentdb-preview -n documentdb-preview-ns -o jsonpath='{.status.tls}' | jq + +# Connect using mongosh with TLS (replace <external-ip> with the gateway service's external IP) +mongosh "mongodb://$(kubectl get secret documentdb-credentials -n documentdb-preview-ns -o jsonpath='{.data.username}' | base64 -d):$(kubectl get secret documentdb-credentials -n documentdb-preview-ns -o jsonpath='{.data.password}' | base64 -d)@<external-ip>:10260/?tls=true&tlsAllowInvalidCertificates=true" +``` + +## Scripts Directory + +All TLS setup and testing scripts are located in the `scripts/` directory. + +## 🚀 Main Scripts + +### create-cluster.sh + +**Main entry point** - Creates complete TLS-enabled DocumentDB cluster with simplified interface.
+ +**Common Options:** +- `--suffix `: Unique identifier for resource names (required) +- `--subscription-id `: Azure subscription ID (required) +- `--location `: Azure region (default: eastus2) +- `--resource-group `: Resource group name (default: guanzhou--rg) +- `--aks-name `: AKS cluster name (default: guanzhou-) +- `--keyvault `: Key Vault name (default: ddb-issuer-) +- `--namespace `: Kubernetes namespace (default: documentdb-preview-ns) +- `--docdb-name `: DocumentDB resource name (default: documentdb-preview) +- `--skip-cluster`: Skip AKS cluster creation +- `--github-username `: GitHub username for operator images (optional) +- `--github-token `: GitHub token for private registries (optional) +- `--help`: Show usage information + +**Examples:** + +```bash +# Minimal setup +./scripts/create-cluster.sh --suffix demo --subscription-id abc123 + +# Production setup with custom names +./scripts/create-cluster.sh \ + --suffix prod \ + --subscription-id abc123 \ + --location westus2 \ + --resource-group documentdb-prod-rg \ + --aks-name documentdb-prod-aks + +# Development setup on existing cluster +./scripts/create-cluster.sh \ + --suffix dev \ + --subscription-id abc123 \ + --skip-cluster +``` + +### delete-cluster.sh + +**Cleanup script** - Removes resources with flexible cleanup options. 
+ +**Options:** +- `--suffix `: Suffix used during creation (required) +- `--subscription-id `: Azure subscription ID (required) +- `--all`: Delete all resources including cluster and resource group +- `--keep-cluster`: Delete DocumentDB resources but keep the AKS cluster +- `--keep-keyvault`: Delete cluster but preserve Key Vault +- `--resource-group `: Override resource group name +- `--aks-name `: Override AKS cluster name +- `--keyvault `: Override Key Vault name +- `--namespace `: Override namespace +- `--help`: Show usage information + +**Examples:** + +```bash +# Delete everything +./scripts/delete-cluster.sh --suffix demo --subscription-id abc123 --all + +# Delete only DocumentDB, keep cluster for reuse +./scripts/delete-cluster.sh --suffix demo --subscription-id abc123 --keep-cluster + +# Delete cluster but preserve Key Vault data +./scripts/delete-cluster.sh --suffix demo --subscription-id abc123 --keep-keyvault +``` + +## ๐Ÿ”ง Core Scripts + +### gateway-tls-e2e.sh + +**Comprehensive E2E script** - Handles full lifecycle from infrastructure to validation. +- Creates AKS cluster with all prerequisites +- Deploys operator and DocumentDB +- Tests both SelfSigned and Provided TLS modes +- Validates connectivity + +**Used by**: `create-cluster.sh` (wrapper for simplified interface) + +**Direct usage** (for advanced control): +```bash +./scripts/gateway-tls-e2e.sh --suffix test --location eastus2 +``` + +## ๐Ÿ” TLS Configuration Scripts + +### setup-selfsigned-gateway-tls.sh + +**SelfSigned TLS mode** - Configure cert-manager with self-signed issuer (requires existing cluster and operator). + +```bash +./scripts/setup-selfsigned-gateway-tls.sh --namespace my-ns --docdb-name my-db +``` + +### setup-documentdb-akv.sh + +**Azure Key Vault setup** - Create and configure AKV for certificates. 
+ +```bash +./scripts/setup-documentdb-akv.sh --suffix test --keyvault my-kv --resource-group my-rg +``` + +### documentdb-provided-mode-setup.sh + +**Provided TLS mode** - Configure DocumentDB to use external certificates. + +```bash +./scripts/documentdb-provided-mode-setup.sh \ + --namespace my-ns \ + --docdb-name my-db \ + --secret-name my-tls-secret +``` + +## ✅ Validation Scripts + +### tls-connectivity-check.sh + +**TLS validation** - Verify TLS configuration and connectivity. + +```bash +./scripts/tls-connectivity-check.sh --namespace my-ns --docdb-name my-db +``` + +## 📋 Script Workflow + +### Standard E2E Test Flow +``` +create-cluster.sh + ↓ +gateway-tls-e2e.sh + ├── Create AKS cluster + ├── Install cert-manager + ├── Install CSI driver + ├── setup-documentdb-akv.sh + ├── Deploy operator + ├── setup-selfsigned-gateway-tls.sh + ├── tls-connectivity-check.sh + ├── documentdb-provided-mode-setup.sh + └── tls-connectivity-check.sh +``` + +### Cleanup Flow +``` +delete-cluster.sh + ├── Delete Kubernetes resources + ├── Delete namespace + ├── (Optional) Delete AKS cluster + └── (Optional) Delete resource group +``` + +## 🎯 Common Use Cases + +### First Time Setup +```bash +./scripts/create-cluster.sh --suffix myname --subscription-id <subscription-id> +``` + +### Using Existing Cluster +```bash +./scripts/create-cluster.sh --suffix myname --subscription-id <subscription-id> --skip-cluster +``` + +### Test SelfSigned Mode Only +```bash +# Assumes cluster and operator exist +./scripts/setup-selfsigned-gateway-tls.sh --namespace my-ns --docdb-name my-db +./scripts/tls-connectivity-check.sh --namespace my-ns --docdb-name my-db +``` + +### Test Provided Mode Only +```bash +# Setup Key Vault +./scripts/setup-documentdb-akv.sh --suffix test --keyvault my-kv --resource-group my-rg + +# Configure DocumentDB +./scripts/documentdb-provided-mode-setup.sh --namespace my-ns --docdb-name my-db --secret-name provided-tls +
+# Validate +./scripts/tls-connectivity-check.sh --namespace my-ns --docdb-name my-db +``` + +### Complete Cleanup +```bash +./scripts/delete-cluster.sh --suffix myname --subscription-id --all +``` + +### Partial Cleanup (Keep Cluster) +```bash +./scripts/delete-cluster.sh --suffix myname --subscription-id --keep-cluster +``` + +## ๐Ÿ› ๏ธ All Scripts Summary + +| Script | Purpose | Duration | Dependencies | +|--------|---------|----------|--------------| +| `create-cluster.sh` | Main entry point (wrapper) | ~25-30 min | None | +| `delete-cluster.sh` | Cleanup | ~5-10 min | None | +| `gateway-tls-e2e.sh` | Full E2E setup (core) | ~25-30 min | Azure CLI, kubectl | +| `setup-selfsigned-gateway-tls.sh` | SelfSigned mode | ~2-3 min | Existing cluster | +| `setup-documentdb-akv.sh` | Key Vault setup | ~3-5 min | Azure CLI | +| `documentdb-provided-mode-setup.sh` | Provided mode | ~2-3 min | Existing cluster | +| `tls-connectivity-check.sh` | Validation | ~1-2 min | mongosh | + +## ๐Ÿ” Getting Help + +Each script supports `--help`: +```bash +./scripts/create-cluster.sh --help +./scripts/delete-cluster.sh --help +``` + +For detailed E2E testing workflows, see **[E2E-TESTING.md](E2E-TESTING.md)**. + +--- + +## TLS Modes Explained + +### SelfSigned Mode + +The operator automatically creates: +- A self-signed ClusterIssuer via cert-manager +- A Certificate resource for the gateway +- A Kubernetes secret with the TLS certificate + +**Use Case**: Development, testing, or internal environments where self-signed certificates are acceptable. + +**Configuration Example:** +```yaml +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-preview + namespace: documentdb-preview-ns +spec: + tls: + gateway: + mode: SelfSigned +``` + +### Provided Mode + +You provide an existing certificate from an external source (e.g., Azure Key Vault, Let's Encrypt, enterprise CA). 
+ +**Use Case**: Production environments with existing PKI infrastructure or certificate management systems. + +**Configuration Example:** +```yaml +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-preview + namespace: documentdb-preview-ns +spec: + tls: + gateway: + mode: Provided + provided: + secretName: my-tls-secret +``` + +### CertManager Mode + +Use cert-manager with your own Issuer or ClusterIssuer (e.g., Let's Encrypt, Venafi). + +**Use Case**: Production environments with automated certificate renewal requirements. + +**Configuration Example:** +```yaml +apiVersion: db.microsoft.com/v1 +kind: DocumentDB +metadata: + name: documentdb-preview + namespace: documentdb-preview-ns +spec: + tls: + gateway: + mode: CertManager + certManager: + issuerRef: + name: letsencrypt-prod + kind: ClusterIssuer + dnsNames: + - documentdb.example.com +``` + +## Troubleshooting + +### Check TLS Certificate Status + +```bash +# Check DocumentDB TLS status +kubectl get documentdb documentdb-preview -n documentdb-preview-ns -o yaml | grep -A 10 "tls:" + +# Check cert-manager certificates +kubectl get certificates -n documentdb-preview-ns + +# Check certificate details +kubectl describe certificate -n documentdb-preview-ns + +# Check TLS secret +kubectl get secret -n documentdb-preview-ns -o yaml +``` + +### Verify Gateway Pod Configuration + +```bash +# Check gateway sidecar logs +kubectl logs -n documentdb-preview-ns -c gateway-sidecar + +# Verify TLS secret is mounted +kubectl describe pod -n documentdb-preview-ns | grep -A 5 "Mounts:" +``` + +### Common Issues + +**Certificate Not Ready:** +```bash +# Check cert-manager logs +kubectl logs -n cert-manager deployment/cert-manager + +# Check certificate status +kubectl describe certificate -n documentdb-preview-ns +``` + +**Azure Key Vault Access Issues:** +```bash +# Verify managed identity binding +kubectl get azureidentitybinding -n documentdb-preview-ns + +# Check CSI driver logs +kubectl logs 
-n kube-system -l app=secrets-store-csi-driver +``` + +**Connection Issues:** +```bash +# Test TLS connectivity +openssl s_client -connect :10260 -servername documentdb-preview + +# Verify service external IP +kubectl get svc -n documentdb-preview-ns +``` + +## Additional Resources + +- [E2E Testing Guide](E2E-TESTING.md) - Comprehensive automated and manual testing procedures +- [Advanced Configuration](../../docs/operator-public-documentation/v1/advanced-configuration/README.md) - Production configurations and best practices +- [DocumentDB Operator Documentation](https://microsoft.github.io/documentdb-kubernetes-operator) - Complete operator documentation +- [cert-manager Documentation](https://cert-manager.io/docs/) - Certificate management +- [Azure Key Vault CSI Driver](https://azure.github.io/secrets-store-csi-driver-provider-azure/) - Azure secrets integration + +## Support + +For issues or questions: +- Create an [issue](https://github.com/microsoft/documentdb-kubernetes-operator/issues) +- Check [documentation](https://microsoft.github.io/documentdb-kubernetes-operator) +- Review [E2E Testing Guide](E2E-TESTING.md#troubleshooting) for troubleshooting diff --git a/documentdb-playground/tls/scripts/create-cluster.sh b/documentdb-playground/tls/scripts/create-cluster.sh new file mode 100755 index 00000000..fbb2516a --- /dev/null +++ b/documentdb-playground/tls/scripts/create-cluster.sh @@ -0,0 +1,292 @@ +#!/usr/bin/env bash + +####################################### +# DocumentDB TLS Setup - Quick Start Script +# +# This is the main entry point for creating a DocumentDB cluster with TLS support. +# It provides a simplified interface to the comprehensive gateway-tls-e2e.sh script. 
+# +# Usage: +# ./create-cluster.sh --suffix myname --subscription-id +# +# For full options, run: +# ./create-cluster.sh --help +####################################### + +set -euo pipefail + +# Determine script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# The comprehensive E2E script is in the same directory +E2E_SCRIPT="$SCRIPT_DIR/gateway-tls-e2e.sh" + +# Check if the E2E script exists +if [[ ! -f "$E2E_SCRIPT" ]]; then + echo "Error: Could not find gateway-tls-e2e.sh at: $E2E_SCRIPT" + echo "Please ensure all scripts are present in the scripts directory" + exit 1 +fi + +usage() { + cat <<'EOF' +DocumentDB TLS Setup - Quick Start + +This script creates a complete AKS cluster with DocumentDB operator and TLS support. +It handles everything from infrastructure setup to TLS validation. + +USAGE: + ./create-cluster.sh --suffix --subscription-id [OPTIONS] + +REQUIRED: + --suffix Unique identifier for your resources (e.g., your username) + --subscription-id Azure subscription ID + +OPTIONAL: + --location Azure region (default: eastus2) + --resource-group Resource group name (default: guanzhou--rg) + --aks-name AKS cluster name (default: guanzhou-) + --keyvault Azure Key Vault name (default: ddb-issuer-) + --namespace Kubernetes namespace (default: documentdb-preview-ns) + --docdb-name DocumentDB resource name (default: documentdb-preview) + --github-username GitHub username for operator images (optional) + --github-token GitHub token with read:packages scope (optional) + --skip-cluster Skip AKS cluster creation (use existing cluster) + --help Show this help message + +EXAMPLES: + # Minimal setup - creates everything with defaults + ./create-cluster.sh --suffix demo --subscription-id 12345678-1234-1234-1234-123456789012 + + # Custom region and names + ./create-cluster.sh \ + --suffix prod \ + --subscription-id 12345678-1234-1234-1234-123456789012 \ + --location westus2 \ + --resource-group my-documentdb-rg \ + --aks-name my-aks-cluster + 
+ # Use existing AKS cluster + ./create-cluster.sh \ + --suffix dev \ + --subscription-id 12345678-1234-1234-1234-123456789012 \ + --skip-cluster + +WHAT IT DOES: + 1. โœ“ Creates AKS cluster with required addons (unless --skip-cluster) + 2. โœ“ Installs cert-manager and Secrets Store CSI driver + 3. โœ“ Creates Azure Key Vault for certificate storage + 4. โœ“ Deploys DocumentDB operator with Helm + 5. โœ“ Configures SelfSigned TLS mode and validates connectivity + 6. โœ“ Configures Provided TLS mode (Azure Key Vault) and validates + 7. โœ“ Provides connection strings and testing instructions + +TIME ESTIMATE: + - With cluster creation: ~20-30 minutes + - Without cluster creation: ~10-15 minutes + +CLEANUP: + To delete all resources after testing: + ./delete-cluster.sh --suffix --subscription-id --all + +For detailed documentation, see: + ../README.md + +For E2E testing instructions, see: + ../E2E-TESTING.md +EOF +} + +# Default values +SUFFIX="" +SUBSCRIPTION_ID="" +LOCATION="eastus2" +RESOURCE_GROUP="" +AKS_NAME="" +KEYVAULT_NAME="" +NAMESPACE="documentdb-preview-ns" +DOCDB_NAME="documentdb-preview" +SKIP_CLUSTER=0 +GITHUB_USERNAME="" +GITHUB_TOKEN="" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --suffix) + SUFFIX="$2" + shift 2 + ;; + --subscription-id) + SUBSCRIPTION_ID="$2" + shift 2 + ;; + --location) + LOCATION="$2" + shift 2 + ;; + --resource-group) + RESOURCE_GROUP="$2" + shift 2 + ;; + --aks-name) + AKS_NAME="$2" + shift 2 + ;; + --keyvault) + KEYVAULT_NAME="$2" + shift 2 + ;; + --namespace) + NAMESPACE="$2" + shift 2 + ;; + --docdb-name) + DOCDB_NAME="$2" + shift 2 + ;; + --github-username) + GITHUB_USERNAME="$2" + shift 2 + ;; + --github-token) + GITHUB_TOKEN="$2" + shift 2 + ;; + --skip-cluster) + SKIP_CLUSTER=1 + shift + ;; + --help|-h) + usage + exit 0 + ;; + *) + echo "Error: Unknown option: $1" + echo "" + usage + exit 1 + ;; + esac +done + +# Validate required parameters +if [[ -z "$SUFFIX" ]]; then + echo "Error: --suffix is 
required" + echo "" + usage + exit 1 +fi + +if [[ -z "$SUBSCRIPTION_ID" ]]; then + echo "Error: --subscription-id is required" + echo "" + usage + exit 1 +fi + +# Set defaults based on suffix if not provided +if [[ -z "$RESOURCE_GROUP" ]]; then + RESOURCE_GROUP="guanzhou-${SUFFIX}-rg" +fi + +if [[ -z "$AKS_NAME" ]]; then + AKS_NAME="guanzhou-${SUFFIX}" +fi + +if [[ -z "$KEYVAULT_NAME" ]]; then + KEYVAULT_NAME="ddb-issuer-${SUFFIX}" +fi + +# Print configuration +echo "============================================" +echo "DocumentDB TLS Setup - Configuration" +echo "============================================" +echo "Suffix: $SUFFIX" +echo "Subscription: $SUBSCRIPTION_ID" +echo "Location: $LOCATION" +echo "Resource Group: $RESOURCE_GROUP" +echo "AKS Cluster: $AKS_NAME" +echo "Key Vault: $KEYVAULT_NAME" +echo "Namespace: $NAMESPACE" +echo "DocumentDB: $DOCDB_NAME" +echo "Skip Cluster: $([ $SKIP_CLUSTER -eq 1 ] && echo 'Yes' || echo 'No')" +echo "============================================" +echo "" + +# Confirm before proceeding +read -p "Proceed with this configuration? (yes/no): " -r +echo "" +if [[ ! $REPLY =~ ^[Yy]es$ ]]; then + echo "Aborted by user." + exit 0 +fi + +# Build command for the E2E script +CMD=("$E2E_SCRIPT") +CMD+=(--suffix "$SUFFIX") +CMD+=(--location "$LOCATION") +CMD+=(--resource-group "$RESOURCE_GROUP") +CMD+=(--aks-name "$AKS_NAME") +CMD+=(--keyvault "$KEYVAULT_NAME") +CMD+=(--namespace "$NAMESPACE") +CMD+=(--docdb-name "$DOCDB_NAME") + +if [[ $SKIP_CLUSTER -eq 1 ]]; then + CMD+=(--skip-cluster) +fi + +if [[ -n "$GITHUB_USERNAME" ]]; then + CMD+=(--github-username "$GITHUB_USERNAME") +fi + +if [[ -n "$GITHUB_TOKEN" ]]; then + CMD+=(--github-token "$GITHUB_TOKEN") +fi + +# Export subscription ID for az commands +export AZURE_SUBSCRIPTION_ID="$SUBSCRIPTION_ID" +az account set --subscription "$SUBSCRIPTION_ID" 2>/dev/null || { + echo "Error: Failed to set Azure subscription. Please run 'az login' first." 
+    exit 1
+}
+
+echo "Starting DocumentDB TLS setup..."
+echo "This will take approximately 20-30 minutes..."
+echo ""
+
+# Run the E2E script. `set -e` is active, so capture a failure with `||`
+# instead of reading `$?` afterwards; otherwise the error report is unreachable.
+EXIT_CODE=0
+"${CMD[@]}" || EXIT_CODE=$?
+
+if [[ $EXIT_CODE -eq 0 ]]; then
+    echo ""
+    echo "============================================"
+    echo "✓ DocumentDB TLS Setup Complete!"
+    echo "============================================"
+    echo ""
+    echo "Next steps:"
+    echo "1. Verify TLS status:"
+    echo " kubectl get documentdb $DOCDB_NAME -n $NAMESPACE -o jsonpath='{.status.tls}' | jq"
+    echo ""
+    echo "2. Get connection string:"
+    echo " kubectl get documentdb $DOCDB_NAME -n $NAMESPACE"
+    echo ""
+    echo "3. Test connectivity with mongosh:"
+    echo " (See output above for specific connection commands)"
+    echo ""
+    echo "To clean up resources:"
+    echo " ./delete-cluster.sh --suffix $SUFFIX --subscription-id $SUBSCRIPTION_ID --all"
+    echo ""
+else
+    echo ""
+    echo "============================================"
+    echo "✗ Setup encountered errors"
+    echo "============================================"
+    echo ""
+    echo "Please check the logs above for details."
+ echo "For troubleshooting, see: ../README.md#troubleshooting" + echo "" + exit $EXIT_CODE +fi diff --git a/documentdb-playground/tls/scripts/delete-cluster.sh b/documentdb-playground/tls/scripts/delete-cluster.sh new file mode 100755 index 00000000..3deda740 --- /dev/null +++ b/documentdb-playground/tls/scripts/delete-cluster.sh @@ -0,0 +1,410 @@ +#!/usr/bin/env bash + +####################################### +# DocumentDB TLS Setup - Cleanup Script +# +# This script deletes resources created by create-cluster.sh +# Supports multiple cleanup modes: +# --all: Delete everything (cluster, RG, Key Vault) +# --keep-cluster: Delete only DocumentDB resources +# --keep-keyvault: Delete cluster but preserve Key Vault +# +# Usage: +# ./delete-cluster.sh --suffix myname --subscription-id --all +####################################### + +set -euo pipefail + +usage() { + cat <<'EOF' +DocumentDB TLS Cleanup Script + +This script deletes resources created during DocumentDB TLS setup. +You can choose to delete everything or selectively preserve certain resources. 
+ +USAGE: + ./delete-cluster.sh --suffix --subscription-id [MODE] [OPTIONS] + +REQUIRED: + --suffix The suffix used during cluster creation + --subscription-id Azure subscription ID + +CLEANUP MODES (choose one): + --all Delete everything: AKS, Resource Group, Key Vault, Kubernetes resources + --keep-cluster Delete only DocumentDB/Kubernetes resources, preserve AKS cluster + --keep-keyvault Delete AKS cluster but preserve Key Vault (for certificate reuse) + +OPTIONAL OVERRIDES: + --location Azure region (default: eastus2) + --resource-group Resource group name (default: guanzhou--rg) + --aks-name AKS cluster name (default: guanzhou-) + --keyvault Key Vault name (default: ddb-issuer-) + --namespace Kubernetes namespace (default: documentdb-preview-ns) + --help Show this help message + +EXAMPLES: + # Delete everything (most common) + ./delete-cluster.sh --suffix demo --subscription-id 12345678-1234-1234-1234-123456789012 --all + + # Keep cluster for reuse, delete only DocumentDB + ./delete-cluster.sh --suffix demo --subscription-id 12345678-1234-1234-1234-123456789012 --keep-cluster + + # Delete cluster but preserve Key Vault certificates + ./delete-cluster.sh --suffix demo --subscription-id 12345678-1234-1234-1234-123456789012 --keep-keyvault + + # Delete with custom names + ./delete-cluster.sh \ + --suffix prod \ + --subscription-id 12345678-1234-1234-1234-123456789012 \ + --resource-group my-rg \ + --all + +WHAT GETS DELETED: + --all mode: + โœ— DocumentDB resources (CRDs, pods, services) + โœ— Helm releases (operator, cert-manager) + โœ— Kubernetes namespaces + โœ— AKS cluster + โœ— Azure Resource Group + โœ— Azure Key Vault + + --keep-cluster mode: + โœ— DocumentDB resources + โœ— Helm releases (operator only) + โœ— Kubernetes namespaces + โœ“ AKS cluster (preserved) + โœ“ Resource Group (preserved) + โœ“ Key Vault (preserved) + + --keep-keyvault mode: + โœ— DocumentDB resources + โœ— Helm releases + โœ— Kubernetes namespaces + โœ— AKS cluster + โœ— Resource 
Group + โœ“ Key Vault (preserved in new RG) + +WARNINGS: + - Deletion is permanent and cannot be undone + - --all mode will delete the entire resource group + - You will be prompted to confirm before deletion + +EOF +} + +# Default values +SUFFIX="" +SUBSCRIPTION_ID="" +LOCATION="eastus2" +RESOURCE_GROUP="" +AKS_NAME="" +KEYVAULT_NAME="" +NAMESPACE="documentdb-preview-ns" + +# Cleanup modes +DELETE_ALL=0 +KEEP_CLUSTER=0 +KEEP_KEYVAULT=0 + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --suffix) + SUFFIX="$2" + shift 2 + ;; + --subscription-id) + SUBSCRIPTION_ID="$2" + shift 2 + ;; + --location) + LOCATION="$2" + shift 2 + ;; + --resource-group) + RESOURCE_GROUP="$2" + shift 2 + ;; + --aks-name) + AKS_NAME="$2" + shift 2 + ;; + --keyvault) + KEYVAULT_NAME="$2" + shift 2 + ;; + --namespace) + NAMESPACE="$2" + shift 2 + ;; + --all) + DELETE_ALL=1 + shift + ;; + --keep-cluster) + KEEP_CLUSTER=1 + shift + ;; + --keep-keyvault) + KEEP_KEYVAULT=1 + shift + ;; + --help|-h) + usage + exit 0 + ;; + *) + echo "Error: Unknown option: $1" + echo "" + usage + exit 1 + ;; + esac +done + +# Validate required parameters +if [[ -z "$SUFFIX" ]]; then + echo "Error: --suffix is required" + echo "" + usage + exit 1 +fi + +if [[ -z "$SUBSCRIPTION_ID" ]]; then + echo "Error: --subscription-id is required" + echo "" + usage + exit 1 +fi + +# Validate cleanup mode +MODE_COUNT=$((DELETE_ALL + KEEP_CLUSTER + KEEP_KEYVAULT)) +if [[ $MODE_COUNT -eq 0 ]]; then + echo "Error: You must specify a cleanup mode: --all, --keep-cluster, or --keep-keyvault" + echo "" + usage + exit 1 +fi + +if [[ $MODE_COUNT -gt 1 ]]; then + echo "Error: Only one cleanup mode can be specified" + echo "" + usage + exit 1 +fi + +# Set defaults based on suffix +if [[ -z "$RESOURCE_GROUP" ]]; then + RESOURCE_GROUP="guanzhou-${SUFFIX}-rg" +fi + +if [[ -z "$AKS_NAME" ]]; then + AKS_NAME="guanzhou-${SUFFIX}" +fi + +if [[ -z "$KEYVAULT_NAME" ]]; then + KEYVAULT_NAME="ddb-issuer-${SUFFIX}" +fi + +# Determine mode 
description +if [[ $DELETE_ALL -eq 1 ]]; then + MODE_DESC="DELETE EVERYTHING (Cluster, Resource Group, Key Vault)" +elif [[ $KEEP_CLUSTER -eq 1 ]]; then + MODE_DESC="Delete DocumentDB only (Keep AKS cluster)" +elif [[ $KEEP_KEYVAULT -eq 1 ]]; then + MODE_DESC="Delete cluster (Keep Key Vault)" +fi + +# Set Azure subscription +az account set --subscription "$SUBSCRIPTION_ID" 2>/dev/null || { + echo "Error: Failed to set Azure subscription. Please run 'az login' first." + exit 1 +} + +# Print configuration +echo "============================================" +echo "DocumentDB TLS Cleanup - Configuration" +echo "============================================" +echo "Mode: $MODE_DESC" +echo "Suffix: $SUFFIX" +echo "Subscription: $SUBSCRIPTION_ID" +echo "Resource Group: $RESOURCE_GROUP" +echo "AKS Cluster: $AKS_NAME" +echo "Key Vault: $KEYVAULT_NAME" +echo "Namespace: $NAMESPACE" +echo "============================================" +echo "" +echo "โš ๏ธ WARNING: This action cannot be undone!" +echo "" + +# Confirm before proceeding +read -p "Are you sure you want to proceed? Type 'yes' to confirm: " -r +echo "" +if [[ ! $REPLY =~ ^yes$ ]]; then + echo "Aborted by user." + exit 0 +fi + +echo "Starting cleanup..." +echo "" + +# Function to delete Kubernetes resources +delete_k8s_resources() { + echo "โ†’ Deleting Kubernetes resources in namespace: $NAMESPACE" + + # Check if cluster is accessible + if ! kubectl cluster-info &>/dev/null; then + echo " โš ๏ธ Cannot access Kubernetes cluster, skipping K8s cleanup" + return + fi + + # Delete DocumentDB resources + if kubectl get namespace "$NAMESPACE" &>/dev/null; then + echo " โ€ข Deleting DocumentDB instances..." + kubectl delete documentdb --all -n "$NAMESPACE" --ignore-not-found=true --timeout=60s || true + + echo " โ€ข Deleting operator Helm release..." + helm uninstall documentdb-operator -n "$NAMESPACE" 2>/dev/null || true + + echo " โ€ข Deleting namespace..." 
+ kubectl delete namespace "$NAMESPACE" --timeout=60s || true + else + echo " โ€ข Namespace $NAMESPACE not found, skipping" + fi + + echo " โœ“ Kubernetes resources deleted" +} + +# Function to delete cert-manager +delete_cert_manager() { + echo "โ†’ Deleting cert-manager..." + if kubectl get namespace cert-manager &>/dev/null; then + helm uninstall cert-manager -n cert-manager 2>/dev/null || true + kubectl delete namespace cert-manager --timeout=60s || true + echo " โœ“ cert-manager deleted" + else + echo " โ€ข cert-manager not found, skipping" + fi +} + +# Function to move Key Vault to new resource group +preserve_keyvault() { + echo "โ†’ Preserving Key Vault: $KEYVAULT_NAME" + + # Create new resource group for Key Vault + KV_RG="${KEYVAULT_NAME}-preserved-rg" + + if az keyvault show --name "$KEYVAULT_NAME" &>/dev/null; then + echo " โ€ข Creating new resource group: $KV_RG" + az group create --name "$KV_RG" --location "$LOCATION" --output none + + echo " โ€ข Moving Key Vault to new resource group..." + KV_ID=$(az keyvault show --name "$KEYVAULT_NAME" --query id -o tsv) + az resource move --destination-group "$KV_RG" --ids "$KV_ID" || { + echo " โš ๏ธ Failed to move Key Vault, it may be deleted with the resource group" + } + + echo " โœ“ Key Vault preserved in: $KV_RG" + echo " โ„น๏ธ To delete it later: az group delete --name $KV_RG" + else + echo " โ€ข Key Vault not found, nothing to preserve" + fi +} + +# Execute cleanup based on mode +if [[ $DELETE_ALL -eq 1 ]]; then + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "Mode: DELETE ALL" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "" + + # Get AKS credentials for K8s cleanup + echo "โ†’ Getting AKS credentials..." 
+ az aks get-credentials --resource-group "$RESOURCE_GROUP" --name "$AKS_NAME" --overwrite-existing 2>/dev/null || { + echo " โš ๏ธ Could not get AKS credentials, cluster may not exist" + } + + delete_k8s_resources + + echo "" + echo "โ†’ Deleting Azure Resource Group: $RESOURCE_GROUP" + echo " (This includes AKS cluster, Key Vault, and all other resources)" + + if az group show --name "$RESOURCE_GROUP" &>/dev/null; then + az group delete --name "$RESOURCE_GROUP" --yes --no-wait + echo " โœ“ Resource group deletion initiated (running in background)" + echo " โ„น๏ธ Check status with: az group show --name $RESOURCE_GROUP" + else + echo " โ€ข Resource group not found" + fi + +elif [[ $KEEP_CLUSTER -eq 1 ]]; then + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "Mode: KEEP CLUSTER" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "" + + # Get AKS credentials + echo "โ†’ Getting AKS credentials..." + az aks get-credentials --resource-group "$RESOURCE_GROUP" --name "$AKS_NAME" --overwrite-existing || { + echo "Error: Could not get AKS credentials" + exit 1 + } + + delete_k8s_resources + + echo "" + echo " โœ“ Cleanup complete" + echo " โ„น๏ธ AKS cluster preserved: $AKS_NAME" + echo " โ„น๏ธ Resource group preserved: $RESOURCE_GROUP" + echo " โ„น๏ธ Key Vault preserved: $KEYVAULT_NAME" + +elif [[ $KEEP_KEYVAULT -eq 1 ]]; then + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "Mode: KEEP KEY VAULT" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "" + + # Get AKS credentials + echo "โ†’ Getting AKS credentials..." 
+ az aks get-credentials --resource-group "$RESOURCE_GROUP" --name "$AKS_NAME" --overwrite-existing 2>/dev/null || { + echo " โš ๏ธ Could not get AKS credentials" + } + + delete_k8s_resources + + echo "" + preserve_keyvault + + echo "" + echo "โ†’ Deleting Azure Resource Group: $RESOURCE_GROUP" + + if az group show --name "$RESOURCE_GROUP" &>/dev/null; then + az group delete --name "$RESOURCE_GROUP" --yes --no-wait + echo " โœ“ Resource group deletion initiated (running in background)" + else + echo " โ€ข Resource group not found" + fi +fi + +echo "" +echo "============================================" +echo "โœ“ Cleanup Complete" +echo "============================================" +echo "" + +if [[ $DELETE_ALL -eq 1 ]]; then + echo "All resources have been deleted or are being deleted." + echo "Deletion may take several minutes to complete." +elif [[ $KEEP_CLUSTER -eq 1 ]]; then + echo "DocumentDB resources deleted. AKS cluster is ready for reuse." + echo "" + echo "To redeploy DocumentDB:" + echo " ./create-cluster.sh --suffix $SUFFIX --subscription-id $SUBSCRIPTION_ID --skip-cluster" +elif [[ $KEEP_KEYVAULT -eq 1 ]]; then + echo "Cluster deleted. Key Vault preserved for certificate reuse." + echo "" + echo "Key Vault location: $KV_RG" +fi + +echo "" diff --git a/documentdb-playground/tls/scripts/documentdb-provided-mode-setup.sh b/documentdb-playground/tls/scripts/documentdb-provided-mode-setup.sh new file mode 100755 index 00000000..c7c8d115 --- /dev/null +++ b/documentdb-playground/tls/scripts/documentdb-provided-mode-setup.sh @@ -0,0 +1,407 @@ +#!/usr/bin/env bash + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: documentdb-provided-mode-setup.sh [options] + +Creates or updates the resources required to run the DocumentDB gateway in +Provided TLS mode, assuming the Azure Key Vault and certificate already exist. +The script wires up the Secrets Store CSI plumbing, manages the DocumentDB +manifest, and performs an optional mongosh connectivity check. 
+ +Options: + -g, --resource-group Azure resource group with the Key Vault and AKS cluster (required) + --aks-name Azure Kubernetes Service cluster name (required) + --location Azure region (retained for backward compatibility) + --keyvault Azure Key Vault name (required) + --cert-name Azure Key Vault certificate name (default: documentdb-gateway) + --sni-host Hostname embedded in the certificate and used for TLS/SNI (required) + --namespace DocumentDB namespace (default: documentdb-preview-ns) + --docdb-name DocumentDB resource name (default: documentdb-preview) + --docdb-version DocumentDB version (default: 16) + --secret-name K8s secret with gateway credentials (default: documentdb-credentials) + --username Gateway username (default: docdbuser) + --password Gateway password (default: P@ssw0rd123) + --provided-secret K8s TLS secret synced from Key Vault (default: documentdb-provided-tls) + --spc-name SecretProviderClass name (default: documentdb-azure-tls) + --pvc-size Volume size for DocumentDB (default: 10Gi) + --storage-class StorageClass for DocumentDB PVCs (optional) + --user-assigned-client Kubelet user-assigned managed identity clientId (optional) + --skip-cert-manager Skip cert-manager install/upgrade + --skip-csi-install Skip installing the CSI driver/provider (assume already present) + --timeout Timeout for TLS readiness (default: 900) + --skip-mongosh Skip mongosh connectivity test + -h, --help Show this help text +EOF +} + +RESOURCE_GROUP="" +AKS_NAME="" +LOCATION="" +KEYVAULT_NAME="" +CERT_NAME="documentdb-gateway" +SNI_HOST="" +NAMESPACE="documentdb-preview-ns" +DOCDB_NAME="documentdb-preview" +DOCDB_VERSION="16" +SECRET_NAME="documentdb-credentials" +SECRET_USER="docdbuser" +SECRET_PASS="P@ssw0rd123" +PROVIDED_SECRET="documentdb-provided-tls" +SPC_NAME="documentdb-azure-tls" +PVC_SIZE="10Gi" +STORAGE_CLASS="" +USER_ASSIGNED_CLIENT="" +INSTALL_CERT_MANAGER=1 +INSTALL_CSI=1 +TIMEOUT=900 +RUN_MONGOSH=1 + +while [[ $# -gt 0 ]]; do + case "$1" in + 
-g|--resource-group) + RESOURCE_GROUP="$2"; shift 2 ;; + --aks-name) + AKS_NAME="$2"; shift 2 ;; + --location) + LOCATION="$2"; shift 2 ;; + --keyvault) + KEYVAULT_NAME="$2"; shift 2 ;; + --cert-name) + CERT_NAME="$2"; shift 2 ;; + --sni-host) + SNI_HOST="$2"; shift 2 ;; + --namespace) + NAMESPACE="$2"; shift 2 ;; + --docdb-name) + DOCDB_NAME="$2"; shift 2 ;; + --docdb-version) + DOCDB_VERSION="$2"; shift 2 ;; + --secret-name) + SECRET_NAME="$2"; shift 2 ;; + --username) + SECRET_USER="$2"; shift 2 ;; + --password) + SECRET_PASS="$2"; shift 2 ;; + --provided-secret) + PROVIDED_SECRET="$2"; shift 2 ;; + --spc-name) + SPC_NAME="$2"; shift 2 ;; + --pvc-size) + PVC_SIZE="$2"; shift 2 ;; + --storage-class) + STORAGE_CLASS="$2"; shift 2 ;; + --user-assigned-client) + USER_ASSIGNED_CLIENT="$2"; shift 2 ;; + --skip-cert-manager) + INSTALL_CERT_MANAGER=0; shift ;; + --skip-csi-install) + INSTALL_CSI=0; shift ;; + --timeout) + TIMEOUT="$2"; shift 2 ;; + --skip-mongosh) + RUN_MONGOSH=0; shift ;; + -h|--help) + usage; exit 0 ;; + *) + echo "Unknown argument: $1" >&2 + usage; exit 1 ;; + esac +done + +if [[ -z "$RESOURCE_GROUP" || -z "$AKS_NAME" || -z "$KEYVAULT_NAME" || -z "$SNI_HOST" ]]; then + echo "--resource-group, --aks-name, --keyvault, and --sni-host are required" >&2 + usage + exit 1 +fi + +sanitize_id() { + printf '%s' "$1" | tr -d '\r\n' +} + +for bin in az kubectl helm jq $([[ "$RUN_MONGOSH" -eq 0 ]] || echo mongosh); do # mongosh is required only when --skip-mongosh was not given + if ! command -v "$bin" >/dev/null 2>&1; then + echo "Required command '$bin' not found" >&2 + exit 1 + fi +done + +ensure_operator_ready() { + local operator_namespace="documentdb-operator" + local operator_deployment="documentdb-operator" + + if ! kubectl get deployment -n "$operator_namespace" "$operator_deployment" >/dev/null 2>&1; then + echo "DocumentDB operator deployment not found in namespace '$operator_namespace'." >&2 + echo "Install the operator per docs/gateway-tls-validation.md step 1.11 before running this script." >&2 + exit 1 + fi + + if ! 
kubectl -n "$operator_namespace" rollout status deployment "$operator_deployment" --timeout=300s >/dev/null 2>&1; then + echo "DocumentDB operator deployment is not ready. Wait for the operator pods and retry." >&2 + exit 1 + fi +} + +if ! az account show >/dev/null 2>&1; then + echo "Azure CLI not logged in. Run 'az login' first." >&2 + exit 1 +fi + +verify_keyvault_assets() { + if ! az keyvault show -n "$KEYVAULT_NAME" -g "$RESOURCE_GROUP" >/dev/null 2>&1; then + echo "Key Vault $KEYVAULT_NAME not found in resource group $RESOURCE_GROUP" >&2 + exit 1 + fi + if ! az keyvault certificate show --vault-name "$KEYVAULT_NAME" -n "$CERT_NAME" >/dev/null 2>&1; then + echo "Certificate $CERT_NAME not found in Key Vault $KEYVAULT_NAME" >&2 + exit 1 + fi +} + +ensure_csi_driver() { + if [[ "$INSTALL_CSI" -eq 0 ]]; then + echo "Skipping CSI driver installation" + return + fi + if kubectl -n kube-system get ds secrets-store-csi-driver >/dev/null 2>&1; then + echo "Secrets Store CSI driver already installed" + return + fi + echo "Installing Secrets Store CSI driver + Azure provider" + if ! helm repo list 2>/dev/null | awk 'NR>1 {print $1}' | grep -qx "csi-azure"; then + helm repo add csi-azure https://azure.github.io/secrets-store-csi-driver-provider-azure/charts + fi + helm repo update >/dev/null + helm upgrade --install csi-azure-provider csi-azure/csi-secrets-store-provider-azure -n kube-system \ + --set "secrets-store-csi-driver.syncSecret.enabled=true" >/dev/null + kubectl -n kube-system wait --for=condition=Ready pod -l app=secrets-store-csi-driver --timeout=180s >/dev/null + kubectl -n kube-system wait --for=condition=Ready pod -l app=csi-secrets-store-provider-azure --timeout=180s >/dev/null +} + +ensure_cert_manager() { + if [[ "$INSTALL_CERT_MANAGER" -eq 0 ]]; then + echo "Skipping cert-manager install" + return + fi + if ! 
helm repo list 2>/dev/null | awk 'NR>1 {print $1}' | grep -qx "jetstack"; then + helm repo add jetstack https://charts.jetstack.io + fi + helm repo update >/dev/null + helm upgrade --install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --set installCRDs=true >/dev/null + for deploy in cert-manager cert-manager-cainjector cert-manager-webhook; do + kubectl -n cert-manager rollout status deployment "$deploy" --timeout=180s >/dev/null + done +} + +ensure_namespace_and_secret() { + kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - >/dev/null + if kubectl -n "$NAMESPACE" get secret "$SECRET_NAME" >/dev/null 2>&1; then + echo "Credentials secret $SECRET_NAME already exists" + else + kubectl -n "$NAMESPACE" create secret generic "$SECRET_NAME" \ + --from-literal=username="$SECRET_USER" \ + --from-literal=password="$SECRET_PASS" + fi +} + +apply_secret_provider_class() { + TENANT_ID=$(sanitize_id "$(az account show --query tenantId -o tsv)") + { + cat </dev/null 2>&1; then + if kubectl -n "$NAMESPACE" get secret "$PROVIDED_SECRET" -o jsonpath='{.data.tls\.crt}' >/dev/null 2>&1 && \ + kubectl -n "$NAMESPACE" get secret "$PROVIDED_SECRET" -o jsonpath='{.data.tls\.key}' >/dev/null 2>&1; then + echo "TLS secret ${PROVIDED_SECRET} ready" + return + fi + fi + sleep 5 + done + echo "Timed out waiting for TLS secret ${PROVIDED_SECRET}" >&2 + exit 1 +} + +ensure_documentdb_resource() { + if kubectl -n "$NAMESPACE" get documentdb "$DOCDB_NAME" >/dev/null 2>&1; then + echo "Patching DocumentDB ${DOCDB_NAME} into Provided mode" + kubectl -n "$NAMESPACE" patch documentdb "$DOCDB_NAME" --type merge -p "$(cat </dev/null 2>&1; then + sleep 5 + continue + fi + status_json=$(kubectl -n "$NAMESPACE" get documentdb "$DOCDB_NAME" -o json) + tls_ready=$(echo "$status_json" | jq -r '.status.tls.ready // ""' | tr '[:upper:]' '[:lower:]') + tls_message=$(echo "$status_json" | jq -r '.status.tls.message // ""') + 
tls_secret=$(echo "$status_json" | jq -r '.status.tls.secretName // ""') + if [[ "$tls_ready" == "true" ]]; then + echo "DocumentDB reports TLS ready using secret ${tls_secret}" + return + fi + echo "TLS status: ${tls_ready:-} ${tls_message}" + sleep 10 + done + echo "Timed out waiting for DocumentDB TLS readiness" >&2 + exit 1 +} + +### Execution flow +verify_keyvault_assets +ensure_cert_manager +ensure_csi_driver +ensure_namespace_and_secret +ensure_operator_ready + +if [[ -z "$USER_ASSIGNED_CLIENT" ]]; then + UAI_CLIENT=$(sanitize_id "$(az aks show -g "$RESOURCE_GROUP" -n "$AKS_NAME" --query identityProfile.kubeletidentity.clientId -o tsv)") # quotes inside $(...) must not be backslash-escaped, otherwise az receives literal quote characters + USER_ASSIGNED_CLIENT="$UAI_CLIENT" +else + USER_ASSIGNED_CLIENT=$(sanitize_id "$USER_ASSIGNED_CLIENT") +fi + +apply_secret_provider_class +ensure_cert_puller +wait_for_tls_secret +ensure_documentdb_resource +wait_for_documentdb_tls + +echo "DocumentDB provided TLS setup complete." diff --git a/documentdb-playground/tls/scripts/gateway-tls-e2e.sh b/documentdb-playground/tls/scripts/gateway-tls-e2e.sh new file mode 100755 index 00000000..2a366956 --- /dev/null +++ b/documentdb-playground/tls/scripts/gateway-tls-e2e.sh @@ -0,0 +1,186 @@ +#!/usr/bin/env bash + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: gateway-tls-e2e.sh [options] + +Automates the end-to-end walkthrough from docs/gateway-tls-validation.md: + 1. Provision AKS prerequisites + 2. Validate the self-signed TLS flow + 3. Prepare Azure Key Vault assets + 4. 
Transition to provided TLS and validate connectivity + +Options: + --suffix String used to derive resource names (default: current timestamp) + --location Azure region for the resources (default: eastus2) + --resource-group Azure resource group for AKS/Key Vault (default: guanzhou--rg) + --aks-name AKS cluster name (default: guanzhou-) + --keyvault Azure Key Vault name (default: ddb-issuer-) + --namespace Kubernetes namespace for DocumentDB (default: documentdb-preview-ns) + --docdb-name DocumentDB resource name (default: documentdb-preview) + --github-username GitHub username for operator install (optional) + --github-token GitHub token with read:packages scope (optional) + --skip-cluster Assume AKS cluster already exists and skip creation + --help Show this message +EOF +} + +SUFFIX="$(date +%m%d%H%M)" +LOCATION="eastus2" +RESOURCE_GROUP="" +AKS_NAME="" +KEYVAULT_NAME="" +NAMESPACE="documentdb-preview-ns" +DOCDB_NAME="documentdb-preview" +SKIP_CLUSTER=0 +GITHUB_USERNAME="" +GITHUB_TOKEN="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --suffix) + SUFFIX="$2"; shift 2 ;; + --location) + LOCATION="$2"; shift 2 ;; + --resource-group) + RESOURCE_GROUP="$2"; shift 2 ;; + --aks-name) + AKS_NAME="$2"; shift 2 ;; + --keyvault) + KEYVAULT_NAME="$2"; shift 2 ;; + --namespace) + NAMESPACE="$2"; shift 2 ;; + --docdb-name) + DOCDB_NAME="$2"; shift 2 ;; + --github-username) + GITHUB_USERNAME="$2"; shift 2 ;; + --github-token) + GITHUB_TOKEN="$2"; shift 2 ;; + --skip-cluster) + SKIP_CLUSTER=1; shift ;; + --help|-h) + usage; exit 0 ;; + *) + echo "Unknown argument: $1" >&2 + usage + exit 1 ;; + esac +done + +if [[ -z "$RESOURCE_GROUP" ]]; then + RESOURCE_GROUP="guanzhou-${SUFFIX}-rg" +fi +if [[ -z "$AKS_NAME" ]]; then + AKS_NAME="guanzhou-${SUFFIX}" +fi +if [[ -z "$KEYVAULT_NAME" ]]; then + KEYVAULT_NAME="ddb-issuer-${SUFFIX}" +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" + +create_cluster_script="$SCRIPT_DIR/create-cluster.sh" +setup_selfsigned_script="$SCRIPT_DIR/setup-selfsigned-gateway-tls.sh" +tls_check_script="$SCRIPT_DIR/tls-connectivity-check.sh" +setup_akv_script="$SCRIPT_DIR/setup-documentdb-akv.sh" +provided_setup_script="$SCRIPT_DIR/documentdb-provided-mode-setup.sh" + +if [[ ! -x "$create_cluster_script" || ! -x "$setup_selfsigned_script" || ! -x "$tls_check_script" || ! -x "$setup_akv_script" || ! -x "$provided_setup_script" ]]; then + echo "Required helper scripts are missing or not executable" >&2 + exit 1 +fi + +run() { + local description="$1"; shift + echo "$(date +'%F %T') :: ${description}" + "$@" +} + +echo "Running end-to-end gateway TLS validation with:" +echo " Resource Group: ${RESOURCE_GROUP}" +echo " AKS Cluster: ${AKS_NAME}" +echo " Location: ${LOCATION}" +echo " Key Vault: ${KEYVAULT_NAME}" +echo " Namespace: ${NAMESPACE}" +echo " DocumentDB: ${DOCDB_NAME}" +echo + +if [[ "$SKIP_CLUSTER" -eq 0 ]]; then + if [[ -n "$GITHUB_USERNAME" ]]; then + export GITHUB_USERNAME + fi + if [[ -n "$GITHUB_TOKEN" ]]; then + export GITHUB_TOKEN + fi + run "Provision AKS cluster" bash "$create_cluster_script" \ + --cluster-name "$AKS_NAME" \ + --resource-group "$RESOURCE_GROUP" \ + --location "$LOCATION" \ + --install-all +else + run "Ensure kubeconfig for existing cluster" bash "$create_cluster_script" \ + --cluster-name "$AKS_NAME" \ + --resource-group "$RESOURCE_GROUP" \ + --location "$LOCATION" \ + --skip-operator \ + --skip-instance >/dev/null +fi + +run "Deploy DocumentDB self-signed mode" bash "$setup_selfsigned_script" \ + --namespace "$NAMESPACE" \ + --name "$DOCDB_NAME" \ + --skip-cert-manager + +run "Validate self-signed connectivity" bash "$tls_check_script" \ + --mode selfsigned \ + --namespace "$NAMESPACE" \ + --docdb-name "$DOCDB_NAME" \ + --skip-cert-manager + +SVC_IP=$(kubectl -n "$NAMESPACE" get svc "documentdb-service-${DOCDB_NAME}" -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +if [[ -z 
"$SVC_IP" ]]; then + echo "Failed to retrieve LoadBalancer IP for documentdb-service-${DOCDB_NAME}" >&2 + exit 1 +fi +SNI_HOST="${SVC_IP}.sslip.io" +echo "Detected gateway endpoint: ${SVC_IP} (${SNI_HOST})" + +run "Prepare Azure Key Vault" bash "$setup_akv_script" \ + --resource-group "$RESOURCE_GROUP" \ + --location "$LOCATION" \ + --keyvault "$KEYVAULT_NAME" \ + --aks-name "$AKS_NAME" \ + --sni-host "$SNI_HOST" + +KUBELET_MI_CLIENT_ID=$(az aks show -g "$RESOURCE_GROUP" -n "$AKS_NAME" --query identityProfile.kubeletidentity.clientId -o tsv) +if [[ -z "$KUBELET_MI_CLIENT_ID" ]]; then + echo "Unable to obtain kubelet managed identity clientId" >&2 + exit 1 +fi + +yaml_secret_name="documentdb-provided-tls" +run "Switch cluster to provided TLS" bash "$provided_setup_script" \ + --resource-group "$RESOURCE_GROUP" \ + --aks-name "$AKS_NAME" \ + --keyvault "$KEYVAULT_NAME" \ + --cert-name documentdb-gateway \ + --sni-host "$SNI_HOST" \ + --namespace "$NAMESPACE" \ + --docdb-name "$DOCDB_NAME" \ + --provided-secret "$yaml_secret_name" \ + --user-assigned-client "$KUBELET_MI_CLIENT_ID" \ + --skip-cert-manager + +run "Validate provided-mode connectivity" bash "$tls_check_script" \ + --mode provided \ + --namespace "$NAMESPACE" \ + --docdb-name "$DOCDB_NAME" \ + --provided-secret "$yaml_secret_name" \ + --sni-host "$SNI_HOST" \ + --skip-cert-manager + +echo +echo "End-to-end gateway TLS validation completed successfully." \ No newline at end of file diff --git a/documentdb-playground/tls/scripts/setup-documentdb-akv.sh b/documentdb-playground/tls/scripts/setup-documentdb-akv.sh new file mode 100755 index 00000000..dd3e1f39 --- /dev/null +++ b/documentdb-playground/tls/scripts/setup-documentdb-akv.sh @@ -0,0 +1,202 @@ +#!/usr/bin/env bash + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: setup-documentdb-akv.sh [options] + +Idempotently prepares Azure Key Vault and RBAC prerequisites for DocumentDB +Provided TLS mode. 
The script can create the resource group and Key Vault, +assign the required roles to the current user and AKS kubelet identity, and +issue a self-signed certificate with the desired SNI host. + +Options: + -g, --resource-group Azure resource group to host the Key Vault (required) + -l, --location Azure location (required when creating the resource group or Key Vault) + --subscription Azure subscription ID (optional; defaults to current) + --keyvault Azure Key Vault name (required) + --aks-name AKS cluster name for kubelet identity (required) + --cert-name Certificate name in Key Vault (default: documentdb-gateway) + --sni-host Hostname for certificate CN/SAN (required) + --human-object-id Object ID to grant Key Vault Certificates Officer (default: signed-in user) + --human-principal-type Principal type for the human assignment (default: User) + --kubelet-object-id Object ID to grant Key Vault Secrets User (default: derived from AKS) + --kubelet-principal-type Principal type for the kubelet assignment (default: ServicePrincipal) + --validity-months Certificate validity in months (default: 12) + --skip-certificate Skip certificate creation (if managed externally) + -h, --help Show this help message +EOF +} + +RESOURCE_GROUP="" +LOCATION="" +SUBSCRIPTION_ID="" +KEYVAULT_NAME="" +AKS_NAME="" +CERT_NAME="documentdb-gateway" +SNI_HOST="" +HUMAN_OBJECT_ID="" +HUMAN_PRINCIPAL_TYPE="User" +KUBELET_OBJECT_ID="" +KUBELET_PRINCIPAL_TYPE="ServicePrincipal" +VALIDITY_MONTHS=12 +CREATE_CERT=1 + +while [[ $# -gt 0 ]]; do + case "$1" in + -g|--resource-group) + RESOURCE_GROUP="$2"; shift 2 ;; + -l|--location) + LOCATION="$2"; shift 2 ;; + --subscription) + SUBSCRIPTION_ID="$2"; shift 2 ;; + --keyvault) + KEYVAULT_NAME="$2"; shift 2 ;; + --aks-name) + AKS_NAME="$2"; shift 2 ;; + --cert-name) + CERT_NAME="$2"; shift 2 ;; + --sni-host) + SNI_HOST="$2"; shift 2 ;; + --human-object-id) + HUMAN_OBJECT_ID="$2"; shift 2 ;; + --human-principal-type) + HUMAN_PRINCIPAL_TYPE="$2"; shift 2 ;; 
+ --kubelet-object-id) + KUBELET_OBJECT_ID="$2"; shift 2 ;; + --kubelet-principal-type) + KUBELET_PRINCIPAL_TYPE="$2"; shift 2 ;; + --validity-months) + VALIDITY_MONTHS="$2"; shift 2 ;; + --skip-certificate) + CREATE_CERT=0; shift ;; + -h|--help) + usage; exit 0 ;; + *) + echo "Unknown argument: $1" >&2 + usage; exit 1 ;; + esac +done + +if [[ -z "$RESOURCE_GROUP" || -z "$KEYVAULT_NAME" || -z "$AKS_NAME" || -z "$SNI_HOST" ]]; then + echo "Missing required arguments." >&2 + usage + exit 1 +fi + +sanitize_id() { + printf '%s' "$1" | tr -d '\r\n' +} + +for cmd in az jq; do + if ! command -v "$cmd" >/dev/null 2>&1; then + echo "Command '$cmd' not found in PATH" >&2 + exit 1 + fi +done + +if ! az account show >/dev/null 2>&1; then + echo "Azure CLI not logged in. Run 'az login' first." >&2 + exit 1 +fi + +if [[ -n "$SUBSCRIPTION_ID" ]]; then + az account set --subscription "$SUBSCRIPTION_ID" >/dev/null +fi +SUBSCRIPTION_ID=$(sanitize_id "$(az account show --query id -o tsv)") + +ensure_resource_group() { + if az group show -n "$RESOURCE_GROUP" >/dev/null 2>&1; then + echo "Resource group $RESOURCE_GROUP already exists" + else + if [[ -z "$LOCATION" ]]; then + echo "Resource group $RESOURCE_GROUP not found and --location not provided" >&2 + exit 1 + fi + echo "Creating resource group $RESOURCE_GROUP in $LOCATION" + az group create -n "$RESOURCE_GROUP" -l "$LOCATION" >/dev/null + fi +} + +ensure_key_vault() { + if az keyvault show -n "$KEYVAULT_NAME" -g "$RESOURCE_GROUP" >/dev/null 2>&1; then + echo "Key Vault $KEYVAULT_NAME already exists" + else + if [[ -z "$LOCATION" ]]; then + echo "Key Vault $KEYVAULT_NAME not found and --location not provided" >&2 + exit 1 + fi + echo "Creating Key Vault $KEYVAULT_NAME in $LOCATION" + az keyvault create -g "$RESOURCE_GROUP" -n "$KEYVAULT_NAME" -l "$LOCATION" --enable-rbac-authorization true >/dev/null + fi +} + +resolve_object_ids() { + if [[ -z "$HUMAN_OBJECT_ID" ]]; then + HUMAN_OBJECT_ID=$(az ad signed-in-user show --query id -o 
tsv) + fi + HUMAN_OBJECT_ID=$(sanitize_id "$HUMAN_OBJECT_ID") + echo "Using signed-in user objectId $HUMAN_OBJECT_ID for certificates officer role" + if [[ -z "$KUBELET_OBJECT_ID" ]]; then + KUBELET_OBJECT_ID=$(az aks show -g "$RESOURCE_GROUP" -n "$AKS_NAME" --query identityProfile.kubeletidentity.objectId -o tsv) + fi + KUBELET_OBJECT_ID=$(sanitize_id "$KUBELET_OBJECT_ID") + echo "Derived kubelet objectId $KUBELET_OBJECT_ID from AKS cluster" +} + +ensure_role_assignment() { + local ASSIGNEE="$1" + local ROLE_NAME="$2" + local PRINCIPAL_TYPE="$3" + ASSIGNEE=$(sanitize_id "$ASSIGNEE") + local SCOPE="/subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.KeyVault/vaults/${KEYVAULT_NAME}" + if az role assignment list --assignee-object-id "$ASSIGNEE" --role "$ROLE_NAME" --scope "$SCOPE" --query '[0]' -o tsv 2>/dev/null | grep -q '.'; then + echo "Role $ROLE_NAME already assigned to $ASSIGNEE" + else + echo "Assigning $ROLE_NAME to $ASSIGNEE" + az role assignment create --assignee-object-id "$ASSIGNEE" --assignee-principal-type "$PRINCIPAL_TYPE" --role "$ROLE_NAME" --scope "$SCOPE" >/dev/null + fi +} + +ensure_certificate() { + if [[ "$CREATE_CERT" -eq 0 ]]; then + echo "Skipping certificate creation as requested" + return + fi + if az keyvault certificate show --vault-name "$KEYVAULT_NAME" -n "$CERT_NAME" >/dev/null 2>&1; then + echo "Certificate $CERT_NAME already exists in Key Vault" + return + fi + echo "Creating self-signed certificate $CERT_NAME with subject $SNI_HOST" + POLICY_FILE=$(mktemp) + cat < "$POLICY_FILE" +{ + "issuerParameters": { "name": "Self" }, + "x509CertificateProperties": { + "subject": "CN=${SNI_HOST}", + "subjectAlternativeNames": { "dnsNames": [ "${SNI_HOST}" ] }, + "keyUsage": [ "digitalSignature", "keyEncipherment" ], + "validityInMonths": ${VALIDITY_MONTHS} + }, + "keyProperties": { + "exportable": true, + "keyType": "RSA", + "keySize": 2048, + "reuseKey": false + }, + "secretProperties": { "contentType": 
"application/x-pem-file" } +} +EOF + az keyvault certificate create --vault-name "$KEYVAULT_NAME" -n "$CERT_NAME" --policy @"$POLICY_FILE" >/dev/null + rm -f "$POLICY_FILE" +} + +ensure_resource_group +ensure_key_vault +resolve_object_ids +ensure_role_assignment "$HUMAN_OBJECT_ID" "Key Vault Certificates Officer" "$HUMAN_PRINCIPAL_TYPE" +ensure_role_assignment "$KUBELET_OBJECT_ID" "Key Vault Secrets User" "$KUBELET_PRINCIPAL_TYPE" +ensure_certificate + +echo "Azure Key Vault setup complete." diff --git a/documentdb-playground/tls/scripts/setup-selfsigned-gateway-tls.sh b/documentdb-playground/tls/scripts/setup-selfsigned-gateway-tls.sh new file mode 100755 index 00000000..ae1d167d --- /dev/null +++ b/documentdb-playground/tls/scripts/setup-selfsigned-gateway-tls.sh @@ -0,0 +1,232 @@ +#!/usr/bin/env bash + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: setup-selfsigned-gateway-tls.sh [options] + +Installs cert-manager (unless skipped) and configures a DocumentDB cluster to use +SelfSigned gateway TLS as documented in docs/gateway-tls-validation.md. 
+ +Options: + -n, --namespace Kubernetes namespace for the DocumentDB resource (default: documentdb-preview-ns) + --name DocumentDB resource name (default: documentdb-preview) + --docdb-version DocumentDB engine version (default: 16) + --docdb-image DocumentDB image reference (default: ghcr.io/microsoft/documentdb/documentdb-local:) + --gateway-image Gateway image reference (default: same as --docdb-image) + --pvc-size Persistent volume claim size (default: 10Gi) + --storage-class StorageClass to use for PVCs (optional) + --secret-name Credentials secret name (default: documentdb-credentials) + --username DocumentDB username (default: docdbuser) + --password DocumentDB password (default: P@ssw0rd123) + --skip-cert-manager Skip cert-manager install/upgrade + --cert-manager-version Helm chart version for cert-manager (optional) + --timeout Wait timeout for TLS readiness (default: 900) + --skip-wait Do not wait for TLS readiness + -h, --help Show this help text +EOF +} + +NAMESPACE="documentdb-preview-ns" +DOCDB_NAME="documentdb-preview" +DOCDB_VERSION="16" +DOCDB_IMAGE="" +GATEWAY_IMAGE="" +PVC_SIZE="10Gi" +STORAGE_CLASS="" +SECRET_NAME="documentdb-credentials" +SECRET_USER="docdbuser" +SECRET_PASS="P@ssw0rd123" +INSTALL_CERT_MANAGER=1 +CERT_MANAGER_VERSION="" +CERT_MANAGER_RELEASE="cert-manager" +CERT_MANAGER_NAMESPACE="cert-manager" +TIMEOUT=900 +WAIT_FOR_READY=1 + +while [[ $# -gt 0 ]]; do + case "$1" in + -n|--namespace) + NAMESPACE="$2" + shift 2 + ;; + --name) + DOCDB_NAME="$2" + shift 2 + ;; + --docdb-version) + DOCDB_VERSION="$2" + shift 2 + ;; + --docdb-image) + DOCDB_IMAGE="$2" + shift 2 + ;; + --gateway-image) + GATEWAY_IMAGE="$2" + shift 2 + ;; + --pvc-size) + PVC_SIZE="$2" + shift 2 + ;; + --storage-class) + STORAGE_CLASS="$2" + shift 2 + ;; + --secret-name) + SECRET_NAME="$2" + shift 2 + ;; + --username) + SECRET_USER="$2" + shift 2 + ;; + --password) + SECRET_PASS="$2" + shift 2 + ;; + --skip-cert-manager) + INSTALL_CERT_MANAGER=0 + shift + ;; + 
--cert-manager-version) + CERT_MANAGER_VERSION="$2" + shift 2 + ;; + --timeout) + TIMEOUT="$2" + shift 2 + ;; + --skip-wait) + WAIT_FOR_READY=0 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage + exit 1 + ;; + esac +done + +if [[ -z "$DOCDB_IMAGE" ]]; then + DOCDB_IMAGE="ghcr.io/microsoft/documentdb/documentdb-local:${DOCDB_VERSION}" +fi +if [[ -z "$GATEWAY_IMAGE" ]]; then + GATEWAY_IMAGE="$DOCDB_IMAGE" +fi + +for bin in kubectl helm; do + if ! command -v "$bin" >/dev/null 2>&1; then + echo "Required command '$bin' not found on PATH" >&2 + exit 1 + fi +done + +if [[ "$INSTALL_CERT_MANAGER" -eq 1 ]]; then + if ! helm repo list 2>/dev/null | awk 'NR>1 {print $1}' | grep -qx "jetstack"; then + helm repo add jetstack https://charts.jetstack.io + fi + helm repo update >/dev/null + cm_args=(upgrade --install "$CERT_MANAGER_RELEASE" jetstack/cert-manager --namespace "$CERT_MANAGER_NAMESPACE" --create-namespace --set installCRDs=true) + if [[ -n "$CERT_MANAGER_VERSION" ]]; then + cm_args+=(--version "$CERT_MANAGER_VERSION") + fi + helm "${cm_args[@]}" + for deploy in cert-manager cert-manager-cainjector cert-manager-webhook; do + # Wait for cert-manager control plane pods to be ready before requesting certificates + kubectl -n "$CERT_MANAGER_NAMESPACE" rollout status deployment "$deploy" --timeout=180s + done +fi + +kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - + +kubectl -n "$NAMESPACE" create secret generic "$SECRET_NAME" \ + --from-literal=username="$SECRET_USER" \ + --from-literal=password="$SECRET_PASS" \ + --dry-run=client -o yaml | kubectl apply -f - + +{ + cat </dev/null 2>&1; then + echo "DocumentDB resource not yet available; retrying..." 
+ sleep 5 + continue + fi + + tls_ready=$(kubectl -n "$NAMESPACE" get documentdb "$DOCDB_NAME" -o jsonpath='{.status.tls.ready}' 2>/dev/null || echo "") + tls_message=$(kubectl -n "$NAMESPACE" get documentdb "$DOCDB_NAME" -o jsonpath='{.status.tls.message}' 2>/dev/null || echo "") + tls_secret=$(kubectl -n "$NAMESPACE" get documentdb "$DOCDB_NAME" -o jsonpath='{.status.tls.secretName}' 2>/dev/null || echo "") + + tls_ready=${tls_ready,,} # lowercase so the comparison with "true" below is case-insensitive + if [[ "$tls_ready" == "" ]]; then + tls_ready="" + fi + + if [[ "$tls_message" == "" ]]; then + tls_message="" + fi + if [[ "$tls_secret" == "" ]]; then + tls_secret="" + fi + + if [[ "$tls_ready" == "true" ]]; then + echo "Gateway TLS ready. Secret: ${tls_secret}" + break + fi + + echo "TLS status: ${tls_ready:-} ${tls_message}" + sleep 10 + done + + if (( SECONDS >= deadline )); then + echo "Timed out waiting for TLS readiness" >&2 + exit 1 + fi + + if svc_ip=$(kubectl -n "$NAMESPACE" get svc documentdb-service-"$DOCDB_NAME" -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null); then + if [[ -n "$svc_ip" ]]; then + echo "LoadBalancer IP: ${svc_ip}" + echo "Suggested SNI hostname: ${svc_ip}.sslip.io" + fi + fi +fi + +echo "SelfSigned gateway TLS setup complete." diff --git a/documentdb-playground/tls/scripts/tls-connectivity-check.sh b/documentdb-playground/tls/scripts/tls-connectivity-check.sh new file mode 100755 index 00000000..e4b312b7 --- /dev/null +++ b/documentdb-playground/tls/scripts/tls-connectivity-check.sh @@ -0,0 +1,403 @@ +#!/usr/bin/env bash + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: tls-connectivity-check.sh [options] + +Automates validation of DocumentDB gateway TLS setup. Currently supports the +SelfSigned flow end-to-end and can reuse the same structure for Provided mode. + +Steps executed: + 1. Optionally install cert-manager + 2. Create namespace + credentials secret if missing + 3. Apply a DocumentDB manifest for gateway TLS (SelfSigned today) + 4. 
Wait for TLS readiness and capture service endpoint + 5. Run mongosh ping against the gateway + +Options: + -n, --namespace DocumentDB namespace (default: documentdb-preview-ns) + --docdb-name DocumentDB resource name (default: documentdb-preview) + --docdb-version DocumentDB version (default: 16) + --secret-name Credentials secret name (default: documentdb-credentials) + --username DocumentDB username (default: docdbuser) + --password DocumentDB password (default: P@ssw0rd123) + --pvc-size Volume size (default: 10Gi) + --storage-class StorageClass to use (optional) + --mode TLS mode: selfsigned|provided (default: selfsigned) + --provided-secret Secret with tls.crt/tls.key (required for provided if --keyvault not set) + --keyvault Azure Key Vault name to download the gateway certificate (optional) + --keyvault-cert Azure Key Vault certificate name (default: documentdb-gateway) + --sni-host Hostname used for TLS verification (recommended for provided mode) + --skip-cert-manager Skip cert-manager install/upgrade + --timeout Timeout for TLS readiness (default: 900) + --skip-wait Skip waiting for TLS readiness/mongosh + -h, --help Show this message +EOF +} + +NAMESPACE="documentdb-preview-ns" +DOCDB_NAME="documentdb-preview" +DOCDB_VERSION="16" +SECRET_NAME="documentdb-credentials" +SECRET_USER="docdbuser" +SECRET_PASS="P@ssw0rd123" +PVC_SIZE="10Gi" +STORAGE_CLASS="" +MODE="selfsigned" +PROVIDED_SECRET="" +KEYVAULT_NAME="" +KEYVAULT_CERT_NAME="documentdb-gateway" +SNI_HOST="" +INSTALL_CERT_MANAGER=1 +CERT_MANAGER_RELEASE="cert-manager" +CERT_MANAGER_NAMESPACE="cert-manager" +TIMEOUT=900 +WAIT_FOR_READY=1 + +while [[ $# -gt 0 ]]; do + case "$1" in + -n|--namespace) + NAMESPACE="$2"; shift 2 ;; + --docdb-name) + DOCDB_NAME="$2"; shift 2 ;; + --docdb-version) + DOCDB_VERSION="$2"; shift 2 ;; + --secret-name) + SECRET_NAME="$2"; shift 2 ;; + --username) + SECRET_USER="$2"; shift 2 ;; + --password) + SECRET_PASS="$2"; shift 2 ;; + --pvc-size) + PVC_SIZE="$2"; shift 2 ;; + 
--storage-class) + STORAGE_CLASS="$2"; shift 2 ;; + --mode) + MODE="$2"; shift 2 ;; + --provided-secret) + PROVIDED_SECRET="$2"; shift 2 ;; + --keyvault) + KEYVAULT_NAME="$2"; shift 2 ;; + --keyvault-cert) + KEYVAULT_CERT_NAME="$2"; shift 2 ;; + --sni-host) + SNI_HOST="$2"; shift 2 ;; + --skip-cert-manager) + INSTALL_CERT_MANAGER=0; shift ;; + --timeout) + TIMEOUT="$2"; shift 2 ;; + --skip-wait) + WAIT_FOR_READY=0; shift ;; + -h|--help) + usage; exit 0 ;; + *) + echo "Unknown argument: $1" >&2 + usage; exit 1 ;; + esac +done + +MODE=${MODE,,} +if [[ "$MODE" != "selfsigned" && "$MODE" != "provided" ]]; then + echo "Invalid --mode '$MODE'" >&2 + usage; exit 1 +fi + +for bin in kubectl helm mongosh jq openssl; do + if ! command -v "$bin" >/dev/null 2>&1; then + echo "Required command '$bin' not found" >&2 + exit 1 + fi +done + +ensure_operator_ready() { + local operator_namespace="documentdb-operator" + local operator_deployment="documentdb-operator" + + if ! kubectl get deployment -n "$operator_namespace" "$operator_deployment" >/dev/null 2>&1; then + echo "DocumentDB operator deployment not found in namespace '$operator_namespace'." >&2 + echo "Follow docs/gateway-tls-validation.md step 1.11 to install the operator before running this script." >&2 + exit 1 + fi + + if ! kubectl -n "$operator_namespace" rollout status deployment "$operator_deployment" --timeout=300s >/dev/null 2>&1; then + echo "DocumentDB operator deployment is not ready. Wait for the operator pods to become ready and retry." >&2 + exit 1 + fi +} + +if [[ -n "$KEYVAULT_NAME" ]]; then + if ! command -v az >/dev/null 2>&1; then + echo "Required command 'az' not found for Key Vault access" >&2 + exit 1 + fi + if ! az account show >/dev/null 2>&1; then + echo "Azure CLI not logged in. Run 'az login' first." >&2 + exit 1 + fi +fi + +if [[ "$INSTALL_CERT_MANAGER" -eq 1 ]]; then + if ! 
helm repo list 2>/dev/null | awk 'NR>1 {print $1}' | grep -qx "jetstack"; then + helm repo add jetstack https://charts.jetstack.io + fi + helm repo update >/dev/null + helm upgrade --install "$CERT_MANAGER_RELEASE" jetstack/cert-manager \ + --namespace "$CERT_MANAGER_NAMESPACE" \ + --create-namespace \ + --set installCRDs=true + for deploy in cert-manager cert-manager-cainjector cert-manager-webhook; do + kubectl -n "$CERT_MANAGER_NAMESPACE" rollout status deployment "$deploy" --timeout=180s + done +fi + +kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - >/dev/null +kubectl -n "$NAMESPACE" create secret generic "$SECRET_NAME" \ + --from-literal=username="$SECRET_USER" \ + --from-literal=password="$SECRET_PASS" \ + --dry-run=client -o yaml | kubectl apply -f - >/dev/null + +ensure_operator_ready + +apply_documentdb_manifest() { + if [[ "$MODE" == "selfsigned" ]]; then + { + cat <&2 + exit 1 + fi + cat </dev/null 2>&1; then + echo "DocumentDB resource not ready, retrying..." + sleep 5 + continue + fi + status_json=$(kubectl -n "$NAMESPACE" get documentdb "$DOCDB_NAME" -o json) + tls_ready=$(echo "$status_json" | jq -r '.status.tls.ready // ""' | tr '[:upper:]' '[:lower:]') + tls_message=$(echo "$status_json" | jq -r '.status.tls.message // ""') + tls_secret=$(echo "$status_json" | jq -r '.status.tls.secretName // ""') + + if [[ "$tls_ready" == "true" ]]; then + echo "Gateway TLS ready. Secret: ${tls_secret}" + break + fi + echo "TLS status: ${tls_ready:-} ${tls_message}" + sleep 10 + done + + if (( SECONDS >= deadline )); then + echo "Timed out waiting for TLS readiness" >&2 + exit 1 + fi + + svc_name="documentdb-service-${DOCDB_NAME}" + echo "Waiting for service ${svc_name} (timeout ${TIMEOUT}s)..." + while (( SECONDS < deadline )); do + if kubectl -n "$NAMESPACE" get svc "$svc_name" >/dev/null 2>&1; then + break + fi + echo "Service ${svc_name} not created yet, retrying..." 
+ sleep 5 + done + + if (( SECONDS >= deadline )); then + echo "Timed out waiting for service ${svc_name}" >&2 + exit 1 + fi + + svc_ip="" + while (( SECONDS < deadline )); do + svc_ip=$(kubectl -n "$NAMESPACE" get svc "$svc_name" -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || true) + if [[ -n "$svc_ip" ]]; then + break + fi + echo "Service ${svc_name} pending LoadBalancer IP, retrying..." + sleep 5 + done + + if [[ -z "$svc_ip" ]]; then + echo "Service LoadBalancer IP not assigned yet" >&2 + exit 1 + fi + echo "LoadBalancer IP: ${svc_ip}" + default_sni_host="${svc_ip}.sslip.io" + echo "Suggested SNI hostname: ${default_sni_host}" + if [[ -z "$SNI_HOST" ]]; then + SNI_HOST="$default_sni_host" + fi + host_for_uri="$svc_ip" + extra_query='&tlsAllowInvalidCertificates=true&tlsAllowInvalidHostnames=true' + tmp_cert="" + if [[ "$MODE" == "provided" ]]; then + extra_query='&tlsAllowInvalidHostnames=true' + if [[ -n "$PROVIDED_SECRET" ]]; then + echo "Provided TLS secret in use: $PROVIDED_SECRET" + fi + if [[ -n "$SNI_HOST" ]]; then + if getent hosts "$SNI_HOST" >/dev/null 2>&1; then + host_for_uri="$SNI_HOST" + else + echo "Warning: --sni-host $SNI_HOST did not resolve; using LoadBalancer IP for connection" >&2 + fi + else + echo "Warning: --sni-host not supplied; TLS hostname verification may require relaxation" >&2 + fi + tmp_cert=$(mktemp) + if [[ -n "$KEYVAULT_NAME" ]]; then + cert_payload=$(az keyvault certificate show --vault-name "$KEYVAULT_NAME" -n "$KEYVAULT_CERT_NAME" --query cer -o tsv 2>/dev/null || true) + if [[ -z "$cert_payload" ]]; then + echo "Failed to fetch certificate $KEYVAULT_CERT_NAME from Key Vault $KEYVAULT_NAME" >&2 + rm -f "$tmp_cert" + exit 1 + fi + if ! 
printf '%s' "$cert_payload" | tr -d '\r\n ' | base64 -d 2>/dev/null | openssl x509 -inform der -out "$tmp_cert" >/dev/null 2>&1; then
+        echo "Failed to convert Key Vault certificate $KEYVAULT_CERT_NAME to PEM" >&2
+        rm -f "$tmp_cert"
+        exit 1
+      fi
+      echo "Using certificate from Key Vault $KEYVAULT_NAME/$KEYVAULT_CERT_NAME"
+    else
+      if [[ -z "$PROVIDED_SECRET" ]]; then
+        echo "--provided-secret is required when --keyvault is not specified" >&2
+        rm -f "$tmp_cert"
+        exit 1
+      fi
+      if ! kubectl -n "$NAMESPACE" get secret "$PROVIDED_SECRET" -o jsonpath='{.data.tls\.crt}' | base64 -d >"$tmp_cert" 2>/dev/null; then
+        echo "Failed to extract tls.crt from secret $PROVIDED_SECRET" >&2
+        rm -f "$tmp_cert"
+        exit 1
+      fi
+      echo "Using certificate from Kubernetes secret $PROVIDED_SECRET"
+    fi
+  fi
+
+  if command -v mongosh >/dev/null 2>&1; then
+    mongo_user=$(kubectl -n "$NAMESPACE" get secret "$SECRET_NAME" -o jsonpath='{.data.username}' | base64 -d)  # raw value read back from the credentials secret
+    mongo_pass=$(kubectl -n "$NAMESPACE" get secret "$SECRET_NAME" -o jsonpath='{.data.password}' | base64 -d)  # raw value; may contain URI-reserved characters
+    conn_uri="mongodb://$(jq -rn --arg v "$mongo_user" '$v|@uri'):$(jq -rn --arg v "$mongo_pass" '$v|@uri')@${host_for_uri}:10260/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&replicaSet=rs0${extra_query}"  # userinfo is percent-encoded: a raw '@' or ':' (as in the default password) makes the URI unparsable
+    echo "Running mongosh ping..."
+    mongosh_args=("$conn_uri" "--eval" "db.runCommand({ ping: 1 })")  # URI is quoted: unquoted it is word-split and its '?' is treated as a glob pattern
+    if [[ "$MODE" == "provided" ]]; then
+      mongosh_args=("$conn_uri" "--tlsCAFile" "$tmp_cert" "--eval" "db.runCommand({ ping: 1 })")  # provided mode verifies against the extracted certificate
+      if [[ -n "$SNI_HOST" && $(mongosh --help 2>&1 | grep -c -- '--tlsHostname') -gt 0 ]]; then
+        mongosh_args+=("--tlsHostname" "$SNI_HOST")
+      fi
+      mongosh_args+=("--tlsAllowInvalidHostnames")
+    fi
+    mongosh_log=$(mktemp)
+    if mongosh "${mongosh_args[@]}" >"$mongosh_log" 2>&1; then
+      cat "$mongosh_log"
+      echo "mongosh connectivity OK"
+    else
+      if [[ "$MODE" == "provided" ]] && grep -qi 'self-signed certificate' "$mongosh_log"; then
+        echo "mongosh encountered a self-signed certificate; retrying with --tlsAllowInvalidCertificates" >&2
+        if [[ " ${mongosh_args[*]} " != *" --tlsAllowInvalidCertificates "* ]]; then
+          mongosh_args+=("--tlsAllowInvalidCertificates")
+        fi
+        if mongosh "${mongosh_args[@]}" >"$mongosh_log" 2>&1; then
+          cat "$mongosh_log"
+          echo "mongosh connectivity OK (certificate relaxed)"
+        else
+          cat "$mongosh_log" >&2
+          echo "mongosh connectivity failed" >&2
+          rm -f "$mongosh_log"
+          if [[ -n "$tmp_cert" ]]; then rm -f "$tmp_cert"; fi
+          exit 1
+        fi
+      elif [[ "$MODE" == "provided" ]] && grep -qi 'hostname' "$mongosh_log"; then
+        echo "mongosh encountered a hostname mismatch; retrying with --tlsAllowInvalidHostnames" >&2
+        if [[ " ${mongosh_args[*]} " != *" --tlsAllowInvalidHostnames "* ]]; then
+          mongosh_args+=("--tlsAllowInvalidHostnames")
+        fi
+        if mongosh "${mongosh_args[@]}" >"$mongosh_log" 2>&1; then
+          cat "$mongosh_log"
+          echo "mongosh connectivity OK (hostname relaxed)"
+        else
+          cat "$mongosh_log" >&2
+          echo "mongosh connectivity failed" >&2
+          rm -f "$mongosh_log"
+          if [[ -n "$tmp_cert" ]]; then rm -f "$tmp_cert"; fi
+          exit 1
+        fi
+      else
+        cat "$mongosh_log" >&2
+        echo "mongosh connectivity failed" >&2
+        rm -f "$mongosh_log"
+        if [[ -n "$tmp_cert" ]]; then rm -f "$tmp_cert"; fi
+        exit 1
+      fi
+    fi
+    rm -f "$mongosh_log"
+    if [[ -n "$tmp_cert" ]]; then rm -f
"$tmp_cert"; fi + else + echo "mongosh not found; skipping connectivity test" >&2 + if [[ -n "$tmp_cert" ]]; then rm -f "$tmp_cert"; fi + fi +fi + +echo "DocumentDB gateway validation complete." diff --git a/mkdocs.yml b/mkdocs.yml index 6294d8cf..847b93f3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,11 +1,13 @@ site_name: DocumentDB-Kubernetes-Operator repo_url: https://github.com/microsoft/documentdb-kubernetes-operator +docs_dir: docs theme: name: readthedocs nav: - Preview: - Get Started: operator-public-documentation/preview/index.md + - Advanced Configuration: operator-public-documentation/v1/advanced-configuration/README.md - Tools: - Kubectl Plugin: ../documentdb-kubectl-plugin/kubectl-plugin.md diff --git a/operator/cnpg-plugins/sidecar-injector/internal/lifecycle/lifecycle.go b/operator/cnpg-plugins/sidecar-injector/internal/lifecycle/lifecycle.go index 52ce95be..4aae66e8 100644 --- a/operator/cnpg-plugins/sidecar-injector/internal/lifecycle/lifecycle.go +++ b/operator/cnpg-plugins/sidecar-injector/internal/lifecycle/lifecycle.go @@ -182,12 +182,55 @@ func (impl Implementation) reconcileMetadata( }, } + // If TLS secret parameter provided, mount it at /tls + // Track whether TLS secret is configured to augment container args later + hasTLSSecret := false + if tlsSecret, ok := helper.Parameters["gatewayTLSSecret"]; ok && tlsSecret != "" { + // Append volume only if not already present + found := false + for _, v := range mutatedPod.Spec.Volumes { + if v.Name == "gateway-tls" { + found = true + break + } + } + if !found { + mutatedPod.Spec.Volumes = append(mutatedPod.Spec.Volumes, corev1.Volume{ + Name: "gateway-tls", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{SecretName: tlsSecret}, + }, + }) + } + // Add mount to sidecar container + sidecar.VolumeMounts = append(sidecar.VolumeMounts, corev1.VolumeMount{Name: "gateway-tls", MountPath: "/tls", ReadOnly: true}) + // Provide env vars for gateway to load the mounted certificate 
and key
+		// Most gateway images respect CERT_PATH and KEY_FILE; keep TLS_CERT_DIR for backward-compat
+		sidecar.Env = append(sidecar.Env,
+			corev1.EnvVar{Name: "TLS_CERT_DIR", Value: "/tls"},
+			corev1.EnvVar{Name: "CERT_PATH", Value: "/tls/tls.crt"},
+			corev1.EnvVar{Name: "KEY_FILE", Value: "/tls/tls.key"},
+		)
+		// Mark that TLS secret is present so we can also pass explicit CLI args
+		hasTLSSecret = true
+		log.Printf("Injected TLS secret volume for gateway: %s", tlsSecret)
+	}
+
+	// Build base args and append TLS file args if a TLS secret is configured
+	args := []string{"--start-pg", "false", "--pg-port", "5432"}
+
 	// Check if the pod has the label replication_cluster_type=replica
+	// Replicas and pods that are not the target primary get --create-user false; both branches only edit args, which is assigned to sidecar.Args once below
 	if mutatedPod.Labels["replication_cluster_type"] == "replica" || cluster.Status.TargetPrimary != mutatedPod.Name {
-		sidecar.Args = []string{"--create-user", "false", "--start-pg", "false", "--pg-port", "5432"}
+		args = append([]string{"--create-user", "false"}, args...) // a direct sidecar.Args assignment here would be overwritten by sidecar.Args = args
 	} else {
-		sidecar.Args = []string{"--create-user", "true", "--start-pg", "false", "--pg-port", "5432"}
+		args = append([]string{"--create-user", "true"}, args...)
+	}
+	if hasTLSSecret {
+		// Pass cert and key via CLI args to align with emulator_entrypoint.sh interface
+		args = append(args, "--cert-path", "/tls/tls.crt", "--key-file", "/tls/tls.key")
 	}
+	sidecar.Args = args
 
 	// Inject the sidecar container
 	err = object.InjectPluginSidecar(mutatedPod, sidecar, false)
diff --git a/operator/documentdb-helm-chart/crds/db.microsoft.com_documentdbs.yaml b/operator/documentdb-helm-chart/crds/db.microsoft.com_documentdbs.yaml index db773b8c..b711909f 100644 --- a/operator/documentdb-helm-chart/crds/db.microsoft.com_documentdbs.yaml +++ b/operator/documentdb-helm-chart/crds/db.microsoft.com_documentdbs.yaml @@ -199,6 +199,72 @@ spec: minimum: 0 type: integer type: object + tls: + description: TLS configures certificate management for DocumentDB + components.
+ properties: + gateway: + description: 'Gateway configures TLS for the gateway sidecar (Phase + 1: certificate provisioning only).' + properties: + certManager: + description: CertManager config when Mode=CertManager. + properties: + dnsNames: + description: DNSNames for the certificate SANs. If empty, + operator will add Service DNS names. + items: + type: string + type: array + issuerRef: + description: IssuerRef references a cert-manager Issuer + or ClusterIssuer. + properties: + group: + description: Group defaults to cert-manager.io + type: string + kind: + description: Kind of issuer (Issuer or ClusterIssuer). + Defaults to Issuer. + type: string + name: + type: string + required: + - name + type: object + secretName: + description: SecretName optional explicit name for the + target secret. If empty a default is chosen. + type: string + required: + - issuerRef + type: object + mode: + description: Mode selects the TLS management strategy. + enum: + - Disabled + - SelfSigned + - CertManager + - Provided + type: string + provided: + description: Provided secret reference when Mode=Provided. + properties: + secretName: + type: string + required: + - secretName + type: object + type: object + globalEndpoints: + description: GlobalEndpoints configures TLS for global endpoints + (placeholder for future phases). + type: object + postgres: + description: Postgres configures TLS for the Postgres server (placeholder + for future phases). + type: object + type: object walReplicaPluginName: description: WalReplicaPluginName is the name of the wal replica plugin to use. @@ -213,14 +279,24 @@ spec: properties: connectionString: type: string - localPrimary: - type: string status: description: Status reflects the status field from the underlying CNPG Cluster. type: string targetPrimary: type: string + localPrimary: + type: string + tls: + description: TLS reports gateway TLS provisioning status (Phase 1). 
+ properties: + message: + type: string + ready: + type: boolean + secretName: + type: string + type: object type: object type: object served: true diff --git a/operator/documentdb-helm-chart/templates/05_clusterrole.yaml b/operator/documentdb-helm-chart/templates/05_clusterrole.yaml index e3ed8422..d7be3641 100644 --- a/operator/documentdb-helm-chart/templates/05_clusterrole.yaml +++ b/operator/documentdb-helm-chart/templates/05_clusterrole.yaml @@ -36,6 +36,10 @@ rules: - apiGroups: ["postgresql.cnpg.io"] resources: ["clusters", "publications", "subscriptions", "clusters/status"] verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["cert-manager.io"] + resources: ["certificates", "certificates/status", "certificates/finalizers", "issuers", "clusterissuers"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Backup permissions - apiGroups: ["db.microsoft.com"] resources: ["backups", "backups/status", "backups/finalizers"] diff --git a/operator/documentdb-helm-chart/values.yaml b/operator/documentdb-helm-chart/values.yaml index a5610d86..65c3628b 100644 --- a/operator/documentdb-helm-chart/values.yaml +++ b/operator/documentdb-helm-chart/values.yaml @@ -17,13 +17,13 @@ walReplica: false # Set to true to deploy the WAL replica plugin image: documentdbk8soperator: - repository: ghcr.io/microsoft/documentdb-kubernetes-operator/operator + repository: ghcr.io/documentdb/documentdb-kubernetes-operator/operator pullPolicy: Always sidecarinjector: - repository: ghcr.io/microsoft/documentdb-kubernetes-operator/sidecar + repository: ghcr.io/documentdb/documentdb-kubernetes-operator/sidecar pullPolicy: Always walreplica: - repository: ghcr.io/microsoft/documentdb-kubernetes-operator/wal-replica + repository: ghcr.io/documentdb/documentdb-kubernetes-operator/wal-replica pullPolicy: Always cloudnative-pg: namespaceOverride: cnpg-system diff --git a/operator/src/api/preview/documentdb_types.go 
b/operator/src/api/preview/documentdb_types.go index bdc21415..9498cb25 100644 --- a/operator/src/api/preview/documentdb_types.go +++ b/operator/src/api/preview/documentdb_types.go @@ -63,6 +63,9 @@ type DocumentDBSpec struct { Timeouts Timeouts `json:"timeouts,omitempty"` + // TLS configures certificate management for DocumentDB components. + TLS *TLSConfiguration `json:"tls,omitempty"` + // Overrides default log level for the DocumentDB cluster. LogLevel string `json:"logLevel,omitempty"` @@ -137,6 +140,60 @@ type Timeouts struct { StopDelay int32 `json:"stopDelay,omitempty"` } +// TLSConfiguration aggregates TLS settings across DocumentDB components. +type TLSConfiguration struct { + // Gateway configures TLS for the gateway sidecar (Phase 1: certificate provisioning only). + Gateway *GatewayTLS `json:"gateway,omitempty"` + + // Postgres configures TLS for the Postgres server (placeholder for future phases). + Postgres *PostgresTLS `json:"postgres,omitempty"` + + // GlobalEndpoints configures TLS for global endpoints (placeholder for future phases). + GlobalEndpoints *GlobalEndpointsTLS `json:"globalEndpoints,omitempty"` +} + +// GatewayTLS defines TLS configuration for the gateway sidecar (Phase 1: certificate provisioning only) +type GatewayTLS struct { + // Mode selects the TLS management strategy. + // +kubebuilder:validation:Enum=Disabled;SelfSigned;CertManager;Provided + Mode string `json:"mode,omitempty"` + + // CertManager config when Mode=CertManager. + CertManager *CertManagerTLS `json:"certManager,omitempty"` + + // Provided secret reference when Mode=Provided. + Provided *ProvidedTLS `json:"provided,omitempty"` +} + +// PostgresTLS acts as a placeholder for future Postgres TLS settings. +type PostgresTLS struct{} + +// GlobalEndpointsTLS acts as a placeholder for future global endpoint TLS settings. +type GlobalEndpointsTLS struct{} + +// CertManagerTLS holds parameters for cert-manager driven certificates. 
+type CertManagerTLS struct { + IssuerRef IssuerRef `json:"issuerRef"` + // DNSNames for the certificate SANs. If empty, operator will add Service DNS names. + DNSNames []string `json:"dnsNames,omitempty"` + // SecretName optional explicit name for the target secret. If empty a default is chosen. + SecretName string `json:"secretName,omitempty"` +} + +// ProvidedTLS references an existing secret that contains tls.crt/tls.key (and optional ca.crt). +type ProvidedTLS struct { + SecretName string `json:"secretName"` +} + +// IssuerRef references a cert-manager Issuer or ClusterIssuer. +type IssuerRef struct { + Name string `json:"name"` + // Kind of issuer (Issuer or ClusterIssuer). Defaults to Issuer. + Kind string `json:"kind,omitempty"` + // Group defaults to cert-manager.io + Group string `json:"group,omitempty"` +} + // DocumentDBStatus defines the observed state of DocumentDB. type DocumentDBStatus struct { // Status reflects the status field from the underlying CNPG Cluster. @@ -144,6 +201,16 @@ type DocumentDBStatus struct { ConnectionString string `json:"connectionString,omitempty"` TargetPrimary string `json:"targetPrimary,omitempty"` LocalPrimary string `json:"localPrimary,omitempty"` + + // TLS reports gateway TLS provisioning status (Phase 1). + TLS *TLSStatus `json:"tls,omitempty"` +} + +// TLSStatus captures readiness and secret information. 
+type TLSStatus struct { + Ready bool `json:"ready,omitempty"` + SecretName string `json:"secretName,omitempty"` + Message string `json:"message,omitempty"` } // +kubebuilder:printcolumn:name="Status",type=string,JSONPath=".status.status",description="CNPG Cluster Status" diff --git a/operator/src/api/preview/zz_generated.deepcopy.go b/operator/src/api/preview/zz_generated.deepcopy.go index 410f21b5..2f11b09c 100644 --- a/operator/src/api/preview/zz_generated.deepcopy.go +++ b/operator/src/api/preview/zz_generated.deepcopy.go @@ -12,6 +12,26 @@ import ( ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CertManagerTLS) DeepCopyInto(out *CertManagerTLS) { + *out = *in + out.IssuerRef = in.IssuerRef + if in.DNSNames != nil { + in, out := &in.DNSNames, &out.DNSNames + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CertManagerTLS. +func (in *CertManagerTLS) DeepCopy() *CertManagerTLS { + if in == nil { + return nil + } + out := new(CertManagerTLS) + in.DeepCopyInto(out) + return out +} + func (in *Backup) DeepCopyInto(out *Backup) { *out = *in out.TypeMeta = in.TypeMeta @@ -179,7 +199,7 @@ func (in *DocumentDB) DeepCopyInto(out *DocumentDB) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DocumentDB. 
@@ -243,6 +263,11 @@ func (in *DocumentDBSpec) DeepCopyInto(out *DocumentDBSpec) { } out.ExposeViaService = in.ExposeViaService out.Timeouts = in.Timeouts + if in.TLS != nil { + in, out := &in.TLS, &out.TLS + *out = new(TLSConfiguration) + (*in).DeepCopyInto(*out) + } if in.Bootstrap != nil { in, out := &in.Bootstrap, &out.Bootstrap *out = new(BootstrapConfiguration) @@ -268,6 +293,11 @@ func (in *DocumentDBSpec) DeepCopy() *DocumentDBSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DocumentDBStatus) DeepCopyInto(out *DocumentDBStatus) { *out = *in + if in.TLS != nil { + in, out := &in.TLS, &out.TLS + *out = new(TLSStatus) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DocumentDBStatus. @@ -296,6 +326,90 @@ func (in *ExposeViaService) DeepCopy() *ExposeViaService { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GatewayTLS) DeepCopyInto(out *GatewayTLS) { + *out = *in + if in.CertManager != nil { + in, out := &in.CertManager, &out.CertManager + *out = new(CertManagerTLS) + (*in).DeepCopyInto(*out) + } + if in.Provided != nil { + in, out := &in.Provided, &out.Provided + *out = new(ProvidedTLS) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GatewayTLS. +func (in *GatewayTLS) DeepCopy() *GatewayTLS { + if in == nil { + return nil + } + out := new(GatewayTLS) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GlobalEndpointsTLS) DeepCopyInto(out *GlobalEndpointsTLS) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GlobalEndpointsTLS. 
+func (in *GlobalEndpointsTLS) DeepCopy() *GlobalEndpointsTLS { + if in == nil { + return nil + } + out := new(GlobalEndpointsTLS) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *IssuerRef) DeepCopyInto(out *IssuerRef) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IssuerRef. +func (in *IssuerRef) DeepCopy() *IssuerRef { + if in == nil { + return nil + } + out := new(IssuerRef) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresTLS) DeepCopyInto(out *PostgresTLS) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresTLS. +func (in *PostgresTLS) DeepCopy() *PostgresTLS { + if in == nil { + return nil + } + out := new(PostgresTLS) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ProvidedTLS) DeepCopyInto(out *ProvidedTLS) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProvidedTLS. +func (in *ProvidedTLS) DeepCopy() *ProvidedTLS { + if in == nil { + return nil + } + out := new(ProvidedTLS) + in.DeepCopyInto(out) + return out +} + func (in *RecoveryConfiguration) DeepCopyInto(out *RecoveryConfiguration) { *out = *in in.Backup.DeepCopyInto(&out.Backup) @@ -328,6 +442,25 @@ func (in *Resource) DeepCopy() *Resource { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *TLSConfiguration) DeepCopyInto(out *TLSConfiguration) { + *out = *in + if in.Gateway != nil { + in, out := &in.Gateway, &out.Gateway + *out = new(GatewayTLS) + (*in).DeepCopyInto(*out) + } + if in.Postgres != nil { + in, out := &in.Postgres, &out.Postgres + *out = new(PostgresTLS) + **out = **in + } + if in.GlobalEndpoints != nil { + in, out := &in.GlobalEndpoints, &out.GlobalEndpoints + *out = new(GlobalEndpointsTLS) + **out = **in + } +} + func (in *ScheduledBackup) DeepCopyInto(out *ScheduledBackup) { *out = *in out.TypeMeta = in.TypeMeta @@ -397,6 +530,16 @@ func (in *ScheduledBackupSpec) DeepCopyInto(out *ScheduledBackupSpec) { } } +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TLSConfiguration. +func (in *TLSConfiguration) DeepCopy() *TLSConfiguration { + if in == nil { + return nil + } + out := new(TLSConfiguration) + in.DeepCopyInto(out) + return out +} + // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScheduledBackupSpec. func (in *ScheduledBackupSpec) DeepCopy() *ScheduledBackupSpec { if in == nil { @@ -408,6 +551,20 @@ func (in *ScheduledBackupSpec) DeepCopy() *ScheduledBackupSpec { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TLSStatus) DeepCopyInto(out *TLSStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TLSStatus. 
+func (in *TLSStatus) DeepCopy() *TLSStatus { + if in == nil { + return nil + } + out := new(TLSStatus) + in.DeepCopyInto(out) + return out +} + func (in *ScheduledBackupStatus) DeepCopyInto(out *ScheduledBackupStatus) { *out = *in if in.LastScheduledTime != nil { diff --git a/operator/src/cmd/main.go b/operator/src/cmd/main.go index 966527f0..dfbc0fbc 100644 --- a/operator/src/cmd/main.go +++ b/operator/src/cmd/main.go @@ -24,6 +24,7 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" dbpreview "github.com/microsoft/documentdb-operator/api/preview" "github.com/microsoft/documentdb-operator/internal/controller" @@ -41,6 +42,7 @@ func init() { utilruntime.Must(dbpreview.AddToScheme(scheme)) utilruntime.Must(cnpgv1.AddToScheme(scheme)) + utilruntime.Must(cmapi.AddToScheme(scheme)) utilruntime.Must(fleetv1alpha1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } @@ -193,6 +195,14 @@ func main() { os.Exit(1) } + if err = (&controller.CertificateReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Certificate") + os.Exit(1) + } + if err = (&controller.DocumentDBReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), diff --git a/operator/src/config/crd/bases/db.microsoft.com_documentdbs.yaml b/operator/src/config/crd/bases/db.microsoft.com_documentdbs.yaml index db773b8c..931c8d15 100644 --- a/operator/src/config/crd/bases/db.microsoft.com_documentdbs.yaml +++ b/operator/src/config/crd/bases/db.microsoft.com_documentdbs.yaml @@ -199,6 +199,72 @@ spec: minimum: 0 type: integer type: object + tls: + description: TLS configures certificate management for DocumentDB + components. 
+ properties: + gateway: + description: 'Gateway configures TLS for the gateway sidecar (Phase + 1: certificate provisioning only).' + properties: + certManager: + description: CertManager config when Mode=CertManager. + properties: + dnsNames: + description: DNSNames for the certificate SANs. If empty, + operator will add Service DNS names. + items: + type: string + type: array + issuerRef: + description: IssuerRef references a cert-manager Issuer + or ClusterIssuer. + properties: + group: + description: Group defaults to cert-manager.io + type: string + kind: + description: Kind of issuer (Issuer or ClusterIssuer). + Defaults to Issuer. + type: string + name: + type: string + required: + - name + type: object + secretName: + description: SecretName optional explicit name for the + target secret. If empty a default is chosen. + type: string + required: + - issuerRef + type: object + mode: + description: Mode selects the TLS management strategy. + enum: + - Disabled + - SelfSigned + - CertManager + - Provided + type: string + provided: + description: Provided secret reference when Mode=Provided. + properties: + secretName: + type: string + required: + - secretName + type: object + type: object + globalEndpoints: + description: GlobalEndpoints configures TLS for global endpoints + (placeholder for future phases). + type: object + postgres: + description: Postgres configures TLS for the Postgres server (placeholder + for future phases). + type: object + type: object walReplicaPluginName: description: WalReplicaPluginName is the name of the wal replica plugin to use. @@ -219,6 +285,16 @@ spec: description: Status reflects the status field from the underlying CNPG Cluster. type: string + tls: + description: TLS reports gateway TLS provisioning status (Phase 1). 
+ properties: + message: + type: string + ready: + type: boolean + secretName: + type: string + type: object targetPrimary: type: string type: object diff --git a/operator/src/config/samples/db_preview_documentdb.yaml b/operator/src/config/samples/db_preview_documentdb.yaml index 4976e71f..b41e21c1 100644 --- a/operator/src/config/samples/db_preview_documentdb.yaml +++ b/operator/src/config/samples/db_preview_documentdb.yaml @@ -6,4 +6,29 @@ metadata: app.kubernetes.io/managed-by: kustomize name: documentdb-sample spec: - # TODO(user): Add fields here + nodeCount: 1 + instancesPerNode: 1 + resource: + pvcSize: 1Gi + documentDBImage: ghcr.io/microsoft/documentdb/documentdb-local:16 + exposeViaService: + serviceType: ClusterIP + # Uncomment one TLS mode below + # tls: + # gateway: + # mode: SelfSigned + # tls: + # gateway: + # mode: Provided + # provided: + # secretName: my-existing-tls-secret + # tls: + # gateway: + # mode: CertManager + # certManager: + # issuerRef: + # name: selfsigned + # kind: ClusterIssuer + # # dnsNames optional; service DNS names are auto-added + # dnsNames: + # - extra.example.local diff --git a/operator/src/go.mod b/operator/src/go.mod index ec4b2a1f..9f2da63b 100644 --- a/operator/src/go.mod +++ b/operator/src/go.mod @@ -5,22 +5,25 @@ go 1.23.5 godebug default=go1.23 require ( + github.com/cert-manager/cert-manager v1.14.4 github.com/cloudnative-pg/cloudnative-pg v1.25.1 github.com/cloudnative-pg/machinery v0.1.0 github.com/go-logr/logr v1.4.2 + github.com/stretchr/testify v1.10.0 github.com/onsi/ginkgo/v2 v2.22.2 github.com/onsi/gomega v1.36.2 go.goms.io/fleet-networking v0.3.0 k8s.io/api v0.32.2 k8s.io/apimachinery v0.32.2 k8s.io/client-go v0.32.2 + k8s.io/utils v0.0.0-20241210054802-24370beab758 sigs.k8s.io/controller-runtime v0.20.4 ) require ( cel.dev/expr v0.19.0 // indirect github.com/antlr4-go/antlr/v4 v4.13.0 // indirect - github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect + 
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect @@ -62,6 +65,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.80.1 // indirect github.com/prometheus/client_golang v1.21.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect @@ -104,8 +108,8 @@ require ( k8s.io/component-base v0.32.2 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 // indirect - k8s.io/utils v0.0.0-20241210054802-24370beab758 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 // indirect + sigs.k8s.io/gateway-api v1.0.0 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.5.0 // indirect sigs.k8s.io/yaml v1.4.0 // indirect diff --git a/operator/src/go.sum b/operator/src/go.sum index 5c6ccbbb..8b199877 100644 --- a/operator/src/go.sum +++ b/operator/src/go.sum @@ -4,14 +4,16 @@ github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8 github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= -github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA= -github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod 
h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= +github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= +github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cert-manager/cert-manager v1.14.4 h1:DLXIZHx3jhkViYfobXo+N7/od/oj4YgG6AJw4ORJnYs= +github.com/cert-manager/cert-manager v1.14.4/go.mod h1:d+CBeRu5MbpHTfXkkiiamUhnfdvhbThoOPwilU4UM98= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cloudnative-pg/barman-cloud v0.1.0 h1:e/z52CehMBIh1LjZqNBJnncWJbS+1JYvRMBR8Js6Uiw= @@ -27,8 +29,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= -github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch v5.7.0+incompatible h1:vgGkfT/9f8zE6tvSCe74nfpAVDQ2tG6yudJd8LBksgI= 
+github.com/evanphx/json-patch v5.7.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -50,6 +52,7 @@ github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -265,6 +268,8 @@ sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 h1:CPT0ExVicCzcp sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU= sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= +sigs.k8s.io/gateway-api v1.0.0 h1:iPTStSv41+d9p0xFydll6d7f7MOBGuqXM6p2/zVYMAs= +sigs.k8s.io/gateway-api v1.0.0/go.mod h1:4cUgr0Lnp5FZ0Cdq8FdRwCvpiWws7LVhLHGIudLlf4c= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/structured-merge-diff/v4 v4.5.0 h1:nbCitCK2hfnhyiKo6uf2HxUPTCodY6Qaf85SbDIaMBk= diff 
--git a/operator/src/internal/cnpg/cnpg_cluster.go b/operator/src/internal/cnpg/cnpg_cluster.go
index 3b8917cb..76226737 100644
--- a/operator/src/internal/cnpg/cnpg_cluster.go
+++ b/operator/src/internal/cnpg/cnpg_cluster.go
@@ -9,6 +9,7 @@ import (
 	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
 	"github.com/go-logr/logr"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/utils/pointer"
 
 	dbpreview "github.com/microsoft/documentdb-operator/api/preview"
 	util "github.com/microsoft/documentdb-operator/internal/utils"
@@ -60,15 +61,23 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu
 					Size: documentdb.Spec.Resource.Storage.PvcSize,
 				},
 				InheritedMetadata: getInheritedMetadataLabels(documentdb.Name),
-				Plugins: []cnpgv1.PluginConfiguration{
-					{
-						Name: sidecarPluginName,
-						Parameters: map[string]string{
-							"gatewayImage":               gatewayImage,
-							"documentDbCredentialSecret": credentialSecretName,
-						},
-					},
-				},
+				Plugins: func() []cnpgv1.PluginConfiguration {
+					// Keep handing the credential secret name to the sidecar plugin, exactly as
+					// the replaced literal did; TLS support should only add parameters.
+					params := map[string]string{
+						"gatewayImage":               gatewayImage,
+						"documentDbCredentialSecret": credentialSecretName,
+					}
+					// If TLS is ready, surface secret name to plugin so it can mount certs.
+					if documentdb.Status.TLS != nil && documentdb.Status.TLS.Ready && documentdb.Status.TLS.SecretName != "" {
+						params["gatewayTLSSecret"] = documentdb.Status.TLS.SecretName
+					}
+					return []cnpgv1.PluginConfiguration{{
+						Name:       sidecarPluginName,
+						Enabled:    pointer.Bool(true),
+						Parameters: params,
+					}}
+				}(),
 				PostgresUID: 105,
 				PostgresGID: 108,
 				PostgresConfiguration: cnpgv1.PostgresConfiguration{
diff --git a/operator/src/internal/controller/certificate_controller.go b/operator/src/internal/controller/certificate_controller.go
new file mode 100644
index 00000000..cecb5cce
--- /dev/null
+++ b/operator/src/internal/controller/certificate_controller.go
@@ -0,0 +1,379 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package controller
+
+import (
+	"context"
+	"slices"
+	"time"
+
+	cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"
+	cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/util/retry"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	dbpreview "github.com/microsoft/documentdb-operator/api/preview"
+	util "github.com/microsoft/documentdb-operator/internal/utils"
+)
+
+const (
+	// gatewayCertDuration is the lifetime requested for gateway certificates.
+	gatewayCertDuration = 90 * 24 * time.Hour
+	// gatewayCertRenewBefore is how long before expiry a renewal is requested.
+	gatewayCertRenewBefore = 15 * 24 * time.Hour
+)
+
+// gatewayCertMessages carries the mode-specific status messages reported while a
+// gateway Certificate is created, updated, awaited and finally ready.
+type gatewayCertMessages struct {
+	creating string
+	updating string
+	ready    string
+	waiting  string
+}
+
+// CertificateReconciler manages certificate lifecycle for DocumentDB components.
+// Today it provisions gateway TLS assets; future work can layer in additional surfaces.
+type CertificateReconciler struct {
+	client.Client
+	Scheme *runtime.Scheme
+}
+
+// +kubebuilder:rbac:groups=db.microsoft.com,resources=documentdbs,verbs=get;list;watch
+// +kubebuilder:rbac:groups=db.microsoft.com,resources=documentdbs/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=cert-manager.io,resources=certificates;issuers,verbs=get;list;watch;create;update;patch
+// +kubebuilder:rbac:groups=cert-manager.io,resources=certificates/status;issuers/status,verbs=get
+// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
+
+// Reconcile loads the requested DocumentDB and reconciles its gateway TLS assets.
+// A DocumentDB that no longer exists is treated as nothing to do.
+func (r *CertificateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	logger := log.FromContext(ctx)
+
+	ddb := &dbpreview.DocumentDB{}
+	if err := r.Get(ctx, req.NamespacedName, ddb); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	res, err := r.reconcileCertificates(ctx, ddb)
+	if err != nil {
+		logger.Error(err, "failed to reconcile certificate resources")
+	}
+	return res, err
+}
+
+// reconcileCertificates dispatches on spec.tls.gateway.mode and keeps status.tls in
+// step with it. A missing TLS or gateway stanza is handled like mode "Disabled" so
+// that removing the configuration also clears a previously reported Ready status.
+func (r *CertificateReconciler) reconcileCertificates(ctx context.Context, ddb *dbpreview.DocumentDB) (ctrl.Result, error) {
+	var gatewayCfg *dbpreview.GatewayTLS
+	if ddb.Spec.TLS != nil {
+		gatewayCfg = ddb.Spec.TLS.Gateway
+	}
+
+	if gatewayCfg == nil || gatewayCfg.Mode == "" || gatewayCfg.Mode == "Disabled" {
+		// Only write when there is a stale Ready flag to clear.
+		if ddb.Status.TLS != nil && ddb.Status.TLS.Ready {
+			if err := r.updateTLSStatus(ctx, ddb, func(status *dbpreview.TLSStatus) {
+				status.Ready = false
+				status.Message = "Gateway TLS disabled"
+			}); err != nil {
+				return ctrl.Result{}, err
+			}
+		}
+		return ctrl.Result{}, nil
+	}
+
+	// Initialise status.tls so the mode handlers can rely on it being present.
+	if ddb.Status.TLS == nil {
+		if err := r.updateTLSStatus(ctx, ddb, func(status *dbpreview.TLSStatus) {
+			status.Ready = false
+		}); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
+	switch gatewayCfg.Mode {
+	case "SelfSigned":
+		return r.ensureSelfSignedCert(ctx, ddb)
+	case "Provided":
+		return r.ensureProvidedSecret(ctx, ddb)
+	case "CertManager":
+		return r.ensureCertManagerManagedCert(ctx, ddb)
+	default:
+		return ctrl.Result{}, nil
+	}
+}
+
+// ensureProvidedSecret validates the user-supplied TLS secret (mode "Provided").
+// TLS is reported ready once the secret exists and holds both tls.crt and tls.key;
+// until then the reason is recorded in status.tls and a short requeue is requested.
+func (r *CertificateReconciler) ensureProvidedSecret(ctx context.Context, ddb *dbpreview.DocumentDB) (ctrl.Result, error) {
+	gatewayCfg := ddb.Spec.TLS.Gateway
+	if gatewayCfg == nil || gatewayCfg.Provided == nil || gatewayCfg.Provided.SecretName == "" {
+		// Nothing to poll for: wait for the spec to be corrected.
+		err := r.updateTLSStatus(ctx, ddb, func(status *dbpreview.TLSStatus) {
+			status.Message = "Provided TLS secret name missing"
+			status.Ready = false
+		})
+		return ctrl.Result{}, err
+	}
+	secretName := gatewayCfg.Provided.SecretName
+
+	// notReady records why the secret cannot be used yet and schedules a retry.
+	notReady := func(message string) (ctrl.Result, error) {
+		if err := r.updateTLSStatus(ctx, ddb, func(status *dbpreview.TLSStatus) {
+			status.Ready = false
+			status.SecretName = secretName
+			status.Message = message
+		}); err != nil {
+			return ctrl.Result{}, err
+		}
+		return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil
+	}
+
+	secret := &corev1.Secret{}
+	if err := r.Get(ctx, types.NamespacedName{Name: secretName, Namespace: ddb.Namespace}, secret); err != nil {
+		if errors.IsNotFound(err) {
+			return notReady("Waiting for provided TLS secret")
+		}
+		return ctrl.Result{}, err
+	}
+
+	for _, key := range []string{corev1.TLSCertKey, corev1.TLSPrivateKeyKey} {
+		if _, ok := secret.Data[key]; !ok {
+			return notReady("Provided secret missing " + key)
+		}
+	}
+
+	err := r.updateTLSStatus(ctx, ddb, func(status *dbpreview.TLSStatus) {
+		status.Ready = true
+		status.SecretName = secretName
+		status.Message = "Using provided TLS secret"
+	})
+	return ctrl.Result{}, err
+}
+
+// ensureCertManagerManagedCert requests the gateway certificate from the Issuer or
+// ClusterIssuer named in spec.tls.gateway.certManager (mode "CertManager"). The
+// issuer kind defaults to "Issuer", the group to "cert-manager.io" and the secret
+// name to "<name>-gateway-cert-tls"; user DNS names are merged with the Service names.
+func (r *CertificateReconciler) ensureCertManagerManagedCert(ctx context.Context, ddb *dbpreview.DocumentDB) (ctrl.Result, error) {
+	gatewayCfg := ddb.Spec.TLS.Gateway
+	if gatewayCfg == nil || gatewayCfg.CertManager == nil {
+		err := r.updateTLSStatus(ctx, ddb, func(status *dbpreview.TLSStatus) {
+			status.Ready = false
+			status.Message = "CertManager configuration missing"
+		})
+		return ctrl.Result{}, err
+	}
+	cmCfg := gatewayCfg.CertManager
+
+	issuerRef := cmmeta.ObjectReference{
+		Name:  cmCfg.IssuerRef.Name,
+		Kind:  cmCfg.IssuerRef.Kind,
+		Group: cmCfg.IssuerRef.Group,
+	}
+	if issuerRef.Kind == "" {
+		issuerRef.Kind = "Issuer"
+	}
+	if issuerRef.Group == "" {
+		issuerRef.Group = "cert-manager.io"
+	}
+
+	secretName := cmCfg.SecretName
+	if secretName == "" {
+		secretName = ddb.Name + "-gateway-cert-tls"
+	}
+
+	// User-supplied names come first, followed by the Service names; empty entries
+	// and duplicates are dropped while the order is preserved.
+	seen := map[string]struct{}{}
+	dnsNames := []string{}
+	for _, name := range slices.Concat(cmCfg.DNSNames, gatewayServiceDNSNames(ddb)) {
+		if _, dup := seen[name]; dup || name == "" {
+			continue
+		}
+		seen[name] = struct{}{}
+		dnsNames = append(dnsNames, name)
+	}
+
+	return r.reconcileGatewayCertificate(ctx, ddb, secretName, dnsNames, issuerRef, gatewayCertMessages{
+		creating: "Creating cert-manager certificate",
+		updating: "Updating cert-manager certificate",
+		ready:    "Gateway TLS certificate ready (cert-manager)",
+		waiting:  "Waiting for cert-manager certificate to become ready",
+	})
+}
+
+// ensureSelfSignedCert provisions a namespaced self-signed Issuer owned by the
+// DocumentDB (mode "SelfSigned") and requests the gateway certificate from it.
+func (r *CertificateReconciler) ensureSelfSignedCert(ctx context.Context, ddb *dbpreview.DocumentDB) (ctrl.Result, error) {
+	issuerName := ddb.Name + "-gateway-selfsigned"
+
+	issuer := &cmapi.Issuer{}
+	if err := r.Get(ctx, types.NamespacedName{Name: issuerName, Namespace: ddb.Namespace}, issuer); err != nil {
+		if !errors.IsNotFound(err) {
+			return ctrl.Result{}, err
+		}
+
+		issuer = &cmapi.Issuer{
+			ObjectMeta: metav1.ObjectMeta{Name: issuerName, Namespace: ddb.Namespace},
+			Spec:       cmapi.IssuerSpec{IssuerConfig: cmapi.IssuerConfig{SelfSigned: &cmapi.SelfSignedIssuer{}}},
+		}
+		if err := controllerutil.SetControllerReference(ddb, issuer, r.Scheme); err != nil {
+			return ctrl.Result{}, err
+		}
+		if err := r.Create(ctx, issuer); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
+	issuerRef := cmmeta.ObjectReference{Name: issuerName, Kind: "Issuer", Group: "cert-manager.io"}
+	return r.reconcileGatewayCertificate(ctx, ddb, ddb.Name+"-gateway-cert-tls", gatewayServiceDNSNames(ddb), issuerRef, gatewayCertMessages{
+		creating: "Creating self-signed certificate",
+		updating: "Updating self-signed certificate",
+		ready:    "Gateway TLS certificate ready",
+		waiting:  "Waiting for gateway TLS certificate to become ready",
+	})
+}
+
+// reconcileGatewayCertificate creates the "<name>-gateway-cert" Certificate when it is
+// missing, brings an operator-owned Certificate back in line when its secret name,
+// DNS names or issuer differ from the desired values, and mirrors its Ready
+// condition into status.tls. It asks for a short requeue until the Certificate is ready.
+func (r *CertificateReconciler) reconcileGatewayCertificate(ctx context.Context, ddb *dbpreview.DocumentDB, secretName string, dnsNames []string, issuerRef cmmeta.ObjectReference, msgs gatewayCertMessages) (ctrl.Result, error) {
+	// pending publishes a not-ready status and asks to be reconciled again shortly.
+	pending := func(secret, message string) (ctrl.Result, error) {
+		if err := r.updateTLSStatus(ctx, ddb, func(status *dbpreview.TLSStatus) {
+			status.Ready = false
+			status.SecretName = secret
+			status.Message = message
+		}); err != nil {
+			return ctrl.Result{}, err
+		}
+		return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil
+	}
+
+	certKey := types.NamespacedName{Name: ddb.Name + "-gateway-cert", Namespace: ddb.Namespace}
+	cert := &cmapi.Certificate{}
+	if err := r.Get(ctx, certKey, cert); err != nil {
+		if !errors.IsNotFound(err) {
+			return ctrl.Result{}, err
+		}
+
+		cert = &cmapi.Certificate{
+			ObjectMeta: metav1.ObjectMeta{Name: certKey.Name, Namespace: certKey.Namespace},
+			Spec: cmapi.CertificateSpec{
+				SecretName:  secretName,
+				DNSNames:    dnsNames,
+				IssuerRef:   issuerRef,
+				Duration:    &metav1.Duration{Duration: gatewayCertDuration},
+				RenewBefore: &metav1.Duration{Duration: gatewayCertRenewBefore},
+				Usages:      []cmapi.KeyUsage{cmapi.UsageServerAuth},
+			},
+		}
+		if err := controllerutil.SetControllerReference(ddb, cert, r.Scheme); err != nil {
+			return ctrl.Result{}, err
+		}
+		if err := r.Create(ctx, cert); err != nil {
+			return ctrl.Result{}, err
+		}
+		return pending(secretName, msgs.creating)
+	}
+
+	// The Certificate is shared by the SelfSigned and CertManager modes, so a mode
+	// switch or an edited spec must be pushed to the existing object. Certificates
+	// not controlled by this DocumentDB are left untouched.
+	drifted := cert.Spec.SecretName != secretName ||
+		cert.Spec.IssuerRef != issuerRef ||
+		!slices.Equal(cert.Spec.DNSNames, dnsNames)
+	if drifted && metav1.IsControlledBy(cert, ddb) {
+		cert.Spec.SecretName = secretName
+		cert.Spec.DNSNames = dnsNames
+		cert.Spec.IssuerRef = issuerRef
+		if err := r.Update(ctx, cert); err != nil {
+			return ctrl.Result{}, err
+		}
+		return pending(secretName, msgs.updating)
+	}
+
+	if !gatewayCertificateReady(cert) {
+		return pending(cert.Spec.SecretName, msgs.waiting)
+	}
+
+	// Refresh status only when it does not already describe the ready certificate.
+	if ddb.Status.TLS == nil || !ddb.Status.TLS.Ready || ddb.Status.TLS.SecretName != cert.Spec.SecretName {
+		if err := r.updateTLSStatus(ctx, ddb, func(status *dbpreview.TLSStatus) {
+			status.Ready = true
+			status.SecretName = cert.Spec.SecretName
+			status.Message = msgs.ready
+		}); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+	return ctrl.Result{}, nil
+}
+
+// gatewayServiceDNSNames returns the DNS names derived from the DocumentDB Service
+// name: the bare name, "<service>.<namespace>" and "<service>.<namespace>.svc".
+func gatewayServiceDNSNames(ddb *dbpreview.DocumentDB) []string {
+	serviceBase := util.DOCUMENTDB_SERVICE_PREFIX + ddb.Name
+	return []string{
+		serviceBase,
+		serviceBase + "." + ddb.Namespace,
+		serviceBase + "." + ddb.Namespace + ".svc",
+	}
+}
+
+// gatewayCertificateReady reports whether cert has a Ready condition with status True.
+func gatewayCertificateReady(cert *cmapi.Certificate) bool {
+	for _, cond := range cert.Status.Conditions {
+		if cond.Type == cmapi.CertificateConditionReady && cond.Status == cmmeta.ConditionTrue {
+			return true
+		}
+	}
+	return false
+}
+
+// updateTLSStatus applies mutate to status.tls of the latest stored DocumentDB and
+// writes it through the status subresource, re-reading and retrying on conflicts.
+// On success ddb.Status is replaced with the stored status so callers see the result.
+func (r *CertificateReconciler) updateTLSStatus(ctx context.Context, ddb *dbpreview.DocumentDB, mutate func(*dbpreview.TLSStatus)) error {
+	key := types.NamespacedName{Name: ddb.Name, Namespace: ddb.Namespace}
+	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		current := &dbpreview.DocumentDB{}
+		if err := r.Get(ctx, key, current); err != nil {
+			return err
+		}
+		if current.Status.TLS == nil {
+			current.Status.TLS = &dbpreview.TLSStatus{}
+		}
+		mutate(current.Status.TLS)
+		if err := r.Status().Update(ctx, current); err != nil {
+			return err
+		}
+		ddb.Status = current.Status
+		return nil
+	})
+}
+
+// SetupWithManager registers the controller for DocumentDB objects and the
+// Certificates and Issuers they own.
+func (r *CertificateReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&dbpreview.DocumentDB{}).
+		Owns(&cmapi.Certificate{}).
+		Owns(&cmapi.Issuer{}).
+		Named("certificate-controller").
+		Complete(r)
+}
diff --git a/operator/src/internal/controller/documentdb_controller.go b/operator/src/internal/controller/documentdb_controller.go
index ddf39466..cdf5d9ce 100644
--- a/operator/src/internal/controller/documentdb_controller.go
+++ b/operator/src/internal/controller/documentdb_controller.go
@@ -74,6 +74,7 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	var documentDbServiceIp string
+
 	// Only create/manage the service if ExposeViaService is configured
 	if documentdb.Spec.ExposeViaService.ServiceType != "" {
 		serviceType := corev1.ServiceTypeClusterIP
@@ -143,11 +144,43 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 
 	// Update DocumentDB status with CNPG Cluster status and connection string
 	if err := r.Client.Get(ctx, types.NamespacedName{Name: desiredCnpgCluster.Name, Namespace: req.Namespace}, currentCnpgCluster); err == nil {
-		if currentCnpgCluster.Status.Phase != "" {
-			documentdb.Status.Status = currentCnpgCluster.Status.Phase
-			if documentDbServiceIp != "" {
-				documentdb.Status.ConnectionString =
util.GenerateConnectionString(documentdb, documentDbServiceIp)
+		// Ensure plugin enabled and TLS secret parameter kept in sync once ready
+		if documentdb.Status.TLS != nil && documentdb.Status.TLS.Ready && documentdb.Status.TLS.SecretName != "" {
+			logger.Info("Syncing TLS secret into CNPG Cluster plugin parameters", "secret", documentdb.Status.TLS.SecretName)
+			updated := false
+			for i := range currentCnpgCluster.Spec.Plugins {
+				p := &currentCnpgCluster.Spec.Plugins[i] // pointer into the slice so the edits below land on the Cluster object
+				if p.Name == desiredCnpgCluster.Spec.Plugins[0].Name { // target our sidecar plugin
+					if p.Enabled == nil || !*p.Enabled {
+						trueVal := true
+						p.Enabled = &trueVal
+						updated = true
+						logger.Info("Enabled sidecar plugin")
+					}
+					if p.Parameters == nil {
+						p.Parameters = map[string]string{}
+					}
+					currentVal := p.Parameters["gatewayTLSSecret"]
+					if currentVal != documentdb.Status.TLS.SecretName {
+						p.Parameters["gatewayTLSSecret"] = documentdb.Status.TLS.SecretName
+						updated = true
+						logger.Info("Updated gatewayTLSSecret parameter", "old", currentVal, "new", documentdb.Status.TLS.SecretName)
+					}
+				}
+			}
+			if updated {
+				if currentCnpgCluster.Annotations == nil {
+					currentCnpgCluster.Annotations = map[string]string{}
+				}
+				currentCnpgCluster.Annotations["db.microsoft.com/gateway-tls-rev"] = time.Now().Format(time.RFC3339Nano) // stamped on every plugin change; presumably to prompt a pod refresh, confirm
+				if err := r.Client.Update(ctx, currentCnpgCluster); err == nil {
+					logger.Info("Patched CNPG Cluster with TLS settings; requeueing for pod update")
+					return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil
+				} else {
+					logger.Error(err, "Failed to update CNPG Cluster with TLS settings") // best effort: the reconcile continues after a failed update
+				}
 			}
+
 			if err := r.Status().Update(ctx, documentdb); err != nil {
 				logger.Error(err, "Failed to update DocumentDB status and connection string")
 			}
@@ -181,6 +214,14 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 				return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil
 			}
 		}
+		// Update status connection string
+		if documentDbServiceIp != "" {
+			trustTLS :=
documentdb.Status.TLS != nil && documentdb.Status.TLS.Ready
+			documentdb.Status.ConnectionString = util.GenerateConnectionString(documentdb, documentDbServiceIp, trustTLS)
+		}
+		if err := r.Status().Update(ctx, documentdb); err != nil {
+			logger.Error(err, "Failed to update DocumentDB status and connection string")
+		}
 	}
 
 	return ctrl.Result{RequeueAfter: RequeueAfterLong}, nil
diff --git a/operator/src/internal/controller/documentdb_controller_test.go b/operator/src/internal/controller/documentdb_controller_test.go
new file mode 100644
index 00000000..b3a230c1
--- /dev/null
+++ b/operator/src/internal/controller/documentdb_controller_test.go
@@ -0,0 +1,191 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package controller
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"
+	cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	dbpreview "github.com/microsoft/documentdb-operator/api/preview"
+	util "github.com/microsoft/documentdb-operator/internal/utils"
+)
+
+// buildCertificateReconciler returns a CertificateReconciler backed by a fake client
+// that is seeded with objs. Every seeded client.Object is also registered with the
+// status subresource, which the reconciler relies on for Status().Update calls.
+func buildCertificateReconciler(t *testing.T, objs ...runtime.Object) *CertificateReconciler {
+	t.Helper()
+
+	scheme := runtime.NewScheme()
+	require.NoError(t, dbpreview.AddToScheme(scheme))
+	require.NoError(t, cmapi.AddToScheme(scheme))
+	require.NoError(t, corev1.AddToScheme(scheme))
+
+	builder := fake.NewClientBuilder().WithScheme(scheme)
+	if len(objs) > 0 {
+		builder = builder.WithRuntimeObjects(objs...)
+	}
+
+	statusObjs := make([]client.Object, 0, len(objs))
+	for _, obj := range objs {
+		if co, ok := obj.(client.Object); ok {
+			statusObjs = append(statusObjs, co)
+		}
+	}
+	if len(statusObjs) > 0 {
+		builder = builder.WithStatusSubresource(statusObjs...)
+	}
+
+	return &CertificateReconciler{Client: builder.Build(), Scheme: scheme}
+}
+
+// baseDocumentDB returns a minimal DocumentDB without any TLS configuration.
+// NOTE(review): cnpg_cluster.go reads Spec.Resource.Storage.PvcSize; confirm that
+// dbpreview.Resource still exposes PvcSize directly as used here.
+func baseDocumentDB(name, ns string) *dbpreview.DocumentDB {
+	return &dbpreview.DocumentDB{
+		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns},
+		Spec: dbpreview.DocumentDBSpec{
+			NodeCount:        1,
+			InstancesPerNode: 1,
+			Resource:         dbpreview.Resource{PvcSize: "1Gi"},
+			DocumentDBImage:  "test-image",
+			ExposeViaService: dbpreview.ExposeViaService{ServiceType: "ClusterIP"},
+		},
+	}
+}
+
+// markCertificateReady appends a Ready=True condition to cert and stores it, standing
+// in for cert-manager, which is not running in these tests.
+func markCertificateReady(ctx context.Context, t *testing.T, c client.Client, cert *cmapi.Certificate) {
+	t.Helper()
+
+	cert.Status.Conditions = append(cert.Status.Conditions, cmapi.CertificateCondition{
+		Type:               cmapi.CertificateConditionReady,
+		Status:             cmmeta.ConditionTrue,
+		LastTransitionTime: &metav1.Time{Time: time.Now()},
+	})
+	require.NoError(t, c.Update(ctx, cert))
+}
+
+// TestEnsureProvidedSecret checks that Provided mode waits for the referenced secret
+// and reports ready once it exists with both TLS keys.
+func TestEnsureProvidedSecret(t *testing.T) {
+	ctx := context.Background()
+	ddb := baseDocumentDB("ddb-prov", "default")
+	ddb.Spec.TLS = &dbpreview.TLSConfiguration{
+		Gateway: &dbpreview.GatewayTLS{
+			Mode:     "Provided",
+			Provided: &dbpreview.ProvidedTLS{SecretName: "mycert"},
+		},
+	}
+	r := buildCertificateReconciler(t, ddb)
+
+	// The secret does not exist yet: expect a short requeue and a not-ready status.
+	res, err := r.reconcileCertificates(ctx, ddb)
+	require.NoError(t, err)
+	require.Equal(t, RequeueAfterShort, res.RequeueAfter)
+	require.NotNil(t, ddb.Status.TLS)
+	require.False(t, ddb.Status.TLS.Ready, "Should not be ready until secret exists")
+
+	// Create secret with required keys then reconcile again
+	secret := &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: "mycert", Namespace: "default"},
+		Data:       map[string][]byte{"tls.crt": []byte("crt"), "tls.key": []byte("key")},
+	}
+	require.NoError(t, r.Client.Create(ctx, secret))
+	res, err = r.reconcileCertificates(ctx, ddb)
+	require.NoError(t, err)
+	require.Zero(t, res.RequeueAfter)
+	require.True(t, ddb.Status.TLS.Ready, "Provided secret should mark TLS ready")
+	require.Equal(t, "mycert", ddb.Status.TLS.SecretName)
+}
+
+// TestEnsureCertManagerManagedCert checks that CertManager mode creates the gateway
+// Certificate with user and Service DNS names and reports ready once it is Ready.
+func TestEnsureCertManagerManagedCert(t *testing.T) {
+	ctx := context.Background()
+	ddb := baseDocumentDB("ddb-cm", "default")
+	ddb.Spec.TLS = &dbpreview.TLSConfiguration{
+		Gateway: &dbpreview.GatewayTLS{
+			Mode: "CertManager",
+			CertManager: &dbpreview.CertManagerTLS{
+				IssuerRef: dbpreview.IssuerRef{Name: "test-issuer", Kind: "Issuer"},
+				DNSNames:  []string{"custom.example"},
+			},
+		},
+	}
+	ddb.Status.TLS = &dbpreview.TLSStatus{}
+	issuer := &cmapi.Issuer{
+		ObjectMeta: metav1.ObjectMeta{Name: "test-issuer", Namespace: "default"},
+		Spec:       cmapi.IssuerSpec{IssuerConfig: cmapi.IssuerConfig{SelfSigned: &cmapi.SelfSignedIssuer{}}},
+	}
+	r := buildCertificateReconciler(t, ddb, issuer)
+
+	// The first pass creates the Certificate; the second finds it not yet ready.
+	// Both must ask for a short requeue.
+	for i := 0; i < 2; i++ {
+		res, err := r.reconcileCertificates(ctx, ddb)
+		require.NoError(t, err)
+		require.Equal(t, RequeueAfterShort, res.RequeueAfter)
+	}
+
+	cert := &cmapi.Certificate{}
+	require.NoError(t, r.Client.Get(ctx, types.NamespacedName{Name: "ddb-cm-gateway-cert", Namespace: "default"}, cert))
+	require.Equal(t, "ddb-cm-gateway-cert-tls", cert.Spec.SecretName)
+	require.Contains(t, cert.Spec.DNSNames, "custom.example")
+	// Should include service DNS names
+	serviceBase := util.DOCUMENTDB_SERVICE_PREFIX + ddb.Name
+	for _, name := range []string{serviceBase, serviceBase + ".default", serviceBase + ".default.svc"} {
+		require.Contains(t, cert.Spec.DNSNames, name)
+	}
+
+	// Simulate readiness condition then invoke ensure again (mimic reconcile loop)
+	markCertificateReady(ctx, t, r.Client, cert)
+	res, err := r.reconcileCertificates(ctx, ddb)
+	require.NoError(t, err)
+	require.Zero(t, res.RequeueAfter)
+	require.True(t, ddb.Status.TLS.Ready, "Cert-manager managed cert should mark ready after condition true")
+	require.Equal(t, cert.Spec.SecretName, ddb.Status.TLS.SecretName)
+}
+
+// TestEnsureSelfSignedCert checks that SelfSigned mode creates the issuer and the
+// gateway Certificate and reports ready once the Certificate is Ready.
+func TestEnsureSelfSignedCert(t *testing.T) {
+	ctx := context.Background()
+	ddb := baseDocumentDB("ddb-ss", "default")
+	ddb.Spec.TLS = &dbpreview.TLSConfiguration{Gateway: &dbpreview.GatewayTLS{Mode: "SelfSigned"}}
+	ddb.Status.TLS = &dbpreview.TLSStatus{}
+	r := buildCertificateReconciler(t, ddb)
+
+	// First call should create issuer and certificate
+	res, err := r.reconcileCertificates(ctx, ddb)
+	require.NoError(t, err)
+	require.Equal(t, RequeueAfterShort, res.RequeueAfter)
+
+	// Issuer and certificate should exist
+	issuer := &cmapi.Issuer{}
+	require.NoError(t, r.Client.Get(ctx, types.NamespacedName{Name: "ddb-ss-gateway-selfsigned", Namespace: "default"}, issuer))
+	cert := &cmapi.Certificate{}
+	require.NoError(t, r.Client.Get(ctx, types.NamespacedName{Name: "ddb-ss-gateway-cert", Namespace: "default"}, cert))
+
+	// Simulate ready condition and call again
+	markCertificateReady(ctx, t, r.Client, cert)
+	res, err = r.reconcileCertificates(ctx, ddb)
+	require.NoError(t, err)
+	require.Zero(t, res.RequeueAfter)
+	require.True(t, ddb.Status.TLS.Ready)
+	require.Equal(t, "ddb-ss-gateway-cert-tls", ddb.Status.TLS.SecretName)
+}
diff --git a/operator/src/internal/utils/util.go b/operator/src/internal/utils/util.go
index af2f41e6..c2130033 100644
--- a/operator/src/internal/utils/util.go
+++ b/operator/src/internal/utils/util.go
@@ -314,13 +314,18 @@ func DeleteRoleBinding(ctx context.Context, c client.Client, name, namespace str
 	return nil
 }
 
-// GenerateConnectionString returns a MongoDB connection string for the DocumentDB instance
-func GenerateConnectionString(documentdb *dbpreview.DocumentDB, serviceIp string) string {
+// GenerateConnectionString returns a MongoDB connection string for the DocumentDB instance.
+// When trustTLS is true, tlsAllowInvalidCertificates is omitted for strict verification.
+func GenerateConnectionString(documentdb *dbpreview.DocumentDB, serviceIp string, trustTLS bool) string { secretName := documentdb.Spec.DocumentDbCredentialSecret if secretName == "" { secretName = DEFAULT_DOCUMENTDB_CREDENTIALS_SECRET } - return fmt.Sprintf("mongodb://$(kubectl get secret %s -n %s -o jsonpath='{.data.username}' | base64 -d):$(kubectl get secret %s -n %s -o jsonpath='{.data.password}' | base64 -d)@%s:%d/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true&tlsAllowInvalidCertificates=true&replicaSet=rs0", secretName, documentdb.Namespace, secretName, documentdb.Namespace, serviceIp, GetPortFor(GATEWAY_PORT)) + conn := fmt.Sprintf("mongodb://$(kubectl get secret %s -n %s -o jsonpath='{.data.username}' | base64 -d):$(kubectl get secret %s -n %s -o jsonpath='{.data.password}' | base64 -d)@%s:%d/?directConnection=true&authMechanism=SCRAM-SHA-256&tls=true", secretName, documentdb.Namespace, secretName, documentdb.Namespace, serviceIp, GetPortFor(GATEWAY_PORT)) + if !trustTLS { + conn += "&tlsAllowInvalidCertificates=true" + } + return conn + "&replicaSet=rs0" } // GetGatewayImageForDocumentDB returns the gateway image for a DocumentDB instance. 
diff --git a/operator/src/scripts/deployment-examples/create-cluster.sh b/operator/src/scripts/deployment-examples/create-cluster.sh new file mode 100755 index 00000000..9ab9e095 --- /dev/null +++ b/operator/src/scripts/deployment-examples/create-cluster.sh @@ -0,0 +1,656 @@ +#!/bin/bash + +# DocumentDB AKS Cluster Creation Script +# This script creates a complete AKS cluster with all dependencies for DocumentDB + +#set -e # Exit on any error + +# Configuration +CLUSTER_NAME="guanzhou-101401-cluster" +RESOURCE_GROUP="guanzhou-101401-rg" +LOCATION="East US 2" +NODE_COUNT=2 +NODE_SIZE="Standard_D8s_v5" +KUBERNETES_VERSION="1.31.11" + +# DocumentDB Operator Configuration +# For testing: use hossain-rayhan/documentdb-operator (fork with Azure enhancements) +# For production: use microsoft/documentdb-operator (official) +OPERATOR_GITHUB_ORG="documentdb" +OPERATOR_CHART_VERSION="0.1.0" +# Feature flags - set to "true" to enable, "false" to skip +INSTALL_OPERATOR="${INSTALL_OPERATOR:-false}" +DEPLOY_INSTANCE="${DEPLOY_INSTANCE:-false}" +CREATE_STORAGE_CLASS="${CREATE_STORAGE_CLASS:-false}" + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --skip-operator) + INSTALL_OPERATOR="false" + shift + ;; + --skip-instance) + DEPLOY_INSTANCE="false" + shift + ;; + --install-operator) + INSTALL_OPERATOR="true" + shift + ;; + --deploy-instance) + DEPLOY_INSTANCE="true" + shift + ;; + --install-all) + INSTALL_OPERATOR="true" + DEPLOY_INSTANCE="true" + shift + ;; + --create-storage-class) + CREATE_STORAGE_CLASS="true" + shift + ;; + --skip-storage-class) + CREATE_STORAGE_CLASS="false" + shift + ;; + --cluster-name) + CLUSTER_NAME="$2" + shift 2 + ;; + --resource-group) + RESOURCE_GROUP="$2" + shift 2 + ;; + --location) + LOCATION="$2" + shift 2 + ;; + --github-username) + GITHUB_USERNAME="$2" + shift 2 + ;; + --github-token) + GITHUB_TOKEN="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --skip-operator Skip 
DocumentDB operator installation (default)" + echo " --skip-instance Skip DocumentDB instance deployment (default)" + echo " --install-operator Install DocumentDB operator only (assumes cluster exists)" + echo " --deploy-instance Deploy DocumentDB instance only (assumes cluster+operator exist)" + echo " --install-all Create cluster + install operator + deploy instance" + echo " --create-storage-class Create custom Premium SSD storage class" + echo " --skip-storage-class Use AKS default storage (StandardSSD_LRS) - default" + echo " --cluster-name NAME AKS cluster name (default: documentdb-cluster)" + echo " --resource-group RG Azure resource group (default: documentdb-rg)" + echo " --location LOCATION Azure location (default: East US)" + echo " --github-username GitHub username for operator installation" + echo " --github-token GitHub token for operator installation" + echo " -h, --help Show this help message" + echo "" + echo "Examples:" + echo " $0 # Create cluster only" + echo " $0 --install-operator # Install operator only (assumes cluster exists)" + echo " $0 --deploy-instance # Deploy DocumentDB only (assumes cluster+operator exist)" + echo " $0 --install-all # Create cluster + install operator + deploy instance" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Logging function +log() { + echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1" +} + +success() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] โœ… $1${NC}" +} + +warn() { + echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] โš ๏ธ $1${NC}" +} + +error() { + echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] โŒ $1${NC}" + exit 1 +} + +# Check prerequisites +check_prerequisites() { + log "Checking prerequisites..." + + # Check Azure CLI + if ! command -v az &> /dev/null; then + error "Azure CLI not found. Please install Azure CLI first." 
+ fi + + # Check kubectl + if ! command -v kubectl &> /dev/null; then + error "kubectl not found. Please install kubectl first." + fi + + # Check Helm + if ! command -v helm &> /dev/null; then + error "Helm not found. Please install Helm first." + fi + + # Check Azure login + if ! az account show &> /dev/null; then + error "Not logged into Azure. Please run 'az login' first." + fi + + success "All prerequisites met" +} + +# Create resource group +create_resource_group() { + log "Creating resource group: $RESOURCE_GROUP in location: $LOCATION" + + # Check if resource group already exists + if az group show --name $RESOURCE_GROUP &> /dev/null; then + warn "Resource group $RESOURCE_GROUP already exists. Skipping creation." + return 0 + fi + + # Create resource group + az group create --name $RESOURCE_GROUP --location "$LOCATION" + + if [ $? -eq 0 ]; then + success "Resource group created successfully" + else + error "Failed to create resource group" + fi +} + +# Create AKS cluster +create_cluster() { + log "Creating AKS cluster: $CLUSTER_NAME" + + # Check if cluster already exists + if az aks show --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME &> /dev/null; then + warn "Cluster $CLUSTER_NAME already exists. Skipping cluster creation." + else + # Create AKS cluster with managed identity and required addons + az aks create \ + --resource-group $RESOURCE_GROUP \ + --name $CLUSTER_NAME \ + --node-count $NODE_COUNT \ + --node-vm-size $NODE_SIZE \ + --kubernetes-version $KUBERNETES_VERSION \ + --enable-managed-identity \ + --enable-addons monitoring \ + --enable-cluster-autoscaler \ + --min-count 2 \ + --max-count 5 \ + --generate-ssh-keys \ + --network-plugin azure \ + --network-policy azure \ + --load-balancer-sku standard + + if [ $? -eq 0 ]; then + success "AKS cluster created successfully" + else + error "Failed to create AKS cluster" + fi + fi + + # Get cluster credentials + log "Getting cluster credentials..." 
+ az aks get-credentials --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --overwrite-existing + + # Handle WSL case - copy Windows kubeconfig to WSL + if grep -qi microsoft /proc/version 2>/dev/null; then + log "Detected WSL environment, copying kubeconfig from Windows to WSL..." + WIN_KUBE_CONFIG="/mnt/c/Users/$(whoami)/.kube/config" + if [ -f "$WIN_KUBE_CONFIG" ]; then + mkdir -p ~/.kube + cp "$WIN_KUBE_CONFIG" ~/.kube/config + chmod 600 ~/.kube/config + log "Kubeconfig copied to WSL" + else + warn "Windows kubeconfig not found at expected location" + fi + fi + + success "Cluster credentials configured" +} + +# Install Azure CSI drivers +install_azure_csi_drivers() { + log "Checking Azure CSI drivers..." + + # Check if CSI drivers are already enabled (modern AKS clusters have them by default) + CSI_STATUS=$(az aks show --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --query "storageProfile" -o json 2>/dev/null) + DISK_CSI_ENABLED=$(echo "$CSI_STATUS" | jq -r '.diskCsiDriver.enabled // false') + FILE_CSI_ENABLED=$(echo "$CSI_STATUS" | jq -r '.fileCsiDriver.enabled // false') + + if [ "$DISK_CSI_ENABLED" == "true" ] && [ "$FILE_CSI_ENABLED" == "true" ]; then + success "Azure CSI drivers already enabled (Disk: โœ…, File: โœ…)" + return 0 + fi + + log "CSI drivers not fully enabled - installing..." + log "Current status: Disk=$DISK_CSI_ENABLED, File=$FILE_CSI_ENABLED" + + # Azure Disk CSI driver (only if not enabled) + if [ "$DISK_CSI_ENABLED" != "true" ]; then + log "Enabling Azure Disk CSI driver..." + az aks update --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --enable-disk-driver >/dev/null 2>&1 + fi + + # Azure File CSI driver (only if not enabled) + if [ "$FILE_CSI_ENABLED" != "true" ]; then + log "Enabling Azure File CSI driver..." 
+ az aks update --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --enable-file-driver >/dev/null 2>&1 + fi + + success "Azure CSI drivers configured" +} + +# Verify Azure Load Balancer (built-in to AKS) +configure_load_balancer() { + log "Verifying Azure Load Balancer..." + + # Azure Load Balancer is built into AKS, just verify it's working + if kubectl get service kubernetes -n default >/dev/null 2>&1; then + success "Azure Load Balancer verified (built-in to AKS)" + else + warn "Unable to verify Kubernetes API service" + fi +} + +# Install cert-manager +install_cert_manager() { + log "Installing cert-manager..." + + # Check if already installed + if helm list -n cert-manager | grep -q cert-manager; then + warn "cert-manager already installed. Skipping installation." + return 0 + fi + + # Add Jetstack Helm repository + helm repo add jetstack https://charts.jetstack.io + helm repo update + + # Install cert-manager + helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --version v1.13.2 \ + --set installCRDs=true \ + --set prometheus.enabled=false \ + --set webhook.timeoutSeconds=30 + + # Wait for cert-manager to be ready + log "Waiting for cert-manager to be ready..." + sleep 30 + kubectl wait --for=condition=ready pod -l app.kubernetes.io/instance=cert-manager -n cert-manager --timeout=300s || warn "cert-manager pods may still be starting" + + success "cert-manager installed" +} + +# Create optimized storage class for Azure (optional) +create_storage_class() { + if [ "$CREATE_STORAGE_CLASS" != "true" ]; then + warn "Skipping custom storage class creation (using AKS default StandardSSD_LRS)" + return 0 + fi + + log "Creating DocumentDB custom Premium SSD storage class..." + + # Check if storage class already exists + if kubectl get storageclass documentdb-storage &> /dev/null; then + warn "DocumentDB storage class already exists. Skipping creation." 
+ return 0 + fi + + kubectl apply -f - < /dev/null; then + error "Cannot reach ghcr.io. Please check your internet connection and firewall settings." + fi + + # Install DocumentDB operator using enhanced fork with Azure support + log "Installing DocumentDB operator from GitHub Container Registry (enhanced fork with Azure support)..." + + # Check for GitHub authentication + if [ -z "$GITHUB_TOKEN" ] || [ -z "$GITHUB_USERNAME" ]; then + error "DocumentDB operator installation requires GitHub authentication. + +Please set the following environment variables: + export GITHUB_USERNAME='your-github-username' + export GITHUB_TOKEN='your-github-token' + +To create a GitHub token: +1. Go to https://github.com/settings/tokens +2. Generate a new token with 'read:packages' scope +3. Export the token as shown above + +Then run the script again with --install-operator" + fi + + # Authenticate with GitHub Container Registry + log "Authenticating with GitHub Container Registry..." + if ! echo "$GITHUB_TOKEN" | helm registry login ghcr.io --username "$GITHUB_USERNAME" --password-stdin; then + error "Failed to authenticate with GitHub Container Registry. Please verify your GITHUB_TOKEN and GITHUB_USERNAME." + fi + + # Install DocumentDB operator from OCI registry + log "Pulling and installing DocumentDB operator from ghcr.io/${OPERATOR_GITHUB_ORG}/documentdb-operator..." + helm install documentdb-operator \ + oci://ghcr.io/${OPERATOR_GITHUB_ORG}/documentdb-operator \ + --version ${OPERATOR_CHART_VERSION} \ + --namespace documentdb-operator \ + --create-namespace \ + --wait \ + --timeout 10m + + if [ $? -eq 0 ]; then + success "DocumentDB operator installed successfully from ${OPERATOR_GITHUB_ORG}/documentdb-operator:${OPERATOR_CHART_VERSION}" + else + error "Failed to install DocumentDB operator from OCI registry. 
Please verify: +- Your GitHub token has 'read:packages' scope +- You have access to ${OPERATOR_GITHUB_ORG}/documentdb-operator repository +- The chart version ${OPERATOR_CHART_VERSION} exists" + fi + + # Wait for operator to be ready + log "Waiting for DocumentDB operator to be ready..." + kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=documentdb-operator -n documentdb-operator --timeout=300s || warn "DocumentDB operator pods may still be starting" + + success "DocumentDB operator installed" +} + +# Deploy DocumentDB instance (optional) +deploy_documentdb_instance() { + if [ "$DEPLOY_INSTANCE" != "true" ]; then + warn "Skipping DocumentDB instance deployment (--skip-instance specified or not enabled)" + return 0 + fi + + log "Deploying DocumentDB instance..." + + # Check if operator is installed + if ! kubectl get deployment -n documentdb-operator documentdb-operator &> /dev/null; then + error "DocumentDB operator not found. Cannot deploy instance without operator." + fi + + # Create DocumentDB namespace + kubectl apply -f - <:10260/" + fi + echo "" + echo "โš ๏ธ IMPORTANT: Run './delete-cluster.sh' when done to avoid Azure charges!" + echo "==================================================" +} + +# Main execution +main() { + log "Starting DocumentDB AKS cluster setup..." 
+ log "Configuration:" + log " Cluster: $CLUSTER_NAME" + log " Resource Group: $RESOURCE_GROUP" + log " Location: $LOCATION" + log " Install Operator: $INSTALL_OPERATOR" + log " Deploy Instance: $DEPLOY_INSTANCE" + echo "" + + check_prerequisites + + # Simple logic based on parameters + if [ "$INSTALL_OPERATOR" == "true" ] && [ "$DEPLOY_INSTANCE" != "true" ]; then + # Case 1: --install-operator only + log "๐Ÿ”ง Installing operator only (assumes cluster exists)" + setup_kubeconfig + install_documentdb_operator + + elif [ "$DEPLOY_INSTANCE" == "true" ] && [ "$INSTALL_OPERATOR" != "true" ]; then + # Case 2: --deploy-instance only + log "๐Ÿš€ Deploying DocumentDB instance only (assumes cluster+operator exist)" + setup_kubeconfig + deploy_documentdb_instance + + elif [ "$INSTALL_OPERATOR" == "true" ] && [ "$DEPLOY_INSTANCE" == "true" ]; then + # Case 3: --install-all (both flags set) + log "๐ŸŽฏ Installing everything: cluster + operator + instance" + setup_cluster_infrastructure + install_documentdb_operator + deploy_documentdb_instance + + else + # Case 4: No flags - create cluster only + log "๐Ÿ—๏ธ Creating cluster only (no operator, no instance)" + setup_cluster_infrastructure + fi + + print_summary +} + +# Helper function to set up cluster infrastructure +setup_cluster_infrastructure() { + # Check if cluster already exists + CLUSTER_EXISTS=$(az aks show --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --query "name" -o tsv 2>/dev/null) + + if [ "$CLUSTER_EXISTS" == "$CLUSTER_NAME" ]; then + log "โœ… Cluster $CLUSTER_NAME already exists, skipping infrastructure setup" + setup_kubeconfig + else + log "Creating new cluster and infrastructure..." + create_resource_group + create_cluster + install_azure_csi_drivers + configure_load_balancer + install_cert_manager + create_storage_class + fi +} + +# Helper function to set up kubeconfig +setup_kubeconfig() { + # Verify cluster exists + if ! 
az aks show --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME >/dev/null 2>&1; then + error "Cluster $CLUSTER_NAME not found. Create cluster first." + fi + + # Get cluster credentials + log "Getting cluster credentials..." + az aks get-credentials --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --overwrite-existing + + # Handle WSL case + if grep -qi microsoft /proc/version 2>/dev/null; then + log "Detected WSL environment, copying kubeconfig from Windows to WSL..." + WIN_KUBE_CONFIG="/mnt/c/Users/$(whoami)/.kube/config" + if [ -f "$WIN_KUBE_CONFIG" ]; then + mkdir -p ~/.kube + cp "$WIN_KUBE_CONFIG" ~/.kube/config + chmod 600 ~/.kube/config + log "Kubeconfig copied to WSL" + fi + fi + + success "Cluster credentials configured" +} + +# Run main function +main "$@" \ No newline at end of file