Temporal Deployment

This guide provides comprehensive deployment instructions for a Temporal.io server cluster in enterprise environments, covering high-availability setup, configuration, scaling, and operational best practices.

Overview

The Temporal deployment includes:

- Temporal server cluster with multiple services
- High availability configuration
- Auto-scaling and load balancing
- Service mesh integration
- Monitoring and observability
- Configuration management

Architecture Components

Temporal Services

graph TB
    subgraph "Load Balancer"
        LB[Application Load Balancer]
    end

    subgraph "Temporal Cluster"
        FRONTEND[Frontend Service]
        HISTORY[History Service]
        MATCHING[Matching Service]
        WORKER[Worker Service]
        WEB[Web UI]
    end

    subgraph "Data Layer"
        POSTGRES[(PostgreSQL)]
        REDIS[(Redis)]
        ES[(Elasticsearch)]
    end

    subgraph "External Services"
        WORKERS[Application Workers]
        APIS[API Services]
    end

    LB --> FRONTEND
    LB --> WEB

    FRONTEND --> HISTORY
    FRONTEND --> MATCHING
    FRONTEND --> WORKER

    HISTORY --> POSTGRES
    MATCHING --> POSTGRES
    WORKER --> POSTGRES

    FRONTEND --> REDIS
    WEB --> FRONTEND

    WORKERS --> FRONTEND
    APIS --> FRONTEND

Helm Chart Configuration

Main Temporal Chart Values

# helm/values/temporal/base.yaml
server:
  image:
    repository: temporalio/server
    tag: "1.20.0"
    pullPolicy: IfNotPresent

  replicaCount: 3

  config:
    # Persistence configuration
    persistence:
      defaultStore: default
      visibilityStore: visibility
      numHistoryShards: 4096

      datastores:
        default:
          driver: "postgres"
          host: "postgresql-primary"
          port: 5432
          database: "temporal"
          user: "temporal"
          password: "${POSTGRES_PASSWORD}"
          maxConns: 50
          maxIdleConns: 10
          maxConnLifetime: "1h"
          tls:
            enabled: true
            caFile: "/etc/temporal/certs/ca.crt"
            certFile: "/etc/temporal/certs/tls.crt"
            keyFile: "/etc/temporal/certs/tls.key"
            serverName: "postgresql-primary"

        visibility:
          driver: "postgres"
          host: "postgresql-primary"
          port: 5432
          database: "temporal_visibility"
          user: "temporal"
          password: "${POSTGRES_PASSWORD}"
          maxConns: 10
          maxIdleConns: 5
          maxConnLifetime: "1h"
          tls:
            enabled: true
            caFile: "/etc/temporal/certs/ca.crt"
            certFile: "/etc/temporal/certs/tls.crt"
            keyFile: "/etc/temporal/certs/tls.key"
            serverName: "postgresql-primary"

    # Global configuration
    global:
      membership:
        maxJoinDuration: 30s
        broadcastAddress: ""

      pprof:
        port: 7936

      metrics:
        prometheus:
          timerType: "histogram"
          listenAddress: "0.0.0.0:9090"

        statsd: {}

      tls:
        internode:
          server:
            certFile: "/etc/temporal/certs/tls.crt"
            keyFile: "/etc/temporal/certs/tls.key"
            clientCAFile: "/etc/temporal/certs/ca.crt"
            requireClientAuth: true
          client:
            serverName: "temporal"
            certFile: "/etc/temporal/certs/tls.crt"
            keyFile: "/etc/temporal/certs/tls.key"
            caFile: "/etc/temporal/certs/ca.crt"

    # Service-specific configurations
    services:
      frontend:
        rpc:
          grpcPort: 7233
          membershipPort: 6933
          bindOnLocalHost: false

        metrics:
          prometheus:
            listenAddress: "0.0.0.0:9090"

        # Rate limiting
        rps: 1200

        # Authentication
        authorizer:
          jwtKeyProvider:
            keySourceURIs:
              - "https://auth.company.com/.well-known/jwks.json"
            refreshInterval: "1h"
          permissionsClaimName: "permissions"

      history:
        rpc:
          grpcPort: 7234
          membershipPort: 6934
          bindOnLocalHost: false

        metrics:
          prometheus:
            listenAddress: "0.0.0.0:9090"

        # History service tuning
        numHistoryShards: 4096
        historyCountLimitError: 50000
        historyCountLimitWarn: 10000

      matching:
        rpc:
          grpcPort: 7235
          membershipPort: 6935
          bindOnLocalHost: false

        metrics:
          prometheus:
            listenAddress: "0.0.0.0:9090"

        # Matching service tuning
        numTasklistWritePartitions: 3
        numTasklistReadPartitions: 3

      worker:
        rpc:
          grpcPort: 7239
          membershipPort: 6939
          bindOnLocalHost: false

        metrics:
          prometheus:
            listenAddress: "0.0.0.0:9090"

  # Resource configuration per service
  frontend:
    replicaCount: 3
    resources:
      requests:
        memory: "512Mi"
        cpu: "250m"
      limits:
        memory: "1Gi"
        cpu: "500m"

    service:
      type: ClusterIP
      port: 7233
      annotations:
        service.beta.kubernetes.io/aws-load-balancer-type: nlb
        service.beta.kubernetes.io/aws-load-balancer-internal: "true"

    autoscaling:
      enabled: true
      minReplicas: 3
      maxReplicas: 10
      targetCPUUtilizationPercentage: 70
      targetMemoryUtilizationPercentage: 80

  history:
    replicaCount: 3
    resources:
      requests:
        memory: "1Gi"
        cpu: "500m"
      limits:
        memory: "2Gi"
        cpu: "1000m"

    autoscaling:
      enabled: true
      minReplicas: 3
      maxReplicas: 10
      targetCPUUtilizationPercentage: 70
      targetMemoryUtilizationPercentage: 80

  matching:
    replicaCount: 3
    resources:
      requests:
        memory: "512Mi"
        cpu: "250m"
      limits:
        memory: "1Gi"
        cpu: "500m"

    autoscaling:
      enabled: true
      minReplicas: 3
      maxReplicas: 8
      targetCPUUtilizationPercentage: 70
      targetMemoryUtilizationPercentage: 80

  worker:
    replicaCount: 2
    resources:
      requests:
        memory: "512Mi"
        cpu: "250m"
      limits:
        memory: "1Gi"
        cpu: "500m"

# Web UI configuration
web:
  enabled: true
  image:
    repository: temporalio/web
    tag: "2.8.0"
    pullPolicy: IfNotPresent

  replicaCount: 2

  config:
    auth:
      enabled: true
      providers:
        - label: "Company SSO"
          type: "oidc"
          providerUrl: "https://auth.company.com"
          clientId: "temporal-web"
          clientSecret: "${OIDC_CLIENT_SECRET}"
          scopes:
            - "openid"
            - "profile"
            - "email"
          callbackUrl: "https://temporal.company.com/auth/callback"

    routing:
      default_to_namespace: "default"
      issue_report_link: "https://github.com/company/temporal-issues"

    temporal:
      grpc_endpoint: "temporal-frontend:7233"
      grpc_ca: "/etc/temporal/certs/ca.crt"
      grpc_cert: "/etc/temporal/certs/tls.crt"
      grpc_key: "/etc/temporal/certs/tls.key"

  service:
    type: ClusterIP
    port: 8080

  ingress:
    enabled: true
    className: "alb"
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-west-2:ACCOUNT:certificate/CERT-ID"
      alb.ingress.kubernetes.io/ssl-redirect: "443"
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    hosts:
      - host: temporal.company.com
        paths:
          - path: /
            pathType: Prefix
    tls:
      - hosts:
          - temporal.company.com
        secretName: temporal-web-tls

# Elasticsearch for advanced visibility (optional)
elasticsearch:
  enabled: true
  image:
    repository: elasticsearch
    tag: "7.17.0"

  master:
    replicaCount: 3
    resources:
      requests:
        memory: "1Gi"
        cpu: "500m"
      limits:
        memory: "2Gi"
        cpu: "1000m"

  data:
    replicaCount: 3
    resources:
      requests:
        memory: "2Gi"
        cpu: "1000m"
      limits:
        memory: "4Gi"
        cpu: "2000m"

    persistence:
      storageClass: "gp3"
      size: "100Gi"

# Prometheus configuration
prometheus:
  enabled: true
  serviceMonitor:
    enabled: true
    interval: "30s"
    namespace: "monitoring"
    additionalLabels:
      app: temporal
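
The values above expect cluster TLS material under /etc/temporal/certs, which the chart mounts from a Kubernetes Secret. The sketch below is a minimal example assuming pre-generated PEM files and a hypothetical Secret name temporal-cluster-tls (align the name with whatever your chart's volume mounts actually reference); the helm template step simply renders the merged values offline before anything is installed.

# Hypothetical example: create the Secret backing /etc/temporal/certs (the Secret name is an assumption)
kubectl create namespace temporal-system --dry-run=client -o yaml | kubectl apply -f -
kubectl -n temporal-system create secret generic temporal-cluster-tls \
    --from-file=ca.crt=./certs/ca.crt \
    --from-file=tls.crt=./certs/tls.crt \
    --from-file=tls.key=./certs/tls.key

# Render the chart offline to validate the merged values before an install
helm template temporal temporalio/temporal \
    --namespace temporal-system \
    --values helm/values/temporal/base.yaml \
    --values helm/values/temporal/production.yaml \
    > /tmp/temporal-rendered.yaml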

Environment-Specific Values

Development Environment

# helm/values/temporal/development.yaml
server:
  replicaCount: 1

  frontend:
    replicaCount: 1
    resources:
      requests:
        memory: "256Mi"
        cpu: "100m"
      limits:
        memory: "512Mi"
        cpu: "250m"
    autoscaling:
      enabled: false

  history:
    replicaCount: 1
    resources:
      requests:
        memory: "512Mi"
        cpu: "250m"
      limits:
        memory: "1Gi"
        cpu: "500m"
    autoscaling:
      enabled: false

  matching:
    replicaCount: 1
    resources:
      requests:
        memory: "256Mi"
        cpu: "100m"
      limits:
        memory: "512Mi"
        cpu: "250m"
    autoscaling:
      enabled: false

  worker:
    replicaCount: 1

web:
  replicaCount: 1
  config:
    auth:
      enabled: false

elasticsearch:
  enabled: false

prometheus:
  enabled: true
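
For quick local iteration, these overrides can be applied directly. The sketch below mirrors what scripts/deploy-temporal.sh does for the development environment and assumes the base values file that script references exists.

helm upgrade --install temporal temporalio/temporal \
    --namespace temporal-system --create-namespace \
    --values helm/values/temporal/base.yaml \
    --values helm/values/temporal/development.yaml \
    --wait --timeout 15m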

Production Environment

# helm/values/temporal/production.yaml
server:
  replicaCount: 5

  frontend:
    replicaCount: 5
    resources:
      requests:
        memory: "1Gi"
        cpu: "500m"
      limits:
        memory: "2Gi"
        cpu: "1000m"
    autoscaling:
      enabled: true
      minReplicas: 5
      maxReplicas: 20

  history:
    replicaCount: 5
    resources:
      requests:
        memory: "2Gi"
        cpu: "1000m"
      limits:
        memory: "4Gi"
        cpu: "2000m"
    autoscaling:
      enabled: true
      minReplicas: 5
      maxReplicas: 15

  matching:
    replicaCount: 5
    resources:
      requests:
        memory: "1Gi"
        cpu: "500m"
      limits:
        memory: "2Gi"
        cpu: "1000m"
    autoscaling:
      enabled: true
      minReplicas: 5
      maxReplicas: 12

  worker:
    replicaCount: 3

web:
  replicaCount: 3
  config:
    auth:
      enabled: true

elasticsearch:
  enabled: true
  master:
    replicaCount: 3
  data:
    replicaCount: 5

prometheus:
  enabled: true
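
Before rolling these values into production, it can help to preview the change set against the running release. This is a sketch that assumes the helm-diff plugin is available; it is not part of core Helm.

# Optional preview; the helm-diff plugin is an assumption, not part of core Helm
helm plugin install https://github.com/databus23/helm-diff 2>/dev/null || true
helm diff upgrade temporal temporalio/temporal \
    --namespace temporal-system \
    --values helm/values/temporal/base.yaml \
    --values helm/values/temporal/production.yaml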

Secrets Configuration

Database Connection Secrets

# k8s/temporal/secrets/database-connection.yaml
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: temporal-database-connection
  namespace: temporal-system
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-backend
    kind: SecretStore
  target:
    name: temporal-database-connection
    creationPolicy: Owner
    template:
      type: Opaque
      data:
        POSTGRES_PASSWORD: "{{ .password }}"
        POSTGRES_CONNECTION_STRING: "postgres://{{ .username }}:{{ .password }}@{{ .host }}:{{ .port }}/{{ .database }}?sslmode=require"
        VISIBILITY_CONNECTION_STRING: "postgres://{{ .username }}:{{ .password }}@{{ .host }}:{{ .port }}/{{ .visibility_database }}?sslmode=require"
  data:
  - secretKey: username
    remoteRef:
      key: temporal/database
      property: username
  - secretKey: password
    remoteRef:
      key: temporal/database
      property: password
  - secretKey: host
    remoteRef:
      key: temporal/database
      property: host
  - secretKey: port
    remoteRef:
      key: temporal/database
      property: port
  - secretKey: database
    remoteRef:
      key: temporal/database
      property: database
  - secretKey: visibility_database
    remoteRef:
      key: temporal/database
      property: visibility_database
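
The ExternalSecret above only maps keys; the values themselves live in Vault under temporal/database. A minimal seeding sketch follows, assuming a KV v2 engine mounted at secret/ (the actual mount path and auth method depend on how the vault-backend SecretStore is configured).

# Hypothetical seeding of the keys referenced by remoteRef (the "secret/" mount path is an assumption)
vault kv put secret/temporal/database \
    username=temporal \
    password="$(openssl rand -base64 24)" \
    host=postgresql-primary \
    port=5432 \
    database=temporal \
    visibility_database=temporal_visibility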

Authentication Secrets

# k8s/temporal/secrets/auth-secrets.yaml
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: temporal-auth-secrets
  namespace: temporal-system
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-backend
    kind: SecretStore
  target:
    name: temporal-auth-secrets
    creationPolicy: Owner
  data:
  - secretKey: OIDC_CLIENT_SECRET
    remoteRef:
      key: temporal/auth
      property: oidc_client_secret
  - secretKey: JWT_PRIVATE_KEY
    remoteRef:
      key: temporal/auth
      property: jwt_private_key
  - secretKey: JWT_PUBLIC_KEY
    remoteRef:
      key: temporal/auth
      property: jwt_public_key
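
Once both ExternalSecrets are applied, confirm they reconciled and produced the target Secrets before the Helm install references them:

kubectl -n temporal-system get externalsecret temporal-database-connection temporal-auth-secrets
kubectl -n temporal-system get secret temporal-database-connection temporal-auth-secrets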

Service Configuration

Service Definitions

# k8s/temporal/services/temporal-services.yaml
apiVersion: v1
kind: Service
metadata:
  name: temporal-frontend
  namespace: temporal-system
  labels:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: frontend
spec:
  type: ClusterIP
  ports:
  - port: 7233
    targetPort: 7233
    protocol: TCP
    name: rpc
  - port: 9090
    targetPort: 9090
    protocol: TCP
    name: metrics
  selector:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: frontend

---
apiVersion: v1
kind: Service
metadata:
  name: temporal-history
  namespace: temporal-system
  labels:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: history
spec:
  type: ClusterIP
  ports:
  - port: 7234
    targetPort: 7234
    protocol: TCP
    name: rpc
  - port: 9090
    targetPort: 9090
    protocol: TCP
    name: metrics
  selector:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: history

---
apiVersion: v1
kind: Service
metadata:
  name: temporal-matching
  namespace: temporal-system
  labels:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: matching
spec:
  type: ClusterIP
  ports:
  - port: 7235
    targetPort: 7235
    protocol: TCP
    name: rpc
  - port: 9090
    targetPort: 9090
    protocol: TCP
    name: metrics
  selector:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: matching

---
apiVersion: v1
kind: Service
metadata:
  name: temporal-web
  namespace: temporal-system
  labels:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: web
spec:
  type: ClusterIP
  ports:
  - port: 8080
    targetPort: 8080
    protocol: TCP
    name: http
  selector:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: web
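
With the Services in place, a port-forward from a workstation is a simple way to smoke-test the frontend without going through the load balancer. The sketch below assumes the Temporal CLI is installed locally and that client-facing TLS is not enforced on the frontend.

kubectl -n temporal-system port-forward svc/temporal-frontend 7233:7233 &
temporal workflow list --address localhost:7233 --namespace default
kill %1   # stop the port-forward when done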

Load Balancer Configuration

# k8s/temporal/services/load-balancer.yaml
apiVersion: v1
kind: Service
metadata:
  name: temporal-frontend-external
  namespace: temporal-system
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
    service.beta.kubernetes.io/aws-load-balancer-internal: "true"
    service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: "true"
    service.beta.kubernetes.io/aws-load-balancer-backend-protocol: "tcp"
    service.beta.kubernetes.io/aws-load-balancer-healthcheck-protocol: "tcp"
    service.beta.kubernetes.io/aws-load-balancer-healthcheck-interval: "10"
    service.beta.kubernetes.io/aws-load-balancer-healthcheck-timeout: "6"
    service.beta.kubernetes.io/aws-load-balancer-healthy-threshold: "2"
    service.beta.kubernetes.io/aws-load-balancer-unhealthy-threshold: "2"
spec:
  type: LoadBalancer
  ports:
  - port: 7233
    targetPort: 7233
    protocol: TCP
    name: rpc
  selector:
    app.kubernetes.io/name: temporal
    app.kubernetes.io/component: frontend
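
After the LoadBalancer Service is provisioned, the NLB's internal DNS name is what application workers outside the cluster should dial; this sketch reads it back from the Service status.

NLB_HOST=$(kubectl -n temporal-system get svc temporal-frontend-external \
    -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
echo "Internal Temporal gRPC endpoint: ${NLB_HOST}:7233"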

Monitoring and Observability

ServiceMonitor for Prometheus

# k8s/temporal/monitoring/service-monitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: temporal-server
  namespace: temporal-system
  labels:
    app.kubernetes.io/name: temporal
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: temporal
  endpoints:
  - port: metrics
    interval: 30s
    path: /metrics
    honorLabels: true
  namespaceSelector:
    matchNames:
    - temporal-system
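
To confirm the ServiceMonitor is actually being scraped, the metric used by the dashboard below can be queried directly. This sketch assumes a prometheus-operator deployment exposing the usual prometheus-operated Service in the monitoring namespace.

kubectl -n monitoring port-forward svc/prometheus-operated 9090:9090 &
curl -sG 'http://localhost:9090/api/v1/query' \
    --data-urlencode 'query=sum(rate(temporal_request_total[5m]))'
kill %1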

Grafana Dashboard ConfigMap

# k8s/temporal/monitoring/grafana-dashboard.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: temporal-dashboard
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
data:
  temporal-overview.json: |
    {
      "dashboard": {
        "id": null,
        "title": "Temporal Overview",
        "tags": ["temporal"],
        "style": "dark",
        "timezone": "browser",
        "panels": [
          {
            "id": 1,
            "title": "Frontend Service Health",
            "type": "stat",
            "targets": [
              {
                "expr": "up{job=\"temporal-server\", service=\"temporal-frontend\"}",
                "legendFormat": "{{instance}}"
              }
            ],
            "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}
          },
          {
            "id": 2,
            "title": "Request Rate",
            "type": "graph",
            "targets": [
              {
                "expr": "rate(temporal_request_total[5m])",
                "legendFormat": "{{operation}}"
              }
            ],
            "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
          },
          {
            "id": 3,
            "title": "Request Latency",
            "type": "graph",
            "targets": [
              {
                "expr": "histogram_quantile(0.95, rate(temporal_request_latency_bucket[5m]))",
                "legendFormat": "95th percentile"
              },
              {
                "expr": "histogram_quantile(0.50, rate(temporal_request_latency_bucket[5m]))",
                "legendFormat": "50th percentile"
              }
            ],
            "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}
          },
          {
            "id": 4,
            "title": "Active Workflows",
            "type": "graph",
            "targets": [
              {
                "expr": "temporal_workflow_active_count",
                "legendFormat": "{{namespace}}"
              }
            ],
            "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}
          }
        ],
        "time": {
          "from": "now-1h",
          "to": "now"
        },
        "refresh": "30s"
      }
    }
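
The dashboard is loaded by convention rather than by an API call: the grafana_dashboard: "1" label is what a Grafana dashboard sidecar (for example the one shipped with kube-prometheus-stack) watches for. A quick way to apply and verify it, assuming that sidecar convention:

kubectl apply -f k8s/temporal/monitoring/grafana-dashboard.yaml
kubectl -n monitoring get configmap temporal-dashboard --show-labels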

Deployment Automation

Deployment Script

#!/bin/bash
# scripts/deploy-temporal.sh

set -euo pipefail

ENVIRONMENT=${1:-development}
NAMESPACE="temporal-system"
HELM_CHART_VERSION=${2:-1.20.0}

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

log() {
    echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] $1${NC}"
}

warn() {
    echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1${NC}"
}

error() {
    echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1${NC}"
    exit 1
}

# Validate environment
if [[ ! "$ENVIRONMENT" =~ ^(development|staging|production)$ ]]; then
    error "Invalid environment. Must be one of: development, staging, production"
fi

log "Deploying Temporal cluster to $ENVIRONMENT environment"

# Check prerequisites
log "Checking prerequisites..."
if ! command -v kubectl &> /dev/null; then
    error "kubectl is required but not installed"
fi

if ! command -v helm &> /dev/null; then
    error "helm is required but not installed"
fi

# Verify cluster connectivity
if ! kubectl cluster-info > /dev/null 2>&1; then
    error "Cannot connect to Kubernetes cluster"
fi

# Create namespace if it doesn't exist
log "Ensuring namespace exists..."
kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

# Add Temporal Helm repository
log "Adding Temporal Helm repository..."
helm repo add temporalio https://go.temporal.io/helm-charts
helm repo update

# Apply secrets
log "Applying secrets..."
kubectl apply -f k8s/temporal/secrets/

# Wait for secrets to be ready
log "Waiting for external secrets to sync..."
kubectl wait --for=condition=Ready externalsecret/temporal-database-connection -n "$NAMESPACE" --timeout=300s
kubectl wait --for=condition=Ready externalsecret/temporal-auth-secrets -n "$NAMESPACE" --timeout=300s

# Deploy Temporal server
log "Deploying Temporal server..."
helm upgrade --install temporal temporalio/temporal \
    --namespace "$NAMESPACE" \
    --version "$HELM_CHART_VERSION" \
    --values "helm/values/temporal/base.yaml" \
    --values "helm/values/temporal/${ENVIRONMENT}.yaml" \
    --wait --timeout=15m

# Apply additional Kubernetes resources
log "Applying additional resources..."
kubectl apply -f k8s/temporal/services/
kubectl apply -f k8s/temporal/monitoring/

# Wait for services to be ready
log "Waiting for services to be ready..."
kubectl wait --for=condition=available deployment/temporal-frontend -n "$NAMESPACE" --timeout=600s
kubectl wait --for=condition=available deployment/temporal-history -n "$NAMESPACE" --timeout=600s
kubectl wait --for=condition=available deployment/temporal-matching -n "$NAMESPACE" --timeout=600s
kubectl wait --for=condition=available deployment/temporal-worker -n "$NAMESPACE" --timeout=600s

if kubectl get deployment temporal-web -n "$NAMESPACE" > /dev/null 2>&1; then
    kubectl wait --for=condition=available deployment/temporal-web -n "$NAMESPACE" --timeout=600s
fi

# Verify deployment
log "Verifying deployment..."
FRONTEND_READY=$(kubectl get deployment temporal-frontend -n "$NAMESPACE" -o jsonpath='{.status.readyReplicas}')
HISTORY_READY=$(kubectl get deployment temporal-history -n "$NAMESPACE" -o jsonpath='{.status.readyReplicas}')
MATCHING_READY=$(kubectl get deployment temporal-matching -n "$NAMESPACE" -o jsonpath='{.status.readyReplicas}')

log "Frontend replicas ready: $FRONTEND_READY"
log "History replicas ready: $HISTORY_READY"
log "Matching replicas ready: $MATCHING_READY"

# Test connectivity
log "Testing connectivity..."
kubectl run temporal-test -n "$NAMESPACE" --image=temporalio/admin-tools:latest --rm -i --restart=Never -- \
    temporal workflow list --address temporal-frontend:7233 --namespace default || warn "Connectivity test failed"

log "Temporal deployment completed successfully!"
log "Access the web UI at: https://temporal.${ENVIRONMENT}.company.com"
log "gRPC endpoint: temporal-frontend.${NAMESPACE}.svc.cluster.local:7233"

Health Check Script

#!/bin/bash
# scripts/health-check-temporal.sh

set -euo pipefail

NAMESPACE="temporal-system"

log() {
    echo -e "\033[0;32m[$(date +'%Y-%m-%d %H:%M:%S')] $1\033[0m"
}

error() {
    echo -e "\033[0;31m[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1\033[0m"
}

warn() {
    echo -e "\033[1;33m[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1\033[0m"
}

log "Running Temporal cluster health checks..."

# Check pod status
log "Checking pod status..."
PODS_NOT_READY=$(kubectl get pods -n "$NAMESPACE" -o jsonpath='{.items[?(@.status.phase!="Running")].metadata.name}')
if [[ -n "$PODS_NOT_READY" ]]; then
    warn "Pods not ready: $PODS_NOT_READY"
else
    log "✓ All pods are running"
fi

# Check service endpoints
log "Checking service endpoints..."
SERVICES=("temporal-frontend" "temporal-history" "temporal-matching" "temporal-worker")
for service in "${SERVICES[@]}"; do
    ENDPOINTS=$(kubectl get endpoints "$service" -n "$NAMESPACE" -o jsonpath='{.subsets[*].addresses[*].ip}' | wc -w)
    if [[ $ENDPOINTS -gt 0 ]]; then
        log "✓ $service has $ENDPOINTS endpoints"
    else
        error "✗ $service has no endpoints"
    fi
done

# Check database connectivity
log "Checking database connectivity..."
# Run the probe as the if condition so a failure is reported instead of aborting under set -e
if kubectl run db-test -n "$NAMESPACE" --image=postgres:13 --rm -i --restart=Never -- \
    pg_isready -h postgresql-primary -p 5432 -U temporal > /dev/null 2>&1; then
    log "✓ Database connectivity OK"
else
    error "✗ Database connectivity failed"
fi

# Check Temporal frontend health
log "Checking Temporal frontend health..."
if kubectl run temporal-health -n "$NAMESPACE" --image=temporalio/admin-tools:latest --rm -i --restart=Never -- \
    temporal operator cluster health --address temporal-frontend:7233 > /dev/null 2>&1; then
    log "✓ Temporal frontend health OK"
else
    error "✗ Temporal frontend health check failed"
fi

# Check metrics endpoints
log "Checking metrics endpoints..."
for service in "${SERVICES[@]}"; do
    # Unique pod name per service avoids name collisions across loop iterations
    if kubectl run "metrics-test-${service}" -n "$NAMESPACE" --image=curlimages/curl:latest --rm -i --restart=Never -- \
        curl -fsS "http://${service}:9090/metrics" > /dev/null 2>&1; then
        log "✓ $service metrics endpoint OK"
    else
        warn "✗ $service metrics endpoint failed"
    fi
done

# Check web UI (if enabled)
if kubectl get deployment temporal-web -n "$NAMESPACE" > /dev/null 2>&1; then
    log "Checking web UI health..."
    if kubectl run web-test -n "$NAMESPACE" --image=curlimages/curl:latest --rm -i --restart=Never -- \
        curl -fsS "http://temporal-web:8080/" > /dev/null 2>&1; then
        log "✓ Web UI health OK"
    else
        warn "✗ Web UI health check failed"
    fi
fi

log "Health check completed"

Rolling Update Script

#!/bin/bash
# scripts/rolling-update-temporal.sh

set -euo pipefail

NAMESPACE="temporal-system"
NEW_VERSION=${1:-latest}

log() {
    echo -e "\033[0;32m[$(date +'%Y-%m-%d %H:%M:%S')] $1\033[0m"
}

warn() {
    echo -e "\033[1;33m[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1\033[0m"
}

error() {
    echo -e "\033[0;31m[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1\033[0m"
    exit 1
}

log "Starting rolling update to version: $NEW_VERSION"

# Backup current deployment
log "Backing up current deployment..."
helm get values temporal -n "$NAMESPACE" > "temporal-backup-$(date +%Y%m%d-%H%M%S).yaml"

# Update Temporal server
log "Updating Temporal server..."
helm upgrade temporal temporalio/temporal \
    --namespace "$NAMESPACE" \
    --set server.image.tag="$NEW_VERSION" \
    --reuse-values \
    --wait --timeout=15m

# Monitor rollout
log "Monitoring rollout status..."
kubectl rollout status deployment/temporal-frontend -n "$NAMESPACE" --timeout=600s
kubectl rollout status deployment/temporal-history -n "$NAMESPACE" --timeout=600s
kubectl rollout status deployment/temporal-matching -n "$NAMESPACE" --timeout=600s
kubectl rollout status deployment/temporal-worker -n "$NAMESPACE" --timeout=600s

# Verify update
log "Verifying update..."
CURRENT_VERSION=$(kubectl get deployment temporal-frontend -n "$NAMESPACE" -o jsonpath='{.spec.template.spec.containers[0].image}' | cut -d: -f2)
if [[ "$CURRENT_VERSION" == "$NEW_VERSION" ]]; then
    log "✓ Update completed successfully to version: $CURRENT_VERSION"
else
    error "✗ Update failed. Current version: $CURRENT_VERSION, Expected: $NEW_VERSION"
fi

# Run health checks
log "Running post-update health checks..."
./scripts/health-check-temporal.sh

log "Rolling update completed successfully!"

This guide provides an enterprise-grade Temporal deployment: a highly available server cluster, monitoring and observability, and automation scripts with operational procedures for production environments.