Skip to content

Database Setup

This guide provides comprehensive database setup and configuration for Temporal.io enterprise deployment, covering PostgreSQL installation, configuration, optimization, backup, and maintenance procedures.

Overview

The database setup includes:

- PostgreSQL cluster deployment and configuration
- High availability setup with replication
- Performance tuning and optimization
- Backup and recovery procedures
- Monitoring and maintenance
- Migration and schema management

PostgreSQL Deployment

High Availability PostgreSQL Cluster

PostgreSQL Helm Chart Configuration

# helm/values/database/postgresql-ha.yaml
postgresql:
  image:
    tag: "15.4"

  # Authentication — the chart itself creates the "temporal" user and the
  # "temporal" database from these values; all passwords come from the
  # referenced secret, never from this file.
  auth:
    postgresPassword: ""  # Set via secret
    username: "temporal"
    password: ""  # Set via secret
    database: "temporal"
    existingSecret: "postgresql-credentials"
    secretKeys:
      adminPasswordKey: "postgres-password"
      userPasswordKey: "password"

  # Architecture
  architecture: replication

  # Primary configuration
  primary:
    name: primary
    persistence:
      enabled: true
      storageClass: "gp3"
      size: 100Gi

    resources:
      limits:
        memory: 4Gi
        cpu: 2000m
      requests:
        memory: 2Gi
        cpu: 1000m

    configuration: |
      # PostgreSQL configuration
      max_connections = 200
      shared_buffers = 1GB
      effective_cache_size = 3GB
      maintenance_work_mem = 256MB
      checkpoint_completion_target = 0.9
      wal_buffers = 16MB
      default_statistics_target = 100
      random_page_cost = 1.1
      effective_io_concurrency = 200
      work_mem = 4MB
      min_wal_size = 1GB
      max_wal_size = 4GB

      # Logging
      log_destination = 'stderr'
      logging_collector = on
      log_directory = 'log'
      log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
      log_rotation_age = 1d
      log_rotation_size = 100MB
      log_min_duration_statement = 1000
      log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
      log_checkpoints = on
      log_connections = on
      log_disconnections = on
      log_lock_waits = on
      log_temp_files = 10MB

      # SSL
      ssl = on
      ssl_cert_file = '/etc/ssl/certs/tls.crt'
      ssl_key_file = '/etc/ssl/private/tls.key'
      ssl_ca_file = '/etc/ssl/certs/ca.crt'

    initdb:
      scripts:
        # FIX: this must be a shell script, not a .sql file. Bitnami executes
        # *.sql init files verbatim through psql, so ${TEMPORAL_PASSWORD}-style
        # placeholders are never substituted and would become literal password
        # strings. A .sh script sees the container environment.
        # NOTE(review): MONITOR_PASSWORD must be injected into the primary
        # container (e.g. primary.extraEnvVars sourced from the
        # postgresql-credentials secret) — confirm in the final values.
        01_temporal_setup.sh: |
          #!/bin/bash
          set -euo pipefail

          # The chart's auth section already creates the "temporal" user and
          # the "temporal" database, so only the extras are created here.
          psql -v ON_ERROR_STOP=1 --username postgres \
            -c "CREATE DATABASE temporal_visibility OWNER temporal;" \
            -c "GRANT ALL PRIVILEGES ON DATABASE temporal TO temporal;" \
            -c "GRANT ALL PRIVILEGES ON DATABASE temporal_visibility TO temporal;" \
            -c "CREATE USER temporal_monitor WITH PASSWORD '${MONITOR_PASSWORD}';" \
            -c "GRANT CONNECT ON DATABASE temporal TO temporal_monitor;" \
            -c "GRANT CONNECT ON DATABASE temporal_visibility TO temporal_monitor;"

          # FIX: schema-level grants are per-database, so they must run while
          # connected to each Temporal database — the original ran them in the
          # default "postgres" database where they had no effect.
          for db in temporal temporal_visibility; do
            psql -v ON_ERROR_STOP=1 --username postgres --dbname "$db" \
              -c "GRANT USAGE ON SCHEMA public TO temporal_monitor;" \
              -c "GRANT SELECT ON ALL TABLES IN SCHEMA public TO temporal_monitor;" \
              -c "ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO temporal_monitor;"
          done

  # Read replica configuration
  readReplicas:
    replicaCount: 2

    persistence:
      enabled: true
      storageClass: "gp3"
      size: 100Gi

    resources:
      limits:
        memory: 2Gi
        cpu: 1000m
      requests:
        memory: 1Gi
        cpu: 500m

  # Backup configuration
  backup:
    enabled: true
    cronjob:
      schedule: "0 2 * * *"  # Daily at 2 AM
      restartPolicy: OnFailure
      storage:
        storageClass: "gp3"
        size: 500Gi

    retention:
      days: 30

    s3:
      enabled: true
      bucket: "temporal-backups"
      region: "us-west-2"
      endpoint: ""
      accessKey: ""  # Set via secret
      secretKey: ""  # Set via secret

  # Metrics
  metrics:
    enabled: true
    image:
      tag: "0.11.1"

    serviceMonitor:
      enabled: true
      namespace: "monitoring"
      interval: "30s"

    resources:
      limits:
        memory: 256Mi
        cpu: 250m
      requests:
        memory: 128Mi
        cpu: 100m

  # Network policy
  networkPolicy:
    enabled: true
    allowExternal: false
    explicitNamespacesSelector:
      matchLabels:
        name: "temporal-system"

Database Secrets Management

PostgreSQL Credentials Secret

# k8s/database/secrets/postgresql-credentials.yaml
# Syncs all PostgreSQL passwords from Vault into a Kubernetes Secret via the
# External Secrets Operator, so no password is ever committed to this repo.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: postgresql-credentials
  namespace: temporal-system
spec:
  # Re-read Vault hourly so rotated passwords propagate without a redeploy.
  refreshInterval: 1h
  secretStoreRef:
    name: vault-backend
    kind: SecretStore
  target:
    name: postgresql-credentials
    creationPolicy: Owner
    template:
      type: Opaque
      # Key names here must match the Helm chart's auth.secretKeys mapping
      # (postgres-password / password) and the env references in the
      # schema-setup, backup, and maintenance jobs.
      data:
        postgres-password: "{{ .postgres_password }}"
        password: "{{ .temporal_password }}"
        monitor-password: "{{ .monitor_password }}"
        replication-password: "{{ .replication_password }}"
  # Each entry below maps one property of the Vault secret at
  # temporal/database into a template variable used above.
  data:
  - secretKey: postgres_password
    remoteRef:
      key: temporal/database
      property: postgres_password
  - secretKey: temporal_password
    remoteRef:
      key: temporal/database
      property: temporal_password
  - secretKey: monitor_password
    remoteRef:
      key: temporal/database
      property: monitor_password
  - secretKey: replication_password
    remoteRef:
      key: temporal/database
      property: replication_password

Database Schema Management

Temporal Schema Setup Job

# k8s/database/jobs/schema-setup.yaml
# Helm hook Job that initializes/upgrades the Temporal and visibility schemas
# using temporal-sql-tool from the admin-tools image.
apiVersion: batch/v1
kind: Job
metadata:
  name: temporal-schema-setup
  namespace: temporal-system
  annotations:
    # Run after every install/upgrade; delete any previous hook Job first.
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "1"
    "helm.sh/hook-delete-policy": before-hook-creation
spec:
  template:
    metadata:
      name: temporal-schema-setup
    spec:
      restartPolicy: OnFailure
      containers:
      - name: temporal-admin-tools
        image: temporalio/admin-tools:1.20.0
        command:
        - /bin/bash
        - -c
        - |
          set -euo pipefail

          echo "Setting up Temporal database schema..."

          # Wait for database to be ready.
          # NOTE(review): pg_isready ships with the PostgreSQL client tools —
          # confirm it is actually present in the admin-tools image.
          until pg_isready -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER"; do
            echo "Waiting for database to be ready..."
            sleep 5
          done

          # Helper: run temporal-sql-tool against one database. Every
          # expansion is quoted so passwords containing spaces or shell
          # metacharacters cannot break the command line.
          sql_tool() {
            local database="$1"; shift
            temporal-sql-tool \
              --plugin postgres \
              --ep "$DB_HOST" \
              --port "$DB_PORT" \
              --user "$DB_USER" \
              --password "$DB_PASSWORD" \
              --database "$database" \
              "$@"
          }

          # Setup default database.
          # FIX: update-schema must point at the versioned migration
          # directory (…/v96/temporal/versioned), not the v96 root — the
          # root only contains per-store subdirectories.
          # NOTE(review): for PostgreSQL 12+ current Temporal releases
          # recommend the postgres12 plugin with the v12 schema directory —
          # verify against the schema paths shipped in this image tag.
          sql_tool "$DB_NAME" setup-schema -v 0.0
          sql_tool "$DB_NAME" update-schema -d /etc/temporal/schema/postgresql/v96/temporal/versioned

          # Setup visibility database.
          sql_tool "$DB_VISIBILITY_NAME" setup-schema -v 0.0
          sql_tool "$DB_VISIBILITY_NAME" update-schema -d /etc/temporal/schema/postgresql/v96/visibility/versioned

          echo "Temporal database schema setup completed successfully"

        env:
        - name: DB_HOST
          value: "postgresql-primary"
        - name: DB_PORT
          value: "5432"
        - name: DB_USER
          value: "temporal"
        - name: DB_PASSWORD
          valueFrom:
            secretKeyRef:
              name: postgresql-credentials
              key: password
        - name: DB_NAME
          value: "temporal"
        - name: DB_VISIBILITY_NAME
          value: "temporal_visibility"

        resources:
          limits:
            memory: 512Mi
            cpu: 500m
          requests:
            memory: 256Mi
            cpu: 250m

      securityContext:
        runAsNonRoot: true
        runAsUser: 10001
        fsGroup: 10001

Performance Optimization

PostgreSQL Tuning Configuration

Performance Tuning ConfigMap

# k8s/database/config/postgresql-performance.yaml
# Tuned postgresql.conf for the Temporal primary (sized for a 4Gi pod with
# SSD-backed gp3 storage).
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgresql-performance-config
  namespace: temporal-system
data:
  postgresql.conf: |
    # Connection Settings
    max_connections = 200
    superuser_reserved_connections = 3

    # Memory Settings
    shared_buffers = 1GB                    # 25% of RAM
    effective_cache_size = 3GB              # 75% of RAM
    maintenance_work_mem = 256MB
    work_mem = 4MB

    # Checkpoint Settings
    checkpoint_completion_target = 0.9
    checkpoint_timeout = 10min
    max_wal_size = 4GB
    min_wal_size = 1GB
    wal_buffers = 16MB

    # Query Planning
    default_statistics_target = 100
    constraint_exclusion = partition
    cursor_tuple_fraction = 0.1

    # Disk I/O Settings
    random_page_cost = 1.1                  # For SSD storage
    effective_io_concurrency = 200
    seq_page_cost = 1

    # Background Writer
    bgwriter_delay = 200ms
    bgwriter_lru_maxpages = 100
    bgwriter_lru_multiplier = 2.0
    bgwriter_flush_after = 512kB

    # Autovacuum Settings
    autovacuum = on
    autovacuum_max_workers = 3
    autovacuum_naptime = 1min
    autovacuum_vacuum_threshold = 50
    autovacuum_analyze_threshold = 50
    autovacuum_vacuum_scale_factor = 0.2
    autovacuum_analyze_scale_factor = 0.1
    autovacuum_freeze_max_age = 200000000
    autovacuum_multixact_freeze_max_age = 400000000
    autovacuum_vacuum_cost_delay = 20ms
    autovacuum_vacuum_cost_limit = 200

    # Logging Settings
    log_destination = 'stderr'
    logging_collector = on
    log_directory = 'log'
    log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
    log_rotation_age = 1d
    log_rotation_size = 100MB
    log_min_duration_statement = 1000
    log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
    log_checkpoints = on
    log_connections = on
    log_disconnections = on
    log_lock_waits = on
    log_temp_files = 10MB
    log_autovacuum_min_duration = 0
    log_error_verbosity = default

    # Replication Settings
    wal_level = replica
    max_wal_senders = 10
    max_replication_slots = 10
    hot_standby = on
    hot_standby_feedback = off

    # SSL Settings
    ssl = on
    ssl_cert_file = '/etc/ssl/certs/tls.crt'
    ssl_key_file = '/etc/ssl/private/tls.key'
    ssl_ca_file = '/etc/ssl/certs/ca.crt'
    ssl_ciphers = 'ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384'
    ssl_prefer_server_ciphers = on
    # FIX: "ssl_protocols" is not a PostgreSQL parameter — the server refuses
    # to start when postgresql.conf contains an unknown setting. The supported
    # knob is ssl_min_protocol_version; TLSv1.3 remains enabled because no
    # maximum version is set.
    ssl_min_protocol_version = 'TLSv1.2'

Database Indexes for Temporal

Temporal Optimization Script

-- k8s/database/sql/temporal-indexes.sql
-- Additional indexes for Temporal performance optimization
--
-- NOTE(review): these statements assume plain columns such as
-- executions(state, created_time) and history_events(event_type) exist;
-- recent Temporal PostgreSQL schemas keep most execution state inside
-- serialized blob columns, so verify every column name against the deployed
-- schema version before running this file.
--
-- CREATE INDEX CONCURRENTLY cannot run inside a transaction block, so this
-- file must be executed statement-by-statement (psql autocommit default),
-- never wrapped in BEGIN/COMMIT or run via a tool that opens a transaction.

-- Indexes for executions table
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_namespace_id 
ON executions (namespace_id);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_workflow_id_run_id 
ON executions (workflow_id, run_id);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_state_created_time 
ON executions (state, created_time);

-- Partial index: only rows that have actually started.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_start_time 
ON executions (start_time) WHERE start_time IS NOT NULL;

-- Indexes for history_events table
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_history_events_workflow_id_run_id_event_id 
ON history_events (workflow_id, run_id, event_id);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_history_events_created_time 
ON history_events (created_time);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_history_events_event_type 
ON history_events (event_type);

-- Indexes for tasks table
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tasks_task_queue_name_state 
ON tasks (task_queue_name, state);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tasks_created_time 
ON tasks (created_time);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tasks_visibility_timestamp 
ON tasks (visibility_timestamp) WHERE visibility_timestamp IS NOT NULL;

-- Indexes for activity_info_maps table
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_activity_info_maps_workflow_id_run_id 
ON activity_info_maps (workflow_id, run_id);

-- Indexes for timer_info_maps table
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_timer_info_maps_workflow_id_run_id 
ON timer_info_maps (workflow_id, run_id);

-- Indexes for child_execution_info_maps table
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_child_execution_info_maps_workflow_id_run_id 
ON child_execution_info_maps (workflow_id, run_id);

-- Indexes for visibility tables
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_visibility_namespace_id_start_time 
ON executions_visibility (namespace_id, start_time);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_visibility_workflow_type_start_time 
ON executions_visibility (workflow_type, start_time);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_visibility_status_start_time 
ON executions_visibility (status, start_time);

-- Composite indexes for common queries
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_visibility_namespace_status_start_time 
ON executions_visibility (namespace_id, status, start_time);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_executions_visibility_namespace_type_start_time 
ON executions_visibility (namespace_id, workflow_type, start_time);

-- Update table statistics so the planner can use the new indexes immediately.
ANALYZE executions;
ANALYZE history_events;
ANALYZE tasks;
ANALYZE activity_info_maps;
ANALYZE timer_info_maps;
ANALYZE child_execution_info_maps;
ANALYZE executions_visibility;

Backup and Recovery

Automated Backup System

Backup CronJob

# k8s/database/backup/backup-cronjob.yaml
# Nightly logical backups (pg_dump custom format) of both Temporal databases,
# gzip-compressed and shipped to S3 with a 30-day retention window.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: postgresql-backup
  namespace: temporal-system
spec:
  schedule: "0 2 * * *"  # Daily at 2 AM
  # Never overlap two backup runs.
  concurrencyPolicy: Forbid
  failedJobsHistoryLimit: 3
  successfulJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          annotations:
            prometheus.io/scrape: "false"
        spec:
          restartPolicy: OnFailure
          containers:
          - name: postgresql-backup
            # NOTE(review): the stock postgres image does not ship the aws
            # CLI, so the "aws s3" steps below will fail as written — either
            # this tag refers to a custom build, or the image must be replaced
            # with one bundling both the PostgreSQL client tools and awscli.
            # TODO confirm before relying on these backups.
            image: postgres:15.4
            command:
            - /bin/bash
            - -c
            - |
              set -euo pipefail

              TIMESTAMP=$(date +%Y%m%d_%H%M%S)
              BACKUP_DIR="/backups"

              echo "Starting PostgreSQL backup at $(date)"

              # Create backup directory
              mkdir -p "$BACKUP_DIR"

              # Backup main database.
              # --format=custom produces a pg_restore-able archive;
              # --clean --if-exists lets a restore drop/recreate objects.
              echo "Backing up temporal database..."
              PGPASSWORD="$DB_PASSWORD" pg_dump \
                -h "$DB_HOST" \
                -U "$DB_USER" \
                -d temporal \
                --verbose \
                --no-owner \
                --no-privileges \
                --clean \
                --if-exists \
                --format=custom \
                -f "$BACKUP_DIR/temporal_${TIMESTAMP}.dump"

              # Backup visibility database
              echo "Backing up temporal_visibility database..."
              PGPASSWORD="$DB_PASSWORD" pg_dump \
                -h "$DB_HOST" \
                -U "$DB_USER" \
                -d temporal_visibility \
                --verbose \
                --no-owner \
                --no-privileges \
                --clean \
                --if-exists \
                --format=custom \
                -f "$BACKUP_DIR/temporal_visibility_${TIMESTAMP}.dump"

              # Compress backups
              echo "Compressing backup files..."
              gzip "$BACKUP_DIR/temporal_${TIMESTAMP}.dump"
              gzip "$BACKUP_DIR/temporal_visibility_${TIMESTAMP}.dump"

              # Upload to S3
              echo "Uploading backups to S3..."
              aws s3 cp "$BACKUP_DIR/temporal_${TIMESTAMP}.dump.gz" \
                "s3://${S3_BUCKET}/database/temporal_${TIMESTAMP}.dump.gz" \
                --storage-class STANDARD_IA

              aws s3 cp "$BACKUP_DIR/temporal_visibility_${TIMESTAMP}.dump.gz" \
                "s3://${S3_BUCKET}/database/temporal_visibility_${TIMESTAMP}.dump.gz" \
                --storage-class STANDARD_IA

              # Clean up local files
              rm -f "$BACKUP_DIR"/*.dump.gz

              # Clean up old backups (keep last 30 days)
              # head -n -60 drops everything except the newest 60 objects:
              # 30 days x 2 dumps per day (main + visibility). Lexicographic
              # sort works because filenames embed YYYYMMDD_HHMMSS.
              aws s3 ls "s3://${S3_BUCKET}/database/" | \
                grep "temporal_" | \
                sort | \
                head -n -60 | \
                awk '{print $4}' | \
                xargs -I {} aws s3 rm "s3://${S3_BUCKET}/database/{}" || true

              echo "Backup completed successfully at $(date)"

            env:
            - name: DB_HOST
              value: "postgresql-primary"
            - name: DB_USER
              value: "temporal"
            - name: DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgresql-credentials
                  key: password
            - name: S3_BUCKET
              value: "temporal-backups"
            - name: AWS_REGION
              value: "us-west-2"

            volumeMounts:
            - name: backup-storage
              mountPath: /backups

            resources:
              limits:
                memory: 1Gi
                cpu: 500m
              requests:
                memory: 512Mi
                cpu: 250m

          # Scratch space only; backups persist in S3, not in this volume.
          volumes:
          - name: backup-storage
            emptyDir:
              sizeLimit: 10Gi

          # NOTE(review): presumably this service account carries IRSA/S3
          # permissions — verify it grants s3:PutObject/ListBucket/DeleteObject
          # on the backup bucket.
          serviceAccountName: postgresql-backup

          securityContext:
            runAsNonRoot: true
            runAsUser: 999
            fsGroup: 999

Point-in-Time Recovery Setup

WAL-E Configuration for Continuous Archiving

# k8s/database/backup/wal-e-config.yaml
# Continuous WAL archiving settings for point-in-time recovery.
# NOTE(review): WAL-E is unmaintained; WAL-G is its actively developed
# successor — consider migrating before depending on this in production.
apiVersion: v1
kind: ConfigMap
metadata:
  name: wal-e-config
  namespace: temporal-system
data:
  # Archiving settings to be merged into the primary's postgresql.conf:
  # every completed WAL segment (or at most every 60s) is pushed to S3.
  wal-e.conf: |
    [wal-e]
    s3_prefix = s3://temporal-backups/wal-e
    aws_region = us-west-2

    [postgresql]
    archive_mode = on
    archive_command = 'wal-e wal-push %p'
    archive_timeout = 60

    max_wal_senders = 10
    wal_keep_size = 1GB
    wal_level = replica
  # NOTE(review): recovery.conf was removed in PostgreSQL 12 — on the
  # PostgreSQL 15 images used elsewhere in this guide its mere presence in
  # the data directory prevents the server from starting. These settings now
  # belong in postgresql.auto.conf together with a standby.signal (standby)
  # or recovery.signal (PITR) file; standby_mode no longer exists at all.
  # Keep this template only for documentation until it is rewritten.
  recovery.conf.template: |
    standby_mode = 'on'
    primary_conninfo = 'host=${PRIMARY_HOST} port=5432 user=replicator password=${REPLICATION_PASSWORD}'
    restore_command = 'wal-e wal-fetch %f %p'
    recovery_target_time = '${RECOVERY_TARGET_TIME}'

Disaster Recovery Procedures

Recovery Script

#!/bin/bash
# scripts/database/disaster-recovery.sh
#
# Restore the Temporal main database from an S3 logical backup produced by
# the postgresql-backup CronJob, then swap it in place of the live database.
#
# Usage:
#   disaster-recovery.sh [BACKUP_DATE|latest] [full|point-in-time] [TARGET_TIME]
#
#   BACKUP_DATE    timestamp portion of the dump name (YYYYMMDD_HHMMSS), or "latest"
#   RECOVERY_TYPE  "full" (default) or "point-in-time"
#   TARGET_TIME    required for point-in-time recovery

set -euo pipefail

BACKUP_DATE=${1:-latest}
RECOVERY_TYPE=${2:-full}  # full, point-in-time
TARGET_TIME=${3:-}

NAMESPACE="temporal-system"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

log() {
    echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] $1${NC}"
}

warn() {
    echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1${NC}"
}

error() {
    echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1${NC}"
    exit 1
}

log "Starting disaster recovery process..."
log "Backup date: $BACKUP_DATE"
log "Recovery type: $RECOVERY_TYPE"

# Verify prerequisites
if ! command -v kubectl &> /dev/null; then
    error "kubectl is required but not installed"
fi

if ! command -v aws &> /dev/null; then
    error "aws CLI is required but not installed"
fi

# FIX: the helper pods below run createdb/pg_restore/psql, which all need
# credentials on an auth-enabled cluster; the original script passed no
# password at all. Pull both passwords from the secret the cluster uses.
TEMPORAL_PASSWORD=$(kubectl get secret postgresql-credentials -n "$NAMESPACE" \
    -o jsonpath='{.data.password}' | base64 -d)
POSTGRES_PASSWORD=$(kubectl get secret postgresql-credentials -n "$NAMESPACE" \
    -o jsonpath='{.data.postgres-password}' | base64 -d)

# Get backup file
if [[ "$BACKUP_DATE" == "latest" ]]; then
    # FIX: anchor the pattern to main-database dumps only. A bare "temporal_"
    # match also catches temporal_visibility_* files, so "latest" could
    # silently select a visibility dump.
    BACKUP_FILE=$(aws s3 ls s3://temporal-backups/database/ | \
        awk '{print $4}' | \
        grep -E '^temporal_[0-9]{8}_[0-9]{6}\.dump\.gz$' | \
        sort | tail -n 1)
    if [[ -z "$BACKUP_FILE" ]]; then
        error "No backup files found"
    fi
    log "Using latest backup: $BACKUP_FILE"
else
    BACKUP_FILE="temporal_${BACKUP_DATE}.dump.gz"
fi

# Download backup
log "Downloading backup file..."
aws s3 cp "s3://temporal-backups/database/$BACKUP_FILE" "./backup.dump.gz"
gunzip backup.dump.gz

# Stop Temporal services so nothing holds connections — ALTER DATABASE RENAME
# below fails while any session is connected to the database.
log "Stopping Temporal services..."
kubectl scale deployment temporal-frontend temporal-history temporal-matching temporal-worker --replicas=0 -n "$NAMESPACE"

# Create recovery database (superuser, owned by the temporal role)
log "Creating recovery database..."
kubectl run recovery-db --image=postgres:15.4 --rm -i --restart=Never \
    -n "$NAMESPACE" --env="PGPASSWORD=$POSTGRES_PASSWORD" -- \
    createdb -h postgresql-primary -U postgres -O temporal temporal_recovery

# Restore backup; pg_restore reads the custom-format dump from stdin, which
# kubectl -i forwards from the local file.
log "Restoring database backup..."
kubectl run restore-job --image=postgres:15.4 --rm -i --restart=Never \
    -n "$NAMESPACE" --env="PGPASSWORD=$TEMPORAL_PASSWORD" -- \
    pg_restore -h postgresql-primary -U temporal -d temporal_recovery \
    --verbose --clean --if-exists < backup.dump

if [[ "$RECOVERY_TYPE" == "point-in-time" && -n "$TARGET_TIME" ]]; then
    # FIX: the original ran "kubectl cp" *inside* a postgres container (where
    # kubectl does not exist) and dropped a recovery.conf into the data
    # directory — a file whose mere presence stops PostgreSQL 12+ from
    # starting. Real PITR needs a restored base backup plus WAL replay driven
    # by recovery_target_time in postgresql.auto.conf and a recovery.signal
    # file, which is environment-specific. Fail loudly instead of half-doing it.
    error "Point-in-time recovery is not automated by this script; restore a base backup and configure recovery_target_time + recovery.signal on the primary manually"
fi

# Validate recovery
log "Validating database recovery..."
RECORD_COUNT=$(kubectl run validate-job --image=postgres:15.4 --rm -i --restart=Never \
    -n "$NAMESPACE" --env="PGPASSWORD=$TEMPORAL_PASSWORD" -- \
    psql -h postgresql-primary -U temporal -d temporal_recovery -t -c "SELECT COUNT(*) FROM executions;")

log "Database recovery validation: $RECORD_COUNT records found"

# Switch to recovered database (RENAME requires superuser and no connections)
log "Switching to recovered database..."
kubectl run switch-db --image=postgres:15.4 --rm -i --restart=Never \
    -n "$NAMESPACE" --env="PGPASSWORD=$POSTGRES_PASSWORD" -- \
    bash -c "
    psql -h postgresql-primary -U postgres -c 'ALTER DATABASE temporal RENAME TO temporal_old;'
    psql -h postgresql-primary -U postgres -c 'ALTER DATABASE temporal_recovery RENAME TO temporal;'
    "

warn "Only the main database was restored; restore temporal_visibility separately if required"

# Restart Temporal services
# NOTE(review): this brings every deployment back at a single replica —
# restore the pre-incident replica counts for production capacity.
log "Restarting Temporal services..."
kubectl scale deployment temporal-frontend temporal-history temporal-matching temporal-worker --replicas=1 -n "$NAMESPACE"

# Wait for services to be ready
kubectl wait --for=condition=available deployment/temporal-frontend -n "$NAMESPACE" --timeout=300s

log "Disaster recovery completed successfully!"
log "Please verify system functionality before proceeding with normal operations"

# Clean up
rm -f backup.dump

Monitoring and Maintenance

Database Monitoring Setup

PostgreSQL Exporter Configuration

# k8s/database/monitoring/postgres-exporter.yaml
# Prometheus exporter for PostgreSQL, plus its Service and ServiceMonitor.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgres-exporter
  namespace: temporal-system
  labels:
    app: postgres-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: postgres-exporter
  template:
    metadata:
      labels:
        app: postgres-exporter
    spec:
      containers:
      - name: postgres-exporter
        image: quay.io/prometheuscommunity/postgres-exporter:v0.11.1
        ports:
        - containerPort: 9187
          name: metrics
        env:
        # Full libpq connection string (host, user, password, sslmode).
        # NOTE(review): the "postgres-exporter-secret" referenced here is not
        # defined anywhere in this guide — confirm it is created (e.g. via an
        # ExternalSecret using the temporal_monitor credentials).
        - name: DATA_SOURCE_NAME
          valueFrom:
            secretKeyRef:
              name: postgres-exporter-secret
              key: connection-string
        # Custom queries (replication lag, Temporal execution counts) mounted
        # from the postgres-exporter-queries ConfigMap below.
        - name: PG_EXPORTER_QUERIES_PATH
          value: "/etc/postgres_exporter/queries.yaml"

        volumeMounts:
        - name: queries-config
          mountPath: /etc/postgres_exporter

        resources:
          limits:
            memory: 256Mi
            cpu: 250m
          requests:
            memory: 128Mi
            cpu: 100m

        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 65534

      volumes:
      - name: queries-config
        configMap:
          name: postgres-exporter-queries

      serviceAccountName: postgres-exporter

---
apiVersion: v1
kind: Service
metadata:
  name: postgres-exporter
  namespace: temporal-system
  labels:
    app: postgres-exporter
spec:
  ports:
  - port: 9187
    targetPort: 9187
    name: metrics
  selector:
    app: postgres-exporter

---
# NOTE(review): this ServiceMonitor lives in temporal-system, while the Helm
# values earlier place chart-managed monitors in the "monitoring" namespace —
# make sure the Prometheus operator is configured to watch this namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: postgres-exporter
  namespace: temporal-system
spec:
  selector:
    matchLabels:
      app: postgres-exporter
  endpoints:
  - port: metrics
    interval: 30s
    path: /metrics

Custom PostgreSQL Queries for Monitoring

# k8s/database/monitoring/postgres-queries.yaml
# Custom queries consumed by postgres-exporter via PG_EXPORTER_QUERIES_PATH:
# replication lag, postmaster start time, per-table bloat/vacuum statistics,
# and per-namespace Temporal workflow-execution counts.
# NOTE(review): the temporal_executions query assumes a plain
# executions(state, namespace_id) layout and specific state codes (1=running
# … 5=terminated); verify both against the deployed Temporal schema version
# before trusting these metrics.
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-exporter-queries
  namespace: temporal-system
data:
  queries.yaml: |
    pg_replication:
      query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag"
      master: true
      metrics:
        - lag:
            usage: "GAUGE"
            description: "Replication lag behind master in seconds"

    pg_postmaster:
      query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
      master: true
      metrics:
        - start_time_seconds:
            usage: "GAUGE"
            description: "Time at which postmaster started"

    pg_stat_user_tables:
      query: |
        SELECT
          current_database() datname,
          schemaname,
          relname,
          seq_scan,
          seq_tup_read,
          idx_scan,
          idx_tup_fetch,
          n_tup_ins,
          n_tup_upd,
          n_tup_del,
          n_tup_hot_upd,
          n_live_tup,
          n_dead_tup,
          n_mod_since_analyze,
          COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum,
          COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum,
          COALESCE(last_analyze, '1970-01-01Z') as last_analyze,
          COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze,
          vacuum_count,
          autovacuum_count,
          analyze_count,
          autoanalyze_count
        FROM pg_stat_user_tables
      metrics:
        - datname:
            usage: "LABEL"
            description: "Name of current database"
        - schemaname:
            usage: "LABEL"
            description: "Name of the schema that this table is in"
        - relname:
            usage: "LABEL"
            description: "Name of this table"
        - seq_scan:
            usage: "COUNTER"
            description: "Number of sequential scans initiated on this table"
        - seq_tup_read:
            usage: "COUNTER"
            description: "Number of live rows fetched by sequential scans"
        - idx_scan:
            usage: "COUNTER"
            description: "Number of index scans initiated on this table"
        - idx_tup_fetch:
            usage: "COUNTER"
            description: "Number of live rows fetched by index scans"
        - n_tup_ins:
            usage: "COUNTER"
            description: "Number of rows inserted"
        - n_tup_upd:
            usage: "COUNTER"
            description: "Number of rows updated"
        - n_tup_del:
            usage: "COUNTER"
            description: "Number of rows deleted"
        - n_tup_hot_upd:
            usage: "COUNTER"
            description: "Number of rows HOT updated"
        - n_live_tup:
            usage: "GAUGE"
            description: "Estimated number of live rows"
        - n_dead_tup:
            usage: "GAUGE"
            description: "Estimated number of dead rows"
        - n_mod_since_analyze:
            usage: "GAUGE"
            description: "Estimated number of rows modified since this table was last analyzed"
        - last_vacuum:
            usage: "GAUGE"
            description: "Last time at which this table was manually vacuumed"
        - last_autovacuum:
            usage: "GAUGE"
            description: "Last time at which this table was vacuumed by the autovacuum daemon"
        - last_analyze:
            usage: "GAUGE"
            description: "Last time at which this table was manually analyzed"
        - last_autoanalyze:
            usage: "GAUGE"
            description: "Last time at which this table was analyzed by the autovacuum daemon"
        - vacuum_count:
            usage: "COUNTER"
            description: "Number of times this table has been manually vacuumed"
        - autovacuum_count:
            usage: "COUNTER"
            description: "Number of times this table has been vacuumed by the autovacuum daemon"
        - analyze_count:
            usage: "COUNTER"
            description: "Number of times this table has been manually analyzed"
        - autoanalyze_count:
            usage: "COUNTER"
            description: "Number of times this table has been analyzed by the autovacuum daemon"

    temporal_executions:
      query: |
        SELECT
          namespace_id,
          COUNT(*) as total_executions,
          COUNT(*) FILTER (WHERE state = 1) as running_executions,
          COUNT(*) FILTER (WHERE state = 2) as completed_executions,
          COUNT(*) FILTER (WHERE state = 3) as failed_executions,
          COUNT(*) FILTER (WHERE state = 4) as cancelled_executions,
          COUNT(*) FILTER (WHERE state = 5) as terminated_executions
        FROM executions
        GROUP BY namespace_id
      metrics:
        - namespace_id:
            usage: "LABEL"
            description: "Temporal namespace ID"
        - total_executions:
            usage: "GAUGE"
            description: "Total number of workflow executions"
        - running_executions:
            usage: "GAUGE"
            description: "Number of running workflow executions"
        - completed_executions:
            usage: "GAUGE"
            description: "Number of completed workflow executions"
        - failed_executions:
            usage: "GAUGE"
            description: "Number of failed workflow executions"
        - cancelled_executions:
            usage: "GAUGE"
            description: "Number of cancelled workflow executions"
        - terminated_executions:
            usage: "GAUGE"
            description: "Number of terminated workflow executions"

Database Maintenance Scripts

Maintenance CronJob

# k8s/database/maintenance/maintenance-cronjob.yaml
# Weekly vacuum/reindex/statistics pass over both Temporal databases.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: postgresql-maintenance
  namespace: temporal-system
spec:
  schedule: "0 3 * * 0"  # Weekly on Sunday at 3 AM
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: OnFailure
          containers:
          - name: maintenance
            image: postgres:15.4
            command:
            - /bin/bash
            - -c
            - |
              set -euo pipefail

              echo "Starting database maintenance at $(date)"

              # Vacuum, reindex and refresh statistics on both databases.
              for db in temporal temporal_visibility; do
                echo "Maintaining database: $db"

                # VACUUM ANALYZE reclaims dead tuples AND refreshes planner
                # statistics in one pass, so no separate ANALYZE is needed.
                PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -U "$DB_USER" -d "$db" -c "VACUUM ANALYZE;"

                # FIX: plain REINDEX DATABASE takes ACCESS EXCLUSIVE locks and
                # would block Temporal for the duration; CONCURRENTLY
                # (PostgreSQL 12+) rebuilds indexes without blocking writes.
                # It must run outside a transaction block — psql -c with
                # autocommit satisfies that.
                PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -U "$DB_USER" -d "$db" -c "REINDEX DATABASE CONCURRENTLY $db;"
              done

              echo "Database maintenance completed at $(date)"

            env:
            - name: DB_HOST
              value: "postgresql-primary"
            - name: DB_USER
              value: "temporal"
            - name: DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgresql-credentials
                  key: password

            resources:
              limits:
                memory: 512Mi
                cpu: 500m
              requests:
                memory: 256Mi
                cpu: 250m

This comprehensive database setup guide provides enterprise-grade PostgreSQL deployment with high availability, performance optimization, backup/recovery procedures, and monitoring capabilities specifically tuned for Temporal.io workloads.