HTTP Caching

Configure HTTP response caching with different storage backends and per-route cache policies. This example demonstrates in-memory caching, stale-while-revalidate, and handling of upstream Cache-Control headers.

Use Case

Cache API responses to reduce backend load
Long-lived caching for static assets
Short-lived caching for dynamic content
Stale-while-revalidate for improved availability
Per-route cache configuration
Cache bypass for authenticated requests

Architecture

                    ┌─────────────────┐
                    │    Clients      │
                    └────────┬────────┘
                             │
                    ┌────────▼────────┐
                    │    Zentinel     │
                    │   ┌─────────┐   │
                    │   │  Cache  │   │
                    │   │ mem/disk│   │
                    │   └─────────┘   │
                    └────────┬────────┘
                             │
              ┌──────────────┼──────────────┐
              │              │              │
              ▼              ▼              ▼
         ┌─────────┐   ┌─────────┐   ┌─────────┐
         │   API   │   │ Static  │   │  Slow   │
         │ Backend │   │ Backend │   │ Backend │
         └─────────┘   └─────────┘   └─────────┘

Configuration

Create zentinel.kdl:

// HTTP Caching Configuration
// Response caching with memory backend

// Top-level system settings; the numbers are example values for this guide.
system {
    worker-threads 4
    max-connections 10000
}

// Single plain-HTTP listener. It binds all interfaces on port 8080, which is the
// port every curl command in the Testing section talks to.
listeners {
    listener "http" {
        address "0.0.0.0:8080"
        protocol "http"
        request-timeout-secs 60
    }
}

// Three upstream pools, one per backend box in the architecture diagram.
upstreams {
    // API pool: two equally weighted targets; the only pool with a health check.
    upstream "api-backend" {
        target "10.0.1.10:8080" weight=1
        target "10.0.1.11:8080" weight=1

        load-balancing "round-robin"

        // HTTP probe of /health every 10 s, expecting status 200.
        health-check {
            type "http" {
                path "/health"
                expected-status 200
            }
            interval-secs 10
        }
    }

    // Static asset pool: single target, no health check configured.
    upstream "static-backend" {
        target "10.0.2.10:8080" weight=1

        load-balancing "round-robin"
    }

    // Slow pool: single target, no health check configured.
    upstream "slow-backend" {
        target "10.0.3.10:8080" weight=1

        load-balancing "round-robin"

        // 120 s request timeout, the same value as timeout-secs on the
        // "slow-endpoint" route that uses this pool.
        timeouts {
            request-secs 120
        }
    }
}

// =============================================================================
// Global cache storage configuration
// (storage only; TTLs and other policies are set per route under routes { })
// =============================================================================
cache {
    enabled #true

    // Storage backend: "memory", "disk", or "hybrid"
    // ("hybrid" is not implemented yet and falls back to "memory"; see Customizations)
    backend "memory"
    max-size 536870912         // 512 MiB (512 * 1024 * 1024 bytes)

    // Lock timeout to prevent thundering herd
    // NOTE(review): the unit is not stated on this page; presumably seconds - confirm.
    lock-timeout 10
}

routes {
    // API with caching enabled.
    // Matches GET requests under /api/v1 and forwards them to "api-backend".
    // The Testing section targets /api/v1/users, which falls under this prefix.
    route "api-cached" {
        priority "high"

        matches {
            path-prefix "/api/v1"
            method "GET"
        }

        upstream "api-backend"

        policies {
            timeout-secs 30

            cache {
                enabled #true

                // Cache TTL
                ttl-secs 300  // 5 minutes

                // Vary cache by these headers
                vary-headers "Accept" "Accept-Encoding" "Accept-Language"

                // Respect Cache-Control headers from upstream
                respect-cache-control #true

                // Stale-while-revalidate: serve stale while refreshing
                // (60-second window once the TTL has passed)
                stale-while-revalidate-secs 60

                // Stale-if-error: serve stale on upstream error
                // (300-second window)
                stale-if-error-secs 300
            }
        }
    }

    // Static assets with aggressive caching.
    // Matches GET requests under /static and forwards them to "static-backend".
    route "static-assets" {
        priority "normal"

        matches {
            path-prefix "/static"
            method "GET"
        }

        upstream "static-backend"

        policies {
            timeout-secs 10

            cache {
                enabled #true

                // Long TTL for static assets
                ttl-secs 86400  // 24 hours

                // Only vary by Accept-Encoding
                vary-headers "Accept-Encoding"

                // Ignore Cache-Control from origin
                respect-cache-control #false

                // Long stale times for availability
                stale-while-revalidate-secs 3600   // 1 hour
                stale-if-error-secs 86400          // 24 hours
            }

            // Set the Cache-Control response header explicitly; max-age=86400
            // mirrors the 24-hour ttl-secs above.
            response-headers {
                set {
                    "Cache-Control" "public, max-age=86400, immutable"
                }
            }
        }
    }

    // Short-lived cache for dynamic content.
    // Matches GET requests under /api/feed; no stale-* windows are configured here.
    route "dynamic-cached" {
        priority "normal"

        matches {
            path-prefix "/api/feed"
            method "GET"
        }

        upstream "api-backend"

        policies {
            timeout-secs 30

            cache {
                enabled #true
                ttl-secs 30  // Short TTL
                // NOTE(review): varying on Authorization presumably creates one cache
                // entry per credential - confirm this is intended for the feed.
                vary-headers "Accept" "Authorization"
                respect-cache-control #true
            }
        }
    }

    // API key-based cache variation.
    // Matches GET requests under /api/user that carry an X-API-Key header
    // (presumably a presence check, since no value is given - confirm).
    route "api-key-cached" {
        priority "high"

        matches {
            path-prefix "/api/user"
            method "GET"
            header "X-API-Key"
        }

        upstream "api-backend"

        policies {
            timeout-secs 30

            cache {
                enabled #true
                ttl-secs 60  // 1 minute
                // Cache varies by API key (per-user cache)
                vary-headers "X-API-Key" "Accept"
                respect-cache-control #true
            }
        }
    }

    // Slow endpoint with long cache.
    // Matches GET requests under /api/reports and forwards them to "slow-backend".
    route "slow-endpoint" {
        priority "normal"

        matches {
            path-prefix "/api/reports"
            method "GET"
        }

        upstream "slow-backend"

        policies {
            // Same 120 s budget as the request-secs timeout on "slow-backend".
            timeout-secs 120

            cache {
                enabled #true
                // Long cache for expensive computations
                ttl-secs 3600  // 1 hour
                vary-headers "Accept"
                // respect-cache-control is not set on this route, so whatever the
                // default is applies (not stated on this page).
                stale-while-revalidate-secs 600   // 10 minutes
                stale-if-error-secs 3600          // 1 hour
            }
        }
    }

    // Cache bypass for authenticated requests.
    // Matches GET requests under /api/private that carry an Authorization header.
    // NOTE(review): /api/private requests without that header are not matched by
    // this route, and no other route in this file covers /api/private - confirm
    // that is the intended behaviour.
    route "no-cache-auth" {
        priority "high"

        matches {
            path-prefix "/api/private"
            method "GET"
            header "Authorization"
        }

        upstream "api-backend"

        policies {
            timeout-secs 30

            // Disable caching for authenticated requests
            cache {
                enabled #false
            }
        }
    }

    // Write requests (POST, PUT, DELETE, PATCH) under /api are not cached
    route "api-write" {
        priority "high"

        matches {
            path-prefix "/api"
            method "POST" "PUT" "DELETE" "PATCH"
        }

        upstream "api-backend"

        policies {
            timeout-secs 30
            // No cache block - mutations are not cached
        }
    }

    // Cache purge endpoint.
    // Accepts POST and DELETE under /_cache/purge (see "Cache Purge" in Testing).
    // WARNING: this route is served by the same listener as client traffic, which
    // binds 0.0.0.0:8080, and no access control is shown in this example. Restrict
    // who can reach it before exposing the listener publicly.
    route "cache-purge" {
        priority "critical"

        matches {
            path-prefix "/_cache/purge"
            method "POST" "DELETE"
        }

        builtin-handler "cache-purge"

        policies {
            failure-mode "closed"
        }
    }

    // Cache stats endpoint.
    // Exact-path match on /_cache/stats, GET only; a sample response is shown
    // under "Cache Stats" in the Testing section.
    route "cache-stats" {
        priority "critical"

        matches {
            path "/_cache/stats"
            method "GET"
        }

        builtin-handler "cache-stats"
    }

    // Health check for the proxy itself (built-in handler, not forwarded upstream).
    // Exact-path match on /health with no method restriction.
    route "health" {
        priority "critical"

        matches {
            path "/health"
        }

        builtin-handler "health"
    }
}

// Metrics and logging. Metrics are exposed on their own address (port 9090),
// separate from the 8080 traffic listener.
observability {
    metrics {
        enabled #true
        address "0.0.0.0:9090"
        path "/metrics"
        // Cache metrics:
        // - zentinel_cache_hits_total
        // - zentinel_cache_misses_total
        // - zentinel_cache_stale_hits_total
        // - zentinel_cache_size_bytes
        // - zentinel_cache_entries_count
        // - zentinel_cache_evictions_total
    }

    logging {
        level "info"
        format "json"

        access-log {
            enabled #true
            file "/var/log/zentinel/access.log"
            // Cache status in logs: HIT, MISS, STALE, BYPASS
        }
    }
}

// Request size limits.
limits {
    max-header-size-bytes 8192       // 8 KiB
    max-header-count 100
    max-body-size-bytes 10485760     // 10 MiB
}

Setup

1. Start Zentinel

# Run from the directory that contains the zentinel.kdl created above
zentinel -c zentinel.kdl

2. Start Backend Services

# The upstream targets in zentinel.kdl are separate hosts that all listen on
# port 8080. Run each command below on its own backend host, not on the
# Zentinel host: the Zentinel listener already binds 0.0.0.0:8080 there.
# No command is shown for slow-backend (10.0.3.10:8080); start your own
# service there if you want to exercise /api/reports.

# Start API backend (api-backend targets: 10.0.1.10 and 10.0.1.11)
node api/server.js --port 8080 &

# Start static file server (static-backend target: 10.0.2.10)
python -m http.server 8080 --directory /var/www/static &

Testing

Cache Hit/Miss

# Send a GET and print only the response headers.
# (curl -I would send HEAD, but the "api-cached" route matches method "GET"
#  only, so -s -o /dev/null -D - is used: GET, discard body, dump headers.)

# First request - MISS
curl -s -o /dev/null -D - http://localhost:8080/api/v1/users
# X-Cache-Status: MISS

# Second request - HIT
curl -s -o /dev/null -D - http://localhost:8080/api/v1/users
# X-Cache-Status: HIT

Stale-While-Revalidate

# Request after TTL but within stale window
# (a GET that prints only the response headers; curl -I would send HEAD, which
#  the GET-only "api-cached" route does not list)
curl -s -o /dev/null -D - http://localhost:8080/api/v1/users
# X-Cache-Status: STALE
# Response served immediately while background refresh happens

Cache Stats

# Served by the built-in "cache-stats" route (exact path /_cache/stats, GET only)
curl http://localhost:8080/_cache/stats

Response:

{
  "entries": 156,
  "size_bytes": 2345678,
  "hits": 12345,
  "misses": 1234,
  "stale_hits": 56,
  "evictions": 23
}

Cache Purge

# The "cache-purge" route accepts POST and DELETE under /_cache/purge.
# URLs are quoted so the shell does not interpret "?" in the query string.

# Purge specific path
curl -X POST "http://localhost:8080/_cache/purge?path=/api/v1/users"

# Purge by prefix
curl -X POST "http://localhost:8080/_cache/purge?prefix=/api/v1/"

# Purge all
curl -X DELETE "http://localhost:8080/_cache/purge"

Monitor Cache Metrics

# -s silences curl's progress meter, which is otherwise printed to stderr when
# the output is piped and clutters the grep results.
curl -s http://localhost:9090/metrics | grep cache

Customizations

Disk-Based Cache

Persist cached responses to disk so they survive proxy restarts. Ideal for large caches (multi-GB) or static asset caching:

// Global cache block using the disk backend (replaces the memory-backed block
// in the main example).
cache {
    enabled #true
    backend "disk"
    max-size 10737418240       // 10 GiB (10 * 1024^3 bytes)
    // Root directory for the sharded on-disk layout
    disk-path "/var/cache/zentinel"
    disk-shards 16
    lock-timeout 10
}

The disk backend stores entries in a sharded directory layout under disk-path. All writes are atomic (temp file + rename), so a crash never corrupts the cache. Orphaned temp files from previous crashes are cleaned up on startup.

Hybrid Cache (Memory + Disk)

The hybrid backend is not yet implemented. Configuring backend "hybrid" currently falls back to the memory backend and logs a warning. You can still specify disk-path now, so that no configuration changes are needed once hybrid support lands.

// Forward-compatible hybrid configuration. Until hybrid support is implemented
// this falls back to the memory backend with a warning.
cache {
    enabled #true
    backend "hybrid"
    max-size 10737418240       // 10 GiB total
    // Set now so the config does not need to change when hybrid support lands
    disk-path "/var/cache/zentinel"
    disk-shards 16
    lock-timeout 10
}

Cache Key Customization

// Fragment: only the cache policy is shown; matches and upstream are omitted.
route "custom-cache-key" {
    policies {
        cache {
            enabled #true
            ttl-secs 300  // 5 minutes
            // Include specific query params in cache key
            vary-query-params "page" "limit" "sort"
            // Ignore certain query params
            ignore-query-params "timestamp" "nonce"
        }
    }
}

Conditional Caching

// Fragment: only the cache policy is shown; matches and upstream are omitted.
route "conditional-cache" {
    policies {
        cache {
            enabled #true
            ttl-secs 300  // 5 minutes
            // Only cache 200 OK and 301/302 redirect responses
            cacheable-status-codes 200 301 302
            // Don't cache responses larger than 10 MiB (10485760 bytes)
            max-cacheable-size-bytes 10485760
        }
    }
}

Cache Exclusion Rules

For broad routes that cache most content but need to skip certain paths or file types, use exclude-extensions and exclude-paths:

// Catch-all route that caches everything except the excluded extensions/paths.
// NOTE: upstream "origin" is not one of the upstreams defined in the main
// example; define it (or substitute an existing upstream) before using this.
route "site-with-exclusions" {
    matches {
        path-prefix "/"
    }
    upstream "origin"

    policies {
        cache {
            enabled #true
            // NOTE(review): every other example on this page uses ttl-secs;
            // confirm which key name the current release expects.
            default-ttl-secs 3600

            // Don't cache dynamic file types
            exclude-extensions "php" "html" "asp"

            // Don't cache admin or login paths (glob patterns)
            exclude-paths "/wp-admin/**" "/login" "/api/auth/**"

            vary-headers "Accept-Encoding"
        }
    }
}

This avoids splitting a single route into multiple routes just to control caching. Excluded requests are forwarded directly to the upstream with a BYPASS cache status.

Next Steps

Distributed Rate Limiting - Add rate limiting
Static Site - Serve static files
API Gateway - Complete API management