Hi @francislavoie, thank you for your response.
Following your advice, I installed Caddy version 2.8.4 and created a shared volume for the home directory (which is /root in my setup), in addition to the data and config volumes I already had shared across multiple Caddy instances.
I am conducting load tests with both a single instance and multiple instances of Caddy to ensure everything functions correctly with 10 pods. Despite this setup, when running just 2 instances, I encounter inconsistencies with certificate generation that do not occur with a single instance. I have triple-checked the volume mounts, and they appear to be configured correctly.
Attached below are the logs from the two pods. They show that certificate provisioning fails at the HTTP validation step: ZeroSSL returns `domain_control_validation_failed` with `http_transport_failed` for the validation file. Could you help me understand what might be causing this issue?
Please note that example.io is a fake domain used here as a placeholder, and that I have no issues generating certificates when only a single instance of Caddy is running.
Logs from 1st instance
{"level":"debug","ts":1718748229.3288038,"logger":"events","msg":"event","name":"tls_get_certificate","id":"aa31e8af-a6cf-4c24-9eb9-24b5bba678dd","origin":"tls","data":{"client_hello":{"CipherSuites":[56026,4865,4866,4867,49195,49199,49196,49200,52393,52392,49171,49172,156,157,47,53],"ServerName":"test-150.example.io","SupportedCurves":[51914,25497,29,23,24],"SupportedPoints":"AA==","SignatureSchemes":[1027,2052,1025,1283,2053,1281,2054,1537],"SupportedProtos":["h2","http/1.1"],"SupportedVersions":[23130,772,771],"RemoteAddr":{"IP":"10.142.0.37","Port":23858,"Zone":""},"LocalAddr":{"IP":"10.100.43.60","Port":443,"Zone":""}}}}
{"level":"debug","ts":1718748229.328903,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"test-150.example.io"}
{"level":"debug","ts":1718748229.3289223,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.example.io"}
{"level":"debug","ts":1718748229.3289258,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.*.io"}
{"level":"debug","ts":1718748229.3289292,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.*.*"}
{"level":"debug","ts":1718748229.3289466,"logger":"tls.handshake","msg":"all external certificate managers yielded no certificates and no errors","remote_ip":"10.142.0.37","remote_port":"23858","sni":"test-150.example.io"}
{"level":"debug","ts":1718748229.3289578,"logger":"tls","msg":"asking for permission for on-demand certificate","remote_ip":"10.142.0.37","domain":"test-150.example.io"}
{"level":"debug","ts":1718748229.3289871,"logger":"tls","msg":"asking permission endpoint","remote":"10.142.0.37:23858","domain":"test-150.example.io","url":"https://mock.httpstatus.io/200?domain=test-150.example.io"}
{"level":"debug","ts":1718748229.5971868,"logger":"tls","msg":"response from permission endpoint","remote":"10.142.0.37:23858","domain":"test-150.example.io","url":"https://mock.httpstatus.io/200?domain=test-150.example.io","status":200}
{"level":"debug","ts":1718748229.6456945,"logger":"tls.handshake","msg":"did not load cert from storage","remote_ip":"10.142.0.37","remote_port":"23858","server_name":"test-150.example.io","error":"no matching certificate to load for test-150.example.io: open /data/caddy/certificates/zerossl/wildcard_.example.io/wildcard_.example.io.key: no such file or directory"}
{"level":"info","ts":1718748229.6457565,"logger":"tls.on_demand","msg":"obtaining new certificate","remote_ip":"10.142.0.37","remote_port":"23858","server_name":"test-150.example.io"}
{"level":"info","ts":1718748229.746155,"logger":"tls.obtain","msg":"acquiring lock","identifier":"test-150.example.io"}
{"level":"debug","ts":1718748260.445699,"logger":"events","msg":"event","name":"tls_get_certificate","id":"5d39f6a0-f156-4ca9-a572-8f5cb4a71e2e","origin":"tls","data":{"client_hello":{"CipherSuites":[43690,4865,4866,4867,49195,49199,49196,49200,52393,52392,49171,49172,156,157,47,53],"ServerName":"test-150.example.io","SupportedCurves":[64250,25497,29,23,24],"SupportedPoints":"AA==","SignatureSchemes":[1027,2052,1025,1283,2053,1281,2054,1537],"SupportedProtos":["h2","http/1.1"],"SupportedVersions":[60138,772,771],"RemoteAddr":{"IP":"10.142.0.82","Port":36287,"Zone":""},"LocalAddr":{"IP":"10.100.43.60","Port":443,"Zone":""}}}}
{"level":"debug","ts":1718748260.4457433,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"test-150.example.io"}
{"level":"debug","ts":1718748260.4457526,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.example.io"}
{"level":"debug","ts":1718748260.4457557,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.*.io"}
{"level":"debug","ts":1718748260.4457579,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.*.*"}
Logs from 2nd instance
{"level":"debug","ts":1718748229.5923295,"logger":"tls.handshake","msg":"did not load cert from storage","remote_ip":"10.142.0.152","remote_port":"21391","server_name":"test-150.example.io","error":"no matching certificate to load for test-150.example.io: open /data/caddy/certificates/zerossl/wildcard_.example.io/wildcard_.example.io.key: no such file or directory"}
{"level":"info","ts":1718748229.5923765,"logger":"tls.on_demand","msg":"obtaining new certificate","remote_ip":"10.142.0.152","remote_port":"21391","server_name":"test-150.example.io"}
{"level":"info","ts":1718748229.7273417,"logger":"tls.obtain","msg":"acquiring lock","identifier":"test-150.example.io"}
{"level":"info","ts":1718748229.7499487,"logger":"tls.obtain","msg":"lock acquired","identifier":"test-150.example.io"}
{"level":"info","ts":1718748229.7505078,"logger":"tls.obtain","msg":"obtaining certificate","identifier":"test-150.example.io"}
{"level":"debug","ts":1718748229.7505803,"logger":"events","msg":"event","name":"cert_obtaining","id":"60e426a5-93de-4dc7-99da-dce014009d56","origin":"tls","data":{"identifier":"test-150.example.io"}}
{"level":"debug","ts":1718748229.7512107,"logger":"tls.obtain","msg":"trying issuer 1/1","issuer":"zerossl"}
{"level":"info","ts":1718748229.7513769,"logger":"tls.issuance.zerossl","msg":"creating certificate","identifiers":["test-150.example.io"]}
{"level":"info","ts":1718748231.6271038,"logger":"tls.issuance.zerossl","msg":"created certificate","identifiers":["test-150.example.io"],"cert_id":"1fddb9240c3fde28e4edb0e078d1a974"}
{"level":"info","ts":1718748231.633594,"logger":"tls.issuance.zerossl","msg":"validating identifiers","identifiers":["test-150.example.io"],"cert_id":"1fddb9240c3fde28e4edb0e078d1a974","verification_method":"HTTP_CSR_HASH"}
{"level":"info","ts":1718748232.2369077,"logger":"tls.issuance.zerossl","msg":"canceled certificate","identifiers":["test-150.example.io"],"cert_id":"1fddb9240c3fde28e4edb0e078d1a974","verification_method":"HTTP_CSR_HASH"}
{"level":"error","ts":1718748232.2369719,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"test-150.example.io","issuer":"zerossl","error":"verifying identifiers: POST https://api.zerossl.com/certificates/1fddb9240c3fde28e4edb0e078d1a974/challenges?access_key=redacted: HTTP 200: API error 0: domain_control_validation_failed (details=map[test-150.example.io:map[http://test-150.example.io/.well-known/pki-validation/4688054CE4FCE733C9852EC326D095C3.txt:{{0 0 } {0 true http_transport_failed Error: HTTP transport exception in file validation. Check if your server and the validation file are reachable and maybe retry afterwards?}}]]) (raw={\"success\":false,\"error\":{\"code\":0,\"type\":\"domain_control_validation_failed\",\"details\":{\"test-150.example.io\":{\"http:\\/\\/test-150.example.io\\/.well-known\\/pki-validation\\/4688054CE4FCE733C9852EC326D095C3.txt\":{\"file_found\":0,\"error\":true,\"error_slug\":\"http_transport_failed\",\"error_info\":\"Error: HTTP transport exception in file validation. Check if your server and the validation file are reachable and maybe retry afterwards?\"}}}}} decode_error=json: unknown field \"success\")"}
{"level":"debug","ts":1718748232.2370641,"logger":"events","msg":"event","name":"cert_failed","id":"edd16c96-b7c1-4d7a-9a17-440044e456a4","origin":"tls","data":{"error":{},"identifier":"test-150.example.io","issuers":["zerossl"],"renewal":false}}
{"level":"error","ts":1718748232.2371006,"logger":"tls.obtain","msg":"will retry","error":"[test-150.example.io] Obtain: verifying identifiers: POST https://api.zerossl.com/certificates/1fddb9240c3fde28e4edb0e078d1a974/challenges?access_key=redacted: HTTP 200: API error 0: domain_control_validation_failed (details=map[test-150.example.io:map[http://test-150.example.io/.well-known/pki-validation/4688054CE4FCE733C9852EC326D095C3.txt:{{0 0 } {0 true http_transport_failed Error: HTTP transport exception in file validation. Check if your server and the validation file are reachable and maybe retry afterwards?}}]]) (raw={\"success\":false,\"error\":{\"code\":0,\"type\":\"domain_control_validation_failed\",\"details\":{\"test-150.example.io\":{\"http:\\/\\/test-150.example.io\\/.well-known\\/pki-validation\\/4688054CE4FCE733C9852EC326D095C3.txt\":{\"file_found\":0,\"error\":true,\"error_slug\":\"http_transport_failed\",\"error_info\":\"Error: HTTP transport exception in file validation. Check if your server and the validation file are reachable and maybe retry afterwards?\"}}}}} decode_error=json: unknown field \"success\")","attempt":1,"retrying_in":60,"elapsed":2.487114281,"max_duration":2592000}
{"level":"debug","ts":1718748260.4458015,"logger":"events","msg":"event","name":"tls_get_certificate","id":"a82a219a-3193-4d06-a026-05dd1b772b4d","origin":"tls","data":{"client_hello":{"CipherSuites":[2570,4865,4866,4867,49195,49199,49196,49200,52393,52392,49171,49172,156,157,47,53],"ServerName":"test-150.example.io","SupportedCurves":[19018,25497,29,23,24],"SupportedPoints":"AA==","SignatureSchemes":[1027,2052,1025,1283,2053,1281,2054,1537],"SupportedProtos":["h2","http/1.1"],"SupportedVersions":[56026,772,771],"RemoteAddr":{"IP":"10.142.0.127","Port":58667,"Zone":""},"LocalAddr":{"IP":"10.100.52.160","Port":443,"Zone":""}}}}
{"level":"debug","ts":1718748260.4458406,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"test-150.example.io"}
{"level":"debug","ts":1718748260.4458485,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.example.io"}
{"level":"debug","ts":1718748260.4458528,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.*.io"}
{"level":"debug","ts":1718748260.4458563,"logger":"tls.handshake","msg":"no matching certificates and no custom selection logic","identifier":"*.*.*"}
{"level":"info","ts":1718748292.23929,"logger":"tls.obtain","msg":"obtaining certificate","identifier":"test-150.example.io"}
{"level":"debug","ts":1718748292.239399,"logger":"events","msg":"event","name":"cert_obtaining","id":"83cb1a1a-b481-40e1-9526-6c18c0e3257c","origin":"tls","data":{"identifier":"test-150.example.io"}}
{"level":"debug","ts":1718748292.239748,"logger":"tls.obtain","msg":"trying issuer 1/1","issuer":"zerossl"}
{"level":"info","ts":1718748292.2398481,"logger":"tls.issuance.zerossl","msg":"creating certificate","identifiers":["test-150.example.io"]}
{"level":"info","ts":1718748293.34797,"logger":"tls.issuance.zerossl","msg":"created certificate","identifiers":["test-150.example.io"],"cert_id":"a35da5dad6db5601ae040c14b0e72406"}
{"level":"info","ts":1718748293.35398,"logger":"tls.issuance.zerossl","msg":"validating identifiers","identifiers":["test-150.example.io"],"cert_id":"a35da5dad6db5601ae040c14b0e72406","verification_method":"HTTP_CSR_HASH"}
{"level":"info","ts":1718748293.7028039,"logger":"tls.issuance.zerossl","msg":"canceled certificate","identifiers":["test-150.example.io"],"cert_id":"a35da5dad6db5601ae040c14b0e72406","verification_method":"HTTP_CSR_HASH"}
{"level":"error","ts":1718748293.702842,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"test-150.example.io","issuer":"zerossl","error":"verifying identifiers: POST https://api.zerossl.com/certificates/a35da5dad6db5601ae040c14b0e72406/challenges?access_key=redacted: HTTP 200: API error 0: domain_control_validation_failed (details=map[test-150.example.io:map[http://test-150.example.io/.well-known/pki-validation/4122810758B64FF77BF703BE87856D87.txt:{{0 0 } {0 true http_transport_failed Error: HTTP transport exception in file validation. Check if your server and the validation file are reachable and maybe retry afterwards?}}]]) (raw={\"success\":false,\"error\":{\"code\":0,\"type\":\"domain_control_validation_failed\",\"details\":{\"test-150.example.io\":{\"http:\\/\\/test-150.example.io\\/.well-known\\/pki-validation\\/4122810758B64FF77BF703BE87856D87.txt\":{\"file_found\":0,\"error\":true,\"error_slug\":\"http_transport_failed\",\"error_info\":\"Error: HTTP transport exception in file validation. Check if your server and the validation file are reachable and maybe retry afterwards?\"}}}}} decode_error=json: unknown field \"success\")"}
{"level":"debug","ts":1718748293.702901,"logger":"events","msg":"event","name":"cert_failed","id":"810e0779-1a03-4e54-a5aa-cac78f0db072","origin":"tls","data":{"error":{},"identifier":"test-150.example.io","issuers":["zerossl"],"renewal":false}}
{"level":"error","ts":1718748293.7029192,"logger":"tls.obtain","msg":"will retry","error":"[test-150.example.io] Obtain: verifying identifiers: POST https://api.zerossl.com/certificates/a35da5dad6db5601ae040c14b0e72406/challenges?access_key=redacted: HTTP 200: API error 0: domain_control_validation_failed (details=map[test-150.example.io:map[http://test-150.example.io/.well-known/pki-validation/4122810758B64FF77BF703BE87856D87.txt:{{0 0 } {0 true http_transport_failed Error: HTTP transport exception in file validation. Check if your server and the validation file are reachable and maybe retry afterwards?}}]]) (raw={\"success\":false,\"error\":{\"code\":0,\"type\":\"domain_control_validation_failed\",\"details\":{\"test-150.example.io\":{\"http:\\/\\/test-150.example.io\\/.well-known\\/pki-validation\\/4122810758B64FF77BF703BE87856D87.txt\":{\"file_found\":0,\"error\":true,\"error_slug\":\"http_transport_failed\",\"error_info\":\"Error: HTTP transport exception in file validation. Check if your server and the validation file are reachable and maybe retry afterwards?\"}}}}} decode_error=json: unknown field \"success\")","attempt":2,"retrying_in":120,"elapsed":63.952933847,"max_duration":2592000}
Here is my updated config for Caddy + GKE setup:
# PersistentVolumeClaim for the Caddy home directory; the caddy Deployment
# mounts it at /root. Requested as ReadWriteMany so that more than one pod
# can mount the same volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: caddy-home-pvc
  namespace: default
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 5Gi
  storageClassName: standard-rwx
---
# PersistentVolumeClaim for Caddy's data directory; the caddy Deployment
# mounts it at /data (the logs reference /data/caddy/certificates/...).
# Requested as ReadWriteMany so that more than one pod can mount it.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: caddy-data-pvc
  namespace: default
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
  storageClassName: standard-rwx
---
# PersistentVolumeClaim for Caddy's config directory; the caddy Deployment
# mounts it at /config. Requested as ReadWriteMany so that more than one pod
# can mount it.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: caddy-config-pvc
  namespace: default
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  storageClassName: standard-rwx
---
# ConfigMap holding the Caddy JSON configuration. The caddy Deployment mounts
# the `caddy.json` key at /etc/caddy/caddy.json.
#
# Summary of the embedded config (JSON cannot carry comments, so it is
# described here):
#   - logging: default log at DEBUG level.
#   - apps.http.servers.srv0: listens on :443, one route that reverse-proxies
#     to app.mycompany.com:443 using the "http" transport with a `tls` block;
#     automatic HTTPS redirects are disabled; header/read/write/idle limits
#     are set explicitly.
#   - apps.tls.automation: on-demand certificates, gated by the `ask`
#     endpoint, issued through the "zerossl" issuer module.
apiVersion: v1
kind: ConfigMap
metadata:
  name: caddy-config
  namespace: default
data:
  caddy.json: |
    {
      "logging": {
        "logs": {
          "default": {
            "level": "DEBUG"
          }
        }
      },
      "apps": {
        "http": {
          "servers": {
            "srv0": {
              "listen": [":443"],
              "routes": [{
                "handle": [{
                  "handler": "reverse_proxy",
                  "upstreams": [{
                    "dial": "app.mycompany.com:443"
                  }],
                  "transport": {
                    "protocol": "http",
                    "tls": {}
                  }
                }]
              }],
              "automatic_https": {
                "disable_redirects": true
              },
              "max_header_bytes": 4096,
              "read_header_timeout": "10s",
              "read_timeout": "10s",
              "write_timeout": "10s",
              "idle_timeout": "1m"
            }
          }
        },
        "tls": {
          "automation": {
            "on_demand": {
              "ask": "https://mock.httpstatus.io/200"
            },
            "policies": [
              {
                "on_demand": true,
                "issuers": [
                  {
                    "module": "zerossl",
                    "api_key": "<redacted>"
                  }
                ]
              }
            ]
          }
        }
      }
    }
---
# Deployment running Caddy 2.8.4 with the JSON config from the caddy-config
# ConfigMap. Every replica mounts the same three PVCs (data, config, home).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: caddy-deployment
  namespace: default
spec:
  # NOTE(review): the attached logs show validation failing only when more
  # than one replica runs. Presumably the validation request can reach a pod
  # other than the one that started issuance -- confirm.
  replicas: 2
  selector:
    matchLabels:
      app: caddy
  template:
    metadata:
      labels:
        app: caddy
    spec:
      containers:
        - name: caddy
          command: ["caddy", "run", "--config", "/etc/caddy/caddy.json"]
          image: caddy:2.8.4
          ports:
            - containerPort: 443
            - containerPort: 80
          volumeMounts:
            # Single file from the ConfigMap, mounted via subPath.
            - name: caddy-config
              mountPath: /etc/caddy/caddy.json
              subPath: caddy.json
            - name: caddy-data
              mountPath: /data
            - name: caddy-config-dir
              mountPath: /config
            - name: caddy-home-dir
              mountPath: /root
      volumes:
        - name: caddy-config
          configMap:
            name: caddy-config
        - name: caddy-data
          persistentVolumeClaim:
            claimName: caddy-data-pvc
        - name: caddy-config-dir
          persistentVolumeClaim:
            claimName: caddy-config-pvc
        - name: caddy-home-dir
          persistentVolumeClaim:
            claimName: caddy-home-pvc
---
# LoadBalancer Service exposing ports 443 and 80 of the pods labelled
# `app: caddy`.
apiVersion: v1
kind: Service
metadata:
  name: caddy-service
  namespace: default
spec:
  type: LoadBalancer
  selector:
    app: caddy
  ports:
    - protocol: TCP
      port: 443
      targetPort: 443
      name: https
    - protocol: TCP
      port: 80
      targetPort: 80
      name: http
---