diff --git a/kubernetes/linera-validator/static/kube-dns-upstream-service.yaml b/kubernetes/linera-validator/static/kube-dns-upstream-service.yaml
new file mode 100644
index 000000000000..88f956133c17
--- /dev/null
+++ b/kubernetes/linera-validator/static/kube-dns-upstream-service.yaml
@@ -0,0 +1,21 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: kube-dns-upstream
+  namespace: kube-system
+  labels:
+    app: node-local-dns
+    k8s-app: kube-dns-upstream
+spec:
+  selector:
+    k8s-app: kube-dns
+  ports:
+    - name: dns
+      port: 53
+      protocol: UDP
+      targetPort: 53
+    - name: dns-tcp
+      port: 53
+      protocol: TCP
+      targetPort: 53
+  type: ClusterIP
diff --git a/kubernetes/linera-validator/templates/node-local-dns-config.yaml b/kubernetes/linera-validator/templates/node-local-dns-config.yaml
new file mode 100644
index 000000000000..3e460afc05f8
--- /dev/null
+++ b/kubernetes/linera-validator/templates/node-local-dns-config.yaml
@@ -0,0 +1,79 @@
+{{- if .Values.nodeLocalDns.enabled }}
+{{- if not .Values.nodeLocalDns.kubeDnsIp }}
+{{- fail "nodeLocalDns.kubeDnsIp is required when nodeLocalDns is enabled. Get it with: kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}'" }}
+{{- end }}
+{{- /* Get the kube-dns-upstream service IP. If it doesn't exist yet, use kubeDnsUpstreamIp from values or fall back to kubeDnsIp */ -}}
+{{- $upstreamSvc := lookup "v1" "Service" "kube-system" "kube-dns-upstream" }}
+{{- $upstreamIp := "" }}
+{{- if $upstreamSvc }}
+{{- $upstreamIp = $upstreamSvc.spec.clusterIP }}
+{{- else if .Values.nodeLocalDns.kubeDnsUpstreamIp }}
+{{- $upstreamIp = .Values.nodeLocalDns.kubeDnsUpstreamIp }}
+{{- else }}
+{{- /* First deploy: upstream service doesn't exist yet, will be created by this release */ -}}
+{{- /* Use kubeDnsIp as fallback - the DaemonSet will need to be restarted after service is created */ -}}
+{{- $upstreamIp = .Values.nodeLocalDns.kubeDnsIp }}
+{{- end }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: node-local-dns
+  namespace: kube-system
+  labels:
+    app: node-local-dns
+  annotations:
+    # Force ConfigMap update when upstream IP changes
+    nodelocaldns.kubernetes.io/upstream-ip: {{ $upstreamIp | quote }}
+data:
+  Corefile: |
+    {{ .Values.nodeLocalDns.clusterDomain }}:53 {
+        errors
+        cache {
+            success {{ .Values.nodeLocalDns.successTtl }}
+            denial {{ .Values.nodeLocalDns.denialTtl }}
+        }
+        reload
+        loop
+        bind {{ .Values.nodeLocalDns.localDnsIp }}
+        forward . {{ $upstreamIp }} {
+            force_tcp
+        }
+        prometheus :9253
+        health {{ .Values.nodeLocalDns.localDnsIp }}:8080
+    }
+    in-addr.arpa:53 {
+        errors
+        cache 30
+        reload
+        loop
+        bind {{ .Values.nodeLocalDns.localDnsIp }}
+        forward . {{ $upstreamIp }} {
+            force_tcp
+        }
+        prometheus :9253
+    }
+    ip6.arpa:53 {
+        errors
+        cache 30
+        reload
+        loop
+        bind {{ .Values.nodeLocalDns.localDnsIp }}
+        forward . {{ $upstreamIp }} {
+            force_tcp
+        }
+        prometheus :9253
+    }
+    .:53 {
+        errors
+        cache 30
+        reload
+        loop
+        bind {{ .Values.nodeLocalDns.localDnsIp }}
+        {{- if .Values.nodeLocalDns.upstreamServers }}
+        forward . {{ .Values.nodeLocalDns.upstreamServers }}
+        {{- else }}
+        forward . /etc/resolv.conf
+        {{- end }}
+        prometheus :9253
+    }
+{{- end }}
diff --git a/kubernetes/linera-validator/templates/node-local-dns-daemonset.yaml b/kubernetes/linera-validator/templates/node-local-dns-daemonset.yaml
new file mode 100644
index 000000000000..22d8e4d3d425
--- /dev/null
+++ b/kubernetes/linera-validator/templates/node-local-dns-daemonset.yaml
@@ -0,0 +1,91 @@
+{{- if .Values.nodeLocalDns.enabled }}
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: node-local-dns
+  namespace: kube-system
+  labels:
+    app: node-local-dns
+spec:
+  selector:
+    matchLabels:
+      app: node-local-dns
+  updateStrategy:
+    rollingUpdate:
+      maxUnavailable: 10%
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: node-local-dns
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "9253"
+        # Trigger pod restart when ConfigMap changes (includes upstream IP)
+        checksum/config: {{ include (print $.Template.BasePath "/node-local-dns-config.yaml") . | sha256sum }}
+    spec:
+      serviceAccountName: node-local-dns
+      priorityClassName: system-node-critical
+      hostNetwork: true
+      dnsPolicy: Default
+      tolerations:
+        - key: CriticalAddonsOnly
+          operator: Exists
+        - operator: Exists
+          effect: NoSchedule
+        - operator: Exists
+          effect: NoExecute
+      containers:
+        - name: node-cache
+          image: "{{ .Values.nodeLocalDns.image }}:{{ .Values.nodeLocalDns.imageTag }}"
+          imagePullPolicy: {{ .Values.nodeLocalDns.imagePullPolicy }}
+          args:
+            - "-localip"
+            - "{{ .Values.nodeLocalDns.localDnsIp }}"
+            - "-conf"
+            - "/etc/Corefile"
+            - "-upstreamsvc"
+            - "kube-dns"
+            - "-skipteardown=true"
+            - "-setupinterface=true"
+            - "-setupiptables=true"
+          securityContext:
+            privileged: true
+          ports:
+            - name: dns
+              containerPort: 53
+              protocol: UDP
+            - name: dns-tcp
+              containerPort: 53
+              protocol: TCP
+            - name: metrics
+              containerPort: 9253
+              protocol: TCP
+          livenessProbe:
+            httpGet:
+              host: {{ .Values.nodeLocalDns.localDnsIp }}
+              path: /health
+              port: 8080
+            initialDelaySeconds: 60
+            timeoutSeconds: 5
+          resources:
+            {{- toYaml .Values.nodeLocalDns.resources | nindent 12 }}
+          volumeMounts:
+            - name: config
+              mountPath: /etc/Corefile
+              subPath: Corefile
+            - name: xtables-lock
+              mountPath: /run/xtables.lock
+              readOnly: false
+      volumes:
+        - name: config
+          configMap:
+            name: node-local-dns
+            items:
+              - key: Corefile
+                path: Corefile
+        - name: xtables-lock
+          hostPath:
+            path: /run/xtables.lock
+            type: FileOrCreate
+{{- end }}
diff --git a/kubernetes/linera-validator/templates/node-local-dns-service.yaml b/kubernetes/linera-validator/templates/node-local-dns-service.yaml
new file mode 100644
index 000000000000..f19c63314a18
--- /dev/null
+++ b/kubernetes/linera-validator/templates/node-local-dns-service.yaml
@@ -0,0 +1,18 @@
+{{- if .Values.nodeLocalDns.enabled }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: node-local-dns
+  namespace: kube-system
+  labels:
+    app: node-local-dns
+spec:
+  clusterIP: None
+  selector:
+    app: node-local-dns
+  ports:
+    - name: metrics
+      port: 9253
+      targetPort: 9253
+      protocol: TCP
+{{- end }}
diff --git a/kubernetes/linera-validator/templates/node-local-dns-serviceaccount.yaml b/kubernetes/linera-validator/templates/node-local-dns-serviceaccount.yaml
new file mode 100644
index 000000000000..d6aa7d473a24
--- /dev/null
+++ b/kubernetes/linera-validator/templates/node-local-dns-serviceaccount.yaml
@@ -0,0 +1,9 @@
+{{- if .Values.nodeLocalDns.enabled }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: node-local-dns
+  namespace: kube-system
+  labels:
+    app: node-local-dns
+{{- end }}
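Once the DaemonSet is running, the cache can be sanity-checked directly from a node. This is a rough sketch, assuming the default localDnsIp of 169.254.20.10 and a shell on the node (or a hostNetwork debug pod) with dig and curl available:

    # Query the node-local cache on the link-local IP it binds
    dig +short kubernetes.default.svc.cluster.local @169.254.20.10

    # Health endpoint exposed by the health plugin configured in the Corefile
    curl -s http://169.254.20.10:8080/health

    # Cache counters exposed by the prometheus plugin on :9253
    curl -s http://169.254.20.10:9253/metrics | grep coredns_cache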
diff --git a/kubernetes/linera-validator/templates/otel-collector-router-deployment.yaml b/kubernetes/linera-validator/templates/otel-collector-router-deployment.yaml
index e6a4df93dc23..c3b5b90424f4 100644
--- a/kubernetes/linera-validator/templates/otel-collector-router-deployment.yaml
+++ b/kubernetes/linera-validator/templates/otel-collector-router-deployment.yaml
@@ -26,6 +26,32 @@ spec:
       tolerations:
         {{- toYaml .Values.otelCollector.router.tolerations | nindent 8 }}
       {{- end }}
+      {{- if .Values.nodeLocalDns.enabled }}
+      dnsPolicy: None
+      dnsConfig:
+        nameservers:
+          - {{ .Values.nodeLocalDns.localDnsIp }}
+        searches:
+          - {{ .Release.Namespace }}.svc.cluster.local
+          - svc.cluster.local
+          - cluster.local
+        options:
+          - name: ndots
+            value: "5"
+      {{- end }}
+      initContainers:
+        - name: wait-for-sampler-dns
+          image: busybox:1.36
+          command:
+            - sh
+            - -c
+            - |
+              echo "Waiting for sampler DNS to be resolvable..."
+              until nslookup otel-collector-sampler-headless.{{ .Release.Namespace }}.svc.cluster.local; do
+                echo "DNS not ready, retrying in 2 seconds..."
+                sleep 2
+              done
+              echo "DNS resolution successful!"
       containers:
         - name: otel-collector-router
           image: {{ .Values.otelCollector.router.image }}:{{ .Values.otelCollector.router.imageTag }}
diff --git a/kubernetes/linera-validator/templates/proxy.yaml b/kubernetes/linera-validator/templates/proxy.yaml
index e64b99d75e40..a5d81d1f4433 100644
--- a/kubernetes/linera-validator/templates/proxy.yaml
+++ b/kubernetes/linera-validator/templates/proxy.yaml
@@ -58,6 +58,19 @@ spec:
           value: "true"
         effect: NoSchedule
       {{- end }}
+      {{- if .Values.nodeLocalDns.enabled }}
+      dnsPolicy: None
+      dnsConfig:
+        nameservers:
+          - {{ .Values.nodeLocalDns.localDnsIp }}
+        searches:
+          - {{ .Release.Namespace }}.svc.cluster.local
+          - svc.cluster.local
+          - cluster.local
+        options:
+          - name: ndots
+            value: "5"
+      {{- end }}
       terminationGracePeriodSeconds: 10
       initContainers:
         - name: linera-proxy-initializer
diff --git a/kubernetes/linera-validator/templates/shards.yaml b/kubernetes/linera-validator/templates/shards.yaml
index 136ee4f85847..289c6e150357 100644
--- a/kubernetes/linera-validator/templates/shards.yaml
+++ b/kubernetes/linera-validator/templates/shards.yaml
@@ -38,6 +38,19 @@ spec:
           value: "true"
         effect: NoSchedule
       {{- end }}
+      {{- if .Values.nodeLocalDns.enabled }}
+      dnsPolicy: None
+      dnsConfig:
+        nameservers:
+          - {{ .Values.nodeLocalDns.localDnsIp }}
+        searches:
+          - {{ .Release.Namespace }}.svc.cluster.local
+          - svc.cluster.local
+          - cluster.local
+        options:
+          - name: ndots
+            value: "5"
+      {{- end }}
       terminationGracePeriodSeconds: 10
       initContainers:
        - name: linera-server-initializer
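Because these workloads switch to dnsPolicy: None, the kubelet writes the dnsConfig above verbatim into each pod's /etc/resolv.conf, which makes the change easy to eyeball. A sketch; the namespace and pod name are placeholders, not identifiers defined by this chart:

    kubectl exec -n <validator-namespace> <proxy-or-shard-pod> -- cat /etc/resolv.conf
    # Expected shape with the default values:
    #   nameserver 169.254.20.10
    #   search <validator-namespace>.svc.cluster.local svc.cluster.local cluster.local
    #   options ndots:5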
diff --git a/kubernetes/linera-validator/values.yaml b/kubernetes/linera-validator/values.yaml
index 70276c114ba0..995ab53d71ce 100644
--- a/kubernetes/linera-validator/values.yaml
+++ b/kubernetes/linera-validator/values.yaml
@@ -360,3 +360,56 @@ otelCollector:
   # Tempo endpoint (where Tier 2 exports to)
   tempoEndpoint: "tempo.tempo.svc.cluster.local:4317"
   tempoInsecure: true
+
+# ============================================================================
+# NodeLocal DNSCache - Reduces CoreDNS load by caching DNS at node level
+# ============================================================================
+# Deploys a DaemonSet that runs a DNS cache on every node, reducing
+# latency and load on the cluster's CoreDNS pods.
+
+nodeLocalDns:
+  # Enable/disable NodeLocal DNSCache
+  enabled: false
+
+  # Local DNS IP - link-local address the node cache binds to
+  # Must not conflict with any existing cluster IPs
+  localDnsIp: "169.254.20.10"
+
+  # kube-dns service IP - REQUIRED for iptables interception
+  # Get this with: kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}'
+  # For GKE, typically 10.x.0.10 where x depends on your service CIDR
+  kubeDnsIp: ""
+
+  # kube-dns-upstream service IP - used for forwarding DNS queries upstream
+  # That service bypasses the NOTRACK iptables rules so NAT works properly
+  # Helm looks up the live service IP first; this value is only used when that lookup fails
+  # On the first deploy it falls back to kubeDnsIp (a second helm upgrade picks up the real IP)
+  kubeDnsUpstreamIp: ""
+
+  # Image configuration
+  image: "registry.k8s.io/dns/k8s-dns-node-cache"
+  imageTag: "1.23.1"
+  imagePullPolicy: "IfNotPresent"
+
+  # Resource limits - lineractl calculates proportional values based on VM vCPUs
+  # Shard nodes have high DNS load from ScyllaDB connection queries
+  resources:
+    requests:
+      cpu: "400m"
+      memory: "64Mi"
+    limits:
+      cpu: "1200m"
+      memory: "256Mi"
+
+  # DNS configuration
+  # Cluster DNS domain (usually cluster.local)
+  clusterDomain: "cluster.local"
+
+  # Upstream DNS servers for external queries (empty = use the node's resolv.conf)
+  upstreamServers: ""
+
+  # Cache TTL settings (in seconds)
+  # How long to cache successful DNS responses
+  successTtl: 30
+  # How long to cache negative DNS responses (NXDOMAIN)
+  denialTtl: 5
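For reference, enabling the feature end to end might look like the following. The release name and the step of applying the static kube-dns-upstream Service out-of-band (it lives under static/, not templates/, so Helm does not render it) are assumptions, not something this chart pins down:

    # The kube-dns ClusterIP is required; the config template fails to render without it
    KUBE_DNS_IP=$(kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}')

    kubectl apply -f kubernetes/linera-validator/static/kube-dns-upstream-service.yaml

    helm upgrade --install linera-validator kubernetes/linera-validator \
      --set nodeLocalDns.enabled=true \
      --set nodeLocalDns.kubeDnsIp="${KUBE_DNS_IP}"

    # If kube-dns-upstream did not exist when the chart was rendered, the ConfigMap
    # forwards to kubeDnsIp; a second helm upgrade (or setting
    # nodeLocalDns.kubeDnsUpstreamIp) switches it to the real upstream service IP.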