From 932ab109d6aea706fdf5092ba7bf266c272a5de0 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 17:18:11 -0400 Subject: [PATCH 1/3] Forward-port CockroachDB operator docs to v25.3 --- .../orchestration/kubernetes-limitations.md | 10 +- .../orchestration/kubernetes-stop-cluster.md | 4 +- .../orchestration/operator-check-namespace.md | 2 +- .../start-cockroachdb-operator-secure.md | 14 +- .../sidebar-data/self-hosted-deployments.json | 87 +++ .../v25.3/cockroachdb-operator-overview.md | 48 ++ .../v25.3/cockroachdb-operator-performance.md | 308 ++++++++ .../v25.3/configure-cockroachdb-kubernetes.md | 34 +- .../v25.3/configure-cockroachdb-operator.md | 183 +++++ .../create-security-certificates-custom-ca.md | 2 +- ...y-cockroachdb-with-cockroachdb-operator.md | 731 ++++++++++++++++++ ...y-cockroachdb-with-kubernetes-openshift.md | 18 +- .../deploy-cockroachdb-with-kubernetes.md | 16 +- src/current/v25.3/kubernetes-overview.md | 8 +- src/current/v25.3/kubernetes-performance.md | 4 + .../migrate-cockroachdb-kubernetes-helm.md | 252 ++++++ ...migrate-cockroachdb-kubernetes-operator.md | 320 ++++++++ .../v25.3/monitor-cockroachdb-kubernetes.md | 20 +- .../v25.3/monitor-cockroachdb-operator.md | 361 +++++++++ ...estrate-a-local-cluster-with-kubernetes.md | 8 +- ...ckroachdb-with-kubernetes-multi-cluster.md | 4 +- ...override-templates-cockroachdb-operator.md | 90 +++ .../v25.3/scale-cockroachdb-kubernetes.md | 28 +- .../v25.3/scale-cockroachdb-operator.md | 106 +++ .../v25.3/schedule-cockroachdb-kubernetes.md | 42 +- .../v25.3/schedule-cockroachdb-operator.md | 365 +++++++++ .../v25.3/secure-cockroachdb-kubernetes.md | 46 +- .../v25.3/secure-cockroachdb-operator.md | 201 +++++ ...ate-a-multi-region-cluster-on-localhost.md | 2 +- .../v25.3/upgrade-cockroachdb-kubernetes.md | 8 +- .../v25.3/upgrade-cockroachdb-operator.md | 149 ++++ 31 files changed, 3356 insertions(+), 115 deletions(-) create mode 100644 src/current/v25.3/cockroachdb-operator-overview.md create mode 100644 src/current/v25.3/cockroachdb-operator-performance.md create mode 100644 src/current/v25.3/configure-cockroachdb-operator.md create mode 100644 src/current/v25.3/deploy-cockroachdb-with-cockroachdb-operator.md create mode 100644 src/current/v25.3/migrate-cockroachdb-kubernetes-helm.md create mode 100644 src/current/v25.3/migrate-cockroachdb-kubernetes-operator.md create mode 100644 src/current/v25.3/monitor-cockroachdb-operator.md create mode 100644 src/current/v25.3/override-templates-cockroachdb-operator.md create mode 100644 src/current/v25.3/scale-cockroachdb-operator.md create mode 100644 src/current/v25.3/schedule-cockroachdb-operator.md create mode 100644 src/current/v25.3/secure-cockroachdb-operator.md create mode 100644 src/current/v25.3/upgrade-cockroachdb-operator.md diff --git a/src/current/_includes/v25.3/orchestration/kubernetes-limitations.md b/src/current/_includes/v25.3/orchestration/kubernetes-limitations.md index 5e9784c28d1..0144b1cb4ed 100644 --- a/src/current/_includes/v25.3/orchestration/kubernetes-limitations.md +++ b/src/current/_includes/v25.3/orchestration/kubernetes-limitations.md @@ -2,11 +2,11 @@ To deploy CockroachDB {{page.version.version}}, Kubernetes 1.18 or higher is required. Cockroach Labs strongly recommends that you use a Kubernetes version that is [eligible for patch support by the Kubernetes project](https://kubernetes.io/releases/). 
-#### Kubernetes Operator
+#### {{ site.data.products.public-operator }}
 
-- The CockroachDB Kubernetes Operator currently deploys clusters in a single region. For multi-region deployments using manual configs, see [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}).
+- The {{ site.data.products.public-operator }} deploys clusters in a single region. For multi-region deployments, Cockroach Labs recommends using the [{{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/cockroachdb-operator-overview.md %}), which is designed to support multi-region deployments. For guidance on multi-region deployments that use manual configs with the {{ site.data.products.public-operator }}, see [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}).
 
-- Using the Operator, you can give a new cluster an arbitrary number of [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/). However, a cluster's labels cannot be modified after it is deployed. To track the status of this limitation, refer to [#993](https://github.com/cockroachdb/cockroach-operator/issues/993) in the Operator project's issue tracker.
+- Using the {{ site.data.products.public-operator }}, you can give a new cluster an arbitrary number of [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/). However, a cluster's labels cannot be modified after it is deployed. To track the status of this limitation, refer to [#993](https://github.com/cockroachdb/cockroach-operator/issues/993) in the {{ site.data.products.public-operator }} project's issue tracker.
 
 {% unless page.name == "orchestrate-cockroachdb-with-kubernetes-multi-cluster.md" %}
 #### Helm version
@@ -17,9 +17,9 @@ The CockroachDB Helm chart requires Helm 3.0 or higher. If you attempt to use an
 Error: UPGRADE FAILED: template: cockroachdb/templates/tests/client.yaml:6:14: executing "cockroachdb/templates/tests/client.yaml" at <.Values.networkPolicy.enabled>: nil pointer evaluating interface {}.enabled
 ~~~
 
-The CockroachDB Helm chart is currently not under active development, and no new features are planned. However, Cockroach Labs remains committed to fully supporting the Helm chart by addressing defects, providing security patches, and addressing breaking changes due to deprecations in Kubernetes APIs.
+The public Helm chart is currently not under active development, and no new features are planned. However, Cockroach Labs remains committed to fully supporting the Helm chart by addressing defects, providing security patches, and addressing breaking changes due to deprecations in Kubernetes APIs.
 
-A deprecation notice for the Helm chart will be provided to customers a minimum of 6 months in advance of actual deprecation.
+A deprecation notice for the public Helm chart will be provided to customers a minimum of 6 months in advance of actual deprecation.
{% endunless %} #### Network diff --git a/src/current/_includes/v25.3/orchestration/kubernetes-stop-cluster.md b/src/current/_includes/v25.3/orchestration/kubernetes-stop-cluster.md index 58d79611e6d..c1db8bca26a 100644 --- a/src/current/_includes/v25.3/orchestration/kubernetes-stop-cluster.md +++ b/src/current/_includes/v25.3/orchestration/kubernetes-stop-cluster.md @@ -10,14 +10,14 @@ To shut down the CockroachDB cluster: kubectl delete -f example.yaml ~~~ -1. Remove the Operator: +1. Remove the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell kubectl delete -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml ~~~ - This will delete the CockroachDB cluster being run by the Operator. It intentionally does **not** delete: + This will delete the CockroachDB cluster being run by the {{ site.data.products.public-operator }}. It intentionally does **not** delete: - The persistent volumes that were attached to the pods, to avoid the risk of data loss. Before deleting a cluster's persistent volumes, be sure to back them up. For more information, refer to [Delete a Cluster's Persistent Volumes](#delete-a-clusters-persistent-volumes) in the Kubernetes project's documentation. - Any secrets you may have created. For more information on managing secrets, refer to [Managing Secrets Using `kubectl`](https://kubernetes.io/docs/tasks/configmap-secret/managing-secret-using-kubectl) in the Kubernetes project's documentation. diff --git a/src/current/_includes/v25.3/orchestration/operator-check-namespace.md b/src/current/_includes/v25.3/orchestration/operator-check-namespace.md index bc37c6e1681..4a37876acd4 100644 --- a/src/current/_includes/v25.3/orchestration/operator-check-namespace.md +++ b/src/current/_includes/v25.3/orchestration/operator-check-namespace.md @@ -1,3 +1,3 @@ {{site.data.alerts.callout_info}} -All `kubectl` steps should be performed in the [namespace where you installed the Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#install-the-operator). By default, this is `cockroach-operator-system`. +All `kubectl` steps should be performed in the [namespace where you installed the operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#install-the-operator). By default, this is `cockroach-operator-system`. {{site.data.alerts.end}} \ No newline at end of file diff --git a/src/current/_includes/v25.3/orchestration/start-cockroachdb-operator-secure.md b/src/current/_includes/v25.3/orchestration/start-cockroachdb-operator-secure.md index 5cbc1c49af9..feb492fcd41 100644 --- a/src/current/_includes/v25.3/orchestration/start-cockroachdb-operator-secure.md +++ b/src/current/_includes/v25.3/orchestration/start-cockroachdb-operator-secure.md @@ -28,7 +28,7 @@ ~~~ {% endcapture %} -1. Apply the [custom resource definition (CRD)](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#customresourcedefinitions) for the Operator: +1. Apply the [custom resource definition (CRD)](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#customresourcedefinitions) for the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -39,16 +39,16 @@ customresourcedefinition.apiextensions.k8s.io/crdbclusters.crdb.cockroachlabs.com created ~~~ -1. 
By default, the Operator is configured to install in the `cockroach-operator-system` namespace and to manage CockroachDB instances for all namespaces on the cluster. +1. By default, the {{ site.data.products.public-operator }} is configured to install in the `cockroach-operator-system` namespace and to manage CockroachDB instances for all namespaces on the cluster. -1. Set your current namespace to the one used by the Operator. For example, to use the Operator's default namespace: +1. Set your current namespace to the one used by the {{ site.data.products.public-operator }}. For example, to use the {{ site.data.products.public-operator }}'s default namespace: {% include_cached copy-clipboard.html %} ~~~ shell $ kubectl config set-context --current --namespace=cockroach-operator-system ~~~ -1. Validate that the Operator is running: +1. Validate that the operator is running: {% include_cached copy-clipboard.html %} ~~~ shell @@ -66,7 +66,7 @@ After a cluster managed by the Kubernetes operator is initialized, its Kubernetes labels cannot be modified. For more details, refer to [Best practices](#best-practices). {{site.data.alerts.end}} -1. Download `example.yaml`, a custom resource that tells the Operator how to configure the Kubernetes cluster. +1. Download `example.yaml`, a custom resource that tells the operator how to configure the Kubernetes cluster. {% include_cached copy-clipboard.html %} ~~~ shell @@ -76,7 +76,7 @@ After a cluster managed by the Kubernetes operator is initialized, its Kubernete By default, this custom resource specifies CPU and memory resources that are appropriate for the virtual machines used in this deployment example. On a production cluster, you should substitute values that are appropriate for your machines and workload. For details on configuring your deployment, see [Configure the Cluster](configure-cockroachdb-kubernetes.html). {{site.data.alerts.callout_info}} - By default, the Operator will generate and sign 1 client and 1 node certificate to secure the cluster. This means that if you do not provide a CA, a `cockroach`-generated CA is used. If you want to authenticate using your own CA, [specify the generated secrets in the custom resource](secure-cockroachdb-kubernetes.html#use-a-custom-ca) **before** proceeding to the next step. + By default, the operator will generate and sign 1 client and 1 node certificate to secure the cluster. This means that if you do not provide a CA, a `cockroach`-generated CA is used. If you want to authenticate using your own CA, [specify the generated secrets in the custom resource](secure-cockroachdb-kubernetes.html#use-a-custom-ca) **before** proceeding to the next step. {{site.data.alerts.end}} 1. Apply `example.yaml`: @@ -86,7 +86,7 @@ After a cluster managed by the Kubernetes operator is initialized, its Kubernete $ kubectl apply -f example.yaml ~~~ - The Operator will create a StatefulSet and initialize the nodes as a cluster. + The operator will create a StatefulSet and initialize the nodes as a cluster. 
~~~ crdbcluster.crdb.cockroachlabs.com/cockroachdb created diff --git a/src/current/_includes/v25.3/sidebar-data/self-hosted-deployments.json b/src/current/_includes/v25.3/sidebar-data/self-hosted-deployments.json index 8f997fcaf03..d603be44fc9 100644 --- a/src/current/_includes/v25.3/sidebar-data/self-hosted-deployments.json +++ b/src/current/_includes/v25.3/sidebar-data/self-hosted-deployments.json @@ -170,6 +170,93 @@ } ] }, + { + "title": "Deploy in Kubernetes with CockroachDB Operator", + "items": [ + { + "title": "Overview", + "urls": [ + "/${VERSION}/cockroachdb-operator-overview.html" + ] + }, + { + "title": "CockroachDB Operator Deployment Guide", + "urls": [ + "/${VERSION}/deploy-cockroachdb-with-cockroachdb-operator.html" + ] + }, + { + "title": "Migrate from Other Kubernetes Deployments", + "items": [ + { + "title": "Migrate from Helm StatefulSet", + "urls": [ + "/${VERSION}/migrate-cockroachdb-kubernetes-helm.html" + ] + }, + { + "title": "Migrate from {{ site.data.products.public-operator }}", + "urls": [ + "/${VERSION}/migrate-cockroachdb-kubernetes-operator.html" + ] + } + ] + }, + { + "title": "Operate CockroachDB with Kubernetes", + "items": [ + { + "title": "Pod Scheduling", + "urls": [ + "/${VERSION}/schedule-cockroachdb-operator.html" + ] + }, + { + "title": "Resource Management", + "urls": [ + "/${VERSION}/configure-cockroachdb-operator.html" + ] + }, + { + "title": "Certificate Management", + "urls": [ + "/${VERSION}/secure-cockroachdb-operator.html" + ] + }, + { + "title": "Cluster Scaling", + "urls": [ + "/${VERSION}/scale-cockroachdb-operator.html" + ] + }, + { + "title": "Cluster Monitoring", + "urls": [ + "/${VERSION}/monitor-cockroachdb-operator.html" + ] + }, + { + "title": "Cluster Upgrades", + "urls": [ + "/${VERSION}/upgrade-cockroachdb-operator.html" + ] + }, + { + "title": "Override Templates", + "urls": [ + "/${VERSION}/override-templates-cockroachdb-operator.html" + ] + }, + { + "title": "Kubernetes Performance", + "urls": [ + "/${VERSION}/cockroachdb-operator-performance.html" + ] + } + ] + } + ] + }, { "title": "Multi-Region for Self-Hosted Deployments", "items": [ diff --git a/src/current/v25.3/cockroachdb-operator-overview.md b/src/current/v25.3/cockroachdb-operator-overview.md new file mode 100644 index 00000000000..45ebe199749 --- /dev/null +++ b/src/current/v25.3/cockroachdb-operator-overview.md @@ -0,0 +1,48 @@ +--- +title: CockroachDB Operator Overview +summary: An overview of deployment and management of a CockroachDB cluster using the CockroachDB operator with Kubernetes. +toc: true +toc_not_nested: true +secure: true +docs_area: deploy +key: operate-cockroachdb-kubernetes-operator.html +--- + +The {{ site.data.products.cockroachdb-operator }} is a fully-featured [Kubernetes operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) that allows you to deploy and manage CockroachDB self-hosted clusters. + +{{site.data.alerts.callout_info}} +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). + +For information on the generally-available {{ site.data.products.public-operator }}, read the [{{ site.data.products.public-operator }} documentation]({% link {{ page.version.version }}/kubernetes-overview.md %}) and see the [GitHub repository](https://github.com/cockroachdb/cockroach-operator). 
+{{site.data.alerts.end}}
+
+With the {{ site.data.products.cockroachdb-operator }}, you can deploy CockroachDB clusters across multiple regions with separate operator instances per region. Using [Helm](https://helm.sh/), set configurations that manage the operator and CockroachDB nodes across regions.
+
+## {{ site.data.products.cockroachdb-operator }}
+
+This section describes how to:
+
+- [Deploy a CockroachDB cluster using the {{ site.data.products.cockroachdb-operator }}]({% link {{page.version.version}}/deploy-cockroachdb-with-cockroachdb-operator.md %}).
+- Migrate from an existing CockroachDB Kubernetes deployment using [Helm]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-helm.md %}) or the [{{ site.data.products.public-operator }}]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-operator.md %}).
+- Operate a CockroachDB cluster:
+
+  - [Manage pod scheduling]({% link {{page.version.version}}/schedule-cockroachdb-operator.md %}).
+  - [Manage cluster resources]({% link {{page.version.version}}/configure-cockroachdb-operator.md %}).
+  - [Manage certificates]({% link {{page.version.version}}/secure-cockroachdb-operator.md %}).
+  - [Scale a cluster]({% link {{page.version.version}}/scale-cockroachdb-operator.md %}).
+  - [Monitor a cluster]({% link {{page.version.version}}/monitor-cockroachdb-operator.md %}).
+  - [Upgrade a cluster]({% link {{page.version.version}}/upgrade-cockroachdb-operator.md %}).
+  - [Override deployment templates]({% link {{page.version.version}}/override-templates-cockroachdb-operator.md %}).
+  - [Improve cluster performance]({% link {{page.version.version}}/cockroachdb-operator-performance.md %}).
+
+## Kubernetes terminology
+
+Before starting, review some basic Kubernetes terminology. Note that CockroachDB [nodes]({% link {{ page.version.version }}/architecture/glossary.md %}#cockroachdb-architecture-terms) are distinct from Kubernetes "nodes" or "worker nodes".
+
+Feature | Description
+--------|------------
+[node](https://kubernetes.io/docs/concepts/architecture/nodes/) | A physical or virtual machine. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}), you'll create instances and join them as worker nodes into a single Kubernetes cluster.
+[pod](http://kubernetes.io/docs/user-guide/pods/) | A pod is a group of one or more Docker containers. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}), each pod will run on a separate Kubernetes worker node and include one Docker container running a single CockroachDB node, reflecting our [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology).
+[operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) | An operator is an extension to Kubernetes that uses custom resources to efficiently manage specific applications. The {{ site.data.products.cockroachdb-operator }} includes two custom resource definitions to manage CockroachDB, `CrdbCluster` and `CrdbNode`. Unlike the older [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator), the {{ site.data.products.cockroachdb-operator }} does not use StatefulSets and is designed to simplify multi-region deployments.
+[persistent volume](http://kubernetes.io/docs/user-guide/persistent-volumes/) | A persistent volume is a piece of networked storage (Persistent Disk on GCE, Elastic Block Store on AWS) mounted into a pod. The lifetime of a persistent volume is decoupled from the lifetime of the pod that's using it, ensuring that each CockroachDB node binds back to the same storage on restart.

The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}) assumes that dynamic volume provisioning is available. When that is not the case, [persistent volume claims](http://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims) need to be created manually. +[RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) | RBAC, or Role-Based Access Control, is the system Kubernetes uses to manage permissions within the cluster. In order to take an action (e.g., `get` or `create`) on an API resource (e.g., a `pod`), the client must have a `Role` that allows it to do so. The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}) creates the RBAC resources necessary for CockroachDB to create and access certificates. diff --git a/src/current/v25.3/cockroachdb-operator-performance.md b/src/current/v25.3/cockroachdb-operator-performance.md new file mode 100644 index 00000000000..98d6abb9077 --- /dev/null +++ b/src/current/v25.3/cockroachdb-operator-performance.md @@ -0,0 +1,308 @@ +--- +title: Performance with the CockroachDB Operator +summary: How running CockroachDB in Kubernetes affects its performance and how to get the best possible performance when running in Kubernetes using the CockroachDB operator. +toc: true +docs_area: deploy +--- + +Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This page outlines potential bottlenecks when running CockroachDB in Kubernetes and how to optimize performance. + +{{site.data.alerts.callout_info}} +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + +## Before you begin + +Before you focus on optimizing a Kubernetes-orchestrated CockroachDB cluster: + +1. Before deploying on Kubernetes, ensure that performance is optimized for your workload on identical hardware. You may find that you first need to [modify your workload]({% link {{ page.version.version }}/performance-best-practices-overview.md %}) or use [different machine specs]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware) to achieve the performance you need. + +1. Read the documentation for [deploying CockroachDB on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. + +## Performance factors + +A number of independent factors affect performance when running CockroachDB on Kubernetes. Most are easiest to change before you create your CockroachDB cluster. If you need to modify a CockroachDB cluster that is already running on Kubernetes, extra care and testing is strongly recommended. + +The following sections show how to modify excerpts from the Cockroach Labs-provided Kubernetes configuration YAML files. You can find the most up-to-date version of this file [on GitHub](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml). + +### Version of CockroachDB + +Because CockroachDB is under very active development, there are typically substantial performance gains in each release. 
If you are not experiencing optimal performance and aren't running the latest release, consider upgrading.
+
+### Client workload
+
+Your workload is the single most important factor in database performance. Read through [SQL performance best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}) and determine whether you can make workload changes to speed up your application.
+
+### Machine size
+
+The size of the machines you're using is not a Kubernetes-specific concern, but is a good place to start if you want more performance. Using machines with more CPU will almost always allow for greater throughput. Because Kubernetes runs a set of processes on every machine in a cluster, it is typically more efficient to use fewer large machines than more small machines. For specific suggestions, refer to [Hardware]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware).
+
+### Disk type
+
+CockroachDB makes heavy use of the disks you provide it, so using faster disks is an easy way to improve your cluster's performance. For the best performance, [SSDs are strongly recommended]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware).
+
+The Cockroach Labs-provided configuration does not specify disk type, so in most environments Kubernetes will auto-provision disks of the default type. In the common cloud environments (AWS, GCP, Azure) this means you'll get slow disks that aren't optimized for database workloads (e.g., HDDs on GCE, SSDs without provisioned IOPS on AWS).
+
+#### Create a different disk type
+
+Kubernetes exposes the disk types used by its volume provisioner via its [`StorageClass` API object](https://kubernetes.io/docs/concepts/storage/storage-classes/). Each cloud environment has a default `StorageClass`, but you can easily change the default or create a new named class that you can specify later.
+
+To do this, pick a volume provisioner from the list in the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/), modify the example YAML file to specify the disk type you want, then run `kubectl create -f {your-storage-class-file}.yaml`. For example, in order to use the `pd-ssd` disk type on Google Compute Engine or Google Kubernetes Engine, you can use a `StorageClass` file like the following:
+
+~~~ yaml
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: {your-ssd-class-name}
+provisioner: kubernetes.io/gce-pd
+parameters:
+  type: pd-ssd
+~~~
+
+You may also want to set additional parameters as documented in the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/), such as `iopsPerGB` if you're creating a `StorageClass` for AWS's `io1` Provisioned IOPS volume type.
+
+You can configure this new disk type to be used only by CockroachDB nodes, or make it the default for all volumes in your cluster:
+
+#### Configure the disk type used by CockroachDB
+
+To use a new `StorageClass` without making it the default in your cluster, modify your application's YAML file to ask for it. In the CockroachDB configuration, that means adding `storageClassName` to `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate`:
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    dataStore:
+      volumeClaimTemplate:
+        storageClassName: {your-ssd-class-name}
+~~~
+
+When running `kubectl create -f` on your modified YAML file, Kubernetes should create volumes using the specified `storageClassName`.
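+
+To confirm that the CockroachDB volume claims were bound using the class you specified, you can list the PVCs along with their storage classes. This is an optional sanity check; `custom-columns` here only reads standard PVC fields:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl get pvc -o custom-columns=NAME:.metadata.name,STORAGECLASS:.spec.storageClassName
+~~~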
+ +#### Change the default disk type + +To make a new `StorageClass` the default for all volumes in your cluster, run the following `kubectl` commands. + +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl get storageclasses +~~~ +~~~ shell +NAME PROVISIONER +ssd kubernetes.io/gce-pd +standard (default) kubernetes.io/gce-pd +~~~ +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl patch storageclass standard -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' +~~~ +~~~ shell +storageclass "standard" patched +~~~ +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl patch storageclass ssd -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' +~~~ +~~~ shell +storageclass "ssd" patched +~~~ + +### Disk size + +On some cloud providers, including all GCP disks and the AWS `io1` disk type, the number of IOPS available to a disk is directly correlated to the size of the disk. In such cases, increasing the size of your disks can significantly improve CockroachDB performance, and decrease the risk of filling them up. Before you create your CockroachDB cluster, modify the `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate` in the CockroachDB YAML file to ask for more space. The following example sets this value to 1TB: + +~~~ yaml +cockroachdb: + crdbCluster: + dataStore: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 1024Gi +~~~ + +Since [GCE disk IOPS scale linearly with disk size](https://cloud.google.com/compute/docs/disks/performance#type_comparison), a 1TiB disk gives 1024 times as many IOPS as a 1GiB disk, which can make a very large difference for write-heavy workloads. + +### Local disks + +The examples thus far assume the use of auto-provisioned, remotely attached disks. However, local disks typically provide better performance than remotely attached disks. For example, SSD Instance Store Volumes outperform EBS Volumes on AWS, and Local SSDs outperform Persistent Disks on GCE. As of v1.14, Kubernetes supports [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local). + +When using local disks, consider using [replication controls]({% link {{ page.version.version }}/configure-replication-zones.md %}) to increase the replication factor of your data from 3 (default) to 5. This is because local disks have a greater chance of experiencing a disk failure than a cloud provider's network-attached disks, which are often replicated underneath the covers. + +### Resource requests and limits + +When you ask Kubernetes to run a pod, you can tell it to reserve certain amounts of CPU or memory for each container in the pod, or to limit the CPU or memory of each container. Setting resource [requests](#resource-requests) or [limits](#resource-limits) can have different implications, depending on your Kubernetes cluster's resource utilization. For the authoritative information on this topic, refer to the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/). + +#### Resource requests + +Resource requests reserve a certain amount of CPU or memory for your container. If you add resource requests to your CockroachDB YAML file, Kubernetes will schedule each CockroachDB pod onto a node with sufficient unreserved resources and ensure the pods are guaranteed the reserved resources using the applicable Linux container primitives. 
If you are running other workloads in your Kubernetes cluster, setting resource requests is strongly recommended to ensure good performance. If you do not set resource requests, CockroachDB could be starved of CPU cycles or [OOM-stopped]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#out-of-memory-oom-crash) before less important processes.
+
+To determine how many resources are usable on your Kubernetes nodes, you can run:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl describe nodes
+~~~
+~~~ shell
+Name:               gke-perf-default-pool-aafee20c-k4t8
+[...]
+Capacity:
+ cpu:     4
+ memory:  15393536Ki
+ pods:    110
+Allocatable:
+ cpu:     3920m
+ memory:  12694272Ki
+ pods:    110
+[...]
+Non-terminated Pods:  (2 in total)
+  Namespace    Name                                            CPU Requests  CPU Limits  Memory Requests  Memory Limits
+  ---------    ----                                            ------------  ----------  ---------------  -------------
+  kube-system  kube-dns-778977457c-kqtlr                       260m (6%)     0 (0%)      110Mi (0%)       170Mi (1%)
+  kube-system  kube-proxy-gke-perf-default-pool-aafee20c-k4t8  100m (2%)     0 (0%)      0 (0%)           0 (0%)
+Allocated resources:
+  (Total limits may be over 100 percent, i.e., overcommitted.)
+  CPU Requests  CPU Limits  Memory Requests  Memory Limits
+  ------------  ----------  ---------------  -------------
+  360m (9%)     0 (0%)      110Mi (0%)       170Mi (1%)
+~~~
+
+In the output, the `Allocatable` field shows the `cpu` and `memory` resources Kubernetes will provide to pods running on the machine. The difference between the machine's `Capacity` and its `Allocatable` resources is taken up by the operating system and Kubernetes' management processes. In the preceding output, `3920m` stands for 3920 "milli-CPUs", or "thousandths of a CPU".
+
+Kubernetes runs additional pods in the `kube-system` namespace that are part of the cluster infrastructure. If you want to run CockroachDB on every node in your cluster, you must leave room for these processes, which are essential for the Kubernetes cluster's health. If you are only running CockroachDB on a subset of the Kubernetes machines, you can take up all the `Allocatable` space other than what is used by the `kube-system` pods that run on all the Kubernetes machines, such as `kube-proxy` or the `fluentd` logging agent.
+
+On Kubernetes v1.10 or earlier, it is difficult to truly use all of the allocatable space, because you'd have to manually preempt the `kube-system` pods on each machine (by deleting them). On more recent versions, the Kubernetes [Pod Priority](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/) feature lets you set the CockroachDB pods to a higher priority, causing the Kubernetes scheduler to preempt and reschedule the `kube-system` pods onto other machines.
+
+Once you've picked out an amount of CPU and memory to reserve for CockroachDB, configure the resource requests under `cockroachdb.crdbCluster.resources` in your values file. For example, to use most of the available resources on the machines described above, you'd configure the following lines:
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    resources:
+      requests:
+        cpu: 3500m
+        memory: 12300Mi
+~~~
+
+When you initialize the cluster, check that all the CockroachDB pods are scheduled successfully. If you see any get stuck in the pending state, run `kubectl describe pod {podname}` and check the `Events` for information about why they're still pending.
You may need to manually preempt pods on one or more nodes by running `kubectl delete pod` on them to make room for the CockroachDB pods. As long as the pods you delete were created by a higher-level Kubernetes object such as a `Deployment`, they'll be safely recreated on another node. + +#### Resource limits + +Resource limits cap the resources used by a pod to no more than the provided limit. This makes for more predictable performance because your pods will not be allowed to use any excess capacity on their machines. Pods will not have more resources available to them at some times (e.g., lulls in traffic) than others (e.g., busy periods where the other pods on a machine are also fully utilizing their reserved resources). Resource limits also increase the ["Quality of Service" guaranteed by the Kubernetes runtime](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/resource-qos.md) on Kubernetes v1.8 and earlier, making the pods less likely to be preempted when a machine is oversubscribed. Finally, memory limits in particular define the amount of memory the container perceives as available, which is useful when specifying percentage-based values for the CockroachDB `--cache` and `--max-sql-memory` flags, as in our default configuration file. + +To set resource limits, in addition to the [resource requests](#resource-requests) described in the preceding section, change the configuration as follows: + +~~~ yaml +cockroachdb: + crdbCluster: + resources: + requests: + cpu: 3500m + memory: 12300Mi + limits: + memory: 12300Mi +~~~ + +Pods will be limited to their reserved resources and are unlikely to be preempted, except in rare cases. This will not improve performance on an underutilized Kubernetes cluster, but provides more predictable performance as other workloads run. + +{{site.data.alerts.callout_danger}} +While setting memory limits is strongly recommended, [setting CPU limits can hurt tail latencies as currently implemented by Kubernetes](https://github.com/kubernetes/kubernetes/issues/51135). Cockroach Labs recommends not setting CPU limits at all, unless you have explicitly enabled the non-default [Static CPU Management Policy](https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy) when setting up your Kubernetes cluster. In this case, set CPU limits as integers and match memory limits exactly to their corresponding requests. +{{site.data.alerts.end}} + +#### Default resource requests and limits + +Even if you do not manually set resource requests, they are likely being applied. In many installations of Kubernetes, a [LimitRange](https://kubernetes.io/docs/tasks/administer-cluster/cpu-default-namespace/) is preconfigured for the `default` namespace that applies a default CPU request of `100m`, or one-tenth of a CPU. You can see this configuration by running the following command: + +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl describe limitranges +~~~ + +Experimentally, this does not appear to have a noticeable effect on CockroachDB's performance when a Kubernetes cluster isn't heavily utilized, but do not be surprised if you see CPU requests on your pods that you didn't set. + +### Other pods on the same machines as CockroachDB + +As described in [Resource requests and limits](#resource-requests-and-limits), your Kubernetes cluster will always run pods other than CockroachDB. 
You can see them by running: + +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl get pods --all-namespaces +~~~ +~~~ shell +NAMESPACE NAME READY STATUS RESTARTS AGE +kube-system event-exporter-v0.1.7-5c4d9556cf-6v7lf 2/2 Running 0 2m +kube-system fluentd-gcp-v2.0.9-6rvmk 2/2 Running 0 2m +kube-system fluentd-gcp-v2.0.9-m2xgp 2/2 Running 0 2m +kube-system fluentd-gcp-v2.0.9-sfgps 2/2 Running 0 2m +kube-system fluentd-gcp-v2.0.9-szwwn 2/2 Running 0 2m +kube-system heapster-v1.4.3-968544ffd-5tsb8 3/3 Running 0 1m +kube-system kube-dns-778977457c-4s7vv 3/3 Running 0 1m +kube-system kube-dns-778977457c-ls6fq 3/3 Running 0 2m +kube-system kube-dns-autoscaler-7db47cb9b7-x2cc4 1/1 Running 0 2m +kube-system kube-proxy-gke-test-default-pool-828d39a7-dbn0 1/1 Running 0 2m +kube-system kube-proxy-gke-test-default-pool-828d39a7-nr06 1/1 Running 0 2m +kube-system kube-proxy-gke-test-default-pool-828d39a7-rc4m 1/1 Running 0 2m +kube-system kube-proxy-gke-test-default-pool-828d39a7-trd1 1/1 Running 0 2m +kube-system kubernetes-dashboard-768854d6dc-v7ng8 1/1 Running 0 2m +kube-system l7-default-backend-6497bcdb4d-2kbh4 1/1 Running 0 2m +~~~ + +These ["cluster add-ons"](https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) provide a variety of basic services like managing DNS entries for services within the cluster, powering the Kubernetes dashboard UI, or collecting logs or metrics from all the pods running in the cluster. If you do not like having them take up space in your cluster, you can prevent some of them from running by configuring your Kubernetes cluster appropriately. For example, on GKE, you can create a cluster with the minimal set of add-ons by running: + +{% include_cached copy-clipboard.html %} +~~~ shell +gcloud container clusters create --no-enable-cloud-logging --no-enable-cloud-monitoring --addons="" +~~~ + +However, some pods like `kube-proxy` and `kube-dns` are required for compliant Kubernetes clusters. Since there will always be pods other than CockroachDB running in your cluster, it's important to understand and account for the effects of having CockroachDB share a machine with other processes. The more processes there are on the same machine as a CockroachDB pod, the slower and less predictable its performance will likely be. To protect against this, it's strongly recommended to specify [resource requests](#resource-requests) on your CockroachDB pods to provide some level of CPU and memory isolation. + +Even with resource requests, there can still be contention for shared resources like network I/O or, in [exceptional](https://sysdig.com/blog/container-isolation-gone-wrong/) cases, internal kernel data structures. For these reasons and because of the Kubernetes infrastructure processes running on each machine, CockroachDB running on Kubernetes cannot match the performance of running CockroachDB directly on dedicated machines, although it can get quite close with careful configuration. + +If setting appropriate resource requests still isn't getting you the performance you expect, consider using [dedicated nodes](#dedicated-nodes). + +#### Client applications on the same machines as CockroachDB + +Client applications such as benchmarking applications running on the same machines as CockroachDB are likely to compete for resources. As application load increases, so does the load on CockroachDB processes. The best way to avoid this is to [set resource requests and limits](#resource-requests-and-limits). 
Alternatively, you can also set [anti-affinity scheduling policies](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) on your client applications: + +~~~ yaml +cockroachdb: + crdbCluster: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - loadgen + topologyKey: kubernetes.io/hostname + - weight: 99 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - cockroachdb + topologyKey: kubernetes.io/hostname +~~~ + +The preceding configuration will first prefer to put the `loadgen` pods on different nodes from each other, which is important for the fault tolerance of the `loadgen` pods themselves. As a secondary priority, it will attempt to put the pods on nodes that do not already have a running `CockroachDB` pod. This will ensure the best possible balance of fault tolerance and performance for the load generator and CockroachDB cluster. + +### Networking + +[Kubernetes places significant demands on the underlying network](https://kubernetes.io/docs/concepts/cluster-administration/networking/) in order to provide each pod a routable IP address and isolated Linux network namespace, among other requirements. While the impact is heavily dependent on your Kubernetes cluster's network setup, Docker and Kubernetes' networking abstractions often introduce a performance penalty for high-throughput distributed applications such as CockroachDB. + +Experimenting with networking can be a way to eke more performance out of your cluster. You can either replace your cluster's networking solution with a more performant one, or bypass most of the networking overhead by using the host machines' networks directly. + +#### Networking solutions + +If you aren't using a hosted Kubernetes service, you'll need to choose a [networking solution](https://kubernetes.io/docs/concepts/cluster-administration/networking/#how-to-achieve-this) when creating a Kubernetes cluster. While Cockroach Labs does not endorse any specific networking solutions, note that your choice can meaningfully impact CockroachDB's performance compared to running it outside of Kubernetes. + +### Dedicated nodes + +If your Kubernetes cluster uses heterogeneous hardware, you will likely want to ensure that CockroachDB only runs on specific machines. To optimize performance, it can be beneficial to dedicate those machines exclusively to CockroachDB. + +For more information, refer to [Pod scheduling]({% link {{ page.version.version }}/schedule-cockroachdb-operator.md %}). diff --git a/src/current/v25.3/configure-cockroachdb-kubernetes.md b/src/current/v25.3/configure-cockroachdb-kubernetes.md index ac07d9131f4..00cd0065fc1 100644 --- a/src/current/v25.3/configure-cockroachdb-kubernetes.md +++ b/src/current/v25.3/configure-cockroachdb-kubernetes.md @@ -9,13 +9,17 @@ docs_area: deploy {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} -This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. +This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. + +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. 
For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Resource management with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/configure-cockroachdb-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} These settings override the defaults used when [deploying CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}).
- - + +
@@ -42,7 +46,7 @@ You can set the CPU and memory resources allocated to the CockroachDB container {{site.data.alerts.end}}
-Specify CPU and memory values in `resources.requests` and `resources.limits` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify CPU and memory values in `resources.requests` and `resources.limits` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -110,7 +114,7 @@ Each CockroachDB node reserves a portion of its available memory for its cache a Our Kubernetes manifests dynamically set cache size and SQL memory size each to 1/4 (the recommended fraction) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. If you want to customize these values, set them explicitly. -Specify `cache` and `maxSQLMemory` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify `cache` and `maxSQLMemory` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -148,7 +152,7 @@ conf: When you start your cluster, Kubernetes dynamically provisions and mounts a persistent volume into each pod. For more information on persistent volumes, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/persistent-volumes/).
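+
+If you want to inspect what was provisioned, you can list the persistent volumes and the claims that bind them. This is purely informational and uses no options beyond the resource names:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl get pv,pvc
+~~~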
-The storage capacity of each volume is set in `pvc.spec.resources` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +The storage capacity of each volume is set in `pvc.spec.resources` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -191,7 +195,7 @@ You should provision an appropriate amount of disk storage for your workload. Fo If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance]({% link {{ page.version.version }}/kubernetes-performance.md %}#disk-size).
-Specify a new volume size in `resources.requests` and `resources.limits` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify a new volume size in `resources.requests` and `resources.limits` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -207,7 +211,7 @@ spec: {% include {{ page.version.version }}/orchestration/apply-custom-resource.md %} -The Operator updates the StatefulSet and triggers a rolling restart of the pods with the new storage capacity. +The {{ site.data.products.public-operator }} updates the StatefulSet and triggers a rolling restart of the pods with the new storage capacity. To verify that the storage capacity has been updated, run `kubectl get pvc` to view the persistent volume claims (PVCs). It will take a few minutes before the PVCs are completely updated.
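+
+For example, run the following and wait for the `CAPACITY` column to reflect the new size:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl get pvc
+~~~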
@@ -223,7 +227,7 @@ To verify that the storage capacity has been updated, run `kubectl get pvc` to v
 ## Network ports
 
-The Operator separates network traffic into three ports:
+The {{ site.data.products.public-operator }} separates network traffic into three ports:
 
 | Protocol | Default | Description | Custom Resource Field |
 |----------|---------|---------------------------------------------------------------------|-----------------------|
 | HTTP | 8080 | Used to [access the DB Console]({% link {{ page.version.version }}/ui-overview.md %}#db-console-access) | `httpPort` |
 | SQL | 26257 | Used for SQL shell access | `sqlPort` |
 
-Specify alternate port numbers in the Operator's [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL):
+Specify alternate port numbers in the {{ site.data.products.public-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL):
 
 ~~~ yaml
 spec:
@@ -240,19 +244,19 @@ spec:
 
 {% include {{ page.version.version }}/orchestration/apply-custom-resource.md %}
 
-The Operator updates the StatefulSet and triggers a rolling restart of the pods with the new port settings.
+The {{ site.data.products.public-operator }} updates the StatefulSet and triggers a rolling restart of the pods with the new port settings.
 
 {{site.data.alerts.callout_danger}}
-Currently, only the pods are updated with new ports. To connect to the cluster, you need to ensure that the `public` service is also updated to use the new port. You can do this by deleting the service with `kubectl delete service {cluster-name}-public`. When service is recreated by the Operator, it will use the new port. This is a known limitation that will be fixed in an Operator update.
+Currently, only the pods are updated with new ports. To connect to the cluster, ensure that the `public` service is also updated to use the new port. You can do this by deleting the service with `kubectl delete service {cluster-name}-public`. When the service is recreated by the operator, it will use the new port. This is a known limitation.
 {{site.data.alerts.end}}
 
 ## Ingress
 
 You can configure an [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) object to expose an internal HTTP or SQL [`ClusterIP` service](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) through a hostname.
 
-In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the Operator and must be deployed separately.
+In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the {{ site.data.products.public-operator }} and must be deployed separately.
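+
+If you do not already run an Ingress controller, one common option is the community-maintained `ingress-nginx` controller. The following is only a sketch that assumes the upstream chart location and default settings; adjust it for your environment:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+helm upgrade --install ingress-nginx ingress-nginx --repo https://kubernetes.github.io/ingress-nginx --namespace ingress-nginx --create-namespace
+~~~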
-Specify Ingress objects in `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify Ingress objects in `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -283,5 +287,5 @@ spec: Changing the SQL Ingress `host` on a running deployment will cause a rolling restart of the cluster, due to new node certificates being generated for the SQL host. {{site.data.alerts.end}} -The [custom resource definition](https://github.com/cockroachdb/cockroach-operator/blob/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. +The [custom resource definition](https://github.com/cockroachdb/cockroach-operator/blob/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the operator.
diff --git a/src/current/v25.3/configure-cockroachdb-operator.md b/src/current/v25.3/configure-cockroachdb-operator.md new file mode 100644 index 00000000000..9029f6bd2fa --- /dev/null +++ b/src/current/v25.3/configure-cockroachdb-operator.md @@ -0,0 +1,183 @@ +--- +title: Resource Management with the CockroachDB Operator +summary: Allocate CPU, memory, and storage resources for a cluster deployed with the CockroachDB operator. +toc: true +toc_not_nested: true +secure: true +docs_area: deploy +--- + +This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. + +{{site.data.alerts.callout_info}} +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + +On a production cluster, the resources you allocate to CockroachDB should be proportionate to your machine types and workload. Cockroach Labs recommends that you determine and set these values before deploying the cluster, but you can also update the values on a running cluster. + +{{site.data.alerts.callout_info}} +Run `kubectl describe nodes` to see the available resources on the instances that you have provisioned. +{{site.data.alerts.end}} + +## Memory and CPU + +You can set the CPU and memory resources allocated to the CockroachDB container on each pod. + +{{site.data.alerts.callout_info}} +1 CPU in Kubernetes is equivalent to 1 vCPU or 1 hyperthread. For best practices on provisioning CPU and memory for CockroachDB, refer to the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware). +{{site.data.alerts.end}} + +Specify CPU and memory values in `cockroachdb.crdbCluster.resources.limits` and `cockroachdb.crdbCluster.resources.requests` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): + +~~~ yaml +cockroachdb: + crdbCluster: + resources: + limits: + cpu: 4000m + memory: 16Gi + requests: + cpu: 4000m + memory: 16Gi +~~~ + +Apply the new settings to the cluster: + +{% include_cached copy-clipboard.html %} +~~~ shell +helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE +~~~ + +Cockroach Labs recommends using identical values for `resources.requests` and `resources.limits`. When setting the new values, note that not all of a pod's resources will be available to the CockroachDB container. This is because a fraction of the CPU and memory is reserved for Kubernetes. + +{{site.data.alerts.callout_info}} +If no resource limits are specified, the pods will be able to consume the maximum available CPUs and memory. However, to avoid overallocating resources when another memory-intensive workload is on the same instance, always set resource requests and limits explicitly. +{{site.data.alerts.end}} + +For more information on how Kubernetes handles resources, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/). + +### Cache and SQL memory size + +Each CockroachDB node reserves a portion of its available memory for its cache and for storing temporary data for SQL queries. For more information on these settings, see the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}#cache-and-sql-memory-size). 
+The {{ site.data.products.cockroachdb-operator }} dynamically sets cache size and SQL memory size each to 25% (the recommended percentage) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags]({% link {{ page.version.version }}/cockroach-start.md %}#flags).
+
+## Persistent storage
+
+When you start your cluster, Kubernetes dynamically provisions and mounts a persistent volume into each pod. For more information on persistent volumes, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/persistent-volumes/).
+
+The storage capacity of each volume is set in `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate.spec.resources` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    dataStore:
+      volumeClaimTemplate:
+        spec:
+          resources:
+            requests:
+              storage: "10Gi"
+~~~
+
+You should provision an appropriate amount of disk storage for your workload. For recommendations on this, see the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}#storage).
+
+### Expand disk size
+
+If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance]({% link {{ page.version.version }}/cockroachdb-operator-performance.md %}).
+
+{{site.data.alerts.callout_info}}
+The volume size should only be adjusted on disk types that can dynamically scale up, such as Amazon EBS volumes. Adjusting the volume size on disks that cannot dynamically scale is not recommended; instead, scale horizontally by adding more disks.
+{{site.data.alerts.end}}
+
+Specify a new volume size in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    dataStore:
+      volumeClaimTemplate:
+        spec:
+          resources:
+            requests:
+              storage: "100Gi"
+~~~
+
+Apply the new settings to the cluster:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+~~~
+
+The {{ site.data.products.cockroachdb-operator }} updates all nodes and triggers a rolling restart of the pods with the new storage capacity.
+
+To verify that the storage capacity has been updated, run `kubectl get pvc` to view the persistent volume claims (PVCs). It will take a few minutes before the PVCs are completely updated.
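+
+If you want to follow the resize as it happens, one option is to watch the claims until the reported capacity matches the new request; `$NAMESPACE` here is the same environment variable used in the deployment guide:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl get pvc -n $NAMESPACE -w
+~~~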
+
+## Network ports
+
+The {{ site.data.products.cockroachdb-operator }} separates network traffic into three ports:
+
+| Protocol | Default Port | Description                   | Custom Resource Field |
+|----------|--------------|-------------------------------|-----------------------|
+| gRPC     | 26258        | Used for node connections     | `service.ports.grpc`  |
+| HTTP     | 8080         | Used to access the DB Console | `service.ports.http`  |
+| SQL      | 26257        | Used for SQL shell access     | `service.ports.sql`   |
+
+Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the {{ site.data.products.cockroachdb-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL):
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    service:
+      ports:
+        sql: 5432
+~~~
+
+Apply the new settings to the cluster:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+~~~
+
+The {{ site.data.products.cockroachdb-operator }} updates all nodes and triggers a rolling restart of the pods with the new port settings.
+
+## Ingress
+
+You can configure an [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) object to expose an internal HTTP or SQL [`ClusterIP` service](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) through a hostname.
+
+In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the {{ site.data.products.cockroachdb-operator }} and must be deployed separately.
+
+Specify Ingress objects in `cockroachdb.crdbCluster.service.ingress`. Set `ingress.enabled` to `true` and specify `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    service:
+      ingress:
+        enabled: true
+        ui:
+          ingressClassName: nginx
+          annotations:
+            key: value
+          host: ui.example.com
+        sql:
+          ingressClassName: nginx
+          annotations:
+            key: value
+          host: sql.example.com
+~~~
+
+- `ingressClassName` specifies the [`IngressClass`](https://kubernetes.io/docs/concepts/services-networking/ingress/#ingress-class) of the Ingress controller. This example uses the [nginx](https://kubernetes.github.io/ingress-nginx/) controller.
+
+- The `host` must be made publicly accessible. For example, create a route in [Amazon Route 53](https://aws.amazon.com/route53/), or add an entry to `/etc/hosts` that maps the IP address of the Ingress controller to the hostname.
+
+    {{site.data.alerts.callout_info}}
+    Multiple hosts can be mapped to the same Ingress controller IP.
+    {{site.data.alerts.end}}
+
+- TCP connections for SQL clients must be enabled for the Ingress controller. For an example, see the [nginx documentation](https://kubernetes.github.io/ingress-nginx/user-guide/exposing-tcp-udp-services/) and the sketch at the end of this section.
+
+    {{site.data.alerts.callout_info}}
+    Changing the SQL Ingress `host` on a running deployment will cause a rolling restart of the cluster, due to new node certificates being generated for the SQL host.
+    {{site.data.alerts.end}}
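+
+Following up on the TCP requirement above, the following is a minimal sketch of the nginx `tcp-services` ConfigMap pattern described in that guide. The service name, namespaces, and port shown are assumptions based on this page's examples; adjust them to your deployment:
+
+~~~ yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  # Name and namespace expected by a default ingress-nginx installation.
+  name: tcp-services
+  namespace: ingress-nginx
+data:
+  # Forward TCP port 26257 on the controller to the CockroachDB public service.
+  "26257": "cockroach-ns/cockroachdb-public:26257"
+~~~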
diff --git a/src/current/v25.3/create-security-certificates-custom-ca.md b/src/current/v25.3/create-security-certificates-custom-ca.md
index 9f745bce7e7..27dc77b6cc2 100644
--- a/src/current/v25.3/create-security-certificates-custom-ca.md
+++ b/src/current/v25.3/create-security-certificates-custom-ca.md
@@ -31,7 +31,7 @@ For secure clusters, you can avoid getting the warning message by using a certif
     pkill -SIGHUP -x cockroach
     ~~~
     The `SIGHUP` signal must be sent by the same user running the process or by a user with adequate privileges to send signals to processes owned by another user, such as a user with `sudo` access.
-    - In a cluster deployed using the [Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), there is no way to send a `SIGHUP` signal to the individual `cockroach` process on each cluster node. Instead, perform a rolling restart of the cluster's pods.
+    - In a cluster deployed using the [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), there is no way to send a `SIGHUP` signal to the individual `cockroach` process on each cluster node. Instead, perform a rolling restart of the cluster's pods.
 
 ### Node key and certificates
 
diff --git a/src/current/v25.3/deploy-cockroachdb-with-cockroachdb-operator.md b/src/current/v25.3/deploy-cockroachdb-with-cockroachdb-operator.md
new file mode 100644
index 00000000000..89718c6ae95
--- /dev/null
+++ b/src/current/v25.3/deploy-cockroachdb-with-cockroachdb-operator.md
@@ -0,0 +1,731 @@
+---
+title: Deploy CockroachDB with the CockroachDB Operator
+summary: Deploy a secure 3-node CockroachDB cluster with the CockroachDB operator.
+toc: true
+toc_not_nested: false
+secure: true
+docs_area: deploy
+---
+
+This page describes how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster.
+
+{{site.data.alerts.callout_info}}
+The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}).
+{{site.data.alerts.end}}
+
+## Prerequisites and best practices
+
+### Kubernetes version
+
+To deploy CockroachDB v25.1 or later, Kubernetes 1.30 or higher is required. Cockroach Labs strongly recommends that you use a Kubernetes version that is eligible for [patch support by the Kubernetes project](https://kubernetes.io/releases/).
+
+### Helm version
+
+The CockroachDB Helm chart requires Helm 3.0 or higher. If you attempt to use an incompatible Helm version, an error like the following occurs:
+
+~~~
+Error: UPGRADE FAILED: template: cockroachdb/templates/tests/client.yaml:6:14: executing "cockroachdb/templates/tests/client.yaml" at <.Values.networkPolicy.enabled>: nil pointer evaluating interface {}.enabled
+~~~
+
+There are two Helm charts that must be deployed:
+
+- `operator`: The {{ site.data.products.cockroachdb-operator }} chart, to be installed first.
+- `cockroachdb`: The CockroachDB application chart, to be installed after the operator is ready.
+
+### Network
+
+Server Name Indication (SNI) is an extension to the TLS protocol that allows a client to indicate which hostname it is attempting to connect to at the start of the TLS handshake. The server can present multiple certificates on the same IP address and TCP port number, and one server can serve multiple secure websites or API services even if they use different certificates.
+
+Due to its order of operations, the PostgreSQL wire protocol's implementation of TLS is incompatible with SNI-based routing in the Kubernetes ingress controller. Instead, use a TCP load balancer for CockroachDB that is not shared with other services.
+
+If you want to secure your cluster to use TLS certificates for all network communications, Helm must be installed with RBAC privileges. Otherwise, you will get an `attempt to grant extra privileges` error.
+
+### Localities
+
+CockroachDB clusters use localities to efficiently distribute replicas. This is especially important in multi-region deployments. With the {{ site.data.products.cockroachdb-operator }}, you specify mappings between locality levels and the location on a Kubernetes node where the value for that locality can be found.
+
+In cloud provider deployments (e.g., [GKE](#hosted-gke), [EKS](#hosted-eks), or [AKS](#hosted-aks)), the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) values on Kubernetes nodes are populated by the cloud provider. For further granularity, you can define arbitrary locality labels (e.g., `province`, `datacenter`, `rack`), but these need to be applied individually to each Kubernetes node when it is initialized so that CockroachDB can understand where the node lives and distribute replicas accordingly.
+
+On bare metal Kubernetes deployments, you must plan a hierarchy of localities that suits your CockroachDB node distribution, then apply these values individually to nodes when they are initialized. Although you can set most of these values arbitrarily, you must set region and zone locations in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespaces, respectively.
+
+For more information on how locality labels are used by CockroachDB, refer to the [`--locality` documentation]({% link {{ page.version.version }}/cockroach-start.md %}#locality).
+
+### Architecture
+
+The {{ site.data.products.cockroachdb-operator }} is only supported in environments with an ARM64 or AMD64 architecture.
+
+### Resources
+
+When starting Kubernetes, select machines with at least 4 vCPUs and 16 GiB of memory, and provision at least 2 vCPUs and 8 GiB of memory to CockroachDB per pod. These minimum settings are used by default in this deployment guide, and are appropriate for testing purposes only. On a production deployment, you should adjust the resource settings for your workload.
+
+### Storage
+
+Kubernetes deployments use external persistent volumes that are often replicated by the provider. CockroachDB replicates data automatically, and this redundant layer of [replication]({% link {{ page.version.version }}/architecture/overview.md %}#replication) can impact performance. Using [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local) may improve performance.
+
+## Step 1. Start Kubernetes
+
+You can use the hosted [Google Kubernetes Engine (GKE)](#hosted-gke) service, hosted [Amazon Elastic Kubernetes Service (EKS)](#hosted-eks), or [Microsoft Azure Kubernetes Service (AKS)](#hosted-aks) to quickly start Kubernetes.
+
+{{site.data.alerts.callout_success}}
+Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on Kubernetes. You can use any cluster hardware with the minimum recommended Kubernetes version and at least 3 pods, each presenting sufficient resources to start a CockroachDB node. However, note that support for other deployments may vary.
+{{site.data.alerts.end}}
+
+### Hosted GKE
+
+1. Complete the **Before You Begin** steps described in the [Google Kubernetes Engine Quickstart](https://cloud.google.com/kubernetes-engine/docs/quickstart) documentation.
+
+    This includes installing `gcloud`, which is used to create and delete Kubernetes Engine clusters, and `kubectl`, which is the command-line tool used to manage Kubernetes from your workstation.
+
+    The documentation offers the choice of using Google's Cloud Shell product or using a local shell on your machine. Choose to use a local shell if you want to be able to view the DB Console using the steps in this guide.
+
+1. From your local workstation, start the Kubernetes cluster, specifying one of the available [regions](https://cloud.google.com/compute/docs/regions-zones#available) (e.g., `us-east1`).
+
+    The process can take a few minutes, so do not move on to the next step until you see a `Creating cluster cockroachdb...done` message and details about your cluster.
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    gcloud container clusters create cockroachdb --machine-type n2-standard-4 --region {region-name} --num-nodes 1
+    ~~~
+    ~~~ shell
+    Creating cluster cockroachdb...done.
+    ~~~
+
+    {{site.data.alerts.callout_info}}
+    Since this region can differ from your default `gcloud` region, be sure to include the `--region` flag to run `gcloud` commands against this cluster.
+    {{site.data.alerts.end}}
+
+    This creates GKE instances and joins them into a single Kubernetes cluster named `cockroachdb`. The `--region` flag specifies a [regional three-zone cluster](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-regional-cluster), and `--num-nodes` specifies one Kubernetes worker node in each zone.
+
+    The `--machine-type` flag tells the node pool to use the [n2-standard-4](https://cloud.google.com/compute/docs/machine-types#standard_machine_types) machine type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration]({% link {{ page.version.version }}/recommended-production-settings.md %}#basic-hardware-recommendations).
+
+    {{site.data.alerts.callout_info}}
+    Consider creating another, dedicated node group for the operator pod to ensure system resource availability.
+    {{site.data.alerts.end}}
+
+1. Get the email address associated with your Google Cloud account:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    gcloud info | grep Account
+    ~~~
+    ~~~ shell
+    Account: [your.google.cloud.email@example.org]
+    ~~~
+
+    The preceding command returns your email address in all lowercase. However, in the next step, you must enter the address using its exact capitalization. For example, if your address is `YourName@example.com`, you must use `YourName@example.com` and not `yourname@example.com`.
+
+1. [Create the RBAC roles](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control#prerequisites_for_using_role-based_access_control) CockroachDB needs for running on GKE, using the address from the previous step:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl create clusterrolebinding $USER-cluster-admin-binding \
+    --clusterrole=cluster-admin \
+    --user={your.google.cloud.email@example.org}
+    ~~~
+    ~~~ shell
+    clusterrolebinding.rbac.authorization.k8s.io/your.username-cluster-admin-binding created
+    ~~~
+
+### Hosted EKS
+
+1. Complete the steps described in the [EKS Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started-eksctl.html) documentation.
+
+    This includes installing and configuring the AWS CLI and `eksctl`, the command-line tool used to create and delete Kubernetes clusters on EKS, and `kubectl`, the command-line tool used to manage Kubernetes from your workstation.
+
+    If you are running [EKS-Anywhere](https://aws.amazon.com/eks/eks-anywhere/), CockroachDB requires that you [configure your default storage class](https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/) to auto-provision persistent volumes. Alternatively, you can define a custom storage configuration as required by your install pattern.
+
+1. From your local workstation, start the Kubernetes cluster:
+
+    To ensure that all 3 nodes can be placed into a different availability zone, you may want to first [confirm that at least 3 zones are available in the region](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#availability-zones-describe) for your account.
+
+    Cluster provisioning usually takes between 10 and 15 minutes. Do not move on to the next step until you see a message like `[✔] EKS cluster "cockroachdb" in "us-east-1" region is ready` and details about your cluster.
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    eksctl create cluster \
+    --name cockroachdb \
+    --nodegroup-name standard-workers \
+    --node-type m6i.xlarge \
+    --nodes 3 \
+    --nodes-min 1 \
+    --nodes-max 4 \
+    --node-ami auto
+    ~~~
+
+    This creates EKS instances and joins them into a single Kubernetes cluster named `cockroachdb`. The `--node-type` flag tells the node pool to use the [m6i.xlarge](https://aws.amazon.com/ec2/instance-types/) instance type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration]({% link {{ page.version.version }}/recommended-production-settings.md %}#basic-hardware-recommendations).
+
+    {{site.data.alerts.callout_info}}
+    Consider creating another, dedicated node group for the operator pod to ensure system resource availability.
+    {{site.data.alerts.end}}
+
+1. Open the [AWS CloudFormation console](https://console.aws.amazon.com/cloudformation/home) to verify that the stacks `eksctl-cockroachdb-cluster` and `eksctl-cockroachdb-nodegroup-standard-workers` were successfully created. Be sure that your region is selected in the console.
+
+### Hosted AKS
+
+1. Complete the **Before you begin**, **Define environment variables**, and **Create a resource group** steps described in the [AKS quickstart guide](https://learn.microsoft.com/azure/aks/learn/quick-kubernetes-deploy-cli). This includes setting up the Azure CLI (`az`), the command-line tool used to create and manage Azure cloud resources.
+
+    Set the environment variables as desired for your CockroachDB deployment.
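+
+    For example, a minimal set of exports for this guide (the resource group name shown is a placeholder; choose your own):
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    # Placeholder resource group name; replace with your own.
+    export MY_RESOURCE_GROUP_NAME=cockroachdb-rg
+    # The cluster name used throughout this guide.
+    export MY_AKS_CLUSTER_NAME=cockroachdb
+    ~~~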
+    For these instructions, `MY_AKS_CLUSTER_NAME` must be set to `cockroachdb`, as shown above.
+
+    Do not follow the **Create an AKS cluster** steps or the sections that follow them in the AKS quickstart guide, as those topics are covered specifically for CockroachDB in this documentation.
+
+1. From your workstation, create the Kubernetes cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    az aks create \
+    --resource-group $MY_RESOURCE_GROUP_NAME \
+    --name $MY_AKS_CLUSTER_NAME \
+    --node-count 3 \
+    --generate-ssh-keys
+    ~~~
+
+1. Create an application in your Azure tenant and create a secret named `azure-cluster-identity-credentials-secret` that contains `AZURE_CLIENT_ID` and `AZURE_CLIENT_SECRET` to hold the application credentials. You can use the following example YAML to define this application:
+
+    ~~~ yaml
+    apiVersion: v1
+    kind: Secret
+    metadata:
+      name: azure-cluster-identity-credentials-secret
+    type: Opaque
+    stringData:
+      azure-credentials: |
+        azure_client_id: 11111111-1111-1111-1111-111111111111
+        azure_client_secret: s3cr3t
+    ~~~
+
+    For more information on how to use these variables, refer to the [`Azure.Identity` documentation](https://learn.microsoft.com/dotnet/api/azure.identity.environmentcredential?view=azure-dotnet).
+
+### Bare metal deployments
+
+For bare metal deployments, the specific Kubernetes infrastructure deployment steps should be similar to those described in [Hosted GKE](#hosted-gke) and [Hosted EKS](#hosted-eks).
+
+- You must plan a hierarchy of [locality labels](#localities) that suits your CockroachDB node distribution, then apply these labels individually to nodes when they are initialized. Although you can set most of these values arbitrarily, you must set region and zone locations in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespaces, respectively.
+
+## Step 2. Start CockroachDB
+
+### Install the operator sub-chart
+
+1. Check out the CockroachDB Helm repository from GitHub:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    git clone https://github.com/cockroachdb/helm-charts.git
+    ~~~
+
+1. Set your environment variables. This step is optional but recommended in order to use the example commands and templates described in the following instructions. Note the default Kubernetes namespace of `cockroach-ns`.
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export CRDBOPERATOR=crdb-operator
+    export CRDBCLUSTER=cockroachdb
+    export NAMESPACE=cockroach-ns
+    ~~~
+
+1. Install the operator sub-chart:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl create namespace $NAMESPACE
+    ~~~
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm install $CRDBOPERATOR ./cockroachdb-parent/charts/operator -n $NAMESPACE
+    ~~~
+
+### Initialize the cluster
+
+1. Open `cockroachdb-parent/charts/cockroachdb/values.yaml`, a values file that tells Helm how to configure the Kubernetes cluster, in your text editor.
+
+1. Modify the `cockroachdb.crdbCluster.regions` section to describe the number of CockroachDB nodes to deploy and what region(s) to deploy them in. Replace the default `cloudProvider` with the appropriate value (`gcp`, `aws`, `azure`). For bare metal deployments, you can remove the `cloudProvider` field.
    The following example initializes three nodes on Google Cloud in the `us-central1` region:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        regions:
+          - code: us-central1
+            nodes: 3
+            cloudProvider: gcp
+            namespace: cockroach-ns
+    ~~~
+
+    {{site.data.alerts.callout_info}}
+    If you intend to deploy CockroachDB nodes across multiple different regions, follow the additional steps described in [Deploy across multiple regions](#deploy-across-multiple-regions).
+    {{site.data.alerts.end}}
+
+1. Uncomment and modify `cockroachdb.crdbCluster.resources` in the values file with the CPU and memory requests and limits for each node to use. The default values are 4 vCPUs and 16 GiB of memory.
+
+    For more information on configuring node resource allocation, refer to [Resource management]({% link {{ page.version.version }}/configure-cockroachdb-operator.md %}).
+
+1. Modify the TLS configuration as desired. For a secure deployment, set `cockroachdb.tls.enabled` in the values file to `true`. You can either allow the operator to generate self-signed certificates, provide a custom CA certificate and generate other certificates, or use your own certificates.
+    - **All self-signed certificates**: By default, the certificates are created automatically by a self-signer utility, which requires no configuration beyond setting a custom certificate duration if desired. This utility creates self-signed certificates for the nodes and root client, which are stored in a secret. You can see these certificates by running `kubectl get secrets`:
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl get secrets
+        ~~~
+        ~~~
+        crdb-cockroachdb-ca-secret       Opaque              2     23s
+        crdb-cockroachdb-client-secret   kubernetes.io/tls   3     22s
+        crdb-cockroachdb-node-secret     kubernetes.io/tls   3     23s
+        ~~~
+
+        {{site.data.alerts.callout_info}}
+        If you are deploying on OpenShift, you must also set `cockroachdb.tls.selfSigner.securityContext.enabled` to `false` to comply with OpenShift's stricter security policies.
+        {{site.data.alerts.end}}
+    - **Custom CA certificate**: If you wish to supply your own CA certificate to the deployed nodes but allow automatic generation of client certificates, create a Kubernetes secret with the custom CA certificate. To perform these steps using the `cockroach cert` command:
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        mkdir certs
+        ~~~
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        mkdir my-safe-directory
+        ~~~
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key
+        ~~~
+
+        Set `cockroachdb.tls.selfSigner.caProvided` to `true` and specify the secret where the certificate is stored:
+
+        ~~~ yaml
+        cockroachdb:
+          tls:
+            enabled: true
+            selfSigner:
+              enabled: true
+              caProvided: true
+              caSecret: {ca-secret-name}
+        ~~~
+
+        {{site.data.alerts.callout_info}}
+        If you are deploying on OpenShift, you must also set `cockroachdb.tls.selfSigner.securityContext.enabled` to `false` to comply with OpenShift's stricter security policies.
+        {{site.data.alerts.end}}
+    - **All custom certificates**: Set up your certificates and load them into your Kubernetes cluster as secrets using the following commands:
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        mkdir certs
+        ~~~
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        mkdir my-safe-directory
+        ~~~
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key
+        ~~~
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        cockroach cert create-client root --certs-dir=certs --ca-key=my-safe-directory/ca.key
+        ~~~
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl create secret generic cockroachdb-root --from-file=certs
+        ~~~
+        ~~~ shell
+        secret/cockroachdb-root created
+        ~~~
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        cockroach cert create-node --certs-dir=certs --ca-key=my-safe-directory/ca.key localhost 127.0.0.1 my-release-cockroachdb-public my-release-cockroachdb-public.cockroach-ns my-release-cockroachdb-public.cockroach-ns.svc.cluster.local *.my-release-cockroachdb *.my-release-cockroachdb.cockroach-ns *.my-release-cockroachdb.cockroach-ns.svc.cluster.local
+        kubectl create secret generic cockroachdb-node --from-file=certs
+        ~~~
+        ~~~ shell
+        secret/cockroachdb-node created
+        ~~~
+
+        {{site.data.alerts.callout_info}}
+        The subject alternative names are based on a release called `my-release` in the `cockroach-ns` namespace. Make sure they match the services created with the release during Helm install.
+        {{site.data.alerts.end}}
+
+    If you wish to supply certificates with [cert-manager](https://cert-manager.io/), set `cockroachdb.tls.certManager.enabled` to `true`, and `cockroachdb.tls.certManager.issuer` to an IssuerRef (as it appears in certificate resources) pointing to a ClusterIssuer or Issuer that you have set up in the cluster:
+
+    ~~~ yaml
+    cockroachdb:
+      tls:
+        enabled: true
+        certManager:
+          enabled: true
+          caConfigMap: cockroachdb-ca
+          nodeSecret: cockroachdb-node
+          clientRootSecret: cockroachdb-root
+          issuer:
+            group: cert-manager.io
+            kind: Issuer
+            name: cockroachdb-cert-issuer
+          clientCertDuration: 672h
+          clientCertExpiryWindow: 48h
+          nodeCertDuration: 8760h
+          nodeCertExpiryWindow: 168h
+    ~~~
+
+    The following Kubernetes manifest describes an example issuer:
+
+    ~~~ yaml
+    apiVersion: v1
+    kind: Secret
+    metadata:
+      name: cockroachdb-ca
+      namespace: cockroach-ns
+    data:
+      tls.crt: [BASE64 Encoded ca.crt]
+      tls.key: [BASE64 Encoded ca.key]
+    type: kubernetes.io/tls
+    ---
+    apiVersion: cert-manager.io/v1alpha3
+    kind: Issuer
+    metadata:
+      name: cockroachdb-cert-issuer
+      namespace: cockroach-ns
+    spec:
+      ca:
+        secretName: cockroachdb-ca
+    ~~~
+
+    If your certificates are stored in TLS secrets, such as secrets generated by `cert-manager`, the secret will contain files named `ca.crt`, `tls.crt`, and `tls.key`.
+
+    For CockroachDB, rename these files as applicable to match the following naming scheme: `ca.crt`, `node.crt`, `node.key`, `client.root.crt`, and `client.root.key`.
+
+    Add the following to the values file:
+
+    ~~~ yaml
+    cockroachdb:
+      tls:
+        enabled: true
+        externalCertificates:
+          enabled: true
+          certificates:
+            nodeSecretName: {node_secret_name}
+            nodeClientSecretName: {client_secret_name}
+    ~~~
+
+    Replacing the following:
+    - `{node_secret_name}`: The name of the Kubernetes secret that contains the generated node certificate and key.
+    - `{client_secret_name}`: The name of the Kubernetes secret that contains the generated client certificate and key.
+
+    For a detailed tutorial of a TLS configuration with manual certificates, refer to [Authenticate with cockroach cert](#authenticate-with-cockroach-cert).
+
+1. In `cockroachdb.crdbCluster.localityMappings`, provide [locality mappings](#localities) that define locality levels and map them to node labels where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they were provided through the [`cockroach start --locality`]({% link {{ page.version.version }}/cockroach-start.md %}#locality) flag.
+
+    If `localityMappings` is not configured, by default the {{ site.data.products.cockroachdb-operator }} uses the `region` and `zone` locality labels, mapped implicitly to the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) node labels.
+    - In cloud provider deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` values on a node are populated by the cloud provider.
+    - In bare metal deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` node label values are not set implicitly by a cloud provider when initializing the node, so you must set them manually or configure custom locality labels.
+
+    To add more granular levels of locality to your nodes or use different locality labels, add custom locality levels as values in the `cockroachdb.crdbCluster.localityMappings` list. Any custom `localityMappings` configuration overrides the default `region` and `zone` configuration, so if you append an additional locality level but wish to keep the `region` and `zone` labels, you must declare them manually.
+
+    The following example uses the existing `region` and `zone` labels and adds an additional `datacenter` locality mapping that is more granular than `zone`. This example declares that the `dc` locality information is stored in the `example.datacenter.locality` node label:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        localityMappings:
+          - nodeLabel: "topology.kubernetes.io/region"
+            localityLabel: "region"
+          - nodeLabel: "topology.kubernetes.io/zone"
+            localityLabel: "zone"
+          - nodeLabel: "example.datacenter.locality"
+            localityLabel: "dc"
+    ~~~
+
+    The list of `localityMappings` is processed in a top-down hierarchy, where each entry is processed as a lower locality level than the previous one. In this example, if a Kubernetes node is initialized in the `us-central1` region, `us-central1-c` zone, and `dc2` datacenter, its `cockroach start --locality` flag would be equivalent to the following:
+
+    ~~~ shell
+    cockroach start --locality region=us-central1,zone=us-central1-c,dc=dc2
+    ~~~
+
+    Optionally, review the `cockroachdb.crdbCluster.topologySpreadConstraints` configuration and set `topologyKey` to the `nodeLabel` value of a locality level that has distinct values for each node.
    By default the lowest locality level is `zone`, so the following configuration sets that value as the `topologyKey`:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        topologySpreadConstraints:
+          topologyKey: topology.kubernetes.io/zone
+    ~~~
+
+    For more information on localities and topology planning, see the [topology patterns documentation]({% link {{ page.version.version }}/topology-patterns.md %}).
+
+1. Modify other relevant parts of the configuration, such as other `topologySpreadConstraints` fields and `service.ports`, as needed for your deployment.
+
+1. Run the following command to install the CockroachDB chart using Helm:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb -n $NAMESPACE
+    ~~~
+
+    You can override the default parameters using the `--set key=value[,key=value]` argument while installing the chart:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --set clusterDomain=cluster-test.local -n $NAMESPACE
+    ~~~
+
+#### Deploy across multiple regions
+
+The Helm chart supports specifying multiple region definitions in `cockroachdb.crdbCluster.regions` with their respective node counts. You must ensure the required networking is set up to allow for service discovery across regions. Also, ensure that the same CA certificate is used across all regions.
+
+For each region, modify the `regions` configuration as described in [Initialize the cluster](#initialize-the-cluster) and perform `helm install` against the respective Kubernetes cluster. While applying the installation in a given region, do the following:
+
+- Verify that the domain matches `cockroachdb.clusterDomain` in the values file.
+- Ensure that `cockroachdb.crdbCluster.regions` captures the information for regions that have already been deployed, including the current region. This allows CockroachDB in the current region to connect to clusters deployed in the existing regions.
+
+The following example shows a configuration across two regions, `us-central1` and `us-east1`, with 3 nodes in each cluster:
+
+~~~ yaml
+cockroachdb:
+  clusterDomain: cluster.gke.gcp-us-east1
+  crdbCluster:
+    regions:
+      - code: us-central1
+        nodes: 3
+        cloudProvider: gcp
+        domain: cluster.gke.gcp-us-central1
+        namespace: cockroach-ns
+      - code: us-east1
+        nodes: 3
+        cloudProvider: gcp
+        domain: cluster.gke.gcp-us-east1
+        namespace: cockroach-ns
+~~~
+
+## Step 3. Use the built-in SQL client
+
+To use the CockroachDB SQL client, follow these steps to launch a secure pod running the `cockroach` binary.
+
+1. Download the secure client manifest:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    curl -O https://raw.githubusercontent.com/cockroachdb/helm-charts/master/examples/client-secure.yaml
+    ~~~
+
+    {{site.data.alerts.callout_danger}}
+    This client tool logs into CockroachDB as `root` using the root certificates.
+    {{site.data.alerts.end}}
+
+1. Edit the YAML file with the following values:
+    - `spec.serviceAccountName: my-release-cockroachdb`
+    - `spec.image: cockroachdb/cockroach:{version}`
+    - `spec.volumes[0].project.sources[0].secret.name: my-release-cockroachdb-client-secret`
+
+1. Launch a pod using this file and keep it running indefinitely:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl create -f client-secure.yaml
+    ~~~
+
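+1. Optionally, confirm that the client pod is running before connecting. For example (the pod name comes from the downloaded manifest):
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    # The pod should report a Running status before you open a shell into it.
+    kubectl get pod cockroachdb-client-secure
+    ~~~
+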
+1. Get a shell into the pod and start the CockroachDB [built-in SQL client]({% link {{ page.version.version }}/cockroach-sql.md %}):
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl exec -it cockroachdb-client-secure \
+    -- ./cockroach sql \
+    --certs-dir=/cockroach/cockroach-certs \
+    --host=cockroachdb-public
+    ~~~
+    ~~~ shell
+    # Welcome to the CockroachDB SQL shell.
+    # All statements must be terminated by a semicolon.
+    # To exit, type: \q.
+    #
+    # Server version: CockroachDB CCL v21.1.0 (x86_64-unknown-linux-gnu, built 2021/04/23 13:54:57, go1.13.14) (same version as client)
+    # Cluster ID: a96791d9-998c-4683-a3d3-edbf425bbf11
+    #
+    # Enter \? for a brief introduction.
+    #
+    root@cockroachdb-public:26257/defaultdb>
+    ~~~
+
+    This pod will continue running indefinitely, so any time you need to reopen the built-in SQL client or run any other `cockroach` client commands (e.g., `cockroach node`), repeat this step using the appropriate `cockroach` command. If you'd prefer to delete the pod and recreate it when needed, run `kubectl delete pod cockroachdb-client-secure`.
+
+1. Run some basic [CockroachDB SQL statements]({% link {{ page.version.version }}/learn-cockroachdb-sql.md %}):
+
+    ~~~ sql
+    CREATE DATABASE bank;
+    CREATE TABLE bank.accounts (id INT PRIMARY KEY, balance DECIMAL);
+    INSERT INTO bank.accounts VALUES (1, 1000.50);
+    SELECT * FROM bank.accounts;
+      id | balance
+    +----+---------+
+       1 | 1000.50
+    (1 row)
+    ~~~
+
+1. [Create a user with a password]({% link {{ page.version.version }}/create-user.md %}#create-a-user-with-a-password):
+
+    ~~~ sql
+    CREATE USER roach WITH PASSWORD 'Q7gc8rEdS';
+    ~~~
+
+    You will need this username and password to access the DB Console later.
+
+1. Exit the SQL shell and pod:
+
+    ~~~ sql
+    \q
+    ~~~
+
+## Step 4. Access the DB Console
+
+To access the cluster's [DB Console]({% link {{ page.version.version }}/ui-overview.md %}):
+
+1. On secure clusters, [certain pages of the DB Console]({% link {{ page.version.version }}/ui-overview.md %}#db-console-access) can only be accessed by `admin` users.
+
+    Get a shell into the pod and start the CockroachDB [built-in SQL client]({% link {{ page.version.version }}/cockroach-sql.md %}):
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl exec -it cockroachdb-client-secure \
+    -- ./cockroach sql \
+    --certs-dir=/cockroach/cockroach-certs \
+    --host=cockroachdb-public
+    ~~~
+
+1. Assign `roach` to the `admin` role (you only need to do this once):
+
+    ~~~ sql
+    GRANT admin TO roach;
+    ~~~
+
+1. Exit the SQL shell and pod:
+
+    ~~~ sql
+    \q
+    ~~~
+
+1. In a new terminal window, port-forward from your local machine to the `cockroachdb-public` service:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl port-forward service/cockroachdb-public 8080
+    ~~~
+    ~~~ shell
+    Forwarding from 127.0.0.1:8080 -> 8080
+    ~~~
+
+    Run the `port-forward` command on the same machine as the web browser in which you want to view the DB Console. If you have been running these commands from a cloud instance or other non-local shell, you will not be able to view the UI without configuring `kubectl` locally and running the preceding `port-forward` command on your local machine.
+
+1. Go to [`https://localhost:8080`](https://localhost:8080/) and log in with the username and password you created earlier.
+ + {{site.data.alerts.callout_info}} + If you are using Google Chrome, and get an error about not being able to reach `localhost` because its certificate has been revoked, go to `chrome://flags/#allow-insecure-localhost`, enable "Allow invalid certificates for resources loaded from localhost", and then restart the browser. This degrades security for all sites running on `localhost`, not just CockroachDB's DB Console, so enable the feature only temporarily. + {{site.data.alerts.end}} + +1. In the DB Console, verify that the cluster is running as expected: + 1. View the [**Node List**]({% link {{ page.version.version }}/ui-cluster-overview-page.md %}#node-list) to ensure that all nodes successfully joined the cluster. + 1. Click the **Databases** tab on the left to verify that `bank` is listed. + +## Next steps + +Read the following pages for detailed information on cluster scaling, certificate management, resource management, best practices, and other cluster operation details: + +- [Pod scheduling]({% link {{ page.version.version }}/schedule-cockroachdb-operator.md %}) +- [Resource management]({% link {{ page.version.version }}/configure-cockroachdb-operator.md %}) +- [Certificate management]({% link {{ page.version.version }}/secure-cockroachdb-operator.md %}) +- [Cluster scaling]({% link {{ page.version.version }}/scale-cockroachdb-operator.md %}) +- [Cluster monitoring]({% link {{ page.version.version }}/monitor-cockroachdb-operator.md %}) +- [Upgrade a cluster]({% link {{ page.version.version }}/upgrade-cockroachdb-operator.md %}) +- [Override deployment templates]({% link {{ page.version.version }}/override-templates-cockroachdb-operator.md %}) +- [CockroachDB performance on Kubernetes]({% link {{ page.version.version }}/cockroachdb-operator-performance.md %}) + +## Examples + +### Authenticate with `cockroach cert` + +The following example uses [cockroach cert commands]({% link {{ page.version.version }}/cockroach-cert.md %}) to generate and sign the CockroachDB node and client certificates. To learn more about the supported methods of signing certificates, refer to [Authentication]({% link {{ page.version.version }}/authentication.md %}#using-digital-certificates-with-cockroachdb). + +1. Create two directories: + + {% include_cached copy-clipboard.html %} + ~~~ shell + mkdir certs my-safe-directory + ~~~ + +1. Create the CA certificate and key pair: + + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-ca \ + --certs-dir=certs \ + --ca-key=my-safe-directory/ca.key + ~~~ + +1. Create a client certificate and key pair for the root user: + + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-client root \ + --certs-dir=certs \ + --ca-key=my-safe-directory/ca.key + ~~~ + +1. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the {{ site.data.products.cockroachdb-operator }}: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic cockroachdb.client.root \ + --from-file=tls.key=certs/client.root.key \ + --from-file=tls.crt=certs/client.root.crt \ + --from-file=ca.crt=certs/ca.crt + ~~~ + ~~~ shell + secret/cockroachdb.client.root created + ~~~ + +1. Create the certificate and key pair for your CockroachDB nodes, specifying the namespace you used when deploying the cluster. 
This example uses the `cockroach-ns` namespace: + + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-node localhost \ + 127.0.0.1 \ + cockroachdb-public \ + cockroachdb-public.cockroach-ns \ + cockroachdb-public.cockroach-ns.svc.cluster.local \ + *.cockroachdb \ + *.cockroachdb.cockroach-ns \ + *.cockroachdb.cockroach-ns.svc.cluster.local \ + --certs-dir=certs \ + --ca-key=my-safe-directory/ca.key + ~~~ + +1. Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the {{ site.data.products.cockroachdb-operator }}: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic cockroachdb.node \ + --from-file=tls.key=certs/node.key \ + --from-file=tls.crt=certs/node.crt \ + --from-file=ca.crt=certs/ca.crt + ~~~ + ~~~ shell + secret/cockroachdb.node created + ~~~ + +1. Check that the secrets were created on the cluster: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get secrets + ~~~ + ~~~ shell + NAME TYPE DATA AGE + cockroachdb.client.root Opaque 3 13s + cockroachdb.node Opaque 3 3s + default-token-6js7b kubernetes.io/service-account-token 3 9h + ~~~ + +1. Add `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` to the values file used to deploy the cluster: + + ~~~ yaml + cockroachdb: + tls: + enabled: true + externalCertificates: + enabled: true + certificates: + nodeSecretName: cockroachdb.node + nodeClientSecretName: cockroachdb.client.root + ~~~ diff --git a/src/current/v25.3/deploy-cockroachdb-with-kubernetes-openshift.md b/src/current/v25.3/deploy-cockroachdb-with-kubernetes-openshift.md index 786b68d61e8..2ae415693dc 100644 --- a/src/current/v25.3/deploy-cockroachdb-with-kubernetes-openshift.md +++ b/src/current/v25.3/deploy-cockroachdb-with-kubernetes-openshift.md @@ -6,7 +6,9 @@ secure: true docs_area: --- -This page shows you how to start and stop a secure 3-node CockroachDB cluster on the Red Hat OpenShift platform, using the [CockroachDB Kubernetes Operator](https://marketplace.redhat.com/en-us/products/cockroachdb-operator). +This page shows you how to start and stop a secure 3-node CockroachDB cluster on the Red Hat OpenShift platform, using the [{{ site.data.products.public-operator }}](https://marketplace.redhat.com/en-us/products/cockroachdb-operator). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} ## Before you begin @@ -59,7 +61,7 @@ This article assumes you have already installed the OpenShift Container Platform This lets you issue `oc` commands without having to specify the namespace each time. -## Step 2. Install the Operator +## Step 2. Install the {{ site.data.products.public-operator }} 1. Navigate to your OpenShift web console and click **OperatorHub**. @@ -71,7 +73,7 @@ This article assumes you have already installed the OpenShift Container Platform 1. On the **Install Operator** page, select `cockroachdb` in the **Installed Namespace** dropdown and click **Install**. -1. Confirm that the Operator is running: +1. Confirm that the operator is running: {% include_cached copy-clipboard.html %} ~~~ shell @@ -87,13 +89,13 @@ This article assumes you have already installed the OpenShift Container Platform {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} -1. When the Operator is ready, click **View Operator** to navigate to the **Installed Operators** page. 
+1. When the operator is ready, click **View Operator** to navigate to the **Installed Operators** page.
 
 1. In the **CockroachDB Operator** tile, click **Create instance**.
 
     OpenShift OperatorHub
 
-1. Make sure **CockroachDB Version** is set to a valid CockroachDB version. For a list of compatible image names, see `spec.containers.env` in the [Operator manifest](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml) on GitHub.
+1. Make sure **CockroachDB Version** is set to a valid CockroachDB version. For a list of compatible image names, see `spec.containers.env` in the [public operator manifest](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml) on GitHub.
 
 1. This will open the **Create CrdbCluster** page. By default, this deploys a 3-node secure cluster. Leave the other fields unchanged and click **Create**.
 
@@ -120,10 +122,10 @@ This article assumes you have already installed the OpenShift Container Platform
 
 To use the CockroachDB SQL client, first launch a secure pod running the `cockroach` binary.
 
-This can be defined with the following YAML, which mounts the Operator's generated certificates:
+This can be defined with the following YAML, which mounts the operator's generated certificates:
 
 {{site.data.alerts.callout_success}}
-`spec.containers.image` should match the **Image** value that is displayed under the **Containers** section on the **Pods** page when you select a CockroachDB pod. Be sure to select a CockroachDB pod and not the Operator pod.
+`spec.containers.image` should match the **Image** value that is displayed under the **Containers** section on the **Pods** page when you select a CockroachDB pod. Be sure to select a CockroachDB pod and not the operator pod.
 
 Note that OpenShift may display the image SHA instead of the tag. In this case, you should use the SHA for `spec.containers.image`.
 {{site.data.alerts.end}}
@@ -327,7 +329,7 @@ If you want to continue using this cluster, see the documentation on [configurin
 
 OpenShift OperatorHub
 
-This will delete the CockroachDB cluster being run by the Operator. It will *not* delete:
+This will delete the CockroachDB cluster being run by the operator. It will *not* delete:
 
 - The persistent volumes that were attached to the pods. This can be done by deleting the PVCs via **Storage** > **Persistent Volume Claims**.
 - The opaque secrets used to authenticate the cluster. This can be done via **Workloads** > **Secrets**.
diff --git a/src/current/v25.3/deploy-cockroachdb-with-kubernetes.md b/src/current/v25.3/deploy-cockroachdb-with-kubernetes.md
index 2220b2f21f6..6b84fc3ce7a 100644
--- a/src/current/v25.3/deploy-cockroachdb-with-kubernetes.md
+++ b/src/current/v25.3/deploy-cockroachdb-with-kubernetes.md
@@ -9,12 +9,14 @@ docs_area:
 
 {% include {{ page.version.version }}/filter-tabs/crdb-single-kubernetes.md %}
 
-This page shows you how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster. You can use any of the following approaches:
+{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %}
+
-- [CockroachDB Kubernetes Operator](https://github.com/cockroachdb/cockroach-operator)
+This page shows you how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster using the following approaches:
+
+- [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator)
 
   {{site.data.alerts.callout_info}}
-  The CockroachDB Kubernetes Operator is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data).
+  The {{ site.data.products.public-operator }} is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data).
  {{site.data.alerts.end}}
 
 - Manual [StatefulSet](http://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/) configuration
@@ -36,14 +38,14 @@ This page shows you how to start and stop a secure 3-node CockroachDB cluster in
 Choose how you want to deploy and maintain the CockroachDB cluster.
 
 {{site.data.alerts.callout_info}}
-The [CockroachDB Kubernetes Operator](https://github.com/cockroachdb/cockroach-operator) eases CockroachDB cluster creation and management on a single Kubernetes cluster.
+The [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator) eases CockroachDB cluster creation and management on a single Kubernetes cluster.
 
-The Operator does not provision or apply an Enterprise license key. To use CockroachDB with the Operator, [set a license]({% link {{ page.version.version }}/licensing-faqs.md %}#set-a-license) in the SQL shell.
+The {{ site.data.products.public-operator }} does not provision or apply a license key. To use CockroachDB with the {{ site.data.products.public-operator }}, [set a license]({% link {{ page.version.version }}/licensing-faqs.md %}#set-a-license) in the SQL shell.
 {{site.data.alerts.end}}
 
 <div class="filters filters-big clearfix">
- - + +
diff --git a/src/current/v25.3/kubernetes-overview.md b/src/current/v25.3/kubernetes-overview.md index c0155fed329..db3bd953e08 100644 --- a/src/current/v25.3/kubernetes-overview.md +++ b/src/current/v25.3/kubernetes-overview.md @@ -10,12 +10,14 @@ key: operate-cockroachdb-kubernetes.html Kubernetes is a portable, extensible, open source platform for managing containerized workloads and services. For a given workload, you provide Kubernetes with a configuration, and Kubernetes applies that configuration to all Kubernetes nodes that are running the application. -CockroachDB can be deployed and managed on Kubernetes using the following methods: +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} -- [CockroachDB Kubernetes Operator](https://github.com/cockroachdb/cockroach-operator) +You can also deploy CockroachDB on Kubernetes using the following methods: + +- [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator) {{site.data.alerts.callout_info}} - The CockroachDB Kubernetes Operator is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). + The {{ site.data.products.public-operator }} is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). {{site.data.alerts.end}} - Manual [StatefulSet](http://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/) configuration diff --git a/src/current/v25.3/kubernetes-performance.md b/src/current/v25.3/kubernetes-performance.md index 47aef82797d..6fa4d534442 100644 --- a/src/current/v25.3/kubernetes-performance.md +++ b/src/current/v25.3/kubernetes-performance.md @@ -7,6 +7,10 @@ docs_area: deploy Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This page explains potential bottlenecks to be aware of when [running CockroachDB in Kubernetes]({% link {{ page.version.version }}/kubernetes-overview.md %}) and shows you how to optimize your deployment for better performance. +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [CockroachDB Performance on Kubernetes with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/cockroachdb-operator-performance.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} +
 ## Before you begin
 
diff --git a/src/current/v25.3/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.3/migrate-cockroachdb-kubernetes-helm.md
new file mode 100644
index 00000000000..5a8ece3cb28
--- /dev/null
+++ b/src/current/v25.3/migrate-cockroachdb-kubernetes-helm.md
@@ -0,0 +1,252 @@
+---
+title: Migrate from Helm StatefulSet
+summary: Migration guide detailing how to migrate away from a Helm deployment of CockroachDB to the {{ site.data.products.cockroachdb-operator }}.
+toc: true
+toc_not_nested: true
+secure: true
+docs_area: deploy
+---
+
+This guide describes how to migrate an existing CockroachDB cluster managed via StatefulSet to the {{ site.data.products.cockroachdb-operator }}.
+
+{{site.data.alerts.callout_info}}
+The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}).
+{{site.data.alerts.end}}
+
+These instructions assume that you are migrating from a StatefulSet cluster that was configured using the Helm chart with the following command:
+
+~~~ shell
+helm upgrade --install --set operator.enabled=false crdb-test --debug ./cockroachdb
+~~~
+
+{{site.data.alerts.callout_success}}
+If your existing cluster was created using the {{ site.data.products.public-operator }}, refer to the [{{ site.data.products.public-operator }} migration guide]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-operator.md %}).
+{{site.data.alerts.end}}
+
+This migration can be completed without affecting cluster availability, and preserves existing disks so that data doesn't need to be replicated into empty volumes. The process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration.
+
+{{site.data.alerts.callout_danger}}
+Commands that use RPCs (such as `cockroach node drain` and `cockroach node decommission`) will be unavailable until the public service is updated in step 4. The {{ site.data.products.cockroachdb-operator }} uses a different port than StatefulSets for RPC services, causing these commands to fail for a limited time.
+{{site.data.alerts.end}}
+
+## Step 1. Prepare the migration helper
+
+In the root of the [cockroachdb/helm-charts](https://github.com/cockroachdb/helm-charts/tree/master) repository, build the migration helper and add the `./bin` directory to your PATH:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+make bin/migration-helper
+export PATH=$PATH:$(pwd)/bin
+~~~
+
+Export environment variables for the existing deployment:
+
+- Set `STS_NAME` to the name of the CockroachDB StatefulSet deployed via the Helm chart:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export STS_NAME="crdb-example-cockroachdb"
+    ~~~
+
+- Set `NAMESPACE` to the namespace where the StatefulSet is installed:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export NAMESPACE="default"
+    ~~~
+
+- Set `CLOUD_PROVIDER` to the cloud vendor where the Kubernetes cluster resides. All major cloud providers are supported (`gcp`, `aws`, `azure`):
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export CLOUD_PROVIDER=gcp
+    ~~~
+
+- Set `REGION` to the cloud provider's identifier of this region. This region must match the `topology.kubernetes.io/region` label on the Kubernetes nodes for this cluster, as shown in the check below:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export REGION=us-central1
+    ~~~
+
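+For example, a quick way to confirm the label value on each node before setting `REGION`:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+# The -L flag adds a column showing each node's region label;
+# the REGION value exported above must match it.
+kubectl get nodes -L topology.kubernetes.io/region
+~~~
+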
+## Step 2. Generate manifests with the migration helper
+
+The operator uses slightly different certificates than the CockroachDB Helm chart, and mounts them in configmaps and secrets with different names. Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE
+~~~
+
+Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. The new pods and their associated PVCs must have the same names as the original StatefulSet-managed pods and PVCs. The new operator-managed pods will then use the original PVCs, rather than replicate data into empty nodes.
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+mkdir -p manifests
+bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests
+~~~
+
+## Step 3. Replace StatefulSet pods with operator nodes
+
+To migrate seamlessly from the CockroachDB Helm chart to the operator, scale down the StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then create the crdbcluster object that manages the crdbnodes.
+
+Create objects with `kubectl` that will eventually be owned by the crdbcluster:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl create priorityclass crdb-critical --value 500000000
+~~~
+
+Install the `crdb-operator` with Helm:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator
+~~~
+
+For each pod in the StatefulSet, perform the following steps:
+
+1. Scale the StatefulSet down by one replica. For example, for a five-node cluster, scale the StatefulSet down to four replicas:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl scale statefulset/$STS_NAME --replicas=4
+    ~~~
+
+2. Create the `crdbnode` resource that corresponds to the StatefulSet pod you just scaled down. Each manifest is labeled with the pattern `crdbnode-X.yaml`, where `X` corresponds to a StatefulSet pod named `{STS_NAME}-X`. Note the pod that was scaled down and specify its manifest in a command like the following:

+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl apply -f manifests/crdbnode-4.yaml
+    ~~~
+
+3. Wait for the new pod to become ready. If it doesn't, [check the operator logs]({% link {{ page.version.version }}/monitor-cockroachdb-operator.md %}#monitor-the-operator) for errors.
+
+4. Before moving on to the next replica migration, verify that there are no underreplicated ranges:
+    1. Set up port forwarding to access the CockroachDB node's HTTP interface. Note that the DB Console runs on port 8080 by default:
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl port-forward pod/cockroachdb-4 8080:8080
+        ~~~
+
+    2. Check that there are zero underreplicated ranges. The following command outputs the number of under-replicated ranges on this CockroachDB node:
+
+        ~~~ shell
+        curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk '{print $2}'
+        ~~~
+
+Repeat these steps until the StatefulSet has zero replicas.
+
+{{site.data.alerts.callout_danger}}
+If there are issues with the migration and you need to revert back to the previous deployment, follow the [rollback process](#roll-back-a-migration-in-progress).
+{{site.data.alerts.end}}
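+
+At any point during this loop, you can list the operator-managed nodes alongside the remaining StatefulSet replicas to track progress. The `crdbnodes` resource name comes from the operator's CRDs:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+# Compare operator-managed crdbnodes with the StatefulSet's remaining replicas.
+kubectl get crdbnodes,statefulsets -n $NAMESPACE
+~~~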
+
+## Step 4. Update the public service
+
+The Helm chart creates a public Service that exposes both SQL and gRPC connections over a single port. However, the operator uses a different port for gRPC communication. To ensure compatibility, update the public Service to reflect the correct gRPC port used by the operator.
+
+Apply the updated Service manifest:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl apply -f manifests/public-service.yaml
+~~~
+
+The existing StatefulSet creates a PodDisruptionBudget (PDB) that conflicts with the one managed by the operator. To avoid this conflict, delete the existing PDB:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl delete poddisruptionbudget $STS_NAME-budget
+~~~
+
+## Step 5. Deploy the crdbcluster object
+
+Delete the StatefulSet that was scaled down to zero, as the Helm upgrade can only proceed if no StatefulSet is present:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl delete statefulset $STS_NAME
+~~~
+
+Apply the crdbcluster manifest using Helm, replacing `$RELEASE_NAME` with the Helm release name of the existing deployment (`crdb-test` in the example above):
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+helm upgrade $RELEASE_NAME ./cockroachdb-parent/charts/cockroachdb -f manifests/values.yaml
+~~~
+
+## Roll back a migration in progress
+
+If the migration to the {{ site.data.products.cockroachdb-operator }} fails during the stage where you are applying the generated `crdbnode` manifests, follow the steps below to safely restore the original state using the previously backed-up resources and preserved volumes. This assumes the StatefulSet and PVCs are not deleted.
+
+1. Delete the applied `crdbnode` resources and simultaneously scale the StatefulSet back up.
+
+    Delete the individual `crdbnode` manifests in the reverse order of their creation (starting with the last one created, e.g., `crdbnode-1.yaml`) and scale the StatefulSet back to its original replica count (e.g., 2). For example, assuming you have applied two `crdbnode` yaml files (`crdbnode-2.yaml` and `crdbnode-1.yaml`):
+
+    1. Delete a `crdbnode` manifest in reverse order, starting with `crdbnode-1.yaml`.
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl delete -f manifests/crdbnode-1.yaml
+        ~~~
+
+    1. Scale the StatefulSet replica count up by one (to 2).
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl scale statefulset $STS_NAME --replicas=2
+        ~~~
+
+    1. Verify that data has propagated by waiting for there to be zero under-replicated ranges:
+
+        1. Set up port forwarding to access the CockroachDB node's HTTP interface, replacing `cockroachdb-X` with the node name:
+
+            {% include_cached copy-clipboard.html %}
+            ~~~ shell
+            kubectl port-forward pod/cockroachdb-X 8080:8080
+            ~~~
+
+            The DB Console runs on port 8080 by default.
+
+        1. Check the `ranges_underreplicated` metric:
+
+            {% include_cached copy-clipboard.html %}
+            ~~~ shell
+            curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk '{print $2}'
+            ~~~
+
+            This command outputs the number of under-replicated ranges on the node, which should be zero before proceeding with the next node. This may take some time depending on the deployment, but is necessary to ensure that there is no downtime in data availability.
+
+    1. Repeat steps a through c for each node, deleting `crdbnode-2.yaml`, scaling the replica count to 3, and so on.
+
+    Repeat the `kubectl delete -f ...` command for each `crdbnode` manifest you applied during migration. Make sure to verify that there are no under-replicated ranges after rolling back each node.
+
+1. Delete the PriorityClass and RBAC resources created for the CockroachDB operator:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete priorityclass crdb-critical
+    kubectl delete -f manifests/rbac.yaml
+    ~~~
+
+1. Uninstall the {{ site.data.products.cockroachdb-operator }}:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm uninstall crdb-operator
+    ~~~
+
+1. Clean up {{ site.data.products.cockroachdb-operator }} resources and custom resource definitions:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete crds crdbnodes.crdb.cockroachlabs.com
+    kubectl delete crds crdbtenants.crdb.cockroachlabs.com
+    kubectl delete serviceaccount cockroachdb-sa
+    kubectl delete service cockroach-webhook-service
+    kubectl delete validatingwebhookconfiguration cockroach-webhook-config
+    ~~~
+
+1. Confirm that all CockroachDB pods are "Running" or "Ready" as shown with the following command:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods
+    ~~~
diff --git a/src/current/v25.3/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.3/migrate-cockroachdb-kubernetes-operator.md
new file mode 100644
index 00000000000..2f0fb8f9092
--- /dev/null
+++ b/src/current/v25.3/migrate-cockroachdb-kubernetes-operator.md
@@ -0,0 +1,320 @@
+---
+title: Migrate from the Public Operator
+summary: Migration guide detailing how to migrate away from a deployment using the public Kubernetes operator to the CockroachDB operator.
+toc: true
+toc_not_nested: true
+secure: true
+docs_area: deploy
+---
+
+This guide describes how to migrate an existing CockroachDB cluster managed via the {{ site.data.products.public-operator }} to the {{ site.data.products.cockroachdb-operator }}.
+
+{{site.data.alerts.callout_info}}
+The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}).
+{{site.data.alerts.end}}
+
+These instructions assume that you are migrating from a {{ site.data.products.public-operator }} cluster that is managed with kubectl via the following yaml files:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml
+kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/operator.yaml
+kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/examples/example.yaml
+~~~
+
+{{site.data.alerts.callout_success}}
+If your existing cluster was created as a StatefulSet using Helm, refer to the [Helm migration guide]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-helm.md %}).
+{{site.data.alerts.end}}
+
+This migration process can be completed without affecting cluster availability, and preserves existing disks so that data doesn't need to be replicated into empty volumes. This process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration.
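+
+Before you begin, you can snapshot the current state of the deployment for later comparison. A quick sketch (resource names follow the `example.yaml` deployment above):
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+# Record what the public operator currently manages, for reference during the migration.
+kubectl get crdbclusters
+kubectl get statefulsets,pods,pvc,svc
+~~~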
+
+## Step 1. Prepare the migration helper
+
+In the root of the [cockroachdb/helm-charts](https://github.com/cockroachdb/helm-charts/tree/master) repository, build the migration helper and add the `./bin` directory to your PATH:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+make bin/migration-helper
+export PATH=$PATH:$(pwd)/bin
+~~~
+
+Export environment variables for the existing deployment:
+
+- Set CRDBCLUSTER to the crdbcluster custom resource name in the {{ site.data.products.public-operator }}. The StatefulSet managed by the {{ site.data.products.public-operator }} shares this name:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export CRDBCLUSTER="cockroachdb"
+    ~~~
+
+- Set NAMESPACE to the namespace where the StatefulSet is installed:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export NAMESPACE="default"
+    ~~~
+
+- Set CLOUD_PROVIDER to the cloud provider hosting the Kubernetes cluster. All major cloud providers are supported (`gcp`, `aws`, `azure`):
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export CLOUD_PROVIDER=gcp
+    ~~~
+
+- Set REGION to the cloud provider's identifier of this region. This region must match the `topology.kubernetes.io/region` label of the Kubernetes nodes in this cluster:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    export REGION=us-central1
+    ~~~
+
+Back up the crdbcluster resource in case there is a need to revert:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+mkdir -p backup
+kubectl get crdbcluster -o yaml $CRDBCLUSTER > backup/crdbcluster-$CRDBCLUSTER.yaml
+~~~
+
+## Step 2. Generate manifests with the migration helper
+
+The {{ site.data.products.cockroachdb-operator }} uses slightly different certificates than the {{ site.data.products.public-operator }}, and mounts them in ConfigMaps and Secrets with different names. Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+bin/migration-helper migrate-certs --statefulset-name $CRDBCLUSTER --namespace $NAMESPACE
+~~~
+
+Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. The new pods and their associated PVCs must have the same names as the original StatefulSet-managed pods and PVCs. The new {{ site.data.products.cockroachdb-operator }}-managed pods will then use the original PVCs, rather than replicate data into empty nodes.
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+mkdir -p manifests
+bin/migration-helper build-manifest helm --statefulset-name $CRDBCLUSTER --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests
+~~~
+
+## Step 3. Uninstall and replace the {{ site.data.products.public-operator }}
+
+The {{ site.data.products.public-operator }} and the {{ site.data.products.cockroachdb-operator }} use custom resource definitions with the same names, so you must remove the {{ site.data.products.public-operator }} before installing the {{ site.data.products.cockroachdb-operator }}. Run the following commands to uninstall the {{ site.data.products.public-operator }} without deleting its managed resources:
+
+- Ensure that the operator can't accidentally delete managed Kubernetes objects:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete clusterrolebinding cockroach-operator-rolebinding
+    ~~~
+
+- Delete the {{ site.data.products.public-operator }} custom resource:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete crdbcluster $CRDBCLUSTER --cascade=orphan
+    ~~~
+
+    The `--cascade=orphan` flag tells Kubernetes not to delete the dependent resources (StatefulSets, Services, PVCs, etc.) created by the `CrdbCluster` custom resource. This ensures that only the parent custom resource is deleted, while child resources are left intact in the cluster. This allows the CockroachDB cluster to continue running as a StatefulSet until the migration is complete.
+
+- Delete {{ site.data.products.public-operator }} resources and custom resource definitions:
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml
+    kubectl delete serviceaccount cockroach-operator-sa -n cockroach-operator-system
+    kubectl delete clusterrole cockroach-operator-role
+    kubectl delete clusterrolebinding cockroach-operator-rolebinding
+    kubectl delete service cockroach-operator-webhook-service -n cockroach-operator-system
+    kubectl delete deployment cockroach-operator-manager -n cockroach-operator-system
+    kubectl delete mutatingwebhookconfigurations cockroach-operator-mutating-webhook-configuration
+    kubectl delete validatingwebhookconfigurations cockroach-operator-validating-webhook-configuration
+    ~~~
+
+Run `helm upgrade` to install the {{ site.data.products.cockroachdb-operator }} and wait for it to become ready:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator
+kubectl rollout status deployment/cockroach-operator --timeout=60s
+~~~
+
+## Step 4. Replace StatefulSet pods with operator-managed nodes
+
+To migrate seamlessly from the {{ site.data.products.public-operator }} to the {{ site.data.products.cockroachdb-operator }}, scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then create the crdbcluster object that manages the crdbnodes.
+
+Create objects with `kubectl` that will eventually be owned by the crdbcluster:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl create priorityclass crdb-critical --value 500000000
+kubectl apply -f manifests/rbac.yaml
+~~~
+
+For each pod in the StatefulSet, perform the following steps:
+
+1. Scale the StatefulSet down by one replica. For example, for a five-node cluster, scale the StatefulSet down to four replicas:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl scale statefulset/$CRDBCLUSTER --replicas=4
+    ~~~
+
+1. Create the `crdbnode` resource that corresponds to the StatefulSet pod you just scaled down. Each manifest is named with the pattern `crdbnode-X.yaml`, where `X` corresponds to a StatefulSet pod named `{CRDBCLUSTER}-X`. Note the pod that was scaled down and specify its manifest in a command like the following:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl apply -f manifests/crdbnode-4.yaml
+    ~~~
+
+1. Wait for the new pod to become ready. If it doesn't, [check the operator logs]({% link {{ page.version.version }}/monitor-cockroachdb-operator.md %}#monitor-the-operator) for errors.
+
+1. Before moving on to the next replica migration, verify that there are no under-replicated ranges:
+    1. Set up port forwarding to access the CockroachDB node's HTTP interface. Note that the DB Console runs on port 8080 by default:
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl port-forward pod/cockroachdb-4 8080:8080
+        ~~~
+
+    1. Check that there are zero under-replicated ranges. The following command outputs the number of under-replicated ranges on this CockroachDB node:
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk '{print $2}'
+        ~~~
+
+Repeat these steps until the StatefulSet has zero replicas.
+
+{{site.data.alerts.callout_danger}}
+If there are issues with the migration and you need to revert to the previous deployment, follow the [rollback process](#roll-back-a-migration-in-progress).
+{{site.data.alerts.end}}
+
+## Step 5. Update the crdbcluster manifest
+
+The {{ site.data.products.public-operator }} creates a pod disruption budget that conflicts with a pod disruption budget managed by the {{ site.data.products.cockroachdb-operator }}. Before applying the crdbcluster manifest, delete the existing pod disruption budget:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl delete poddisruptionbudget $CRDBCLUSTER
+~~~
+
+Annotate the existing Kubernetes objects so they can be managed by the Helm chart:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl annotate service $CRDBCLUSTER-public meta.helm.sh/release-name="$CRDBCLUSTER"
+kubectl annotate service $CRDBCLUSTER-public meta.helm.sh/release-namespace="$NAMESPACE"
+kubectl label service $CRDBCLUSTER-public app.kubernetes.io/managed-by=Helm --overwrite=true
+~~~
+
+Apply the crdbcluster manifest:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb -f manifests/values.yaml
+~~~
+
+Once the migration is successful, delete the StatefulSet that was created by the {{ site.data.products.public-operator }}:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl delete statefulset $CRDBCLUSTER
+~~~
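+
+Optionally, confirm the handoff before cleaning anything else up. A quick check, using the names defined above (`kubectl get crdbcluster` relies on the {{ site.data.products.cockroachdb-operator }} CRDs installed in Step 3):
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+# The Helm release should own the cluster, and all pods should be Ready.
+helm list -n $NAMESPACE
+kubectl get crdbcluster $CRDBCLUSTER -n $NAMESPACE
+kubectl get pods -n $NAMESPACE
+~~~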
+
+## Roll back a migration in progress
+
+If the migration to the {{ site.data.products.cockroachdb-operator }} fails during the stage where you are applying the generated `crdbnode` manifests, follow the steps below to safely restore the original state using the previously backed-up resources and preserved volumes. This assumes the StatefulSet and PVCs are not deleted.
+
+1. Delete the applied `crdbnode` resources and simultaneously scale the StatefulSet back up.
+
+    Delete the individual `crdbnode` manifests in the reverse order of their creation (starting with the last one created, e.g., `crdbnode-1.yaml`) and scale the StatefulSet back to its original replica count (e.g., 2). For example, assuming you have applied two `crdbnode` yaml files (`crdbnode-2.yaml` and `crdbnode-1.yaml`):
+
+    1. Delete a `crdbnode` manifest in reverse order, starting with `crdbnode-1.yaml`.
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl delete -f manifests/crdbnode-1.yaml
+        ~~~
+
+    1. Scale the StatefulSet replica count up by one (to 2).
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl scale statefulset $CRDBCLUSTER --replicas=2
+        ~~~
+
+    1. Verify that data has propagated by waiting for there to be zero under-replicated ranges:
+
+        1. Set up port forwarding to access the CockroachDB node's HTTP interface, replacing `cockroachdb-X` with the node name:
+
+            {% include_cached copy-clipboard.html %}
+            ~~~ shell
+            kubectl port-forward pod/cockroachdb-X 8080:8080
+            ~~~
+
+            The DB Console runs on port 8080 by default.
+
+        1. Check the `ranges_underreplicated` metric:
+
+            {% include_cached copy-clipboard.html %}
+            ~~~ shell
+            curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk '{print $2}'
+            ~~~
+
+            This command outputs the number of under-replicated ranges on the node, which should be zero before proceeding with the next node. This may take some time depending on the deployment, but is necessary to ensure that there is no downtime in data availability.
+
+    1. Repeat steps a through c for each node, deleting `crdbnode-2.yaml`, scaling the replica count to 3, and so on.
+
+    Repeat the `kubectl delete -f ...` command for each `crdbnode` manifest you applied during migration. Make sure to verify that there are no under-replicated ranges after rolling back each node.
+
+1. Delete the PriorityClass and RBAC resources created for the CockroachDB operator:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete priorityclass crdb-critical
+    kubectl delete -f manifests/rbac.yaml
+    ~~~
+
+1. Uninstall the {{ site.data.products.cockroachdb-operator }}:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm uninstall crdb-operator
+    ~~~
+
+1. Clean up {{ site.data.products.cockroachdb-operator }} resources and custom resource definitions:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete crds crdbnodes.crdb.cockroachlabs.com
+    kubectl delete crds crdbtenants.crdb.cockroachlabs.com
+    kubectl delete serviceaccount cockroachdb-sa
+    kubectl delete service cockroach-webhook-service
+    kubectl delete validatingwebhookconfiguration cockroach-webhook-config
+    ~~~
+
+1. Restore the {{ site.data.products.public-operator }}:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml
+    kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/operator.yaml
+    ~~~
+
+    Wait for the operator pod to be "Running" as shown with the following command:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods -n cockroach-operator-system
+    ~~~
+
+1. Restore the original `crdbcluster` custom resource:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl apply -f backup/crdbcluster-$CRDBCLUSTER.yaml
+    ~~~
+
+1. Confirm that all CockroachDB pods are "Running" or "Ready" as shown with the following command:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods
+    ~~~
diff --git a/src/current/v25.3/monitor-cockroachdb-kubernetes.md b/src/current/v25.3/monitor-cockroachdb-kubernetes.md
index b8b24929e4b..91f82dbcfaf 100644
--- a/src/current/v25.3/monitor-cockroachdb-kubernetes.md
+++ b/src/current/v25.3/monitor-cockroachdb-kubernetes.md
@@ -12,9 +12,13 @@ This article assumes you have already [deployed CockroachDB on a single Kubernet
 
 Despite CockroachDB's various [built-in safeguards against failure]({% link {{ page.version.version }}/architecture/replication-layer.md %}), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention.
 
+This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Cluster Monitoring with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/monitor-cockroachdb-operator.md %}).
+
+{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %}
+
 <div class="filters clearfix">
- - + +
@@ -132,7 +136,7 @@ If you're on Hosted GKE, before starting, make sure the email address associated ~~~ {{site.data.alerts.callout_info}} - By default, this manifest uses the secret name generated by the CockroachDB Kubernetes Operator. If you generated your own certificates and keys when [starting CockroachDB]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#step-2-start-cockroachdb), be sure that `ca.secret.name` matches the name of the node secret you created. + By default, this manifest uses the secret name generated by the {{ site.data.products.public-operator }}. If you generated your own certificates and keys when [starting CockroachDB]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#step-2-start-cockroachdb), be sure that `ca.secret.name` matches the name of the node secret you created. {{site.data.alerts.end}} 1. Apply the Prometheus manifest. This creates the various objects necessary to run a Prometheus instance: @@ -286,7 +290,7 @@ Active monitoring helps you spot problems early, but it is also essential to sen ## Configure logging -When running CockroachDB v21.1 and later, you can use the Operator to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks] (configure-logs.html#configure-log-sinks) such as file or network logging destinations. +When running CockroachDB v21.1 and later, you can use the {{ site.data.products.public-operator }} to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks] (configure-logs.html#configure-log-sinks) such as file or network logging destinations. {{site.data.alerts.callout_info}} By default, Kubernetes deployments running CockroachDB v20.2 or earlier output all logs to `stderr`. @@ -325,14 +329,14 @@ The above configuration overrides the [default logging configuration]({% link {{ - Save debug-level logs (the `DEV` [log channel]({% link {{ page.version.version }}/logging-overview.md %}#logging-channels)) to disk for troubleshooting. - Send operational- and security-level logs to a [network collector]({% link {{ page.version.version }}/logging-use-cases.md %}#network-logging), in this case [Fluentd]({% link {{ page.version.version }}/configure-logs.md %}#fluentd-logging-format). -The ConfigMap `name` must match the `logConfigMap` object of the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +The ConfigMap `name` must match the `logConfigMap` object of the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: logConfigMap: logconfig ~~~ -By default, the Operator also modifies the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) with the following: +By default, the {{ site.data.products.public-operator }} also modifies the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) with the following: ~~~ yaml sinks: @@ -347,7 +351,7 @@ This outputs logging events in the [`OPS`]({% link {{ page.version.version }}/lo In this example, CockroachDB has already been deployed on a Kubernetes cluster. 
We override the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) to output [`DEV`]({% link {{ page.version.version }}/logging.md %}#dev) logs to a `cockroach-dev.log` file. -1. Create a ConfigMap named `logconfig`. Note that `namespace` is set to the Operator's default namespace (`cockroach-operator-system`): +1. Create a ConfigMap named `logconfig`. Note that `namespace` is set to the {{ site.data.products.public-operator }}'s default namespace (`cockroach-operator-system`): {% include_cached copy-clipboard.html %} ~~~ yaml @@ -384,7 +388,7 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. configmap/logconfig created ~~~ -1. Add the `name` of the ConfigMap in `logConfigMap` to the [Operator's custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +1. Add the `name` of the ConfigMap in `logConfigMap` to the [{{ site.data.products.public-operator }}'s custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): {% include_cached copy-clipboard.html %} ~~~ yaml diff --git a/src/current/v25.3/monitor-cockroachdb-operator.md b/src/current/v25.3/monitor-cockroachdb-operator.md new file mode 100644 index 00000000000..16ba0212569 --- /dev/null +++ b/src/current/v25.3/monitor-cockroachdb-operator.md @@ -0,0 +1,361 @@ +--- +title: Cluster Monitoring with the CockroachDB Operator +summary: How to monitor a secure CockroachDB cluster deployed with the CockroachDB operator. +toc: true +toc_not_nested: true +docs_area: deploy +--- + +Despite CockroachDB's various [built-in safeguards against failure]({% link {{ page.version.version }}/architecture/replication-layer.md %}), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. + +{{site.data.alerts.callout_info}} +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + +## Configure Prometheus + +Every node of a CockroachDB cluster exports granular timeseries metrics formatted for easy integration with [Prometheus](https://prometheus.io/), an open source tool for storing, aggregating, and querying timeseries data. This section shows you how to orchestrate Prometheus as part of your Kubernetes cluster and pull these metrics into Prometheus for external monitoring. + +This guidance is based on [CoreOS's Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/tree/main), which allows a Prometheus instance to be managed using built-in Kubernetes concepts. + +{{site.data.alerts.callout_info}} +If you're on Hosted GKE, before starting, make sure the email address associated with your Google Cloud account is part of the `cluster-admin` RBAC group, as shown in [Deploy CockroachDB with Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}). +{{site.data.alerts.end}} + +1. 
From your local workstation, edit the cockroachdb service to add the prometheus: cockroachdb label: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl label svc cockroachdb prometheus=cockroachdb + ~~~ + ~~~ shell + service/cockroachdb labeled + ~~~ + + This ensures that only the cockroachdb (not the cockroach-public service) is being monitored by a Prometheus job. + +1. Determine the latest version of [CoreOS's Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/releases/) and run the following to download and apply the latest `bundle.yaml` definition file: + + {{site.data.alerts.callout_info}} + Be sure to specify the latest [CoreOS Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/releases/) version in the following command, in place of this example's use of version `v0.82.0`. + {{site.data.alerts.end}} + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply \ + -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.82.0/bundle.yaml \ + --server-side + ~~~ + ~~~ shell + customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/probes.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/prometheuses.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/prometheusrules.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/servicemonitors.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/thanosrulers.monitoring.coreos.com serverside-applied + clusterrolebinding.rbac.authorization.k8s.io/prometheus-operator serverside-applied + clusterrole.rbac.authorization.k8s.io/prometheus-operator serverside-applied + deployment.apps/prometheus-operator serverside-applied + serviceaccount/prometheus-operator serverside-applied + service/prometheus-operator serverside-applied + ~~~ + +1. Confirm that the `prometheus-operator` has started: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get deploy prometheus-operator + ~~~ + ~~~ shell + NAME READY UP-TO-DATE AVAILABLE AGE + prometheus-operator 1/1 1 1 27s + ~~~ + +1. Download our Prometheus manifest: + + {% include_cached copy-clipboard.html %} + ~~~ shell + curl -O https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/prometheus.yaml + ~~~ + +1. Apply the Prometheus manifest. This creates the various objects necessary to run a Prometheus instance: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply -f prometheus.yaml + ~~~ + ~~~ shell + serviceaccount/prometheus created + clusterrole.rbac.authorization.k8s.io/prometheus created + clusterrolebinding.rbac.authorization.k8s.io/prometheus created + servicemonitor.monitoring.coreos.com/cockroachdb created + prometheus.monitoring.coreos.com/cockroachdb created + ~~~ + +1. Access the Prometheus UI locally and verify that CockroachDB is feeding data into Prometheus: + 1. Port-forward from your local machine to the pod running Prometheus: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl port-forward prometheus-cockroachdb-0 9090 + ~~~ + + 1. Go to [http://localhost:9090](http://localhost:9090/) in your browser. + + 1. 
To verify that each CockroachDB node is connected to Prometheus, go to **Status > Targets**. The screen should look like this: + + Prometheus targets + + 1. To verify that data is being collected, go to **Graph**, enter the `sys_uptime` variable in the field, click **Execute**, and then click the **Graph** tab. The screen should like this: + + Prometheus graph + + {{site.data.alerts.callout_info}} + Prometheus auto-completes CockroachDB time series metrics for you, but if you want to see a full listing, with descriptions, port-forward as described in [Access the DB Console]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#step-4-access-the-db-console) and then point your browser to [http://localhost:8080/_status/vars](http://localhost:8080/_status/vars). + {{site.data.alerts.end}} + +For more details on using the Prometheus UI, see their [official documentation](https://prometheus.io/docs/introduction/getting_started/). + +## Configure Alertmanager + +Active monitoring helps you spot problems early, but it is also essential to send notifications when there are events that require investigation or intervention. This section shows you how to use [Alertmanager](https://prometheus.io/docs/alerting/alertmanager/) and CockroachDB's starter [alerting rules](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alert-rules.yaml) to do this. + +1. Download our [alertmanager-config.yaml](https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager-config.yaml) configuration file: + + {% include_cached copy-clipboard.html %} + ~~~ shell + curl -O https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager-config.yaml + ~~~ + +1. Edit the `alertmanager-config.yaml` file to [specify the desired receivers for notifications](https://prometheus.io/docs/alerting/configuration/#receiver). Initially, the file contains a placeholder web hook. + +1. Add this configuration to the Kubernetes cluster as a secret, renaming it to `alertmanager.yaml` and labeling it to make it easier to find: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic alertmanager-cockroachdb \ + --from-file=alertmanager.yaml=alertmanager-config.yaml + ~~~ + ~~~ shell + secret/alertmanager-cockroachdb created + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl label secret alertmanager-cockroachdb app=cockroachdb + ~~~ + ~~~ shell + secret/alertmanager-cockroachdb labeled + ~~~ + + {{site.data.alerts.callout_danger}} + The name of the secret, `alertmanager-cockroachdb`, must match the name used in the `alertmanager.yaml` file. If they differ, the Alertmanager instance will start without configuration, and nothing will happen. + {{site.data.alerts.end}} + +1. Use our [alertmanager.yaml](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alertmanager.yaml) file to create the various objects necessary to run an Alertmanager instance, including a ClusterIP service so that Prometheus can forward alerts: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply \ + -f https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager.yaml + ~~~ + ~~~ shell + alertmanager.monitoring.coreos.com/cockroachdb created + service/alertmanager-cockroachdb created + ~~~ + +1. Verify that Alertmanager is running: + 1. 
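+
+Beyond the UI, the same port-forward exposes Prometheus's HTTP API, which is convenient for spot checks from the command line. A minimal sketch, reusing the `sys_uptime` metric queried above:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+# Query the current sys_uptime series through the Prometheus HTTP API.
+curl -s 'http://localhost:9090/api/v1/query?query=sys_uptime'
+~~~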
+
+## Configure Alertmanager
+
+Active monitoring helps you spot problems early, but it is also essential to send notifications when there are events that require investigation or intervention. This section shows you how to use [Alertmanager](https://prometheus.io/docs/alerting/alertmanager/) and CockroachDB's starter [alerting rules](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alert-rules.yaml) to do this.
+
+1. Download our [alertmanager-config.yaml](https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager-config.yaml) configuration file:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    curl -O https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager-config.yaml
+    ~~~
+
+1. Edit the `alertmanager-config.yaml` file to [specify the desired receivers for notifications](https://prometheus.io/docs/alerting/configuration/#receiver). Initially, the file contains a placeholder web hook.
+
+1. Add this configuration to the Kubernetes cluster as a secret, renaming it to `alertmanager.yaml` and labeling it to make it easier to find:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl create secret generic alertmanager-cockroachdb \
+    --from-file=alertmanager.yaml=alertmanager-config.yaml
+    ~~~
+    ~~~ shell
+    secret/alertmanager-cockroachdb created
+    ~~~
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl label secret alertmanager-cockroachdb app=cockroachdb
+    ~~~
+    ~~~ shell
+    secret/alertmanager-cockroachdb labeled
+    ~~~
+
+    {{site.data.alerts.callout_danger}}
+    The name of the secret, `alertmanager-cockroachdb`, must match the name used in the `alertmanager.yaml` file. If they differ, the Alertmanager instance will start without configuration, and nothing will happen.
+    {{site.data.alerts.end}}
+
+1. Use our [alertmanager.yaml](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alertmanager.yaml) file to create the various objects necessary to run an Alertmanager instance, including a ClusterIP service so that Prometheus can forward alerts:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl apply \
+    -f https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager.yaml
+    ~~~
+    ~~~ shell
+    alertmanager.monitoring.coreos.com/cockroachdb created
+    service/alertmanager-cockroachdb created
+    ~~~
+
+1. Verify that Alertmanager is running:
+    1. Port-forward from your local machine to the pod running Alertmanager:
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl port-forward alertmanager-cockroachdb-0 9093
+        ~~~
+
+    1. Go to [http://localhost:9093](http://localhost:9093/) in your browser. The screen should look like this:
+
+        Alertmanager
+
+1. Ensure that the Alertmanagers are visible to Prometheus by opening [http://localhost:9090/status](http://localhost:9090/status). The screen should look like this:
+
+    Alertmanager
+
+1. Add CockroachDB's starter [alerting rules](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alert-rules.yaml):
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl apply \
+    -f https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alert-rules.yaml
+    ~~~
+    ~~~ shell
+    prometheusrule.monitoring.coreos.com/prometheus-cockroachdb-rules created
+    ~~~
+
+1. Ensure that the rules are visible to Prometheus by opening [http://localhost:9090/rules](http://localhost:9090/rules). The screen should look like this:
+
+    Alertmanager
+
+1. Verify that the `TestAlertManager` example alert is firing by opening [http://localhost:9090/alerts](http://localhost:9090/alerts). The screen should look like this:
+
+    Alertmanager
+
+1. To remove the example alert:
+    1. Use the `kubectl edit` command to open the rules for editing:
+
+        {% include_cached copy-clipboard.html %}
+        ~~~ shell
+        kubectl edit prometheusrules prometheus-cockroachdb-rules
+        ~~~
+
+    1. Remove the `dummy.rules` block and save the file:
+
+        ~~~ yaml
+        - name: rules/dummy.rules
+          rules:
+          - alert: TestAlertManager
+            expr: vector(1)
+        ~~~
+
+## Monitor the operator
+
+The {{ site.data.products.cockroachdb-operator }} automatically exposes [Prometheus-style metrics](https://prometheus.io/docs/concepts/metric_types/) that you can monitor to observe its operations.
+
+Metrics can be collected from the operator via HTTP requests (port 8080 by default) against the `/metrics` endpoint. The response describes the current node metrics in the Prometheus text exposition format, for example:
+
+~~~
+...
+# HELP node_decommissioning Whether a CockroachDB node is decommissioning.
+# TYPE node_decommissioning gauge
+node_decommissioning{node="cockroachdb-nvq2l"} 0
+node_decommissioning{node="cockroachdb-pmp45"} 0
+node_decommissioning{node="cockroachdb-q6784"} 0
+node_decommissioning{node="cockroachdb-r4wz8"} 0
+...
+~~~
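+
+To pull these metrics ad hoc, you can port-forward to the operator and scrape the endpoint directly. A minimal sketch, assuming the operator Deployment name used elsewhere in these docs (`cockroach-operator`):
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+# Forward the operator's metrics port, then fetch the endpoint once.
+kubectl port-forward deployment/cockroach-operator 8080:8080 &
+curl -s http://localhost:8080/metrics | grep node_decommissioning
+~~~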
+
+## Configure logging
+
+You can use the operator to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks]({% link {{ page.version.version }}/configure-logs.md %}#configure-log-sinks) such as file or network logging destinations.
+
+The logging configuration is defined in a [ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) object, using a key named `logs.yaml`. For example:
+
+~~~ yaml
+apiVersion: v1
+data:
+  logs.yaml: |
+    sinks:
+      file-groups:
+        dev:
+          channels: DEV
+          filter: WARNING
+kind: ConfigMap
+metadata:
+  name: logconfig
+  namespace: cockroach-ns
+~~~
+
+The above configuration overrides the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) and saves debug-level logs (the `DEV` [log channel]({% link {{ page.version.version }}/logging-overview.md %}#logging-channels)) to disk for troubleshooting.
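+
+Optionally, validate a logging configuration before applying it to the cluster. A minimal sketch, assuming a local `cockroach` binary; the `jsonpath` expression escapes the dot in the `logs.yaml` key:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+# Extract the logging config from the ConfigMap and check how it will be interpreted.
+kubectl get configmap logconfig -n cockroach-ns -o jsonpath='{.data.logs\.yaml}' > logs.yaml
+cockroach debug check-log-config --log-config-file=logs.yaml
+~~~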
+
+The ConfigMap `name` must match the `cockroachdb.crdbCluster.loggingConfigMapName` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    loggingConfigMapName: logconfig
+~~~
+
+By default, the operator also modifies the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) with the following:
+
+~~~ yaml
+sinks:
+  stderr:
+    channels: {INFO: "HEALTH, OPS", WARNING: "STORAGE, DEV"}
+    redact: true
+~~~
+
+This outputs logging events in the [OPS]({% link {{ page.version.version }}/logging.md %}#ops) channel to a `cockroach-stderr.log` file.
+
+### Example: Configuring a troubleshooting log file on pods
+
+In this example, CockroachDB has already been deployed on a Kubernetes cluster. Override the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) to output [DEV]({% link {{ page.version.version }}/logging.md %}#dev) logs to a `cockroach-dev.log` file.
+
+1. Create a ConfigMap named `logconfig`. Note that `namespace` is set to the `cockroach-ns` namespace:
+
+    ~~~ yaml
+    apiVersion: v1
+    data:
+      logs.yaml: |
+        sinks:
+          file-groups:
+            dev:
+              channels: DEV
+              filter: WARNING
+    kind: ConfigMap
+    metadata:
+      name: logconfig
+      namespace: cockroach-ns
+    ~~~
+
+    For simplicity, also name the YAML file `logconfig.yaml`.
+
+    {{site.data.alerts.callout_info}}
+    The ConfigMap key is not related to the ConfigMap `name` or YAML filename, and must be named `logs.yaml`.
+    {{site.data.alerts.end}}
+
+    This configuration outputs `DEV` logs that have severity [WARNING]({% link {{ page.version.version }}/logging.md %}#logging-levels-severities) to a `cockroach-dev.log` file on each pod.
+
+1. Apply the ConfigMap to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl apply -f logconfig.yaml
+    ~~~
+    ~~~ shell
+    configmap/logconfig created
+    ~~~
+
+1. Add the `name` of the ConfigMap in `loggingConfigMapName` to the values file:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        loggingConfigMapName: logconfig
+    ~~~
+
+1. Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+    The changes will be rolled out to each pod.
+
+1. See the log files available on a pod:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl exec cockroachdb-2 -- ls cockroach-data/logs
+    ~~~
+    ~~~ shell
+    cockroach-dev.cockroachdb-2.unknownuser.2022-05-02T19_03_03Z.000001.log
+    cockroach-dev.log
+    cockroach-health.cockroachdb-2.unknownuser.2022-05-02T18_53_01Z.000001.log
+    cockroach-health.log
+    cockroach-pebble.cockroachdb-2.unknownuser.2022-05-02T18_52_48Z.000001.log
+    cockroach-pebble.log
+    cockroach-stderr.cockroachdb-2.unknownuser.2022-05-02T18_52_48Z.000001.log
+    cockroach-stderr.cockroachdb-2.unknownuser.2022-05-02T19_03_03Z.000001.log
+    cockroach-stderr.cockroachdb-2.unknownuser.2022-05-02T20_04_03Z.000001.log
+    cockroach-stderr.log
+    cockroach.cockroachdb-2.unknownuser.2022-05-02T18_52_48Z.000001.log
+    cockroach.log
+    ...
+    ~~~
+
+1. View a specific log file:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl exec cockroachdb-2 -- cat cockroach-data/logs/cockroach-dev.log
+    ~~~
diff --git a/src/current/v25.3/orchestrate-a-local-cluster-with-kubernetes.md b/src/current/v25.3/orchestrate-a-local-cluster-with-kubernetes.md
index b8746775bdf..852d8cbe6f4 100644
--- a/src/current/v25.3/orchestrate-a-local-cluster-with-kubernetes.md
+++ b/src/current/v25.3/orchestrate-a-local-cluster-with-kubernetes.md
@@ -13,7 +13,7 @@ On top of CockroachDB's built-in automation, you can use a third-party [orchestr
 This page demonstrates a basic integration with the open-source [Kubernetes](http://kubernetes.io/) orchestration system. Using either the CockroachDB [Helm](https://helm.sh/) chart or a few configuration files, you'll quickly create a 3-node local cluster. You'll run some SQL commands against the cluster and then simulate node failure, watching how Kubernetes auto-restarts without the need for any manual intervention. You'll then scale the cluster with a single command before shutting the cluster down, again with a single command.
 
 {{site.data.alerts.callout_info}}
-To orchestrate a physically distributed cluster in production, see [Orchestrated Deployments]({% link {{ page.version.version }}/kubernetes-overview.md %}). To deploy a 30-day free CockroachDB {{ site.data.products.dedicated }} cluster instead of running CockroachDB yourself, see the [Quickstart]({% link cockroachcloud/quickstart.md %}).
+To orchestrate a physically distributed cluster in production, see [Orchestrated Deployments]({% link {{ page.version.version }}/cockroachdb-operator-overview.md %}). To deploy a 30-day free CockroachDB {{ site.data.products.dedicated }} cluster instead of running CockroachDB yourself, see the [Quickstart]({% link cockroachcloud/quickstart.md %}).
 {{site.data.alerts.end}}
 
@@ -27,14 +27,14 @@ To orchestrate a physically distributed cluster in production, see [Orchestrated
 
 Choose a way to deploy and maintain the CockroachDB cluster:
 
-- [CockroachDB Kubernetes Operator](https://github.com/cockroachdb/cockroach-operator) (recommended)
+- [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator)
 - [Helm](https://helm.sh/) package manager
 - Manually apply our StatefulSet configuration and related files
 
 <div class="filters clearfix">
- + - +
diff --git a/src/current/v25.3/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md b/src/current/v25.3/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md index b47593bda96..55f144f4d74 100644 --- a/src/current/v25.3/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md +++ b/src/current/v25.3/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md @@ -15,9 +15,9 @@ docs_area: deploy This page shows you how to orchestrate a secure CockroachDB deployment across three [Kubernetes](http://kubernetes.io/) clusters, each in a different geographic region, using [StatefulSets](http://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/) to manage the containers within each cluster and linking them together via DNS. This will result in a single, multi-region CockroachDB cluster running on Kubernetes. -{{site.data.alerts.callout_success}} +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} + To deploy CockroachDB in a single Kubernetes cluster instead, see [Kubernetes Single-Cluster Deployment]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). Also, for details about potential performance bottlenecks to be aware of when running CockroachDB in Kubernetes and guidance on how to optimize your deployment for better performance, see [CockroachDB Performance on Kubernetes]({% link {{ page.version.version }}/kubernetes-performance.md %}). -{{site.data.alerts.end}} ## Before you begin diff --git a/src/current/v25.3/override-templates-cockroachdb-operator.md b/src/current/v25.3/override-templates-cockroachdb-operator.md new file mode 100644 index 00000000000..9b27b3debbb --- /dev/null +++ b/src/current/v25.3/override-templates-cockroachdb-operator.md @@ -0,0 +1,90 @@ +--- +title: Override Deployment Templates with the CockroachDB Operator +summary: Use advanced configuration operations to manually override pod templates and cockroach start flags with the CockcroachDB operator. +toc: true +docs_area: deploy +--- + +The {{ site.data.products.cockroachdb-operator }} provides abstractions that simplify cluster deployment and node initialization: + +- A default pod specification is used for the CockroachDB Kubernetes pod. +- The `values.yaml` configuration maps to a subset of `cockroach start` flags when CockroachDB is initialized. + +This page describes configuration options that allow advanced users to manually override the pod template and `cockroach start` flags as needed for deployment. + +{{site.data.alerts.callout_info}} +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + +{{site.data.alerts.callout_danger}} +This page describes advanced configurations that override the supported default templates used by the {{ site.data.products.cockroachdb-operator }}. Cockroach Labs strongly recommends testing these configurations in a non-production environment first. +{{site.data.alerts.end}} + +## Override the default pod + +The `cockroachdb.crdbCluster.podTemplate` field allows you to override the default pod metadata and specification configured by the {{ site.data.products.cockroachdb-operator }}. The values in this field are merged with the default pod specification, where settings in `podTemplate` override any values in the default. + +~~~ yaml +cockroachdb: + crdbCluster: + podTemplate: + # metadata captures the pod metadata for CockroachDB pods. 
+ metadata: {} + # spec captures the pod specification for CockroachDB pods. + spec: + # initContainers captures the list of init containers for CockroachDB pods. + initContainers: + - name : cockroachdb-init + image: us-docker.pkg.dev/cockroach-cloud-images/data-plane/init-container@sha256:c3e4ba851802a429c7f76c639a64b9152d206cebb31162c1760f05e98f7c4254 + # containers captures the list of containers for CockroachDB pods. + containers: + - name: cockroachdb + image: cockroachdb/cockroach:v25.3.2 + # imagePullSecrets captures the secrets for fetching images from private registries. + imagePullSecrets: [] +~~~ + +At least one value for `containers` must be specified if any part of `podTemplate` is being modified. For example, the following `podTemplate` configuration overrides pod anti-affinity behavior and specifies a default `cockroachdb/cockroach:v25.3.2` container image: + +~~~ yaml +cockroachdb: + crdbCluster: + podTemplate: + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - cockroachdb + topologyKey: kubernetes.io/hostname + containers: + - name: cockroachdb + image: cockroachdb/cockroach:v25.3.2 +~~~ + +## Override the default `cockroach start` flags + +The `cockroachdb.crdbCluster.startFlags` field allows you to customize the [`cockroach start` flags]({% link {{ page.version.version }}/cockroach-start.md %}#flags) used when initializing the CockroachDB cluster. + +Within this field, you can specify flags to upsert and flags to omit: + +- Upserted flags are added to the `cockroach start` command, their values overriding any matching flags in the command. +- Omitted flags are removed from the `cockroach start` command if they were present. + +~~~ yaml +cockroachdb: + crdbCluster: + startFlags: + # upsert captures a set of flags that are given higher precedence in the start command. + upsert: + - "--cache=30%" + # omit defines a set of flags which will be omitted from the start command. + omit: + - "--max-sql-memory" +~~~ diff --git a/src/current/v25.3/scale-cockroachdb-kubernetes.md b/src/current/v25.3/scale-cockroachdb-kubernetes.md index 7e62cf5cdf7..4c03460161e 100644 --- a/src/current/v25.3/scale-cockroachdb-kubernetes.md +++ b/src/current/v25.3/scale-cockroachdb-kubernetes.md @@ -13,8 +13,12 @@ This article assumes you have already [deployed CockroachDB on a single Kubernet This page explains how to add and remove CockroachDB nodes on Kubernetes. +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Cluster Scaling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/scale-cockroachdb-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} +
- +
@@ -51,7 +55,7 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones 1. If you are adding nodes after previously [scaling down](#remove-nodes), and have not enabled [automatic PVC pruning](#automatic-pvc-pruning), you must first manually delete any persistent volumes that were orphaned by node removal. {{site.data.alerts.callout_info}} - Due to a [known issue](https://github.com/cockroachdb/cockroach-operator/issues/542), automatic pruning of PVCs is currently disabled by default. This means that after decommissioning and removing a node, the Operator will not remove the persistent volume that was mounted to its pod. + Due to a [known issue](https://github.com/cockroachdb/cockroach-operator/issues/542), automatic pruning of PVCs is currently disabled by default. This means that after decommissioning and removing a node, the {{ site.data.products.public-operator }} will not remove the persistent volume that was mounted to its pod. {{site.data.alerts.end}} View the PVCs on the cluster: @@ -103,7 +107,7 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones persistentvolumeclaim "datadir-cockroachdb-5" deleted ~~~ -1. Update `nodes` in the Operator's custom resource, which you downloaded when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster. This value refers to the number of CockroachDB nodes, each running in one pod: +1. Update `nodes` in the {{ site.data.products.public-operator }}'s custom resource, which you downloaded when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster. This value refers to the number of CockroachDB nodes, each running in one pod: ~~~ nodes: 6 @@ -157,13 +161,13 @@ Do **not** scale down to fewer than 3 nodes. This is considered an anti-pattern
 {{site.data.alerts.callout_danger}}
-Due to a [known issue](https://github.com/cockroachdb/cockroach-operator/issues/542), automatic pruning of PVCs is currently disabled by default. This means that after decommissioning and removing a node, the Operator will not remove the persistent volume that was mounted to its pod.
+Due to a [known issue](https://github.com/cockroachdb/cockroach-operator/issues/542), automatic pruning of PVCs is currently disabled by default. This means that after decommissioning and removing a node, the {{ site.data.products.public-operator }} will not remove the persistent volume that was mounted to its pod.
 
 If you plan to eventually [scale up](#add-nodes) the cluster after scaling down, you will need to manually delete any PVCs that were orphaned by node removal before scaling up. For more information, see [Add nodes](#add-nodes).
 {{site.data.alerts.end}}
 
 {{site.data.alerts.callout_info}}
-If you want to enable the Operator to automatically prune PVCs when scaling down, see [Automatic PVC pruning](#automatic-pvc-pruning). However, note that this workflow is currently unsupported.
+If you want to enable the {{ site.data.products.public-operator }} to automatically prune PVCs when scaling down, see [Automatic PVC pruning](#automatic-pvc-pruning). However, note that this workflow is currently unsupported.
 {{site.data.alerts.end}}
 
 Before scaling down CockroachDB, note the following [topology recommendation]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology):
@@ -179,7 +183,7 @@ If your nodes are distributed across 3 availability zones (as in our [deployment
    ~~~
 
    {{site.data.alerts.callout_info}}
-   Before removing a node, the Operator first decommissions the node. This lets a node finish in-flight requests, rejects any new requests, and transfers all range replicas and range leases off the node.
+   Before removing a node, the {{ site.data.products.public-operator }} first decommissions the node. This lets a node finish in-flight requests, rejects any new requests, and transfers all range replicas and range leases off the node.
    {{site.data.alerts.end}}
 
 1. Apply the new settings to the cluster:
@@ -189,7 +193,7 @@ If your nodes are distributed across 3 availability zones (as in our [deployment
    $ kubectl apply -f example.yaml
    ~~~
 
-   The Operator will remove nodes from the cluster one at a time, starting from the pod with the highest number in its address.
+   The {{ site.data.products.public-operator }} will remove nodes from the cluster one at a time, starting from the pod with the highest number in its address.
 
 1. Verify that the pods were successfully removed:
@@ -208,7 +212,7 @@ If your nodes are distributed across 3 availability zones (as in our [deployment
 
 ### Automatic PVC pruning
 
-To enable the Operator to automatically remove persistent volumes when [scaling down](#remove-nodes) a cluster, turn on automatic PVC pruning through a feature gate.
+To enable the {{ site.data.products.public-operator }} to automatically remove persistent volumes when [scaling down](#remove-nodes) a cluster, turn on automatic PVC pruning through a feature gate.
 
 {{site.data.alerts.callout_danger}}
 This workflow is unsupported and should be enabled at your own risk.
@@ -216,28 +220,28 @@ This workflow is unsupported and should be enabled at your own risk.
 
 {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %}
 
-1. Download the Operator manifest:
+1. Download the {{ site.data.products.public-operator }} manifest:
 
    {% include_cached copy-clipboard.html %}
    ~~~ shell
   $ curl -0 https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml
   ~~~
 
-1. Uncomment the following lines in the Operator manifest:
+1. Uncomment the following lines in the {{ site.data.products.public-operator }} manifest:
 
   ~~~ yaml
   - feature-gates
   - AutoPrunePVC=true
   ~~~
 
-1. Reapply the Operator manifest:
+1. Reapply the {{ site.data.products.public-operator }} manifest:
 
   {% include_cached copy-clipboard.html %}
  ~~~ shell
  $ kubectl apply -f operator.yaml
  ~~~
 
-1. Validate that the Operator is running:
+1. Validate that the {{ site.data.products.public-operator }} is running:
 
   {% include_cached copy-clipboard.html %}
  ~~~ shell
diff --git a/src/current/v25.3/scale-cockroachdb-operator.md b/src/current/v25.3/scale-cockroachdb-operator.md
new file mode 100644
index 00000000000..76de7364979
--- /dev/null
+++ b/src/current/v25.3/scale-cockroachdb-operator.md
@@ -0,0 +1,106 @@
+---
+title: Cluster Scaling with the CockroachDB Operator
+summary: How to scale a secure CockroachDB cluster deployed with the CockroachDB operator.
+toc: true
+toc_not_nested: true
+secure: true
+docs_area: deploy
+---
+
+This page explains how to add and remove CockroachDB nodes on Kubernetes.
+
+{{site.data.alerts.callout_info}}
+The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}).
+{{site.data.alerts.end}}
+
+## Add nodes
+
+Before scaling up CockroachDB, note the following [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology):
+
+- Each CockroachDB node (running in its own pod) should run on a separate Kubernetes worker node.
+- Each availability zone should have the same number of CockroachDB nodes.
+
+If your cluster has 3 CockroachDB nodes distributed across 3 availability zones (as in our [deployment example]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster)), Cockroach Labs recommends scaling up by a multiple of 3 to retain an even distribution of nodes. You should therefore scale up to a minimum of 6 CockroachDB nodes, with 2 nodes in each zone.
+
+1. Run `kubectl get nodes` to list the worker nodes in your Kubernetes cluster. There should be at least as many worker nodes as the total number of CockroachDB pods you plan to run. This ensures that no more than one CockroachDB pod is placed on each worker node.
+
+1. If you need to add worker nodes, resize your cluster by specifying the desired number of worker nodes in each zone. Using Google Kubernetes Engine as an example:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    gcloud container clusters resize {cluster-name} --region {region-name} --num-nodes 2
+    ~~~
+
+    This example provisions 2 worker nodes in each of the 3 default zones, for a total of 6 worker nodes.
+
+1. Update the `nodes` field of the appropriate region entry under `cockroachdb.crdbCluster.regions` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster) with the target size of the CockroachDB cluster in that region. This value refers to the number of CockroachDB nodes, each running in one pod:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        regions:
+        - code: us-central1
+          cloudProvider: gcp
+          domain: cluster.domain.us-central
+          nodes: 6
+    ~~~
+
+1. Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+1. Verify that the new pods were successfully started:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods
+    ~~~
+    ~~~ shell
+    NAME                                  READY   STATUS    RESTARTS   AGE
+    cockroach-operator-655fbf7847-zn9v8   1/1     Running   0          30m
+    cockroachdb-0                         1/1     Running   0          24m
+    cockroachdb-1                         1/1     Running   0          24m
+    cockroachdb-2                         1/1     Running   0          24m
+    cockroachdb-3                         1/1     Running   0          30s
+    cockroachdb-4                         1/1     Running   0          30s
+    cockroachdb-5                         1/1     Running   0          30s
+    ~~~
+
+    Each pod should be running on one of the 6 worker nodes.
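+
+You can also confirm the change from the operator's perspective: each CockroachDB node is represented by a `crdbnode` custom resource (the same resource type referenced in the migration guides). A quick check:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+# One crdbnode object should be listed per CockroachDB node.
+kubectl get crdbnodes -n $NAMESPACE
+~~~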
Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+1. Verify that the new pods were successfully started:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods
+    ~~~
+    ~~~ shell
+    NAME                                  READY   STATUS    RESTARTS   AGE
+    cockroach-operator-655fbf7847-zn9v8   1/1     Running   0          30m
+    cockroachdb-0                         1/1     Running   0          24m
+    cockroachdb-1                         1/1     Running   0          24m
+    cockroachdb-2                         1/1     Running   0          24m
+    cockroachdb-3                         1/1     Running   0          30s
+    cockroachdb-4                         1/1     Running   0          30s
+    cockroachdb-5                         1/1     Running   0          30s
+    ~~~
+
+    Each pod should be running on one of the 6 worker nodes.
+
+## Remove nodes
+
+If your nodes are distributed across 3 availability zones (as in our [deployment example]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster)), Cockroach Labs recommends scaling down by a multiple of 3 to retain an even distribution. If your cluster has 6 CockroachDB nodes, you should therefore scale down to 3, with 1 node in each zone.
+
+{{site.data.alerts.callout_danger}}
+Do not scale down to fewer than 3 nodes. This is considered an anti-pattern in CockroachDB and will cause errors. Before scaling down CockroachDB, note that each availability zone should have the same number of CockroachDB nodes.
+{{site.data.alerts.end}}
+
+1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster) with the target size of the CockroachDB cluster. For instance, to scale a cluster in Google Cloud down to 3 nodes:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        regions:
+          - code: us-central1
+            cloudProvider: gcp
+            domain: cluster.domain.us-central
+            nodes: 3
+    ~~~
+
+1. Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+1. 
Verify that the pods were successfully removed: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods + ~~~ diff --git a/src/current/v25.3/schedule-cockroachdb-kubernetes.md b/src/current/v25.3/schedule-cockroachdb-kubernetes.md index 973d234ffa9..dd67d369c5e 100644 --- a/src/current/v25.3/schedule-cockroachdb-kubernetes.md +++ b/src/current/v25.3/schedule-cockroachdb-kubernetes.md @@ -7,15 +7,25 @@ secure: true docs_area: deploy --- -This page describes how to configure the following, using the [Operator](https://github.com/cockroachdb/cockroach-operator): +This page describes how to configure the following, using the [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator): +- [Enable feature gates](#enable-feature-gates) - [Node selectors](#node-selectors) -- [Node affinities](#add-a-node-affinity) -- [Pod affinities and anti-affinities](#add-a-pod-affinity-or-anti-affinity) +- [Affinities and anti-affinities](#affinities-and-anti-affinities) + - [Add a node affinity](#add-a-node-affinity) + - [Add a pod affinity or anti-affinity](#add-a-pod-affinity-or-anti-affinity) + - [Example: Scheduling CockroachDB onto labeled nodes](#example-scheduling-cockroachdb-onto-labeled-nodes) - [Taints and tolerations](#taints-and-tolerations) + - [Add a toleration](#add-a-toleration) + - [Example: Evicting CockroachDB from a running worker node](#example-evicting-cockroachdb-from-a-running-worker-node) - [Topology spread constraints](#topology-spread-constraints) + - [Add a topology spread constraint](#add-a-topology-spread-constraint) - [Resource labels and annotations](#resource-labels-and-annotations) +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Pod Scheduling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/schedule-cockroachdb-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} + These settings control how CockroachDB pods can be identified or scheduled onto worker nodes. {% include {{ page.version.version }}/orchestration/operator-check-namespace.md %} @@ -24,7 +34,7 @@ These settings control how CockroachDB pods can be identified or scheduled onto {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} -To enable the [affinity](#affinities-and-anti-affinities), [toleration](#taints-and-tolerations), and [topology spread constraint](#topology-spread-constraints) rules, [download the Operator manifest](https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml) and add the following line to the `spec.containers.args` field: +To enable the [affinity](#affinities-and-anti-affinities), [toleration](#taints-and-tolerations), and [topology spread constraint](#topology-spread-constraints) rules, [download the {{ site.data.products.public-operator }} manifest](https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml) and add the following line to the `spec.containers.args` field: {% include_cached copy-clipboard.html %} ~~~ yaml @@ -38,7 +48,7 @@ spec: A pod with a *node selector* will be scheduled onto a worker node that has matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/), or key-value pairs. 
-Specify the labels in `nodeSelector` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. +Specify the labels in `nodeSelector` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. The following configuration causes CockroachDB pods to be scheduled onto worker nodes that have *both* the labels `worker-pool-name=crdb-workers` and `kubernetes.io/arch=amd64`: @@ -71,7 +81,7 @@ For an example, see [Scheduling CockroachDB onto labeled nodes](#example-schedul ### Add a node affinity -Specify node affinities in `affinity.nodeAffinity` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. +Specify node affinities in `affinity.nodeAffinity` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. The following configuration requires that CockroachDB pods are scheduled onto worker nodes running either an `intel` or `amd64` CPU, with a preference against worker nodes in the `us-east4-b` availability zone. @@ -102,11 +112,11 @@ The `requiredDuringSchedulingIgnoredDuringExecution` node affinity rule, using t The `preferredDuringSchedulingIgnoredDuringExecution` node affinity rule, using the `NotIn` operator and specified `weight`, discourages (but does not disallow) CockroachDB pods from being scheduled onto nodes with the label `topology.kubernetes.io/zone=us-east4-b`. This achieves a similar effect as a `PreferNoSchedule` [taint](#taints-and-tolerations). -For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the {{ site.data.products.public-operator }}. ### Add a pod affinity or anti-affinity -Specify pod affinities and anti-affinities in `affinity.podAffinity` and `affinity.podAntiAffinity` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). 
If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. +Specify pod affinities and anti-affinities in `affinity.podAffinity` and `affinity.podAntiAffinity` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. The following configuration attempts to schedule CockroachDB pods in the same zones as the pods that run our example [load generator](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/example-app.yaml) app. It disallows CockroachDB pods from being co-located on the same worker node. @@ -140,7 +150,7 @@ The `preferredDuringSchedulingIgnoredDuringExecution` pod affinity rule, using t The `requiredDuringSchedulingIgnoredDuringExecution` pod anti-affinity rule, using the `In` operator, requires CockroachDB pods not to be co-located on a worker node, as specified with `topologyKey`. -For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the {{ site.data.products.public-operator }}. ### Example: Scheduling CockroachDB onto labeled nodes @@ -182,7 +192,7 @@ In this example, CockroachDB has not yet been deployed to a running Kubernetes c This also ensures that the CockroachDB pods, which will be bound to persistent volumes in the same 3 availability zones, can be scheduled onto worker nodes in their respective zones. {{site.data.alerts.end}} -1. Add the following rules to the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +1. Add the following rules to the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): {% include_cached copy-clipboard.html %} ~~~ yaml @@ -248,7 +258,7 @@ For an example, see [Evicting CockroachDB from a running worker node](#example-e ### Add a toleration -Specify pod tolerations in the `tolerations` object of the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). +Specify pod tolerations in the `tolerations` object of the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). 
The following toleration matches a taint with the specified key, value, and `NoSchedule` effect, using the `Equal` operator. A toleration that uses the `Equal` operator must include a `value` field: @@ -282,7 +292,7 @@ spec: A `NoExecute` taint on a node prevents pods from being scheduled onto the node, and evicts pods from the node if they are already running on the node. The matching toleration allows a pod to be scheduled onto the node, and to continue running on the node if `tolerationSeconds` is not specified. If `tolerationSeconds` is specified, the pod is evicted after this number of seconds. -For more information on using taints and tolerations, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. +For more information on using taints and tolerations, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the {{ site.data.products.public-operator }}. ### Example: Evicting CockroachDB from a running worker node @@ -316,7 +326,7 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. node/gke-cockroachdb-default-pool-4e5ce539-j1h1 tainted ~~~ -1. Add a matching `tolerations` object to the Operator's custom resource, which was used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +1. Add a matching `tolerations` object to the {{ site.data.products.public-operator }}'s custom resource, which was used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -362,7 +372,7 @@ A pod with a *topology spread constraint* must satisfy its conditions when being ### Add a topology spread constraint -Specify pod topology spread constraints in the `topologySpreadConstraints` object of the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. +Specify pod topology spread constraints in the `topologySpreadConstraints` object of the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. The following topology spread constraint ensures that CockroachDB pods deployed with the label `environment=production` will not be unevenly distributed across zones by more than `1` pod: @@ -380,13 +390,13 @@ spec: The `DoNotSchedule` condition prevents labeled pods from being scheduled onto Kubernetes worker nodes when doing so would fail to meet the spread and topology constraints specified with `maxSkew` and `topologyKey`, respectively. 
-For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator.
+For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the {{ site.data.products.public-operator }}.
 
 ## Resource labels and annotations
 
 To assist in working with your cluster, you can add labels and annotations to your resources.
 
-Specify labels in `additionalLabels` and annotations in `additionalAnnotations` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster):
+Specify labels in `additionalLabels` and annotations in `additionalAnnotations` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster):
 
 {% include_cached copy-clipboard.html %}
 ~~~ yaml
diff --git a/src/current/v25.3/schedule-cockroachdb-operator.md b/src/current/v25.3/schedule-cockroachdb-operator.md
new file mode 100644
index 00000000000..6b3b5563ec6
--- /dev/null
+++ b/src/current/v25.3/schedule-cockroachdb-operator.md
@@ -0,0 +1,365 @@
+---
+title: Pod Scheduling with the CockroachDB Operator
+summary: Schedule CockroachDB pods on Kubernetes using the CockroachDB operator.
+toc: true
+toc_not_nested: true
+secure: true
+docs_area: deploy
+---
+
+This page describes how to configure pod scheduling settings, which the {{ site.data.products.cockroachdb-operator }} passes through to the Kubernetes scheduler. These settings control how CockroachDB pods are identified and scheduled onto worker nodes.
+
+{{site.data.alerts.callout_info}}
+The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}).
+{{site.data.alerts.end}}
+
+## Node selectors
+
+A pod with a *node selector* will be scheduled onto a worker node that has matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/), or key-value pairs.
+
+Specify the labels in `cockroachdb.crdbCluster.nodeSelector` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them.
+
+The following configuration causes CockroachDB pods to be scheduled onto worker nodes that have *both* the labels `worker-pool-name=crdb-workers` and `kubernetes.io/arch=amd64`:
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    nodeSelector:
+      worker-pool-name: crdb-workers
+      kubernetes.io/arch: amd64
+~~~
+
+For an example of labeling nodes, see [Scheduling CockroachDB onto labeled nodes](#example-scheduling-cockroachdb-onto-labeled-nodes).
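+
+As a sketch, to make an existing worker node match the `worker-pool-name` selector above, you could label it with `kubectl label`. Here `{worker-node-name}` is a placeholder for one of your own worker nodes; the `kubernetes.io/arch` label is normally set automatically by the kubelet:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl label nodes {worker-node-name} worker-pool-name=crdb-workers
+~~~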
+
+## Affinities and anti-affinities
+
+A pod with a *node affinity* seeks out worker nodes that have matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/). A pod with a *pod affinity* seeks out pods that have matching labels. A pod with a *pod anti-affinity* avoids pods that have matching labels.
+
+Affinities and anti-affinities can be used together with `operator` fields to:
+
+- Require CockroachDB pods to be scheduled onto a labeled worker node.
+- Require CockroachDB pods to be co-located with labeled pods (e.g., on a node or region).
+- Prevent CockroachDB pods from being scheduled onto a labeled worker node.
+- Prevent CockroachDB pods from being co-located with labeled pods (e.g., on a node or region).
+
+For an example, see [Scheduling CockroachDB onto labeled nodes](#example-scheduling-cockroachdb-onto-labeled-nodes).
+
+### Add a node affinity
+
+Specify node affinities in `cockroachdb.crdbCluster.affinity.nodeAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values.
+
+The following configuration requires that CockroachDB pods are scheduled onto worker nodes running a Linux operating system, with a preference against worker nodes in the `us-east4-b` availability zone.
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    affinity:
+      nodeAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+          nodeSelectorTerms:
+          - matchExpressions:
+            - key: kubernetes.io/os
+              operator: In
+              values:
+              - linux
+        preferredDuringSchedulingIgnoredDuringExecution:
+        - weight: 100
+          preference:
+            matchExpressions:
+            - key: topology.kubernetes.io/zone
+              operator: NotIn
+              values:
+              - us-east4-b
+~~~
+
+The `requiredDuringSchedulingIgnoredDuringExecution` node affinity rule, using the `In` operator, requires CockroachDB pods to be scheduled onto nodes with the matching label `kubernetes.io/os=linux`. It will not evict pods that are already running on nodes that do not match the affinity requirements.
+
+The `preferredDuringSchedulingIgnoredDuringExecution` node affinity rule, using the `NotIn` operator and specified `weight`, discourages (but does not disallow) CockroachDB pods from being scheduled onto nodes with the label `topology.kubernetes.io/zone=us-east4-b`. This achieves a similar effect as a `PreferNoSchedule` [taint](#taints-and-tolerations).
+
+For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [chart's values file](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the operator.
+
+### Add a pod affinity or anti-affinity
+
+Specify pod affinities and pod anti-affinities in `cockroachdb.crdbCluster.affinity.podAffinity` and `cockroachdb.crdbCluster.affinity.podAntiAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the pod must match all of them. If you specify multiple `values` for a label, the pod can match any of the values.
+
+The {{ site.data.products.cockroachdb-operator }} hard-codes the pod template to only allow one pod per Kubernetes node. If you need to override this behavior, you can [override the pod template]({% link {{ page.version.version }}/override-templates-cockroachdb-operator.md %}#override-the-default-pod).
+
+The following configuration attempts to schedule CockroachDB pods in the same zones as the pods that run our example [load generator](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/example-app.yaml) app. It disallows CockroachDB pods from being co-located on the same worker node.
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    affinity:
+      podAffinity:
+        preferredDuringSchedulingIgnoredDuringExecution:
+        - weight: 100
+          podAffinityTerm:
+            labelSelector:
+              matchExpressions:
+              - key: app
+                operator: In
+                values:
+                - loadgen
+            topologyKey: topology.kubernetes.io/zone
+      podAntiAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+        - labelSelector:
+            matchExpressions:
+            - key: app.kubernetes.io/instance
+              operator: In
+              values:
+              - cockroachdb
+          topologyKey: kubernetes.io/hostname
+~~~
+
+The `preferredDuringSchedulingIgnoredDuringExecution` pod affinity rule, using the `In` operator and specified `weight`, encourages (but does not require) CockroachDB pods to be co-located with pods labeled `app=loadgen` already running in the same zone, as specified with `topologyKey`.
+
+The `requiredDuringSchedulingIgnoredDuringExecution` pod anti-affinity rule, using the `In` operator, requires CockroachDB pods not to be co-located on a worker node, as specified with `topologyKey`.
+
+For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [chart's values file](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the operator.
+
+### Example: Scheduling CockroachDB onto labeled nodes
+
+In this example, CockroachDB has not yet been deployed to a running Kubernetes cluster. Use a combination of node affinity and pod anti-affinity rules to schedule 3 CockroachDB pods onto 3 labeled worker nodes.
+
+1. List the worker nodes on the running Kubernetes cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get nodes
+    ~~~
+    ~~~ shell
+    NAME                                         STATUS   ROLES    AGE     VERSION
+    gke-cockroachdb-default-pool-263138a5-kp3v   Ready    <none>   3m56s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-263138a5-nn62   Ready    <none>   3m56s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-41796213-75c9   Ready    <none>   3m56s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-41796213-bw3z   Ready    <none>   3m54s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-ccd74623-dghs   Ready    <none>   3m54s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-ccd74623-p5mf   Ready    <none>   3m55s   v1.20.10-gke.301
+    ~~~
+
+1. Add a `node=crdb` label to three of the running worker nodes.
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl label nodes gke-cockroachdb-default-pool-263138a5-kp3v gke-cockroachdb-default-pool-41796213-75c9 gke-cockroachdb-default-pool-ccd74623-dghs node=crdb
+    ~~~
+    ~~~ shell
+    node/gke-cockroachdb-default-pool-263138a5-kp3v labeled
+    node/gke-cockroachdb-default-pool-41796213-75c9 labeled
+    node/gke-cockroachdb-default-pool-ccd74623-dghs labeled
+    ~~~
+
+    In this example, 6 GKE nodes are deployed in 3 [node pools](https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools), and each node pool resides in a separate availability zone. 
To maintain an even distribution of CockroachDB pods as specified in our [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology), each of the 3 labeled worker nodes must belong to a different node pool.
+
+    {{site.data.alerts.callout_info}}
+    This also ensures that the CockroachDB pods, which will be bound to persistent volumes in the same three availability zones, can be scheduled onto worker nodes in their respective zones.
+    {{site.data.alerts.end}}
+
+1. Add the following rules to the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        affinity:
+          nodeAffinity:
+            requiredDuringSchedulingIgnoredDuringExecution:
+              nodeSelectorTerms:
+              - matchExpressions:
+                - key: node
+                  operator: In
+                  values:
+                  - crdb
+          podAntiAffinity:
+            requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                - key: app.kubernetes.io/instance
+                  operator: In
+                  values:
+                  - cockroachdb
+              topologyKey: kubernetes.io/hostname
+    ~~~
+
+    The `nodeAffinity` rule requires CockroachDB pods to be scheduled onto worker nodes with the label `node=crdb`. The `podAntiAffinity` rule requires CockroachDB pods not to be co-located on a worker node, as specified with `topologyKey`.
+
+1. Apply the settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+1. The CockroachDB pods will be deployed to the 3 labeled nodes. To observe this, run:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods -o wide
+    ~~~
+    ~~~ shell
+    NAME                                 READY   STATUS    RESTARTS   AGE    IP           NODE                                         NOMINATED NODE   READINESS GATES
+    cockroach-operator-bfdbfc9c7-tbpsw   1/1     Running   0          171m   10.32.2.4    gke-cockroachdb-default-pool-263138a5-kp3v   <none>           <none>
+    cockroachdb-0                        1/1     Running   0          100s   10.32.4.10   gke-cockroachdb-default-pool-ccd74623-dghs   <none>           <none>
+    cockroachdb-1                        1/1     Running   0          100s   10.32.2.6    gke-cockroachdb-default-pool-263138a5-kp3v   <none>           <none>
+    cockroachdb-2                        1/1     Running   0          100s   10.32.0.5    gke-cockroachdb-default-pool-41796213-75c9   <none>           <none>
+    ~~~
+
+## Taints and tolerations
+
+When a *taint* is added to a Kubernetes worker node, pods are prevented from being scheduled onto that node. This effect is ignored by adding a *toleration* to a pod that specifies a matching taint.
+
+Taints and tolerations are useful if you want to:
+
+- Prevent CockroachDB pods from being scheduled onto a labeled worker node.
+- Evict CockroachDB pods from a labeled worker node on which they are currently running.
+
+For an example, see [Evicting CockroachDB from a running worker node](#example-evicting-cockroachdb-from-a-running-worker-node).
+
+### Add a toleration
+
+Specify pod tolerations in the `cockroachdb.crdbCluster.tolerations` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster).
+
+The following toleration matches a taint with the specified key, value, and `NoSchedule` effect, using the `Equal` operator. 
A toleration that uses the `Equal` operator must include a `value` field:
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    tolerations:
+    - key: "test"
+      operator: "Equal"
+      value: "example"
+      effect: "NoSchedule"
+~~~
+
+A `NoSchedule` taint on a node prevents pods from being scheduled onto the node. The matching toleration allows a pod to be scheduled onto the node. A `NoSchedule` toleration is therefore best included before [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster).
+
+{{site.data.alerts.callout_info}}
+A `PreferNoSchedule` taint discourages, but does not disallow, pods from being scheduled onto the node.
+{{site.data.alerts.end}}
+
+The following toleration matches every taint with the specified key and `NoExecute` effect, using the `Exists` operator. A toleration that uses the `Exists` operator must exclude a `value` field:
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    tolerations:
+    - key: "test"
+      operator: "Exists"
+      effect: "NoExecute"
+      tolerationSeconds: 3600
+~~~
+
+A `NoExecute` taint on a node prevents pods from being scheduled onto the node, and evicts pods from the node if they are already running on the node. The matching toleration allows a pod to be scheduled onto the node, and to continue running on the node if `tolerationSeconds` is not specified. If `tolerationSeconds` is specified, the pod is evicted after this number of seconds.
+
+For more information on using taints and tolerations, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). The [chart's values file](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the operator.
+
+### Example: Evicting CockroachDB from a running worker node
+
+In this example, CockroachDB has already been deployed on a Kubernetes cluster. Use the `NoExecute` effect to evict one of the CockroachDB pods from its worker node.
+
+1. List the worker nodes on the running Kubernetes cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get nodes
+    ~~~
+    ~~~ shell
+    NAME                                         STATUS   ROLES    AGE   VERSION
+    gke-cockroachdb-default-pool-4e5ce539-68p5   Ready    <none>   56m   v1.20.9-gke.1001
+    gke-cockroachdb-default-pool-4e5ce539-j1h1   Ready    <none>   56m   v1.20.9-gke.1001
+    gke-cockroachdb-default-pool-95fde00d-173d   Ready    <none>   56m   v1.20.9-gke.1001
+    gke-cockroachdb-default-pool-95fde00d-hw04   Ready    <none>   56m   v1.20.9-gke.1001
+    gke-cockroachdb-default-pool-eb2b2889-q15v   Ready    <none>   56m   v1.20.9-gke.1001
+    gke-cockroachdb-default-pool-eb2b2889-q704   Ready    <none>   56m   v1.20.9-gke.1001
+    ~~~
+
+1. Add a taint to a running worker node:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl taint nodes gke-cockroachdb-default-pool-4e5ce539-j1h1 test=example:NoExecute
+    ~~~
+    ~~~ shell
+    node/gke-cockroachdb-default-pool-4e5ce539-j1h1 tainted
+    ~~~
+
+1. Add a matching `tolerations` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        tolerations:
+        - key: "test"
+          operator: "Exists"
+          effect: "NoExecute"
+    ~~~
+
+    Because no `tolerationSeconds` is specified, CockroachDB will be evicted immediately from the tainted worker node.
+
+1. 
Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+1. The CockroachDB pod running on the tainted node (in this case, `cockroachdb-2`) will be evicted and started on a different worker node. To observe this:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods -o wide
+    ~~~
+    ~~~ shell
+    NAME                                 READY   STATUS    RESTARTS   AGE     IP           NODE                                         NOMINATED NODE   READINESS GATES
+    cockroach-operator-c9fc6cb5c-bl6rs   1/1     Running   0          44m     10.32.2.4    gke-cockroachdb-default-pool-4e5ce539-68p5   <none>           <none>
+    cockroachdb-0                        1/1     Running   0          9m21s   10.32.4.10   gke-cockroachdb-default-pool-95fde00d-173d   <none>           <none>
+    cockroachdb-1                        1/1     Running   0          9m21s   10.32.2.6    gke-cockroachdb-default-pool-eb2b2889-q15v   <none>           <none>
+    cockroachdb-2                        0/1     Running   0          6s      10.32.0.5    gke-cockroachdb-default-pool-4e5ce539-68p5   <none>           <none>
+    ~~~
+
+    `cockroachdb-2` is now scheduled onto the `gke-cockroachdb-default-pool-4e5ce539-68p5` node.
+
+## Topology spread constraints
+
+A pod with a *topology spread constraint* must satisfy its conditions when being deployed to a given topology. This is used to control the degree to which pods are unevenly distributed across failure domains.
+
+### Add a topology spread constraint
+
+Specify pod topology spread constraints in the `cockroachdb.crdbCluster.topologySpreadConstraints` object of the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints.
+
+The following topology spread constraint ensures that CockroachDB pods deployed with the label `environment=production` will not be unevenly distributed across zones by more than `1` pod:
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    topologySpreadConstraints:
+    - maxSkew: 1
+      topologyKey: topology.kubernetes.io/zone
+      whenUnsatisfiable: DoNotSchedule
+      labelSelector:
+        matchLabels:
+          environment: production
+~~~
+
+The `DoNotSchedule` condition prevents labeled pods from being scheduled onto Kubernetes worker nodes when doing so would fail to meet the spread and topology constraints specified with `maxSkew` and `topologyKey`, respectively.
+
+For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The [chart's values file](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the operator.
+
+## Resource labels and annotations
+
+To assist in working with your cluster, you can add labels and annotations to your resources.
+
+Specify labels in `cockroachdb.crdbCluster.podLabels` and annotations in `cockroachdb.crdbCluster.podAnnotations` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):
+
+~~~ yaml
+cockroachdb:
+  crdbCluster:
+    podLabels:
+      app.kubernetes.io/version: v25.1.4
+    podAnnotations:
+      operator: https://raw.githubusercontent.com/cockroachdb/helm-charts/refs/heads/master/cockroachdb-parent/charts/cockroachdb/values.yaml
+~~~
+
+To verify that the labels and annotations were applied to a pod, for example, run `kubectl describe pod {pod-name}`.
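+
+For example, assuming the default `cockroachdb-0` pod name and the `$NAMESPACE` variable used elsewhere on this page:
+
+{% include_cached copy-clipboard.html %}
+~~~ shell
+kubectl describe pod cockroachdb-0 -n $NAMESPACE
+~~~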
+
+For more information about [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) and [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/), see the Kubernetes documentation.
diff --git a/src/current/v25.3/secure-cockroachdb-kubernetes.md b/src/current/v25.3/secure-cockroachdb-kubernetes.md
index 6fcf7b7fee8..fc14a375b08 100644
--- a/src/current/v25.3/secure-cockroachdb-kubernetes.md
+++ b/src/current/v25.3/secure-cockroachdb-kubernetes.md
@@ -8,23 +8,27 @@ docs_area: deploy
 ---
 
 {{site.data.alerts.callout_info}}
-This article assumes you have already [deployed CockroachDB securely on a single Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}) using the Operator or Helm. However, it's possible to configure these settings before starting CockroachDB on Kubernetes.
+This article assumes you have already [deployed CockroachDB securely on a single Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}) using the {{ site.data.products.public-operator }} or Helm. However, it's possible to configure these settings before starting CockroachDB on Kubernetes.
 {{site.data.alerts.end}}
 
-By default, self-signed certificates are used when using the Operator or Helm to securely [deploy CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). However, the recommended approach is to use `cert-manager` for certificate management. For details, refer to [Deploy cert-manager for mTLS](?filters=helm#deploy-cert-manager-for-mtls).
+By default, self-signed certificates are used when using the {{ site.data.products.public-operator }} or Helm to securely [deploy CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). However, the recommended approach is to use `cert-manager` for certificate management. For details, refer to [Deploy cert-manager for mTLS](?filters=helm#deploy-cert-manager-for-mtls).
+
+This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Certificate Management with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/secure-cockroachdb-operator.md %}).
+
+{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %}
 
 This page explains how to:
 
-- Authenticate an Operator or Helm deployment using a [custom CA](#use-a-custom-ca)
+- Authenticate a {{ site.data.products.public-operator }} or Helm deployment using a [custom CA](#use-a-custom-ca)
 - [Rotate security certificates](#rotate-security-certificates)
-- [Secure the webhooks](#secure-the-webhooks) (Operator)
+- [Secure the webhooks](#secure-the-webhooks) ({{ site.data.products.public-operator }})
 
 {{site.data.alerts.callout_danger}}
 If you are running a secure Helm deployment on Kubernetes 1.22 and later, you must migrate away from using the Kubernetes CA for cluster authentication. The recommended approach is to use `cert-manager` for certificate management. For details, refer to [Deploy cert-manager for mTLS](?filters=helm#deploy-cert-manager-for-mtls).
 {{site.data.alerts.end}}
- +
@@ -33,9 +37,9 @@ If you are running a secure Helm deployment on Kubernetes 1.22 and later, you mu ## Use a custom CA
-By default, the Operator will generate and sign 1 client and 1 node certificate to secure the cluster. +By default, the {{ site.data.products.public-operator }} will generate and sign 1 client and 1 node certificate to secure the cluster. -To use your own certificate authority instead, add the following to the Operator's custom resource **before** [initializing the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +To use your own certificate authority instead, add the following to the {{ site.data.products.public-operator }}'s custom resource **before** [initializing the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): {% include_cached copy-clipboard.html %} ~~~ yaml @@ -50,7 +54,7 @@ Replace: - `{client_secret_name}`: the name of the Kubernetes secret that contains the generated node certificate and key. {{site.data.alerts.callout_info}} -Currently, the Operator requires that the client and node secrets each contain the filenames `tls.crt` and `tls.key`. +Currently, the {{ site.data.products.public-operator }} requires that the client and node secrets each contain the filenames `tls.crt` and `tls.key`. {{site.data.alerts.end}} {% include {{ page.version.version }}/orchestration/apply-custom-resource.md %} @@ -120,7 +124,7 @@ Complete the following steps **before** [initializing the cluster]({% link {{ pa --ca-key=my-safe-directory/ca.key ~~~ -1. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the Operator: +1. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -134,7 +138,7 @@ Complete the following steps **before** [initializing the cluster]({% link {{ pa secret/cockroachdb.client.root created ~~~ -1. Create the certificate and key pair for your CockroachDB nodes, specifying the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). This example uses the Operator's default namespace (`cockroach-operator-system`): +1. Create the certificate and key pair for your CockroachDB nodes, specifying the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). This example uses the {{ site.data.products.public-operator }}'s default namespace (`cockroach-operator-system`): {% include_cached copy-clipboard.html %} ~~~ shell @@ -150,7 +154,7 @@ Complete the following steps **before** [initializing the cluster]({% link {{ pa --ca-key=my-safe-directory/ca.key ~~~ -1. Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the Operator: +1. Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -178,7 +182,7 @@ Complete the following steps **before** [initializing the cluster]({% link {{ pa default-token-6js7b kubernetes.io/service-account-token 3 9h ~~~ -1. 
Add `nodeTLSSecret` and `clientTLSSecret` to the Operator's [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), specifying the generated secret names: +1. Add `nodeTLSSecret` and `clientTLSSecret` to the {{ site.data.products.public-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), specifying the generated secret names: ~~~ yaml spec: @@ -318,7 +322,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi --overwrite ~~~ -1. Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the Operator: +1. Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -332,7 +336,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi secret/cockroachdb.client.root.2 created ~~~ -1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). This example uses the Operator's default namespace (`cockroach-operator-system`): +1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). This example uses the {{ site.data.products.public-operator }}'s default namespace (`cockroach-operator-system`): {% include_cached copy-clipboard.html %} ~~~ shell @@ -349,7 +353,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi --overwrite ~~~ -1. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the Operator: +1. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -363,7 +367,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi secret/cockroachdb.node.2 created ~~~ -1. Add `nodeTLSSecret` and `clientTLSSecret` to the Operator's [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), specifying the new secret names: +1. Add `nodeTLSSecret` and `clientTLSSecret` to the {{ site.data.products.public-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), specifying the new secret names: ~~~ yaml spec: @@ -386,7 +390,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi ~~~ {{site.data.alerts.callout_info}} - Remember that `nodeTLSSecret` and `clientTLSSecret` in the Operator's [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster) must specify these secret names. For details, see [Use a custom CA](#use-a-custom-ca). 
+ Remember that `nodeTLSSecret` and `clientTLSSecret` in the {{ site.data.products.public-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster) must specify these secret names. For details, see [Use a custom CA](#use-a-custom-ca). {{site.data.alerts.end}} 1. Apply the new settings to the cluster: @@ -563,11 +567,11 @@ Previously, the Helm chart used a self-signer for cluster authentication. This a ## Secure the webhooks -The Operator ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS. +The {{ site.data.products.public-operator }} ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS. -By default, the Operator searches for the TLS secret `cockroach-operator-webhook-ca`, which contains a CA certificate. If the secret is not found, the Operator auto-generates `cockroach-operator-webhook-ca` with a CA certificate for future runs. +By default, the {{ site.data.products.public-operator }} searches for the TLS secret `cockroach-operator-webhook-ca`, which contains a CA certificate. If the secret is not found, the {{ site.data.products.public-operator }} auto-generates `cockroach-operator-webhook-ca` with a CA certificate for future runs. -The Operator then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-webhook-ca`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate. +The {{ site.data.products.public-operator }} then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-webhook-ca`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate. You can also use your own certificate authority rather than `cockroach-operator-webhook-ca`. Both the certificate and key files you generate must be PEM-encoded. See the following [example](#example-using-openssl-to-secure-the-webhooks). @@ -607,7 +611,7 @@ These steps demonstrate how to use the [`openssl genrsa`](https://www.openssl.or rm tls.crt tls.key ~~~ -1. Roll the Operator deployment to ensure a new server certificate is generated: +1. Roll the {{ site.data.products.public-operator }} deployment to ensure a new server certificate is generated: {% include_cached copy-clipboard.html %} ~~~ shell diff --git a/src/current/v25.3/secure-cockroachdb-operator.md b/src/current/v25.3/secure-cockroachdb-operator.md new file mode 100644 index 00000000000..f88415e4367 --- /dev/null +++ b/src/current/v25.3/secure-cockroachdb-operator.md @@ -0,0 +1,201 @@ +--- +title: Certificate Management with the CockroachDB Operator +summary: How to authenticate a secure CockroachDB cluster deployed with the CockroachDB operator. 
+toc: true
+toc_not_nested: true
+secure: true
+docs_area: deploy
+---
+
+This page describes how to rotate security certificates and secure the webhooks for a CockroachDB cluster deployed with the {{ site.data.products.cockroachdb-operator }}.
+
+{{site.data.alerts.callout_info}}
+The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}).
+{{site.data.alerts.end}}
+
+## Rotate security certificates
+
+You may need to rotate the node, client, or CA certificates in the following scenarios:
+
+- The node, client, or CA certificates are expiring soon.
+- Your organization's compliance policy requires periodic certificate rotation.
+- The key (for a node, client, or CA) is compromised.
+- You need to modify the contents of a certificate, for example, to add another DNS name or the IP address of a load balancer through which a node can be reached. In this case, you would need to rotate only the node certificates.
+
+### Example: Rotate certificates signed with `cockroach cert`
+
+If you previously [authenticated with `cockroach cert`]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster), follow these steps to rotate the certificates using the same CA:
+
+1. Create a new client certificate and key pair for the root user, overwriting the previous certificate and key:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    cockroach cert create-client root \
+    --certs-dir=certs \
+    --ca-key=my-safe-directory/ca.key \
+    --overwrite
+    ~~~
+
+1. Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the {{ site.data.products.cockroachdb-operator }}:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl create secret generic cockroachdb.client.root.2 \
+    --from-file=tls.key=certs/client.root.key \
+    --from-file=tls.crt=certs/client.root.crt \
+    --from-file=ca.crt=certs/ca.crt
+    ~~~
+    ~~~ shell
+    secret/cockroachdb.client.root.2 created
+    ~~~
+
+1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). This example uses the `cockroach-ns` namespace:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    cockroach cert create-node localhost \
+    127.0.0.1 \
+    cockroachdb-public \
+    cockroachdb-public.cockroach-ns \
+    cockroachdb-public.cockroach-ns.svc.cluster.local \
+    *.cockroachdb \
+    *.cockroachdb.cockroach-ns \
+    *.cockroachdb.cockroach-ns.svc.cluster.local \
+    --certs-dir=certs \
+    --ca-key=my-safe-directory/ca.key \
+    --overwrite
+    ~~~
+
+1. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the {{ site.data.products.cockroachdb-operator }}:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl create secret generic cockroachdb.node.2 \
+    --from-file=tls.key=certs/node.key \
+    --from-file=tls.crt=certs/node.crt \
+    --from-file=ca.crt=certs/ca.crt
+    ~~~
+    ~~~ shell
+    secret/cockroachdb.node.2 created
+    ~~~
+
+1. Add `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` to the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):
+
+    ~~~ yaml
+    cockroachdb:
+      tls:
+        externalCertificates:
+          enabled: true
+          certificates:
+            nodeClientSecretName: "cockroachdb.client.root.2"
+            nodeSecretName: "cockroachdb.node.2"
+    ~~~
+
+1. 
Check that the secrets were created on the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get secrets
+    ~~~
+    ~~~ shell
+    NAME                        TYPE                                  DATA   AGE
+    cockroachdb.client.root.2   Opaque                                3      4s
+    cockroachdb.node.2          Opaque                                3      1s
+    default-token-6js7b         kubernetes.io/service-account-token   3      9h
+    ~~~
+
+    {{site.data.alerts.callout_info}}
+    Remember that `nodeSecretName` and `nodeClientSecretName` in the operator configuration must specify these secret names. For details, see the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster).
+    {{site.data.alerts.end}}
+
+1. Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+    The pods will terminate and restart one at a time, using the new certificates. You can observe this process:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods
+    ~~~
+    ~~~ shell
+    NAME                                  READY   STATUS        RESTARTS   AGE
+    cockroach-operator-655fbf7847-lvz6x   1/1     Running       0          4h29m
+    cockroachdb-0                         1/1     Running       0          4h16m
+    cockroachdb-1                         1/1     Terminating   0          4h16m
+    cockroachdb-2                         1/1     Running       0          43s
+    ~~~
+
+1. Delete the existing client secret that is no longer in use:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete secret cockroachdb.client.root
+    ~~~
+    ~~~ shell
+    secret "cockroachdb.client.root" deleted
+    ~~~
+
+1. Delete the existing node secret that is no longer in use:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl delete secret cockroachdb.node
+    ~~~
+    ~~~ shell
+    secret "cockroachdb.node" deleted
+    ~~~
+
+## Secure the webhooks
+
+The operator ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS.
+
+By default, the {{ site.data.products.cockroachdb-operator }} searches for the TLS secret `cockroach-operator-certs`, which contains a CA certificate. If the secret is not found, the operator auto-generates `cockroach-operator-certs` with a CA certificate for future runs.
+
+The operator then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-certs`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate.
+
+You can also use your own certificate authority rather than `cockroach-operator-certs`. Both the certificate and key files you generate must be PEM-encoded. See the following [example](#example-using-openssl-to-secure-the-webhooks).
+
+### Example: Using OpenSSL to secure the webhooks
+
+These steps demonstrate how to use the [`openssl genrsa`](https://www.openssl.org/docs/manmaster/man1/genrsa.html) and [`openssl req`](https://www.openssl.org/docs/manmaster/man1/req.html) subcommands to secure the webhooks on a running Kubernetes cluster:
+
+1. Generate a 4096-bit RSA private key:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    openssl genrsa -out tls.key 4096
+    ~~~
+
+1. Generate an X.509 certificate, valid for 10 years. You will be prompted for the certificate field values. 
+
+1. Generate an X.509 certificate, valid for 10 years. You will be prompted for the certificate field values.
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    openssl req -x509 -new -nodes -key tls.key -sha256 -days 3650 -out tls.crt
+    ~~~
+
+1. Create the secret, making sure that you are in the correct namespace:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl create secret tls cockroach-operator-certs --cert=tls.crt --key=tls.key
+    ~~~
+    ~~~ shell
+    secret/cockroach-operator-certs created
+    ~~~
+
+1. Remove the certificate and key from your local environment:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    rm tls.crt tls.key
+    ~~~
+
+1. Roll the operator deployment to ensure a new server certificate is generated:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl rollout restart deploy/cockroach-operator-manager
+    ~~~
+    ~~~ shell
+    deployment.apps/cockroach-operator-manager restarted
+    ~~~
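+
+1. (Optional) Confirm that the webhook configurations are in place after the restart. This is a minimal sketch; the exact configuration names vary by installation, so inspect the output for the entries created by the operator:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get mutatingwebhookconfigurations,validatingwebhookconfigurations
+    ~~~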
diff --git a/src/current/v25.3/simulate-a-multi-region-cluster-on-localhost.md b/src/current/v25.3/simulate-a-multi-region-cluster-on-localhost.md
index 3ddca542d37..59450587dba 100644
--- a/src/current/v25.3/simulate-a-multi-region-cluster-on-localhost.md
+++ b/src/current/v25.3/simulate-a-multi-region-cluster-on-localhost.md
@@ -8,7 +8,7 @@ docs_area: deploy
 Once you've [installed CockroachDB]({% link {{ page.version.version }}/install-cockroachdb.md %}), you can simulate multi-region cluster on your local machine using [`cockroach demo`]({% link {{ page.version.version }}/cockroach-demo.md %})to learn about CockroachDB's [multi-region abstractions]({% link {{ page.version.version }}/multiregion-overview.md %}).
 
 {{site.data.alerts.callout_info}}
-[`cockroach demo`]({% link {{ page.version.version }}/cockroach-demo.md %}) is not suitable for production deployments. Additionally, simulating multiple geographically distributed nodes on a single host is not representative of the [performance you should expect]({% link {{ page.version.version }}/frequently-asked-questions.md %}#single-row-perf) in a production deployment. To learn more about production multi-region deployments, refer to [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}) and [Deploy a Global, Serverless Application]({% link {{ page.version.version }}/movr-flask-deployment.md %}), and review the [Production Checklist](recommended-production-settings.html).
+[`cockroach demo`]({% link {{ page.version.version }}/cockroach-demo.md %}) is not suitable for production deployments. Additionally, simulating multiple geographically distributed nodes on a single host is not representative of the [performance you should expect]({% link {{ page.version.version }}/frequently-asked-questions.md %}#single-row-perf) in a production deployment. To learn more about production multi-region deployments, refer to [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}) and [Deploy a Global, Serverless Application]({% link {{ page.version.version }}/movr-flask-deployment.md %}), and review the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}).
 {{site.data.alerts.end}}
 
 ## Before you begin
diff --git a/src/current/v25.3/upgrade-cockroachdb-kubernetes.md b/src/current/v25.3/upgrade-cockroachdb-kubernetes.md
index 69116e7bc8b..dabf930eef0 100644
--- a/src/current/v25.3/upgrade-cockroachdb-kubernetes.md
+++ b/src/current/v25.3/upgrade-cockroachdb-kubernetes.md
@@ -9,6 +9,10 @@ docs_area: deploy
 
 This page shows how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}).
 
+This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Upgrade a Cluster in Kubernetes with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/upgrade-cockroachdb-operator.md %}).
+
+{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %}
+
 ## Overview
 
 {% include common/upgrade/overview.md %}
@@ -18,8 +22,8 @@ On Kubernetes, the upgrade is a [staged update](https://kubernetes.io/docs/tutor
 
 Select the cluster's deployment method to continue.
- - + +
diff --git a/src/current/v25.3/upgrade-cockroachdb-operator.md b/src/current/v25.3/upgrade-cockroachdb-operator.md
new file mode 100644
index 00000000000..07497950a2e
--- /dev/null
+++ b/src/current/v25.3/upgrade-cockroachdb-operator.md
@@ -0,0 +1,149 @@
+---
+title: Upgrade a Cluster in Kubernetes with the CockroachDB Operator
+summary: How to upgrade a secure CockroachDB cluster deployed with the CockroachDB operator.
+toc: true
+toc_not_nested: true
+secure: true
+docs_area: deploy
+---
+
+This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}) with the {{ site.data.products.cockroachdb-operator }}.
+
+{{site.data.alerts.callout_info}}
+The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}).
+{{site.data.alerts.end}}
+
+## Overview
+
+{% include common/upgrade/overview.md %}
+
+On Kubernetes, the upgrade is a staged update in which each pod's container image for CockroachDB is updated in a rolling fashion. The cluster remains available during the upgrade.
+
+## Before you begin
+
+{% include {{ page.version.version }}/orchestration/operator-check-namespace.md %}
+{% include common/upgrade/prepare-to-upgrade-self-hosted.md %}
+
+### Ensure you have a valid license key
+
+{% include common/upgrade-cockroach-version-license-limitations.md %}
+
+## Perform a patch upgrade
+
+To upgrade from one patch release to another within the same major version, perform the following steps. The operator applies the change to one node at a time:
+
+1. Change the container image in the values file:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        image:
+          name: cockroachdb/cockroach:v25.3.2
+    ~~~
+
+1. Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+    The operator will perform the staged update.
+
+1. To check the status of the rolling upgrade, run `kubectl get pods`.
+
+1. Verify that all pods have been upgraded:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods \
+    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}{end}'
+    ~~~
+
+You can also check the CockroachDB version of each node in the [DB Console]({% link {{ page.version.version }}/ui-cluster-overview-page.md %}#node-details).
+
+### Roll back a patch upgrade
+
+{% include_cached common/upgrade/patch-rollback-kubernetes.md %}
+
+## Perform a major-version upgrade
+
+To perform a major-version upgrade:
+
+1. Change the container image in the values file:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        image:
+          name: cockroachdb/cockroach:v25.3.0
+    ~~~
+
+1. Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+    The operator will perform the staged update.
+
+1. To check the status of the rolling upgrade, run `kubectl get pods`.
+
+1. Verify that all pods have been upgraded:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods \
+    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}{end}'
+    ~~~
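+
+1. (Optional) Check the cluster's active (logical) version from SQL. Until the upgrade is finalized, the active cluster version reported here remains at the previous major version even though all pods run the new binary. This is a minimal sketch; it assumes the node certificates are mounted at `/cockroach/cockroach-certs` in the pod, which may differ in your deployment:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl exec -it cockroachdb-0 -n $NAMESPACE \
+    -- ./cockroach sql --certs-dir=/cockroach/cockroach-certs \
+    -e 'SHOW CLUSTER SETTING version;'
+    ~~~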
+
+1. If auto-finalization is enabled (the default), finalization begins as soon as the last node rejoins the cluster. When finalization finishes, the upgrade is complete.
+
+1. If auto-finalization is disabled, follow your organization's testing procedures to decide whether to [finalize the upgrade](#finalize-a-major-version-upgrade-manually) or [roll back](#roll-back-a-major-version-upgrade). After finalization begins, you can no longer roll back to the cluster's previous major version.
+
+### Finalize a major-version upgrade manually
+
+{% include common/upgrade/finalize-kubernetes.md %}
+
+### Roll back a major-version upgrade
+
+To roll back to the previous major version before an upgrade is finalized:
+
+1. Change the container image in the values file to use the previous major version:
+
+    ~~~ yaml
+    cockroachdb:
+      crdbCluster:
+        image:
+          name: cockroachdb/cockroach:v25.2
+    ~~~
+
+1. Apply the new settings to the cluster:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE
+    ~~~
+
+    The operator will perform the staged rollback.
+
+1. To check the status of the rollback, run `kubectl get pods`.
+
+1. Verify that all pods have been rolled back:
+
+    {% include_cached copy-clipboard.html %}
+    ~~~ shell
+    kubectl get pods \
+    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}{end}'
+    ~~~
+
+Rollbacks do not require finalization.
+
+## Disable auto-finalization
+
+{% include common/upgrade/disable-auto-finalization.md %}
+
+## Troubleshooting
+
+{% include common/upgrade/troubleshooting-self-hosted.md %}
From 6aac6c5548b7f23e55f815ad8549ca57cb398393 Mon Sep 17 00:00:00 2001
From: Joe Lodin
Date: Thu, 7 Aug 2025 17:22:01 -0400
Subject: [PATCH 2/3] Fix unnecessary hard-coded version

---
 .../_includes/v25.2/cockroachdb-operator-recommendation.md | 4 ++--
 .../_includes/v25.3/cockroachdb-operator-recommendation.md | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md
index 167c3e3ef05..bfb0d5efb4b 100644
--- a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md
+++ b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md
@@ -1,8 +1,8 @@
 {% if page.name == "kubernetes-operator.md" %}
 {{ site.data.alerts.callout_success }}
-The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of both single-region and multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/cockroachdb-operator-overview.md %}).
+The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of both single-region and multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ site.versions["stable"] }}/cockroachdb-operator-overview.md %}).
 
-Cockroach Labs recommends that new deployments of CockroachDB on Kubernetes use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link v25.2/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link v25.2/migrate-cockroachdb-kubernetes-operator.md %}) migration guides.
+Cockroach Labs recommends that new deployments of CockroachDB on Kubernetes use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link {{ site.versions["stable"] }}/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link {{ site.versions["stable"] }}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides.
 {{ site.data.alerts.end }}
 {% else %}
 {{ site.data.alerts.callout_success }}
diff --git a/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md b/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md
index 59ec152a77a..1b37c88d6da 100644
--- a/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md
+++ b/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md
@@ -1,8 +1,8 @@
 {% if page.name == "kubernetes-operator.md" %}
 {{ site.data.alerts.callout_success }}
-The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/cockroachdb-operator-overview.md %}).
+The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of both single-region and multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ site.versions["stable"] }}/cockroachdb-operator-overview.md %}).
 
-New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link v25.2/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link v25.2/migrate-cockroachdb-kubernetes-operator.md %}) migration guides.
+Cockroach Labs recommends that new deployments of CockroachDB on Kubernetes use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link {{ site.versions["stable"] }}/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link {{ site.versions["stable"] }}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides.
{{ site.data.alerts.end }} {% else %} {{ site.data.alerts.callout_success }} From 0823e0ebd44c886b8c142cc9a86ee1fb304da7dc Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 17:47:16 -0400 Subject: [PATCH 3/3] Correct too-long frontmatter --- src/current/v25.2/cockroachdb-operator-performance.md | 2 +- src/current/v25.3/cockroachdb-operator-performance.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/current/v25.2/cockroachdb-operator-performance.md b/src/current/v25.2/cockroachdb-operator-performance.md index 98d6abb9077..309b0b44024 100644 --- a/src/current/v25.2/cockroachdb-operator-performance.md +++ b/src/current/v25.2/cockroachdb-operator-performance.md @@ -1,6 +1,6 @@ --- title: Performance with the CockroachDB Operator -summary: How running CockroachDB in Kubernetes affects its performance and how to get the best possible performance when running in Kubernetes using the CockroachDB operator. +summary: How running CockroachDB in Kubernetes affects its performance and how to get the best possible performance when using the CockroachDB operator. toc: true docs_area: deploy --- diff --git a/src/current/v25.3/cockroachdb-operator-performance.md b/src/current/v25.3/cockroachdb-operator-performance.md index 98d6abb9077..309b0b44024 100644 --- a/src/current/v25.3/cockroachdb-operator-performance.md +++ b/src/current/v25.3/cockroachdb-operator-performance.md @@ -1,6 +1,6 @@ --- title: Performance with the CockroachDB Operator -summary: How running CockroachDB in Kubernetes affects its performance and how to get the best possible performance when running in Kubernetes using the CockroachDB operator. +summary: How running CockroachDB in Kubernetes affects its performance and how to get the best possible performance when using the CockroachDB operator. toc: true docs_area: deploy ---