diff --git a/deployment/live/monitoring/ci/.terraform.lock.hcl b/deployment/live/monitoring/ci/.terraform.lock.hcl
new file mode 100644
index 0000000..17311c3
--- /dev/null
+++ b/deployment/live/monitoring/ci/.terraform.lock.hcl
@@ -0,0 +1,22 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/google" {
+  version     = "5.14.0"
+  constraints = "5.14.0"
+  hashes = [
+    "h1:+2NBnhyLVoyv3smDe+ujn31Iw/+NNqioP64P9W8nX3o=",
+    "zh:3927ef7417d9d8a56077e6655d76c99f4175f9746e39226a00ee0555f8c63f8f",
+    "zh:4b4f521f0779a1797047a8c531afda093aade934b4a49c080fe8d38680b3a52f",
+    "zh:7e880c5b72684fc8342e03180a1fbbec65c6afeb70511b9c16181d5e168269e6",
+    "zh:81a7f2efc30e698f476d3e240ee2d82f14eda374852059429fe808ad77b6addd",
+    "zh:826d4ea55b4afceefb332646f21c6b6dc590b39b16e8d9b5d4a4211beb91dc5e",
+    "zh:865600ef669fcdd4ae77515c3fd12565fab0f2a263fa2a6dae562f6fe68ed093",
+    "zh:8e933d1d10fd316e62340175667264f093e4d24457b63d5adf3c424cce22b495",
+    "zh:bf261924f7350074a355e5b9337f3a8054efb20d316e9085f2b5766dfb5126c4",
+    "zh:e28e67dcbd4bbd82798561baf86d3dd04f97e08bbf523dfb9f355564ef27d3d6",
+    "zh:f33cdd3117af8a15f33d375dbe398a5e558730cf6a7a145a479ab68e77572c12",
+    "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+    "zh:f913a0e0708391ccd26fc3458158cc1e10d68dc621bef3a1583328c61a77225d",
+  ]
+}
diff --git a/deployment/live/monitoring/ci/terragrunt.hcl b/deployment/live/monitoring/ci/terragrunt.hcl
new file mode 100644
index 0000000..ade78bf
--- /dev/null
+++ b/deployment/live/monitoring/ci/terragrunt.hcl
@@ -0,0 +1,19 @@
+# Terragrunt configuration for the "ci" monitoring environment.
+
+# Read the parent configuration so that its locals can be reused below.
+locals {
+  common_vars = read_terragrunt_config(find_in_parent_folders())
+}
+
+# Include the parent terragrunt.hcl found by walking up the directory tree.
+include {
+  path = find_in_parent_folders()
+}
+
+# Deploy the monitoring module that lives in this repository.
+terraform {
+  source = "${get_path_to_repo_root()}/deployment/modules/monitoring"
+}
+
+# Module inputs: the parent's locals, with env set for this directory.
+inputs = merge(local.common_vars.locals, { env = "ci" })
diff --git a/deployment/live/monitoring/dev/.terraform.lock.hcl 
b/deployment/live/monitoring/dev/.terraform.lock.hcl
new file mode 100644
index 0000000..17311c3
--- /dev/null
+++ b/deployment/live/monitoring/dev/.terraform.lock.hcl
@@ -0,0 +1,22 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/google" {
+  version     = "5.14.0"
+  constraints = "5.14.0"
+  hashes = [
+    "h1:+2NBnhyLVoyv3smDe+ujn31Iw/+NNqioP64P9W8nX3o=",
+    "zh:3927ef7417d9d8a56077e6655d76c99f4175f9746e39226a00ee0555f8c63f8f",
+    "zh:4b4f521f0779a1797047a8c531afda093aade934b4a49c080fe8d38680b3a52f",
+    "zh:7e880c5b72684fc8342e03180a1fbbec65c6afeb70511b9c16181d5e168269e6",
+    "zh:81a7f2efc30e698f476d3e240ee2d82f14eda374852059429fe808ad77b6addd",
+    "zh:826d4ea55b4afceefb332646f21c6b6dc590b39b16e8d9b5d4a4211beb91dc5e",
+    "zh:865600ef669fcdd4ae77515c3fd12565fab0f2a263fa2a6dae562f6fe68ed093",
+    "zh:8e933d1d10fd316e62340175667264f093e4d24457b63d5adf3c424cce22b495",
+    "zh:bf261924f7350074a355e5b9337f3a8054efb20d316e9085f2b5766dfb5126c4",
+    "zh:e28e67dcbd4bbd82798561baf86d3dd04f97e08bbf523dfb9f355564ef27d3d6",
+    "zh:f33cdd3117af8a15f33d375dbe398a5e558730cf6a7a145a479ab68e77572c12",
+    "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+    "zh:f913a0e0708391ccd26fc3458158cc1e10d68dc621bef3a1583328c61a77225d",
+  ]
+}
diff --git a/deployment/live/monitoring/dev/terragrunt.hcl b/deployment/live/monitoring/dev/terragrunt.hcl
new file mode 100644
index 0000000..89aa94f
--- /dev/null
+++ b/deployment/live/monitoring/dev/terragrunt.hcl
@@ -0,0 +1,19 @@
+# Terragrunt configuration for the "dev" monitoring environment.
+
+# Read the parent configuration so that its locals can be reused below.
+locals {
+  common_vars = read_terragrunt_config(find_in_parent_folders())
+}
+
+# Include the parent terragrunt.hcl found by walking up the directory tree.
+include {
+  path = find_in_parent_folders()
+}
+
+# Deploy the monitoring module that lives in this repository.
+terraform {
+  source = "${get_path_to_repo_root()}/deployment/modules/monitoring"
+}
+
+# Module inputs: the parent's locals, with env set for this directory.
+inputs = merge(local.common_vars.locals, { env = "dev" })
diff --git a/deployment/live/monitoring/prod/.terraform.lock.hcl b/deployment/live/monitoring/prod/.terraform.lock.hcl
new file 
mode 100644
index 0000000..17311c3
--- /dev/null
+++ b/deployment/live/monitoring/prod/.terraform.lock.hcl
@@ -0,0 +1,22 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/google" {
+  version     = "5.14.0"
+  constraints = "5.14.0"
+  hashes = [
+    "h1:+2NBnhyLVoyv3smDe+ujn31Iw/+NNqioP64P9W8nX3o=",
+    "zh:3927ef7417d9d8a56077e6655d76c99f4175f9746e39226a00ee0555f8c63f8f",
+    "zh:4b4f521f0779a1797047a8c531afda093aade934b4a49c080fe8d38680b3a52f",
+    "zh:7e880c5b72684fc8342e03180a1fbbec65c6afeb70511b9c16181d5e168269e6",
+    "zh:81a7f2efc30e698f476d3e240ee2d82f14eda374852059429fe808ad77b6addd",
+    "zh:826d4ea55b4afceefb332646f21c6b6dc590b39b16e8d9b5d4a4211beb91dc5e",
+    "zh:865600ef669fcdd4ae77515c3fd12565fab0f2a263fa2a6dae562f6fe68ed093",
+    "zh:8e933d1d10fd316e62340175667264f093e4d24457b63d5adf3c424cce22b495",
+    "zh:bf261924f7350074a355e5b9337f3a8054efb20d316e9085f2b5766dfb5126c4",
+    "zh:e28e67dcbd4bbd82798561baf86d3dd04f97e08bbf523dfb9f355564ef27d3d6",
+    "zh:f33cdd3117af8a15f33d375dbe398a5e558730cf6a7a145a479ab68e77572c12",
+    "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+    "zh:f913a0e0708391ccd26fc3458158cc1e10d68dc621bef3a1583328c61a77225d",
+  ]
+}
diff --git a/deployment/live/monitoring/prod/terragrunt.hcl b/deployment/live/monitoring/prod/terragrunt.hcl
new file mode 100644
index 0000000..ac324ba
--- /dev/null
+++ b/deployment/live/monitoring/prod/terragrunt.hcl
@@ -0,0 +1,19 @@
+# Terragrunt configuration for the "prod" monitoring environment.
+
+# Read the parent configuration so that its locals can be reused below.
+locals {
+  common_vars = read_terragrunt_config(find_in_parent_folders())
+}
+
+# Include the parent terragrunt.hcl found by walking up the directory tree.
+include {
+  path = find_in_parent_folders()
+}
+
+# Deploy the monitoring module that lives in this repository.
+terraform {
+  source = "${get_path_to_repo_root()}/deployment/modules/monitoring"
+}
+
+# Module inputs: the parent's locals, with env set for this directory.
+inputs = merge(local.common_vars.locals, { env = "prod" })
diff --git a/deployment/live/monitoring/terragrunt.hcl b/deployment/live/monitoring/terragrunt.hcl
new file mode 100644
index 0000000..20e881b
--- /dev/null
+++ 
b/deployment/live/monitoring/terragrunt.hcl
@@ -0,0 +1,24 @@
+# Shared settings for every monitoring environment (ci, dev, prod).
+# Child configurations include this file and also read these locals.
+locals {
+  project_id = "checkpoint-distributor"
+  region     = "us-central1"
+  # Relative path from this file to the including child directory.
+  env = path_relative_to_include()
+}
+
+# Terraform state is kept in a GCS bucket named per environment.
+remote_state {
+  backend = "gcs"
+
+  config = {
+    project  = local.project_id
+    location = local.region
+    bucket   = "${local.project_id}-monitoring-${local.env}-tfstate"
+    prefix   = "${path_relative_to_include()}/terraform.tfstate"
+
+    gcs_bucket_labels = {
+      name = "terraform_state_storage"
+    }
+  }
+}
diff --git a/deployment/modules/monitoring/main.tf b/deployment/modules/monitoring/main.tf
new file mode 100644
index 0000000..23beff3
--- /dev/null
+++ b/deployment/modules/monitoring/main.tf
@@ -0,0 +1,104 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+# Project data
+provider "google" {
+  project = var.project_id
+}
+
+data "google_project" "project" {
+  project_id = var.project_id
+}
+
+# This will be configured by terragrunt when deploying
+terraform {
+  backend "gcs" {}
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "5.14.0"
+    }
+  }
+}
+
+# Enable the Cloud Monitoring API; leave it enabled if this module is destroyed.
+resource "google_project_service" "monitoring_api" {
+  service            = "monitoring.googleapis.com"
+  disable_on_destroy = false
+}
+
+locals {
+  # Value of the service_name label selected by the alert queries below.
+  distributor_service = "distributor-service-${var.env}"
+  # Range window used by every rate() in the alert queries below.
+  duration = "5m"
+}
+
+# Alerts when the update-request metric is absent or shows a zero rate.
+resource "google_monitoring_alert_policy" "receiving_updates" {
+  display_name = "Receiving Updates (${var.env})"
+  combiner     = "OR"
+  conditions {
+    display_name = "Requests are present (${var.env})"
+    condition_prometheus_query_language {
+      query               = <<-EOT
+sum(
+  absent(distributor_update_checkpoint_request{service_name="${local.distributor_service}"})
+  OR
+  rate(distributor_update_checkpoint_request{service_name="${local.distributor_service}"}[${local.duration}]) == 0
+)
+EOT
+      duration            = "1800s"
+      evaluation_interval = "60s"
+    }
+  }
+
+  alert_strategy {
+    auto_close = "1800s"
+  }
+
+  # Alert policies can only be created once the Monitoring API is enabled.
+  depends_on = [google_project_service.monitoring_api]
+}
+
+# Alerts when fewer than half of the update requests succeed.
+resource "google_monitoring_alert_policy" "successful_updates" {
+  display_name = "Successful Updates (${var.env})"
+  combiner     = "OR"
+  conditions {
+    display_name = "Success ratio is healthy (${var.env})"
+    condition_prometheus_query_language {
+      # Aggregate both rates before dividing: summing per-series ratios can
+      # exceed 1 when several series match and would hide a low success rate.
+      query               = <<-EOT
+(
+  sum(rate(distributor_update_checkpoint_success{service_name="${local.distributor_service}"}[${local.duration}]))
+  /
+  sum(rate(distributor_update_checkpoint_request{service_name="${local.distributor_service}"}[${local.duration}]))
+) < 0.5
+EOT
+      duration            = "1800s"
+      evaluation_interval = "60s"
+    }
+  }
+
+  alert_strategy {
+    auto_close = "1800s"
+  }
+
+  # Alert policies can only be created once the Monitoring API is enabled.
+  depends_on = [google_project_service.monitoring_api]
+}
diff --git a/deployment/modules/monitoring/outputs.tf b/deployment/modules/monitoring/outputs.tf
new file mode 100644
index 0000000..35828ec
--- /dev/null
+++ b/deployment/modules/monitoring/outputs.tf
@@ -0,0 +1,16 @@
+/**
+ * 
Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
diff --git a/deployment/modules/monitoring/variables.tf b/deployment/modules/monitoring/variables.tf
new file mode 100644
index 0000000..0b9a24f
--- /dev/null
+++ b/deployment/modules/monitoring/variables.tf
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+variable "project_id" {
+  description = "The ID of the project in which the monitoring alert policies are created"
+  type        = string
+}
+
+variable "region" {
+  description = "The region passed in by the shared deployment inputs (not referenced by the monitoring resources)"
+  type        = string
+}
+
+variable "env" {
+  description = "Unique identifier for the env, e.g. ci or prod"
+  type        = string
+}
+