Skip to content

Pull out cockroach-metrics, add minimal stats to inventory #8426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
d924fcd
starting parsing
smklein Jun 3, 2025
6432b23
[test-utils] Parse CockroachDB's chosen HTTP address
smklein Jun 3, 2025
21976c8
fmt
smklein Jun 3, 2025
f512e9f
Merge branch 'main' into crdb-parse-http-addr
smklein Jun 10, 2025
e8a9969
Merge branch 'main' into crdb-parse-http-addr
smklein Jun 17, 2025
60d4517
Merge branch 'main' into crdb-parse-http-addr
smklein Jun 17, 2025
40128bf
Merge branch 'main' into crdb-parse-http-addr
smklein Jun 18, 2025
6684dbe
Proxy cockroach HTTP requests through the admin server
smklein Jun 18, 2025
184eba8
Access prometheus metrics through admin server
smklein Jun 18, 2025
cfa2f13
clippy
smklein Jun 18, 2025
0fe774f
Pull out cockroach-metrics, add minimal stats to inventory
smklein Jun 23, 2025
c400d20
Merge branch 'main' into crdb-parse-http-addr
smklein Jun 23, 2025
de94374
Merge branch 'crdb-parse-http-addr' into crdb-admin-access
smklein Jun 23, 2025
5454b87
Merge branch 'crdb-admin-access' into crdb-prometheus
smklein Jun 23, 2025
81f648f
Merge branch 'crdb-prometheus' into range-in-inventory
smklein Jun 23, 2025
34ea517
EXPECTORATE
smklein Jun 24, 2025
cd46e44
Fix doctests
smklein Jun 24, 2025
4ecca5c
Merge branch 'main' into crdb-parse-http-addr
smklein Jun 26, 2025
74fd628
Merge branch 'crdb-parse-http-addr' into crdb-admin-access
smklein Jun 26, 2025
7d19f04
Merge branch 'crdb-admin-access' into crdb-prometheus
smklein Jun 26, 2025
6276d9c
Merge branch 'crdb-prometheus' into range-in-inventory
smklein Jun 26, 2025
82e542b
Add liveness_live_nodes
smklein Jun 27, 2025
3536254
Add it to inventory
smklein Jun 27, 2025
7bcf3f5
Move cockroach metrics to new crate
smklein Jun 27, 2025
a1bdb8e
Merge branch 'crdb-prometheus' into range-in-inventory
smklein Jun 27, 2025
38a5b3a
more lenient test
smklein Jun 27, 2025
dbadc5b
feedback
smklein Jun 27, 2025
0a2427d
Merge branch 'crdb-prometheus' into range-in-inventory
smklein Jun 30, 2025
1a38870
Merge branch 'main' into range-in-inventory
smklein Jun 30, 2025
396af02
btree beats hash
smklein Jun 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion dev-tools/ls-apis/tests/api_dependencies.out
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Clickhouse Single-Node Cluster Admin (client: clickhouse-admin-single-client)
consumed by: omicron-nexus (omicron/nexus) via 2 paths

CockroachDB Cluster Admin (client: cockroach-admin-client)
consumed by: omicron-nexus (omicron/nexus) via 2 paths
consumed by: omicron-nexus (omicron/nexus) via 3 paths
consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path

Crucible Agent (client: crucible-agent-client)
Expand Down
1 change: 1 addition & 0 deletions nexus/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ nexus-networking.workspace = true
nexus-saga-recovery.workspace = true
nexus-test-interface.workspace = true
num-integer.workspace = true
omicron-cockroach-metrics.workspace = true
openssl.workspace = true
oximeter-client.workspace = true
oximeter-db = { workspace = true, default-features = false, features = [ "oxql" ] }
Expand Down
64 changes: 61 additions & 3 deletions nexus/db-model/src/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use nexus_db_schema::schema::inv_zone_manifest_non_boot;
use nexus_db_schema::schema::inv_zone_manifest_zone;
use nexus_db_schema::schema::{
hw_baseboard_id, inv_caboose, inv_clickhouse_keeper_membership,
inv_collection, inv_collection_error, inv_dataset,
inv_cockroachdb_status, inv_collection, inv_collection_error, inv_dataset,
inv_last_reconciliation_dataset_result,
inv_last_reconciliation_disk_result,
inv_last_reconciliation_orphaned_dataset,
Expand All @@ -56,8 +56,8 @@ use nexus_sled_agent_shared::inventory::{
OmicronZoneDataset, OmicronZoneImageSource, OmicronZoneType,
};
use nexus_types::inventory::{
BaseboardId, Caboose, Collection, NvmeFirmware, PowerState, RotPage,
RotSlot,
BaseboardId, Caboose, CockroachStatus, Collection, NvmeFirmware,
PowerState, RotPage, RotSlot,
};
use omicron_common::api::external;
use omicron_common::api::internal::shared::NetworkInterface;
Expand Down Expand Up @@ -2526,6 +2526,64 @@ impl InvClickhouseKeeperMembership {
}
}

/// Database row in `inv_cockroachdb_status` holding the CockroachDB status
/// recorded for one inventory collection.
///
/// The counters are stored as `Option<i64>`. `InvCockroachStatus::new` and
/// the `TryFrom<InvCockroachStatus> for CockroachStatus` impl convert them
/// from and to the `u64` values carried by `CockroachStatus`.
#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
#[diesel(table_name = inv_cockroachdb_status)]
pub struct InvCockroachStatus {
/// ID of the inventory collection this row belongs to.
pub inv_collection_id: DbTypedUuid<CollectionKind>,
/// Value of `CockroachStatus::ranges_underreplicated`, or `None` if absent.
pub ranges_underreplicated: Option<i64>,
/// Value of `CockroachStatus::liveness_live_nodes`, or `None` if absent.
pub liveness_live_nodes: Option<i64>,
}

impl InvCockroachStatus {
pub fn new(
inv_collection_id: CollectionUuid,
status: &CockroachStatus,
) -> Result<Self, anyhow::Error> {
Ok(Self {
inv_collection_id: inv_collection_id.into(),
ranges_underreplicated: status
.ranges_underreplicated
.map(|n| i64::try_from(n))
.transpose()
.with_context(
|| "Converting ranges_underreplicated from u64 to i64",
)?,
liveness_live_nodes: status
.liveness_live_nodes
.map(|n| i64::try_from(n))
.transpose()
.with_context(
|| "Converting liveness_live_nodes from u64 to i64",
)?,
})
}
}

impl TryFrom<InvCockroachStatus> for CockroachStatus {
    type Error = anyhow::Error;

    /// Converts a database row back into the in-memory `CockroachStatus`.
    ///
    /// Counters that are `None` in the row stay `None`. A counter that is
    /// present but cannot be represented as a `u64` produces an error whose
    /// context names the field and the stored value.
    fn try_from(value: InvCockroachStatus) -> anyhow::Result<Self> {
        // Convert the fields one at a time, in declaration order, so the
        // first failing field is the one reported.
        let ranges_underreplicated = match value.ranges_underreplicated {
            Some(n) => Some(u64::try_from(n).with_context(|| {
                format!("Failed to convert ranges_underreplicated ({n}) to u64")
            })?),
            None => None,
        };

        let liveness_live_nodes = match value.liveness_live_nodes {
            Some(n) => Some(u64::try_from(n).with_context(|| {
                format!("Failed to convert liveness_live_nodes ({n}) to u64")
            })?),
            None => None,
        };

        Ok(Self { ranges_underreplicated, liveness_live_nodes })
    }
}

#[cfg(test)]
mod test {
use nexus_types::inventory::NvmeFirmware;
Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: Version = Version::new(153, 0, 0);
pub const SCHEMA_VERSION: Version = Version::new(154, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(154, "inv_cockroachdb_status"),
KnownVersion::new(153, "chicken-switches"),
KnownVersion::new(152, "ereports"),
KnownVersion::new(151, "zone-image-resolver-inventory"),
Expand Down
63 changes: 62 additions & 1 deletion nexus/db-queries/src/db/datastore/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ use nexus_db_errors::public_error_from_diesel;
use nexus_db_errors::public_error_from_diesel_lookup;
use nexus_db_model::InvCaboose;
use nexus_db_model::InvClickhouseKeeperMembership;
use nexus_db_model::InvCockroachStatus;
use nexus_db_model::InvCollection;
use nexus_db_model::InvCollectionError;
use nexus_db_model::InvConfigReconcilerStatus;
Expand Down Expand Up @@ -368,6 +369,12 @@ impl DataStore {
);
}

let inv_cockroach_status = InvCockroachStatus::new(
collection_id,
&collection.cockroach_status,
)
.map_err(|e| Error::internal_error(&e.to_string()))?;

// This implementation inserts all records associated with the
// collection in one transaction. This is primarily for simplicity. It
// means we don't have to worry about other readers seeing a
Expand Down Expand Up @@ -1361,6 +1368,16 @@ impl DataStore {
.await?;
}

// Insert the cockroach status information we've observed
{
use nexus_db_schema::schema::inv_cockroachdb_status::dsl;
diesel::insert_into(dsl::inv_cockroachdb_status)
.values(inv_cockroach_status)
.execute_async(&conn)
.await?;
}


// Finally, insert the list of errors.
{
use nexus_db_schema::schema::inv_collection_error::dsl as errors_dsl;
Expand Down Expand Up @@ -1647,6 +1664,7 @@ impl DataStore {
nzpools: usize,
nerrors: usize,
nclickhouse_keeper_membership: usize,
ncockroach_status: usize,
}

let NumRowsDeleted {
Expand Down Expand Up @@ -1674,6 +1692,7 @@ impl DataStore {
nzpools,
nerrors,
nclickhouse_keeper_membership,
ncockroach_status,
} =
self.transaction_retry_wrapper("inventory_delete_collection")
.transaction(&conn, |conn| async move {
Expand Down Expand Up @@ -1901,6 +1920,17 @@ impl DataStore {
.execute_async(&conn)
.await?
};
// Remove rows for cockroach status
let ncockroach_status = {
use nexus_db_schema::schema::inv_cockroachdb_status::dsl;
diesel::delete(
dsl::inv_cockroachdb_status.filter(
dsl::inv_collection_id.eq(db_collection_id),
),
)
.execute_async(&conn)
.await?
};

Ok(NumRowsDeleted {
ncollections,
Expand All @@ -1927,6 +1957,7 @@ impl DataStore {
nzpools,
nerrors,
nclickhouse_keeper_membership,
ncockroach_status,
})
})
.await
Expand Down Expand Up @@ -1963,7 +1994,8 @@ impl DataStore {
"nomicron_sled_config_zone_nics" => nomicron_sled_config_zone_nics,
"nzpools" => nzpools,
"nerrors" => nerrors,
"nclickhouse_keeper_membership" => nclickhouse_keeper_membership
"nclickhouse_keeper_membership" => nclickhouse_keeper_membership,
"ncockroach_status" => ncockroach_status,
);

Ok(())
Expand Down Expand Up @@ -3151,6 +3183,34 @@ impl DataStore {
memberships
};

// Load the cockroach status, if it exists.
let cockroach_status: nexus_types::inventory::CockroachStatus = {
use nexus_db_schema::schema::inv_cockroachdb_status::dsl;

let maybe_status = dsl::inv_cockroachdb_status
.filter(dsl::inv_collection_id.eq(db_id))
.select(InvCockroachStatus::as_select())
.first_async(&*conn)
.await
.optional()
.map_err(|e| {
public_error_from_diesel(e, ErrorHandler::Server)
})?;

if let Some(status) = maybe_status {
// If we can read a record of Cockroach Status, use it.
status
.try_into()
.map_err(|e| Error::internal_error(&format!("{e:#}")))?
} else {
// If we have no record of Cockroach Status, make a default one.
//
// This provides backwards compatibility for collections without
// CockroachDB statuses.
nexus_types::inventory::CockroachStatus::default()
}
};

// Finally, build up the sled-agent map using the sled agent and
// omicron zone rows. A for loop is easier to understand than into_iter
// + filter_map + return Result + collect.
Expand Down Expand Up @@ -3330,6 +3390,7 @@ impl DataStore {
rot_pages_found,
sled_agents,
clickhouse_keeper_cluster_membership,
cockroach_status,
})
}
}
Expand Down
8 changes: 8 additions & 0 deletions nexus/db-schema/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1854,6 +1854,14 @@ table! {
}
}

// Diesel schema for the `inv_cockroachdb_status` table. The primary key is
// `inv_collection_id`, so there is at most one row per inventory collection.
table! {
inv_cockroachdb_status (inv_collection_id) {
inv_collection_id -> Uuid,
// Both counters are nullable `Int8` columns; the `InvCockroachStatus` model
// reads them as `Option<i64>`.
ranges_underreplicated -> Nullable<Int8>,
liveness_live_nodes -> Nullable<Int8>,
}
}

/* blueprints */

table! {
Expand Down
2 changes: 2 additions & 0 deletions nexus/inventory/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,13 @@ thiserror.workspace = true
tufaceous-artifact.workspace = true
typed-rng.workspace = true
uuid.workspace = true
omicron-cockroach-metrics.workspace = true
omicron-workspace-hack.workspace = true

[dev-dependencies]
expectorate.workspace = true
gateway-test-utils.workspace = true
httpmock.workspace = true
omicron-sled-agent.workspace = true
regex.workspace = true
tokio.workspace = true
Loading
Loading