Skip to content

Commit d1d3313

Browse files
authored
[sled-agent] Integrate config-reconciler (#8064)
Make `sled-agent` use the new `sled-agent-config-reconciler`. Major visible changes:

* We go from three ledgers (disks, datasets, zones) to one (`omicron-sled-config.json`).
* The sled-agent `PUT /omicron-config` is now mostly asynchronous: it returns after ledgering the new config but before applying it.
* Inventory reports from sled-agent now include `ledgered_sled_config` (the current contents of the ledger), `reconciler_status` (status of the config reconciliation task, including whether it's running or idle), and `last_reconciliation` (status of the most recent completed reconciliation attempt, including the status of all configured disks, datasets, and zones).
1 parent 5ad3fc1 commit d1d3313

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+2762
-5031
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/sled-agent-client/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ progenitor::generate_api!(
6767
OmicronPhysicalDiskConfig = omicron_common::disk::OmicronPhysicalDiskConfig,
6868
OmicronPhysicalDisksConfig = omicron_common::disk::OmicronPhysicalDisksConfig,
6969
OmicronSledConfig = nexus_sled_agent_shared::inventory::OmicronSledConfig,
70-
OmicronSledConfigResult = nexus_sled_agent_shared::inventory::OmicronSledConfigResult,
7170
OmicronZoneConfig = nexus_sled_agent_shared::inventory::OmicronZoneConfig,
7271
OmicronZoneDataset = nexus_sled_agent_shared::inventory::OmicronZoneDataset,
7372
OmicronZoneImageSource = nexus_sled_agent_shared::inventory::OmicronZoneImageSource,

dev-tools/omdb/src/bin/omdb/db.rs

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7351,30 +7351,28 @@ fn inv_collection_print_sleds(collection: &Collection) {
73517351
"LAST RECONCILED CONFIG",
73527352
&last_reconciliation.last_reconciled_config,
73537353
);
7354-
let disk_errs = collect_config_reconciler_errors(
7355-
&last_reconciliation.external_disks,
7356-
);
7357-
let dataset_errs = collect_config_reconciler_errors(
7358-
&last_reconciliation.datasets,
7359-
);
7360-
let zone_errs = collect_config_reconciler_errors(
7361-
&last_reconciliation.zones,
7362-
);
7363-
for (label, errs) in [
7364-
("disk", disk_errs),
7365-
("dataset", dataset_errs),
7366-
("zone", zone_errs),
7367-
] {
7368-
if errs.is_empty() {
7369-
println!(" all {label}s reconciled successfully");
7370-
} else {
7371-
println!(
7372-
" {} {label} reconciliation errors:",
7373-
errs.len()
7374-
);
7375-
for err in errs {
7376-
println!(" {err}");
7377-
}
7354+
}
7355+
let disk_errs = collect_config_reconciler_errors(
7356+
&last_reconciliation.external_disks,
7357+
);
7358+
let dataset_errs =
7359+
collect_config_reconciler_errors(&last_reconciliation.datasets);
7360+
let zone_errs =
7361+
collect_config_reconciler_errors(&last_reconciliation.zones);
7362+
for (label, errs) in [
7363+
("disk", disk_errs),
7364+
("dataset", dataset_errs),
7365+
("zone", zone_errs),
7366+
] {
7367+
if errs.is_empty() {
7368+
println!(" all {label}s reconciled successfully");
7369+
} else {
7370+
println!(
7371+
" {} {label} reconciliation errors:",
7372+
errs.len()
7373+
);
7374+
for err in errs {
7375+
println!(" {err}");
73787376
}
73797377
}
73807378
}

dev-tools/omdb/src/bin/omdb/sled_agent.rs

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,6 @@ enum SledAgentCommands {
3434
#[clap(subcommand)]
3535
Zones(ZoneCommands),
3636

37-
/// print information about zpools
38-
#[clap(subcommand)]
39-
Zpools(ZpoolCommands),
40-
41-
/// print information about datasets
42-
#[clap(subcommand)]
43-
Datasets(DatasetCommands),
44-
4537
/// print information about the local bootstore node
4638
#[clap(subcommand)]
4739
Bootstore(BootstoreCommands),
@@ -97,12 +89,6 @@ impl SledAgentArgs {
9789
SledAgentCommands::Zones(ZoneCommands::List) => {
9890
cmd_zones_list(&client).await
9991
}
100-
SledAgentCommands::Zpools(ZpoolCommands::List) => {
101-
cmd_zpools_list(&client).await
102-
}
103-
SledAgentCommands::Datasets(DatasetCommands::List) => {
104-
cmd_datasets_list(&client).await
105-
}
10692
SledAgentCommands::Bootstore(BootstoreCommands::Status) => {
10793
cmd_bootstore_status(&client).await
10894
}
@@ -129,44 +115,6 @@ async fn cmd_zones_list(
129115
Ok(())
130116
}
131117

132-
/// Runs `omdb sled-agent zpools list`
133-
async fn cmd_zpools_list(
134-
client: &sled_agent_client::Client,
135-
) -> Result<(), anyhow::Error> {
136-
let response = client.zpools_get().await.context("listing zpools")?;
137-
let zpools = response.into_inner();
138-
139-
println!("zpools:");
140-
if zpools.is_empty() {
141-
println!(" <none>");
142-
}
143-
for zpool in &zpools {
144-
println!(" {:?}", zpool);
145-
}
146-
147-
Ok(())
148-
}
149-
150-
/// Runs `omdb sled-agent datasets list`
151-
async fn cmd_datasets_list(
152-
client: &sled_agent_client::Client,
153-
) -> Result<(), anyhow::Error> {
154-
let response = client.datasets_get().await.context("listing datasets")?;
155-
let response = response.into_inner();
156-
157-
println!("dataset configuration @ generation {}:", response.generation);
158-
let datasets = response.datasets;
159-
160-
if datasets.is_empty() {
161-
println!(" <none>");
162-
}
163-
for dataset in &datasets {
164-
println!(" {:?}", dataset);
165-
}
166-
167-
Ok(())
168-
}
169-
170118
/// Runs `omdb sled-agent bootstore status`
171119
async fn cmd_bootstore_status(
172120
client: &sled_agent_client::Client,

dev-tools/omdb/tests/test_all_output.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ async fn test_omdb_usage_errors() {
107107
&["nexus", "sleds"],
108108
&["sled-agent"],
109109
&["sled-agent", "zones"],
110-
&["sled-agent", "zpools"],
111110
&["oximeter", "--help"],
112111
&["oxql", "--help"],
113112
// Misspelled argument

dev-tools/omdb/tests/usage_errors.out

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -909,8 +909,6 @@ Usage: omdb sled-agent [OPTIONS] <COMMAND>
909909

910910
Commands:
911911
zones print information about zones
912-
zpools print information about zpools
913-
datasets print information about datasets
914912
bootstore print information about the local bootstore node
915913
help Print this message or the help of the given subcommand(s)
916914

@@ -949,32 +947,6 @@ Connection Options:
949947
--sled-agent-url <SLED_AGENT_URL> URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=]
950948
--dns-server <DNS_SERVER> [env: OMDB_DNS_SERVER=]
951949

952-
Safety Options:
953-
-w, --destructive Allow potentially-destructive subcommands
954-
=============================================
955-
EXECUTING COMMAND: omdb ["sled-agent", "zpools"]
956-
termination: Exited(2)
957-
---------------------------------------------
958-
stdout:
959-
---------------------------------------------
960-
stderr:
961-
print information about zpools
962-
963-
Usage: omdb sled-agent zpools [OPTIONS] <COMMAND>
964-
965-
Commands:
966-
list Print list of all zpools managed by the sled agent
967-
help Print this message or the help of the given subcommand(s)
968-
969-
Options:
970-
--log-level <LOG_LEVEL> log level filter [env: LOG_LEVEL=] [default: warn]
971-
--color <COLOR> Color output [default: auto] [possible values: auto, always, never]
972-
-h, --help Print help
973-
974-
Connection Options:
975-
--sled-agent-url <SLED_AGENT_URL> URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=]
976-
--dns-server <DNS_SERVER> [env: OMDB_DNS_SERVER=]
977-
978950
Safety Options:
979951
-w, --destructive Allow potentially-destructive subcommands
980952
=============================================

nexus-sled-agent-shared/src/inventory.rs

Lines changed: 84 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,14 @@ use chrono::{DateTime, Utc};
1212
use daft::Diffable;
1313
use id_map::IdMap;
1414
use id_map::IdMappable;
15+
use omicron_common::disk::{DatasetKind, DatasetName};
1516
use omicron_common::ledger::Ledgerable;
1617
use omicron_common::{
1718
api::{
1819
external::{ByteCount, Generation},
1920
internal::shared::{NetworkInterface, SourceNatConfig},
2021
},
21-
disk::{
22-
DatasetConfig, DatasetManagementStatus, DiskManagementStatus,
23-
DiskVariant, OmicronPhysicalDiskConfig,
24-
},
22+
disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig},
2523
update::ArtifactId,
2624
zpool_name::ZpoolName,
2725
};
@@ -132,6 +130,49 @@ pub struct ConfigReconcilerInventory {
132130
pub zones: BTreeMap<OmicronZoneUuid, ConfigReconcilerInventoryResult>,
133131
}
134132

133+
impl ConfigReconcilerInventory {
134+
/// Iterate over all running zones as reported by the last reconciliation
135+
/// result.
136+
///
137+
/// This includes zones that are both present in `last_reconciled_config`
138+
/// and whose status in `zones` indicates "successfully running".
139+
pub fn running_omicron_zones(
140+
&self,
141+
) -> impl Iterator<Item = &OmicronZoneConfig> {
142+
self.zones.iter().filter_map(|(zone_id, result)| match result {
143+
ConfigReconcilerInventoryResult::Ok => {
144+
self.last_reconciled_config.zones.get(zone_id)
145+
}
146+
ConfigReconcilerInventoryResult::Err { .. } => None,
147+
})
148+
}
149+
150+
/// Given a sled config, produce a reconciler result that sled-agent could
151+
/// have emitted if reconciliation succeeded.
152+
///
153+
/// This method should only be used by tests and dev tools; real code should
154+
/// look at the actual `last_reconciliation` value from the parent
155+
/// [`Inventory`].
156+
pub fn debug_assume_success(config: OmicronSledConfig) -> Self {
157+
let external_disks = config
158+
.disks
159+
.iter()
160+
.map(|d| (d.id, ConfigReconcilerInventoryResult::Ok))
161+
.collect();
162+
let datasets = config
163+
.datasets
164+
.iter()
165+
.map(|d| (d.id, ConfigReconcilerInventoryResult::Ok))
166+
.collect();
167+
let zones = config
168+
.zones
169+
.iter()
170+
.map(|z| (z.id, ConfigReconcilerInventoryResult::Ok))
171+
.collect();
172+
Self { last_reconciled_config: config, external_disks, datasets, zones }
173+
}
174+
}
175+
135176
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)]
136177
#[serde(tag = "result", rename_all = "snake_case")]
137178
pub enum ConfigReconcilerInventoryResult {
@@ -187,8 +228,6 @@ pub enum SledRole {
187228
}
188229

189230
/// Describes the set of Reconfigurator-managed configuration elements of a sled
190-
// TODO this struct should have a generation number; at the moment, each of
191-
// the fields has a separate one internally.
192231
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)]
193232
pub struct OmicronSledConfig {
194233
pub generation: Generation,
@@ -223,14 +262,6 @@ impl Ledgerable for OmicronSledConfig {
223262
}
224263
}
225264

226-
/// Result of the currently-synchronous `omicron_config_put` endpoint.
227-
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
228-
#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"]
229-
pub struct OmicronSledConfigResult {
230-
pub disks: Vec<DiskManagementStatus>,
231-
pub datasets: Vec<DatasetManagementStatus>,
232-
}
233-
234265
/// Describes the set of Omicron-managed zones running on a sled
235266
#[derive(
236267
Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash,
@@ -297,6 +328,10 @@ impl OmicronZoneConfig {
297328
Some(self.id),
298329
)
299330
}
331+
332+
pub fn dataset_name(&self) -> Option<DatasetName> {
333+
self.zone_type.dataset_name()
334+
}
300335
}
301336

302337
/// Describes a persistent ZFS dataset associated with an Omicron zone
@@ -583,6 +618,41 @@ impl OmicronZoneType {
583618
| OmicronZoneType::Oximeter { .. } => None,
584619
}
585620
}
621+
622+
/// If this kind of zone has an associated dataset, return the dataset's
623+
/// name. Otherwise, return `None`.
624+
pub fn dataset_name(&self) -> Option<DatasetName> {
625+
let (dataset, dataset_kind) = match self {
626+
OmicronZoneType::BoundaryNtp { .. }
627+
| OmicronZoneType::InternalNtp { .. }
628+
| OmicronZoneType::Nexus { .. }
629+
| OmicronZoneType::Oximeter { .. }
630+
| OmicronZoneType::CruciblePantry { .. } => None,
631+
OmicronZoneType::Clickhouse { dataset, .. } => {
632+
Some((dataset, DatasetKind::Clickhouse))
633+
}
634+
OmicronZoneType::ClickhouseKeeper { dataset, .. } => {
635+
Some((dataset, DatasetKind::ClickhouseKeeper))
636+
}
637+
OmicronZoneType::ClickhouseServer { dataset, .. } => {
638+
Some((dataset, DatasetKind::ClickhouseServer))
639+
}
640+
OmicronZoneType::CockroachDb { dataset, .. } => {
641+
Some((dataset, DatasetKind::Cockroach))
642+
}
643+
OmicronZoneType::Crucible { dataset, .. } => {
644+
Some((dataset, DatasetKind::Crucible))
645+
}
646+
OmicronZoneType::ExternalDns { dataset, .. } => {
647+
Some((dataset, DatasetKind::ExternalDns))
648+
}
649+
OmicronZoneType::InternalDns { dataset, .. } => {
650+
Some((dataset, DatasetKind::InternalDns))
651+
}
652+
}?;
653+
654+
Some(DatasetName::new(dataset.pool_name, dataset_kind))
655+
}
586656
}
587657

588658
/// Like [`OmicronZoneType`], but without any associated data.

nexus/inventory/src/examples.rs

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use gateway_client::types::SpType;
1616
use gateway_types::rot::RotSlot;
1717
use nexus_sled_agent_shared::inventory::Baseboard;
1818
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory;
19-
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult;
2019
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus;
2120
use nexus_sled_agent_shared::inventory::Inventory;
2221
use nexus_sled_agent_shared::inventory::InventoryDataset;
@@ -643,29 +642,9 @@ pub fn sled_agent(
643642
ledgered_sled_config: Option<OmicronSledConfig>,
644643
) -> Inventory {
645644
// Assume the `ledgered_sled_config` was reconciled successfully.
646-
let last_reconciliation = ledgered_sled_config.clone().map(|config| {
647-
let external_disks = config
648-
.disks
649-
.iter()
650-
.map(|d| (d.id, ConfigReconcilerInventoryResult::Ok))
651-
.collect();
652-
let datasets = config
653-
.datasets
654-
.iter()
655-
.map(|d| (d.id, ConfigReconcilerInventoryResult::Ok))
656-
.collect();
657-
let zones = config
658-
.zones
659-
.iter()
660-
.map(|z| (z.id, ConfigReconcilerInventoryResult::Ok))
661-
.collect();
662-
ConfigReconcilerInventory {
663-
last_reconciled_config: config,
664-
external_disks,
665-
datasets,
666-
zones,
667-
}
668-
});
645+
let last_reconciliation = ledgered_sled_config
646+
.clone()
647+
.map(ConfigReconcilerInventory::debug_assume_success);
669648

670649
let reconciler_status = if last_reconciliation.is_some() {
671650
ConfigReconcilerInventoryStatus::Idle {

0 commit comments

Comments
 (0)