Skip to content

[sled-agent-config-reconciler] Add DumpSetupTask #8146

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions sled-agent/config-reconciler/src/dump_setup_task.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Long-running tokio task responsible for updating the dump device setup in
//! response to changes in available disks.

use crate::InternalDisksReceiver;
use crate::dump_setup::DumpSetup;
use sled_storage::config::MountConfig;
use sled_storage::disk::Disk;
use slog::Logger;
use slog::error;
use std::collections::HashSet;
use std::ops::Deref;
use std::sync::Arc;
use tokio::sync::watch;

/// Spawns the tokio task that updates the dump device setup whenever the set
/// of internal or external disks changes.
///
/// The task is built from the two disk-change receivers, the mount
/// configuration, and `base_log`, and is then handed to `tokio::spawn`. No
/// handle to the spawned task is returned to the caller.
pub(crate) fn spawn(
    internal_disks_rx: InternalDisksReceiver,
    external_disks_rx: watch::Receiver<HashSet<Disk>>,
    mount_config: Arc<MountConfig>,
    base_log: &Logger,
) {
    let task = DumpSetupTask::new(
        internal_disks_rx,
        external_disks_rx,
        mount_config,
        base_log,
    );

    // The join handle is deliberately discarded; the task ends on its own
    // when `run()` returns.
    tokio::spawn(task.run());
}

// State owned by the long-running dump setup task: the two disk-change
// channels it watches, the `DumpSetup` it drives, and the set of disks it most
// recently acted on.
struct DumpSetupTask {
    // Input channels on which we receive updates about disk changes.
    internal_disks_rx: InternalDisksReceiver,
    external_disks_rx: watch::Receiver<HashSet<Disk>>,

    // Invokes dumpadm(8) and savecore(8) when new disks are encountered
    dump_setup: DumpSetup,

    // Set of internal + external disks we most recently passed to `dump_setup`.
    // Starts out empty; it is compared against the currently available disks
    // to decide whether `dump_setup` has to be invoked again.
    last_disks_used: HashSet<Disk>,

    // Child of the base logger, tagged with `component = "DumpSetupTask"`.
    log: Logger,
}

impl DumpSetupTask {
    /// Builds a task that has not yet passed any disks to `dump_setup`.
    ///
    /// `base_log` is forwarded unchanged to `DumpSetup::new`; the task's own
    /// logger is a child of it tagged with `component = "DumpSetupTask"`.
    fn new(
        internal_disks_rx: InternalDisksReceiver,
        external_disks_rx: watch::Receiver<HashSet<Disk>>,
        mount_config: Arc<MountConfig>,
        base_log: &Logger,
    ) -> Self {
        let dump_setup = DumpSetup::new(base_log, mount_config);
        let log = base_log.new(slog::o!("component" => "DumpSetupTask"));

        Self {
            internal_disks_rx,
            external_disks_rx,
            dump_setup,
            last_disks_used: HashSet::new(),
            log,
        }
    }

    /// Task body: refresh the dump setup, then sleep until either disk
    /// channel reports a change, and repeat.
    ///
    /// Returns (ending the task) as soon as either input channel is closed,
    /// which should never happen in production.
    async fn run(mut self) {
        loop {
            self.update_setup_if_needed().await;

            if !self.wait_for_disk_change().await {
                return;
            }
        }
    }

    /// Waits for a change notification on either input channel.
    ///
    /// Returns `true` if one of the channels signalled a change, or `false`
    /// (after logging an error) if one of them has been closed.
    async fn wait_for_disk_change(&mut self) -> bool {
        tokio::select! {
            // Cancel-safe per docs on `changed()`
            changed = self.internal_disks_rx.changed() => {
                if changed.is_ok() {
                    return true;
                }
                error!(
                    self.log,
                    "internal disks channel closed: exiting task"
                );
            }

            // Cancel-safe per docs on `changed()`
            changed = self.external_disks_rx.changed() => {
                if changed.is_ok() {
                    return true;
                }
                error!(
                    self.log,
                    "external disks channel closed: exiting task"
                );
            }
        }

        false
    }

    /// Re-runs dump device setup if the combined set of internal and
    /// external disks differs from the set used last time.
    ///
    /// Reading the channels marks their current values as seen, so a later
    /// `changed()` only fires for newer updates.
    async fn update_setup_if_needed(&mut self) {
        // Start from the internal disks...
        let mut disks_avail: HashSet<Disk> = self
            .internal_disks_rx
            .borrow_and_update_raw_disks()
            .iter()
            .map(|d| d.deref().clone())
            .collect();

        // ...and fold in the external ones.
        disks_avail
            .extend(self.external_disks_rx.borrow_and_update().iter().cloned());

        // Nothing to do if the disk set is the one we already configured.
        if disks_avail == self.last_disks_used {
            return;
        }

        self.dump_setup.update_dumpdev_setup(disks_avail.iter()).await;
        self.last_disks_used = disks_avail;
    }
}
21 changes: 18 additions & 3 deletions sled-agent/config-reconciler/src/handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use camino::Utf8PathBuf;
use illumos_utils::dladm::EtherstubVnic;
use illumos_utils::zpool::PathInPool;
use key_manager::StorageKeyRequester;
use nexus_sled_agent_shared::inventory::InventoryDataset;
Expand All @@ -14,10 +13,12 @@ use omicron_common::disk::DatasetName;
use omicron_common::disk::DiskIdentity;
use sled_agent_api::ArtifactConfig;
use sled_storage::config::MountConfig;
use sled_storage::disk::Disk;
use sled_storage::manager::NestedDatasetConfig;
use sled_storage::manager::NestedDatasetListOptions;
use sled_storage::manager::NestedDatasetLocation;
use slog::Logger;
use std::collections::HashSet;
use std::sync::Arc;
use std::sync::OnceLock;
use tokio::sync::watch;
Expand Down Expand Up @@ -45,6 +46,7 @@ use crate::SledAgentFacilities;
use crate::TimeSyncStatus;
use crate::dataset_serialization_task::DatasetTaskHandle;
use crate::dataset_serialization_task::NestedDatasetMountError;
use crate::dump_setup_task;
use crate::internal_disks::InternalDisksReceiver;
use crate::ledger::LedgerTaskHandle;
use crate::raw_disks;
Expand All @@ -68,6 +70,7 @@ pub struct ConfigReconcilerSpawnToken {
time_sync_config: TimeSyncConfig,
reconciler_result_tx: watch::Sender<ReconcilerResult>,
currently_managed_zpools_tx: watch::Sender<Arc<CurrentlyManagedZpools>>,
external_disks_tx: watch::Sender<HashSet<Disk>>,
ledger_task_log: Logger,
reconciler_task_log: Logger,
}
Expand Down Expand Up @@ -111,6 +114,16 @@ impl ConfigReconcilerHandle {
base_log,
);

// Spawn the task that manages dump devices.
let (external_disks_tx, external_disks_rx) =
watch::channel(HashSet::new());
dump_setup_task::spawn(
internal_disks_rx.clone(),
external_disks_rx,
Arc::clone(&mount_config),
base_log,
);

let (reconciler_result_tx, reconciler_result_rx) =
watch::channel(ReconcilerResult::default());
let (currently_managed_zpools_tx, currently_managed_zpools_rx) =
Expand Down Expand Up @@ -142,6 +155,7 @@ impl ConfigReconcilerHandle {
time_sync_config,
reconciler_result_tx,
currently_managed_zpools_tx,
external_disks_tx,
ledger_task_log: base_log
.new(slog::o!("component" => "SledConfigLedgerTask")),
reconciler_task_log: base_log
Expand All @@ -164,7 +178,6 @@ impl ConfigReconcilerHandle {
U: SledAgentArtifactStore,
>(
&self,
underlay_vnic: EtherstubVnic,
sled_agent_facilities: T,
sled_agent_artifact_store: U,
token: ConfigReconcilerSpawnToken,
Expand All @@ -174,6 +187,7 @@ impl ConfigReconcilerHandle {
time_sync_config,
reconciler_result_tx,
currently_managed_zpools_tx,
external_disks_tx,
ledger_task_log,
reconciler_task_log,
} = token;
Expand All @@ -198,12 +212,13 @@ impl ConfigReconcilerHandle {
}

reconciler_task::spawn(
Arc::clone(self.internal_disks_rx.mount_config()),
key_requester,
time_sync_config,
underlay_vnic,
current_config_rx,
reconciler_result_tx,
currently_managed_zpools_tx,
external_disks_tx,
sled_agent_facilities,
reconciler_task_log,
);
Expand Down
4 changes: 4 additions & 0 deletions sled-agent/config-reconciler/src/internal_disks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ impl InternalDisksReceiver {
)
}

/// Returns a reference to the `Arc<MountConfig>` held by this receiver.
///
/// Callers that need their own handle can `Arc::clone` the result.
pub(crate) fn mount_config(&self) -> &Arc<MountConfig> {
&self.mount_config
}

fn spawn_with_disk_adopter<T: DiskAdopter>(
mount_config: Arc<MountConfig>,
raw_disks_rx: watch::Receiver<IdMap<RawDiskWithId>>,
Expand Down
1 change: 1 addition & 0 deletions sled-agent/config-reconciler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

mod dataset_serialization_task;
mod disks_common;
mod dump_setup_task;
mod handle;
mod internal_disks;
mod ledger;
Expand Down
25 changes: 15 additions & 10 deletions sled-agent/config-reconciler/src/reconciler_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

use chrono::DateTime;
use chrono::Utc;
use illumos_utils::dladm::EtherstubVnic;
use illumos_utils::zpool::PathInPool;
use key_manager::StorageKeyRequester;
use nexus_sled_agent_shared::inventory::OmicronSledConfig;
use sled_storage::config::MountConfig;
use sled_storage::disk::Disk;
use slog::Logger;
use std::collections::HashSet;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
Expand All @@ -23,30 +25,38 @@ use crate::sled_agent_facilities::SledAgentFacilities;
mod external_disks;
mod zones;

use self::external_disks::ExternalDisks;

pub use self::external_disks::CurrentlyManagedZpools;
pub use self::external_disks::CurrentlyManagedZpoolsReceiver;
pub use self::zones::TimeSyncError;
pub use self::zones::TimeSyncStatus;

#[allow(clippy::too_many_arguments)]
pub(crate) fn spawn<T: SledAgentFacilities>(
mount_config: Arc<MountConfig>,
key_requester: StorageKeyRequester,
time_sync_config: TimeSyncConfig,
underlay_vnic: EtherstubVnic,
current_config_rx: watch::Receiver<CurrentSledConfig>,
reconciler_result_tx: watch::Sender<ReconcilerResult>,
currently_managed_zpools_tx: watch::Sender<Arc<CurrentlyManagedZpools>>,
external_disks_tx: watch::Sender<HashSet<Disk>>,
sled_agent_facilities: T,
log: Logger,
) {
let external_disks = ExternalDisks::new(
mount_config,
currently_managed_zpools_tx,
external_disks_tx,
);

tokio::spawn(
ReconcilerTask {
key_requester,
time_sync_config,
underlay_vnic,
current_config_rx,
reconciler_result_tx,
currently_managed_zpools_tx,
external_disks,
sled_agent_facilities,
log,
}
Expand Down Expand Up @@ -123,16 +133,11 @@ struct LatestReconcilerTaskResultInner {
struct ReconcilerTask<T> {
key_requester: StorageKeyRequester,
time_sync_config: TimeSyncConfig,
underlay_vnic: EtherstubVnic,
current_config_rx: watch::Receiver<CurrentSledConfig>,
reconciler_result_tx: watch::Sender<ReconcilerResult>,
currently_managed_zpools_tx: watch::Sender<Arc<CurrentlyManagedZpools>>,
external_disks: ExternalDisks,
sled_agent_facilities: T,
log: Logger,
// TODO where do we want to do dump setup? Needs both internal and external
// disks. Maybe this task, or maybe a task just for dump setup?
// Invokes dumpadm(8) and savecore(8) when new disks are encountered
// dump_setup: DumpSetup,
}

impl<T: SledAgentFacilities> ReconcilerTask<T> {
Expand Down
Loading
Loading