From d3f64817f6d29a365f0ba9740cebcf05156db663 Mon Sep 17 00:00:00 2001
From: Benjamin Naecker
Date: Tue, 8 Aug 2023 17:52:35 +0000
Subject: [PATCH] Adds a background task which automatically removes zone bundles

- Adds a background task to the sled agent, running
  `zone_bundle::cleanup_task`, which periodically combs through all the zone
  bundles and removes "low-priority" ones to remain within a percentage of
  the storage dataset quota.
- Adds tools for controlling the priority of a bundle. This currently sorts
  bundles by their timestamp and cause, so that, for example, explicitly
  requested bundles have the highest priority.
- Adds an RPC-like mechanism for asking the cleanup task to report usage
  information, i.e., the number of bytes consumed by zone bundles in the
  storage directories.
- Adds an RPC and sled-agent API for updating the cleanup context, including
  the period of automatic cleanups, the priority order, and the percentage of
  the dataset quota allowed.
- Adds an RPC and sled-agent API for making an explicit request to trigger a
  cleanup.
- Adds a number of tests around the priority ordering and cleanup RPC
  mechanics.

Wrap zone bundling into a single type

- Adds the `ZoneBundler` for managing the creation and listing of zone
  bundles, and for running the background cleanup task. This is created
  early, by the `StorageManager`, and cloned to the other objects that need
  access to bundles.
- Moves public free functions from the `zone_bundle` module into methods on
  the `ZoneBundler`, and calls them from the instance / service managers and
  the sled agent.
- Makes the endpoint for listing all zone bundles in the sled agent accept a
  filter directly and propagate it to the listing routines inside the
  bundler. This removes filtering on the client.
- Additional tests around listing / filtering.

Review feedback

- Add more error variants, to better maintain context and the actual error
  chains.
- Remove zone-specific commands.
- Remove the hand-rolled `PartialOrd` implementation for simplicity, but add
  a comment noting that the variant order is load-bearing.
- Add the handle for the cleanup task to the `ZoneBundler`, and abort it on
  drop.
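
As a quick orientation for reviewers, the sketch below (not part of the
patch) shows how a consumer might drive the new cleanup endpoints through the
generated `sled_agent_client`, much as the updated `zone-bundle` CLI does.
The helper name and the concrete values (period, storage limit, priority
order) are illustrative assumptions; the client methods and types used
(`zone_bundle_utilization`, `zone_bundle_cleanup_context_update`,
`zone_bundle_cleanup`, `CleanupContextUpdate`, `PriorityOrder`,
`PriorityDimension`, `Duration`) are the ones added by this change, and the
`Time`/`Cause` variant names are assumed from the API's "time"/"cause" enum
values.

use anyhow::Context;
use sled_agent_client::types::CleanupContextUpdate;
use sled_agent_client::types::Duration;
use sled_agent_client::types::PriorityDimension;
use sled_agent_client::types::PriorityOrder;
use sled_agent_client::Client;

// Hypothetical helper: report utilization, tighten the cleanup context, and
// then trigger an immediate cleanup against a sled agent.
async fn tighten_bundle_cleanup(client: &Client) -> anyhow::Result<()> {
    // How much of each debug dataset is currently consumed by zone bundles?
    let usage = client
        .zone_bundle_utilization()
        .await
        .context("failed to fetch zone bundle utilization")?;
    for (dir, util) in usage.iter() {
        println!(
            "{dir}: {} of {} bytes used (dataset quota {})",
            util.bytes_used, util.bytes_available, util.dataset_quota,
        );
    }

    // Run automatic cleanups hourly; cap bundles at 20% of the dataset quota;
    // and sort bundles by cause first, then by time, when deciding what to
    // prune. (Values are illustrative, not recommendations.)
    let ctx = CleanupContextUpdate {
        period: Some(Duration { secs: 3600, nanos: 0 }),
        priority: Some(PriorityOrder::from([
            PriorityDimension::Cause,
            PriorityDimension::Time,
        ])),
        storage_limit: Some(20),
    };
    client
        .zone_bundle_cleanup_context_update(&ctx)
        .await
        .context("failed to update the cleanup context")?;

    // Trigger an immediate cleanup and report what was removed per directory.
    let cleaned = client
        .zone_bundle_cleanup()
        .await
        .context("failed to trigger a zone bundle cleanup")?;
    for (dir, counts) in cleaned.iter() {
        println!(
            "{dir}: removed {} bundles ({} bytes)",
            counts.bundles, counts.bytes,
        );
    }
    Ok(())
}

The same operations are exposed on the command line through the new
`utilization`, `set-cleanup-context`, and `cleanup` subcommands of the
`zone-bundle` binary added below.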
--- Cargo.lock | 1 - openapi/sled-agent.json | 362 ++++- schema/zone-bundle-metadata.json | 8 +- sled-agent/Cargo.toml | 1 - sled-agent/src/bin/zone-bundle.rs | 295 +++- sled-agent/src/bootstrap/pre_server.rs | 1 + sled-agent/src/http_entrypoints.rs | 151 +- sled-agent/src/instance.rs | 44 +- sled-agent/src/instance_manager.rs | 5 + sled-agent/src/params.rs | 112 +- sled-agent/src/services.rs | 79 +- sled-agent/src/sled_agent.rs | 161 +- sled-agent/src/storage_manager.rs | 43 +- sled-agent/src/zone_bundle.rs | 2024 +++++++++++++++++++++--- 14 files changed, 2753 insertions(+), 534 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f82acc29c6e..38ff89be0f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5206,7 +5206,6 @@ dependencies = [ "static_assertions", "subprocess", "tar", - "tempfile", "thiserror", "tofino", "tokio", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 5e34227a25b..748c1a4716a 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -10,34 +10,6 @@ "version": "0.0.1" }, "paths": { - "/all-zone-bundles": { - "get": { - "summary": "List all zone bundles that exist, even for now-deleted zones.", - "operationId": "zone_bundle_list_all", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "title": "Array_of_ZoneBundleMetadata", - "type": "array", - "items": { - "$ref": "#/components/schemas/ZoneBundleMetadata" - } - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/cockroachdb": { "post": { "summary": "Initializes a CockroachDB cluster", @@ -554,7 +526,151 @@ } } }, - "/zones/{zone_name}/bundles": { + "/zones/bundle-cleanup": { + "post": { + "summary": "Trigger a zone bundle cleanup.", + "operationId": "zone_bundle_cleanup", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Map_of_CleanupCount", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/CleanupCount" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundle-cleanup/context": { + "get": { + "summary": "Return context used by the zone-bundle cleanup task.", + "operationId": "zone_bundle_cleanup_context", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CleanupContext" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Update context used by the zone-bundle cleanup task.", + "operationId": "zone_bundle_cleanup_context_update", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CleanupContextUpdate" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundle-cleanup/utilization": { + "get": { + "summary": "Return utilization information about all zone bundles.", + "operationId": "zone_bundle_utilization", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": 
"Map_of_BundleUtilization", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/BundleUtilization" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundles": { + "get": { + "summary": "List all zone bundles that exist, even for now-deleted zones.", + "operationId": "zone_bundle_list_all", + "parameters": [ + { + "in": "query", + "name": "filter", + "description": "An optional substring used to filter zone bundles.", + "schema": { + "nullable": true, + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_ZoneBundleMetadata", + "type": "array", + "items": { + "$ref": "#/components/schemas/ZoneBundleMetadata" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundles/{zone_name}": { "get": { "summary": "List the zone bundles that are available for a running zone.", "operationId": "zone_bundle_list", @@ -626,7 +742,7 @@ } } }, - "/zones/{zone_name}/bundles/{bundle_id}": { + "/zones/bundles/{zone_name}/{bundle_id}": { "get": { "summary": "Fetch the binary content of a single zone bundle.", "operationId": "zone_bundle_get", @@ -747,12 +863,137 @@ } }, "schemas": { + "BundleUtilization": { + "description": "The portion of a debug dataset used for zone bundles.", + "type": "object", + "properties": { + "bytes_available": { + "description": "The total number of bytes available for zone bundles.\n\nThis is `dataset_quota` multiplied by the context's storage limit.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "bytes_used": { + "description": "Total bundle usage, in bytes.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "dataset_quota": { + "description": "The total dataset quota, in bytes.", + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "bytes_available", + "bytes_used", + "dataset_quota" + ] + }, "ByteCount": { "description": "Byte count to express memory or storage capacity.", "type": "integer", "format": "uint64", "minimum": 0 }, + "CleanupContext": { + "description": "Context provided for the zone bundle cleanup task.", + "type": "object", + "properties": { + "period": { + "description": "The period on which automatic checks and cleanup is performed.", + "allOf": [ + { + "$ref": "#/components/schemas/CleanupPeriod" + } + ] + }, + "priority": { + "description": "The priority ordering for keeping old bundles.", + "allOf": [ + { + "$ref": "#/components/schemas/PriorityOrder" + } + ] + }, + "storage_limit": { + "description": "The limit on the dataset quota available for zone bundles.", + "allOf": [ + { + "$ref": "#/components/schemas/StorageLimit" + } + ] + } + }, + "required": [ + "period", + "priority", + "storage_limit" + ] + }, + "CleanupContextUpdate": { + "description": "Parameters used to update the zone bundle cleanup context.", + "type": "object", + "properties": { + "period": { + "nullable": true, + "description": "The new period on which automatic cleanups are run.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + }, + "priority": { + "nullable": true, + "description": "The priority ordering for preserving old zone bundles.", + "allOf": [ + { + "$ref": "#/components/schemas/PriorityOrder" + } + ] + }, + "storage_limit": { + 
"nullable": true, + "description": "The new limit on the underlying dataset quota allowed for bundles.", + "type": "integer", + "format": "uint8", + "minimum": 0 + } + } + }, + "CleanupCount": { + "description": "The count of bundles / bytes removed during a cleanup operation.", + "type": "object", + "properties": { + "bundles": { + "description": "The number of bundles removed.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "bytes": { + "description": "The number of bytes removed.", + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "bundles", + "bytes" + ] + }, + "CleanupPeriod": { + "description": "A period on which bundles are automatically cleaned up.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + }, "CrucibleOpts": { "type": "object", "properties": { @@ -1274,6 +1515,25 @@ "M2" ] }, + "Duration": { + "type": "object", + "properties": { + "nanos": { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "secs": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "nanos", + "secs" + ] + }, "Error": { "description": "Error information from a response.", "type": "object", @@ -1952,6 +2212,34 @@ } ] }, + "PriorityDimension": { + "description": "A dimension along with bundles can be sorted, to determine priority.", + "oneOf": [ + { + "description": "Sorting by time, with older bundles with lower priority.", + "type": "string", + "enum": [ + "time" + ] + }, + { + "description": "Sorting by the cause for creating the bundle.", + "type": "string", + "enum": [ + "cause" + ] + } + ] + }, + "PriorityOrder": { + "description": "The priority order for bundles during cleanup.\n\nBundles are sorted along the dimensions in [`PriorityDimension`], with each dimension appearing exactly once. During cleanup, lesser-priority bundles are pruned first, to maintain the dataset quota. 
Note that bundles are sorted by each dimension in the order in which they appear, with each dimension having higher priority than the next.", + "type": "array", + "items": { + "$ref": "#/components/schemas/PriorityDimension" + }, + "minItems": 2, + "maxItems": 2 + }, "SemverVersion": { "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" @@ -2410,6 +2698,12 @@ "last_port" ] }, + "StorageLimit": { + "description": "The limit on space allowed for zone bundles, as a percentage of the overall dataset's quota.", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "TimeSync": { "type": "object", "properties": { @@ -2729,10 +3023,10 @@ "description": "The reason or cause for a zone bundle, i.e., why it was created.", "oneOf": [ { - "description": "Generated in response to an explicit request to the sled agent.", + "description": "Some other, unspecified reason.", "type": "string", "enum": [ - "explicit_request" + "other" ] }, { @@ -2750,10 +3044,10 @@ ] }, { - "description": "Some other, unspecified reason.", + "description": "Generated in response to an explicit request to the sled agent.", "type": "string", "enum": [ - "other" + "explicit_request" ] } ] diff --git a/schema/zone-bundle-metadata.json b/schema/zone-bundle-metadata.json index 561d5f42d35..082856d07cc 100644 --- a/schema/zone-bundle-metadata.json +++ b/schema/zone-bundle-metadata.json @@ -43,10 +43,10 @@ "description": "The reason or cause for a zone bundle, i.e., why it was created.", "oneOf": [ { - "description": "Generated in response to an explicit request to the sled agent.", + "description": "Some other, unspecified reason.", "type": "string", "enum": [ - "explicit_request" + "other" ] }, { @@ -64,10 +64,10 @@ ] }, { - "description": "Some other, unspecified reason.", + "description": "Generated in response to an explicit request to the sled agent.", "type": "string", "enum": [ - "other" + "explicit_request" ] } ] diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index f9e5ad11ed9..ca93c3140e9 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -67,7 +67,6 @@ slog-term.workspace = true smf.workspace = true sp-sim.workspace = true tar.workspace = true -tempfile.workspace = true thiserror.workspace = true tofino.workspace = true tokio = { workspace = true, features = [ "full" ] } diff --git a/sled-agent/src/bin/zone-bundle.rs b/sled-agent/src/bin/zone-bundle.rs index f1492a7c52f..162de6ba53f 100644 --- a/sled-agent/src/bin/zone-bundle.rs +++ b/sled-agent/src/bin/zone-bundle.rs @@ -5,12 +5,18 @@ //! Small CLI to view and inspect zone bundles from the sled agent. 
use anyhow::anyhow; +use anyhow::bail; use anyhow::Context; use camino::Utf8PathBuf; +use clap::Args; use clap::Parser; use clap::Subcommand; use futures::stream::StreamExt; use omicron_common::address::SLED_AGENT_PORT; +use sled_agent_client::types::CleanupContextUpdate; +use sled_agent_client::types::Duration; +use sled_agent_client::types::PriorityDimension; +use sled_agent_client::types::PriorityOrder; use sled_agent_client::Client; use slog::Drain; use slog::Level; @@ -75,6 +81,44 @@ impl ListFields { } } +#[derive(Clone, Copy, Debug, clap::ValueEnum)] +enum UtilizationFields { + Directory, + BytesUsed, + BytesAvailable, + DatasetQuota, + PctAvailable, + PctQuota, +} + +impl std::fmt::Display for UtilizationFields { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use UtilizationFields::*; + match self { + Directory => write!(f, "directory"), + BytesUsed => write!(f, "bytes-used"), + BytesAvailable => write!(f, "bytes-available"), + DatasetQuota => write!(f, "dataset-quota"), + PctAvailable => write!(f, "pct-available"), + PctQuota => write!(f, "pct-quota"), + } + } +} + +impl UtilizationFields { + fn all() -> Vec { + use UtilizationFields::*; + vec![ + Directory, + BytesUsed, + BytesAvailable, + DatasetQuota, + PctAvailable, + PctQuota, + ] + } +} + #[derive(Clone, Debug, Subcommand)] enum Cmd { /// List the zones available for collecting bundles from. @@ -124,6 +168,47 @@ enum Cmd { /// The ID of the bundle to delete. bundle_id: Uuid, }, + /// Fetch the zone bundle cleanup context. + /// + /// This returns the data used to manage automatic cleanup of zone bundles, + /// including the period; the directories searched; and the strategy used to + /// preserve bundles. + #[clap(visible_alias = "context")] + CleanupContext, + /// Set parameters of the zone bundle cleanup context. + #[clap(visible_alias = "set-context")] + SetCleanupContext(SetCleanupContextArgs), + /// Return the utilization of the datasets allocated for zone bundles. + Utilization { + /// Generate parseable output. + #[arg(long, short, default_value_t = false)] + parseable: bool, + /// Fields to print. + #[arg(long, short = 'o', default_values_t = UtilizationFields::all(), value_delimiter = ',')] + fields: Vec, + }, + /// Trigger an explicit request to cleanup low-priority zone bundles. + Cleanup, +} + +// Number of expected sort dimensions. Must match +// `sled_agent::zone_bundle::PriorityOrder::EXPECTED_SIZE`. +const EXPECTED_DIMENSIONS: usize = 2; + +#[derive(Args, Clone, Debug)] +#[group(required = true, multiple = true)] +struct SetCleanupContextArgs { + /// The new period on which to run automatic cleanups, in seconds. + #[arg(long)] + period: Option, + /// The new order used to determine priority when cleaning up bundles. + #[arg(long, value_delimiter = ',')] + priority: Option>, + /// The limit on the underlying dataset quota allowed for zone bundles. + /// + /// This should be expressed as percentage of the dataset quota. + #[arg(long, value_parser = clap::value_parser!(u8).range(0..=100))] + storage_limit: Option, } #[tokio::main] @@ -149,19 +234,10 @@ async fn main() -> anyhow::Result<()> { } Cmd::List { filter, parseable, fields } => { let bundles = client - .zone_bundle_list_all() + .zone_bundle_list_all(filter.as_deref()) .await .context("failed to list zone bundles")? 
- .into_inner() - .into_iter() - .filter(|bundle| { - if let Some(filter) = &filter { - bundle.id.zone_name.contains(filter) - } else { - true - } - }) - .collect::>(); + .into_inner(); if bundles.is_empty() { return Ok(()); } @@ -293,6 +369,203 @@ async fn main() -> anyhow::Result<()> { .await .context("failed to delete zone bundle")?; } + Cmd::CleanupContext => { + let context = client + .zone_bundle_cleanup_context() + .await + .context("failed to fetch cleanup context")?; + println!("Period: {}s", context.period.0.secs); + println!("Priority: {:?}", context.priority.0); + println!("Storage limit: {}%", context.storage_limit.0); + } + Cmd::SetCleanupContext(args) => { + let priority = match args.priority { + None => None, + Some(pri) => { + let Ok(arr): Result<[PriorityDimension; EXPECTED_DIMENSIONS], _> = pri.try_into() else { + bail!("must provide {EXPECTED_DIMENSIONS} priority dimensions"); + }; + Some(PriorityOrder::from(arr)) + } + }; + let ctx = CleanupContextUpdate { + period: args.period.map(|secs| Duration { nanos: 0, secs }), + priority, + storage_limit: args.storage_limit, + }; + client + .zone_bundle_cleanup_context_update(&ctx) + .await + .context("failed to update zone bundle cleanup context")?; + } + Cmd::Utilization { parseable, fields } => { + let utilization_by_dir = client + .zone_bundle_utilization() + .await + .context("failed to get zone bundle utilization")?; + const BYTES_USED_SIZE: usize = 16; + const BYTES_AVAIL_SIZE: usize = 16; + const QUOTA_SIZE: usize = 16; + const PCT_OF_AVAIL_SIZE: usize = 10; + const PCT_OF_QUOTA_SIZE: usize = 10; + if !utilization_by_dir.is_empty() { + use UtilizationFields::*; + if parseable { + for (dir, utilization) in utilization_by_dir.iter() { + for (i, field) in fields.iter().enumerate() { + match field { + Directory => print!("{}", dir), + BytesUsed => { + print!("{}", utilization.bytes_used) + } + BytesAvailable => { + print!("{}", utilization.bytes_available) + } + DatasetQuota => { + print!("{}", utilization.dataset_quota) + } + PctAvailable => print!( + "{}", + as_pct( + utilization.bytes_used, + utilization.bytes_available + ) + ), + PctQuota => print!( + "{}", + as_pct( + utilization.bytes_used, + utilization.dataset_quota + ) + ), + } + if i < fields.len() - 1 { + print!(","); + } + } + println!(); + } + } else { + let dir_col_size = utilization_by_dir + .keys() + .map(|d| d.len()) + .max() + .unwrap(); + for field in fields.iter() { + match field { + Directory => { + print!("{:dir_col_size$}", "Directory") + } + BytesUsed => { + print!("{:BYTES_USED_SIZE$}", "Bytes used") + } + BytesAvailable => print!( + "{:BYTES_AVAIL_SIZE$}", + "Bytes available" + ), + DatasetQuota => { + print!("{:QUOTA_SIZE$}", "Dataset quota") + } + PctAvailable => { + print!("{:PCT_OF_AVAIL_SIZE$}", "% of limit") + } + PctQuota => { + print!("{:PCT_OF_QUOTA_SIZE$}", "% of quota") + } + } + print!(" "); + } + println!(); + for (dir, utilization) in utilization_by_dir.iter() { + for field in fields.iter() { + match field { + Directory => print!("{:dir_col_size$}", dir), + BytesUsed => print!( + "{:BYTES_USED_SIZE$}", + as_human_bytes(utilization.bytes_used) + ), + BytesAvailable => print!( + "{:BYTES_AVAIL_SIZE$}", + as_human_bytes(utilization.bytes_available) + ), + DatasetQuota => print!( + "{:QUOTA_SIZE$}", + as_human_bytes(utilization.dataset_quota) + ), + PctAvailable => print!( + "{:PCT_OF_AVAIL_SIZE$}", + as_pct_str( + utilization.bytes_used, + utilization.bytes_available + ) + ), + PctQuota => print!( + "{:PCT_OF_QUOTA_SIZE$}", + as_pct_str( 
+ utilization.bytes_used, + utilization.dataset_quota + ) + ), + } + print!(" "); + } + println!(); + } + } + } + } + Cmd::Cleanup => { + let cleaned = client + .zone_bundle_cleanup() + .await + .context("failed to trigger zone bundle cleanup")?; + const COUNT_SIZE: usize = 5; + const BYTES_SIZE: usize = 16; + if !cleaned.is_empty() { + let dir_col_size = + cleaned.keys().map(|d| d.len()).max().unwrap(); + println!( + "{:dir_col_size$} {:COUNT_SIZE$} {:BYTES_SIZE$}", + "Directory", "Count", "Bytes", + ); + for (dir, counts) in cleaned.iter() { + println!( + "{:dir_col_size$} {: u64 { + (used * 100) / avail +} + +// Format used / avail as a percentage string. +fn as_pct_str(used: u64, avail: u64) -> String { + format!("{}%", as_pct(used, avail)) +} + +// Format the provided `size` in bytes as a human-friendly byte estimate. +fn as_human_bytes(size: u64) -> String { + const KIB: f64 = 1024.0; + const MIB: f64 = KIB * 1024.0; + const GIB: f64 = MIB * 1024.0; + const TIB: f64 = GIB * 1024.0; + let size = size as f64; + if size >= TIB { + format!("{:0.2} TiB", size / TIB) + } else if size >= GIB { + format!("{:0.2} GiB", size / GIB) + } else if size >= MIB { + format!("{:0.2} MiB", size / MIB) + } else if size >= KIB { + format!("{:0.2} KiB", size / KIB) + } else { + format!("{} B", size) + } +} diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index f3b9d13443e..0899bdd82fe 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -243,6 +243,7 @@ impl BootstrapAgentStartup { config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), storage_manager.resources().clone(), + storage_manager.zone_bundler().clone(), ); Ok(Self { diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 2b6551f899f..e9e569d992a 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -4,16 +4,21 @@ //! 
HTTP entrypoint functions for the sled agent's exposed API +use super::sled_agent::SledAgent; use crate::params::{ - DiskEnsureBody, InstanceEnsureBody, InstancePutMigrationIdsBody, - InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, - ServiceEnsureBody, SledRole, TimeSync, VpcFirewallRulesEnsureBody, - ZoneBundleId, ZoneBundleMetadata, Zpool, + CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, + InstancePutMigrationIdsBody, InstancePutStateBody, + InstancePutStateResponse, InstanceUnregisterResponse, ServiceEnsureBody, + SledRole, TimeSync, VpcFirewallRulesEnsureBody, ZoneBundleId, + ZoneBundleMetadata, Zpool, }; +use crate::sled_agent::Error as SledAgentError; +use crate::zone_bundle; +use camino::Utf8PathBuf; use dropshot::{ endpoint, ApiDescription, FreeformBody, HttpError, HttpResponseCreated, HttpResponseDeleted, HttpResponseHeaders, HttpResponseOk, - HttpResponseUpdatedNoContent, Path, RequestContext, TypedBody, + HttpResponseUpdatedNoContent, Path, Query, RequestContext, TypedBody, }; use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; use omicron_common::api::external::Error; @@ -22,10 +27,9 @@ use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::nexus::UpdateArtifactId; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; use uuid::Uuid; -use super::sled_agent::SledAgent; - type SledApiDescription = ApiDescription; /// Returns a description of the sled agent API @@ -45,6 +49,10 @@ pub fn api() -> SledApiDescription { api.register(zone_bundle_create)?; api.register(zone_bundle_get)?; api.register(zone_bundle_delete)?; + api.register(zone_bundle_utilization)?; + api.register(zone_bundle_cleanup_context)?; + api.register(zone_bundle_cleanup_context_update)?; + api.register(zone_bundle_cleanup)?; api.register(sled_role_get)?; api.register(set_v2p)?; api.register(del_v2p)?; @@ -69,16 +77,24 @@ struct ZonePathParam { zone_name: String, } +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +struct ZoneBundleFilter { + /// An optional substring used to filter zone bundles. + filter: Option, +} + /// List all zone bundles that exist, even for now-deleted zones. #[endpoint { method = GET, - path = "/all-zone-bundles", + path = "/zones/bundles", }] async fn zone_bundle_list_all( rqctx: RequestContext, + query: Query, ) -> Result>, HttpError> { let sa = rqctx.context(); - sa.list_all_zone_bundles() + let filter = query.into_inner().filter; + sa.list_all_zone_bundles(filter.as_deref()) .await .map(HttpResponseOk) .map_err(HttpError::from) @@ -87,7 +103,7 @@ async fn zone_bundle_list_all( /// List the zone bundles that are available for a running zone. #[endpoint { method = GET, - path = "/zones/{zone_name}/bundles", + path = "/zones/bundles/{zone_name}", }] async fn zone_bundle_list( rqctx: RequestContext, @@ -105,7 +121,7 @@ async fn zone_bundle_list( /// Ask the sled agent to create a zone bundle. #[endpoint { method = POST, - path = "/zones/{zone_name}/bundles", + path = "/zones/bundles/{zone_name}", }] async fn zone_bundle_create( rqctx: RequestContext, @@ -123,7 +139,7 @@ async fn zone_bundle_create( /// Fetch the binary content of a single zone bundle. 
#[endpoint { method = GET, - path = "/zones/{zone_name}/bundles/{bundle_id}", + path = "/zones/bundles/{zone_name}/{bundle_id}", }] async fn zone_bundle_get( rqctx: RequestContext, @@ -133,12 +149,15 @@ async fn zone_bundle_get( let zone_name = params.zone_name; let bundle_id = params.bundle_id; let sa = rqctx.context(); - let Some(path) = sa.get_zone_bundle_path(&zone_name, &bundle_id) + let Some(path) = sa.get_zone_bundle_paths(&zone_name, &bundle_id) .await - .map_err(HttpError::from)? else { - return Err(HttpError::for_not_found( - None, - format!("No zone bundle for zone '{}' with ID '{}'", zone_name, bundle_id))); + .map_err(HttpError::from)? + .into_iter() + .next() + else { + return Err(HttpError::for_not_found( + None, + format!("No zone bundle for zone '{}' with ID '{}'", zone_name, bundle_id))); }; let f = tokio::fs::File::open(&path).await.map_err(|e| { HttpError::for_internal_error(format!( @@ -159,7 +178,7 @@ async fn zone_bundle_get( /// Delete a zone bundle. #[endpoint { method = DELETE, - path = "/zones/{zone_name}/bundles/{bundle_id}", + path = "/zones/bundles/{zone_name}/{bundle_id}", }] async fn zone_bundle_delete( rqctx: RequestContext, @@ -169,20 +188,100 @@ async fn zone_bundle_delete( let zone_name = params.zone_name; let bundle_id = params.bundle_id; let sa = rqctx.context(); - let Some(path) = sa.get_zone_bundle_path(&zone_name, &bundle_id) + let paths = sa + .get_zone_bundle_paths(&zone_name, &bundle_id) .await - .map_err(HttpError::from)? else { + .map_err(HttpError::from)?; + if paths.is_empty() { return Err(HttpError::for_not_found( - None, - format!("No zone bundle for zone '{}' with ID '{}'", zone_name, bundle_id))); + None, + format!( + "No zone bundle for zone '{}' with ID '{}'", + zone_name, bundle_id + ), + )); }; - tokio::fs::remove_file(&path).await.map(|_| HttpResponseDeleted()).map_err( - |e| { + for path in paths.into_iter() { + tokio::fs::remove_file(&path).await.map_err(|e| { HttpError::for_internal_error(format!( "Failed to delete zone bundle: {e}" )) - }, - ) + })?; + } + Ok(HttpResponseDeleted()) +} + +/// Return utilization information about all zone bundles. +#[endpoint { + method = GET, + path = "/zones/bundle-cleanup/utilization", +}] +async fn zone_bundle_utilization( + rqctx: RequestContext, +) -> Result< + HttpResponseOk>, + HttpError, +> { + let sa = rqctx.context(); + sa.zone_bundle_utilization() + .await + .map(HttpResponseOk) + .map_err(HttpError::from) +} + +/// Return context used by the zone-bundle cleanup task. +#[endpoint { + method = GET, + path = "/zones/bundle-cleanup/context", +}] +async fn zone_bundle_cleanup_context( + rqctx: RequestContext, +) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.zone_bundle_cleanup_context().await)) +} + +/// Update context used by the zone-bundle cleanup task. 
+#[endpoint { + method = PUT, + path = "/zones/bundle-cleanup/context", +}] +async fn zone_bundle_cleanup_context_update( + rqctx: RequestContext, + body: TypedBody, +) -> Result { + let sa = rqctx.context(); + let params = body.into_inner(); + let new_period = params + .period + .map(zone_bundle::CleanupPeriod::new) + .transpose() + .map_err(|e| HttpError::from(SledAgentError::from(e)))?; + let new_priority = params.priority; + let new_limit = params + .storage_limit + .map(zone_bundle::StorageLimit::new) + .transpose() + .map_err(|e| HttpError::from(SledAgentError::from(e)))?; + sa.update_zone_bundle_cleanup_context(new_period, new_limit, new_priority) + .await + .map(|_| HttpResponseUpdatedNoContent()) + .map_err(HttpError::from) +} + +/// Trigger a zone bundle cleanup. +#[endpoint { + method = POST, + path = "/zones/bundle-cleanup", +}] +async fn zone_bundle_cleanup( + rqctx: RequestContext, +) -> Result< + HttpResponseOk>, + HttpError, +> { + let sa = rqctx.context(); + sa.zone_bundle_cleanup().await.map(HttpResponseOk).map_err(HttpError::from) } /// List the zones that are currently managed by the sled agent. diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 84bc33babf2..baf92af28aa 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -10,12 +10,16 @@ use crate::common::instance::{ }; use crate::instance_manager::InstanceTicket; use crate::nexus::NexusClientWithResolver; +use crate::params::ZoneBundleCause; +use crate::params::ZoneBundleMetadata; use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, }; use crate::profile::*; use crate::storage_manager::StorageResources; +use crate::zone_bundle::BundleError; +use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; use backoff::BackoffError; use futures::lock::{Mutex, MutexGuard}; @@ -33,11 +37,6 @@ use omicron_common::api::internal::shared::{ NetworkInterface, SourceNatConfig, }; use omicron_common::backoff; -//use propolis_client::generated::DiskRequest; -use crate::params::ZoneBundleCause; -use crate::params::ZoneBundleMetadata; -use crate::zone_bundle; -use crate::zone_bundle::BundleError; use propolis_client::Client as PropolisClient; use rand::prelude::SliceRandom; use rand::SeedableRng; @@ -246,6 +245,9 @@ struct InstanceInner { // Storage resources storage: StorageResources, + // Object used to collect zone bundles from this instance when terminated. + zone_bundler: ZoneBundler, + // Object representing membership in the "instance manager". instance_ticket: InstanceTicket, } @@ -549,16 +551,13 @@ impl InstanceInner { }; // Take a zone bundle whenever this instance stops. 
- let context = self - .storage - .zone_bundle_context(&zname, ZoneBundleCause::TerminatedInstance) - .await; - if let Err(e) = zone_bundle::create( - &self.log, - &running_state.running_zone, - &context, - ) - .await + if let Err(e) = self + .zone_bundler + .create( + &running_state.running_zone, + ZoneBundleCause::TerminatedInstance, + ) + .await { error!( self.log, @@ -624,6 +623,7 @@ impl Instance { port_manager: PortManager, nexus_client: NexusClientWithResolver, storage: StorageResources, + zone_bundler: ZoneBundler, ) -> Result { info!(log, "Instance::new w/initial HW: {:?}", initial); let instance = InstanceInner { @@ -655,6 +655,7 @@ impl Instance { running_state: None, nexus_client, storage, + zone_bundler, instance_ticket: ticket, }; @@ -674,18 +675,13 @@ impl Instance { Err(BundleError::Unavailable { name }) } InstanceInner { - ref log, running_state: Some(RunningState { ref running_zone, .. }), .. } => { - let context = inner - .storage - .zone_bundle_context( - &name, - ZoneBundleCause::ExplicitRequest, - ) - .await; - zone_bundle::create(log, running_zone, &context).await + inner + .zone_bundler + .create(running_zone, ZoneBundleCause::ExplicitRequest) + .await } } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 2be8223bcef..0b6d820c918 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -14,6 +14,7 @@ use crate::params::{ }; use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; +use crate::zone_bundle::ZoneBundler; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; @@ -62,6 +63,7 @@ struct InstanceManagerInternal { vnic_allocator: VnicAllocator, port_manager: PortManager, storage: StorageResources, + zone_bundler: ZoneBundler, } /// All instances currently running on the sled. @@ -77,6 +79,7 @@ impl InstanceManager { etherstub: Etherstub, port_manager: PortManager, storage: StorageResources, + zone_bundler: ZoneBundler, ) -> Result { Ok(InstanceManager { inner: Arc::new(InstanceManagerInternal { @@ -89,6 +92,7 @@ impl InstanceManager { vnic_allocator: VnicAllocator::new("Instance", etherstub), port_manager, storage, + zone_bundler, }), }) } @@ -212,6 +216,7 @@ impl InstanceManager { self.inner.port_manager.clone(), self.inner.nexus_client.clone(), self.inner.storage.clone(), + self.inner.zone_bundler.clone(), )?; let instance_clone = instance.clone(); let _old = instances diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index fa4eab265f9..fcd69394139 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -2,8 +2,12 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use chrono::DateTime; -use chrono::Utc; +use crate::zone_bundle::PriorityOrder; +pub use crate::zone_bundle::ZoneBundleCause; +pub use crate::zone_bundle::ZoneBundleId; +pub use crate::zone_bundle::ZoneBundleMetadata; +pub use illumos_utils::opte::params::VpcFirewallRule; +pub use illumos_utils::opte::params::VpcFirewallRulesEnsureBody; use omicron_common::api::internal::nexus::{ DiskRuntimeState, InstanceRuntimeState, }; @@ -13,15 +17,13 @@ use omicron_common::api::internal::shared::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; +pub use sled_hardware::DendriteAsic; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::time::Duration; use thiserror::Error; use uuid::Uuid; -pub use illumos_utils::opte::params::VpcFirewallRule; -pub use illumos_utils::opte::params::VpcFirewallRulesEnsureBody; -pub use sled_hardware::DendriteAsic; - /// Used to request a Disk state change #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)] #[serde(rename_all = "lowercase", tag = "state", content = "instance")] @@ -845,93 +847,13 @@ pub enum SledRole { Scrimlet, } -/// An identifier for a zone bundle. -#[derive( - Clone, - Debug, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -pub struct ZoneBundleId { - /// The name of the zone this bundle is derived from. - pub zone_name: String, - /// The ID for this bundle itself. - pub bundle_id: Uuid, -} - -/// The reason or cause for a zone bundle, i.e., why it was created. -#[derive( - Clone, - Copy, - Debug, - Default, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -#[serde(rename_all = "snake_case")] -#[non_exhaustive] -pub enum ZoneBundleCause { - /// Generated in response to an explicit request to the sled agent. - ExplicitRequest, - /// A zone bundle taken when a sled agent finds a zone that it does not - /// expect to be running. - UnexpectedZone, - /// An instance zone was terminated. - TerminatedInstance, - /// Some other, unspecified reason. - #[default] - Other, -} - -/// Metadata about a zone bundle. -#[derive( - Clone, - Debug, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -pub struct ZoneBundleMetadata { - /// Identifier for this zone bundle - pub id: ZoneBundleId, - /// The time at which this zone bundle was created. - pub time_created: DateTime, - /// A version number for this zone bundle. - pub version: u8, - /// The reason or cause a bundle was created. - pub cause: ZoneBundleCause, -} - -impl ZoneBundleMetadata { - const VERSION: u8 = 0; - - /// Create a new set of metadata for the provided zone. - pub(crate) fn new(zone_name: &str, cause: ZoneBundleCause) -> Self { - Self { - id: ZoneBundleId { - zone_name: zone_name.to_string(), - bundle_id: Uuid::new_v4(), - }, - time_created: Utc::now(), - version: Self::VERSION, - cause, - } - } +/// Parameters used to update the zone bundle cleanup context. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct CleanupContextUpdate { + /// The new period on which automatic cleanups are run. + pub period: Option, + /// The priority ordering for preserving old zone bundles. + pub priority: Option, + /// The new limit on the underlying dataset quota allowed for bundles. 
+ pub storage_limit: Option, } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index e8edea4ec45..cfa1cdb5ace 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -39,8 +39,8 @@ use crate::profile::*; use crate::smf_helper::Service; use crate::smf_helper::SmfHelper; use crate::storage_manager::StorageResources; -use crate::zone_bundle; use crate::zone_bundle::BundleError; +use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; use camino::{Utf8Path, Utf8PathBuf}; use ddm_admin_client::{Client as DdmAdminClient, DdmError}; @@ -370,6 +370,7 @@ pub struct ServiceManagerInner { sled_info: OnceCell, switch_zone_bootstrap_address: Ipv6Addr, storage: StorageResources, + zone_bundler: ZoneBundler, ledger_directory_override: OnceCell, image_directory_override: OnceCell, } @@ -414,6 +415,7 @@ impl ServiceManager { sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, storage: StorageResources, + zone_bundler: ZoneBundler, ) -> Self { let log = log.new(o!("component" => "ServiceManager")); Self { @@ -445,6 +447,7 @@ impl ServiceManager { switch_zone_bootstrap_address: bootstrap_networking .switch_zone_bootstrap_ip, storage, + zone_bundler, ledger_directory_override: OnceCell::new(), image_directory_override: OnceCell::new(), }), @@ -2020,26 +2023,18 @@ impl ServiceManager { &*self.inner.switch_zone.lock().await { if zone.name() == name { - let context = self + return self .inner - .storage - .zone_bundle_context(name, ZoneBundleCause::ExplicitRequest) + .zone_bundler + .create(zone, ZoneBundleCause::ExplicitRequest) .await; - return crate::zone_bundle::create( - &self.inner.log, - zone, - &context, - ) - .await; } } if let Some(zone) = self.inner.zones.lock().await.get(name) { - let context = self + return self .inner - .storage - .zone_bundle_context(name, ZoneBundleCause::ExplicitRequest) - .await; - return crate::zone_bundle::create(&self.inner.log, zone, &context) + .zone_bundler + .create(zone, ZoneBundleCause::ExplicitRequest) .await; } Err(BundleError::NoSuchZone { name: name.to_string() }) @@ -2123,15 +2118,11 @@ impl ServiceManager { "removing an existing zone"; "zone_name" => &expected_zone_name, ); - let context = self + if let Err(e) = self .inner - .storage - .zone_bundle_context( - &expected_zone_name, - ZoneBundleCause::UnexpectedZone, - ) - .await; - if let Err(e) = zone_bundle::create(log, &zone, &context).await + .zone_bundler + .create(&zone, ZoneBundleCause::UnexpectedZone) + .await { error!( log, @@ -3091,6 +3082,12 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; + let resources = StorageResources::new_for_test(); + let zone_bundler = ZoneBundler::new( + log.clone(), + resources.clone(), + Default::default(), + ); let mgr = ServiceManager::new( &log, DdmAdminClient::localhost(&log).unwrap(), @@ -3099,7 +3096,8 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - StorageResources::new_for_test(), + resources, + zone_bundler, ); test_config.override_paths(&mgr); @@ -3132,6 +3130,12 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; + let resources = StorageResources::new_for_test(); + let zone_bundler = ZoneBundler::new( + log.clone(), + resources.clone(), + Default::default(), + ); let mgr = ServiceManager::new( &log, DdmAdminClient::localhost(&log).unwrap(), @@ -3140,7 +3144,8 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - StorageResources::new_for_test(), + resources, + 
zone_bundler, ); test_config.override_paths(&mgr); @@ -3178,6 +3183,12 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. + let resources = StorageResources::new_for_test(); + let zone_bundler = ZoneBundler::new( + log.clone(), + resources.clone(), + Default::default(), + ); let mgr = ServiceManager::new( &log, ddmd_client.clone(), @@ -3186,7 +3197,8 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - StorageResources::new_for_test(), + resources.clone(), + zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3218,7 +3230,8 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - StorageResources::new_for_test(), + resources.clone(), + zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3253,6 +3266,12 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. + let resources = StorageResources::new_for_test(); + let zone_bundler = ZoneBundler::new( + log.clone(), + resources.clone(), + Default::default(), + ); let mgr = ServiceManager::new( &log, ddmd_client.clone(), @@ -3261,7 +3280,8 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - StorageResources::new_for_test(), + resources.clone(), + zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3298,7 +3318,8 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - StorageResources::new_for_test(), + resources.clone(), + zone_bundler.clone(), ); test_config.override_paths(&mgr); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e5c10534b1e..f1fe46da9e2 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -45,7 +45,7 @@ use omicron_common::backoff::{ use sled_hardware::underlay; use sled_hardware::HardwareManager; use slog::Logger; -use std::collections::BTreeSet; +use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -174,6 +174,10 @@ impl From for dropshot::HttpError { BundleError::NoSuchZone { .. } => { HttpError::for_not_found(None, inner.to_string()) } + BundleError::InvalidStorageLimit + | BundleError::InvalidCleanupPeriod => { + HttpError::for_bad_request(None, inner.to_string()) + } _ => HttpError::for_internal_error(err.to_string()), }, e => HttpError::for_internal_error(e.to_string()), @@ -188,9 +192,6 @@ struct SledAgentInner { // ID of the Sled id: Uuid, - // Logger used for generic sled agent operations, e.g., zone bundles. - log: Logger, - // Subnet of the Sled's underlay. // // The Sled Agent's address can be derived from this value. @@ -222,6 +223,9 @@ struct SledAgentInner { // The rack network config provided at RSS time. rack_network_config: Option, + + // Object managing zone bundles. + zone_bundler: zone_bundle::ZoneBundler, } impl SledAgentInner { @@ -339,6 +343,7 @@ impl SledAgent { etherstub.clone(), port_manager.clone(), storage.resources().clone(), + storage.zone_bundler().clone(), )?; match config.vmm_reservoir_percentage { @@ -414,10 +419,10 @@ impl SledAgent { rack_network_config.clone(), )?; + let zone_bundler = storage.zone_bundler().clone(); let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.id, - log: log.clone(), subnet: request.subnet, storage, instances, @@ -435,6 +440,7 @@ impl SledAgent { // request queue? 
nexus_request_queue: NexusRequestQueue::new(), rack_network_config, + zone_bundler, }), log: log.clone(), }; @@ -585,78 +591,9 @@ impl SledAgent { /// List all zone bundles on the system, for any zones live or dead. pub async fn list_all_zone_bundles( &self, + filter: Option<&str>, ) -> Result, Error> { - let mut bundles = BTreeSet::new(); - let log = &self.inner.log; - for path in - self.inner.storage.resources().all_zone_bundle_directories().await - { - debug!(log, "searching zone bundle directory"; "directory" => ?path); - // It's possible that the debug directories themselves do not exist, - // since we create them when we create the first bundles. Return an - // empty set in this case. - let mut entries = match tokio::fs::read_dir(path).await { - Ok(ent) => ent, - Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, - Err(e) => { - return Err(Error::from(BundleError::from( - anyhow::anyhow!("failed to read bundle directory: {e}"), - ))); - } - }; - - // First iterate over all the possible zone-names here. - loop { - let Some(zone_name) = entries - .next_entry() - .await - .map_err(|e| BundleError::from(anyhow::anyhow!("failed to read zone bundle dir entry: {e}")))? else { - break; - }; - - // Enumerate and iterate over all the contained entries, which - // _should_ all be zone bundles. - let mut bundle_entries = tokio::fs::read_dir(zone_name.path()) - .await - .map_err(|e| { - BundleError::from(anyhow::anyhow!( - "failed to read zone directory: {e}" - )) - })?; - loop { - let Some(bundle) = bundle_entries - .next_entry() - .await - .map_err(|e| BundleError::from(anyhow::anyhow!("failed to read zone bundle dir entry: {e}")))? else { - break; - }; - match zone_bundle::extract_zone_bundle_metadata( - bundle.path(), - ) - .await - { - Ok(metadata) => { - debug!( - log, - "found zone bundle"; - "zone_name" => &metadata.id.zone_name, - "id" => %&metadata.id.bundle_id, - "path" => ?bundle.path(), - ); - bundles.insert(metadata); - } - Err(e) => warn!( - log, - "found file in zone bundle directory which doesn't \ - appear to be a valid zone bundle"; - "path" => ?bundle.path(), - "err" => ?e, - ), - } - } - } - } - Ok(bundles.into_iter().collect()) + self.inner.zone_bundler.list(filter).await.map_err(Error::from) } /// List zone bundles for the provided zone. @@ -664,22 +601,7 @@ impl SledAgent { &self, name: &str, ) -> Result, Error> { - // The zone bundles are replicated in several places, so we'll use a set - // to collect them all, to avoid duplicating. - let mut bundles = BTreeSet::new(); - let log = &self.inner.log; - for path in - self.inner.storage.resources().all_zone_bundle_directories().await - { - debug!(log, "searching zone bundle directory"; "directory" => ?path); - bundles.extend( - zone_bundle::list_bundles_for_zone(log, &path, name) - .await? - .into_iter() - .map(|(_path, bdl)| bdl), - ); - } - Ok(bundles.into_iter().collect()) + self.inner.zone_bundler.list_for_zone(name).await.map_err(Error::from) } /// Create a zone bundle for the provided zone. @@ -704,20 +626,17 @@ impl SledAgent { } } - /// Fetch the path to a zone bundle. - pub async fn get_zone_bundle_path( + /// Fetch the paths to all zone bundles with the provided name and ID. 
+ pub async fn get_zone_bundle_paths( &self, name: &str, id: &Uuid, - ) -> Result, Error> { - zone_bundle::get_zone_bundle_path( - &self.inner.log, - &self.inner.storage.resources().all_zone_bundle_directories().await, - name, - id, - ) - .await - .map_err(Error::from) + ) -> Result, Error> { + self.inner + .zone_bundler + .bundle_paths(name, id) + .await + .map_err(Error::from) } /// List the zones that the sled agent is currently managing. @@ -739,6 +658,42 @@ impl SledAgent { .map_err(|e| Error::from(BundleError::from(e))) } + /// Fetch the zone bundle cleanup context. + pub async fn zone_bundle_cleanup_context( + &self, + ) -> zone_bundle::CleanupContext { + self.inner.zone_bundler.cleanup_context().await + } + + /// Update the zone bundle cleanup context. + pub async fn update_zone_bundle_cleanup_context( + &self, + period: Option, + storage_limit: Option, + priority: Option, + ) -> Result<(), Error> { + self.inner + .zone_bundler + .update_cleanup_context(period, storage_limit, priority) + .await + .map_err(Error::from) + } + + /// Fetch the current utilization of the relevant datasets for zone bundles. + pub async fn zone_bundle_utilization( + &self, + ) -> Result, Error> + { + self.inner.zone_bundler.utilization().await.map_err(Error::from) + } + + /// Trigger an explicit request to cleanup old zone bundles. + pub async fn zone_bundle_cleanup( + &self, + ) -> Result, Error> { + self.inner.zone_bundler.cleanup().await.map_err(Error::from) + } + /// Ensures that particular services should be initialized. /// /// These services will be instantiated by this function, will be recorded diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 7153bd33dfd..a2d2b5f8620 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -5,10 +5,9 @@ //! Management of sled-local storage. use crate::nexus::NexusClientWithResolver; -use crate::params::ZoneBundleCause; use crate::storage::dataset::DatasetName; use crate::storage::dump_setup::DumpSetup; -use crate::zone_bundle::ZoneBundleContext; +use crate::zone_bundle::ZoneBundler; use camino::Utf8PathBuf; use derive_more::From; use futures::stream::FuturesOrdered; @@ -351,32 +350,6 @@ impl StorageResources { .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY)) .collect() } - - /// Return context for storing zone bundles. - /// - /// See [`ZoneBundleContext`] for details. - pub async fn zone_bundle_context( - &self, - zone_name: &str, - cause: ZoneBundleCause, - ) -> ZoneBundleContext { - // As of #3713, rotated log files are moved out of their original home, - // and onto longer-term storage some U.2s. Which one houses them is - // effectively random. Add the U.2 debug datasets into the - // `extra_log_dirs` field for search during the zone bundle process. - let extra_log_dirs = self - .all_u2_mountpoints(sled_hardware::disk::U2_DEBUG_DATASET) - .await - .into_iter() - .map(|p| p.join(zone_name)) - .collect(); - ZoneBundleContext { - cause, - storage_dirs: self.all_zone_bundle_directories().await, - extra_log_dirs, - ..Default::default() - } - } } /// Describes the access to the underlay used by the StorageManager. 
@@ -1255,6 +1228,7 @@ struct StorageManagerInner { #[derive(Clone)] pub struct StorageManager { inner: Arc, + zone_bundler: ZoneBundler, } impl StorageManager { @@ -1267,6 +1241,10 @@ impl StorageManager { }; let (tx, rx) = mpsc::channel(30); + let zb_log = log.new(o!("component" => "ZoneBundler")); + let zone_bundler = + ZoneBundler::new(zb_log, resources.clone(), Default::default()); + StorageManager { inner: Arc::new(StorageManagerInner { log: log.clone(), @@ -1286,9 +1264,18 @@ impl StorageManager { worker.do_work(resources).await }), }), + zone_bundler, } } + /// Return a reference to the object used to manage zone bundles. + /// + /// This can be cloned by other code wishing to create and manage their own + /// zone bundles. + pub fn zone_bundler(&self) -> &ZoneBundler { + &self.zone_bundler + } + /// Ensures that the storage manager tracks exactly the provided disks. /// /// This acts similar to a batch [Self::upsert_disk] for all new disks, and diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index f26ecd4c768..31b0211dd3c 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -6,42 +6,452 @@ //! Tools for collecting and inspecting service bundles for zones. -use crate::params::ZoneBundleCause; -use crate::params::ZoneBundleMetadata; +use crate::storage_manager::StorageResources; +use anyhow::anyhow; +use anyhow::Context; use camino::FromPathBufError; use camino::Utf8Path; use camino::Utf8PathBuf; +use chrono::DateTime; +use chrono::Utc; use flate2::bufread::GzDecoder; use illumos_utils::running_zone::is_oxide_smf_log_file; use illumos_utils::running_zone::RunningZone; +use illumos_utils::zfs::ZFS; use illumos_utils::zone::AdmError; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; use slog::Logger; +use std::cmp::Ord; +use std::cmp::Ordering; +use std::cmp::PartialOrd; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::collections::HashSet; use std::io::Cursor; -use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; use std::time::SystemTime; use tar::Archive; use tar::Builder; use tar::Header; +use tokio::process::Command; +use tokio::sync::Mutex; +use tokio::sync::Notify; +use tokio::time::sleep; +use tokio::time::Instant; use uuid::Uuid; -/// Context for creating a bundle of a specified zone. -#[derive(Debug, Default)] -pub struct ZoneBundleContext { - /// The directories into which the zone bundles are written. - pub storage_dirs: Vec, - /// The reason or cause for creating a zone bundle. +/// An identifier for a zone bundle. +#[derive( + Clone, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct ZoneBundleId { + /// The name of the zone this bundle is derived from. + pub zone_name: String, + /// The ID for this bundle itself. + pub bundle_id: Uuid, +} + +/// The reason or cause for a zone bundle, i.e., why it was created. +// +// NOTE: The ordering of the enum variants is important, and should not be +// changed without careful consideration. +// +// The ordering is used when deciding which bundles to remove automatically. In +// addition to time, the cause is used to sort bundles, so changing the variant +// order will change that priority. 
+#[derive( + Clone, + Copy, + Debug, + Default, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +#[serde(rename_all = "snake_case")] +#[non_exhaustive] +pub enum ZoneBundleCause { + /// Some other, unspecified reason. + #[default] + Other, + /// A zone bundle taken when a sled agent finds a zone that it does not + /// expect to be running. + UnexpectedZone, + /// An instance zone was terminated. + TerminatedInstance, + /// Generated in response to an explicit request to the sled agent. + ExplicitRequest, +} + +/// Metadata about a zone bundle. +#[derive( + Clone, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct ZoneBundleMetadata { + /// Identifier for this zone bundle + pub id: ZoneBundleId, + /// The time at which this zone bundle was created. + pub time_created: DateTime, + /// A version number for this zone bundle. + pub version: u8, + /// The reason or cause a bundle was created. pub cause: ZoneBundleCause, - /// Extra directories searched for logfiles for the name zone. +} + +impl ZoneBundleMetadata { + const VERSION: u8 = 0; + + /// Create a new set of metadata for the provided zone. + pub(crate) fn new(zone_name: &str, cause: ZoneBundleCause) -> Self { + Self { + id: ZoneBundleId { + zone_name: zone_name.to_string(), + bundle_id: Uuid::new_v4(), + }, + time_created: Utc::now(), + version: Self::VERSION, + cause, + } + } +} + +/// A type managing zone bundle creation and automatic cleanup. +#[derive(Clone)] +pub struct ZoneBundler { + log: Logger, + inner: Arc>, + // Channel for notifying the cleanup task that it should reevaluate. + notify_cleanup: Arc, + // Tokio task handle running the period cleanup operation. + cleanup_task: Arc>, +} + +impl Drop for ZoneBundler { + fn drop(&mut self) { + self.cleanup_task.abort(); + } +} + +// State shared between tasks, e.g., used when creating a bundle in different +// tasks or between a creation and cleanup. +struct Inner { + resources: StorageResources, + cleanup_context: CleanupContext, + last_cleanup_at: Instant, +} + +impl Inner { + // Return the time at which the next cleanup should occur, and the duration + // until that time. + // + // The instant may be in the past, in which case duration would be 0. + fn next_cleanup(&self) -> (Instant, Duration) { + let next = + self.last_cleanup_at + self.cleanup_context.period.as_duration(); + let delta = next.saturating_duration_since(Instant::now()); + (next, delta) + } + + // Ensure that the zone bundle directories that _can_ exist in fact do. + // + // The zone bundles are stored in a ZFS dataset on each M.2. These datasets + // are created by the storage manager upon request. Until those parent + // datasets exist, the bundle directories themselves cannot be accessed + // either. + // + // This method takes the _expected_ zone bundle directories; creates any + // that can exist but do not, i.e., those whose parent datasets already + // exist; and returns those. + async fn bundle_directories(&self) -> Vec { + let expected = self.resources.all_zone_bundle_directories().await; + let mut out = Vec::with_capacity(expected.len()); + for each in expected.into_iter() { + if tokio::fs::create_dir_all(&each).await.is_ok() { + out.push(each); + } + } + out + } +} + +impl ZoneBundler { + // A task run in the background that periodically cleans up bundles. + // + // This waits for: + // + // - A timeout at the current cleanup period + // - A notification that the cleanup context has changed. 
+ // + // When needed, it actually runs the period cleanup itself, using the + // current context. + async fn periodic_cleanup( + log: Logger, + inner: Arc>, + notify_cleanup: Arc, + ) { + let (mut next_cleanup, mut time_to_next_cleanup) = + inner.lock().await.next_cleanup(); + loop { + info!( + log, + "top of bundle cleanup loop"; + "next_cleanup" => ?&next_cleanup, + "time_to_next_cleanup" => ?time_to_next_cleanup, + ); + + // Wait for the cleanup period to expire, or a notification that the + // context has been changed. + tokio::select! { + _ = sleep(time_to_next_cleanup) => { + info!(log, "running automatic periodic zone bundle cleanup"); + let mut inner_ = inner.lock().await; + let dirs = inner_.bundle_directories().await; + let res = run_cleanup(&log, &dirs, &inner_.cleanup_context).await; + inner_.last_cleanup_at = Instant::now(); + (next_cleanup, time_to_next_cleanup) = inner_.next_cleanup(); + debug!(log, "cleanup completed"; "result" => ?res); + } + _ = notify_cleanup.notified() => { + debug!(log, "notified about cleanup context change"); + let inner_ = inner.lock().await; + (next_cleanup, time_to_next_cleanup) = inner_.next_cleanup(); + } + } + } + } + + /// Create a new zone bundler. /// - /// Logs are periodically archived out of their original location, and onto - /// one or more U.2 drives. This field is used to specify that archive - /// location, so that rotated logs for the zone's services may be found. - pub extra_log_dirs: Vec, - /// Any zone-specific commands that will be part of the zone bundle. + /// This creates an object that manages zone bundles on the system. It can + /// be used to create bundles from running zones, and runs a period task to + /// clean them up to free up space. + pub fn new( + log: Logger, + resources: StorageResources, + cleanup_context: CleanupContext, + ) -> Self { + let notify_cleanup = Arc::new(Notify::new()); + let inner = Arc::new(Mutex::new(Inner { + resources, + cleanup_context, + last_cleanup_at: Instant::now(), + })); + let cleanup_log = log.new(slog::o!("component" => "auto-cleanup-task")); + let notify_clone = notify_cleanup.clone(); + let inner_clone = inner.clone(); + let cleanup_task = Arc::new(tokio::task::spawn( + Self::periodic_cleanup(cleanup_log, inner_clone, notify_clone), + )); + Self { log, inner, notify_cleanup, cleanup_task } + } + + /// Trigger an immediate cleanup of low-priority zone bundles. + pub async fn cleanup( + &self, + ) -> Result, BundleError> { + let mut inner = self.inner.lock().await; + let dirs = inner.bundle_directories().await; + let res = run_cleanup(&self.log, &dirs, &inner.cleanup_context).await; + inner.last_cleanup_at = Instant::now(); + self.notify_cleanup.notify_one(); + res + } + + /// Return the utilization of the system for zone bundles. + pub async fn utilization( + &self, + ) -> Result, BundleError> { + let inner = self.inner.lock().await; + let dirs = inner.bundle_directories().await; + compute_bundle_utilization(&self.log, &dirs, &inner.cleanup_context) + .await + } + + /// Return the context used to periodically clean up zone bundles. + pub async fn cleanup_context(&self) -> CleanupContext { + self.inner.lock().await.cleanup_context + } + + /// Update the context used to periodically clean up zone bundles. 
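+    ///
+    /// A rough sketch of a call site (the `bundler` handle and the values
+    /// here are hypothetical):
+    ///
+    /// ```ignore
+    /// let period = CleanupPeriod::new(Duration::from_secs(300))?;
+    /// let limit = StorageLimit::new(10)?;
+    /// bundler.update_cleanup_context(Some(period), Some(limit), None).await?;
+    /// ```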
+ pub async fn update_cleanup_context( + &self, + new_period: Option, + new_storage_limit: Option, + new_priority: Option, + ) -> Result<(), BundleError> { + let mut inner = self.inner.lock().await; + info!( + self.log, + "received request to update cleanup context"; + "period" => ?new_period, + "priority" => ?new_priority, + "storage_limit" => ?new_storage_limit, + ); + let mut notify_cleanup_task = false; + if let Some(new_period) = new_period { + if new_period < inner.cleanup_context.period { + warn!( + self.log, + "auto cleanup period has been reduced, \ + the cleanup task will be notified" + ); + notify_cleanup_task = true; + } + inner.cleanup_context.period = new_period; + } + if let Some(new_priority) = new_priority { + inner.cleanup_context.priority = new_priority; + } + if let Some(new_storage_limit) = new_storage_limit { + if new_storage_limit < inner.cleanup_context.storage_limit { + notify_cleanup_task = true; + warn!( + self.log, + "storage limit has been lowered, a \ + cleanup will be run immediately" + ); + } + inner.cleanup_context.storage_limit = new_storage_limit; + } + if notify_cleanup_task { + self.notify_cleanup.notify_one(); + } + Ok(()) + } + + /// Create a bundle from the provided zone. + pub async fn create( + &self, + zone: &RunningZone, + cause: ZoneBundleCause, + ) -> Result { + let inner = self.inner.lock().await; + let storage_dirs = inner.bundle_directories().await; + let extra_log_dirs = inner + .resources + .all_u2_mountpoints(sled_hardware::disk::U2_DEBUG_DATASET) + .await + .into_iter() + .map(|p| p.join(zone.name())) + .collect(); + let context = ZoneBundleContext { cause, storage_dirs, extra_log_dirs }; + info!( + self.log, + "creating zone bundle"; + "zone_name" => zone.name(), + "context" => ?context, + ); + create(&self.log, zone, &context).await + } + + /// Return the paths for all bundles of the provided zone and ID. + pub async fn bundle_paths( + &self, + name: &str, + id: &Uuid, + ) -> Result, BundleError> { + let inner = self.inner.lock().await; + let dirs = inner.bundle_directories().await; + get_zone_bundle_paths(&self.log, &dirs, name, id).await + } + + /// List bundles for a zone with the provided name. + pub async fn list_for_zone( + &self, + name: &str, + ) -> Result, BundleError> { + // The zone bundles are replicated in several places, so we'll use a set + // to collect them all, to avoid duplicating. + let mut bundles = BTreeSet::new(); + let inner = self.inner.lock().await; + let dirs = inner.bundle_directories().await; + for dir in dirs.iter() { + bundles.extend( + list_bundles_for_zone(&self.log, &dir, name) + .await? + .into_iter() + .map(|(_path, bdl)| bdl), + ); + } + Ok(bundles.into_iter().collect()) + } + + /// List all zone bundles that match the provided filter, if any. /// - /// These should be specified as a list of strings, as passed into - /// `RunningZone::run_cmd()`. - pub zone_specific_commands: Vec>, + /// The filter is a simple substring match -- any zone bundle with a zone + /// name that contains the filter anywhere will match. If no filter is + /// provided, all extant bundles will be listed. + pub async fn list( + &self, + filter: Option<&str>, + ) -> Result, BundleError> { + // The zone bundles are replicated in several places, so we'll use a set + // to collect them all, to avoid duplicating. 
+ let mut bundles = BTreeSet::new(); + let inner = self.inner.lock().await; + let dirs = inner.bundle_directories().await; + for dir in dirs.iter() { + let mut rd = tokio::fs::read_dir(dir).await.map_err(|err| { + BundleError::ReadDirectory { directory: dir.to_owned(), err } + })?; + while let Some(entry) = rd.next_entry().await.map_err(|err| { + BundleError::ReadDirectory { directory: dir.to_owned(), err } + })? { + let search_dir = Utf8PathBuf::try_from(entry.path())?; + bundles.extend( + filter_zone_bundles(&self.log, &search_dir, |md| { + filter + .map(|filt| md.id.zone_name.contains(filt)) + .unwrap_or(true) + }) + .await? + .into_values(), + ); + } + } + Ok(bundles.into_iter().collect()) + } +} + +// Context for creating a bundle of a specified zone. +#[derive(Debug, Default)] +struct ZoneBundleContext { + // The directories into which the zone bundles are written. + storage_dirs: Vec, + // The reason or cause for creating a zone bundle. + cause: ZoneBundleCause, + // Extra directories searched for logfiles for the name zone. + // + // Logs are periodically archived out of their original location, and onto + // one or more U.2 drives. This field is used to specify that archive + // location, so that rotated logs for the zone's services may be found. + extra_log_dirs: Vec, } // The set of zone-wide commands, which don't require any details about the @@ -61,8 +471,62 @@ const ZONE_BUNDLE_METADATA_FILENAME: &str = "metadata.toml"; /// Errors related to managing service zone bundles. #[derive(Debug, thiserror::Error)] pub enum BundleError { - #[error("I/O error")] - Io(#[from] std::io::Error), + #[error("I/O error running command '{cmd}'")] + Command { + cmd: String, + #[source] + err: std::io::Error, + }, + + #[error("I/O error creating directory '{directory}'")] + CreateDirectory { + directory: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + + #[error("I/O error opening bundle tarball '{path}'")] + OpenBundleFile { + path: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + + #[error("I/O error adding bundle tarball data to '{tarball_path}'")] + AddBundleData { + tarball_path: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + + #[error("I/O error reading bundle tarball data from '{path}'")] + ReadBundleData { + path: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + + #[error("I/O error copying bundle tarball from '{from}' to '{to}'")] + CopyArchive { + from: Utf8PathBuf, + to: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + + #[error("I/O error reading directory '{directory}'")] + ReadDirectory { + directory: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + + #[error("I/O error fetching metadata for '{path}'")] + Metadata { + path: Utf8PathBuf, + #[source] + err: std::io::Error, + }, #[error("TOML serialization failure")] Serialization(#[from] toml::ser::Error), @@ -90,15 +554,56 @@ pub enum BundleError { #[error("Zone '{name}' cannot currently be bundled")] Unavailable { name: String }, + + #[error("Storage limit must be expressed as a percentage in (0, 100]")] + InvalidStorageLimit, + + #[error( + "Cleanup period must be between {min:?} and {max:?}, inclusive", + min = CleanupPeriod::MIN, + max = CleanupPeriod::MAX, + )] + InvalidCleanupPeriod, + + #[error( + "Invalid priority ordering. Each element must appear exactly once." + )] + InvalidPriorityOrder, + + #[error("Cleanup failed")] + Cleanup(#[source] anyhow::Error), } -/// Create a service bundle for the provided zone. 
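+
+// The structured I/O variants above keep both the offending path or command
+// (via `Display`) and the underlying `std::io::Error` (via `#[source]`). A
+// minimal sketch, with a made-up directory:
+//
+//     let err = std::io::Error::new(std::io::ErrorKind::NotFound, "gone");
+//     let e = BundleError::ReadDirectory {
+//         directory: Utf8PathBuf::from("/pool/int/example/debug"),
+//         err,
+//     };
+//     assert!(e.to_string().contains("reading directory"));
+//     assert!(std::error::Error::source(&e).is_some());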
-/// -/// This runs a series of debugging commands in the zone, to collect data about -/// the state of the zone and any Oxide service processes running inside. The -/// data is packaged into a tarball, and placed in the provided output -/// directories. -pub async fn create( +// Helper function to write an array of bytes into the tar archive, with +// the provided name. +fn insert_data( + builder: &mut Builder, + name: &str, + contents: &[u8], +) -> Result<(), BundleError> { + let mtime = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .context("failed to compute mtime")? + .as_secs(); + + let mut hdr = Header::new_ustar(); + hdr.set_size(contents.len().try_into().unwrap()); + hdr.set_mode(0o444); + hdr.set_mtime(mtime); + hdr.set_entry_type(tar::EntryType::Regular); + // NOTE: This internally sets the path and checksum. + builder.append_data(&mut hdr, name, Cursor::new(contents)).map_err(|err| { + BundleError::AddBundleData { tarball_path: name.into(), err } + }) +} + +// Create a service bundle for the provided zone. +// +// This runs a series of debugging commands in the zone, to collect data about +// the state of the zone and any Oxide service processes running inside. The +// data is packaged into a tarball, and placed in the provided output +// directories. +async fn create( log: &Logger, zone: &RunningZone, context: &ZoneBundleContext, @@ -108,12 +613,16 @@ pub async fn create( warn!(log, "no directories available for zone bundles"); return Err(BundleError::NoStorage); } - info!(log, "creating zone bundle"; "zone" => zone.name()); let mut zone_bundle_dirs = Vec::with_capacity(context.storage_dirs.len()); for dir in context.storage_dirs.iter() { let bundle_dir = dir.join(zone.name()); debug!(log, "creating bundle directory"; "dir" => %bundle_dir); - tokio::fs::create_dir_all(&bundle_dir).await?; + tokio::fs::create_dir_all(&bundle_dir).await.map_err(|err| { + BundleError::CreateDirectory { + directory: bundle_dir.to_owned(), + err, + } + })?; zone_bundle_dirs.push(bundle_dir); } @@ -141,7 +650,10 @@ pub async fn create( "file" => %full_path, "error" => ?e, ); - return Err(BundleError::from(e)); + return Err(BundleError::OpenBundleFile { + path: full_path.to_owned(), + err: e, + }); } }; debug!( @@ -155,29 +667,6 @@ pub async fn create( .write(file, flate2::Compression::best()); let mut builder = Builder::new(gz); - // Helper function to write an array of bytes into the tar archive, with - // the provided name. - fn insert_data( - builder: &mut Builder, - name: &str, - contents: &[u8], - ) -> Result<(), BundleError> { - let mtime = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .map_err(|e| anyhow::anyhow!("failed to compute mtime: {e}"))? - .as_secs(); - - let mut hdr = Header::new_ustar(); - hdr.set_size(contents.len().try_into().unwrap()); - hdr.set_mode(0o444); - hdr.set_mtime(mtime); - hdr.set_entry_type(tar::EntryType::Regular); - // NOTE: This internally sets the path and checksum. - builder - .append_data(&mut hdr, name, Cursor::new(contents)) - .map_err(BundleError::from) - } - // Write the metadata file itself, in TOML format. let contents = toml::to_string(&zone_metadata)?; insert_data( @@ -213,38 +702,6 @@ pub async fn create( } } - // Run any caller-requested zone-specific commands. 
- for (i, cmd) in context.zone_specific_commands.iter().enumerate() { - if cmd.is_empty() { - continue; - } - debug!( - log, - "running user-requested zone bundle command"; - "zone" => zone.name(), - "command" => ?cmd, - ); - let output = match zone.run_cmd(cmd) { - Ok(s) => s, - Err(e) => format!("{}", e), - }; - let contents = format!("Command: {:?}\n{}", cmd, output).into_bytes(); - - // We'll insert the index into the filename as well, since it's - // plausible that users will run multiple executions of the same - // command. - let filename = format!("zone-specific-{}-{}", i, &cmd[0]); - if let Err(e) = insert_data(&mut builder, &filename, &contents) { - error!( - log, - "failed to save zone bundle command output"; - "zone" => zone.name(), - "command" => ?cmd, - "error" => ?e, - ); - } - } - // Debugging commands run on the specific processes this zone defines. const ZONE_PROCESS_COMMANDS: [&str; 3] = [ "pfiles", "pstack", @@ -253,7 +710,10 @@ pub async fn create( // the above commands and much more. It seems like overkill now, // however. ]; - let procs = match zone.service_processes() { + let procs = match zone + .service_processes() + .context("failed to enumerate zone service processes") + { Ok(p) => { debug!( log, @@ -270,10 +730,7 @@ pub async fn create( "zone" => zone.name(), "error" => ?e, ); - let err = anyhow::anyhow!( - "failed to enumerate zone service processes: {e}" - ); - return Err(BundleError::from(err)); + return Err(BundleError::from(e)); } }; for svc in procs.into_iter() { @@ -342,7 +799,10 @@ pub async fn create( "log_file" => %svc.log_file, "error" => ?e, ); - return Err(e.into()); + return Err(BundleError::AddBundleData { + tarball_path: svc.log_file.file_name().unwrap().into(), + err: e, + }); } for f in svc.rotated_log_files.iter().chain(archived_log_files.iter()) { debug!( @@ -361,22 +821,31 @@ pub async fn create( "log_file" => %f, "error" => ?e, ); - return Err(e.into()); + return Err(BundleError::AddBundleData { + tarball_path: f.file_name().unwrap().into(), + err: e, + }); } } } // Finish writing out the tarball itself. - builder - .into_inner() - .map_err(|e| anyhow::anyhow!("Failed to build bundle: {e}"))?; + builder.into_inner().context("Failed to build bundle")?; // Copy the bundle to the other locations. We really want the bundles to // be duplicates, not an additional, new bundle. - for other_dir in zone_bundle_dirs[1..].iter() { + // + // TODO-robustness: We should probably create the bundle in a temp dir, and + // copy it to all the final locations. This would make it easier to cleanup + // the final locations should that last copy fail for any of them. + // + // See: https://github.com/oxidecomputer/omicron/issues/3876. + for other_dir in zone_bundle_dirs.iter().skip(1) { let to = other_dir.join(&filename); debug!(log, "copying bundle"; "from" => %full_path, "to" => %to); - tokio::fs::copy(&full_path, to).await?; + tokio::fs::copy(&full_path, &to).await.map_err(|err| { + BundleError::CopyArchive { from: full_path.to_owned(), to, err } + })?; } info!(log, "finished zone bundle"; "metadata" => ?zone_metadata); @@ -471,16 +940,20 @@ async fn find_archived_log_files( // Extract the zone bundle metadata from a file, if it exists. fn extract_zone_bundle_metadata_impl( - path: &std::path::PathBuf, + path: &Utf8PathBuf, ) -> Result { // Build a reader for the whole archive. 
- let reader = std::fs::File::open(path).map_err(BundleError::from)?; + let reader = std::fs::File::open(path).map_err(|err| { + BundleError::OpenBundleFile { path: path.clone(), err } + })?; let buf_reader = std::io::BufReader::new(reader); let gz = GzDecoder::new(buf_reader); let mut archive = Archive::new(gz); // Find the metadata entry, if it exists. - let entries = archive.entries()?; + let entries = archive.entries().map_err(|err| { + BundleError::ReadBundleData { path: path.clone(), err } + })?; let Some(md_entry) = entries // The `Archive::entries` iterator // returns a result, so filter to those @@ -494,69 +967,36 @@ fn extract_zone_bundle_metadata_impl( }) else { return Err(BundleError::from( - anyhow::anyhow!("Zone bundle is missing metadata file") + anyhow!("Zone bundle is missing metadata file") )); }; // Extract its contents and parse as metadata. - let contents = std::io::read_to_string(md_entry)?; + let contents = std::io::read_to_string(md_entry).map_err(|err| { + BundleError::ReadBundleData { path: path.clone(), err } + })?; toml::from_str(&contents).map_err(BundleError::from) } -/// List the extant zone bundles for the provided zone, in the provided -/// directory. -pub async fn list_bundles_for_zone( +// List the extant zone bundles for the provided zone, in the provided +// directory. +async fn list_bundles_for_zone( log: &Logger, path: &Utf8Path, zone_name: &str, ) -> Result, BundleError> { - let mut bundles = Vec::new(); let zone_bundle_dir = path.join(zone_name); - if zone_bundle_dir.is_dir() { - let mut dir = tokio::fs::read_dir(zone_bundle_dir) - .await - .map_err(BundleError::from)?; - while let Some(zone_bundle) = - dir.next_entry().await.map_err(BundleError::from)? - { - let bundle_path = zone_bundle.path(); - debug!( - log, - "checking possible zone bundle"; - "bundle_path" => %bundle_path.display(), - ); - - // Zone bundles _should_ be named like: - // - // .../bundle/zone//.tar.gz. - // - // However, really a zone bundle is any tarball with the - // right metadata file, which contains a TOML-serialized - // `ZoneBundleMetadata` file. Try to create an archive out - // of each file we find in this directory, and parse out a - // metadata file. - let tarball = bundle_path.to_owned(); - let metadata = match extract_zone_bundle_metadata(tarball).await { - Ok(md) => md, - Err(e) => { - error!( - log, - "failed to read zone bundle metadata"; - "error" => ?e, - ); - return Err(e); - } - }; - debug!(log, "found zone bundle"; "metadata" => ?metadata); - bundles.push((Utf8PathBuf::try_from(bundle_path)?, metadata)); - } - } - Ok(bundles) + Ok(filter_zone_bundles(log, &zone_bundle_dir, |md| { + md.id.zone_name == zone_name + }) + .await? + .into_iter() + .collect::>()) } -/// Extract zone bundle metadata from the provided file, if possible. -pub async fn extract_zone_bundle_metadata( - path: PathBuf, +// Extract zone bundle metadata from the provided file, if possible. +async fn extract_zone_bundle_metadata( + path: Utf8PathBuf, ) -> Result { let task = tokio::task::spawn_blocking(move || { extract_zone_bundle_metadata_impl(&path) @@ -564,45 +1004,1273 @@ pub async fn extract_zone_bundle_metadata( task.await? } -/// Get the path to a zone bundle, if it exists. -pub async fn get_zone_bundle_path( +// Find zone bundles in the provided directory, which match the filter function. 
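+//
+// Callers provide a predicate over the parsed metadata. For example,
+// `list_bundles_for_zone` matches the zone name exactly:
+//
+//     filter_zone_bundles(log, &zone_bundle_dir, |md| md.id.zone_name == zone_name)
+//
+// while `ZoneBundler::list` checks whether the name contains a substring.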
+async fn filter_zone_bundles( + log: &Logger, + directory: &Utf8PathBuf, + filter: impl Fn(&ZoneBundleMetadata) -> bool, +) -> Result, BundleError> { + let mut out = BTreeMap::new(); + debug!(log, "searching directory for zone bundles"; "directory" => %directory); + let mut rd = tokio::fs::read_dir(directory).await.map_err(|err| { + BundleError::ReadDirectory { directory: directory.to_owned(), err } + })?; + while let Some(entry) = rd.next_entry().await.map_err(|err| { + BundleError::ReadDirectory { directory: directory.to_owned(), err } + })? { + let path = Utf8PathBuf::try_from(entry.path())?; + debug!(log, "checking path as zone bundle"; "path" => %path); + match extract_zone_bundle_metadata(path.clone()).await { + Ok(md) => { + trace!(log, "extracted zone bundle metadata"; "metadata" => ?md); + if filter(&md) { + trace!(log, "filter matches bundle metadata"; "metadata" => ?md); + out.insert(path, md); + } + } + Err(e) => { + warn!( + log, + "failed to extract zone bundle metadata, skipping"; + "path" => %path, + "reason" => ?e, + ); + } + } + } + Ok(out) +} + +// Get the paths to a zone bundle, if it exists. +// +// Zone bundles are replicated in multiple storage directories. This returns +// every path at which the bundle with the provided ID exists, in the same +// order as `directories`. +async fn get_zone_bundle_paths( log: &Logger, directories: &[Utf8PathBuf], zone_name: &str, id: &Uuid, -) -> Result, BundleError> { +) -> Result, BundleError> { + let mut out = Vec::with_capacity(directories.len()); for path in directories { - debug!(log, "searching zone bundle directory"; "directory" => ?path); - let zone_bundle_dir = path.join(zone_name); - if zone_bundle_dir.is_dir() { - let mut dir = tokio::fs::read_dir(zone_bundle_dir) + out.extend( + filter_zone_bundles(log, path, |md| { + md.id.zone_name == zone_name && md.id.bundle_id == *id + }) + .await? + .into_keys(), + ); + } + Ok(out) +} + +/// The portion of a debug dataset used for zone bundles. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BundleUtilization { + /// The total dataset quota, in bytes. + pub dataset_quota: u64, + /// The total number of bytes available for zone bundles. + /// + /// This is `dataset_quota` multiplied by the context's storage limit. + pub bytes_available: u64, + /// Total bundle usage, in bytes. + pub bytes_used: u64, +} + +#[derive(Clone, Debug, PartialEq)] +struct ZoneBundleInfo { + // The raw metadata for the bundle + metadata: ZoneBundleMetadata, + // The full path to the bundle + path: Utf8PathBuf, + // The number of bytes consumed on disk by the bundle + bytes: u64, +} + +// Enumerate all zone bundles under the provided directory. +async fn enumerate_zone_bundles( + log: &Logger, + dirs: &[Utf8PathBuf], +) -> Result>, BundleError> { + let mut out = BTreeMap::new(); + + // Each of these is a storage directory. + // + // We should have under here zone-names, followed by bundles within each of + // those. + for dir in dirs.iter() { + let mut rd = tokio::fs::read_dir(dir).await.map_err(|err| { + BundleError::ReadDirectory { directory: dir.to_owned(), err } + })?; + let mut info_by_dir = Vec::new(); + while let Some(zone_dir) = rd.next_entry().await.map_err(|err| { + BundleError::ReadDirectory { directory: dir.to_owned(), err } + })? { + let mut zone_rd = tokio::fs::read_dir(zone_dir.path()) .await - .map_err(BundleError::from)?; - while let Some(zone_bundle) = - dir.next_entry().await.map_err(BundleError::from)? 
- { - let path = zone_bundle.path(); - let metadata = match extract_zone_bundle_metadata(path).await { - Ok(md) => md, - Err(e) => { - error!( - log, - "failed to read zone bundle metadata"; - "error" => ?e, - ); - return Err(e); + .map_err(|err| BundleError::ReadDirectory { + directory: zone_dir.path().try_into().unwrap(), + err, + })?; + while let Some(maybe_bundle) = + zone_rd.next_entry().await.map_err(|err| { + BundleError::ReadDirectory { + directory: zone_dir.path().try_into().unwrap(), + err, } - }; - let bundle_id = &metadata.id; - if bundle_id.zone_name == zone_name - && bundle_id.bundle_id == *id + })? + { + // TODO-robustness: What do we do with files that do _not_ + // appear to be valid zone bundles. + // + // On the one hand, someone may have put something there + // intentionally. On the other hand, that would be weird, and we + // _also_ know that it's possible that IO errors happen while + // creating the bundle that render it impossible to recover the + // metadata. So it's plausible that we end up with a lot of + // detritus here in that case. + let path = Utf8PathBuf::try_from(maybe_bundle.path())?; + if let Ok(metadata) = + extract_zone_bundle_metadata(path.clone()).await { - return Utf8PathBuf::try_from(zone_bundle.path()) - .map(|p| Some(p)) - .map_err(BundleError::from); + let info = ZoneBundleInfo { + metadata, + path: path.clone(), + bytes: maybe_bundle + .metadata() + .await + .map_err(|err| BundleError::Metadata { path, err })? + .len(), + }; + info_by_dir.push(info); + } else { + warn!( + log, + "found non-zone-bundle file in zone bundle directory"; + "path" => %path, + ); + } + } + } + out.insert(dir.clone(), info_by_dir); + } + Ok(out) +} + +/// The count of bundles / bytes removed during a cleanup operation. +#[derive(Clone, Copy, Debug, Default, Deserialize, JsonSchema, Serialize)] +pub struct CleanupCount { + /// The number of bundles removed. + bundles: u64, + /// The number of bytes removed. + bytes: u64, +} + +// Run a cleanup, removing old bundles according to the strategy. +// +// Return the number of bundles removed and the new usage. +async fn run_cleanup( + log: &Logger, + storage_dirs: &[Utf8PathBuf], + context: &CleanupContext, +) -> Result, BundleError> { + // First, determine how much space we are allowed to use and have used. + // + // Let's avoid doing anything at all if we're still within the limits. + let usages = compute_bundle_utilization(log, storage_dirs, context).await?; + if usages.values().all(|usage| usage.bytes_used <= usage.bytes_available) { + debug!(log, "all usages below storage limit, returning"); + return Ok(BTreeMap::new()); + } + + // There's some work to do, let's enumerate all the bundles. + let bundles = enumerate_zone_bundles(log, &storage_dirs).await?; + debug!( + log, + "enumerated {} zone bundles across {} directories", + bundles.values().map(Vec::len).sum::(), + bundles.len(), + ); + + // Remove bundles from each storage directory, until we fall below the + // number of bytes we would like to use to satisfy the storage limit. + let mut cleanup_counts = BTreeMap::new(); + for (dir, mut info) in bundles.into_iter() { + debug!( + log, + "cleaning up bundles from directory"; + "directory" => dir.as_str() + ); + let mut count = CleanupCount::default(); + + // Sort all the bundles in the current directory, using the priority + // described in `context.priority`. 
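+        //
+        // The comparator sorts ascending by priority, so the lowest-priority
+        // bundles (with the default order: lowest-ranked cause first, then
+        // oldest) end up at the front and are removed first below.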
+ info.sort_by(|lhs, rhs| context.priority.compare_bundles(lhs, rhs)); + let current_usage = usages.get(&dir).unwrap(); + + // Remove bundles until we fall below the threshold. + let mut n_bytes = current_usage.bytes_used; + for each in info.into_iter() { + if n_bytes <= current_usage.bytes_available { + break; + } + tokio::fs::remove_file(&each.path).await.map_err(|_| { + BundleError::Cleanup(anyhow!("failed to remove bundle")) + })?; + trace!(log, "removed old zone bundle"; "info" => ?&each); + n_bytes = n_bytes.saturating_sub(each.bytes); + count.bundles += 1; + count.bytes += each.bytes; + } + + cleanup_counts.insert(dir, count); + } + info!(log, "finished bundle cleanup"; "cleanup_counts" => ?&cleanup_counts); + Ok(cleanup_counts) +} + +// Return the total utilization for all zone bundles. +async fn compute_bundle_utilization( + log: &Logger, + storage_dirs: &[Utf8PathBuf], + context: &CleanupContext, +) -> Result, BundleError> { + let mut out = BTreeMap::new(); + for dir in storage_dirs.iter() { + debug!(log, "computing bundle usage"; "directory" => %dir); + // Fetch the ZFS dataset quota. + let dataset_quota = zfs_quota(dir).await?; + debug!(log, "computed dataset quota"; "quota" => dataset_quota); + + // Compute the bytes available, using the provided storage limit. + let bytes_available = + context.storage_limit.bytes_available(dataset_quota); + debug!( + log, + "computed bytes available"; + "storage_limit" => %context.storage_limit, + "bytes_available" => bytes_available + ); + + // Compute the size of the actual storage directory. + // + // TODO-correctness: This takes into account the directories themselves, + // and may be not quite what we want. But it is very easy and pretty + // close. + let bytes_used = disk_usage(dir).await?; + debug!(log, "computed bytes used"; "bytes_used" => bytes_used); + out.insert( + dir.clone(), + BundleUtilization { dataset_quota, bytes_available, bytes_used }, + ); + } + Ok(out) +} + +/// Context provided for the zone bundle cleanup task. +#[derive( + Clone, Copy, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize, +)] +pub struct CleanupContext { + /// The period on which automatic checks and cleanup is performed. + pub period: CleanupPeriod, + /// The limit on the dataset quota available for zone bundles. + pub storage_limit: StorageLimit, + /// The priority ordering for keeping old bundles. + pub priority: PriorityOrder, +} + +// Return the number of bytes occupied by the provided directory. +// +// This returns an error if: +// +// - The "du" command fails +// - Parsing stdout fails +// - Parsing the actual size as a u64 fails +async fn disk_usage(path: &Utf8PathBuf) -> Result { + // Each OS implements slightly different `du` options. + // + // Linux and illumos support the "apparent" size in bytes, though using + // different options. macOS doesn't support bytes at all, and has a minimum + // block size of 512. + // + // We'll suffer the lower resolution on macOS, and get higher resolution on + // the others. + cfg_if::cfg_if! 
{ + if #[cfg(target_os = "illumos")] { + const BLOCK_SIZE: u64 = 1; + const DU_ARG: &str = "-A"; + } else if #[cfg(target_os = "linux")] { + const BLOCK_SIZE: u64 = 1; + const DU_ARG: &str = "-b"; + } else if #[cfg(target_os = "macos")] { + const BLOCK_SIZE: u64 = 512; + const DU_ARG: &str = "-k"; + } else { + compile_error!("unsupported target OS"); + } + } + const DU: &str = "/usr/bin/du"; + let args = &[DU_ARG, "-s", path.as_str()]; + let output = Command::new(DU).args(args).output().await.map_err(|err| { + BundleError::Command { cmd: format!("{DU} {}", args.join(" ")), err } + })?; + let err = |msg: &str| { + BundleError::Cleanup(anyhow!( + "failed to fetch disk usage for {}: {}", + path, + msg, + )) + }; + if !output.status.success() { + return Err(err("du command failed")); + } + let Ok(s) = std::str::from_utf8(&output.stdout) else { + return Err(err("non-UTF8 stdout")); + }; + let Some(line) = s.lines().next() else { + return Err(err("no lines in du output")); + }; + let Some(part) = line + .trim() + .split_ascii_whitespace() + .next() else { + return Err(err("no disk usage size computed in output")); + }; + part.parse() + .map(|x: u64| x.saturating_mul(BLOCK_SIZE)) + .map_err(|_| err("failed to parse du output")) +} + +// Return the quota for a ZFS dataset, or the available size. +// +// This fails if: +// +// - The "zfs" command fails +// - Parsing stdout fails +// - Parsing the actual quota as a u64 fails +async fn zfs_quota(path: &Utf8PathBuf) -> Result { + let args = &["list", "-Hpo", "quota,avail", path.as_str()]; + let output = + Command::new(ZFS).args(args).output().await.map_err(|err| { + BundleError::Command { + cmd: format!("{ZFS} {}", args.join(" ")), + err, + } + })?; + let err = |msg: &str| { + BundleError::Cleanup(anyhow!( + "failed to fetch ZFS quota for {}: {}", + path, + msg, + )) + }; + if !output.status.success() { + return Err(err("zfs list command failed")); + } + let Ok(s) = std::str::from_utf8(&output.stdout) else { + return Err(err("non-UTF8 stdout")); + }; + let Some(line) = s.lines().next() else { + return Err(err("no lines in zfs list output")); + }; + let mut parts = line.split_ascii_whitespace(); + let quota = parts.next().ok_or_else(|| err("no quota part of line"))?; + let avail = parts.next().ok_or_else(|| err("no avail part of line"))?; + + // Parse the available space, which is always defined. + let avail = avail + .trim() + .parse() + .map_err(|_| err("failed to parse available space"))?; + + // Quotas can be reported a few different ways. + // + // If the dataset is a volume (which should not happen, but we don't enforce + // here), then this is a literal dash `-`. Something without a quota is + // reported as `0`. Anything else is an integer. + // + // No quota is reported as `u64::MAX`. + match quota.trim() { + "-" | "0" => Ok(avail), + x => x.parse().or(Ok(avail)), + } +} + +/// The limit on space allowed for zone bundles, as a percentage of the overall +/// dataset's quota. +#[derive( + Clone, + Copy, + Debug, + Deserialize, + JsonSchema, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct StorageLimit(u8); + +impl std::fmt::Display for StorageLimit { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}%", self.as_u8()) + } +} + +impl Default for StorageLimit { + fn default() -> Self { + StorageLimit(25) + } +} + +impl StorageLimit { + /// Minimum percentage of dataset quota supported. + pub const MIN: Self = Self(0); + + /// Maximum percentage of dataset quota supported. 
+ pub const MAX: Self = Self(50); + + /// Construct a new limit allowed for zone bundles. + /// + /// This should be expressed as a percentage, in the range (Self::MIN, + /// Self::MAX]. + pub const fn new(percentage: u8) -> Result { + if percentage > Self::MIN.0 && percentage <= Self::MAX.0 { + Ok(Self(percentage)) + } else { + Err(BundleError::InvalidStorageLimit) + } + } + + /// Return the contained quota percentage. + pub const fn as_u8(&self) -> u8 { + self.0 + } + + // Compute the number of bytes available from a dataset quota, in bytes. + const fn bytes_available(&self, dataset_quota: u64) -> u64 { + (dataset_quota * self.as_u8() as u64) / 100 + } +} + +/// A dimension along with bundles can be sorted, to determine priority. +#[derive( + Clone, + Copy, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Serialize, + Ord, + PartialEq, + PartialOrd, +)] +#[serde(rename_all = "snake_case")] +pub enum PriorityDimension { + /// Sorting by time, with older bundles with lower priority. + Time, + /// Sorting by the cause for creating the bundle. + Cause, + // TODO-completeness: Support zone or zone type (e.g., service vs instance)? +} + +/// The priority order for bundles during cleanup. +/// +/// Bundles are sorted along the dimensions in [`PriorityDimension`], with each +/// dimension appearing exactly once. During cleanup, lesser-priority bundles +/// are pruned first, to maintain the dataset quota. Note that bundles are +/// sorted by each dimension in the order in which they appear, with each +/// dimension having higher priority than the next. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct PriorityOrder([PriorityDimension; PriorityOrder::EXPECTED_SIZE]); + +impl std::ops::Deref for PriorityOrder { + type Target = [PriorityDimension; PriorityOrder::EXPECTED_SIZE]; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Default for PriorityOrder { + fn default() -> Self { + Self::DEFAULT + } +} + +impl PriorityOrder { + // NOTE: Must match the number of variants in `PriorityDimension`. + const EXPECTED_SIZE: usize = 2; + const DEFAULT: Self = + Self([PriorityDimension::Cause, PriorityDimension::Time]); + + /// Construct a new priority order. + /// + /// This requires that each dimension appear exactly once. + pub fn new(dims: &[PriorityDimension]) -> Result { + if dims.len() != Self::EXPECTED_SIZE { + return Err(BundleError::InvalidPriorityOrder); + } + let mut seen = HashSet::new(); + for dim in dims.iter() { + if !seen.insert(dim) { + return Err(BundleError::InvalidPriorityOrder); + } + } + Ok(Self(dims.try_into().unwrap())) + } + + // Order zone bundle info according to the contained priority. + // + // We sort the info by each dimension, in the order in which it appears. + // That means earlier dimensions have higher priority than later ones. + fn compare_bundles( + &self, + lhs: &ZoneBundleInfo, + rhs: &ZoneBundleInfo, + ) -> Ordering { + for dim in self.0.iter() { + let ord = match dim { + PriorityDimension::Cause => { + lhs.metadata.cause.cmp(&rhs.metadata.cause) + } + PriorityDimension::Time => { + lhs.metadata.time_created.cmp(&rhs.metadata.time_created) } + }; + if matches!(ord, Ordering::Equal) { + continue; + } + return ord; + } + Ordering::Equal + } +} + +/// A period on which bundles are automatically cleaned up. 
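+///
+/// The default period is ten minutes. A sketch of constructing one
+/// explicitly:
+///
+/// ```ignore
+/// let hourly = CleanupPeriod::new(Duration::from_secs(60 * 60))?;
+/// assert!(CleanupPeriod::new(Duration::from_secs(1)).is_err()); // below MIN
+/// ```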
+#[derive( + Clone, Copy, Deserialize, JsonSchema, PartialEq, PartialOrd, Serialize, +)] +pub struct CleanupPeriod(Duration); + +impl Default for CleanupPeriod { + fn default() -> Self { + Self(Duration::from_secs(600)) + } +} + +impl CleanupPeriod { + /// The minimum supported cleanup period. + pub const MIN: Self = Self(Duration::from_secs(60)); + + /// The maximum supported cleanup period. + pub const MAX: Self = Self(Duration::from_secs(60 * 60 * 24)); + + /// Construct a new cleanup period, checking that it's valid. + pub fn new(duration: Duration) -> Result { + if duration >= Self::MIN.as_duration() + && duration <= Self::MAX.as_duration() + { + Ok(Self(duration)) + } else { + Err(BundleError::InvalidCleanupPeriod) + } + } + + /// Return the period as a duration. + pub const fn as_duration(&self) -> Duration { + self.0 + } +} + +impl TryFrom for CleanupPeriod { + type Error = BundleError; + + fn try_from(duration: Duration) -> Result { + Self::new(duration) + } +} + +impl std::fmt::Debug for CleanupPeriod { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + self.0.fmt(f) + } +} + +#[cfg(test)] +mod tests { + use super::disk_usage; + use super::PriorityDimension; + use super::PriorityOrder; + use super::StorageLimit; + use super::Utf8PathBuf; + use super::ZoneBundleCause; + use super::ZoneBundleId; + use super::ZoneBundleInfo; + use super::ZoneBundleMetadata; + use chrono::TimeZone; + use chrono::Utc; + + #[test] + fn test_sort_zone_bundle_cause() { + use ZoneBundleCause::*; + let mut original = + [ExplicitRequest, Other, TerminatedInstance, UnexpectedZone]; + let expected = + [Other, UnexpectedZone, TerminatedInstance, ExplicitRequest]; + original.sort(); + assert_eq!(original, expected); + } + + #[test] + fn test_priority_dimension() { + assert!(PriorityOrder::new(&[]).is_err()); + assert!(PriorityOrder::new(&[PriorityDimension::Cause]).is_err()); + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + PriorityDimension::Cause + ]) + .is_err()); + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + PriorityDimension::Cause, + PriorityDimension::Time + ]) + .is_err()); + + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + PriorityDimension::Time + ]) + .is_ok()); + assert_eq!( + PriorityOrder::new(&PriorityOrder::default().0).unwrap(), + PriorityOrder::default() + ); + } + + #[tokio::test] + async fn test_disk_usage() { + let path = + Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src")); + let usage = disk_usage(&path).await.unwrap(); + // Run `du -As /path/to/omicron/sled-agent/src`, which currently shows this + // directory is ~450 KiB. + assert!( + usage >= 1024 * 400, + "sled-agent manifest directory disk usage not correct?" + ); + let path = Utf8PathBuf::from("/some/nonexistent/path"); + assert!(disk_usage(&path).await.is_err()); + } + + #[test] + fn test_storage_limit_bytes_available() { + let pct = StorageLimit(1); + assert_eq!(pct.bytes_available(100), 1); + assert_eq!(pct.bytes_available(1000), 10); + + let pct = StorageLimit(100); + assert_eq!(pct.bytes_available(100), 100); + assert_eq!(pct.bytes_available(1000), 1000); + + let pct = StorageLimit(100); + assert_eq!(pct.bytes_available(99), 99); + + let pct = StorageLimit(99); + assert_eq!(pct.bytes_available(1), 0); + + // Test non-power of 10. 
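+        // 25% of 32768 is 819200 / 100 = 8192 exactly, so the integer
+        // division loses nothing here.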
+ let pct = StorageLimit(25); + assert_eq!(pct.bytes_available(32768), 8192); + } + + #[test] + fn test_compare_bundles() { + use PriorityDimension::*; + let time_first = PriorityOrder([Time, Cause]); + let cause_first = PriorityOrder([Cause, Time]); + + fn make_info( + year: i32, + month: u32, + day: u32, + cause: ZoneBundleCause, + ) -> ZoneBundleInfo { + ZoneBundleInfo { + metadata: ZoneBundleMetadata { + id: ZoneBundleId { + zone_name: String::from("oxz_whatever"), + bundle_id: uuid::Uuid::new_v4(), + }, + time_created: Utc + .with_ymd_and_hms(year, month, day, 0, 0, 0) + .single() + .unwrap(), + cause, + version: 0, + }, + path: Utf8PathBuf::from("/some/path"), + bytes: 0, } } + + let info = [ + make_info(2020, 1, 2, ZoneBundleCause::TerminatedInstance), + make_info(2020, 1, 2, ZoneBundleCause::ExplicitRequest), + make_info(2020, 1, 1, ZoneBundleCause::TerminatedInstance), + make_info(2020, 1, 1, ZoneBundleCause::ExplicitRequest), + ]; + + let mut sorted = info.clone(); + sorted.sort_by(|lhs, rhs| time_first.compare_bundles(lhs, rhs)); + // Low -> high priority + // [old/terminated, old/explicit, new/terminated, new/explicit] + let expected = [ + info[2].clone(), + info[3].clone(), + info[0].clone(), + info[1].clone(), + ]; + assert_eq!( + sorted, expected, + "sorting zone bundles by time-then-cause failed" + ); + + let mut sorted = info.clone(); + sorted.sort_by(|lhs, rhs| cause_first.compare_bundles(lhs, rhs)); + // Low -> high priority + // [old/terminated, new/terminated, old/explicit, new/explicit] + let expected = [ + info[2].clone(), + info[0].clone(), + info[3].clone(), + info[1].clone(), + ]; + assert_eq!( + sorted, expected, + "sorting zone bundles by cause-then-time failed" + ); + } +} + +#[cfg(all(target_os = "illumos", test))] +mod illumos_tests { + use super::zfs_quota; + use super::CleanupContext; + use super::CleanupPeriod; + use super::PriorityOrder; + use super::StorageLimit; + use super::StorageResources; + use super::Utf8Path; + use super::Utf8PathBuf; + use super::Uuid; + use super::ZoneBundleCause; + use super::ZoneBundleId; + use super::ZoneBundleInfo; + use super::ZoneBundleMetadata; + use super::ZoneBundler; + use super::ZFS; + use anyhow::Context; + use chrono::TimeZone; + use chrono::Utc; + use slog::Drain; + use slog::Logger; + use tokio::process::Command; + + #[tokio::test] + async fn test_zfs_quota() { + let path = + Utf8PathBuf::try_from(std::env::current_dir().unwrap()).unwrap(); + let quota = zfs_quota(&path).await.unwrap(); + assert!( + quota < (100 * 1024 * 1024 * 1024 * 1024), + "100TiB should be enough for anyone", + ); + let path = Utf8PathBuf::from("/some/nonexistent/path"); + assert!(zfs_quota(&path).await.is_err()); + } + + struct CleanupTestContext { + resource_wrapper: ResourceWrapper, + context: CleanupContext, + bundler: ZoneBundler, + } + + // A wrapper around `StorageResources`, that automatically creates dummy + // directories in the provided test locations and removes them on drop. + // + // I'd much prefer this to be done in $TEMPDIR. However, `StorageResources` + // is difficult to mock out or modify in such a way that the underlying + // dataset locations can be controlled. + // + // This creates completely BS disks, and fake names for the zpools on them. + // Those pools are _supposed_ to live at directories like: + // + // `/pool/int/` + // + // They don't exist when you just do `StorageResources::new_for_test()`. + // This type creates the datasets at the expected mountpoints, backed by the + // ramdisk, and removes them on drop. 
This is basically a tempdir-like + // system, that creates the directories implied by the `StorageResources` + // expected disk structure. + struct ResourceWrapper { + resources: StorageResources, + dirs: Vec, + } + + impl ResourceWrapper { + // Create new storage resources, and mount fake datasets at the required + // locations. + async fn new() -> Self { + let resources = StorageResources::new_for_test(); + let dirs = resources.all_zone_bundle_directories().await; + for d in dirs.iter() { + let id = + d.components().nth(3).unwrap().as_str().parse().unwrap(); + create_test_dataset(&id, d).await.unwrap(); + } + Self { resources, dirs } + } + } + + impl Drop for ResourceWrapper { + fn drop(&mut self) { + for d in self.dirs.iter() { + let id = + d.components().nth(3).unwrap().as_str().parse().unwrap(); + remove_test_dataset(&id).unwrap(); + } + } + } + + async fn setup_fake_cleanup_task() -> anyhow::Result { + let dec = + slog_term::PlainSyncDecorator::new(slog_term::TestStdoutWriter); + let drain = slog_term::FullFormat::new(dec).build().fuse(); + let log = + Logger::root(drain, slog::o!("component" => "fake-cleanup-task")); + let context = CleanupContext::default(); + let resource_wrapper = ResourceWrapper::new().await; + let bundler = + ZoneBundler::new(log, resource_wrapper.resources.clone(), context); + Ok(CleanupTestContext { resource_wrapper, context, bundler }) + } + + #[tokio::test] + async fn test_context() { + let ctx = setup_fake_cleanup_task().await.unwrap(); + let context = ctx.bundler.cleanup_context().await; + assert_eq!(context, ctx.context, "received incorrect context"); + } + + #[tokio::test] + async fn test_update_context() { + let ctx = setup_fake_cleanup_task().await.unwrap(); + let new_context = CleanupContext { + period: CleanupPeriod::new(ctx.context.period.as_duration() / 2) + .unwrap(), + storage_limit: StorageLimit(ctx.context.storage_limit.as_u8() / 2), + priority: PriorityOrder::new( + &ctx.context.priority.iter().copied().rev().collect::>(), + ) + .unwrap(), + }; + ctx.bundler + .update_cleanup_context( + Some(new_context.period), + Some(new_context.storage_limit), + Some(new_context.priority), + ) + .await + .expect("failed to set context"); + let context = ctx.bundler.cleanup_context().await; + assert_eq!(context, new_context, "failed to update context"); + } + + // Quota applied to test datasets. + // + // This needs to be at least this big lest we get "out of space" errors when + // creating. Not sure where those come from, but could be ZFS overhead. + const TEST_QUOTA: u64 = 1024 * 32; + + async fn create_test_dataset( + id: &Uuid, + mountpoint: &Utf8PathBuf, + ) -> anyhow::Result<()> { + let output = Command::new("/usr/bin/pfexec") + .arg(ZFS) + .arg("create") + .arg("-o") + .arg(format!("quota={TEST_QUOTA}")) + .arg("-o") + .arg(format!("mountpoint={mountpoint}")) + .arg(format!("rpool/{id}")) + .output() + .await + .context("failed to spawn zfs create operation")?; + anyhow::ensure!( + output.status.success(), + "zfs create operation failed: {}", + String::from_utf8_lossy(&output.stderr), + ); + + // Make the path operable by the test code. 
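+        //
+        // The dataset was created under `pfexec`, so its mountpoint comes
+        // back owned by root; loosen the permissions so the unprivileged test
+        // code can write fake bundles into it.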
+ let output = Command::new("/usr/bin/pfexec") + .arg("chmod") + .arg("a+rw") + .arg(&mountpoint) + .output() + .await + .context("failed to spawn chmod operation")?; + anyhow::ensure!( + output.status.success(), + "chmod-ing the dataset failed: {}", + String::from_utf8_lossy(&output.stderr), + ); + Ok(()) + } + + fn remove_test_dataset(id: &Uuid) -> anyhow::Result<()> { + let output = std::process::Command::new("/usr/bin/pfexec") + .arg(ZFS) + .arg("destroy") + .arg(format!("rpool/{id}")) + .output() + .context("failed to spawn zfs destroy operation")?; + anyhow::ensure!( + output.status.success(), + "zfs destroy operation failed: {}", + String::from_utf8_lossy(&output.stderr), + ); + Ok(()) + } + + async fn run_test_with_zfs_dataset(test: T) + where + T: FnOnce(CleanupTestContext) -> Fut, + Fut: std::future::Future>, + { + let context = setup_fake_cleanup_task() + .await + .expect("failed to create cleanup task"); + let result = test(context).await; + result.expect("test failed!"); + } + + #[tokio::test] + async fn test_utilization() { + run_test_with_zfs_dataset(test_utilization_body).await; + } + + async fn test_utilization_body( + ctx: CleanupTestContext, + ) -> anyhow::Result<()> { + let utilization = ctx.bundler.utilization().await?; + let paths = utilization.keys().cloned().collect::>(); + + // Check that we've looked at all the paths in the context. + anyhow::ensure!( + paths == ctx.resource_wrapper.dirs, + "Usage RPC returned incorrect paths" + ); + + // Check that we fetched the correct quota from the datasets. + let bundle_utilization = utilization + .values() + .next() + .context("no utilization information?")?; + anyhow::ensure!( + bundle_utilization.dataset_quota == TEST_QUOTA, + "computed incorrect dataset quota" + ); + + // Check that the number of bytes available is accurate. + let pct = u64::from(ctx.context.storage_limit.as_u8()); + let expected_bytes_available = + (bundle_utilization.dataset_quota * pct) / 100; + anyhow::ensure!( + bundle_utilization.bytes_available == expected_bytes_available, + "incorrect bytes available computed for storage: actual {}, expected {}", + bundle_utilization.bytes_available, + expected_bytes_available, + ); + anyhow::ensure!( + bundle_utilization.bytes_used < 64, + "there should be basically zero bytes used" + ); + + // Now let's add a fake bundle, and make sure that we get the right size + // back. + let info = insert_fake_bundle( + &paths[0], + 2020, + 1, + 1, + ZoneBundleCause::ExplicitRequest, + ) + .await?; + + let new_utilization = ctx.bundler.utilization().await?; + anyhow::ensure!( + paths == new_utilization.keys().cloned().collect::>(), + "paths should not change" + ); + let new_bundle_utilization = new_utilization + .values() + .next() + .context("no utilization information?")?; + anyhow::ensure!( + bundle_utilization.dataset_quota + == new_bundle_utilization.dataset_quota, + "dataset quota should not change" + ); + anyhow::ensure!( + bundle_utilization.bytes_available + == new_bundle_utilization.bytes_available, + "bytes available for bundling should not change", + ); + + // We should have consumed _some_ bytes, at least the size of the + // tarball itself. + let change = + new_bundle_utilization.bytes_used - bundle_utilization.bytes_used; + anyhow::ensure!( + change > info.bytes, + "bytes used should drop by at least the size of the tarball", + ); + + // This is a pretty weak test, but let's make sure that the actual + // number of bytes we use is within 5% of the computed size of the + // tarball in bytes. 
This should account for the directories containing + // it. + const THRESHOLD: f64 = 0.05; + let used = new_bundle_utilization.bytes_used as f64; + let size = info.bytes as f64; + let change = (used - size) / used; + anyhow::ensure!( + change > 0.0 && change <= THRESHOLD, + "bytes used should be positive and within {:02} of the \ + size of the new tarball, used = {}, tarball size = {}", + THRESHOLD, + used, + size, + ); + Ok(()) + } + + #[tokio::test] + async fn test_cleanup() { + run_test_with_zfs_dataset(test_cleanup_body).await; + } + + async fn test_cleanup_body(ctx: CleanupTestContext) -> anyhow::Result<()> { + // Let's add a bunch of fake bundles, until we should be over the + // storage limit. These will all be explicit requests, so the priority + // should be decided based on time, i.e., the ones first added should be + // removed. + // + // First, reduce the storage limit, so that we only need to add a few + // bundles. + ctx.bundler + .update_cleanup_context(None, Some(StorageLimit(2)), None) + .await + .context("failed to update cleanup context")?; + + let mut day = 1; + let mut info = Vec::new(); + let mut utilization = ctx.bundler.utilization().await?; + loop { + let us = utilization + .values() + .next() + .context("no utilization information")?; + if us.bytes_used > us.bytes_available { + break; + } + let it = insert_fake_bundle( + &ctx.resource_wrapper.dirs[0], + 2020, + 1, + day, + ZoneBundleCause::ExplicitRequest, + ) + .await?; + day += 1; + info.push(it); + utilization = ctx.bundler.utilization().await?; + } + + // Trigger a cleanup. + let counts = + ctx.bundler.cleanup().await.context("failed to run cleanup")?; + + // We should have cleaned up items in the same paths that we have in the + // context. + anyhow::ensure!( + counts.keys().zip(ctx.resource_wrapper.dirs.iter()).all(|(a, b)| a == b), + "cleaned-up directories do not match the context's storage directories", + ); + + // We should have cleaned up the first-inserted bundle. + let count = counts.values().next().context("no cleanup counts")?; + anyhow::ensure!(count.bundles == 1, "expected to cleanup one bundle"); + anyhow::ensure!( + count.bytes == info[0].bytes, + "expected to cleanup the number of bytes occupied by the first bundle", + ); + let exists = tokio::fs::try_exists(&info[0].path) + .await + .context("failed to check if file exists")?; + anyhow::ensure!( + !exists, + "the cleaned up bundle still appears to exist on-disk", + ); + for each in info.iter().skip(1) { + let exists = tokio::fs::try_exists(&each.path) + .await + .context("failed to check if file exists")?; + anyhow::ensure!(exists, "cleaned up an unexpected bundle"); + } + + Ok(()) + } + + #[tokio::test] + async fn test_list_with_filter() { + run_test_with_zfs_dataset(test_list_with_filter_body).await; + } + + async fn test_list_with_filter_body( + ctx: CleanupTestContext, + ) -> anyhow::Result<()> { + let mut day = 1; + let mut info = Vec::new(); + const N_BUNDLES: usize = 3; + for i in 0..N_BUNDLES { + let it = insert_fake_bundle_with_zone_name( + &ctx.resource_wrapper.dirs[0], + 2020, + 1, + day, + ZoneBundleCause::ExplicitRequest, + format!("oxz_whatever_{i}").as_str(), + ) + .await?; + day += 1; + info.push(it); + } + + // Listing with no filter should return all of them. + let all_md = ctx.bundler.list(None).await?; + anyhow::ensure!( + all_md + == info + .iter() + .map(|each| each.metadata.clone()) + .collect::>(), + "Expected listing with no filter to return all bundles" + ); + + // Each bundle is from a zone named like `oxz_whatver_`. 
+ // + // So filters like `oxz_` should return all of them, while ones on the + // index should return exactly that one matching. + let filt = Some("oxz_"); + let all_md = ctx.bundler.list(filt).await?; + anyhow::ensure!( + all_md + == info + .iter() + .map(|each| each.metadata.clone()) + .collect::>(), + "Expected listing with simple to return all bundles" + ); + for i in 0..N_BUNDLES { + let filt = Some(i.to_string()); + let matching_md = ctx.bundler.list(filt.as_deref()).await?; + let expected_md = &info[i].metadata; + anyhow::ensure!( + matching_md.len() == 1, + "expected exactly one bundle" + ); + anyhow::ensure!( + &matching_md[0] == expected_md, + "Matched incorrect zone bundle with a filter", + ); + } + Ok(()) + } + + async fn insert_fake_bundle( + dir: &Utf8Path, + year: i32, + month: u32, + day: u32, + cause: ZoneBundleCause, + ) -> anyhow::Result { + insert_fake_bundle_with_zone_name( + dir, + year, + month, + day, + cause, + "oxz_whatever", + ) + .await + } + + async fn insert_fake_bundle_with_zone_name( + dir: &Utf8Path, + year: i32, + month: u32, + day: u32, + cause: ZoneBundleCause, + zone_name: &str, + ) -> anyhow::Result { + let metadata = ZoneBundleMetadata { + id: ZoneBundleId { + zone_name: String::from(zone_name), + bundle_id: uuid::Uuid::new_v4(), + }, + time_created: Utc + .with_ymd_and_hms(year, month, day, 0, 0, 0) + .single() + .context("invalid year/month/day")?, + cause, + version: 0, + }; + + let zone_dir = dir.join(&metadata.id.zone_name); + tokio::fs::create_dir_all(&zone_dir) + .await + .context("failed to create zone directory")?; + let path = zone_dir.join(format!("{}.tar.gz", metadata.id.bundle_id)); + + // Create a tarball at the path with this fake metadata. + let file = tokio::fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(&path) + .await + .context("failed to open zone bundle path")? + .into_std() + .await; + let gz = flate2::GzBuilder::new() + .filename(path.as_str()) + .write(file, flate2::Compression::best()); + let mut builder = tar::Builder::new(gz); + let contents = toml::to_string(&metadata)?; + super::insert_data( + &mut builder, + super::ZONE_BUNDLE_METADATA_FILENAME, + contents.as_bytes(), + )?; + let _ = builder.into_inner().context("failed to finish tarball")?; + let bytes = tokio::fs::metadata(&path).await?.len(); + Ok(ZoneBundleInfo { metadata, path, bytes }) } - Ok(None) }
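For orientation, here is a minimal sketch of how a consumer might drive the
`ZoneBundler`, written as if it lived inside the sled-agent crate. The logger
and `StorageResources` value are assumed to be supplied by the caller; in the
sled agent itself the `StorageManager` constructs the bundler and hands out
clones via `zone_bundler()`.

    use crate::storage_manager::StorageResources;
    use crate::zone_bundle::{BundleError, CleanupContext, ZoneBundler};

    async fn bundler_sketch(
        log: slog::Logger,
        resources: StorageResources,
    ) -> Result<(), BundleError> {
        // Defaults: ten-minute period, 25% storage limit, cause-then-time priority.
        let bundler = ZoneBundler::new(log, resources, CleanupContext::default());

        // Report how much of each debug dataset the bundles currently consume.
        for (dir, util) in bundler.utilization().await? {
            println!(
                "{dir}: {} used of {} available",
                util.bytes_used, util.bytes_available,
            );
        }

        // List bundles whose zone name contains "oxz_", then force a cleanup
        // now rather than waiting for the periodic task.
        let _bundles = bundler.list(Some("oxz_")).await?;
        let _removed = bundler.cleanup().await?;
        Ok(())
    }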