Skip to content

Commit eab1cf5

Browse files
authored
Adds service bundles for zones (#3388)
- Adds a dataset to the M.2s for storing debugging data. - Adds a basic mechanism for setting a ZFS quota on datasets. - Adds HTTP endpoints for listing, creating, and fetching zone service bundles from the sled agent. - Adds methods to `ServiceManager` for implementing the above. Zone bundles run a set of commands to get the zone-wide output and some key process-specific data for relevant processes from an Oxide service zone. These are packed into a tarball along with a simple metadata file describing the zone bundle. - Adds some helper methods in `RunningZone` and related types for listing the expected SMF service names and processes associated with them based on the zone's manifest files. - Adds dev tool `zb` for talking to the sled agent to operate on zone bundles.
1 parent 51d2c58 commit eab1cf5

File tree

14 files changed

+1350
-19
lines changed

14 files changed

+1350
-19
lines changed

Cargo.lock

Lines changed: 26 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ http = "0.2.9"
183183
httptest = "0.15.4"
184184
hyper-rustls = "0.24.0"
185185
hyper = "0.14"
186+
hyper-staticfile = "0.9.5"
186187
humantime = "2.1.0"
187188
illumos-utils = { path = "illumos-utils" }
188189
indexmap = "1.9.3"

illumos-utils/src/running_zone.rs

Lines changed: 149 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use crate::zone::{AddressRequest, IPADM, ZONE_PREFIX};
1313
use camino::{Utf8Path, Utf8PathBuf};
1414
use ipnetwork::IpNetwork;
1515
use omicron_common::backoff;
16+
use slog::error;
1617
use slog::info;
1718
use slog::o;
1819
use slog::warn;
@@ -24,6 +25,16 @@ use crate::zone::MockZones as Zones;
2425
#[cfg(not(any(test, feature = "testing")))]
2526
use crate::zone::Zones;
2627

28+
/// Errors returned from methods for fetching SMF services and log files.
29+
#[derive(thiserror::Error, Debug)]
30+
pub enum ServiceError {
31+
/// An I/O operation failed; wraps the originating `std::io::Error`.
#[error("I/O error")]
32+
Io(#[from] std::io::Error),
33+
34+
/// A command could not be run; wraps the originating `RunCommandError`.
#[error("Failed to run a command")]
35+
RunCommand(#[from] RunCommandError),
36+
}
37+
2738
/// Errors returned from [`RunningZone::run_cmd`].
2839
#[derive(thiserror::Error, Debug)]
2940
#[error("Error running command in zone '{zone}': {err}")]
@@ -762,6 +773,128 @@ impl RunningZone {
762773
pub fn links(&self) -> &Vec<Link> {
763774
&self.inner.links
764775
}
776+
777+
/// Return the running processes associated with all the SMF services this
778+
/// zone is intended to run.
779+
pub fn service_processes(
780+
&self,
781+
) -> Result<Vec<ServiceProcess>, ServiceError> {
782+
let service_names = self.service_names()?;
783+
let mut services = Vec::with_capacity(service_names.len());
784+
for service_name in service_names.into_iter() {
785+
let output = self.run_cmd(["ptree", "-s", &service_name])?;
786+
787+
// All Oxide SMF services currently run a single binary, though it
788+
// may be run in a contract via `ctrun`. We don't care about that
789+
// binary, but any others we _do_ want to collect data from.
790+
for line in output.lines() {
791+
if line.contains("ctrun") {
792+
continue;
793+
}
794+
let line = line.trim();
795+
let mut parts = line.split_ascii_whitespace();
796+
797+
// The first two parts should be the PID and the process binary
798+
// path, respectively.
799+
let Some(pid_s) = parts.next() else {
800+
error!(
801+
self.inner.log,
802+
"failed to get service PID from ptree output";
803+
"service" => &service_name,
804+
);
805+
continue;
806+
};
807+
let Ok(pid) = pid_s.parse() else {
808+
error!(
809+
self.inner.log,
810+
"failed to parse service PID from ptree output";
811+
"service" => &service_name,
812+
"pid" => pid_s,
813+
);
814+
continue;
815+
};
816+
let Some(path) = parts.next() else {
817+
error!(
818+
self.inner.log,
819+
"failed to get service binary from ptree output";
820+
"service" => &service_name,
821+
);
822+
continue;
823+
};
824+
let binary = Utf8PathBuf::from(path);
825+
826+
// Fetch any log files for this SMF service.
827+
let Some((log_file, rotated_log_files)) = self.service_log_files(&service_name)? else {
828+
error!(
829+
self.inner.log,
830+
"failed to find log files for existing service";
831+
"service_name" => &service_name,
832+
);
833+
continue;
834+
};
835+
836+
services.push(ServiceProcess {
837+
service_name: service_name.clone(),
838+
binary,
839+
pid,
840+
log_file,
841+
rotated_log_files,
842+
});
843+
}
844+
}
845+
Ok(services)
846+
}
847+
848+
/// Return the names of the Oxide SMF services this zone is intended to run.
849+
pub fn service_names(&self) -> Result<Vec<String>, ServiceError> {
850+
const NEEDLES: [&str; 2] = ["/oxide", "/system/illumos"];
851+
let output = self.run_cmd(&["svcs", "-H", "-o", "fmri"])?;
852+
Ok(output
853+
.lines()
854+
.filter(|line| NEEDLES.iter().any(|needle| line.contains(needle)))
855+
.map(|line| line.trim().to_string())
856+
.collect())
857+
}
858+
859+
/// Return any SMF log files associated with the named service.
860+
///
861+
/// Given a named service, this returns a tuple of the latest or current log
862+
/// file, and an array of any rotated log files. If the service does not
863+
/// exist, or there are no log files, `None` is returned.
864+
pub fn service_log_files(
865+
&self,
866+
name: &str,
867+
) -> Result<Option<(Utf8PathBuf, Vec<Utf8PathBuf>)>, ServiceError> {
868+
let output = self.run_cmd(&["svcs", "-L", name])?;
869+
let mut lines = output.lines();
870+
let Some(current) = lines.next() else {
871+
return Ok(None);
872+
};
873+
// We need to prepend the zonepath root to get the path in the GZ. We
874+
// can do this with `join()`, but that will _replace_ the path if the
875+
// second one is absolute. So trim any prefixed `/` from each path.
876+
let root = self.root();
877+
let current_log_file =
878+
root.join(current.trim().trim_start_matches('/'));
879+
880+
// The rotated log files should have the same prefix as the current, but
881+
// with an index appended. We'll search the parent directory for
882+
// matching names, skipping the current file.
883+
//
884+
// See https://illumos.org/man/8/logadm for details on the naming
885+
// conventions around these files.
886+
let dir = current_log_file.parent().unwrap();
887+
let mut rotated_files = Vec::new();
888+
for entry in dir.read_dir_utf8()? {
889+
let entry = entry?;
890+
let path = entry.path();
891+
if path != current_log_file && path.starts_with(&current_log_file) {
892+
rotated_files
893+
.push(root.join(path.strip_prefix("/").unwrap_or(path)));
894+
}
895+
}
896+
Ok(Some((current_log_file, rotated_files)))
897+
}
765898
}
766899

767900
impl Drop for RunningZone {
@@ -783,6 +916,21 @@ impl Drop for RunningZone {
783916
}
784917
}
785918

919+
/// A process running in the zone associated with an SMF service.
///
/// Values of this type are produced by `RunningZone::service_processes`.
920+
#[derive(Clone, Debug)]
921+
pub struct ServiceProcess {
922+
/// The name of the SMF service.
923+
pub service_name: String,
924+
/// The path of the binary in the process image.
///
/// Taken from the second whitespace-separated field of a `ptree` output
/// line.
925+
pub binary: Utf8PathBuf,
926+
/// The PID of the process.
///
/// Parsed from the first whitespace-separated field of a `ptree` output
/// line.
927+
pub pid: u32,
928+
/// The path for the current log file.
///
/// As returned by `RunningZone::service_log_files`, which prefixes the
/// path with the zone's root.
929+
pub log_file: Utf8PathBuf,
930+
/// The paths for any rotated log files.
931+
pub rotated_log_files: Vec<Utf8PathBuf>,
932+
}
933+
786934
/// Errors returned from [`InstalledZone::install`].
787935
#[derive(thiserror::Error, Debug)]
788936
pub enum InstallZoneError {
@@ -817,7 +965,7 @@ pub struct InstalledZone {
817965
// NIC used for control plane communication.
818966
control_vnic: Link,
819967

820-
// Nic used for bootstrap network communication
968+
// NIC used for bootstrap network communication
821969
bootstrap_vnic: Option<Link>,
822970

823971
// OPTE devices for the guest network interfaces

illumos-utils/src/zfs.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,15 @@ impl Zfs {
171171
}
172172

173173
/// Creates a new ZFS filesystem named `name`, unless one already exists.
174+
///
175+
/// Applies an optional quota, provided _in bytes_.
174176
pub fn ensure_filesystem(
175177
name: &str,
176178
mountpoint: Mountpoint,
177179
zoned: bool,
178180
do_format: bool,
179181
encryption_details: Option<EncryptionDetails>,
182+
quota: Option<usize>,
180183
) -> Result<(), EnsureFilesystemError> {
181184
let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?;
182185
if exists {
@@ -225,9 +228,23 @@ impl Zfs {
225228
cmd.args(&["-o", &format!("mountpoint={}", mountpoint), name]);
226229
execute(cmd).map_err(|err| EnsureFilesystemError {
227230
name: name.to_string(),
228-
mountpoint,
231+
mountpoint: mountpoint.clone(),
229232
err: err.into(),
230233
})?;
234+
235+
// Apply any quota.
236+
if let Some(quota) = quota {
237+
if let Err(err) =
238+
Self::set_value(name, "quota", &format!("{quota}"))
239+
{
240+
return Err(EnsureFilesystemError {
241+
name: name.to_string(),
242+
mountpoint,
243+
// Take the execution error from the SetValueError
244+
err: err.err.into(),
245+
});
246+
}
247+
}
231248
Ok(())
232249
}
233250

0 commit comments

Comments
 (0)