Skip to content

[compiletest] Ignore known paths when abbreviating output #96551

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 6, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 90 additions & 56 deletions src/tools/compiletest/src/read2.rs
Original file line number Diff line number Diff line change
@@ -1,71 +1,24 @@
// FIXME: This is a complete copy of `cargo/src/cargo/util/read2.rs`
// Consider unify the read2() in libstd, cargo and this to prevent further code duplication.

#[cfg(test)]
mod tests;

pub use self::imp::read2;
use std::io;
use std::io::{self, Write};
use std::mem::replace;
use std::process::{Child, Output};

pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
use io::Write;
use std::mem::replace;

const HEAD_LEN: usize = 160 * 1024;
const TAIL_LEN: usize = 256 * 1024;

enum ProcOutput {
Full(Vec<u8>),
Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
}

impl ProcOutput {
fn extend(&mut self, data: &[u8]) {
let new_self = match *self {
ProcOutput::Full(ref mut bytes) => {
bytes.extend_from_slice(data);
let new_len = bytes.len();
if new_len <= HEAD_LEN + TAIL_LEN {
return;
}
let tail = bytes.split_off(new_len - TAIL_LEN).into_boxed_slice();
let head = replace(bytes, Vec::new());
let skipped = new_len - HEAD_LEN - TAIL_LEN;
ProcOutput::Abbreviated { head, skipped, tail }
}
ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
*skipped += data.len();
if data.len() <= TAIL_LEN {
tail[..data.len()].copy_from_slice(data);
tail.rotate_left(data.len());
} else {
tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
}
return;
}
};
*self = new_self;
}

fn into_bytes(self) -> Vec<u8> {
match self {
ProcOutput::Full(bytes) => bytes,
ProcOutput::Abbreviated { mut head, skipped, tail } => {
write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
head.extend_from_slice(&tail);
head
}
}
}
}

let mut stdout = ProcOutput::Full(Vec::new());
let mut stderr = ProcOutput::Full(Vec::new());
pub fn read2_abbreviated(mut child: Child, exclude_from_len: &[String]) -> io::Result<Output> {
let mut stdout = ProcOutput::new();
let mut stderr = ProcOutput::new();

drop(child.stdin.take());
read2(
child.stdout.take().unwrap(),
child.stderr.take().unwrap(),
&mut |is_stdout, data, _| {
if is_stdout { &mut stdout } else { &mut stderr }.extend(data);
if is_stdout { &mut stdout } else { &mut stderr }.extend(data, exclude_from_len);
data.clear();
},
)?;
Expand All @@ -74,6 +27,87 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
Ok(Output { status, stdout: stdout.into_bytes(), stderr: stderr.into_bytes() })
}

const HEAD_LEN: usize = 160 * 1024;
const TAIL_LEN: usize = 256 * 1024;
const EXCLUDED_PLACEHOLDER_LEN: isize = 32;

enum ProcOutput {
Full { bytes: Vec<u8>, excluded_len: isize },
Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
}

impl ProcOutput {
fn new() -> Self {
ProcOutput::Full { bytes: Vec::new(), excluded_len: 0 }
}

fn extend(&mut self, data: &[u8], exclude_from_len: &[String]) {
let new_self = match *self {
ProcOutput::Full { ref mut bytes, ref mut excluded_len } => {
let old_len = bytes.len();
bytes.extend_from_slice(data);

// We had problems in the past with tests failing only in some environments,
// due to the length of the base path pushing the output size over the limit.
//
// To make those failures deterministic across all environments we ignore known
// paths when calculating the string length, while still including the full
// path in the output. This could result in some output being larger than the
// threshold, but it's better than having nondeterministic failures.
//
// The compiler emitting only excluded strings is addressed by adding a
// placeholder size for each excluded segment, which will eventually reach
// the configured threshold.
for pattern in exclude_from_len {
let pattern_bytes = pattern.as_bytes();
// We start matching `pattern_bytes - 1` into the previously loaded data,
// to account for the fact a pattern might be included across multiple
// `extend` calls. Starting from `- 1` avoids double-counting patterns.
let matches = (&bytes[(old_len.saturating_sub(pattern_bytes.len() - 1))..])
.windows(pattern_bytes.len())
.filter(|window| window == &pattern_bytes)
.count();
*excluded_len += matches as isize
* (EXCLUDED_PLACEHOLDER_LEN - pattern_bytes.len() as isize);
}

let new_len = bytes.len();
if (new_len as isize + *excluded_len) as usize <= HEAD_LEN + TAIL_LEN {
return;
}

let mut head = replace(bytes, Vec::new());
let mut middle = head.split_off(HEAD_LEN);
let tail = middle.split_off(middle.len() - TAIL_LEN).into_boxed_slice();
let skipped = new_len - HEAD_LEN - TAIL_LEN;
ProcOutput::Abbreviated { head, skipped, tail }
}
ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
*skipped += data.len();
if data.len() <= TAIL_LEN {
tail[..data.len()].copy_from_slice(data);
tail.rotate_left(data.len());
} else {
tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
}
return;
}
};
*self = new_self;
}

fn into_bytes(self) -> Vec<u8> {
match self {
ProcOutput::Full { bytes, .. } => bytes,
ProcOutput::Abbreviated { mut head, skipped, tail } => {
write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
head.extend_from_slice(&tail);
head
}
}
}
}

#[cfg(not(any(unix, windows)))]
mod imp {
use std::io::{self, Read};
Expand Down
123 changes: 123 additions & 0 deletions src/tools/compiletest/src/read2/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use crate::read2::{ProcOutput, EXCLUDED_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN};

#[test]
fn test_abbreviate_short_string() {
let mut out = ProcOutput::new();
out.extend(b"Hello world!", &[]);
assert_eq!(b"Hello world!", &*out.into_bytes());
}

#[test]
fn test_abbreviate_short_string_multiple_steps() {
let mut out = ProcOutput::new();

out.extend(b"Hello ", &[]);
out.extend(b"world!", &[]);

assert_eq!(b"Hello world!", &*out.into_bytes());
}

#[test]
fn test_abbreviate_long_string() {
let mut out = ProcOutput::new();

let data = vec![b'.'; HEAD_LEN + TAIL_LEN + 16];
out.extend(&data, &[]);

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 16 BYTES >>>>>>\n\n");
Copy link
Member

@Mark-Simulacrum Mark-Simulacrum May 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I think we should make the read2_abbreviated return an error or panic instead of inserting this skipped message -- that typically leads to a JSON de-serialization failure and a pretty opaque one at that, which I think isn't too helpful -- we can probably say something more useful like "captured X bytes, which is above the limit in compiletest. See abbreviated output: {output}".

(Can be out of scope for this PR).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, let's do that in a separate PR though.

expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_long_string_multiple_steps() {
let mut out = ProcOutput::new();

out.extend(&vec![b'.'; HEAD_LEN], &[]);
out.extend(&vec![b'.'; TAIL_LEN], &[]);
// Also test whether the rotation works
out.extend(&vec![b'!'; 16], &[]);
out.extend(&vec![b'?'; 16], &[]);

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 32 BYTES >>>>>>\n\n");
expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 32]);
expected.extend_from_slice(&vec![b'!'; 16]);
expected.extend_from_slice(&vec![b'?'; 16]);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_exclusions_are_detected() {
let mut out = ProcOutput::new();
let exclusions = &["foo".to_string(), "quux".to_string()];

out.extend(b"Hello foo", exclusions);
// Check items from a previous extension are not double-counted.
out.extend(b"! This is a qu", exclusions);
// Check items are detected across extensions.
out.extend(b"ux.", exclusions);

match out {
ProcOutput::Full { excluded_len, .. } => assert_eq!(
excluded_len,
EXCLUDED_PLACEHOLDER_LEN * exclusions.len() as isize
- exclusions.iter().map(|i| i.len() as isize).sum::<isize>()
),
ProcOutput::Abbreviated { .. } => panic!("out should not be abbreviated"),
}

assert_eq!(b"Hello foo! This is a quux.", &*out.into_bytes());
}

#[test]
fn test_abbreviate_exclusions_avoid_abbreviations() {
let mut out = ProcOutput::new();
let exclusions = &[std::iter::repeat('a').take(64).collect::<String>()];

let mut expected = vec![b'.'; HEAD_LEN - EXCLUDED_PLACEHOLDER_LEN as usize];
expected.extend_from_slice(exclusions[0].as_bytes());
expected.extend_from_slice(&vec![b'.'; TAIL_LEN]);

out.extend(&expected, exclusions);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}

#[test]
fn test_abbreviate_exclusions_can_still_cause_abbreviations() {
let mut out = ProcOutput::new();
let exclusions = &[std::iter::repeat('a').take(64).collect::<String>()];

let mut input = vec![b'.'; HEAD_LEN];
input.extend_from_slice(&vec![b'.'; TAIL_LEN]);
input.extend_from_slice(exclusions[0].as_bytes());

let mut expected = vec![b'.'; HEAD_LEN];
expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 64 BYTES >>>>>>\n\n");
expected.extend_from_slice(&vec![b'.'; TAIL_LEN - 64]);
expected.extend_from_slice(&vec![b'a'; 64]);

out.extend(&input, exclusions);

// We first check the length to avoid endless terminal output if the length differs, since
// `out` is hundreds of KBs in size.
let out = out.into_bytes();
assert_eq!(expected.len(), out.len());
assert_eq!(expected, out);
}
29 changes: 25 additions & 4 deletions src/tools/compiletest/src/runtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use std::hash::{Hash, Hasher};
use std::io::prelude::*;
use std::io::{self, BufReader};
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus, Output, Stdio};
use std::process::{Child, Command, ExitStatus, Output, Stdio};
use std::str;

use glob::glob;
Expand Down Expand Up @@ -1735,6 +1735,28 @@ impl<'test> TestCx<'test> {
dylib
}

fn read2_abbreviated(&self, child: Child) -> Output {
let mut exclude_from_len = Vec::new();
let mut add_path = |path: &Path| {
let path = path.display().to_string();
let windows = path.replace("\\", "\\\\");
if windows != path {
exclude_from_len.push(windows);
}
exclude_from_len.push(path);
};

// List of strings that will not be measured when determining whether the output is larger
// than the output truncation threshold.
//
// Note: avoid adding a subdirectory of an already excluded directory here, otherwise the
// same slice of text will be double counted and the truncation might not happen.
add_path(&self.config.src_base);
add_path(&self.config.build_base);

read2_abbreviated(child, &exclude_from_len).expect("failed to read output")
}

fn compose_and_run(
&self,
mut command: Command,
Expand Down Expand Up @@ -1769,8 +1791,7 @@ impl<'test> TestCx<'test> {
child.stdin.as_mut().unwrap().write_all(input.as_bytes()).unwrap();
}

let Output { status, stdout, stderr } =
read2_abbreviated(child).expect("failed to read output");
let Output { status, stdout, stderr } = self.read2_abbreviated(child);

let result = ProcRes {
status,
Expand Down Expand Up @@ -2959,7 +2980,7 @@ impl<'test> TestCx<'test> {
}
}

let output = cmd.spawn().and_then(read2_abbreviated).expect("failed to spawn `make`");
let output = self.read2_abbreviated(cmd.spawn().expect("failed to spawn `make`"));
if !output.status.success() {
let res = ProcRes {
status: output.status,
Expand Down