Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion gix-diff/src/blob/unified_diff/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ where
}

impl DiffLineKind {
const fn to_prefix(self) -> char {
/// Returns a one-character representation for use in unified diffs.
pub const fn to_prefix(self) -> char {
match self {
DiffLineKind::Context => ' ',
DiffLineKind::Add => '+',
Expand Down
1 change: 1 addition & 0 deletions gix-diff/tests/diff/blob/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub(crate) mod pipeline;
mod platform;
mod slider;
mod unified_diff;
258 changes: 258 additions & 0 deletions gix-diff/tests/diff/blob/slider.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
use std::{iter::Peekable, path::PathBuf};

use gix_diff::blob::{
intern::TokenSource,
unified_diff::{ConsumeHunk, ContextSize, HunkHeader},
Algorithm, UnifiedDiff,
};
use gix_object::bstr::{self, BString, ByteVec};

#[derive(Debug, PartialEq)]
struct DiffHunk {
header: HunkHeader,
lines: BString,
}

struct DiffHunkRecorder {
inner: Vec<DiffHunk>,
}

impl DiffHunkRecorder {
fn new() -> Self {
Self { inner: Vec::new() }
}
}

impl ConsumeHunk for DiffHunkRecorder {
type Out = Vec<DiffHunk>;

fn consume_hunk(
&mut self,
header: HunkHeader,
lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])],
) -> std::io::Result<()> {
let mut buf = Vec::new();

for &(kind, line) in lines {
buf.push(kind.to_prefix() as u8);
buf.extend_from_slice(line);
buf.push(b'\n');
}

let diff_hunk = DiffHunk {
header,
lines: buf.into(),
};

self.inner.push(diff_hunk);

Ok(())
}

fn finish(self) -> Self::Out {
self.inner
}
}

struct Baseline<'a> {
lines: Peekable<bstr::Lines<'a>>,
}

mod baseline {
use std::path::Path;

use gix_diff::blob::unified_diff::HunkHeader;
use gix_object::bstr::ByteSlice;

use super::{Baseline, DiffHunk};

static START_OF_HEADER: &[u8; 4] = b"@@ -";

impl Baseline<'_> {
pub fn collect(baseline_path: impl AsRef<Path>) -> std::io::Result<Vec<DiffHunk>> {
let content = std::fs::read(baseline_path)?;

let mut baseline = Baseline {
lines: content.lines().peekable(),
};

baseline.skip_header();

Ok(baseline.collect())
}

fn skip_header(&mut self) {
// diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
// index ccccccc..ddddddd 100644
// --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
// +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb

let line = self.lines.next().expect("line to be present");
assert!(line.starts_with(b"diff --git "));

let line = self.lines.next().expect("line to be present");
assert!(line.starts_with(b"index "));

let line = self.lines.next().expect("line to be present");
assert!(line.starts_with(b"--- "));

let line = self.lines.next().expect("line to be present");
assert!(line.starts_with(b"+++ "));
}

/// Parse diff hunk headers that conform to the unified diff hunk header format.
///
/// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This
/// allows us to split the input on ` ` and `,` only.
///
/// @@ -18,6 +18,7 @@ abc def ghi
/// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@
fn parse_hunk_header(&self, line: &[u8]) -> gix_testtools::Result<HunkHeader> {
let Some(line) = line.strip_prefix(START_OF_HEADER) else {
todo!()
};

let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect();
let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else {
todo!()
};

Ok(HunkHeader {
before_hunk_start: self.parse_number(before_hunk_start),
before_hunk_len: self.parse_number(before_hunk_len),
after_hunk_start: self.parse_number(after_hunk_start),
after_hunk_len: self.parse_number(after_hunk_len),
})
}

fn parse_number(&self, bytes: &[u8]) -> u32 {
bytes
.to_str()
.expect("to be a valid UTF-8 string")
.parse::<u32>()
.expect("to be a number")
}
}

impl Iterator for Baseline<'_> {
type Item = DiffHunk;

fn next(&mut self) -> Option<Self::Item> {
let mut hunk_header = None;
let mut hunk_lines = Vec::new();

while let Some(line) = self.lines.next() {
if line.starts_with(START_OF_HEADER) {
assert!(hunk_header.is_none(), "should not overwrite existing hunk_header");
hunk_header = self.parse_hunk_header(line).ok();

continue;
}

match line[0] {
b' ' | b'+' | b'-' => {
hunk_lines.extend_from_slice(line);
hunk_lines.push(b'\n');
}
_ => todo!(),
}

match self.lines.peek() {
Some(next_line) if next_line.starts_with(START_OF_HEADER) => break,
None => break,
_ => {}
}
}

hunk_header.map(|hunk_header| DiffHunk {
header: hunk_header,
lines: hunk_lines.into(),
})
}
}
}

#[test]
fn sliders() -> gix_testtools::Result {
let worktree_path = fixture_path()?;
let asset_dir = worktree_path.join("assets");

let dir = std::fs::read_dir(&worktree_path)?;

for entry in dir {
let entry = entry?;
let file_name = entry.file_name().into_string().expect("to be string");

if !file_name.ends_with(".baseline") {
continue;
}

let parts: Vec<_> = file_name.split('.').collect();
let [name, algorithm, ..] = parts[..] else {
unimplemented!()
};
let algorithm = match algorithm {
"myers" => Algorithm::Myers,
"histogram" => Algorithm::Histogram,
_ => unimplemented!(),
};

let parts: Vec<_> = name.split('-').collect();
let [old_blob_id, new_blob_id] = parts[..] else {
unimplemented!();
};

let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?;
let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?;

let interner = gix_diff::blob::intern::InternedInput::new(
tokens_for_diffing(old_data.as_slice()),
tokens_for_diffing(new_data.as_slice()),
);

let actual = gix_diff::blob::diff(
algorithm,
&interner,
UnifiedDiff::new(&interner, DiffHunkRecorder::new(), ContextSize::symmetrical(3)),
)?;

let baseline_path = worktree_path.join(file_name);
let baseline = Baseline::collect(baseline_path).unwrap();

let actual = actual
.iter()
.fold(BString::default(), |mut acc, diff_hunk| {
acc.push_str(diff_hunk.header.to_string().as_str());
acc.push(b'\n');

acc.extend_from_slice(&diff_hunk.lines);

acc
})
.to_string();

let baseline = baseline
.iter()
.fold(BString::default(), |mut acc, diff_hunk| {
acc.push_str(diff_hunk.header.to_string().as_str());
acc.push(b'\n');

acc.extend_from_slice(&diff_hunk.lines);

acc
})
.to_string();

pretty_assertions::assert_eq!(actual, baseline);
}

Ok(())
}

fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
gix_diff::blob::sources::byte_lines(data)
}

fn fixture_path() -> gix_testtools::Result<PathBuf> {
gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")
}
23 changes: 23 additions & 0 deletions gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love to keep this info, but it feels it would be more accessible in gix-diff/README.md (instead of a seemingly fake shell script).
Or am I missing something, maybe the placeholder is needed for something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And in that file, could you provide a snipped, behind <details><summary>… that I can use as a diff slider example?
That way I don't have to repeat all these steps and… run python… (yes, I am snobby ;)), just to try it out.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, make_diff_for_sliders_repo.sh needs to be present for this line not to fail: https://github.com/cruessler/gitoxide/blob/dd5127f54ce4e055e6a87719b4962482cd82090d/gix-diff/tests/diff/blob/slider.rs#L257. I just deleted the script locally which made the associated test fail with a file not found message.

What do you mean by “diff slider example”? If you want the test to run on one single diff in order to compare the git baseline against gix-diff, you would need two .blob files in assets as well as the corresponding code in the script to generate the .baseline files (one for each of the 2 diff algorithms currently being tested). While that, in an of itself, would be easy to add, the resulting test would most likely fail as gix-diff doesn’t yet match the git baseline in most cases as far as I can tell. Which is why I didn’t even try in the first place. 😄

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apart from that, I was also wondering where the best place for the info that currently lives in the script is. Feel free to move it to a more appropriate place, the README.md seems like a good choice to me!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This clearly shows that I don't fully understand the setup yet. What I read made me think the slider file is all that's needed and the test only depends on that, but that's not the case as it doesn't include the actual blobs. It just refers to them by ID.

So I will be back with this tomorrow probably and spend the time that I probably tried to safe today 😅.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That kind of feedback is already very valuable! 😄 I’m happy to provide more context, hoping to make this a bit more self-contained.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually think you already did everything you could and I will use my ignorance and naiveté towards that matter to make it work for other readers with a similar level of ignorance and naiveté :D. So I think I am predestined to do that 😁.

Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
set -eu -o pipefail

# This script is a placeholder for a script generated by the
# `create-diff-cases` subcommand of `internal-tools`. The placeholder does
# nothing except making the associated test trivially pass.
#
# ## Usage
#
# Follow these instructions to generate a file containing sliders:
# https://github.com/mhagger/diff-slider-tools/blob/b59ed13d7a2a6cfe14a8f79d434b6221cc8b04dd/README.md?plain=1#L122-L146
#
# Run `create-diff-cases` to create the script in `gix-diff/tests/fixtures/`.
# The script will be called `make_diff_for_sliders_repo.sh` and it will
# overwrite the placeholder.
#
# ```
# # run inside `gitoxide`
# cargo run --package internal-tools -- create-diff-cases --sliders-file $PATH_TO_DIFF_SLIDER_TOOLS/corpus/git.sliders --worktree-dir $PATH_TO_DIFF_SLIDER_TOOLS/corpus/git.git/ --destination-dir gix-diff/tests/fixtures/
# ```
#
# Run `cargo test sliders -- --nocapture` inside `gix-diff/tests` to run the
# actual tests.
28 changes: 28 additions & 0 deletions tests/it/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,34 @@ pub enum Subcommands {
#[clap(value_parser = AsPathSpec)]
patterns: Vec<gix::pathspec::Pattern>,
},
/// Take a slider file generated with the help of [diff-slider-tools] and turn it into a series
/// of baseline diffs to be used in [slider-rs].
///
/// See [make-diff-for-sliders-repo] for details.
///
/// [diff-slider-tools]: https://github.com/mhagger/diff-slider-tools
/// [slider-rs]: gix-diff/tests/diff/blob/slider.rs
/// [make-diff-for-sliders-repo]: gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh
CreateDiffCases {
/// Don't really copy anything.
#[clap(long, short = 'n')]
dry_run: bool,
/// The `.sliders` file that contains a list of sliders.
#[clap(long)]
sliders_file: PathBuf,
/// The git root to extract the diff-related parts from.
#[clap(long)]
worktree_dir: PathBuf,
/// The directory into which to copy the files.
#[clap(long)]
destination_dir: PathBuf,
/// The number of sliders to generate test cases for.
#[clap(long, default_value_t = 10)]
count: usize,
/// The directory to place assets in.
#[clap(long)]
asset_dir: Option<BString>,
},
/// Check for executable bits that disagree with shebangs.
///
/// This checks committed and staged files, but not anything unstaged, to find shell scripts
Expand Down
Loading
Loading