Skip to content

Commit dc47f91

Browse files
committed
Implement a new unified function for figuring out how if a set of paths have been modified locally
Also adds several git tests to make sure that the behavior works in common cases (PR CI, auto CI, local usage).
1 parent 20f0e27 commit dc47f91

File tree

5 files changed

+463
-17
lines changed

5 files changed

+463
-17
lines changed

Cargo.lock

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ version = "0.1.0"
317317
dependencies = [
318318
"serde",
319319
"serde_derive",
320+
"tempfile",
320321
]
321322

322323
[[package]]
@@ -2135,6 +2136,12 @@ version = "0.4.15"
21352136
source = "registry+https://github.com/rust-lang/crates.io-index"
21362137
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
21372138

2139+
[[package]]
2140+
name = "linux-raw-sys"
2141+
version = "0.9.3"
2142+
source = "registry+https://github.com/rust-lang/crates.io-index"
2143+
checksum = "fe7db12097d22ec582439daf8618b8fdd1a7bef6270e9af3b1ebcd30893cf413"
2144+
21382145
[[package]]
21392146
name = "litemap"
21402147
version = "0.7.4"
@@ -4832,7 +4839,20 @@ dependencies = [
48324839
"bitflags",
48334840
"errno",
48344841
"libc",
4835-
"linux-raw-sys",
4842+
"linux-raw-sys 0.4.15",
4843+
"windows-sys 0.59.0",
4844+
]
4845+
4846+
[[package]]
4847+
name = "rustix"
4848+
version = "1.0.2"
4849+
source = "registry+https://github.com/rust-lang/crates.io-index"
4850+
checksum = "f7178faa4b75a30e269c71e61c353ce2748cf3d76f0c44c393f4e60abf49b825"
4851+
dependencies = [
4852+
"bitflags",
4853+
"errno",
4854+
"libc",
4855+
"linux-raw-sys 0.9.3",
48364856
"windows-sys 0.59.0",
48374857
]
48384858

@@ -5246,15 +5266,14 @@ dependencies = [
52465266

52475267
[[package]]
52485268
name = "tempfile"
5249-
version = "3.15.0"
5269+
version = "3.19.0"
52505270
source = "registry+https://github.com/rust-lang/crates.io-index"
5251-
checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
5271+
checksum = "488960f40a3fd53d72c2a29a58722561dee8afdd175bd88e3db4677d7b2ba600"
52525272
dependencies = [
5253-
"cfg-if",
52545273
"fastrand",
5255-
"getrandom 0.2.15",
5274+
"getrandom 0.3.1",
52565275
"once_cell",
5257-
"rustix",
5276+
"rustix 1.0.2",
52585277
"windows-sys 0.59.0",
52595278
]
52605279

@@ -6599,8 +6618,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
65996618
checksum = "e105d177a3871454f754b33bb0ee637ecaaac997446375fd3e5d43a2ed00c909"
66006619
dependencies = [
66016620
"libc",
6602-
"linux-raw-sys",
6603-
"rustix",
6621+
"linux-raw-sys 0.4.15",
6622+
"rustix 0.38.43",
66046623
]
66056624

66066625
[[package]]

src/bootstrap/Cargo.lock

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,12 +225,12 @@ dependencies = [
225225

226226
[[package]]
227227
name = "errno"
228-
version = "0.3.9"
228+
version = "0.3.10"
229229
source = "registry+https://github.com/rust-lang/crates.io-index"
230-
checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
230+
checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
231231
dependencies = [
232232
"libc",
233-
"windows-sys 0.52.0",
233+
"windows-sys 0.59.0",
234234
]
235235

236236
[[package]]
@@ -334,9 +334,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
334334

335335
[[package]]
336336
name = "libc"
337-
version = "0.2.167"
337+
version = "0.2.171"
338338
source = "registry+https://github.com/rust-lang/crates.io-index"
339-
checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
339+
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
340340

341341
[[package]]
342342
name = "libredox"

src/build_helper/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@ edition = "2021"
88
[dependencies]
99
serde = "1"
1010
serde_derive = "1"
11+
12+
[dev-dependencies]
13+
tempfile = "3.19"

src/build_helper/src/git.rs

Lines changed: 175 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
#[cfg(test)]
2+
mod tests;
3+
14
use std::path::{Path, PathBuf};
25
use std::process::{Command, Stdio};
36

@@ -129,7 +132,7 @@ pub fn get_closest_merge_commit(
129132
git.current_dir(git_dir);
130133
}
131134

132-
let channel = include_str!("../../ci/channel");
135+
let channel = include_str!("../../ci/channel").trim();
133136

134137
let merge_base = {
135138
if CiEnv::is_ci() &&
@@ -165,16 +168,184 @@ pub fn get_closest_merge_commit(
165168
Ok(output_result(&mut git)?.trim().to_owned())
166169
}
167170

171+
/// Represents the result of checking whether a set of paths
172+
/// have been modified locally or not.
173+
#[derive(PartialEq, Debug)]
174+
pub enum PathFreshness {
175+
/// Artifacts should be downloaded from this upstream commit,
176+
/// there are no local modifications.
177+
LastModifiedUpstream { upstream: String },
178+
/// There are local modifications to a certain set of paths.
179+
/// "Local" essentially means "not-upstream" here.
180+
/// `upstream` is the latest upstream merge commit that made modifications to the
181+
/// set of paths.
182+
HasLocalModifications { upstream: String },
183+
}
184+
185+
/// This function figures out if a set of paths was last modified upstream or
186+
/// if there are some local modifications made to them.
187+
///
188+
/// It can be used to figure out if we should download artifacts from CI or rather
189+
/// build them locally.
190+
///
191+
/// `target_paths` should be a non-empty slice of paths (relative to `git_dir` or the
192+
/// current working directory) whose modifications would invalidate the artifact.
193+
///
194+
/// The function behaves differently in CI and outside CI.
195+
///
196+
/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
197+
/// top of the local master branch.
198+
/// If not, we try to find the most recent upstream commit (which we assume are commits
199+
/// made by bors) that modified `target_paths`.
200+
/// We don't want to simply take the latest master commit to avoid changing the output of
201+
/// this function frequently after rebasing on the latest master branch even if `target_paths`
202+
/// were not modified upstream in the meantime. In that case we would be redownloading CI
203+
/// artifacts unnecessarily.
204+
///
205+
/// - In CI, we always fetch only a single parent merge commit, so we do not have access
206+
/// to the full git history.
207+
/// Luckily, we only need to distinguish between two situations. The first is that the current
208+
/// PR made modifications to `target_paths`. If not, then we simply take the latest upstream
209+
/// commit, because on CI there is no need to avoid redownloading.
210+
pub fn check_path_modifications(
211+
git_dir: Option<&Path>,
212+
config: &GitConfig<'_>,
213+
target_paths: &[&str],
214+
ci_env: CiEnv,
215+
) -> Result<PathFreshness, String> {
216+
assert!(!target_paths.is_empty());
217+
for path in target_paths {
218+
assert!(Path::new(path.trim_start_matches(":!")).is_relative());
219+
}
220+
221+
let upstream_sha = if matches!(ci_env, CiEnv::GitHubActions) {
222+
// Here the situation is different for PR CI and try/auto CI.
223+
// For PR CI, we have the following history:
224+
// <merge commit made by GitHub>
225+
// 1-N PR commits
226+
// upstream merge commit made by bors
227+
//
228+
// For try/auto CI, we have the following history:
229+
// <**non-upstream** merge commit made by bors>
230+
// 1-N PR commits
231+
// upstream merge commit made by bors
232+
//
233+
// But on both cases, HEAD should be a merge commit.
234+
// So if HEAD contains modifications of `target_paths`, our PR has modified
235+
// them. If not, we can use the only available upstream commit for downloading
236+
// artifacts.
237+
238+
// Do not include HEAD, as it is never an upstream commit
239+
get_closest_upstream_commit(git_dir, config, ci_env)?
240+
} else {
241+
// Outside CI, we have to find the most recent upstream commit that
242+
// modified the set of paths, to have an upstream reference.
243+
let upstream_sha = get_latest_commit_that_modified_files(
244+
git_dir,
245+
target_paths,
246+
config.git_merge_commit_email,
247+
)?;
248+
let Some(upstream_sha) = upstream_sha else {
249+
eprintln!("No upstream commit that modified paths {target_paths:?} found.");
250+
eprintln!("Try to fetch more upstream history.");
251+
return Err("No upstream commit with modifications found".to_string());
252+
};
253+
upstream_sha
254+
};
255+
256+
if has_changed_since(git_dir, &upstream_sha, target_paths) {
257+
Ok(PathFreshness::HasLocalModifications { upstream: upstream_sha })
258+
} else {
259+
Ok(PathFreshness::LastModifiedUpstream { upstream: upstream_sha })
260+
}
261+
}
262+
263+
/// Returns true if any of the passed `paths` have changed since the `base` commit.
264+
pub fn has_changed_since(git_dir: Option<&Path>, base: &str, paths: &[&Path]) -> bool {
265+
let mut git = Command::new("git");
266+
267+
if let Some(git_dir) = git_dir {
268+
git.current_dir(git_dir);
269+
}
270+
271+
git.args(["diff-index", "--quiet", base, "--"]).args(paths);
272+
273+
// Exit code 0 => no changes
274+
// Exit code 1 => some changes were detected
275+
!git.status().expect("cannot run git diff-index").success()
276+
}
277+
278+
/// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
279+
/// If `author` is `Some`, only considers commits made by that author.
280+
fn get_latest_commit_that_modified_files(
281+
git_dir: Option<&Path>,
282+
target_paths: &[&str],
283+
author: &str,
284+
) -> Result<Option<String>, String> {
285+
let mut git = Command::new("git");
286+
287+
if let Some(git_dir) = git_dir {
288+
git.current_dir(git_dir);
289+
}
290+
291+
git.args(["rev-list", "-n1", "--first-parent", "HEAD", "--author", author]);
292+
293+
if !target_paths.is_empty() {
294+
git.arg("--").args(target_paths);
295+
}
296+
let output = output_result(&mut git)?.trim().to_owned();
297+
if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
298+
}
299+
300+
/// Returns the most recent commit found in the local history that should definitely
301+
/// exist upstream. We identify upstream commits by the e-mail of the commit author.
302+
///
303+
/// If `include_head` is false, the HEAD (current) commit will be ignored and only
304+
/// its parents will be searched. This is useful for try/auto CI, where HEAD is
305+
/// actually a commit made by bors, although it is not upstream yet.
306+
fn get_closest_upstream_commit(
307+
git_dir: Option<&Path>,
308+
config: &GitConfig<'_>,
309+
env: CiEnv,
310+
) -> Result<String, String> {
311+
let mut git = Command::new("git");
312+
313+
if let Some(git_dir) = git_dir {
314+
git.current_dir(git_dir);
315+
}
316+
317+
let base = match env {
318+
CiEnv::None => "HEAD",
319+
CiEnv::GitHubActions => {
320+
// On CI, we always have a merge commit at the tip.
321+
// We thus skip it, because although it can be creatd by
322+
// `config.git_merge_commit_email`, it should not be upstream.
323+
"HEAD^1"
324+
}
325+
};
326+
git.args([
327+
"rev-list",
328+
&format!("--author={}", config.git_merge_commit_email),
329+
"-n1",
330+
"--first-parent",
331+
&base,
332+
]);
333+
334+
Ok(output_result(&mut git)?.trim().to_owned())
335+
}
336+
168337
/// Returns the files that have been modified in the current branch compared to the master branch.
338+
/// This includes committed changes, uncommitted changes, and changes that are not even staged.
339+
///
169340
/// The `extensions` parameter can be used to filter the files by their extension.
170341
/// Does not include removed files.
171342
/// If `extensions` is empty, all files will be returned.
172343
pub fn get_git_modified_files(
173344
config: &GitConfig<'_>,
174345
git_dir: Option<&Path>,
175346
extensions: &[&str],
176-
) -> Result<Option<Vec<String>>, String> {
177-
let merge_base = get_closest_merge_commit(git_dir, config, &[])?;
347+
) -> Result<Vec<String>, String> {
348+
let merge_base = get_closest_upstream_commit(git_dir, config, CiEnv::None)?;
178349

179350
let mut git = Command::new("git");
180351
if let Some(git_dir) = git_dir {
@@ -195,7 +366,7 @@ pub fn get_git_modified_files(
195366
}
196367
})
197368
.collect();
198-
Ok(Some(files))
369+
Ok(files)
199370
}
200371

201372
/// Returns the files that haven't been added to git yet.

0 commit comments

Comments
 (0)