Skip to content

Commit 95ce5af

Browse files
authored
Rollup merge of #143883 - pietroalbini:pa-linkchecker-extra-target, r=ehuss
Add `--link-targets-dir` argument to linkchecker In my release notes API list tool (#143053) I want to check whether all links generated by the tool are actually valid, and using linkchecker seems to be the most sensible choice. Linkchecker currently has a fairly big limitation though: it can only check a single directory, it checks *all* of the files within it, and link targets must point inside that same directory. This works great when checking the whole documentation package, but in my case I only need to check that one file contains valid links to the standard library docs. To solve that, this PR adds a new `--link-targets-dir` flag to linkchecker. Directories passed to it will be valid link targets (with lower priority than the root being checked), but links within them will not be checked. I'm not that happy with the name of the flag, happy for it to be bikeshedded.
2 parents cb6785f + 6693b39 commit 95ce5af

File tree

4 files changed

+101
-26
lines changed

4 files changed

+101
-26
lines changed

src/bootstrap/src/core/build_steps/check.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,3 +556,9 @@ tool_check_step!(Compiletest {
556556
allow_features: COMPILETEST_ALLOW_FEATURES,
557557
default: false,
558558
});
559+
560+
tool_check_step!(Linkchecker {
561+
path: "src/tools/linkchecker",
562+
mode: |_builder| Mode::ToolBootstrap,
563+
default: false
564+
});

src/bootstrap/src/core/builder/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,7 @@ impl<'a> Builder<'a> {
10331033
check::Compiletest,
10341034
check::FeaturesStatusDump,
10351035
check::CoverageDump,
1036+
check::Linkchecker,
10361037
// This has special staging logic, it may run on stage 1 while others run on stage 0.
10371038
// It takes quite some time to build stage 1, so put this at the end.
10381039
//

src/tools/linkchecker/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "linkchecker"
33
version = "0.1.0"
4-
edition = "2021"
4+
edition = "2024"
55

66
[[bin]]
77
name = "linkchecker"

src/tools/linkchecker/main.rs

Lines changed: 93 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@
1717
//! should catch the majority of "broken link" cases.
1818
1919
use std::cell::{Cell, RefCell};
20+
use std::collections::hash_map::Entry;
2021
use std::collections::{HashMap, HashSet};
21-
use std::io::ErrorKind;
22+
use std::fs;
23+
use std::iter::once;
2224
use std::path::{Component, Path, PathBuf};
2325
use std::rc::Rc;
2426
use std::time::Instant;
25-
use std::{env, fs};
2627

2728
use html5ever::tendril::ByteTendril;
2829
use html5ever::tokenizer::{
@@ -110,10 +111,25 @@ macro_rules! t {
110111
};
111112
}
112113

114+
struct Cli {
115+
docs: PathBuf,
116+
link_targets_dirs: Vec<PathBuf>,
117+
}
118+
113119
fn main() {
114-
let docs = env::args_os().nth(1).expect("doc path should be first argument");
115-
let docs = env::current_dir().unwrap().join(docs);
116-
let mut checker = Checker { root: docs.clone(), cache: HashMap::new() };
120+
let cli = match parse_cli() {
121+
Ok(cli) => cli,
122+
Err(err) => {
123+
eprintln!("error: {err}");
124+
usage_and_exit(1);
125+
}
126+
};
127+
128+
let mut checker = Checker {
129+
root: cli.docs.clone(),
130+
link_targets_dirs: cli.link_targets_dirs,
131+
cache: HashMap::new(),
132+
};
117133
let mut report = Report {
118134
errors: 0,
119135
start: Instant::now(),
@@ -125,16 +141,58 @@ fn main() {
125141
intra_doc_exceptions: 0,
126142
has_broken_urls: false,
127143
};
128-
checker.walk(&docs, &mut report);
144+
checker.walk(&cli.docs, &mut report);
129145
report.report();
130146
if report.errors != 0 {
131147
println!("found some broken links");
132148
std::process::exit(1);
133149
}
134150
}
135151

152+
fn parse_cli() -> Result<Cli, String> {
153+
fn to_absolute_path(arg: &str) -> Result<PathBuf, String> {
154+
std::path::absolute(arg).map_err(|e| format!("could not convert to absolute {arg}: {e}"))
155+
}
156+
157+
let mut verbatim = false;
158+
let mut docs = None;
159+
let mut link_targets_dirs = Vec::new();
160+
161+
let mut args = std::env::args().skip(1);
162+
while let Some(arg) = args.next() {
163+
if !verbatim && arg == "--" {
164+
verbatim = true;
165+
} else if !verbatim && (arg == "-h" || arg == "--help") {
166+
usage_and_exit(0)
167+
} else if !verbatim && arg == "--link-targets-dir" {
168+
link_targets_dirs.push(to_absolute_path(
169+
&args.next().ok_or("missing value for --link-targets-dir")?,
170+
)?);
171+
} else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") {
172+
link_targets_dirs.push(to_absolute_path(value)?);
173+
} else if !verbatim && arg.starts_with('-') {
174+
return Err(format!("unknown flag: {arg}"));
175+
} else if docs.is_none() {
176+
docs = Some(arg);
177+
} else {
178+
return Err("too many positional arguments".into());
179+
}
180+
}
181+
182+
Ok(Cli {
183+
docs: to_absolute_path(&docs.ok_or("missing first positional argument")?)?,
184+
link_targets_dirs,
185+
})
186+
}
187+
188+
fn usage_and_exit(code: i32) -> ! {
189+
eprintln!("usage: linkchecker PATH [--link-targets-dir=PATH ...]");
190+
std::process::exit(code)
191+
}
192+
136193
struct Checker {
137194
root: PathBuf,
195+
link_targets_dirs: Vec<PathBuf>,
138196
cache: Cache,
139197
}
140198

@@ -420,37 +478,34 @@ impl Checker {
420478

421479
/// Load a file from disk, or from the cache if available.
422480
fn load_file(&mut self, file: &Path, report: &mut Report) -> (String, &FileEntry) {
423-
// https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
424-
#[cfg(windows)]
425-
const ERROR_INVALID_NAME: i32 = 123;
426-
427481
let pretty_path =
428482
file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string();
429483

430-
let entry =
431-
self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) {
484+
for base in once(&self.root).chain(self.link_targets_dirs.iter()) {
485+
let entry = self.cache.entry(pretty_path.clone());
486+
if let Entry::Occupied(e) = &entry
487+
&& !matches!(e.get(), FileEntry::Missing)
488+
{
489+
break;
490+
}
491+
492+
let file = base.join(&pretty_path);
493+
entry.insert_entry(match fs::metadata(&file) {
432494
Ok(metadata) if metadata.is_dir() => FileEntry::Dir,
433495
Ok(_) => {
434496
if file.extension().and_then(|s| s.to_str()) != Some("html") {
435497
FileEntry::OtherFile
436498
} else {
437499
report.html_files += 1;
438-
load_html_file(file, report)
500+
load_html_file(&file, report)
439501
}
440502
}
441-
Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing,
442-
Err(e) => {
443-
// If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`.
444-
// Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`.
445-
#[cfg(windows)]
446-
if e.raw_os_error() == Some(ERROR_INVALID_NAME)
447-
&& file.as_os_str().to_str().map_or(false, |s| s.contains("::"))
448-
{
449-
return FileEntry::Missing;
450-
}
451-
panic!("unexpected read error for {}: {}", file.display(), e);
452-
}
503+
Err(e) if is_not_found_error(&file, &e) => FileEntry::Missing,
504+
Err(e) => panic!("unexpected read error for {}: {}", file.display(), e),
453505
});
506+
}
507+
508+
let entry = self.cache.get(&pretty_path).unwrap();
454509
(pretty_path, entry)
455510
}
456511
}
@@ -629,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
629684
ids.insert(encoded);
630685
}
631686
}
687+
688+
fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool {
689+
// https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
690+
const WINDOWS_ERROR_INVALID_NAME: i32 = 123;
691+
692+
error.kind() == std::io::ErrorKind::NotFound
693+
// If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME`
694+
// rather than `NotFound`. Explicitly check for that so that the broken link can be allowed
695+
// in `LINKCHECK_EXCEPTIONS`.
696+
|| (cfg!(windows)
697+
&& error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME)
698+
&& path.as_os_str().to_str().map_or(false, |s| s.contains("::")))
699+
}

0 commit comments

Comments
 (0)