17
17
//! should catch the majority of "broken link" cases.
18
18
19
19
use std:: cell:: { Cell , RefCell } ;
20
+ use std:: collections:: hash_map:: Entry ;
20
21
use std:: collections:: { HashMap , HashSet } ;
21
- use std:: io:: ErrorKind ;
22
+ use std:: fs;
23
+ use std:: iter:: once;
22
24
use std:: path:: { Component , Path , PathBuf } ;
23
25
use std:: rc:: Rc ;
24
26
use std:: time:: Instant ;
25
- use std:: { env, fs} ;
26
27
27
28
use html5ever:: tendril:: ByteTendril ;
28
29
use html5ever:: tokenizer:: {
@@ -110,10 +111,25 @@ macro_rules! t {
110
111
} ;
111
112
}
112
113
114
/// Command-line options accepted by the link checker, as produced by `parse_cli`.
struct Cli {
    /// Absolute path built from the single positional argument; `main` uses it as the
    /// checker root and as the directory to walk.
    docs: PathBuf,
    /// Absolute paths collected from every `--link-targets-dir` occurrence, in the order
    /// they were given on the command line.
    link_targets_dirs: Vec<PathBuf>,
}
118
+
113
119
fn main ( ) {
114
- let docs = env:: args_os ( ) . nth ( 1 ) . expect ( "doc path should be first argument" ) ;
115
- let docs = env:: current_dir ( ) . unwrap ( ) . join ( docs) ;
116
- let mut checker = Checker { root : docs. clone ( ) , cache : HashMap :: new ( ) } ;
120
+ let cli = match parse_cli ( ) {
121
+ Ok ( cli) => cli,
122
+ Err ( err) => {
123
+ eprintln ! ( "error: {err}" ) ;
124
+ usage_and_exit ( 1 ) ;
125
+ }
126
+ } ;
127
+
128
+ let mut checker = Checker {
129
+ root : cli. docs . clone ( ) ,
130
+ link_targets_dirs : cli. link_targets_dirs ,
131
+ cache : HashMap :: new ( ) ,
132
+ } ;
117
133
let mut report = Report {
118
134
errors : 0 ,
119
135
start : Instant :: now ( ) ,
@@ -125,16 +141,58 @@ fn main() {
125
141
intra_doc_exceptions : 0 ,
126
142
has_broken_urls : false ,
127
143
} ;
128
- checker. walk ( & docs, & mut report) ;
144
+ checker. walk ( & cli . docs , & mut report) ;
129
145
report. report ( ) ;
130
146
if report. errors != 0 {
131
147
println ! ( "found some broken links" ) ;
132
148
std:: process:: exit ( 1 ) ;
133
149
}
134
150
}
135
151
152
+ fn parse_cli ( ) -> Result < Cli , String > {
153
+ fn to_absolute_path ( arg : & str ) -> Result < PathBuf , String > {
154
+ std:: path:: absolute ( arg) . map_err ( |e| format ! ( "could not convert to absolute {arg}: {e}" ) )
155
+ }
156
+
157
+ let mut verbatim = false ;
158
+ let mut docs = None ;
159
+ let mut link_targets_dirs = Vec :: new ( ) ;
160
+
161
+ let mut args = std:: env:: args ( ) . skip ( 1 ) ;
162
+ while let Some ( arg) = args. next ( ) {
163
+ if !verbatim && arg == "--" {
164
+ verbatim = true ;
165
+ } else if !verbatim && ( arg == "-h" || arg == "--help" ) {
166
+ usage_and_exit ( 0 )
167
+ } else if !verbatim && arg == "--link-targets-dir" {
168
+ link_targets_dirs. push ( to_absolute_path (
169
+ & args. next ( ) . ok_or ( "missing value for --link-targets-dir" ) ?,
170
+ ) ?) ;
171
+ } else if !verbatim && let Some ( value) = arg. strip_prefix ( "--link-targets-dir=" ) {
172
+ link_targets_dirs. push ( to_absolute_path ( value) ?) ;
173
+ } else if !verbatim && arg. starts_with ( '-' ) {
174
+ return Err ( format ! ( "unknown flag: {arg}" ) ) ;
175
+ } else if docs. is_none ( ) {
176
+ docs = Some ( arg) ;
177
+ } else {
178
+ return Err ( "too many positional arguments" . into ( ) ) ;
179
+ }
180
+ }
181
+
182
+ Ok ( Cli {
183
+ docs : to_absolute_path ( & docs. ok_or ( "missing first positional argument" ) ?) ?,
184
+ link_targets_dirs,
185
+ } )
186
+ }
187
+
188
/// Writes the one-line usage summary to standard error and terminates the process with
/// the given exit `code`. Never returns.
fn usage_and_exit(code: i32) -> ! {
    const USAGE: &str = "usage: linkchecker PATH [--link-targets-dir=PATH ...]";

    eprintln!("{USAGE}");
    std::process::exit(code)
}
192
+
136
193
struct Checker {
137
194
root : PathBuf ,
195
+ link_targets_dirs : Vec < PathBuf > ,
138
196
cache : Cache ,
139
197
}
140
198
@@ -420,37 +478,34 @@ impl Checker {
420
478
421
479
/// Load a file from disk, or from the cache if available.
422
480
fn load_file ( & mut self , file : & Path , report : & mut Report ) -> ( String , & FileEntry ) {
423
- // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
424
- #[ cfg( windows) ]
425
- const ERROR_INVALID_NAME : i32 = 123 ;
426
-
427
481
let pretty_path =
428
482
file. strip_prefix ( & self . root ) . unwrap_or ( file) . to_str ( ) . unwrap ( ) . to_string ( ) ;
429
483
430
- let entry =
431
- self . cache . entry ( pretty_path. clone ( ) ) . or_insert_with ( || match fs:: metadata ( file) {
484
+ for base in once ( & self . root ) . chain ( self . link_targets_dirs . iter ( ) ) {
485
+ let entry = self . cache . entry ( pretty_path. clone ( ) ) ;
486
+ if let Entry :: Occupied ( e) = & entry
487
+ && !matches ! ( e. get( ) , FileEntry :: Missing )
488
+ {
489
+ break ;
490
+ }
491
+
492
+ let file = base. join ( & pretty_path) ;
493
+ entry. insert_entry ( match fs:: metadata ( & file) {
432
494
Ok ( metadata) if metadata. is_dir ( ) => FileEntry :: Dir ,
433
495
Ok ( _) => {
434
496
if file. extension ( ) . and_then ( |s| s. to_str ( ) ) != Some ( "html" ) {
435
497
FileEntry :: OtherFile
436
498
} else {
437
499
report. html_files += 1 ;
438
- load_html_file ( file, report)
500
+ load_html_file ( & file, report)
439
501
}
440
502
}
441
- Err ( e) if e. kind ( ) == ErrorKind :: NotFound => FileEntry :: Missing ,
442
- Err ( e) => {
443
- // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`.
444
- // Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`.
445
- #[ cfg( windows) ]
446
- if e. raw_os_error ( ) == Some ( ERROR_INVALID_NAME )
447
- && file. as_os_str ( ) . to_str ( ) . map_or ( false , |s| s. contains ( "::" ) )
448
- {
449
- return FileEntry :: Missing ;
450
- }
451
- panic ! ( "unexpected read error for {}: {}" , file. display( ) , e) ;
452
- }
503
+ Err ( e) if is_not_found_error ( & file, & e) => FileEntry :: Missing ,
504
+ Err ( e) => panic ! ( "unexpected read error for {}: {}" , file. display( ) , e) ,
453
505
} ) ;
506
+ }
507
+
508
+ let entry = self . cache . get ( & pretty_path) . unwrap ( ) ;
454
509
( pretty_path, entry)
455
510
}
456
511
}
@@ -629,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
629
684
ids. insert ( encoded) ;
630
685
}
631
686
}
687
+
688
/// Reports whether `error`, returned while accessing `path`, means the path does not exist.
///
/// Besides the regular `ErrorKind::NotFound`, this also accepts one Windows-specific case:
/// a broken intra-doc link containing `::` makes the OS return `ERROR_INVALID_NAME` rather
/// than "not found". Treating that as missing lets the broken link be allowed in
/// `LINKCHECK_EXCEPTIONS`.
fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool {
    // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
    const WINDOWS_ERROR_INVALID_NAME: i32 = 123;

    if matches!(error.kind(), std::io::ErrorKind::NotFound) {
        return true;
    }

    // Everything below applies only to the Windows invalid-name special case.
    if !cfg!(windows) || error.raw_os_error() != Some(WINDOWS_ERROR_INVALID_NAME) {
        return false;
    }

    path.to_str().is_some_and(|text| text.contains("::"))
}
0 commit comments