Skip to content

Commit ce4a45d

Browse files
authored
Rollup merge of rust-lang#63525 - matklad:centraliza-file-loading, r=petrochenkov
Make sure that all file loading happens via SourceMap. That way, callers don't need to repeat the "let's add this to sm manually for tracking dependencies" trick. It should make it easier to switch to using `FileLoader` for binary files in the future as well. cc rust-lang#62948. r? @petrochenkov
2 parents 7ff5b38 + 14bc998 commit ce4a45d

File tree

6 files changed

+51
-34
lines changed

6 files changed

+51
-34
lines changed

src/libsyntax/ext/expand.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ use syntax_pos::{Span, DUMMY_SP, FileName};
2525

2626
use rustc_data_structures::fx::FxHashMap;
2727
use rustc_data_structures::sync::Lrc;
28-
use std::fs;
2928
use std::io::ErrorKind;
3029
use std::{iter, mem};
3130
use std::ops::DerefMut;
@@ -1239,13 +1238,11 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> {
12391238
}
12401239

12411240
let filename = self.cx.resolve_path(&*file.as_str(), it.span());
1242-
match fs::read_to_string(&filename) {
1243-
Ok(src) => {
1244-
let src_interned = Symbol::intern(&src);
1245-
1246-
// Add this input file to the code map to make it available as
1247-
// dependency information
1248-
self.cx.source_map().new_source_file(filename.into(), src);
1241+
match self.cx.source_map().load_file(&filename) {
1242+
Ok(source_file) => {
1243+
let src = source_file.src.as_ref()
1244+
.expect("freshly loaded file should have a source");
1245+
let src_interned = Symbol::intern(src.as_str());
12491246

12501247
let include_info = vec![
12511248
ast::NestedMetaItem::MetaItem(

src/libsyntax/source_map.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,26 @@ impl SourceMap {
170170
Ok(self.new_source_file(filename, src))
171171
}
172172

173+
/// Loads source file as a binary blob.
174+
///
175+
/// Unlike `load_file`, guarantees that no normalization like BOM-removal
176+
/// takes place.
177+
pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> {
178+
// Ideally, this should use `self.file_loader`, but it can't
179+
// deal with binary files yet.
180+
let bytes = fs::read(path)?;
181+
182+
// We need to add file to the `SourceMap`, so that it is present
183+
// in dep-info. There's also an edge case that file might be both
184+
// loaded as a binary via `include_bytes!` and as proper `SourceFile`
185+
// via `mod`, so we try to use real file contents and not just an
186+
// empty string.
187+
let text = std::str::from_utf8(&bytes).unwrap_or("")
188+
.to_string();
189+
self.new_source_file(path.to_owned().into(), text);
190+
Ok(bytes)
191+
}
192+
173193
pub fn files(&self) -> MappedLockGuard<'_, Vec<Lrc<SourceFile>>> {
174194
LockGuard::map(self.files.borrow(), |files| &mut files.source_files)
175195
}

src/libsyntax_ext/source_util.rs

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ use syntax::tokenstream;
99
use smallvec::SmallVec;
1010
use syntax_pos::{self, Pos, Span};
1111

12-
use std::fs;
13-
use std::io::ErrorKind;
1412
use rustc_data_structures::sync::Lrc;
1513

1614
// These macros all relate to the file system; they either return
@@ -114,20 +112,17 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To
114112
None => return DummyResult::any(sp)
115113
};
116114
let file = cx.resolve_path(file, sp);
117-
match fs::read_to_string(&file) {
118-
Ok(src) => {
119-
let interned_src = Symbol::intern(&src);
120-
121-
// Add this input file to the code map to make it available as
122-
// dependency information
123-
cx.source_map().new_source_file(file.into(), src);
124-
125-
base::MacEager::expr(cx.expr_str(sp, interned_src))
115+
match cx.source_map().load_binary_file(&file) {
116+
Ok(bytes) => match std::str::from_utf8(&bytes) {
117+
Ok(src) => {
118+
let interned_src = Symbol::intern(&src);
119+
base::MacEager::expr(cx.expr_str(sp, interned_src))
120+
}
121+
Err(_) => {
122+
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
123+
DummyResult::any(sp)
124+
}
126125
},
127-
Err(ref e) if e.kind() == ErrorKind::InvalidData => {
128-
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
129-
DummyResult::any(sp)
130-
}
131126
Err(e) => {
132127
cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
133128
DummyResult::any(sp)
@@ -142,18 +137,8 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::
142137
None => return DummyResult::any(sp)
143138
};
144139
let file = cx.resolve_path(file, sp);
145-
match fs::read(&file) {
140+
match cx.source_map().load_binary_file(&file) {
146141
Ok(bytes) => {
147-
// Add the contents to the source map if it contains UTF-8.
148-
let (contents, bytes) = match String::from_utf8(bytes) {
149-
Ok(s) => {
150-
let bytes = s.as_bytes().to_owned();
151-
(s, bytes)
152-
},
153-
Err(e) => (String::new(), e.into_bytes()),
154-
};
155-
cx.source_map().new_source_file(file.into(), contents);
156-
157142
base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes))))
158143
},
159144
Err(e) => {

src/test/ui/.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
22
trailing-carriage-return-in-string.rs -text
3+
*.bin -text

src/test/ui/include-macros/data.bin

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
This file starts with BOM.
2+
Lines are separated by \r\n.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// run-pass
2+
3+
fn main() {
4+
assert_eq!(
5+
&include_bytes!("data.bin")[..],
6+
&b"\xEF\xBB\xBFThis file starts with BOM.\r\nLines are separated by \\r\\n.\r\n"[..],
7+
);
8+
assert_eq!(
9+
include_str!("data.bin"),
10+
"\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n",
11+
);
12+
}

0 commit comments

Comments
 (0)