From b33ccc309da4d83244495c5a6d71cb2891e0a476 Mon Sep 17 00:00:00 2001 From: Sasha Pourcelot Date: Thu, 26 Aug 2021 12:07:39 +0200 Subject: [PATCH 01/12] Add the -Z html-output option and ui header command Our goal is to generate HTML pages representing the error emitted by the compiler. This commit adds a -Z html-output flag to rustc. When passed, the compiler will emit its errors in HTML format in stderr. This flag conflicts with any value of the error-format argument. As a bonus, the compiletest crate has been updated to handle the // html-output compiler directive. Adding this flag will invoke the compiler with the -Z html-output flag and compare its output with a corresponding .html file, similarly as .stderr files. --- compiler/rustc_errors/src/emitter.rs | 166 ++++++++++++++++++++++++++ compiler/rustc_interface/src/tests.rs | 1 + compiler/rustc_session/src/config.rs | 18 +-- compiler/rustc_session/src/options.rs | 3 + compiler/rustc_session/src/session.rs | 32 ++++- src/librustdoc/core.rs | 2 +- src/librustdoc/doctest.rs | 2 +- src/tools/compiletest/src/common.rs | 2 + src/tools/compiletest/src/header.rs | 11 ++ src/tools/compiletest/src/runtest.rs | 74 ++++++++++-- 10 files changed, 286 insertions(+), 25 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 29f352ae58559..6d51882fc8be3 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -637,6 +637,39 @@ impl EmitterWriter { } } + pub fn html( + dst: Box, + source_map: Option>, + short_message: bool, + teach: bool, + macro_backtrace: bool, + ) -> EmitterWriter { + let output_file = HtmlFormatter::new(dst); + + let dst = Destination::HtmlFile(output_file); + EmitterWriter { + dst, + sm: source_map, + short_message, + teach, + ui_testing: false, + // FIXME(scrabsha): do we expect a certain terminal width in + // html-rendered files? 
+ terminal_width: None, + macro_backtrace, + } + } + + pub fn html_stderr( + source_map: Option>, + short_message: bool, + teach: bool, + macro_backtrace: bool, + ) -> EmitterWriter { + let dst = Box::new(io::stderr()); + EmitterWriter::html(dst, source_map, short_message, teach, macro_backtrace) + } + pub fn ui_testing(mut self, ui_testing: bool) -> Self { self.ui_testing = ui_testing; self @@ -2126,6 +2159,8 @@ fn emit_to_destination( let mut dst = dst.writable(); + dst.begin()?; + // In order to prevent error message interleaving, where multiple error lines get intermixed // when multiple compiler processes error simultaneously, we emit errors with additional // steps. @@ -2150,6 +2185,7 @@ fn emit_to_destination( } } dst.flush()?; + dst.end()?; Ok(()) } @@ -2158,6 +2194,7 @@ pub enum Destination { Buffered(BufferWriter), // The bool denotes whether we should be emitting ansi color codes or not Raw(Box<(dyn Write + Send)>, bool), + HtmlFile(HtmlFormatter), } pub enum WritableDst<'a> { @@ -2165,6 +2202,7 @@ pub enum WritableDst<'a> { Buffered(&'a mut BufferWriter, Buffer), Raw(&'a mut (dyn Write + Send)), ColoredRaw(Ansi<&'a mut (dyn Write + Send)>), + HtmlFile(&'a mut HtmlFormatter), } impl Destination { @@ -2192,6 +2230,7 @@ impl Destination { } Destination::Raw(ref mut t, false) => WritableDst::Raw(t), Destination::Raw(ref mut t, true) => WritableDst::ColoredRaw(Ansi::new(t)), + Destination::HtmlFile(ref mut file) => WritableDst::HtmlFile(file), } } @@ -2200,6 +2239,7 @@ impl Destination { Self::Terminal(ref stream) => stream.supports_color(), Self::Buffered(ref buffer) => buffer.buffer().supports_color(), Self::Raw(_, supports_color) => supports_color, + Self::HtmlFile(_) => true, } } } @@ -2261,6 +2301,7 @@ impl<'a> WritableDst<'a> { WritableDst::Buffered(_, ref mut t) => t.set_color(color), WritableDst::ColoredRaw(ref mut t) => t.set_color(color), WritableDst::Raw(_) => Ok(()), + WritableDst::HtmlFile(ref mut t) => t.set_color(color), } } @@ -2270,6 
+2311,27 @@ impl<'a> WritableDst<'a> { WritableDst::Buffered(_, ref mut t) => t.reset(), WritableDst::ColoredRaw(ref mut t) => t.reset(), WritableDst::Raw(_) => Ok(()), + WritableDst::HtmlFile(ref mut t) => t.reset(), + } + } + + fn begin(&mut self) -> io::Result<()> { + match *self { + WritableDst::Terminal(_) => Ok(()), + WritableDst::Buffered(_, _) => Ok(()), + WritableDst::Raw(_) => Ok(()), + WritableDst::ColoredRaw(_) => Ok(()), + WritableDst::HtmlFile(ref mut t) => t.begin(), + } + } + + fn end(&mut self) -> io::Result<()> { + match *self { + WritableDst::Terminal(_) => Ok(()), + WritableDst::Buffered(_, _) => Ok(()), + WritableDst::Raw(_) => Ok(()), + WritableDst::ColoredRaw(_) => Ok(()), + WritableDst::HtmlFile(ref mut t) => t.end(), } } } @@ -2281,6 +2343,7 @@ impl<'a> Write for WritableDst<'a> { WritableDst::Buffered(_, ref mut buf) => buf.write(bytes), WritableDst::Raw(ref mut w) => w.write(bytes), WritableDst::ColoredRaw(ref mut t) => t.write(bytes), + WritableDst::HtmlFile(ref mut f) => f.write(bytes), } } @@ -2290,6 +2353,7 @@ impl<'a> Write for WritableDst<'a> { WritableDst::Buffered(_, ref mut buf) => buf.flush(), WritableDst::Raw(ref mut w) => w.flush(), WritableDst::ColoredRaw(ref mut w) => w.flush(), + WritableDst::HtmlFile(ref mut f) => f.flush(), } } } @@ -2302,6 +2366,108 @@ impl<'a> Drop for WritableDst<'a> { } } +/// A type that formats everything it receives to HTML. It implements both +/// Write (so that we can write text in it) and WriteColor (so that we can +/// set which color to use). +pub struct HtmlFormatter { + inner: Box, +} + +impl HtmlFormatter { + fn new(inner: Box) -> HtmlFormatter { + HtmlFormatter { inner } + } + + fn begin(&mut self) -> io::Result<()> { + write!(self.inner, "
")
+    }
+
+    fn end(&mut self) -> io::Result<()> {
+        write!(self.inner, "
") + } + + // Creates the string of a `style` attribute in an html element + fn mk_style_string(spec: &ColorSpec) -> String { + let mut buffer = String::new(); + + let colors = [("color", spec.fg()), ("background-color", spec.bg())]; + let colors = colors.iter().flat_map(|(c_name, c_value)| { + c_value.map(|c_value| (c_name, term_color_to_html_color(*c_value))) + }); + + for (key, value) in colors { + buffer.push_str(format!("{}: {};", key, value).as_str()); + } + + let font_modifiers = [ + ("font-weight:bold", spec.bold()), + ("font-style:italic", spec.italic()), + ("text-decoration:underline", spec.underline()), + ]; + let font_modifiers = font_modifiers + .iter() + .flat_map(|(property, applicable)| if *applicable { Some(property) } else { None }) + .copied(); + + for font_modifier in font_modifiers { + buffer.push_str(format!("{};", font_modifier).as_str()); + } + + // The trailing semicolon must removed as defined in the style + // attribute grammar: + // https://drafts.csswg.org/css-style-attr/#syntax + let _ = buffer.pop(); + + buffer + } +} + +impl Write for HtmlFormatter { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.inner.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +impl WriteColor for HtmlFormatter { + fn supports_color(&self) -> bool { + true + } + + fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { + write!(self, "", Self::mk_style_string(spec)) + } + + fn reset(&mut self) -> io::Result<()> { + write!(self, "") + } +} + +fn term_color_to_html_color(color: Color) -> Cow<'static, str> { + // All the CSS color keywords are available at: + // https://drafts.csswg.org/css-color/#named-colors + match color { + Color::Black => Cow::Borrowed("black"), + Color::Blue => Cow::Borrowed("blue"), + Color::Green => Cow::Borrowed("green"), + Color::Red => Cow::Borrowed("red"), + Color::Cyan => Cow::Borrowed("cyan"), + Color::Magenta => Cow::Borrowed("magenta"), + Color::Yellow => 
Cow::Borrowed("yellow"), + Color::White => Cow::Borrowed("white"), + + Color::Rgb(r, g, b) => Cow::Owned(format!("rgb({},{},{})", r, g, b)), + + // FIXME: should we support ANSI colors? + Color::Ansi256(_) => unreachable!(), + + anything => panic!("Unknown color code: {:?}", anything), + } +} + /// Whether the original and suggested code are visually similar enough to warrant extra wording. pub fn is_case_difference(sm: &SourceMap, suggested: &str, sp: Span) -> bool { // FIXME: this should probably be extended to also account for `FO0` → `FOO` and unicode. diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index afab919bc3c2c..ee3efe19032be 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -649,6 +649,7 @@ fn test_debugging_options_tracking_hash() { untracked!(emit_stack_sizes, true); untracked!(future_incompat_test, true); untracked!(hir_stats, true); + untracked!(html_output, true); untracked!(identify_regions, true); untracked!(incremental_ignore_spans, true); untracked!(incremental_info, true); diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index fdedb7e6a4afe..54c73ec3aa476 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -291,7 +291,7 @@ impl OutputType { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ErrorOutputType { /// Output meant for the consumption of humans. - HumanReadable(HumanReadableErrorType), + HumanReadable(HumanReadableErrorType, bool /* Explicitely indicated by cli */), /// Output that's consumed by other tools such as `rustfix` or the `RLS`. Json { /// Render the JSON in a human readable way (with indents and newlines). 
@@ -304,7 +304,7 @@ pub enum ErrorOutputType { impl Default for ErrorOutputType { fn default() -> Self { - Self::HumanReadable(HumanReadableErrorType::Default(ColorConfig::Auto)) + Self::HumanReadable(HumanReadableErrorType::Default(ColorConfig::Auto), false) } } @@ -1268,17 +1268,19 @@ pub fn parse_error_format( let error_format = if matches.opts_present(&["error-format".to_owned()]) { match matches.opt_str("error-format").as_ref().map(|s| &s[..]) { None | Some("human") => { - ErrorOutputType::HumanReadable(HumanReadableErrorType::Default(color)) + ErrorOutputType::HumanReadable(HumanReadableErrorType::Default(color), true) } Some("human-annotate-rs") => { - ErrorOutputType::HumanReadable(HumanReadableErrorType::AnnotateSnippet(color)) + ErrorOutputType::HumanReadable(HumanReadableErrorType::AnnotateSnippet(color), true) } Some("json") => ErrorOutputType::Json { pretty: false, json_rendered }, Some("pretty-json") => ErrorOutputType::Json { pretty: true, json_rendered }, - Some("short") => ErrorOutputType::HumanReadable(HumanReadableErrorType::Short(color)), + Some("short") => { + ErrorOutputType::HumanReadable(HumanReadableErrorType::Short(color), true) + } Some(arg) => early_error( - ErrorOutputType::HumanReadable(HumanReadableErrorType::Default(color)), + ErrorOutputType::HumanReadable(HumanReadableErrorType::Default(color), false), &format!( "argument for `--error-format` must be `human`, `json` or \ `short` (instead was `{}`)", @@ -1287,7 +1289,7 @@ pub fn parse_error_format( ), } } else { - ErrorOutputType::HumanReadable(HumanReadableErrorType::Default(color)) + ErrorOutputType::HumanReadable(HumanReadableErrorType::Default(color), false) }; match error_format { @@ -1352,7 +1354,7 @@ fn check_debug_option_stability( "`--error-format=pretty-json` is unstable", ); } - if let ErrorOutputType::HumanReadable(HumanReadableErrorType::AnnotateSnippet(_)) = + if let ErrorOutputType::HumanReadable(HumanReadableErrorType::AnnotateSnippet(_), _) = error_format { 
early_error( diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 9a1be40558ccb..a7dbdfff3b24c 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1097,6 +1097,9 @@ options! { environment variable `RUSTC_GRAPHVIZ_FONT` (default: `Courier, monospace`)"), hir_stats: bool = (false, parse_bool, [UNTRACKED], "print some statistics about AST and HIR (default: no)"), + // FIXME(scrabsha): update tests in compiler/rustc_interface/src/test.rs + html_output: bool = (false, parse_bool, [UNTRACKED], + "prints the compiler output as HTML code (default: no)"), human_readable_cgu_names: bool = (false, parse_bool, [TRACKED], "generate human-readable, predictable names for codegen units (default: no)"), identify_regions: bool = (false, parse_bool, [UNTRACKED], diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index c71595ab57e72..19cbb4757fb21 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -1,7 +1,7 @@ use crate::cgu_reuse_tracker::CguReuseTracker; use crate::code_stats::CodeStats; pub use crate::code_stats::{DataTypeKind, FieldInfo, SizeKind, VariantInfo}; -use crate::config::{self, CrateType, OutputType, SwitchWithOptPath}; +use crate::config::{self, CrateType, ErrorOutputType, OutputType, SwitchWithOptPath}; use crate::filesearch; use crate::lint::{self, LintId}; use crate::parse::ParseSess; @@ -1093,8 +1093,32 @@ fn default_emitter( emitter_dest: Option>, ) -> Box { let macro_backtrace = sopts.debugging_opts.macro_backtrace; + + let error_format_passed_explicitely = matches!( + sopts.error_format, + config::ErrorOutputType::HumanReadable(_, true) | config::ErrorOutputType::Json { .. 
} + ); + + if error_format_passed_explicitely && sopts.debugging_opts.html_output { + early_error( + ErrorOutputType::default(), + "-Z html-output debug flag conflicts with --error-format", + ); + } + + if sopts.debugging_opts.html_output { + // FIXME(scrabsha): ensure that there's no conflicts with other + // parameters. + return Box::new(EmitterWriter::html_stderr( + Some(source_map), + false, + true, + macro_backtrace, + )); + } + match (sopts.error_format, emitter_dest) { - (config::ErrorOutputType::HumanReadable(kind), dst) => { + (config::ErrorOutputType::HumanReadable(kind, _), dst) => { let (short, color_config) = kind.unzip(); if let HumanReadableErrorType::AnnotateSnippet(_) = kind { @@ -1408,7 +1432,7 @@ pub enum IncrCompSession { pub fn early_error_no_abort(output: config::ErrorOutputType, msg: &str) { let emitter: Box = match output { - config::ErrorOutputType::HumanReadable(kind) => { + config::ErrorOutputType::HumanReadable(kind, _) => { let (short, color_config) = kind.unzip(); Box::new(EmitterWriter::stderr(color_config, None, short, false, None, false)) } @@ -1427,7 +1451,7 @@ pub fn early_error(output: config::ErrorOutputType, msg: &str) -> ! 
{ pub fn early_warn(output: config::ErrorOutputType, msg: &str) { let emitter: Box = match output { - config::ErrorOutputType::HumanReadable(kind) => { + config::ErrorOutputType::HumanReadable(kind, _) => { let (short, color_config) = kind.unzip(); Box::new(EmitterWriter::stderr(color_config, None, short, false, None, false)) } diff --git a/src/librustdoc/core.rs b/src/librustdoc/core.rs index bd1d970fc199b..1cb037f4fbbb6 100644 --- a/src/librustdoc/core.rs +++ b/src/librustdoc/core.rs @@ -148,7 +148,7 @@ crate fn new_handler( debugging_opts: &DebuggingOptions, ) -> rustc_errors::Handler { let emitter: Box = match error_format { - ErrorOutputType::HumanReadable(kind) => { + ErrorOutputType::HumanReadable(kind, _) => { let (short, color_config) = kind.unzip(); Box::new( EmitterWriter::stderr( diff --git a/src/librustdoc/doctest.rs b/src/librustdoc/doctest.rs index e6097f5cad7f3..fa4a9a51afd04 100644 --- a/src/librustdoc/doctest.rs +++ b/src/librustdoc/doctest.rs @@ -367,7 +367,7 @@ fn run_test( path.to_str().expect("target path must be valid unicode").to_string() } }); - if let ErrorOutputType::HumanReadable(kind) = options.error_format { + if let ErrorOutputType::HumanReadable(kind, _) = options.error_format { let (short, color_config) = kind.unzip(); if short { diff --git a/src/tools/compiletest/src/common.rs b/src/tools/compiletest/src/common.rs index 99b0a3454e89c..e0e56454233c2 100644 --- a/src/tools/compiletest/src/common.rs +++ b/src/tools/compiletest/src/common.rs @@ -413,10 +413,12 @@ pub const UI_EXTENSIONS: &[&str] = &[ UI_STDERR_64, UI_STDERR_32, UI_STDERR_16, + HTML_OUTPUT, ]; pub const UI_STDERR: &str = "stderr"; pub const UI_STDOUT: &str = "stdout"; pub const UI_FIXED: &str = "fixed"; +pub const HTML_OUTPUT: &str = "html"; pub const UI_RUN_STDERR: &str = "run.stderr"; pub const UI_RUN_STDOUT: &str = "run.stdout"; pub const UI_STDERR_64: &str = "64bit.stderr"; diff --git a/src/tools/compiletest/src/header.rs b/src/tools/compiletest/src/header.rs index 
28089e85b55b4..bdcda298450b4 100644 --- a/src/tools/compiletest/src/header.rs +++ b/src/tools/compiletest/src/header.rs @@ -135,6 +135,8 @@ pub struct TestProps { pub should_ice: bool, // If true, the stderr is expected to be different across bit-widths. pub stderr_per_bitwidth: bool, + // If true, the errors are emitted in a separate .html file + pub html_output: bool, } impl TestProps { @@ -175,6 +177,7 @@ impl TestProps { assembly_output: None, should_ice: false, stderr_per_bitwidth: false, + html_output: false, } } @@ -350,6 +353,10 @@ impl TestProps { if !self.stderr_per_bitwidth { self.stderr_per_bitwidth = config.parse_stderr_per_bitwidth(ln); } + + if !self.html_output { + self.html_output = config.parse_html_output(ln); + } }); } @@ -731,6 +738,10 @@ impl Config { fn parse_edition(&self, line: &str) -> Option { self.parse_name_value_directive(line, "edition") } + + fn parse_html_output(&self, line: &str) -> bool { + self.parse_name_directive(line, "html-output") + } } fn expand_variables(mut value: String, config: &Config) -> String { diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index 51a4d74109a63..d75ff1bd282cb 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -1,6 +1,8 @@ // ignore-tidy-filelength -use crate::common::{expected_output_path, UI_EXTENSIONS, UI_FIXED, UI_STDERR, UI_STDOUT}; +use crate::common::{ + expected_output_path, HTML_OUTPUT, UI_EXTENSIONS, UI_FIXED, UI_STDERR, UI_STDOUT, +}; use crate::common::{output_base_dir, output_base_name, output_testname_unique}; use crate::common::{Assembly, Incremental, JsDocTest, MirOpt, RunMake, RustdocJson, Ui}; use crate::common::{Codegen, CodegenUnits, DebugInfo, Debugger, Rustdoc}; @@ -1597,8 +1599,13 @@ impl<'test> TestCx<'test> { _ => AllowUnused::No, }; - let mut rustc = - self.make_compile_args(&self.testpaths.file, output_file, emit_metadata, allow_unused); + let mut rustc = self.make_compile_args( + 
&self.testpaths.file, + output_file, + emit_metadata, + allow_unused, + DisableErrorFormat::No, + ); rustc.arg("-L").arg(&self.aux_output_dir_name()); @@ -1828,8 +1835,13 @@ impl<'test> TestCx<'test> { // Create the directory for the stdout/stderr files. create_dir_all(aux_cx.output_base_dir()).unwrap(); let input_file = &aux_testpaths.file; - let mut aux_rustc = - aux_cx.make_compile_args(input_file, aux_output, EmitMetadata::No, AllowUnused::No); + let mut aux_rustc = aux_cx.make_compile_args( + input_file, + aux_output, + EmitMetadata::No, + AllowUnused::No, + DisableErrorFormat::No, + ); for key in &aux_props.unset_rustc_env { aux_rustc.env_remove(key); @@ -1947,6 +1959,7 @@ impl<'test> TestCx<'test> { output_file: TargetLocation, emit_metadata: EmitMetadata, allow_unused: AllowUnused, + disable_no_error_format: DisableErrorFormat, ) -> Command { let is_aux = input_file.components().map(|c| c.as_os_str()).any(|c| c == "auxiliary"); let is_rustdoc = self.is_rustdoc() && !is_aux; @@ -1987,14 +2000,18 @@ impl<'test> TestCx<'test> { // If we are extracting and matching errors in the new // fashion, then you want JSON mode. Old-skool error // patterns still match the raw compiler output. 
- if self.props.error_patterns.is_empty() { + if self.props.error_patterns.is_empty() + && disable_no_error_format == DisableErrorFormat::No + { rustc.args(&["--error-format", "json"]); } rustc.arg("-Zui-testing"); rustc.arg("-Zdeduplicate-diagnostics=no"); } Ui => { - if !self.props.compile_flags.iter().any(|s| s.starts_with("--error-format")) { + if !self.props.compile_flags.iter().any(|s| s.starts_with("--error-format")) + && disable_no_error_format == DisableErrorFormat::No + { rustc.args(&["--error-format", "json"]); } rustc.arg("-Ccodegen-units=1"); @@ -2294,8 +2311,13 @@ impl<'test> TestCx<'test> { let output_file = TargetLocation::ThisDirectory(self.output_base_dir()); let input_file = &self.testpaths.file; - let mut rustc = - self.make_compile_args(input_file, output_file, EmitMetadata::No, AllowUnused::No); + let mut rustc = self.make_compile_args( + input_file, + output_file, + EmitMetadata::No, + AllowUnused::No, + DisableErrorFormat::No, + ); rustc.arg("-L").arg(aux_dir).arg("--emit=llvm-ir"); self.compose_and_run_compiler(rustc, None) @@ -2308,8 +2330,13 @@ impl<'test> TestCx<'test> { let output_file = TargetLocation::ThisFile(output_path.clone()); let input_file = &self.testpaths.file; - let mut rustc = - self.make_compile_args(input_file, output_file, EmitMetadata::No, AllowUnused::No); + let mut rustc = self.make_compile_args( + input_file, + output_file, + EmitMetadata::No, + AllowUnused::No, + DisableErrorFormat::No, + ); rustc.arg("-L").arg(self.aux_output_dir_name()); @@ -2452,6 +2479,7 @@ impl<'test> TestCx<'test> { output_file, EmitMetadata::No, AllowUnused::Yes, + DisableErrorFormat::No, ); rustc.arg("-L").arg(&new_rustdoc.aux_output_dir_name()); new_rustdoc.build_all_auxiliary(&mut rustc); @@ -3355,6 +3383,23 @@ impl<'test> TestCx<'test> { ); } + if self.props.html_output { + let mut rustc = self.make_compile_args( + &self.testpaths.file, + TargetLocation::ThisDirectory(self.make_exe_name()), + emit_metadata, + AllowUnused::No, + 
DisableErrorFormat::Yes, + ); + rustc.arg("-Zhtml_output"); + let res = self.compose_and_run_compiler(rustc, None); + let html_output = res.stderr; + + let expected_html = self.load_expected_output(HTML_OUTPUT); + + errors += self.compare_output(HTML_OUTPUT, &html_output, &expected_html); + } + if errors > 0 { println!("To update references, rerun the tests and pass the `--bless` flag"); let relative_path_to_file = @@ -3433,6 +3478,7 @@ impl<'test> TestCx<'test> { TargetLocation::ThisFile(self.make_exe_name()), emit_metadata, AllowUnused::No, + DisableErrorFormat::No, ); rustc.arg("-L").arg(&self.aux_output_dir_name()); let res = self.compose_and_run_compiler(rustc, None); @@ -3961,6 +4007,12 @@ enum AllowUnused { No, } +#[derive(PartialEq)] +enum DisableErrorFormat { + Yes, + No, +} + fn read2_abbreviated(mut child: Child) -> io::Result { use crate::read2::read2; use std::mem::replace; From be3af9f419cdae47e8e2c2f58445a1fa9597a2b0 Mon Sep 17 00:00:00 2001 From: Sasha Pourcelot Date: Tue, 7 Sep 2021 19:13:26 +0200 Subject: [PATCH 02/12] wait, wtf --- compiler/rustc_errors/src/emitter.rs | 38 +++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 6d51882fc8be3..a511382c642bf 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -2420,11 +2420,43 @@ impl HtmlFormatter { buffer } + + #[allow(dead_code)] + fn substitute_reserved_characters(buf: &[u8]) -> Vec { + // dbg!(buf); + + // Given that we always push at least one element to out, it is + // guaranteed to be at least as large as buf. 
+ let mut out = Vec::with_capacity(buf.len()); + + // let buf_as_string = String::from_utf8(buf.to_vec()).unwrap(); + // dbg!(buf_as_string); + + for chr in buf { + match chr { + // Reserved characters are described at: + // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters + // b'&' => out.extend(b"&"), + // b'<' => out.extend(b"<"), + b'>' => out.extend_from_slice(b">"), + // b'"' => out.extend(b"""), + other => out.push(*other), + } + } + + // let out_as_string = String::from_utf8(out.clone()).unwrap(); + // dbg!(out_as_string); + + // dbg!(out) + + b"coucou".to_vec() + } } impl Write for HtmlFormatter { fn write(&mut self, buf: &[u8]) -> io::Result { - self.inner.write(buf) + let escaped_buffer = Self::substitute_reserved_characters(buf); + self.inner.write(escaped_buffer.as_slice()) } fn flush(&mut self) -> io::Result<()> { @@ -2438,11 +2470,11 @@ impl WriteColor for HtmlFormatter { } fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - write!(self, "", Self::mk_style_string(spec)) + write!(self.inner, "", Self::mk_style_string(spec)) } fn reset(&mut self) -> io::Result<()> { - write!(self, "") + write!(self.inner, "") } } From 7e718d56bc09a4ff6734026aeff947caad15c4a1 Mon Sep 17 00:00:00 2001 From: Sasha Pourcelot Date: Wed, 8 Sep 2021 09:53:44 +0200 Subject: [PATCH 03/12] Fix the whole thing, add a comment too --- compiler/rustc_errors/src/emitter.rs | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index a511382c642bf..537582a719e56 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -2423,40 +2423,40 @@ impl HtmlFormatter { #[allow(dead_code)] fn substitute_reserved_characters(buf: &[u8]) -> Vec { - // dbg!(buf); - // Given that we always push at least one element to out, it is // guaranteed to be at least as large as buf. 
let mut out = Vec::with_capacity(buf.len()); - // let buf_as_string = String::from_utf8(buf.to_vec()).unwrap(); - // dbg!(buf_as_string); - for chr in buf { match chr { // Reserved characters are described at: // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters - // b'&' => out.extend(b"&"), - // b'<' => out.extend(b"<"), - b'>' => out.extend_from_slice(b">"), - // b'"' => out.extend(b"""), + b'&' => out.extend(b"&"), + b'<' => out.extend(b"<"), + b'>' => out.extend(b">"), + b'"' => out.extend(b"""), other => out.push(*other), } } - // let out_as_string = String::from_utf8(out.clone()).unwrap(); - // dbg!(out_as_string); - - // dbg!(out) - - b"coucou".to_vec() + out } } impl Write for HtmlFormatter { fn write(&mut self, buf: &[u8]) -> io::Result { let escaped_buffer = Self::substitute_reserved_characters(buf); - self.inner.write(escaped_buffer.as_slice()) + + // HACK: as we're not writing the exact same data as what was given to + // us, we can't just call self.inner.write(escaped_buffer.as_slice()), + // as its Ok return value belongs to the range 0..=escaped_buffer.len(), + // which breaks the invariant of Write::write, where the Ok return value + // must belong to 0..=buf.len(). + // + // As a workaround, we can write the whole substituted buffer at once + // using Write::write_all and return the length of the initial buffer. 
+ self.inner.write_all(escaped_buffer.as_slice())?; + Ok(buf.len()) } fn flush(&mut self) -> io::Result<()> { From ae79e44cca86fc96087ecaadc6b6b005fb5d9348 Mon Sep 17 00:00:00 2001 From: Sasha Date: Wed, 8 Sep 2021 10:02:53 +0200 Subject: [PATCH 04/12] Fix typo, remove old fixme --- compiler/rustc_session/src/options.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index a7dbdfff3b24c..3bdcfae8350b6 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1097,9 +1097,8 @@ options! { environment variable `RUSTC_GRAPHVIZ_FONT` (default: `Courier, monospace`)"), hir_stats: bool = (false, parse_bool, [UNTRACKED], "print some statistics about AST and HIR (default: no)"), - // FIXME(scrabsha): update tests in compiler/rustc_interface/src/test.rs html_output: bool = (false, parse_bool, [UNTRACKED], - "prints the compiler output as HTML code (default: no)"), + "print the compiler output as HTML code (default: no)"), human_readable_cgu_names: bool = (false, parse_bool, [TRACKED], "generate human-readable, predictable names for codegen units (default: no)"), identify_regions: bool = (false, parse_bool, [UNTRACKED], From de287624bb80288447243472fa1f9fdbc9e9c356 Mon Sep 17 00:00:00 2001 From: Sasha Pourcelot Date: Wed, 8 Sep 2021 15:04:04 +0200 Subject: [PATCH 05/12] Restructure code a bit, avoid an allocation --- compiler/rustc_errors/src/emitter.rs | 57 +++++++++++++--------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 537582a719e56..4aa06b4d36916 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -26,10 +26,10 @@ use rustc_data_structures::sync::Lrc; use rustc_span::hygiene::{ExpnKind, MacroKind}; use std::borrow::Cow; use std::cmp::{max, min, Reverse}; -use std::io; -use 
std::io::prelude::*; +use std::io::{prelude::*, ErrorKind}; use std::iter; use std::path::Path; +use std::{io, slice}; use termcolor::{Ansi, BufferWriter, ColorChoice, ColorSpec, StandardStream}; use termcolor::{Buffer, Color, WriteColor}; use tracing::*; @@ -2421,41 +2421,38 @@ impl HtmlFormatter { buffer } - #[allow(dead_code)] - fn substitute_reserved_characters(buf: &[u8]) -> Vec { - // Given that we always push at least one element to out, it is - // guaranteed to be at least as large as buf. - let mut out = Vec::with_capacity(buf.len()); - - for chr in buf { - match chr { - // Reserved characters are described at: - // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters - b'&' => out.extend(b"&"), - b'<' => out.extend(b"<"), - b'>' => out.extend(b">"), - b'"' => out.extend(b"""), - other => out.push(*other), - } + // We take chr as reference so that we can turn it into a slice with + // slice::from_ref. + fn substitute_reserved_char(chr: &u8) -> &[u8] { + match chr { + // Reserved characters are described at: + // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters + b'&' => b"&", + b'<' => b"<", + b'>' => b">", + b'"' => b""", + + chr => slice::from_ref(chr), } - - out } } impl Write for HtmlFormatter { fn write(&mut self, buf: &[u8]) -> io::Result { - let escaped_buffer = Self::substitute_reserved_characters(buf); + let escaped_chars = buf + .iter() + .enumerate() + .map(|(idx, content)| (idx, Self::substitute_reserved_char(content))); + + for (idx, content) in escaped_chars { + match self.inner.write_all(content) { + Ok(()) => {} + + Err(err) if err.kind() == ErrorKind::WriteZero => return Ok(idx), + Err(err) => return Err(err), + } + } - // HACK: as we're not writing the exact same data as what was given to - // us, we can't just call self.inner.write(escaped_buffer.as_slice()), - // as its Ok return value belongs to the range 0..=escaped_buffer.len(), - // which breaks the invariant of Write::write, where 
the Ok return value - // must belong to 0..=buf.len(). - // - // As a workaround, we can write the whole substituted buffer at once - // using Write::write_all and return the length of the initial buffer. - self.inner.write_all(escaped_buffer.as_slice())?; Ok(buf.len()) } From 512d0b996cce228405a116ed6abb2f2d39235162 Mon Sep 17 00:00:00 2001 From: Sasha Pourcelot Date: Wed, 8 Sep 2021 15:15:15 +0200 Subject: [PATCH 06/12] Remove margins, make raw strings --- compiler/rustc_errors/src/emitter.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 4aa06b4d36916..db073d5d70026 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -2379,11 +2379,11 @@ impl HtmlFormatter { } fn begin(&mut self) -> io::Result<()> { - write!(self.inner, "
")
+        write!(self.inner, r#"
"#)
     }
 
     fn end(&mut self) -> io::Result<()> {
-        write!(self.inner, "
") + write!(self.inner, r#"
"#) } // Creates the string of a `style` attribute in an html element @@ -2467,11 +2467,11 @@ impl WriteColor for HtmlFormatter { } fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { - write!(self.inner, "", Self::mk_style_string(spec)) + write!(self.inner, r#""#, Self::mk_style_string(spec)) } fn reset(&mut self) -> io::Result<()> { - write!(self.inner, "") + write!(self.inner, r#""#) } } From 88614efe2f7af017d46ab6e318e33a0fba922b6f Mon Sep 17 00:00:00 2001 From: Sasha Pourcelot Date: Wed, 8 Sep 2021 16:22:20 +0200 Subject: [PATCH 07/12] Add test file for html output --- src/test/ui/html-output.html | 12 ++++++++++++ src/test/ui/html-output.rs | 11 +++++++++++ src/test/ui/html-output.stderr | 12 ++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 src/test/ui/html-output.html create mode 100644 src/test/ui/html-output.rs create mode 100644 src/test/ui/html-output.stderr diff --git a/src/test/ui/html-output.html b/src/test/ui/html-output.html new file mode 100644 index 0000000000000..1accf361e1d1a --- /dev/null +++ b/src/test/ui/html-output.html @@ -0,0 +1,12 @@ +
error[E0580]: `main` function has wrong type
+  --> /home/ssha/iomentum/rust/src/test/ui/html-output.rs:20:1
+   |
+20 | fn main(_: ()) {}
+   | ^^^^^^^^^^^^^^ incorrect number of function parameters
+
   |
+
   = note: expected fn pointer `fn()`
+              found fn pointer `fn(())`
+
+
error: aborting due to previous error
+
+
For more information about this error, try `rustc --explain E0580`.
diff --git a/src/test/ui/html-output.rs b/src/test/ui/html-output.rs new file mode 100644 index 0000000000000..cfe4fcd08ed34 --- /dev/null +++ b/src/test/ui/html-output.rs @@ -0,0 +1,11 @@ +// Allows us to test that the compiler emits a correct html output. +// +// The error in this test allows us to test the following features: +// - the output is colored as it is when displayed on the terminal, +// - the margin between each
 element is explicitly removed,
+//   - text is in bold where expected.
+
+// html-output
+
+fn main(_: ()) {}
+//~^ ERROR `main` function has wrong type [E0580]
diff --git a/src/test/ui/html-output.stderr b/src/test/ui/html-output.stderr
new file mode 100644
index 0000000000000..6ade12a9e7737
--- /dev/null
+++ b/src/test/ui/html-output.stderr
@@ -0,0 +1,12 @@
+error[E0580]: `main` function has wrong type
+  --> $DIR/html-output.rs:20:1
+   |
+LL | fn main(_: ()) {}
+   | ^^^^^^^^^^^^^^ incorrect number of function parameters
+   |
+   = note: expected fn pointer `fn()`
+              found fn pointer `fn(())`
+
+error: aborting due to previous error
+
+For more information about this error, try `rustc --explain E0580`.

From e2e20b1d4797c5ba219157697d22c22d05e009b5 Mon Sep 17 00:00:00 2001
From: Sasha Pourcelot 
Date: Wed, 8 Sep 2021 17:14:28 +0200
Subject: [PATCH 08/12] Enable UTF-8

---
 compiler/rustc_errors/src/emitter.rs | 39 +++++++++++++++++-----------
 src/test/ui/html-output.html         |  4 +--
 src/test/ui/html-output.stderr       |  2 +-
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index db073d5d70026..0a776b13f13dc 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -26,10 +26,10 @@ use rustc_data_structures::sync::Lrc;
 use rustc_span::hygiene::{ExpnKind, MacroKind};
 use std::borrow::Cow;
 use std::cmp::{max, min, Reverse};
+use std::io;
 use std::io::{prelude::*, ErrorKind};
 use std::iter;
 use std::path::Path;
-use std::{io, slice};
 use termcolor::{Ansi, BufferWriter, ColorChoice, ColorSpec, StandardStream};
 use termcolor::{Buffer, Color, WriteColor};
 use tracing::*;
@@ -2421,31 +2421,40 @@ impl HtmlFormatter {
         buffer
     }
 
-    // We take chr as reference so that we can turn it into a slice with
-    // slice::from_ref.
-    fn substitute_reserved_char(chr: &u8) -> &[u8] {
+    // buf may or may not be used as a buffer to store the output data.
+    fn substitute_reserved_char(chr: char, buf: &mut [u8; 4]) -> &str {
         match chr {
             // Reserved characters are described at:
             // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters
-            b'&' => b"&amp;",
-            b'<' => b"&lt;",
-            b'>' => b"&gt;",
-            b'"' => b"&quot;",
+            '&' => "&amp;",
+            '<' => "&lt;",
+            '>' => "&gt;",
+            '"' => "&quot;",
 
-            chr => slice::from_ref(chr),
+            chr => chr.encode_utf8(buf),
         }
     }
 }
 
 impl Write for HtmlFormatter {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        let escaped_chars = buf
-            .iter()
-            .enumerate()
-            .map(|(idx, content)| (idx, Self::substitute_reserved_char(content)));
+        // This should not panic since there are two things that are printed
+        // in HtmlFormatter:
+        //   - error messages, hardcoded or generated on the fly,
+        //   - characters from the input file.
+        // Error messages are handled by Rustc and stored in Strings, so they
+        // are always UTF8-formatted. We also require input files to be UTF-8
+        // encoded. As such, everything that an HtmlFormatter can print is
+        // UTF8-encoded.
+
+        let input_buf = std::str::from_utf8(buf).expect("Attempt to write non-UTF8 error message");
+
+        let mut escaped_char_buf = [0; 4];
+
+        for (idx, chr) in input_buf.char_indices() {
+            let content = Self::substitute_reserved_char(chr, &mut escaped_char_buf);
 
-        for (idx, content) in escaped_chars {
-            match self.inner.write_all(content) {
+            match self.inner.write_all(content.as_bytes()) {
                 Ok(()) => {}
 
                 Err(err) if err.kind() == ErrorKind::WriteZero => return Ok(idx),
diff --git a/src/test/ui/html-output.html b/src/test/ui/html-output.html
index 1accf361e1d1a..bf7d613abecbe 100644
--- a/src/test/ui/html-output.html
+++ b/src/test/ui/html-output.html
@@ -1,7 +1,7 @@
 
error[E0580]: `main` function has wrong type
-  --> /home/ssha/iomentum/rust/src/test/ui/html-output.rs:20:1
+  --> /home/ssha/iomentum/rust/src/test/ui/html-output.rs:10:1
    |
-20 | fn main(_: ()) {}
+10 | fn main(_: ()) {}
    | ^^^^^^^^^^^^^^ incorrect number of function parameters
 
   |
 
   = note: expected fn pointer `fn()`
diff --git a/src/test/ui/html-output.stderr b/src/test/ui/html-output.stderr
index 6ade12a9e7737..04b3687fb9061 100644
--- a/src/test/ui/html-output.stderr
+++ b/src/test/ui/html-output.stderr
@@ -1,5 +1,5 @@
 error[E0580]: `main` function has wrong type
-  --> $DIR/html-output.rs:20:1
+  --> $DIR/html-output.rs:10:1
    |
 LL | fn main(_: ()) {}
    | ^^^^^^^^^^^^^^ incorrect number of function parameters

From 6757dbd3c49a6a26865e0665ae84664270e0f050 Mon Sep 17 00:00:00 2001
From: Sasha Pourcelot 
Date: Wed, 8 Sep 2021 20:57:27 +0200
Subject: [PATCH 09/12] Use split_inclusive to eliminate some calls to write

---
 compiler/rustc_errors/src/emitter.rs | 69 ++++++++++++++++++++--------
 1 file changed, 51 insertions(+), 18 deletions(-)

diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index 0a776b13f13dc..bde3538ff2656 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -26,8 +26,8 @@ use rustc_data_structures::sync::Lrc;
 use rustc_span::hygiene::{ExpnKind, MacroKind};
 use std::borrow::Cow;
 use std::cmp::{max, min, Reverse};
-use std::io;
-use std::io::{prelude::*, ErrorKind};
+use std::io::prelude::*;
+use std::io::{self, ErrorKind};
 use std::iter;
 use std::path::Path;
 use termcolor::{Ansi, BufferWriter, ColorChoice, ColorSpec, StandardStream};
@@ -2421,17 +2421,30 @@ impl HtmlFormatter {
         buffer
     }
 
-    // buf may or may not be used as a buffer to store the output data.
-    fn substitute_reserved_char(chr: char, buf: &mut [u8; 4]) -> &str {
+    fn is_reserved_char(chr: char) -> bool {
+        // Reserved characters are described at:
+        // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters
+        ['&', '<', '>', '"'].contains(&chr)
+    }
+
+    fn substitute_reserved_char(chr: char) -> &'static str {
         match chr {
-            // Reserved characters are described at:
+            // Substitutions for reserved characters are described at:
             // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters
             '&' => "&amp;",
             '<' => "&lt;",
             '>' => "&gt;",
             '"' => "&quot;",
 
-            chr => chr.encode_utf8(buf),
+            _ => unreachable!(),
+        }
+    }
+
+    fn escaped_and_unescaped_pair(input: &str) -> (&str, Option<char>) {
+        let mut chars = input.chars();
+        match chars.next_back() {
+            Some(chr) if Self::is_reserved_char(chr) => (chars.as_str(), Some(chr)),
+            Some(_) | None => (input, None),
         }
     }
 }
@@ -2443,22 +2456,42 @@ impl Write for HtmlFormatter {
         //   - error messages, hardcoded or generated on the fly,
         //   - characters from the input file.
         // Error messages are handled by Rustc and stored in Strings, so they
-        // are always UTF8-formatted. We also require input files to be UTF-8
+        // are always UTF8-encoded. We also require input files to be UTF-8
         // encoded. As such, everything that an HtmlFormatter can print is
         // UTF8-encoded.
+        let buf_as_str = std::str::from_utf8(buf).expect("Attempt to write non-UTF8 error message");
+
+        let mut idx = 0;
+        let segments_to_print = buf_as_str
+            .split_inclusive(Self::is_reserved_char)
+            .map(Self::escaped_and_unescaped_pair)
+            .flat_map(|(already_escaped, to_escape)| {
+                let already_escaped_len = already_escaped.len();
+                let already_escaped = Some((idx, already_escaped, false));
+                idx += already_escaped_len;
+
+                let just_escaped = to_escape
+                    .map(Self::substitute_reserved_char)
+                    .map(|substitute| (idx, substitute, true));
+                idx += to_escape.map(char::len_utf8).unwrap_or_default();
+
+                [already_escaped, just_escaped]
+            })
+            .flatten();
 
-        let input_buf = std::str::from_utf8(buf).expect("Attempt to write non-UTF8 error message");
-
-        let mut escaped_char_buf = [0; 4];
-
-        for (idx, chr) in input_buf.char_indices() {
-            let content = Self::substitute_reserved_char(chr, &mut escaped_char_buf);
-
-            match self.inner.write_all(content.as_bytes()) {
-                Ok(()) => {}
+        for (idx, segment, is_escaped) in segments_to_print {
+            if is_escaped {
+                match self.inner.write_all(segment.as_bytes()) {
+                    Ok(()) => {}
 
-                Err(err) if err.kind() == ErrorKind::WriteZero => return Ok(idx),
-                Err(err) => return Err(err),
+                    Err(err) if err.kind() == ErrorKind::WriteZero => return Ok(idx),
+                    Err(err) => return Err(err),
+                };
+            } else {
+                let bytes_written = self.inner.write(segment.as_bytes())?;
+                if bytes_written != segment.len() {
+                    return Ok(idx + bytes_written);
+                }
             }
         }
 

From 5f212def03f85d59495177ceef833727dee59b97 Mon Sep 17 00:00:00 2001
From: Sasha Pourcelot 
Date: Tue, 14 Sep 2021 15:52:24 +0200
Subject: [PATCH 10/12] Put the escaper in its own state machine

---
 compiler/rustc_errors/src/emitter.rs | 153 +++++++++++++++------------
 1 file changed, 88 insertions(+), 65 deletions(-)

diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index bde3538ff2656..836800e2dad13 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -21,6 +21,7 @@ use crate::{
 
 use rustc_lint_defs::pluralize;
 
+use core::slice::SplitInclusive;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
 use rustc_span::hygiene::{ExpnKind, MacroKind};
@@ -2420,77 +2421,24 @@ impl HtmlFormatter {
 
         buffer
     }
-
-    fn is_reserved_char(chr: char) -> bool {
-        // Reserved characters are described at:
-        // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters
-        ['&', '<', '>', '"'].contains(&chr)
-    }
-
-    fn substitute_reserved_char(chr: char) -> &'static str {
-        match chr {
-            // Substitutions for reserved characters are described at:
-            // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters
-            '&' => "&amp;",
-            '<' => "&lt;",
-            '>' => "&gt;",
-            '"' => "&quot;",
-
-            _ => unreachable!(),
-        }
-    }
-
-    fn escaped_and_unescaped_pair(input: &str) -> (&str, Option<char>) {
-        let mut chars = input.chars();
-        match chars.next_back() {
-            Some(chr) if Self::is_reserved_char(chr) => (chars.as_str(), Some(chr)),
-            Some(_) | None => (input, None),
-        }
-    }
 }
 
 impl Write for HtmlFormatter {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        // This should not panic since there are two things that are printed
-        // in HtmlFormatter:
-        //   - error messages, hardcoded or generated on the fly,
-        //   - characters from the input file.
-        // Error messages are handled by Rustc and stored in Strings, so they
-        // are always UTF8-encoded. We also require input files to be UTF-8
-        // encoded. As such, everything that an HtmlFormatter can print is
-        // UTF8-encoded.
-        let buf_as_str = std::str::from_utf8(buf).expect("Attempt to write non-UTF8 error message");
-
-        let mut idx = 0;
-        let segments_to_print = buf_as_str
-            .split_inclusive(Self::is_reserved_char)
-            .map(Self::escaped_and_unescaped_pair)
-            .flat_map(|(already_escaped, to_escape)| {
-                let already_escaped_len = already_escaped.len();
-                let already_escaped = Some((idx, already_escaped, false));
-                idx += already_escaped_len;
-
-                let just_escaped = to_escape
-                    .map(Self::substitute_reserved_char)
-                    .map(|substitute| (idx, substitute, true));
-                idx += to_escape.map(char::len_utf8).unwrap_or_default();
-
-                [already_escaped, just_escaped]
-            })
-            .flatten();
-
-        for (idx, segment, is_escaped) in segments_to_print {
-            if is_escaped {
-                match self.inner.write_all(segment.as_bytes()) {
+        for (idx, segment) in EscapedHtmlIter::new(buf) {
+            match segment {
+                EscapedHtmlSegment::Modified(buf) => match self.inner.write_all(buf) {
                     Ok(()) => {}
 
-                    Err(err) if err.kind() == ErrorKind::WriteZero => return Ok(idx),
-                    Err(err) => return Err(err),
-                };
-            } else {
-                let bytes_written = self.inner.write(segment.as_bytes())?;
-                if bytes_written != segment.len() {
-                    return Ok(idx + bytes_written);
+                    Err(e) if e.kind() == ErrorKind::WriteZero => return Ok(idx),
+                    Err(e) => return Err(e),
+                },
+                EscapedHtmlSegment::Unmodified(buf) => {
+                    let bytes_written = self.inner.write(buf)?;
+
+                    if bytes_written != buf.len() {
+                        return Ok(idx + bytes_written);
+                    }
                 }
             }
         }
@@ -2517,6 +2465,81 @@ impl WriteColor for HtmlFormatter {
     }
 }
 
+struct EscapedHtmlIter<'a> {
+    iter: SplitInclusive<'a, u8, fn(&u8) -> bool>,
+    index: usize,
+    trimmed_escaped: Option<&'static [u8]>,
+}
+
+impl<'a> EscapedHtmlIter<'a> {
+    fn new(input: &'a [u8]) -> Self {
+        EscapedHtmlIter {
+            iter: input.split_inclusive(Self::is_reserved_byte),
+            index: 0,
+            trimmed_escaped: None,
+        }
+    }
+
+    fn is_reserved_byte(byte: &u8) -> bool {
+        // Reserved characters are described at:
+        // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters
+        [b'&', b'<', b'>', b'"'].contains(&byte)
+    }
+
+    fn substitute_reserved_byte(byte: u8) -> Option<&'static [u8]> {
+        match byte {
+            // Substitutions for reserved characters are described at:
+            // https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters
+            b'&' => Some(b"&amp;"),
+            b'<' => Some(b"&lt;"),
+            b'>' => Some(b"&gt;"),
+            b'"' => Some(b"&quot;"),
+
+            _ => None,
+        }
+    }
+
+    fn trim_escapable_and_escape(input: &[u8]) -> (&[u8], Option<&'static [u8]>) {
+        input
+            .split_last()
+            .and_then(|(last, rest)| {
+                Self::substitute_reserved_byte(*last).map(|substituted| (rest, Some(substituted)))
+            })
+            .unwrap_or_else(|| (input, None))
+    }
+}
+
+impl<'a> Iterator for EscapedHtmlIter<'a> {
+    type Item = (usize, EscapedHtmlSegment<'a>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.trimmed_escaped.take() {
+            Some(substitution) => {
+                let idx = self.index;
+                self.index += 1;
+
+                Some((idx, EscapedHtmlSegment::Modified(substitution)))
+            }
+
+            None => {
+                let segment = self.iter.next()?;
+                let (already_escaped, just_escaped) = Self::trim_escapable_and_escape(segment);
+                let idx = self.index;
+
+                self.trimmed_escaped = just_escaped;
+                self.index += already_escaped.len();
+
+                Some((idx, EscapedHtmlSegment::Unmodified(already_escaped)))
+            }
+        }
+    }
+}
+
+enum EscapedHtmlSegment<'a> {
+    Modified(&'a [u8]),
+    Unmodified(&'a [u8]),
+}
+
 fn term_color_to_html_color(color: Color) -> Cow<'static, str> {
     // All the CSS color keywords are available at:
     // https://drafts.csswg.org/css-color/#named-colors

From 6e27b89d94528ce0b8b183728ae8d5729f34d41f Mon Sep 17 00:00:00 2001
From: Sasha Pourcelot 
Date: Thu, 16 Sep 2021 16:18:20 +0200
Subject: [PATCH 11/12] Don't report an error if we succeeded in writing at
 least something

---
 compiler/rustc_errors/src/emitter.rs | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index 836800e2dad13..fb07cd2d19a3e 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -2425,22 +2425,27 @@ impl HtmlFormatter {
 
 impl Write for HtmlFormatter {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        let mut first = true;
+
         for (idx, segment) in EscapedHtmlIter::new(buf) {
             match segment {
                 EscapedHtmlSegment::Modified(buf) => match self.inner.write_all(buf) {
                     Ok(()) => {}
 
-                    Err(e) if e.kind() == ErrorKind::WriteZero => return Ok(idx),
-                    Err(e) => return Err(e),
+                    Err(e) if first => return Err(e),
+                    Err(e) => return Ok(idx),
                 },
-                EscapedHtmlSegment::Unmodified(buf) => {
-                    let bytes_written = self.inner.write(buf)?;
 
-                    if bytes_written != buf.len() {
-                        return Ok(idx + bytes_written);
-                    }
-                }
+                EscapedHtmlSegment::Unmodified(buf) => match self.inner.write(buf) {
+                    Ok(bytes_written) if bytes_written == buf.len() => {}
+                    Ok(bytes_written) => return Ok(idx + bytes_written),
+
+                    Err(e) if first => return Err(e),
+                    Err(e) => return Ok(idx),
+                },
             }
+
+            first = false;
         }
 
         Ok(buf.len())

From 8e064c1031cbf30fbd87f0af411ab238ca68eac1 Mon Sep 17 00:00:00 2001
From: Sasha Pourcelot 
Date: Sat, 18 Sep 2021 15:27:11 +0200
Subject: [PATCH 12/12] Use the segment index instead of a `first` boolean

---
 compiler/rustc_errors/src/emitter.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index fb07cd2d19a3e..ba3b61635608f 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -2425,14 +2425,12 @@ impl HtmlFormatter {
 
 impl Write for HtmlFormatter {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        let mut first = true;
-
         for (idx, segment) in EscapedHtmlIter::new(buf) {
             match segment {
                 EscapedHtmlSegment::Modified(buf) => match self.inner.write_all(buf) {
                     Ok(()) => {}
 
-                    Err(e) if first => return Err(e),
+                    Err(e) if idx == 0 => return Err(e),
                     Err(e) => return Ok(idx),
                 },
 
@@ -2440,7 +2438,7 @@ impl Write for HtmlFormatter {
                     Ok(bytes_written) if bytes_written == buf.len() => {}
                     Ok(bytes_written) => return Ok(idx + bytes_written),
 
-                    Err(e) if first => return Err(e),
+                    Err(e) if idx == 0 => return Err(e),
                     Err(e) => return Ok(idx),
                 },
             }