Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a more flexible HTML comment parser and raise warning #516

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 73 additions & 8 deletions crates/weaver_semconv_gen/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
use crate::Error;
use crate::GenerateMarkdownArgs;
use crate::MarkdownGenParameters;
use nom::bytes::complete::take_until;
use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::multi::many0_count;
use nom::{
branch::alt,
Expand All @@ -16,8 +19,10 @@ use nom::{
IResult,
};

/// exact string we expect for starting a semconv snippet.
const SEMCONV_HEADER: &str = "semconv";
/// exact string we expect for ending a semconv snippet.
const SEMCONV_TRAILER: &str = "<!-- endsemconv -->";
const SEMCONV_TRAILER: &str = "endsemconv";

/// nom parser for tag values.
fn parse_value(input: &str) -> IResult<&str, &str> {
Expand Down Expand Up @@ -78,17 +83,37 @@ fn parse_id(input: &str) -> IResult<&str, &str> {
))(input)
}

/// nom parser for <!-- semconv {id}({args}) -->
fn parse_markdown_snippet_raw(input: &str) -> IResult<&str, GenerateMarkdownArgs> {
/// nom parser for HTML comments: `<!--{comment}-->
fn parse_html_comment(input: &str) -> IResult<&str, &str> {
// Comments must have the following format:
// The string "<!--".
let (input, _) = tag("<!--")(input)?;
// Optionally, text, with the additional restriction that the text must not start with the string ">", nor start with the string "->", nor contain the strings "<!--", "-->", or "--!>", nor end with the string "<!-".
let (input, result) = take_until("-->")(input)?;

// Check for spacing issues and warn if found
if result.starts_with("semconv") && !input.trim().starts_with("semconv ") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Three things:

  1. I don't think we need to force spaces here.

  2. For the purpose of separation of concerns, the validation logic should go in parse_semconv_snippet_directive. Ideally this function JUST parses HTML headers, which have no restrictions on whitespaces.

  3. We're not using eprintln! for warnings, instead you'll want to find a way to return a Diagnostic, e.g. using weaver's result WResult. This might require some wiring to make sure non-fatal-errors (NFE) can make it out of the parser. I haven't tried to sort out NFE w/ the nom parsers bits yet. Let me know if you have issues I'll see if I can put a scaffold up for you there.

// Missing space after "<!--"
eprintln!("Warning: Missing space after '<!--' in semconv comment: {}", result);
}
if result.ends_with("semconv") && !result.trim().ends_with(" semconv") {
// Missing space before "-->"
eprintln!("Warning: Missing space before '-->' in semconv comment: {}", result);
}

// The string "-->".
let (input, _) = tag("-->")(input)?;
Ok((input, result))
}

/// Parses the semantic convention header and directives for markdown generation.
fn parse_semconv_snippet_directive(input: &str) -> IResult<&str, GenerateMarkdownArgs> {
let (input, _) = multispace0(input)?;
let (input, _) = tag("semconv")(input)?;
let (input, _) = tag(SEMCONV_HEADER)(input)?;
let (input, _) = multispace0(input)?;
let (input, id) = parse_id(input)?;
let (input, opt_args) = opt(parse_markdown_gen_parameters)(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = tag("-->")(input)?;

Ok((
input,
GenerateMarkdownArgs {
Expand All @@ -98,10 +123,39 @@ fn parse_markdown_snippet_raw(input: &str) -> IResult<&str, GenerateMarkdownArgs
))
}

/// nom parser for <!-- semconv {id}({args}) -->
fn parse_markdown_snippet_raw(input: &str) -> IResult<&str, GenerateMarkdownArgs> {
let (input, snippet) = parse_html_comment(input)?;
let (remains, result) = parse_semconv_snippet_directive(snippet)?;
if remains.is_empty() {
Ok((input, result))
} else {
Err(nom::Err::Failure(ParseError::from_error_kind(
remains,
ErrorKind::IsNot,
)))
}
}

/// nom parser for <!-- endsemconv -->
fn parse_semconv_trailer(input: &str) -> IResult<&str, ()> {
let (input, snippet) = parse_html_comment(input)?;
let (snippet, _) = multispace0(snippet)?;
let (snippet, _) = tag(SEMCONV_TRAILER)(snippet)?;
let (snippet, _) = multispace0(snippet)?;
if snippet.is_empty() {
Ok((input, ()))
} else {
Err(nom::Err::Failure(ParseError::from_error_kind(
snippet,
ErrorKind::Not,
)))
}
}

/// Returns true if the line is the <!-- endsemconv --> marker for markdown snippets.
pub fn is_semconv_trailer(line: &str) -> bool {
// TODO - more flexibility in what we recognize here.
line.trim() == SEMCONV_TRAILER
matches!(parse_semconv_trailer(line), Ok((rest, _)) if rest.trim().is_empty())
}

/// Returns true if the line begins a markdown snippet directive and needs tobe parsed.
Expand Down Expand Up @@ -130,6 +184,10 @@ mod tests {
fn recognizes_trailer() {
assert!(is_semconv_trailer("<!-- endsemconv -->"));
assert!(!is_semconv_trailer("<!-- endsemconvded -->"));
// Add whitespace friendly versions
assert!(is_semconv_trailer("<!--endsemconv-->"));
assert!(is_semconv_trailer("<!-- endsemconv-->"));
assert!(is_semconv_trailer("<!--endsemconv -->"));
}

#[test]
Expand Down Expand Up @@ -157,6 +215,10 @@ mod tests {
assert!(!is_markdown_snippet_directive(
"<!-- other semconv stuff -->"
));
// Test ignoring whitespace
assert!(is_markdown_snippet_directive("<!-- semconv stuff-->"));
assert!(is_markdown_snippet_directive("<!--semconv stuff -->"));
assert!(is_markdown_snippet_directive("<!--semconv stuff-->"));
}

#[test]
Expand Down Expand Up @@ -188,6 +250,9 @@ mod tests {
.iter()
.any(|v| v == &MarkdownGenParameters::Tag("tech-specific-rabbitmq".into())));

let result = parse_markdown_snippet_directive("<!--semconv stuff-->")?;
assert_eq!(result.id, "stuff");

Ok(())
}
}