From 230a23ecbe08c6c987dccb30958d4ff16495afc3 Mon Sep 17 00:00:00 2001 From: Josh Suereth Date: Wed, 4 Dec 2024 09:18:24 -0500 Subject: [PATCH 1/4] Add failing tests and better printlns to discover the issues. --- crates/weaver_semconv_gen/src/parser.rs | 31 ++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/crates/weaver_semconv_gen/src/parser.rs b/crates/weaver_semconv_gen/src/parser.rs index e62ef907..4776f1d8 100644 --- a/crates/weaver_semconv_gen/src/parser.rs +++ b/crates/weaver_semconv_gen/src/parser.rs @@ -113,9 +113,16 @@ pub fn is_markdown_snippet_directive(line: &str) -> bool { pub fn parse_markdown_snippet_directive(line: &str) -> Result { match parse_markdown_snippet_raw(line) { Ok((rest, result)) if rest.trim().is_empty() => Ok(result), - _ => Err(Error::InvalidSnippetHeader { - header: line.to_owned(), - }), + Ok((rest, _)) => { + println!("Failed to parse {line}, leftover: [{rest}]"); + Err(Error::InvalidSnippetHeader { header: line.to_owned() }) + }, + Err(e) => { + println!("Failed to parse {line}, errors: [{e}]"); + Err(Error::InvalidSnippetHeader { + header: line.to_owned(), + }) + }, } } @@ -130,6 +137,10 @@ mod tests { fn recognizes_trailer() { assert!(is_semconv_trailer("")); assert!(!is_semconv_trailer("")); + // Add whitespace friendly versions + assert!(is_semconv_trailer("")); + assert!(is_semconv_trailer("")); + assert!(is_semconv_trailer("")); } #[test] @@ -157,6 +168,16 @@ mod tests { assert!(!is_markdown_snippet_directive( "" )); + // Test ignoring whitespace + assert!(is_markdown_snippet_directive( + "" + )); + assert!(is_markdown_snippet_directive( + "" + )); + assert!(is_markdown_snippet_directive( + "" + )); } #[test] @@ -188,6 +209,10 @@ mod tests { .iter() .any(|v| v == &MarkdownGenParameters::Tag("tech-specific-rabbitmq".into()))); + let result = + parse_markdown_snippet_directive("")?; + assert_eq!(result.id, "stuff"); + Ok(()) } } From b8d7626e78242250cc1420cc9d5444bd5e87fc5f Mon Sep 17 00:00:00 2001 From: Josh Suereth Date: Fri, 6 Dec 2024 11:26:06 -0500 Subject: [PATCH 2/4] Increase flexibility of HTML comment parsing. --- crates/weaver_semconv_gen/src/parser.rs | 64 +++++++++++++++++++------ 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/crates/weaver_semconv_gen/src/parser.rs b/crates/weaver_semconv_gen/src/parser.rs index 4776f1d8..9b53ffb1 100644 --- a/crates/weaver_semconv_gen/src/parser.rs +++ b/crates/weaver_semconv_gen/src/parser.rs @@ -5,6 +5,9 @@ use crate::Error; use crate::GenerateMarkdownArgs; use crate::MarkdownGenParameters; +use nom::bytes::complete::take_until; +use nom::error::ErrorKind; +use nom::error::ParseError; use nom::multi::many0_count; use nom::{ branch::alt, @@ -16,8 +19,11 @@ use nom::{ IResult, }; +/// exact string we expect for starting a semconv snippet. +const SEMCONV_HEADER: &str = "semconv"; /// exact string we expect for ending a semconv snippet. -const SEMCONV_TRAILER: &str = ""; +const SEMCONV_TRAILER: &str = "endsemconv"; + /// nom parser for tag values. fn parse_value(input: &str) -> IResult<&str, &str> { @@ -78,30 +84,60 @@ fn parse_id(input: &str) -> IResult<&str, &str> { ))(input) } -/// nom parser for -fn parse_markdown_snippet_raw(input: &str) -> IResult<&str, GenerateMarkdownArgs> { +/// nom parser for HTML comments: ` +fn parse_html_comment(input: &str) -> IResult<&str, &str> { + // Comments must have the following format: + // The string "", or "--!>", nor end with the string "")(input)?; + // The string "-->". + let (input, _) = tag("-->")(input)?; + Ok((input, result)) +} + +/// Parses the semantic convention header and directives for markdown generation. +fn parse_semconv_snippet_directive(input: &str) -> IResult<&str, GenerateMarkdownArgs> { let (input, _) = multispace0(input)?; - let (input, _) = tag("semconv")(input)?; + let (input, _) = tag(SEMCONV_HEADER)(input)?; let (input, _) = multispace0(input)?; let (input, id) = parse_id(input)?; let (input, opt_args) = opt(parse_markdown_gen_parameters)(input)?; let (input, _) = multispace0(input)?; - let (input, _) = tag("-->")(input)?; + Ok((input, GenerateMarkdownArgs { + id: id.to_owned(), + args: opt_args.unwrap_or(Vec::new()), + },)) +} - Ok(( - input, - GenerateMarkdownArgs { - id: id.to_owned(), - args: opt_args.unwrap_or(Vec::new()), - }, - )) +/// nom parser for +fn parse_markdown_snippet_raw(input: &str) -> IResult<&str, GenerateMarkdownArgs> { + let (input, snippet) = parse_html_comment(input)?; + let (remains, result) = parse_semconv_snippet_directive(snippet)?; + if remains.is_empty() { + Ok((input, result)) + } else { + Err(nom::Err::Failure(ParseError::from_error_kind(remains, ErrorKind::IsNot))) + } +} + +/// nom parser for +fn parse_semconv_trailer(input: &str) -> IResult<&str, ()> { + let (input, snippet) = parse_html_comment(input)?; + let (snippet, _) = multispace0(snippet)?; + let (snippet, _) = tag(SEMCONV_TRAILER)(snippet)?; + let (snippet, _) = multispace0(snippet)?; + if snippet.is_empty() { + Ok((input, ())) + } else { + Err(nom::Err::Failure(ParseError::from_error_kind(snippet, ErrorKind::Not))) + } + } /// Returns true if the line is the marker for markdown snippets. pub fn is_semconv_trailer(line: &str) -> bool { - // TODO - more flexibility in what we recognize here. - line.trim() == SEMCONV_TRAILER + matches!(parse_semconv_trailer(line), Ok((rest, _)) if rest.trim().is_empty()) } /// Returns true if the line begins a markdown snippet directive and needs tobe parsed. From cbc790ea15c56079639b1a141df310da4dac24f7 Mon Sep 17 00:00:00 2001 From: Josh Suereth Date: Fri, 6 Dec 2024 11:30:00 -0500 Subject: [PATCH 3/4] Fix clippy/fmt --- crates/weaver_semconv_gen/src/parser.rs | 51 +++++++++++-------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/crates/weaver_semconv_gen/src/parser.rs b/crates/weaver_semconv_gen/src/parser.rs index 9b53ffb1..9c8431b5 100644 --- a/crates/weaver_semconv_gen/src/parser.rs +++ b/crates/weaver_semconv_gen/src/parser.rs @@ -24,7 +24,6 @@ const SEMCONV_HEADER: &str = "semconv"; /// exact string we expect for ending a semconv snippet. const SEMCONV_TRAILER: &str = "endsemconv"; - /// nom parser for tag values. fn parse_value(input: &str) -> IResult<&str, &str> { recognize(pair( @@ -104,10 +103,13 @@ fn parse_semconv_snippet_directive(input: &str) -> IResult<&str, GenerateMarkdow let (input, id) = parse_id(input)?; let (input, opt_args) = opt(parse_markdown_gen_parameters)(input)?; let (input, _) = multispace0(input)?; - Ok((input, GenerateMarkdownArgs { - id: id.to_owned(), - args: opt_args.unwrap_or(Vec::new()), - },)) + Ok(( + input, + GenerateMarkdownArgs { + id: id.to_owned(), + args: opt_args.unwrap_or(Vec::new()), + }, + )) } /// nom parser for @@ -117,7 +119,10 @@ fn parse_markdown_snippet_raw(input: &str) -> IResult<&str, GenerateMarkdownArgs if remains.is_empty() { Ok((input, result)) } else { - Err(nom::Err::Failure(ParseError::from_error_kind(remains, ErrorKind::IsNot))) + Err(nom::Err::Failure(ParseError::from_error_kind( + remains, + ErrorKind::IsNot, + ))) } } @@ -130,9 +135,11 @@ fn parse_semconv_trailer(input: &str) -> IResult<&str, ()> { if snippet.is_empty() { Ok((input, ())) } else { - Err(nom::Err::Failure(ParseError::from_error_kind(snippet, ErrorKind::Not))) + Err(nom::Err::Failure(ParseError::from_error_kind( + snippet, + ErrorKind::Not, + ))) } - } /// Returns true if the line is the marker for markdown snippets. @@ -149,16 +156,9 @@ pub fn is_markdown_snippet_directive(line: &str) -> bool { pub fn parse_markdown_snippet_directive(line: &str) -> Result { match parse_markdown_snippet_raw(line) { Ok((rest, result)) if rest.trim().is_empty() => Ok(result), - Ok((rest, _)) => { - println!("Failed to parse {line}, leftover: [{rest}]"); - Err(Error::InvalidSnippetHeader { header: line.to_owned() }) - }, - Err(e) => { - println!("Failed to parse {line}, errors: [{e}]"); - Err(Error::InvalidSnippetHeader { - header: line.to_owned(), - }) - }, + _ => Err(Error::InvalidSnippetHeader { + header: line.to_owned(), + }), } } @@ -205,15 +205,9 @@ mod tests { "" )); // Test ignoring whitespace - assert!(is_markdown_snippet_directive( - "" - )); - assert!(is_markdown_snippet_directive( - "" - )); - assert!(is_markdown_snippet_directive( - "" - )); + assert!(is_markdown_snippet_directive("")); + assert!(is_markdown_snippet_directive("")); + assert!(is_markdown_snippet_directive("")); } #[test] @@ -245,8 +239,7 @@ mod tests { .iter() .any(|v| v == &MarkdownGenParameters::Tag("tech-specific-rabbitmq".into()))); - let result = - parse_markdown_snippet_directive("")?; + let result = parse_markdown_snippet_directive("")?; assert_eq!(result.id, "stuff"); Ok(()) From 70fa4d2009c11c929b704a8ba7e7ecc84cb0b025 Mon Sep 17 00:00:00 2001 From: Bonnie Date: Sun, 8 Dec 2024 02:35:43 -0500 Subject: [PATCH 4/4] Add warning message for incorrect format --- crates/weaver_semconv_gen/src/parser.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/crates/weaver_semconv_gen/src/parser.rs b/crates/weaver_semconv_gen/src/parser.rs index 9c8431b5..aa12ece4 100644 --- a/crates/weaver_semconv_gen/src/parser.rs +++ b/crates/weaver_semconv_gen/src/parser.rs @@ -90,6 +90,17 @@ fn parse_html_comment(input: &str) -> IResult<&str, &str> { let (input, _) = tag("", or "--!>", nor end with the string "")(input)?; + + // Check for spacing issues and warn if found + if result.starts_with("semconv") && !input.trim().starts_with("semconv ") { + // Missing space after "" + eprintln!("Warning: Missing space before '-->' in semconv comment: {}", result); + } + // The string "-->". let (input, _) = tag("-->")(input)?; Ok((input, result))