diff --git a/CosmosDBShell.Tests/Shell/HighlighterTests.cs b/CosmosDBShell.Tests/Shell/HighlighterTests.cs index 1c8b8e1..31223ec 100644 --- a/CosmosDBShell.Tests/Shell/HighlighterTests.cs +++ b/CosmosDBShell.Tests/Shell/HighlighterTests.cs @@ -116,6 +116,81 @@ public void TestInterpolatedStringHighlight() Assert.Equal("echo", segs[0].Text.Trim()); } + [Fact] + public void TestInterpolatedExpressionDoesNotDuplicateText() + { + // Regression: nested expressions inside $(...) carry positions from a separate + // sub-Lexer, so recursing into them while indexing this.text used to smear + // characters from the start of the line into the rendered output. The full + // visible text must round-trip exactly through the highlighter. + var input = "echo \"$(3+5)\"\"$(3+5)\""; + var highlighter = (IHighlighter)ShellInterpreter.Instance; + + var res = highlighter.BuildHighlightedText(input) as Markup; + Assert.NotNull(res); + var rendered = string.Concat(res.GetSegments(AnsiConsole.Console).Select(s => s.Text)); + + Assert.Equal(input, rendered); + } + + [Fact] + public void TestInterpolatedExpressionContentsAreColoredAsExpression() + { + // The '+' inside $( ... ) should be rendered with the operator color rather + // than being merged into the surrounding string-literal coloring. Spectre + // collapses adjacent segments that share a style, so any character whose + // color does not match the surrounding literal color must end up on its own + // segment — that is exactly what we want to verify. + var highlighter = (IHighlighter)ShellInterpreter.Instance; + + var res = highlighter.BuildHighlightedText("echo \"$(3+5)\"") as Markup; + Assert.NotNull(res); + var segs = res.GetSegments(AnsiConsole.Console).ToList(); + + var plusSeg = segs.FirstOrDefault(s => s.Text == "+"); + Assert.NotNull(plusSeg); + + var quotedSeg = segs.FirstOrDefault(s => s.Text.Contains("\"")); + Assert.NotNull(quotedSeg); + Assert.NotEqual(quotedSeg.Style.Foreground, plusSeg.Style.Foreground); + } + + [Fact] + public void TestInterpolatedVariableIsColoredSeparately() + { + // $name inside an interpolated string should be rendered as a variable + // reference, not lumped together with the quoted text. + var highlighter = (IHighlighter)ShellInterpreter.Instance; + + var input = "echo \"Hello $name!\""; + var res = highlighter.BuildHighlightedText(input) as Markup; + Assert.NotNull(res); + + var rendered = string.Concat(res.GetSegments(AnsiConsole.Console).Select(s => s.Text)); + Assert.Equal(input, rendered); + + var segs = res.GetSegments(AnsiConsole.Console).ToList(); + Assert.Contains(segs, s => s.Text == "$name"); + } + + [Fact] + public void TestInterpolatedExpressionWithEscapesRoundTrips() + { + // The cooked content of an interpolated string collapses escape sequences + // (e.g. \" -> "), so an inner Lexer that walks the cooked text would emit + // token positions that drift relative to the outer source. Verify that an + // interpolation containing an inner string literal with a backslash escape + // still renders the visible characters in their original positions. + var highlighter = (IHighlighter)ShellInterpreter.Instance; + + var input = "echo \"$( \\\"a\\nb\\\" )\""; + var res = highlighter.BuildHighlightedText(input) as Markup; + Assert.NotNull(res); + + var rendered = string.Concat(res.GetSegments(AnsiConsole.Console).Select(s => s.Text)); + Assert.Equal(input, rendered); + } + [Fact] public void TestExpressionHighlight() { diff --git a/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs b/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs new file mode 100644 index 0000000..d377f93 --- /dev/null +++ b/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs @@ -0,0 +1,90 @@ +// ------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ------------------------------------------------------------ + +using System.Text.Json; + +using Azure.Data.Cosmos.Shell.Core; + +namespace CosmosShell.Tests.UtilTest; + +public class JsonOutputHighlighterTests +{ + [Fact] + public void Primitives_AreColoredByType() + { + var element = JsonSerializer.Deserialize("{ \"name\": \"alice\", \"age\": 42, \"active\": true, \"nick\": null }"); + + var markup = JsonOutputHighlighter.BuildMarkup(element); + + // Property name uses the JSON property color (cyan). + Assert.Contains("[cyan]\"name\"[/]", markup); + + // Each value type uses its dedicated helper from Theme. + Assert.Contains("[violet]\"alice\"[/]", markup); + Assert.Contains("[violet]42[/]", markup); + Assert.Contains("[violet]true[/]", markup); + Assert.Contains("[violet]null[/]", markup); + + // Outer braces use the depth-0 bracket color; comma and colon use the + // shared punctuation color. + var depth0 = Theme.GetBracketColor(0); + Assert.Contains($"[{depth0}]{{[/]", markup); + Assert.Contains($"[{depth0}]}}[/]", markup); + Assert.Contains("[yellow]:[/]", markup); + Assert.Contains("[yellow],[/]", markup); + } + + [Fact] + public void NestedObjectsAndArrays_AreIndented() + { + var element = JsonSerializer.Deserialize("{ \"items\": [1, 2] }"); + + var markup = JsonOutputHighlighter.BuildMarkup(element); + + // Two-space indentation matching Utf8JsonWriter(Indented=true). + Assert.Contains("\n [cyan]\"items\"[/]", markup); + Assert.Contains("\n [violet]1[/]", markup); + Assert.Contains("\n [violet]2[/]", markup); + } + + [Fact] + public void EmptyObjectAndArray_RenderInline() + { + var emptyObject = JsonSerializer.Deserialize("{}"); + var emptyArray = JsonSerializer.Deserialize("[]"); + + var depth0 = Theme.GetBracketColor(0); + Assert.Equal($"[{depth0}]{{[/][{depth0}]}}[/]", JsonOutputHighlighter.BuildMarkup(emptyObject)); + Assert.Equal($"[{depth0}][[[/][{depth0}]]][/]", JsonOutputHighlighter.BuildMarkup(emptyArray)); + } + + [Fact] + public void StringValues_AreJsonAndMarkupEscaped() + { + var element = JsonSerializer.Deserialize("{ \"q\": \"a\\\"b\" }"); + + var markup = JsonOutputHighlighter.BuildMarkup(element); + + // The embedded quote stays JSON-escaped inside the markup token. + Assert.Contains("[violet]\"a\\u0022b\"[/]", markup); + } + + [Fact] + public void NestedBrackets_CycleColorsByDepth() + { + // Depth 0 -> '{', depth 1 -> '[', depth 2 -> '{' (next nested object). + var element = JsonSerializer.Deserialize("{ \"a\": [ { \"b\": 1 } ] }"); + + var markup = JsonOutputHighlighter.BuildMarkup(element); + + Assert.Contains($"[{Theme.GetBracketColor(0)}]{{[/]", markup); + Assert.Contains($"[{Theme.GetBracketColor(1)}][[[/]", markup); + Assert.Contains($"[{Theme.GetBracketColor(2)}]{{[/]", markup); + + // Closing brackets should use the same color as their matching opener. + Assert.Contains($"[{Theme.GetBracketColor(2)}]}}[/]", markup); + Assert.Contains($"[{Theme.GetBracketColor(1)}]]][/]", markup); + Assert.Contains($"[{Theme.GetBracketColor(0)}]}}[/]", markup); + } +} diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs new file mode 100644 index 0000000..5f2bf9c --- /dev/null +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs @@ -0,0 +1,139 @@ +// ------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ------------------------------------------------------------ +namespace Azure.Data.Cosmos.Shell.Core; + +using System.Text; +using System.Text.Json; +using Spectre.Console; + +/// +/// Produces a Spectre.Console markup string for a , applying the +/// JSON colors defined in . The resulting layout matches the indented +/// output produced by with Indented = true. +/// +internal static class JsonOutputHighlighter +{ + private const int IndentSize = 2; + + public static string BuildMarkup(JsonElement element) + { + var sb = new StringBuilder(); + WriteValue(sb, element, indent: 0); + return sb.ToString(); + } + + private static void WriteValue(StringBuilder sb, JsonElement element, int indent) + { + switch (element.ValueKind) + { + case JsonValueKind.Object: + WriteObject(sb, element, indent); + break; + case JsonValueKind.Array: + WriteArray(sb, element, indent); + break; + case JsonValueKind.String: + sb.Append(Theme.FormatJsonString(EncodeJsonString(element.GetString() ?? string.Empty))); + break; + case JsonValueKind.Number: + sb.Append(Theme.FormatJsonNumber(element.GetRawText())); + break; + case JsonValueKind.True: + case JsonValueKind.False: + sb.Append(Theme.FormatJsonBoolean(element.GetRawText())); + break; + case JsonValueKind.Null: + sb.Append(Theme.FormatJsonNull("null")); + break; + default: + sb.Append(Markup.Escape(element.GetRawText())); + break; + } + } + + private static void WriteObject(StringBuilder sb, JsonElement element, int indent) + { + var enumerator = element.EnumerateObject(); + if (!enumerator.MoveNext()) + { + sb.Append(Theme.FormatBracket("{", indent)); + sb.Append(Theme.FormatBracket("}", indent)); + return; + } + + sb.Append(Theme.FormatBracket("{", indent)); + sb.Append('\n'); + + var first = true; + do + { + if (!first) + { + sb.Append(Theme.FormatJsonBracket(",")); + sb.Append('\n'); + } + + first = false; + + AppendIndent(sb, indent + 1); + sb.Append(Theme.FormatJsonProperty(EncodeJsonString(enumerator.Current.Name))); + sb.Append(Theme.FormatJsonBracket(":")); + sb.Append(' '); + WriteValue(sb, enumerator.Current.Value, indent + 1); + } + while (enumerator.MoveNext()); + + sb.Append('\n'); + AppendIndent(sb, indent); + sb.Append(Theme.FormatBracket("}", indent)); + } + + private static void WriteArray(StringBuilder sb, JsonElement element, int indent) + { + var enumerator = element.EnumerateArray(); + if (!enumerator.MoveNext()) + { + sb.Append(Theme.FormatBracket("[", indent)); + sb.Append(Theme.FormatBracket("]", indent)); + return; + } + + sb.Append(Theme.FormatBracket("[", indent)); + sb.Append('\n'); + + var first = true; + do + { + if (!first) + { + sb.Append(Theme.FormatJsonBracket(",")); + sb.Append('\n'); + } + + first = false; + + AppendIndent(sb, indent + 1); + WriteValue(sb, enumerator.Current, indent + 1); + } + while (enumerator.MoveNext()); + + sb.Append('\n'); + AppendIndent(sb, indent); + sb.Append(Theme.FormatBracket("]", indent)); + } + + private static void AppendIndent(StringBuilder sb, int level) + { + sb.Append(' ', level * IndentSize); + } + + /// + /// Serializes the value as a JSON string literal (with surrounding quotes and JSON escapes) + /// so that embedded quotes, backslashes, and control characters render correctly. + /// + private static string EncodeJsonString(string value) + { + return JsonSerializer.Serialize(value); + } +} diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs index de6db4b..35f1ec3 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs @@ -104,6 +104,12 @@ internal class HighlightingVisitor : IAstVisitor private int currentPosition; private string? currentCommand; + // Tracks the current paired-bracket nesting level so that '{', '[', '(' and their + // closing counterparts can be colored using . + // The counter is shared across bracket types, mirroring the rainbow bracket + // behavior found in modern editors. + private int bracketDepth; + public HighlightingVisitor(string text, ShellInterpreter interpreter) { this.text = text; @@ -294,7 +300,17 @@ public void Visit(BinaryOperatorExpression binaryOperatorExpression) public void Visit(ParensExpression parensExpression) { + // Color the parentheses using the current bracket depth so they participate + // in the same rainbow cycle as JSON braces and brackets. + var parenDepth = this.bracketDepth; + + this.AppendToken(parensExpression.LParToken, Theme.FormatBracket(parensExpression.LParToken.Value, parenDepth)); + + this.bracketDepth = parenDepth + 1; parensExpression.InnerExpression.Accept(this); + this.bracketDepth = parenDepth; + + this.AppendToken(parensExpression.RParToken, Theme.FormatBracket(parensExpression.RParToken.Value, parenDepth)); } public void Visit(JsonExpression jsonExpression) @@ -303,15 +319,21 @@ public void Visit(JsonExpression jsonExpression) var startPos = jsonExpression.Start; this.AppendUpTo(startPos); + // Color the opening brace using the current bracket depth, then increment + // so nested braces/brackets/parens use the next color in the cycle. + var braceDepth = this.bracketDepth; + // Find and highlight the opening brace var openBracePos = this.text.IndexOf('{', this.currentPosition); if (openBracePos >= 0 && openBracePos < jsonExpression.Start + jsonExpression.Length) { this.AppendUpTo(openBracePos); - this.result.Append(Theme.FormatJsonBracket("{")); + this.result.Append(Theme.FormatBracket("{", braceDepth)); this.currentPosition = openBracePos + 1; } + this.bracketDepth = braceDepth + 1; + // Process the properties foreach (var property in jsonExpression.Properties) { @@ -389,12 +411,15 @@ public void Visit(JsonExpression jsonExpression) } } + // Restore depth so the closing brace matches its opener color. + this.bracketDepth = braceDepth; + // Find and highlight the closing brace var endBracePos = this.text.LastIndexOf('}', jsonExpression.Start + jsonExpression.Length - 1); if (endBracePos >= 0 && endBracePos >= this.currentPosition) { this.AppendUpTo(endBracePos); - this.result.Append(Theme.FormatJsonBracket("}")); + this.result.Append(Theme.FormatBracket("}", braceDepth)); this.currentPosition = endBracePos + 1; } } @@ -405,15 +430,21 @@ public void Visit(JsonArrayExpression jsonArrayExpression) var startPos = jsonArrayExpression.Start; this.AppendUpTo(startPos); + // Color the opening bracket using the current bracket depth, then increment + // so nested braces/brackets/parens use the next color in the cycle. + var bracketDepthForPair = this.bracketDepth; + // Find and highlight the opening bracket var openBracketPos = this.text.IndexOf('[', this.currentPosition); if (openBracketPos >= 0 && openBracketPos < jsonArrayExpression.Start + jsonArrayExpression.Length) { this.AppendUpTo(openBracketPos); - this.result.Append(Theme.FormatJsonBracket("[")); + this.result.Append(Theme.FormatBracket("[", bracketDepthForPair)); this.currentPosition = openBracketPos + 1; } + this.bracketDepth = bracketDepthForPair + 1; + // Process each element in the array for (int i = 0; i < jsonArrayExpression.Expressions.Count; i++) { @@ -435,13 +466,16 @@ public void Visit(JsonArrayExpression jsonArrayExpression) } } + // Restore depth so the closing bracket matches its opener color. + this.bracketDepth = bracketDepthForPair; + // Find and highlight the closing bracket var closeBracketPos = this.text.IndexOf(']', this.currentPosition); if (closeBracketPos >= 0 && closeBracketPos < jsonArrayExpression.Start + jsonArrayExpression.Length) { // AppendUpTo preserves any whitespace before the closing bracket this.AppendUpTo(closeBracketPos); - this.result.Append(Theme.FormatJsonBracket("]")); + this.result.Append(Theme.FormatBracket("]", bracketDepthForPair)); this.currentPosition = closeBracketPos + 1; } @@ -495,12 +529,57 @@ public void Visit(CommandExpression commandExpression) public void Visit(InterpolatedStringExpression interpolatedStringExpression) { + // The interpolated string is rendered as a string-literal background; any + // sub-expressions that carry accurate outer-source positions (variable + // references and "$(...)" interpolations whose tokens have been produced with + // the appropriate Lexer position offset) are visited in place so they pick up + // their dedicated colors. Sub-expressions whose positions don't lie within the + // interpolated string's outer span are treated as literal text — this covers + // the ConstantExpression placeholders the parser emits for raw text chunks + // between interpolations, which still carry the surrounding string token's + // position. + this.AppendUpTo(interpolatedStringExpression.Start); + + var endPos = Math.Min( + interpolatedStringExpression.Start + interpolatedStringExpression.Length, + this.text.Length); + + var interpolations = new List(); foreach (var expr in interpolatedStringExpression.Expressions) { + if (expr is ConstantExpression) + { + continue; + } + + if (expr.Start <= interpolatedStringExpression.Start || expr.Start >= endPos) + { + continue; + } + + interpolations.Add(expr); + } + + interpolations.Sort((a, b) => a.Start.CompareTo(b.Start)); + + foreach (var expr in interpolations) + { + if (expr.Start > this.currentPosition) + { + var chunk = this.text.Substring(this.currentPosition, expr.Start - this.currentPosition); + this.result.Append(Theme.FormatStringLiteral(chunk)); + this.currentPosition = expr.Start; + } + expr.Accept(this); } - this.AppendUpTo(interpolatedStringExpression.Start + interpolatedStringExpression.Length); + if (this.currentPosition < endPos) + { + var chunk = this.text.Substring(this.currentPosition, endPos - this.currentPosition); + this.result.Append(Theme.FormatStringLiteral(chunk)); + this.currentPosition = endPos; + } } // Statement visitors diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs index 7460313..594ecb5 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs @@ -951,6 +951,21 @@ internal CommandState PrintState(CommandState state) if (state.Result?.DataType == Parser.DataType.Json) { + // When writing JSON to the terminal (not redirected to a file), apply + // syntax highlighting using the configured Spectre.Console theme. File + // redirection still receives plain text so downstream tooling and tests + // are unaffected. + if (state.OutputFormat == OutputFormat.JSon && string.IsNullOrEmpty(this.StdOutRedirect)) + { + var element = (JsonElement?)state.Result.ConvertShellObject(Parser.DataType.Json); + if (element.HasValue) + { + AnsiConsole.MarkupLine(JsonOutputHighlighter.BuildMarkup(element.Value)); + state.Result = null; + return state; + } + } + output = state.GenerateOutputText(); } else diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs index 19ae0a2..bea2164 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs @@ -12,6 +12,19 @@ internal static class Theme { public const string CommandColor = "[lightyellow3]"; + /// + /// Colors used for paired brackets ({}, [], ()) cycled by nesting depth, similar to + /// the "bracket pair colorization" feature in modern editors. The cycle is shared + /// across bracket types so that a single visual depth counter spans every kind of + /// pair. + /// + private static readonly string[] BracketDepthColors = + { + "gold1", + "orchid", + "deepskyblue1", + }; + public static string FormatUnknownCommand(string command) { return $"[bold red]{Markup.Escape(command)}[/]"; @@ -72,6 +85,50 @@ public static string FormatJsonBracket(string text) return $"[yellow]{Markup.Escape(text)}[/]"; } + /// + /// Returns the Spectre.Console color name for a bracket at the given (zero-based) + /// nesting depth. Colors cycle when the depth exceeds the palette length. + /// + public static string GetBracketColor(int depth) + { + if (depth < 0) + { + depth = 0; + } + + return BracketDepthColors[depth % BracketDepthColors.Length]; + } + + /// + /// Formats a single bracket character ('{', '}', '[', ']', '(', ')') with the + /// depth-cycled color. Comma and colon should continue to use + /// instead. + /// + public static string FormatBracket(string text, int depth) + { + return $"[{GetBracketColor(depth)}]{Markup.Escape(text)}[/]"; + } + + public static string FormatJsonString(string text) + { + return $"[violet]{Markup.Escape(text)}[/]"; + } + + public static string FormatJsonNumber(string text) + { + return $"[violet]{Markup.Escape(text)}[/]"; + } + + public static string FormatJsonBoolean(string text) + { + return $"[violet]{Markup.Escape(text)}[/]"; + } + + public static string FormatJsonNull(string text) + { + return $"[violet]{Markup.Escape(text)}[/]"; + } + internal static string FormatStringLiteral(string text) { return $"[violet]{Markup.Escape(text)}[/]"; diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs index 50a8d95..bc2b154 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs @@ -671,6 +671,41 @@ private InterpolatedStringExpression ParseInterpolatedStringExpression(Token tok var content = token.Value; // The content without the quotes var position = 0; + // Mapping from each character index in the cooked content to its absolute + // position in the original outer source buffer (accounting for escape sequences). + // Available for tokens produced by the same lexer that owns this parser; falls back + // to null for synthetic interpolated string tokens produced elsewhere. + var sourceMap = this.lexer.GetInterpolatedStringSourceMap(token); + + // Returns the absolute outer source position for the given content index. When no + // mapping is available (defensive fallback) callers degrade to the surrounding + // interpolated string token's position. + int OuterPos(int contentIndex) + { + if (sourceMap != null && contentIndex >= 0 && contentIndex < sourceMap.Count) + { + return sourceMap[contentIndex]; + } + + return token.Start; + } + + // Builds a synthetic identifier token spanning a slice of the outer source text + // for sub-expressions extracted from the interpolated string (variable references + // and the like). Using accurate positions allows the syntax highlighter and other + // tooling to operate on these nodes without consulting the cooked content. + Token MakeOuterToken(string value, int contentStart, int contentEnd) + { + if (sourceMap == null || contentEnd <= contentStart) + { + return token; + } + + var startOuter = OuterPos(contentStart); + var endOuter = OuterPos(contentEnd - 1) + 1; + return new Token(TokenType.Identifier, value, startOuter, Math.Max(0, endOuter - startOuter)); + } + while (position < content.Length) { // Find the next interpolation @@ -764,17 +799,53 @@ private InterpolatedStringExpression ParseInterpolatedStringExpression(Token tok // Parse the expression if (!string.IsNullOrWhiteSpace(exprContent)) { - var exprLexer = new Lexer(exprContent); + // The cooked exprContent can drift from the outer source whenever + // the interpolated string contains escape sequences (for example a + // string literal inside the interpolation: $( "a\nb" )). To keep token + // positions from the inner lexer correct in those cases, lex the raw + // outer-source slice that produced this content instead of the cooked + // text. The slice runs from the absolute position of the first inner + // character to one past the absolute position of the last inner + // character (the position immediately before the closing ')'). + if (sourceMap != null && this.lexer.RawInput.Length > 0) + { + var rawStartOuter = OuterPos(startExprPos); + var rawEndOuter = OuterPos(position - 2) + 1; + var lexerOriginStart = this.lexer.PositionOffset; + var rawStart = rawStartOuter - lexerOriginStart; + var rawEnd = rawEndOuter - lexerOriginStart; + if (rawStart >= 0 && rawEnd >= rawStart && rawEnd <= this.lexer.RawInput.Length) + { + var rawSlice = this.lexer.RawInput.Substring(rawStart, rawEnd - rawStart); + var rawLexer = new Lexer(rawSlice, rawStartOuter); + var rawParser = new ExpressionParser(rawLexer); + var expr = rawParser.ParseExpression(); + + if (rawLexer.Errors.Count > 0) + { + this.lexer.Errors.AddRange(rawLexer.Errors); + } + + expressions.Add(expr); + continue; + } + } + + // Fallback path: no source map (synthetic token) or the raw slice could + // not be located. Lex the cooked content with a single fixed offset. + // Token positions may drift through escape sequences but stay correct + // for escape-free interpolations, which covers the common case. + var innerOffset = OuterPos(startExprPos); + var exprLexer = new Lexer(exprContent, innerOffset); var exprParser = new ExpressionParser(exprLexer); - var expr = exprParser.ParseExpression(); + var expr2 = exprParser.ParseExpression(); - // Merge nested errors if (exprLexer.Errors.Count > 0) { this.lexer.Errors.AddRange(exprLexer.Errors); } - expressions.Add(expr); + expressions.Add(expr2); } } else if (char.IsLetter(content[position]) || content[position] == '_') @@ -797,14 +868,19 @@ private InterpolatedStringExpression ParseInterpolatedStringExpression(Token tok var varName = content.Substring(startVarPos, position - startVarPos); if (!string.IsNullOrEmpty(varName)) { + // Span the synthetic token from the leading '$' through the last + // character of the variable name so the AST node carries an accurate + // outer-source range. + var varToken = MakeOuterToken(varName, dollarIndex, position); + // Check if it contains property access or array access if (varName.Contains('.') || varName.Contains('[')) { - expressions.Add(new JSonPathExpression(token, varName)); + expressions.Add(new JSonPathExpression(varToken, varName)); } else { - expressions.Add(new VariableExpression(token, varName)); + expressions.Add(new VariableExpression(varToken, varName)); } } } @@ -822,7 +898,8 @@ private InterpolatedStringExpression ParseInterpolatedStringExpression(Token tok var varName = content.Substring(startVarPos, position - startVarPos); if (!string.IsNullOrEmpty(varName)) { - expressions.Add(new VariableExpression(token, varName)); + var varToken = MakeOuterToken(varName, dollarIndex, position); + expressions.Add(new VariableExpression(varToken, varName)); } } else diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs index c28b170..fe10198 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs @@ -216,13 +216,34 @@ internal class Lexer { private readonly string input; private readonly Stack putBackTokens; + private readonly int positionOffset; + + // Use reference equality on the Token key. Token is a record (value equality), + // so two distinct tokens with identical Type/Value/Start/Length would otherwise + // collide as map keys. Reference equality guarantees that only the exact token + // instance produced by ReadInterpolatedString / ReadDoubleQuotedString in this + // lexer can retrieve its source map. + private readonly Dictionary interpolatedStringSourceMaps = new(ReferenceEqualityComparer.Instance); + private int position; private Token? lastToken; public Lexer(string input) + : this(input, 0) + { + } + + /// + /// Creates a lexer that reports token positions shifted by . + /// Used when lexing a substring of a larger source buffer (for example, the contents of + /// a $(...) interpolation inside an interpolated string) so that the produced + /// tokens carry positions relative to the outer buffer. + /// + public Lexer(string input, int positionOffset) { this.input = input ?? string.Empty; this.position = 0; + this.positionOffset = positionOffset; this.putBackTokens = new Stack(); this.lastToken = null; } @@ -231,6 +252,35 @@ public Lexer(string input) public ErrorList Errors { get; } = new ErrorList(); + /// + /// Gets the raw input string this lexer is reading. Combined with + /// , callers can recover the original outer-source + /// substring underlying any token position produced by this lexer. + /// + internal string RawInput => this.input; + + /// + /// Gets the position offset added to every produced token's Start value. + /// + internal int PositionOffset => this.positionOffset; + + /// + /// Returns the per-character source-position mapping recorded for a previously produced + /// interpolated string token, or null if the token did not originate from this + /// lexer or contained no mapping. Each entry is the absolute source position of the + /// corresponding character in the cooked token value, taking + /// into account. + /// + internal IReadOnlyList? GetInterpolatedStringSourceMap(Token token) + { + return this.interpolatedStringSourceMaps.TryGetValue(token, out var map) ? map : null; + } + + private Token MakeToken(TokenType type, string value, int rawStart, int length) + { + return new Token(type, value, rawStart + this.positionOffset, length); + } + public IEnumerable Tokenize() { Token? token; @@ -316,51 +366,51 @@ private static bool IsVariableIdentifierPart(char ch) { case '|': this.Advance(); - return new Token(TokenType.Pipe, "|", startPosition, 1); + return this.MakeToken(TokenType.Pipe, "|", startPosition, 1); case '(': this.Advance(); - return new Token(TokenType.OpenParenthesis, "(", startPosition, 1); + return this.MakeToken(TokenType.OpenParenthesis, "(", startPosition, 1); case ')': this.Advance(); - return new Token(TokenType.CloseParenthesis, ")", startPosition, 1); + return this.MakeToken(TokenType.CloseParenthesis, ")", startPosition, 1); case '[': this.Advance(); - return new Token(TokenType.OpenBracket, "[", startPosition, 1); + return this.MakeToken(TokenType.OpenBracket, "[", startPosition, 1); case ']': this.Advance(); - return new Token(TokenType.CloseBracket, "]", startPosition, 1); + return this.MakeToken(TokenType.CloseBracket, "]", startPosition, 1); case '{': this.Advance(); - return new Token(TokenType.OpenBrace, "{", startPosition, 1); + return this.MakeToken(TokenType.OpenBrace, "{", startPosition, 1); case '}': this.Advance(); - return new Token(TokenType.CloseBrace, "}", startPosition, 1); + return this.MakeToken(TokenType.CloseBrace, "}", startPosition, 1); case ':': this.Advance(); - return new Token(TokenType.Colon, ":", startPosition, 1); + return this.MakeToken(TokenType.Colon, ":", startPosition, 1); case ';': this.Advance(); - return new Token(TokenType.Semicolon, ";", startPosition, 1); + return this.MakeToken(TokenType.Semicolon, ";", startPosition, 1); case ',': this.Advance(); - return new Token(TokenType.Comma, ",", startPosition, 1); + return this.MakeToken(TokenType.Comma, ",", startPosition, 1); case '+': this.Advance(); - return new Token(TokenType.Plus, "+", startPosition, 1); + return this.MakeToken(TokenType.Plus, "+", startPosition, 1); case '-': this.Advance(); - return new Token(TokenType.Minus, "-", startPosition, 1); + return this.MakeToken(TokenType.Minus, "-", startPosition, 1); case '/': // Check if this might be the start of a partition key identifier (e.g., /partitionKey) @@ -373,25 +423,25 @@ private static bool IsVariableIdentifierPart(char ch) // Otherwise treat as division operator this.Advance(); - return new Token(TokenType.Divide, "/", startPosition, 1); + return this.MakeToken(TokenType.Divide, "/", startPosition, 1); case '%': this.Advance(); - return new Token(TokenType.Mod, "%", startPosition, 1); + return this.MakeToken(TokenType.Mod, "%", startPosition, 1); case '^': this.Advance(); - return new Token(TokenType.Xor, "^", startPosition, 1); + return this.MakeToken(TokenType.Xor, "^", startPosition, 1); case '!': this.Advance(); - return new Token(TokenType.Not, "!", startPosition, 1); + return this.MakeToken(TokenType.Not, "!", startPosition, 1); case '\n': case '\r': var eolStartPos = this.position; this.SkipNewline(); - return new Token(TokenType.Eol, Environment.NewLine, startPosition, this.position - eolStartPos); + return this.MakeToken(TokenType.Eol, Environment.NewLine, startPosition, this.position - eolStartPos); case '#': return this.ReadComment(startPosition); @@ -411,7 +461,7 @@ private static bool IsVariableIdentifierPart(char ch) { // Unknown character, treat as single character identifier this.Advance(); - return new Token(TokenType.Identifier, ch.ToString(), startPosition, 1); + return this.MakeToken(TokenType.Identifier, ch.ToString(), startPosition, 1); } } } @@ -424,7 +474,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("&&")) { this.Advance(2); - token = new Token(TokenType.And, "&&", startPosition, 2); + token = this.MakeToken(TokenType.And, "&&", startPosition, 2); return true; } @@ -432,7 +482,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("||")) { this.Advance(2); - token = new Token(TokenType.Or, "||", startPosition, 2); + token = this.MakeToken(TokenType.Or, "||", startPosition, 2); return true; } @@ -440,7 +490,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("**")) { this.Advance(2); - token = new Token(TokenType.Pow, "**", startPosition, 2); + token = this.MakeToken(TokenType.Pow, "**", startPosition, 2); return true; } @@ -448,7 +498,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("==")) { this.Advance(2); - token = new Token(TokenType.Equal, "==", startPosition, 2); + token = this.MakeToken(TokenType.Equal, "==", startPosition, 2); return true; } @@ -456,7 +506,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("!=")) { this.Advance(2); - token = new Token(TokenType.NotEqual, "!=", startPosition, 2); + token = this.MakeToken(TokenType.NotEqual, "!=", startPosition, 2); return true; } @@ -464,7 +514,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("<=")) { this.Advance(2); - token = new Token(TokenType.LessThanOrEqual, "<=", startPosition, 2); + token = this.MakeToken(TokenType.LessThanOrEqual, "<=", startPosition, 2); return true; } @@ -472,7 +522,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead(">=")) { this.Advance(2); - token = new Token(TokenType.GreaterThanOrEqual, ">=", startPosition, 2); + token = this.MakeToken(TokenType.GreaterThanOrEqual, ">=", startPosition, 2); return true; } @@ -482,22 +532,22 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) { case '*': this.Advance(); - token = new Token(TokenType.Multiply, "*", startPosition, 1); + token = this.MakeToken(TokenType.Multiply, "*", startPosition, 1); return true; case '=': this.Advance(); - token = new Token(TokenType.Assignment, "=", startPosition, 1); + token = this.MakeToken(TokenType.Assignment, "=", startPosition, 1); return true; case '<': this.Advance(); - token = new Token(TokenType.LessThan, "<", startPosition, 1); + token = this.MakeToken(TokenType.LessThan, "<", startPosition, 1); return true; case '>': this.Advance(); - token = new Token(TokenType.GreaterThan, ">", startPosition, 1); + token = this.MakeToken(TokenType.GreaterThan, ">", startPosition, 1); return true; } @@ -519,7 +569,7 @@ private Token ReadComment(int startPosition) this.Advance(); } - var commentToken = new Token(TokenType.Comment, sb.ToString(), startPosition, this.position - startPosition); + var commentToken = this.MakeToken(TokenType.Comment, sb.ToString(), startPosition, this.position - startPosition); this.Comments.Add(commentToken); return commentToken; } @@ -562,7 +612,7 @@ private Token ReadIdentifier(int startPosition) } } - return new Token(TokenType.Identifier, sb.ToString(), startPosition, this.position - startPosition); + return this.MakeToken(TokenType.Identifier, sb.ToString(), startPosition, this.position - startPosition); } private Token ReadDoubleQuotedString(int startPosition) @@ -570,11 +620,17 @@ private Token ReadDoubleQuotedString(int startPosition) var sb = new StringBuilder(); bool hasInterpolation = false; + // Mirrors the source-position tracking in ReadInterpolatedString so callers can + // map cooked content indices back to absolute outer-source positions when the + // string contains "$..." interpolations. + var sourcePositions = new List(); + // Skip opening quote this.Advance(); while (this.position < this.input.Length) { + var sourcePos = this.position + this.positionOffset; var ch = this.input[this.position]; if (ch == '"') @@ -598,6 +654,7 @@ private Token ReadDoubleQuotedString(int startPosition) default: sb.Append(ch); break; } + sourcePositions.Add(sourcePos); this.Advance(); } else @@ -610,12 +667,19 @@ private Token ReadDoubleQuotedString(int startPosition) } sb.Append(ch); + sourcePositions.Add(sourcePos); this.Advance(); } } var tokenType = hasInterpolation ? TokenType.InterpolatedString : TokenType.String; - return new Token(tokenType, sb.ToString(), startPosition, this.position - startPosition); + var token = this.MakeToken(tokenType, sb.ToString(), startPosition, this.position - startPosition); + if (hasInterpolation) + { + this.interpolatedStringSourceMaps[token] = sourcePositions.ToArray(); + } + + return token; } private Token ReadSingleQuotedString(int startPosition) @@ -654,7 +718,7 @@ private Token ReadSingleQuotedString(int startPosition) } } - return new Token(TokenType.String, sb.ToString(), startPosition, this.position - startPosition); + return this.MakeToken(TokenType.String, sb.ToString(), startPosition, this.position - startPosition); } private Token ReadNumber(int startPosition) @@ -728,7 +792,7 @@ private Token ReadNumber(int startPosition) // Determine token type based on what we found var tokenType = (hasDecimalPoint || hasExponent) ? TokenType.Decimal : TokenType.Number; - return new Token(tokenType, sb.ToString(), startPosition, this.position - startPosition); + return this.MakeToken(tokenType, sb.ToString(), startPosition, this.position - startPosition); } private void SkipWhitespace() @@ -787,12 +851,21 @@ private Token ReadInterpolatedString(int startPosition) { var sb = new StringBuilder(); + // Records the absolute outer-source position of the source character that + // produced each cooked character appended to sb. Used by callers + // (notably ) + // to map indices in the cooked content back to positions in the original + // input, which is required for syntax highlighting of nested + // $(...) interpolations and $VAR references. + var sourcePositions = new List(); + // Skip the '$' and opening quote this.Advance(); // skip $ this.Advance(); // skip " while (this.position < this.input.Length) { + var sourcePos = this.position + this.positionOffset; var ch = this.input[this.position]; if (ch == '"') @@ -818,12 +891,14 @@ private Token ReadInterpolatedString(int startPosition) default: sb.Append(ch); break; } + sourcePositions.Add(sourcePos); this.Advance(); } else if (ch == '{' && this.position + 1 < this.input.Length && this.input[this.position + 1] == '{') { // Handle escaped opening brace {{ sb.Append('{'); + sourcePositions.Add(sourcePos); this.Advance(); // skip first { this.Advance(); // skip second { } @@ -831,6 +906,7 @@ private Token ReadInterpolatedString(int startPosition) { // Handle escaped closing brace }} sb.Append('}'); + sourcePositions.Add(sourcePos); this.Advance(); // skip first } this.Advance(); // skip second } } @@ -838,10 +914,13 @@ private Token ReadInterpolatedString(int startPosition) { // Regular character (including interpolation expressions) sb.Append(ch); + sourcePositions.Add(sourcePos); this.Advance(); } } - return new Token(TokenType.InterpolatedString, sb.ToString(), startPosition, this.position - startPosition); + var token = this.MakeToken(TokenType.InterpolatedString, sb.ToString(), startPosition, this.position - startPosition); + this.interpolatedStringSourceMaps[token] = sourcePositions.ToArray(); + return token; } } \ No newline at end of file