diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e3ced6f..6e7ace2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: - name: Set up monorepo overrides run: | - for pkg in rumil_parsers rumil_expressions; do + for pkg in rumil_parsers rumil_expressions rumil_tokens; do cat > $pkg/pubspec_overrides.yaml <<'EOF' dependency_overrides: rumil: @@ -37,7 +37,7 @@ jobs: - name: Install dependencies run: | - for pkg in rumil rumil_codec rumil_parsers rumil_expressions rumil_codec_builder; do + for pkg in rumil rumil_codec rumil_parsers rumil_expressions rumil_codec_builder rumil_tokens; do (cd $pkg && dart pub get) done @@ -47,7 +47,7 @@ jobs: - name: Analyze run: | - for pkg in rumil rumil_codec rumil_parsers rumil_expressions rumil_codec_builder; do + for pkg in rumil rumil_codec rumil_parsers rumil_expressions rumil_codec_builder rumil_tokens; do echo "=== $pkg ===" (cd $pkg && dart analyze --fatal-infos) done @@ -63,7 +63,7 @@ jobs: - name: Set up monorepo overrides run: | - for pkg in rumil_parsers rumil_expressions; do + for pkg in rumil_parsers rumil_expressions rumil_tokens; do cat > $pkg/pubspec_overrides.yaml <<'EOF' dependency_overrides: rumil: @@ -82,12 +82,12 @@ jobs: - name: Install dependencies run: | - for pkg in rumil rumil_codec rumil_parsers rumil_expressions rumil_codec_builder; do + for pkg in rumil rumil_codec rumil_parsers rumil_expressions rumil_codec_builder rumil_tokens; do (cd $pkg && dart pub get) done - name: Check formatting - run: dart format --output=none --set-exit-if-changed rumil rumil_codec rumil_parsers rumil_expressions rumil_codec_builder + run: dart format --output=none --set-exit-if-changed rumil rumil_codec rumil_parsers rumil_expressions rumil_codec_builder rumil_tokens test: name: Test @@ -100,7 +100,7 @@ jobs: - name: Set up monorepo overrides run: | - for pkg in rumil_parsers rumil_expressions; do + for pkg in rumil_parsers rumil_expressions rumil_tokens; do cat > $pkg/pubspec_overrides.yaml <<'EOF' dependency_overrides: rumil: @@ -110,7 +110,7 @@ jobs: - name: Install dependencies run: | - for pkg in rumil rumil_codec rumil_parsers rumil_expressions; do + for pkg in rumil rumil_codec rumil_parsers rumil_expressions rumil_tokens; do (cd $pkg && dart pub get) done @@ -130,6 +130,10 @@ jobs: working-directory: rumil_expressions run: dart test + - name: Test rumil_tokens + working-directory: rumil_tokens + run: dart test + doc: name: Documentation runs-on: ubuntu-latest @@ -141,7 +145,7 @@ jobs: - name: Set up monorepo overrides run: | - for pkg in rumil_parsers rumil_expressions; do + for pkg in rumil_parsers rumil_expressions rumil_tokens; do cat > $pkg/pubspec_overrides.yaml <<'EOF' dependency_overrides: rumil: @@ -151,13 +155,13 @@ jobs: - name: Install dependencies run: | - for pkg in rumil rumil_codec rumil_parsers rumil_expressions; do + for pkg in rumil rumil_codec rumil_parsers rumil_expressions rumil_tokens; do (cd $pkg && dart pub get) done - name: Generate docs run: | - for pkg in rumil rumil_codec; do + for pkg in rumil rumil_codec rumil_tokens; do echo "=== $pkg ===" (cd $pkg && dart doc --validate-links) done diff --git a/rumil/CHANGELOG.md b/rumil/CHANGELOG.md index dccd0d1..a53c7ee 100644 --- a/rumil/CHANGELOG.md +++ b/rumil/CHANGELOG.md @@ -1,3 +1,13 @@ +## 0.6.0 + +Synchronized release across all rumil-dart packages. Additive for +`rumil`. + +- `position()` primitive: a zero-width parser that yields the current + byte offset. 
Combines with `Zip` for span capture: + `position().zip(p).zip(position())` produces `((start, value), end)` + in one pass. + ## 0.5.0 **Interpreter optimizations and API refinements.** diff --git a/rumil/lib/src/interpreter.dart b/rumil/lib/src/interpreter.dart index da7b38a..b401333 100644 --- a/rumil/lib/src/interpreter.dart +++ b/rumil/lib/src/interpreter.dart @@ -317,6 +317,9 @@ Result interpretI(Parser parser, ParserState state) { loc, ); + case GetPosition(): + return Success(state.offset as A, 0); + case Mapped(): return _runTrampoline(p, state); diff --git a/rumil/lib/src/location.dart b/rumil/lib/src/location.dart index 3b4ea70..6719e03 100644 --- a/rumil/lib/src/location.dart +++ b/rumil/lib/src/location.dart @@ -11,7 +11,7 @@ final class Location { /// 0-indexed byte offset from start of input. final int offset; - /// Creates a location at [offset] within [input]. + /// Creates a location at [offset] within the given input string. const Location(this._input, this.offset); /// The start of input: line 1, column 1, offset 0. diff --git a/rumil/lib/src/parser.dart b/rumil/lib/src/parser.dart index b8a3292..80edd38 100644 --- a/rumil/lib/src/parser.dart +++ b/rumil/lib/src/parser.dart @@ -99,6 +99,19 @@ final class Eof extends Parser { bool get isSimple => true; } +/// Succeeds without consuming input, yielding the current byte offset. +/// +/// Use via [position] in `primitives.dart`. Typically wrapped into span +/// tracking: `position().zip(p).zip(position())` gives the start offset, +/// the parsed value, and the end offset in one pass. +final class GetPosition extends Parser { + /// Creates a position-reading parser. + const GetPosition(); + + @override + bool get isSimple => true; +} + // --------------------------------------------------------------------------- // Composition // --------------------------------------------------------------------------- diff --git a/rumil/lib/src/primitives.dart b/rumil/lib/src/primitives.dart index 2f5a0aa..8041835 100644 --- a/rumil/lib/src/primitives.dart +++ b/rumil/lib/src/primitives.dart @@ -112,6 +112,19 @@ Parser symbol(String s) => lexeme(string(s)); /// Matches end of input. Parser eof() => const Eof(); +/// Succeeds without consuming input, yielding the current byte offset. +/// +/// Combines with [Zip] to capture spans around a parser: +/// +/// ```dart +/// final spanned = position().zip(myParser).zip(position()); +/// // produces (((int startOffset, A value), int endOffset)) +/// ``` +/// +/// The offset is 0-indexed. Use [Location] (via `Location(input, offset)`) +/// when converting to line/column. +Parser position() => GetPosition(); + /// Defers parser construction for recursive grammars. Parser defer(Parser Function() thunk) => Defer(thunk); diff --git a/rumil/pubspec.yaml b/rumil/pubspec.yaml index d423ea3..59d7f3b 100644 --- a/rumil/pubspec.yaml +++ b/rumil/pubspec.yaml @@ -2,7 +2,7 @@ name: rumil description: >- Parser combinator library for Dart with left recursion, stack-safe trampolining, typed errors, lazy error construction, and sealed ADT design. 
-version: 0.5.0 +version: 0.6.0 repository: https://github.com/hakimjonas/rumil-dart topics: - parser diff --git a/rumil/test/smoke_test.dart b/rumil/test/smoke_test.dart index cb37181..f19b589 100644 --- a/rumil/test/smoke_test.dart +++ b/rumil/test/smoke_test.dart @@ -42,6 +42,30 @@ void main() { test('eof fails with remaining input', () { expectFailure(eof().run('x')); }); + + test('position at start yields 0', () { + final r = position().run('abc'); + expect(successValue(r), 0); + expect((r as Success).consumed, 0); + }); + + test('position after consumption yields offset', () { + final r = string('abc').skipThen(position()).run('abcdef'); + expect(successValue(r), 3); + }); + + test('position captures span around a parser', () { + final spanned = spaces() + .skipThen(position()) + .zip(string('hello')) + .zip(position()); + final r = spanned.run(' hello!'); + expect(r, isA>()); + final s = r as Success; + expect(s.value.$1.$1, 2); + expect(s.value.$1.$2, 'hello'); + expect(s.value.$2, 7); + }); }); group('Composition', () { diff --git a/rumil_codec/CHANGELOG.md b/rumil_codec/CHANGELOG.md index 332cbcf..1d16fb4 100644 --- a/rumil_codec/CHANGELOG.md +++ b/rumil_codec/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.6.0 + +Version aligned with the rumil-dart monorepo 0.6.0 release. No +functional changes in this package. + ## 0.5.0 - **New:** `dateTimeCodec` — microsecond precision, preserves UTC/local flag. diff --git a/rumil_codec/pubspec.yaml b/rumil_codec/pubspec.yaml index bef4dd5..fe838d7 100644 --- a/rumil_codec/pubspec.yaml +++ b/rumil_codec/pubspec.yaml @@ -2,7 +2,7 @@ name: rumil_codec description: >- Binary codec library for Dart with ZigZag/Varint encoding, composable BinaryCodec instances, and product type composition via records. -version: 0.5.0 +version: 0.6.0 repository: https://github.com/hakimjonas/rumil-dart topics: - codec diff --git a/rumil_codec_builder/CHANGELOG.md b/rumil_codec_builder/CHANGELOG.md index 819e34b..572949b 100644 --- a/rumil_codec_builder/CHANGELOG.md +++ b/rumil_codec_builder/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.6.0 + +- Depends on `rumil_codec: ^0.6.0`, `rumil_parsers: ^0.6.0`. Version + aligned with the rumil-dart monorepo 0.6.0 release. + ## 0.5.0 - Depends on `rumil_codec: ^0.5.0`, `rumil_parsers: ^0.5.0`. Version aligned. diff --git a/rumil_codec_builder/pubspec.yaml b/rumil_codec_builder/pubspec.yaml index 71e7996..71c229e 100644 --- a/rumil_codec_builder/pubspec.yaml +++ b/rumil_codec_builder/pubspec.yaml @@ -2,7 +2,7 @@ name: rumil_codec_builder description: >- Code generator for rumil_codec: derives BinaryCodec implementations for annotated classes and sealed class hierarchies. -version: 0.5.0 +version: 0.6.0 repository: https://github.com/hakimjonas/rumil-dart topics: - codec @@ -16,11 +16,11 @@ dependencies: build: ">=3.0.0 <5.0.0" source_gen: ^4.0.0 analyzer: ">=12.0.0 <13.0.0" - rumil_codec: ^0.5.0 + rumil_codec: ^0.6.0 dev_dependencies: build_runner: ">=2.4.0 <3.0.0" build_test: ^3.5.0 - rumil_parsers: ^0.5.0 + rumil_parsers: ^0.6.0 test: ^1.25.0 lints: ^6.0.0 diff --git a/rumil_expressions/CHANGELOG.md b/rumil_expressions/CHANGELOG.md index 0260af6..41c87fa 100644 --- a/rumil_expressions/CHANGELOG.md +++ b/rumil_expressions/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.6.0 + +- Depends on `rumil: ^0.6.0`. Version aligned with the rumil-dart + monorepo 0.6.0 release. No functional changes in this package. + ## 0.5.0 - Depends on rumil ^0.5.0. Benefits from interpreter optimizations (5-9% AOT, 30-52% WasmGC). 
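A minimal sketch of the span-capture idiom that the `rumil` 0.6.0 changelog and `primitives.dart` describe above; the input string and the destructuring are illustrative only, everything else uses combinators shown in this diff:

```dart
import 'package:rumil/rumil.dart';

void main() {
  // position().zip(p).zip(position()) yields ((start, value), end).
  final spanned = position().zip(string('hello')).zip(position());
  final result = spanned.run('hello world');
  if (result case Success(:final value)) {
    final ((start, text), end) = value;
    print('"$text" spans [$start, $end)'); // "hello" spans [0, 5)
  }
}
```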
diff --git a/rumil_expressions/pubspec.yaml b/rumil_expressions/pubspec.yaml index 0e78f21..a94d1ed 100644 --- a/rumil_expressions/pubspec.yaml +++ b/rumil_expressions/pubspec.yaml @@ -2,7 +2,7 @@ name: rumil_expressions description: >- Formula evaluator built on Rumil: arithmetic, boolean logic, string ops, variables, custom functions, and precise error locations. -version: 0.5.0 +version: 0.6.0 repository: https://github.com/hakimjonas/rumil-dart topics: - parser @@ -13,7 +13,7 @@ environment: sdk: ^3.7.0 dependencies: - rumil: ^0.5.0 + rumil: ^0.6.0 dev_dependencies: test: ^1.31.0 diff --git a/rumil_parsers/CHANGELOG.md b/rumil_parsers/CHANGELOG.md index c8e49cf..8cc21ef 100644 --- a/rumil_parsers/CHANGELOG.md +++ b/rumil_parsers/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.6.0 + +- Depends on `rumil: ^0.6.0`. Version aligned with the rumil-dart + monorepo 0.6.0 release. No functional changes in this package. + ## 0.5.0 **CommonMark Markdown parser. Architecture audit. 7376 tests.** diff --git a/rumil_parsers/pubspec.yaml b/rumil_parsers/pubspec.yaml index d0a6df0..2ac57c1 100644 --- a/rumil_parsers/pubspec.yaml +++ b/rumil_parsers/pubspec.yaml @@ -2,7 +2,7 @@ name: rumil_parsers description: >- Format parsers built on Rumil: JSON, CSV, XML, TOML, YAML, Proto3, HCL, and CommonMark Markdown, plus typed AST decoders with ObjectAccessor pattern. -version: 0.5.0 +version: 0.6.0 repository: https://github.com/hakimjonas/rumil-dart topics: - parser @@ -13,7 +13,7 @@ environment: sdk: ^3.7.0 dependencies: - rumil: ^0.5.0 + rumil: ^0.6.0 dev_dependencies: test: ^1.31.0 diff --git a/rumil_tokens/CHANGELOG.md b/rumil_tokens/CHANGELOG.md new file mode 100644 index 0000000..465d90c --- /dev/null +++ b/rumil_tokens/CHANGELOG.md @@ -0,0 +1,52 @@ +## 0.1.0 + +Initial in-tree cut. Source code tokenizer built on Rumil. Not +published to pub.dev; consumed via path dependency from elsewhere +in the monorepo. + +### Tokens + +- Sealed `Token` ADT: `Keyword`, `TypeName`, `StringLit`, `NumberLit`, + `Comment`, `Punctuation`, `Operator`, `Variable`, `Identifier`, + `Annotation`, `Whitespace`, `Plain`. + +### API + +- `tokenize(source, grammar)` returns a lossless `List`; + concatenating `token.text` reconstructs the source exactly. +- `tokenizeSpans(source, grammar)` returns `List>` + carrying byte offsets. Spans are half-open `[start, end)`, + contiguous, and anchored to `[0, source.length)`. +- `Spanned` is an extension type over + `(T, int, int)`. Narrow types upcast to wider ones. + +### Built-in grammars + +- `dart`, `scala`, `yaml`, `json`, `shell`. +- `grammarFor(name)` returns the matching grammar or `null`. + +### `LangGrammar` fields + +- Lexical: `keywords`, `types`, `lineComment`, `blockComment`, + `stringDelimiters`, `multiLineStringDelimiters`, `annotationPrefix`, + `punctuationChars`, `operatorChars`, `multiCharOperators`. +- Flags: `identifiersAllowDollar`, `rawStringPrefix`, + `identifierStringPrefix`, `backtickIdentifiers`, `shellVariables`, + `backtickCommandSubstitution`, `heredocs`. + +### Known limitations + +- YAML block scalars (`|`, `>`) tokenize the indented body as regular + YAML content rather than one string literal. +- Dart string interpolation (`"$x"`, `"${expr}"`) remains one + `StringLit`; no structured tokens for the interpolated parts. +- Shell braced expansions do not balance nested braces: `${x:-${y}}` + closes the outer expansion prematurely. +- Heredoc body is one `StringLit`; per-component coloring is not + available. 
+- Nested generic close (`List>`) highlights the outer + `>>` as the right-shift operator. + +### Dependencies + +- `rumil: ^0.6.0` for the `position()` primitive. diff --git a/rumil_tokens/LICENSE b/rumil_tokens/LICENSE new file mode 100644 index 0000000..cd50bba --- /dev/null +++ b/rumil_tokens/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Hakim Jonas Ghoula + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/rumil_tokens/README.md b/rumil_tokens/README.md new file mode 100644 index 0000000..033f74b --- /dev/null +++ b/rumil_tokens/README.md @@ -0,0 +1,83 @@ +# rumil_tokens + +**Status: in-tree only. Not published to pub.dev.** Consumed via path +dependency from other packages in the monorepo (e.g. `rem`). + +Source code tokenizer built on [Rumil](https://pub.dev/packages/rumil) +parser combinators. Classifies source text into typed token spans: +keywords, strings, comments, numbers, types, annotations, operators, +variables, and punctuation. Token streams are lossless; concatenating +`token.text` across a stream reconstructs the input exactly. + +## Usage + +```dart +import 'package:rumil_tokens/rumil_tokens.dart'; + +final tokens = tokenize('final x = 42; // answer', dart); +for (final token in tokens) { + print('${token.runtimeType}: ${token.text}'); +} +``` + +Use a built-in grammar (`dart`, `scala`, `yaml`, `json`, `shell`) or define +your own: + +```dart +const rust = LangGrammar( + name: 'rust', + keywords: ['fn', 'let', 'mut', 'if', 'else', 'match', 'impl', 'struct'], + types: ['i32', 'u64', 'String', 'Vec', 'Option', 'Result', 'bool'], + lineComment: '//', + blockComment: ('/*', '*/'), + stringDelimiters: ['"'], + annotationPrefix: '#', +); + +final tokens = tokenize(source, rust); +``` + +Look up a grammar by name: + +```dart +final grammar = grammarFor('dart'); // returns null for unknown languages +``` + +## Lossless property + +Concatenating `token.text` for every token reconstructs the original source: + +```dart +assert(tokens.map((t) => t.text).join() == source); +``` + +## Positions + +For tooling that needs byte offsets, use `tokenizeSpans`: + +```dart +final spans = tokenizeSpans(source, dart); +for (final s in spans) { + print('[${s.start}, ${s.end}) ${s.token}'); + assert(source.substring(s.start, s.end) == s.token.text); +} +``` + +`Spanned` is an extension type over `(Token, int, int)`. +The `[start, end)` interval is half-open; spans are contiguous +(`spans[i].end == spans[i+1].start`) and anchored (`spans.first.start == 0`, +`spans.last.end == source.length`). 
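+
+Offsets convert to line/column positions with rumil's `Location` (from
+`package:rumil/rumil.dart`). A sketch, assuming `Location` exposes the
+`line` and `column` getters its docs describe:
+
+```dart
+for (final s in tokenizeSpans(source, dart)) {
+  final loc = Location(source, s.start);
+  print('${loc.line}:${loc.column} ${s.token.runtimeType}: ${s.token.text}');
+}
+```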
+ +## Grammar coverage + +Known limitations (see `CHANGELOG.md`): + +- YAML block scalars (`|`, `>`) tokenize the indented body as regular + YAML content rather than one string literal. +- Dart string interpolation (`"$x"`, `"${expr}"`) remains one + `StringLit`. +- Shell braced expansions do not balance nested braces. +- Heredoc body is one `StringLit`. +- Nested generic close renders the outer `>>` as right-shift. + +Part of the [rumil-dart](https://github.com/hakimjonas/rumil-dart) monorepo. diff --git a/rumil_tokens/analysis_options.yaml b/rumil_tokens/analysis_options.yaml new file mode 100644 index 0000000..b414920 --- /dev/null +++ b/rumil_tokens/analysis_options.yaml @@ -0,0 +1,21 @@ +include: package:lints/recommended.yaml + +analyzer: + language: + strict-casts: true + strict-inference: true + strict-raw-types: true + +linter: + rules: + - prefer_final_locals + - prefer_final_in_for_each + - prefer_const_constructors + - prefer_const_declarations + - always_declare_return_types + - annotate_overrides + - avoid_dynamic_calls + - prefer_expression_function_bodies + - unnecessary_lambdas + - prefer_single_quotes + - public_member_api_docs diff --git a/rumil_tokens/example/example.dart b/rumil_tokens/example/example.dart new file mode 100644 index 0000000..cfbe5b7 --- /dev/null +++ b/rumil_tokens/example/example.dart @@ -0,0 +1,33 @@ +import 'package:rumil_tokens/rumil_tokens.dart'; + +void main() { + const source = ''' +void main() { + final x = 42; + // greeting + print("hello \$x"); +} +'''; + + final tokens = tokenize(source, dart); + + for (final token in tokens) { + final kind = switch (token) { + Keyword() => 'keyword', + TypeName() => 'type', + StringLit() => 'string', + NumberLit() => 'number', + Comment() => 'comment', + Annotation() => 'annotation', + Punctuation() => 'punct', + Operator() => 'op', + Variable() => 'var', + Identifier() => 'ident', + Whitespace() => 'ws', + Plain() => 'plain', + }; + if (token is! Whitespace) { + print('$kind: ${token.text}'); + } + } +} diff --git a/rumil_tokens/lib/rumil_tokens.dart b/rumil_tokens/lib/rumil_tokens.dart new file mode 100644 index 0000000..c930362 --- /dev/null +++ b/rumil_tokens/lib/rumil_tokens.dart @@ -0,0 +1,17 @@ +/// Lossless source code tokenizer built on Rumil. +library; + +// Token types +export 'src/token.dart'; + +// Spanned tokens (byte offsets into source). +export 'src/spanned.dart'; + +// Grammar definition +export 'src/grammar.dart'; + +// Tokenizer +export 'src/tokenizer.dart' show tokenize, tokenizeSpans; + +// Built-in language grammars +export 'src/languages.dart'; diff --git a/rumil_tokens/lib/src/grammar.dart b/rumil_tokens/lib/src/grammar.dart new file mode 100644 index 0000000..d087431 --- /dev/null +++ b/rumil_tokens/lib/src/grammar.dart @@ -0,0 +1,140 @@ +/// Language grammar definitions for the tokenizer. +library; + +/// Describes a language's lexical structure. +/// +/// Grammars are plain data. The tokenizer reads a grammar and builds +/// the combinator pipeline. +class LangGrammar { + /// Language identifier (e.g. `'dart'`, `'scala'`). + final String name; + + /// Reserved keywords (e.g. `['if', 'else', 'class']`). + final List keywords; + + /// Built-in or well-known type names (e.g. `['int', 'String']`). + final List types; + + /// Line comment prefix (e.g. `'//'`), or `null` if unsupported. + final String? lineComment; + + /// Block comment delimiters `(open, close)`, or `null`. + final (String, String)? blockComment; + + /// String delimiters to recognize (e.g. `['"', "'"]`). 
+ final List stringDelimiters; + + /// Multi-line string delimiters (e.g. `['"""', "'''"]`). + final List multiLineStringDelimiters; + + /// Annotation prefix (e.g. `'@'` for Dart/Java), or `null`. + final String? annotationPrefix; + + /// Structural punctuation: delimiters, separators, grouping characters. + /// + /// Typical contents: `()`, `{}`, `[]`, `,`, `;`, `:`, `.`. Distinct from + /// [operatorChars], which is reserved for value-computing operators + /// (`+`, `*`, `==`). + final String punctuationChars; + + /// Multi-character operator vocabulary, matched before single-char + /// operators or punctuation. + /// + /// Order within the list is irrelevant; the tokenizer matches in + /// longest-first order. Each entry is matched as a literal string. + /// + /// Dart example: `['=>', '<=', '>=', '==', '!=', '&&', '||', '??', + /// '?.', '<<', '>>', '~/']`. Scala adds `'<-'`, `'->'`, `'::'`. + /// + /// Matched operators emit one [Operator] token including the full + /// multi-char text. + final List multiCharOperators; + + /// Single-character operator alphabet. + /// + /// Typical contents: `+`, `-`, `*`, `/`, `%`, `=`, `&`, `|`, `^`, `~`, + /// `!`. Characters here emit one [Operator] token each; runs do not + /// coalesce. Multi-character operators must be listed explicitly in + /// [multiCharOperators]. + /// + /// When empty (e.g. JSON), no operator classification happens. + /// Overlaps with [punctuationChars] are resolved in favor of operators. + final String operatorChars; + + /// Whether identifiers may contain `$`. + /// + /// Dart allows `$` in identifiers; most other languages do not. When + /// `false`, `$` is free to carry language-specific meaning such as a + /// shell variable prefix. + final bool identifiersAllowDollar; + + /// Raw-string prefix (Dart's `'r'` for `r'no\escape'`), or `null`. + /// + /// When set, the tokenizer recognizes the single-character prefix + /// immediately followed by any [stringDelimiters] or + /// [multiLineStringDelimiters] as one [StringLit] whose text includes + /// the prefix. Escape sequences inside raw strings are not processed; + /// the body is captured verbatim up to the matching delimiter. + final String? rawStringPrefix; + + /// Whether an identifier immediately followed by a string delimiter + /// forms a string with that identifier as a prefix. + /// + /// Scala's string interpolators (`s"hi $x"`, `f"$x%.2f"`, any + /// user-defined `foo"..."`) follow this pattern. When `true`, the + /// tokenizer treats `"..."` as one [StringLit] whose text + /// includes the identifier prefix. + final bool identifierStringPrefix; + + /// Whether backtick-delimited identifiers are allowed (`` `type` ``). + /// + /// Scala uses this to escape keywords. When `true`, the tokenizer + /// recognizes `` `...` `` as one [Identifier] even when the bracketed + /// content would otherwise be a keyword. + final bool backtickIdentifiers; + + /// Whether `$` introduces a variable reference (shell-style). + /// + /// When `true`, the tokenizer recognizes: + /// - `$NAME`: one [Variable] token including the `$`. + /// - `${NAME}` and `${...}` expansions: one [Variable] token, including + /// braces and body up to the matching close brace. + /// - Bare `$` not followed by a name or `{` falls through. + final bool shellVariables; + + /// Whether backtick-delimited command substitution is recognized + /// (`` `cmd` ``). + /// + /// When `true`, the tokenizer emits [Punctuation] for each backtick; + /// the body between them is tokenized as ordinary source. 
+ final bool backtickCommandSubstitution; + + /// Whether `<<` followed by a marker introduces a heredoc. + /// + /// Shell heredocs (`<;:,.?', + operatorChars: '+-*/%=!&|^~', + multiCharOperators: [ + '<<=', + '>>=', + '>>>', + '~/=', + '??=', + '<<', + '>>', + '<=', + '>=', + '==', + '!=', + '&&', + '||', + '??', + '?.', + '=>', + '~/', + '+=', + '-=', + '*=', + '/=', + '%=', + '&=', + '|=', + '^=', + ], + identifiersAllowDollar: true, + rawStringPrefix: 'r', +); + +/// Scala language grammar. +const LangGrammar scala = LangGrammar( + name: 'scala', + keywords: [ + 'abstract', + 'as', + 'case', + 'catch', + 'class', + 'def', + 'derives', + 'do', + 'else', + 'end', + 'enum', + 'export', + 'extends', + 'extension', + 'false', + 'final', + 'finally', + 'for', + 'forSome', + 'given', + 'if', + 'implicit', + 'import', + 'inline', + 'infix', + 'lazy', + 'match', + 'new', + 'null', + 'object', + 'opaque', + 'open', + 'override', + 'package', + 'private', + 'protected', + 'return', + 'sealed', + 'super', + 'then', + 'this', + 'throw', + 'trait', + 'transparent', + 'true', + 'try', + 'type', + 'using', + 'val', + 'var', + 'while', + 'with', + 'yield', + ], + types: [ + 'Int', + 'Long', + 'Short', + 'Byte', + 'Float', + 'Double', + 'Char', + 'Boolean', + 'String', + 'Unit', + 'Nothing', + 'Any', + 'AnyRef', + 'AnyVal', + 'Option', + 'Some', + 'None', + 'Either', + 'Left', + 'Right', + 'List', + 'Map', + 'Set', + 'Seq', + 'Vector', + 'Array', + 'Future', + 'Try', + 'Success', + 'Failure', + ], + lineComment: '//', + blockComment: ('/*', '*/'), + stringDelimiters: ['"', "'"], + multiLineStringDelimiters: ['"""'], + annotationPrefix: '@', + punctuationChars: '(){}[]<>;:,.?', + operatorChars: '+-*/%=!&|^~', + multiCharOperators: [ + '<<=', + '>>=', + '>>>', + '<=', + '>=', + '==', + '!=', + '&&', + '||', + '<<', + '>>', + '<-', + '->', + '=>', + '::', + '+=', + '-=', + '*=', + '/=', + '%=', + '&=', + '|=', + '^=', + ], + identifierStringPrefix: true, + backtickIdentifiers: true, +); + +/// YAML 1.2 grammar (flow-collection keys, quoted strings, anchors, +/// aliases, comments). +/// +/// Block scalars (`|`, `>`) tokenize the indented body as regular YAML +/// content rather than as a single string literal. +const LangGrammar yaml = LangGrammar( + name: 'yaml', + keywords: ['true', 'false', 'null'], + lineComment: '#', + stringDelimiters: ['"', "'"], + punctuationChars: '[]{},:-', + operatorChars: '&*!%|>', +); + +/// JSON grammar, lenient for highlighting purposes. +/// +/// Tokenizes technically-invalid JSON inputs (hex literals, leading +/// zeros) as numbers rather than rejecting them. For strict validation +/// use a JSON parser. +const LangGrammar json = LangGrammar( + name: 'json', + keywords: ['true', 'false', 'null'], + stringDelimiters: ['"'], + punctuationChars: '{}[]:,', + operatorChars: '', +); + +/// Shell / Bash grammar. 
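+///
+/// Enables shell variables (`$NAME`, `${NAME}`), backtick command
+/// substitution, and heredocs (`<<MARKER`), matching the flags set below.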
+const LangGrammar shell = LangGrammar( + name: 'shell', + keywords: [ + 'if', + 'then', + 'else', + 'elif', + 'fi', + 'for', + 'while', + 'until', + 'do', + 'done', + 'case', + 'esac', + 'in', + 'function', + 'return', + 'exit', + 'local', + 'export', + 'readonly', + 'declare', + 'typeset', + 'source', + 'eval', + 'exec', + 'set', + 'unset', + 'true', + 'false', + ], + lineComment: '#', + stringDelimiters: ['"', "'"], + punctuationChars: '(){}[];,', + operatorChars: r'=!<>&|+-*/%', + multiCharOperators: [ + '<<=', + '>>=', + '==', + '!=', + '<=', + '>=', + '&&', + '||', + '<<', + '>>', + '+=', + '-=', + '*=', + '/=', + '%=', + ], + annotationPrefix: null, + shellVariables: true, + backtickCommandSubstitution: true, + heredocs: true, +); + +/// Look up a built-in grammar by name. +/// +/// Returns `null` if no built-in grammar matches. +LangGrammar? grammarFor(String language) => switch (language) { + 'dart' => dart, + 'scala' => scala, + 'yaml' || 'yml' => yaml, + 'json' => json, + 'sh' || 'bash' || 'shell' || 'zsh' => shell, + _ => null, +}; diff --git a/rumil_tokens/lib/src/spanned.dart b/rumil_tokens/lib/src/spanned.dart new file mode 100644 index 0000000..b198ade --- /dev/null +++ b/rumil_tokens/lib/src/spanned.dart @@ -0,0 +1,35 @@ +/// Spanned token: a [Token] paired with byte offsets into the source. +library; + +import 'token.dart'; + +/// A [Token] with byte offsets into the original source string. +/// +/// The interval `[start, end)` is half-open. `source.substring(start, end)` +/// reconstructs the token's text: +/// +/// ```dart +/// final spans = tokenizeSpans(source, grammar); +/// for (final s in spans) { +/// assert(source.substring(s.start, s.end) == s.token.text); +/// } +/// ``` +/// +/// Callers needing line/column can construct a `Location(source, offset)` +/// from `rumil`. +extension type const Spanned._((T, int, int) _) { + /// Creates a spanned token covering the half-open interval `[start, end)`. + const Spanned.of(T token, int start, int end) : this._((token, start, end)); + + /// The classified token. + T get token => _.$1; + + /// Byte offset of the first character of [token] in the original source. + int get start => _.$2; + + /// Byte offset one past the last character of [token] in the original source. + int get end => _.$3; + + /// Length of the span in code units: `end - start`. + int get length => _.$3 - _.$2; +} diff --git a/rumil_tokens/lib/src/token.dart b/rumil_tokens/lib/src/token.dart new file mode 100644 index 0000000..67a3117 --- /dev/null +++ b/rumil_tokens/lib/src/token.dart @@ -0,0 +1,129 @@ +/// Token types produced by the tokenizer. +library; + +/// A classified span of source text. +/// +/// Tokens are lossless: concatenating [text] from a token stream +/// reconstructs the original source exactly. +sealed class Token { + /// The source text this token covers. + final String text; + + /// Creates a token covering [text]. + const Token(this.text); +} + +/// A language keyword (`if`, `class`, `val`, etc.). +final class Keyword extends Token { + /// Creates a keyword token. + const Keyword(super.text); + + @override + String toString() => 'Keyword($text)'; +} + +/// A built-in or well-known type name (`int`, `String`, `List`, etc.). +final class TypeName extends Token { + /// Creates a type-name token. + const TypeName(super.text); + + @override + String toString() => 'TypeName($text)'; +} + +/// A string literal, including delimiters. +final class StringLit extends Token { + /// Creates a string-literal token. 
+ const StringLit(super.text); + + @override + String toString() => 'StringLit($text)'; +} + +/// A numeric literal (integer, float, hex, etc.). +final class NumberLit extends Token { + /// Creates a number-literal token. + const NumberLit(super.text); + + @override + String toString() => 'NumberLit($text)'; +} + +/// A comment (line or block), including delimiters. +final class Comment extends Token { + /// Creates a comment token. + const Comment(super.text); + + @override + String toString() => 'Comment($text)'; +} + +/// Structural punctuation: `(`, `)`, `{`, `}`, `[`, `]`, `,`, `;`, `:`, `.`. +/// +/// Delimits, separates, or groups. Distinct from [Operator], which is +/// reserved for value-computing operators. +final class Punctuation extends Token { + /// Creates a punctuation token. + const Punctuation(super.text); + + @override + String toString() => 'Punctuation($text)'; +} + +/// A value-computing operator: `+`, `*`, `==`, `&&`, `=>`, `->`. +/// +/// Distinct from [Punctuation]. +final class Operator extends Token { + /// Creates an operator token. + const Operator(super.text); + + @override + String toString() => 'Operator($text)'; +} + +/// A variable reference: shell `$HOME`, `${PATH}`. +/// +/// The token text includes the leading `$` and braces if present. +final class Variable extends Token { + /// Creates a variable token. + const Variable(super.text); + + @override + String toString() => 'Variable($text)'; +} + +/// An identifier that is not a keyword or type name. +final class Identifier extends Token { + /// Creates an identifier token. + const Identifier(super.text); + + @override + String toString() => 'Identifier($text)'; +} + +/// An annotation or decorator (`@override`, `#[derive]`, etc.). +final class Annotation extends Token { + /// Creates an annotation token. + const Annotation(super.text); + + @override + String toString() => 'Annotation($text)'; +} + +/// Whitespace (spaces, tabs, newlines). +final class Whitespace extends Token { + /// Creates a whitespace token. + const Whitespace(super.text); + + @override + String toString() => 'Whitespace(${text.length})'; +} + +/// Any text not matched by a language-specific rule. +final class Plain extends Token { + /// Creates a plain-text token. + const Plain(super.text); + + @override + String toString() => 'Plain($text)'; +} diff --git a/rumil_tokens/lib/src/tokenizer.dart b/rumil_tokens/lib/src/tokenizer.dart new file mode 100644 index 0000000..15501eb --- /dev/null +++ b/rumil_tokens/lib/src/tokenizer.dart @@ -0,0 +1,522 @@ +/// Lossless tokenizer built on Rumil combinators. +library; + +import 'package:rumil/rumil.dart'; + +import 'grammar.dart'; +import 'spanned.dart'; +import 'token.dart'; + +/// Tokenize [source] according to [grammar]. +/// +/// Returns a lossless token stream: concatenating every token's [Token.text] +/// reproduces [source] exactly. +/// +/// Equivalent to `tokenizeSpans(source, grammar).map((s) => s.token).toList()`. +/// Callers that need byte offsets should use [tokenizeSpans] directly. +List tokenize(String source, LangGrammar grammar) => + tokenizeSpans(source, grammar).map((s) => s.token).toList(); + +/// Tokenize [source] into [Spanned] tokens carrying byte offsets. +/// +/// The returned list satisfies: +/// +/// - Lossless: `spans.map((s) => s.token.text).join() == source`. +/// - Anchored: `spans.first.start == 0` and `spans.last.end == source.length` +/// unless [source] is empty, in which case the list is empty. 
+/// - Contiguous: `spans[i].end == spans[i+1].start` for every adjacent pair. +/// - Text matches span: `source.substring(s.start, s.end) == s.token.text`. +/// +/// On parser failure the whole source is returned as a single `Spanned` +/// covering `[0, source.length)`. +List> tokenizeSpans(String source, LangGrammar grammar) { + if (source.isEmpty) return const []; + final parser = _buildSpannedTokenizer(grammar); + final result = parser.run(source); + final spans = switch (result) { + Success>>(:final value) => value, + Partial>>(:final value) => value, + Failure>>() => >[ + Spanned.of(Plain(source), 0, source.length), + ], + }; + return _mergePlainSpans(spans); +} + +Parser>> _buildSpannedTokenizer( + LangGrammar grammar, +) { + final choice = Choice(_alternatives(grammar)); + final spanned = position() + .zip(choice) + .zip(position()) + .map>((nested) { + final ((start, token), end) = nested; + return Spanned.of(token, start, end); + }); + return spanned.many.thenSkip(eof()); +} + +List> _alternatives(LangGrammar grammar) { + final alternatives = >[]; + + if (grammar.blockComment case (final open, final close)) { + alternatives.add(_blockComment(open, close)); + } + if (grammar.lineComment case final prefix?) { + alternatives.add(_lineComment(prefix)); + } + + if (grammar.rawStringPrefix case final prefix?) { + for (final delim in grammar.multiLineStringDelimiters) { + alternatives.add(_rawMultiLineString(prefix, delim)); + } + for (final delim in grammar.stringDelimiters) { + alternatives.add(_rawStringLiteral(prefix, delim)); + } + } + + if (grammar.identifierStringPrefix) { + for (final delim in grammar.multiLineStringDelimiters) { + alternatives.add(_prefixedMultiLineString(delim)); + } + for (final delim in grammar.stringDelimiters) { + alternatives.add(_prefixedStringLiteral(delim)); + } + } + + for (final delim in grammar.multiLineStringDelimiters) { + alternatives.add(_multiLineString(delim)); + } + for (final delim in grammar.stringDelimiters) { + alternatives.add(_stringLiteral(delim)); + } + + alternatives.add(_number(grammar.operatorChars)); + + if (grammar.annotationPrefix case final prefix?) 
{ + alternatives.add(_annotation(prefix, grammar.identifiersAllowDollar)); + } + + if (grammar.backtickIdentifiers) { + alternatives.add(_backtickIdentifier()); + } + + if (grammar.heredocs) { + alternatives.add(_heredoc()); + } + + if (grammar.shellVariables) { + alternatives.add(_shellVariableBraced()); + alternatives.add(_shellVariableBare()); + } + + if (grammar.backtickCommandSubstitution) { + alternatives.add(char('`').map((c) => Punctuation(c) as Token)); + } + + alternatives.add( + _identifierOrKeyword( + grammar.keywords, + grammar.types, + grammar.identifiersAllowDollar, + ), + ); + + if (grammar.multiCharOperators.isNotEmpty) { + alternatives.add(_multiCharOperator(grammar.multiCharOperators)); + } + + if (grammar.operatorChars.isNotEmpty) { + alternatives.add(_operator(grammar.operatorChars)); + } + + if (grammar.punctuationChars.isNotEmpty) { + alternatives.add(_punctuation(grammar.punctuationChars)); + } + + alternatives.add(_whitespace()); + alternatives.add(anyChar().map(Plain.new)); + + return alternatives; +} + +Parser _lineComment(String prefix) => string(prefix) + .skipThen(satisfy((c) => c != '\n', 'comment char').many.capture) + .map((body) => Comment('$prefix$body') as Token); + +Parser _blockComment(String open, String close) { + final closeFirst = close[0]; + final body = (string(close).notFollowedBy.skipThen(anyChar())).many.capture; + return string(open) + .skipThen(body) + .thenSkip(string(close)) + .map((body) => Comment('$open$body$close') as Token) + .or( + string(open) + .skipThen(satisfy((c) => c != closeFirst, 'any char').many.capture) + .map((body) => Comment('$open$body') as Token), + ); +} + +Parser _multiLineString(String delim) { + final body = (string(delim).notFollowedBy.skipThen(anyChar())).many.capture; + return string(delim) + .skipThen(body) + .thenSkip(string(delim)) + .map((body) => StringLit('$delim$body$delim') as Token) + .or( + string(delim) + .skipThen(anyChar().many.capture) + .map((body) => StringLit('$delim$body') as Token), + ); +} + +Parser _stringLiteral(String delim) { + final escaped = string('\\').skipThen(anyChar()).capture; + final normal = satisfy((c) => c != delim && c != '\\' && c != '\n', 'char'); + final body = (escaped | normal.capture).many.map((parts) => parts.join()); + return char(delim).skipThen(body).zip(char(delim).capture.optional).map(( + pair, + ) { + final (body, close) = pair; + return StringLit('$delim$body${close ?? ''}') as Token; + }); +} + +/// Raw string literal (`r'no\escape'`). Escapes are captured verbatim; +/// the body runs until the matching delimiter or end-of-line. +Parser _rawStringLiteral(String prefix, String delim) { + final normal = satisfy((c) => c != delim && c != '\n', 'raw-string char'); + final body = normal.many.capture; + return string(prefix) + .skipThen(char(delim)) + .skipThen(body) + .zip(char(delim).capture.optional) + .map((pair) { + final (body, close) = pair; + return StringLit('$prefix$delim$body${close ?? ''}') as Token; + }); +} + +/// Raw multi-line string literal (`r'''no\escape'''`). Body runs until +/// the matching triple delimiter; a missing close is tolerated. 
+Parser _rawMultiLineString(String prefix, String delim) { + final body = (string(delim).notFollowedBy.skipThen(anyChar())).many.capture; + return string(prefix) + .skipThen(string(delim)) + .skipThen(body) + .thenSkip(string(delim)) + .map((body) => StringLit('$prefix$delim$body$delim') as Token) + .or( + string(prefix) + .skipThen(string(delim)) + .skipThen(anyChar().many.capture) + .map((body) => StringLit('$prefix$delim$body') as Token), + ); +} + +/// Identifier-prefixed string literal (`s"hi $x"`). Escapes are respected +/// like a regular string literal. +Parser _prefixedStringLiteral(String delim) { + final prefix = satisfy((c) => _isAlpha(c) || c == '_', 'interpolator prefix') + .zip( + satisfy( + (c) => _isAlpha(c) || _isDigit(c) || c == '_', + 'ident char', + ).many, + ) + .map((pair) => pair.$1 + pair.$2.join()); + final escaped = string('\\').skipThen(anyChar()).capture; + final normal = satisfy((c) => c != delim && c != '\\' && c != '\n', 'char'); + final body = (escaped | normal.capture).many.map((parts) => parts.join()); + return prefix + .zip(char(delim)) + .zip(body) + .zip(char(delim).capture.optional) + .map((nested) { + final (((p, d), b), close) = nested; + return StringLit('$p$d$b${close ?? ''}') as Token; + }); +} + +/// Identifier-prefixed multi-line string literal (`s"""hi $x"""`). +Parser _prefixedMultiLineString(String delim) { + final prefix = satisfy((c) => _isAlpha(c) || c == '_', 'interpolator prefix') + .zip( + satisfy( + (c) => _isAlpha(c) || _isDigit(c) || c == '_', + 'ident char', + ).many, + ) + .map((pair) => pair.$1 + pair.$2.join()); + final body = (string(delim).notFollowedBy.skipThen(anyChar())).many.capture; + return prefix + .zip(string(delim)) + .zip(body) + .thenSkip(string(delim)) + .map((nested) { + final ((p, d), b) = nested; + return StringLit('$p$d$b$d') as Token; + }) + .or( + prefix.zip(string(delim)).zip(anyChar().many.capture).map((nested) { + final ((p, d), b) = nested; + return StringLit('$p$d$b') as Token; + }), + ); +} + +/// Backtick-delimited identifier (`` `type` ``). Keywords inside +/// backticks are identifiers. Body runs until the matching backtick. +Parser _backtickIdentifier() { + final normal = satisfy((c) => c != '`' && c != '\n', 'backtick-ident char'); + final body = normal.many.capture; + return char('`').skipThen(body).zip(char('`').capture.optional).map((pair) { + final (body, close) = pair; + return Identifier('`$body${close ?? ''}') as Token; + }); +} + +/// Bare shell variable (`$NAME`, `$1`, `$@`, `$#`, `$?`, `$$`, `$!`). +/// +/// Matches `$` followed by an identifier name, a single digit, or a +/// special positional parameter character. A lone `$` emits `$` as +/// [Variable] so the follower tokenizes normally. +Parser _shellVariableBare() { + final name = satisfy((c) => _isAlpha(c) || c == '_', 'variable name start') + .zip( + satisfy( + (c) => _isAlpha(c) || _isDigit(c) || c == '_', + 'ident char', + ).many, + ) + .map((pair) => pair.$1 + pair.$2.join()); + final special = satisfy( + (c) => + _isDigit(c) || + c == '@' || + c == '#' || + c == '?' || + c == r'$' || + c == '!' 
|| + c == '*' || + c == '-', + 'special parameter', + ); + final body = name | special.capture | succeed(''); + return char( + r'$', + ).skipThen(body).map((name) => Variable(r'$' + name) as Token); +} + +/// Shell heredoc (`< _heredoc() { + final introducer = string('<<-') | string('<<'); + final quotedMarker = char("'") + .skipThen( + satisfy((c) => c != "'" && c != '\n', 'marker char').many.capture, + ) + .thenSkip(char("'")) + .map((m) => ("'$m'", m)); + final dquotedMarker = char('"') + .skipThen( + satisfy((c) => c != '"' && c != '\n', 'marker char').many.capture, + ) + .thenSkip(char('"')) + .map((m) => ('"$m"', m)); + final bareMarker = satisfy((c) => _isAlpha(c) || c == '_', 'marker start') + .zip( + satisfy( + (c) => _isAlpha(c) || _isDigit(c) || c == '_', + 'marker char', + ).many, + ) + .map((p) => (p.$1 + p.$2.join(), p.$1 + p.$2.join())); + final marker = quotedMarker | dquotedMarker | bareMarker; + + return introducer.zip(marker).flatMap((pair) { + final (intro, markerPair) = pair; + final (markerText, markerName) = markerPair; + final restOfLine = satisfy((c) => c != '\n', 'heredoc rest').many.capture; + final newline = char('\n'); + final tabs = satisfy((c) => c == '\t', 'tab').many.capture; + final eolOrEof = newline.capture | succeed(''); + final stripLeadingTabs = intro == '<<-'; + // Word-boundary lookahead: `EOF` terminates, `EOFISH` does not. + final markerEnd = + satisfy( + (c) => c != '\n' && (_isAlpha(c) || _isDigit(c) || c == '_'), + 'ident continuation', + ).notFollowedBy; + final terminatorLine = (stripLeadingTabs + ? tabs + : succeed('')) + .zip(string(markerName)) + .thenSkip(markerEnd) + .zip(eolOrEof) + .map((nested) { + final ((leading, mark), trailing) = nested; + return '$leading$mark$trailing'; + }); + final bodyChar = terminatorLine.notFollowedBy.skipThen(anyChar()); + final body = bodyChar.many.capture; + final full = restOfLine.thenSkip(newline).zip(body).zip(terminatorLine).map( + (nested) { + final ((rest, bodyText), term) = nested; + return StringLit('$intro$markerText$rest\n$bodyText$term') as Token; + }, + ); + final untilEof = restOfLine + .thenSkip(newline.optional) + .zip(anyChar().many.capture) + .map((pair) { + final (rest, bodyText) = pair; + return StringLit('$intro$markerText$rest\n$bodyText') as Token; + }); + return full | untilEof; + }); +} + +/// Braced shell variable (`${NAME}`, `${#NAME}`, `${NAME:-default}`, +/// `${NAME//pat/repl}`). Body runs until the matching close brace. +/// +/// Nested braces are not balanced: the first `}` at the top level +/// closes the expansion. +Parser _shellVariableBraced() { + final body = + satisfy((c) => c != '}' && c != '\n', 'expansion body').many.capture; + return string(r'${').skipThen(body).zip(char('}').capture.optional).map(( + pair, + ) { + final (body, close) = pair; + return Variable('\${$body${close ?? ''}') as Token; + }); +} + +Parser _number(String operatorChars) { + final hexLit = + string('0x').skipThen(satisfy(_isHexDigit, 'hex digit').many1).capture; + final binLit = string('0b').skipThen(oneOf('01').many1).capture; + + final digits = satisfy(_isDigit, 'digit').many1.capture; + // Digit-lookahead gate so `x.length` doesn't read `.l` as a decimal. 
+ final decimalPart = + char( + '.', + ).thenSkip(satisfy(_isDigit, 'digit').lookAhead).skipThen(digits).capture; + final exponent = + oneOf('eE').skipThen(oneOf('+-').optional).zip(digits).capture; + final suffix = oneOf('lLfFdD').capture.optional; + + final decLit = + digits + .zip(decimalPart.optional) + .zip(exponent.optional) + .zip(suffix) + .capture; + + // When `-` is an operator character the operator parser handles it; + // otherwise (JSON) we accept an optional leading `-` as part of the number. + final signed = + operatorChars.contains('-') + ? (hexLit | binLit | decLit) + : char('-').capture.optional + .zip(hexLit | binLit | decLit) + .map((pair) => (pair.$1 ?? '') + pair.$2); + + return signed.map(NumberLit.new as Token Function(String)); +} + +Parser _annotation(String prefix, bool allowDollar) => + string(prefix) + .skipThen(_identRaw(allowDollar)) + .map((id) => Annotation('$prefix$id') as Token); + +Parser _identifierOrKeyword( + List keywords, + List types, + bool allowDollar, +) { + final keywordSet = {...keywords}; + final typeSet = {...types}; + return _identRaw(allowDollar).map((id) { + if (keywordSet.contains(id)) return Keyword(id) as Token; + if (typeSet.contains(id)) return TypeName(id) as Token; + return Identifier(id) as Token; + }); +} + +Parser _identRaw(bool allowDollar) { + bool isStart(String c) => + _isAlpha(c) || c == '_' || (allowDollar && c == r'$'); + bool isCont(String c) => isStart(c) || _isDigit(c); + return satisfy(isStart, 'identifier start') + .zip(satisfy(isCont, 'identifier char').many) + .map((pair) => pair.$1 + pair.$2.join()); +} + +Parser _punctuation(String chars) => satisfy( + (c) => chars.contains(c), + 'punctuation', +).map((c) => Punctuation(c) as Token); + +/// Single-character operator parser. Emits one character per token. +/// Multi-character operators must be declared in +/// [LangGrammar.multiCharOperators]. +Parser _operator(String chars) => satisfy( + (c) => chars.contains(c), + 'operator', +).map((c) => Operator(c) as Token); + +/// Multi-character operator parser. Matches candidates longest-first. +Parser _multiCharOperator(List ops) { + final sorted = [...ops]..sort((a, b) => b.length.compareTo(a.length)); + var parser = string(sorted.first); + for (final op in sorted.skip(1)) { + parser = parser.or(string(op)); + } + return parser.map((s) => Operator(s) as Token); +} + +Parser _whitespace() => + satisfy(_isWhitespace, 'whitespace').many1.capture.map(Whitespace.new); + +/// Merges consecutive [Plain]-spanned entries. The merged span inherits +/// the first entry's `start` and the last entry's `end`. 
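+///
+/// Merging preserves the invariants documented on [tokenizeSpans]: the
+/// merged text is the concatenation of the merged entries' text, and the
+/// surrounding spans stay contiguous.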
+List> _mergePlainSpans(List> spans) { + if (spans.length < 2) return spans; + final out = >[]; + for (final cur in spans) { + if (cur.token is Plain && out.isNotEmpty && out.last.token is Plain) { + final prev = out.last; + final merged = Plain(prev.token.text + cur.token.text); + out[out.length - 1] = Spanned.of(merged, prev.start, cur.end); + } else { + out.add(cur); + } + } + return out; +} + +bool _isDigit(String c) => c.compareTo('0') >= 0 && c.compareTo('9') <= 0; + +bool _isHexDigit(String c) => + _isDigit(c) || + (c.compareTo('a') >= 0 && c.compareTo('f') <= 0) || + (c.compareTo('A') >= 0 && c.compareTo('F') <= 0); + +bool _isAlpha(String c) => + (c.compareTo('a') >= 0 && c.compareTo('z') <= 0) || + (c.compareTo('A') >= 0 && c.compareTo('Z') <= 0); + +bool _isWhitespace(String c) => c == ' ' || c == '\t' || c == '\n' || c == '\r'; diff --git a/rumil_tokens/pubspec.yaml b/rumil_tokens/pubspec.yaml new file mode 100644 index 0000000..bb1c114 --- /dev/null +++ b/rumil_tokens/pubspec.yaml @@ -0,0 +1,16 @@ +name: rumil_tokens +description: >- + Source code tokenizer built on Rumil. Classified token spans for syntax + highlighting. In-tree within the rumil-dart monorepo; not published. +version: 0.1.0 +publish_to: none + +environment: + sdk: ^3.7.0 + +dependencies: + rumil: ^0.6.0 + +dev_dependencies: + test: ^1.31.0 + lints: ^6.0.0 diff --git a/rumil_tokens/test/tokenizer_test.dart b/rumil_tokens/test/tokenizer_test.dart new file mode 100644 index 0000000..8a27c54 --- /dev/null +++ b/rumil_tokens/test/tokenizer_test.dart @@ -0,0 +1,1446 @@ +import 'package:rumil_tokens/rumil_tokens.dart'; +import 'package:test/test.dart'; + +void _expectLossless(String source, LangGrammar grammar) { + final tokens = tokenize(source, grammar); + final reconstructed = tokens.map((t) => t.text).join(); + expect(reconstructed, source, reason: 'lossless round-trip'); +} + +List _ofType(List tokens) => + tokens.whereType().toList(); + +List _textsOf(List tokens) => + _ofType(tokens).map((t) => t.text).toList(); + +void main() { + // --------------------------------------------------------------------------- + // Lossless round-trip. 
+ // --------------------------------------------------------------------------- + + group('lossless round-trip', () { + test('empty input', () { + expect(tokenize('', dart), isEmpty); + }); + + test('single character', () { + _expectLossless('x', dart); + }); + + test('only whitespace', () { + _expectLossless(' \t\n \r\n', dart); + }); + + test('Dart function', () { + const source = ''' +void main() { + final x = 42; + // comment + print('hello \$x'); +} +'''; + _expectLossless(source, dart); + }); + + test('Dart class with annotations', () { + const source = ''' +@immutable +class Point { + final int x; + final int y; + const Point(this.x, this.y); + + @override + String toString() => 'Point(\$x, \$y)'; +} +'''; + _expectLossless(source, dart); + }); + + test('Dart multi-line strings', () { + _expectLossless("var a = '''multi\nline''';\n", dart); + _expectLossless('var b = """another\nmulti""";\n', dart); + }); + + test('Dart string interpolation with escapes', () { + const source = r'''var s = "line1\nline2\t\"quoted\"\\end";'''; + _expectLossless(source, dart); + }); + + test('Dart hex and binary literals', () { + _expectLossless('var a = 0xFF; var b = 0b1010;', dart); + }); + + test('Dart floating point', () { + _expectLossless('var x = 3.14; var y = 1e10; var z = 2.5e-3;', dart); + }); + + test('Dart block comment', () { + _expectLossless('x /* block\ncomment */ y', dart); + }); + + test('unterminated string', () { + _expectLossless('var s = "unterminated', dart); + }); + + test('unterminated multi-line string', () { + _expectLossless("var s = '''unterminated", dart); + }); + + test('unterminated block comment', () { + _expectLossless('/* unterminated', dart); + }); + + test('Scala snippet', () { + const source = ''' +object Main: + def run(args: List[String]): Unit = + val x: Int = 42 + /* block comment */ + println(s"hello \$x") +'''; + _expectLossless(source, scala); + }); + + test('Scala triple-quoted string', () { + _expectLossless('val s = """raw\nstring"""', scala); + }); + + test('YAML document', () { + const source = ''' +name: rumil +version: 0.5.0 +# comment +dependencies: + rumil: ^0.5.0 + flag: true +list: + - one + - two +'''; + _expectLossless(source, yaml); + }); + + test('JSON document', () { + const source = ''' +{ + "name": "rumil", + "version": 42, + "active": true, + "data": null, + "items": [1, 2, 3] +} +'''; + _expectLossless(source, json); + }); + + test('shell script', () { + const source = ''' +#!/bin/bash +# deploy script +export PORT=8080 +for f in *.dart; do + echo "building \$f" + if [ -f "\$f" ]; then + dart compile exe "\$f" + fi +done +'''; + _expectLossless(source, shell); + }); + + test('consecutive comments', () { + _expectLossless('// one\n// two\n// three\n', dart); + }); + + test('adjacent strings', () { + _expectLossless('"a""b""c"', dart); + }); + + test('mixed content', () { + const source = 'if (x > 0) { return "yes"; } // done'; + _expectLossless(source, dart); + }); + + test('only punctuation', () { + _expectLossless('(){}[]<>;:,.', dart); + }); + + test('only numbers', () { + _expectLossless('42', dart); + }); + + test('only a string', () { + _expectLossless('"hello world"', dart); + }); + + test('only a comment', () { + _expectLossless('// just a comment', dart); + }); + + test('unicode identifiers', () { + _expectLossless(r'var $dollar = _under;', dart); + }); + }); + + // --------------------------------------------------------------------------- + // Token classification. 
+ // --------------------------------------------------------------------------- + + group('keywords', () { + test('Dart keywords', () { + final tokens = tokenize('if else class final var void return', dart); + expect(_textsOf(tokens), [ + 'if', + 'else', + 'class', + 'final', + 'var', + 'void', + 'return', + ]); + }); + + test('Scala keywords', () { + final tokens = tokenize('def val object trait sealed match', scala); + expect(_textsOf(tokens), [ + 'def', + 'val', + 'object', + 'trait', + 'sealed', + 'match', + ]); + }); + + test('YAML keywords', () { + final tokens = tokenize('true false null', yaml); + expect(_textsOf(tokens), ['true', 'false', 'null']); + }); + + test('JSON keywords', () { + final tokens = tokenize('true false null', json); + expect(_textsOf(tokens), ['true', 'false', 'null']); + }); + + test('shell keywords', () { + final tokens = tokenize('if then else fi for do done', shell); + expect(_textsOf(tokens), [ + 'if', + 'then', + 'else', + 'fi', + 'for', + 'do', + 'done', + ]); + }); + + test('keyword not matched inside identifier', () { + final tokens = tokenize('classify iffy', dart); + expect(_textsOf(tokens), ['classify', 'iffy']); + expect(_ofType(tokens), isEmpty); + }); + + test('keyword not matched as prefix of identifier', () { + final tokens = tokenize('ifTrue forEachItem', dart); + expect(_textsOf(tokens), ['ifTrue', 'forEachItem']); + expect(_ofType(tokens), isEmpty); + }); + + test('keyword followed by punctuation', () { + final tokens = tokenize('if(x)', dart); + expect(_textsOf(tokens), ['if']); + expect(_textsOf(tokens), ['x']); + }); + }); + + group('type names', () { + test('Dart types', () { + final tokens = tokenize('int String List Map Future', dart); + expect(_textsOf(tokens), [ + 'int', + 'String', + 'List', + 'Map', + 'Future', + ]); + }); + + test('Scala types', () { + final tokens = tokenize('Int Boolean Option Either Unit', scala); + expect(_textsOf(tokens), [ + 'Int', + 'Boolean', + 'Option', + 'Either', + 'Unit', + ]); + }); + + test('type not matched inside identifier', () { + final tokens = tokenize('integer Stringify', dart); + expect(_textsOf(tokens), ['integer', 'Stringify']); + expect(_ofType(tokens), isEmpty); + }); + }); + + group('identifiers', () { + test('simple identifiers', () { + final tokens = tokenize('foo bar baz', dart); + expect(_textsOf(tokens), ['foo', 'bar', 'baz']); + }); + + test('underscore identifiers', () { + final tokens = tokenize('_private __dunder _a1', dart); + expect(_textsOf(tokens), ['_private', '__dunder', '_a1']); + }); + + test('dollar identifiers', () { + final tokens = tokenize(r'$ref $$double', dart); + expect(_textsOf(tokens), [r'$ref', r'$$double']); + }); + + test('alphanumeric identifiers', () { + final tokens = tokenize('item1 item2 a123b', dart); + expect(_textsOf(tokens), ['item1', 'item2', 'a123b']); + }); + }); + + group('string literals', () { + test('double-quoted string', () { + final tokens = tokenize('"hello"', dart); + expect(_textsOf(tokens), ['"hello"']); + }); + + test('single-quoted string', () { + final tokens = tokenize("'world'", dart); + expect(_textsOf(tokens), ["'world'"]); + }); + + test('empty string', () { + final tokens = tokenize('""', dart); + expect(_textsOf(tokens), ['""']); + }); + + test('string with escapes', () { + final tokens = tokenize(r'"hello\nworld"', dart); + expect(_textsOf(tokens), [r'"hello\nworld"']); + }); + + test('string with escaped quote', () { + final tokens = tokenize(r'"say \"hi\""', dart); + expect(_textsOf(tokens), [r'"say \"hi\""']); + }); + + 
+    test('string with escaped backslash', () {
+      final tokens = tokenize(r'"path\\file"', dart);
+      expect(_textsOf<StringLit>(tokens), [r'"path\\file"']);
+    });
+
+    test('multi-line string (triple single)', () {
+      final tokens = tokenize("'''multi\nline'''", dart);
+      expect(_textsOf<StringLit>(tokens), ["'''multi\nline'''"]);
+    });
+
+    test('multi-line string (triple double)', () {
+      final tokens = tokenize('"""multi\nline"""', dart);
+      expect(_textsOf<StringLit>(tokens), ['"""multi\nline"""']);
+    });
+
+    test('unterminated string stops at newline', () {
+      final tokens = tokenize('"unterminated\nnext', dart);
+      final strings = _textsOf<StringLit>(tokens);
+      expect(strings.length, 1);
+      expect(strings.first, '"unterminated');
+    });
+
+    test('adjacent strings', () {
+      final tokens = tokenize('"a""b"', dart);
+      expect(_textsOf<StringLit>(tokens), ['"a"', '"b"']);
+    });
+
+    test('JSON only has double-quoted strings', () {
+      final tokens = tokenize('"value"', json);
+      expect(_textsOf<StringLit>(tokens), ['"value"']);
+    });
+  });
+
+  group('number literals', () {
+    test('integer', () {
+      final tokens = tokenize('42', dart);
+      expect(_textsOf<NumberLit>(tokens), ['42']);
+    });
+
+    test('float', () {
+      final tokens = tokenize('3.14', dart);
+      expect(_textsOf<NumberLit>(tokens), ['3.14']);
+    });
+
+    test('hex literal', () {
+      final tokens = tokenize('0xFF', dart);
+      expect(_textsOf<NumberLit>(tokens), ['0xFF']);
+    });
+
+    test('hex lowercase', () {
+      final tokens = tokenize('0xdeadbeef', dart);
+      expect(_textsOf<NumberLit>(tokens), ['0xdeadbeef']);
+    });
+
+    test('binary literal', () {
+      final tokens = tokenize('0b1010', dart);
+      expect(_textsOf<NumberLit>(tokens), ['0b1010']);
+    });
+
+    test('scientific notation', () {
+      final tokens = tokenize('1e10', dart);
+      expect(_textsOf<NumberLit>(tokens), ['1e10']);
+    });
+
+    test('scientific with decimal', () {
+      final tokens = tokenize('2.5e-3', dart);
+      expect(_textsOf<NumberLit>(tokens), ['2.5e-3']);
+    });
+
+    test('multiple numbers', () {
+      final tokens = tokenize('1 2 3', dart);
+      expect(_textsOf<NumberLit>(tokens), ['1', '2', '3']);
+    });
+
+    test('number followed by punctuation', () {
+      final tokens = tokenize('42;', dart);
+      expect(_textsOf<NumberLit>(tokens), ['42']);
+      expect(_textsOf<Punctuation>(tokens), [';']);
+    });
+
+    test('number inside expression', () {
+      final tokens = tokenize('x+42', dart);
+      expect(_textsOf<NumberLit>(tokens), ['42']);
+    });
+  });
+
+  group('comments', () {
+    test('line comment', () {
+      final tokens = tokenize('x // comment\ny', dart);
+      expect(_textsOf<Comment>(tokens), ['// comment']);
+    });
+
+    test('line comment at start', () {
+      final tokens = tokenize('// first line', dart);
+      expect(_textsOf<Comment>(tokens), ['// first line']);
+    });
+
+    test('line comment with no space', () {
+      final tokens = tokenize('//compact', dart);
+      expect(_textsOf<Comment>(tokens), ['//compact']);
+    });
+
+    test('empty line comment', () {
+      final tokens = tokenize('//\nx', dart);
+      expect(_textsOf<Comment>(tokens), ['//']);
+    });
+
+    test('block comment single line', () {
+      final tokens = tokenize('/* block */', dart);
+      expect(_textsOf<Comment>(tokens), ['/* block */']);
+    });
+
+    test('block comment multi-line', () {
+      final tokens = tokenize('/* line1\nline2 */', dart);
+      expect(_textsOf<Comment>(tokens), ['/* line1\nline2 */']);
+    });
+
+    test('block comment with stars', () {
+      final tokens = tokenize('/** doc comment */', dart);
+      expect(_textsOf<Comment>(tokens), ['/** doc comment */']);
+    });
+
+    test('unterminated block comment', () {
+      final tokens = tokenize('/* unterminated', dart);
+      expect(_textsOf<Comment>(tokens), ['/* unterminated']);
+    });
+
+    test('consecutive line comments', () {
+      final tokens = tokenize('// one\n// two\n// three', dart);
+      expect(_textsOf<Comment>(tokens), ['// one', '// two', '// three']);
+    });
+
+    test('hash comment (YAML)', () {
+      final tokens = tokenize('key: value # comment', yaml);
+      expect(_textsOf<Comment>(tokens), ['# comment']);
+    });
+
+    test('hash comment (shell)', () {
+      final tokens = tokenize('echo hi # comment', shell);
+      expect(_textsOf<Comment>(tokens), ['# comment']);
+    });
+
+    test('JSON has no comments', () {
+      final tokens = tokenize('// not a comment', json);
+      expect(_ofType<Comment>(tokens), isEmpty);
+    });
+  });
+
+  group('annotations', () {
+    test('Dart annotation', () {
+      final tokens = tokenize('@override', dart);
+      expect(_textsOf<Annotation>(tokens), ['@override']);
+    });
+
+    test('Dart annotation before declaration', () {
+      final tokens = tokenize('@deprecated void f() {}', dart);
+      expect(_textsOf<Annotation>(tokens), ['@deprecated']);
+    });
+
+    test('multiple annotations', () {
+      final tokens = tokenize('@immutable @sealed class X {}', dart);
+      expect(_textsOf<Annotation>(tokens), ['@immutable', '@sealed']);
+    });
+
+    test('Scala annotation', () {
+      final tokens = tokenize('@tailrec def f(): Unit = ???', scala);
+      expect(_textsOf<Annotation>(tokens), ['@tailrec']);
+    });
+
+    test('no annotations in YAML', () {
+      final tokens = tokenize('@value', yaml);
+      expect(_ofType<Annotation>(tokens), isEmpty);
+    });
+
+    test('no annotations in JSON', () {
+      final tokens = tokenize('@value', json);
+      expect(_ofType<Annotation>(tokens), isEmpty);
+    });
+  });
+
+  group('punctuation', () {
+    test('parentheses', () {
+      final tokens = tokenize('()', dart);
+      expect(_textsOf<Punctuation>(tokens), ['(', ')']);
+    });
+
+    test('braces', () {
+      final tokens = tokenize('{}', dart);
+      expect(_textsOf<Punctuation>(tokens), ['{', '}']);
+    });
+
+    test('brackets', () {
+      final tokens = tokenize('[]', dart);
+      expect(_textsOf<Punctuation>(tokens), ['[', ']']);
+    });
+
+    test('mixed punctuation', () {
+      final tokens = tokenize('f(x, y);', dart);
+      expect(_textsOf<Punctuation>(tokens), ['(', ',', ')', ';']);
+    });
+
+    test('operators are Operator, not Punctuation', () {
+      final tokens = tokenize('a + b * c', dart);
+      expect(_textsOf<Punctuation>(tokens), isEmpty);
+      expect(_textsOf<Operator>(tokens), ['+', '*']);
+    });
+
+    test('JSON punctuation', () {
+      final tokens = tokenize('{[]:,}', json);
+      expect(_textsOf<Punctuation>(tokens), ['{', '[', ']', ':', ',', '}']);
+    });
+
+    test('YAML colon', () {
+      final tokens = tokenize('key: value', yaml);
+      expect(_textsOf<Punctuation>(tokens), [':']);
+    });
+  });
+
+  group('whitespace', () {
+    test('spaces', () {
+      final tokens = tokenize('a b', dart);
+      expect(tokens[1], isA<Whitespace>());
+      expect(tokens[1].text, ' ');
+    });
+
+    test('tabs', () {
+      final tokens = tokenize('a\tb', dart);
+      expect(tokens[1], isA<Whitespace>());
+      expect(tokens[1].text, '\t');
+    });
+
+    test('newlines', () {
+      final tokens = tokenize('a\nb', dart);
+      expect(tokens[1], isA<Whitespace>());
+      expect(tokens[1].text, '\n');
+    });
+
+    test('mixed whitespace collapsed', () {
+      final tokens = tokenize('a \t\n b', dart);
+      expect(tokens[1], isA<Whitespace>());
+      expect(tokens[1].text, ' \t\n ');
+    });
+
+    test('only whitespace', () {
+      final tokens = tokenize(' ', dart);
+      expect(tokens.length, 1);
+      expect(tokens.first, isA<Whitespace>());
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // Language-specific integration tests
+  // ---------------------------------------------------------------------------
+
+  group('Dart integration', () {
+    test('full function', () {
+      const source = '''
+Future<int> compute(List<String> args) async {
+  final result = await fetch("url");
+  if (result == null) return -1;
+  // process
+  return result.length;
+}
+''';
+      _expectLossless(source, dart);
+      final tokens = tokenize(source, dart);
+      expect(_textsOf<Keyword>(tokens), contains('async'));
+      expect(_textsOf<Keyword>(tokens), contains('await'));
+      expect(_textsOf<Keyword>(tokens), contains('if'));
+      expect(_textsOf<Keyword>(tokens), contains('return'));
+      expect(_textsOf<TypeName>(tokens), contains('Future'));
+      expect(_textsOf<TypeName>(tokens), contains('int'));
+      expect(_textsOf<TypeName>(tokens), contains('List'));
+      expect(_textsOf<TypeName>(tokens), contains('String'));
+      expect(_textsOf<StringLit>(tokens), ['"url"']);
+      expect(_textsOf<NumberLit>(tokens), ['1']);
+      expect(_textsOf<Comment>(tokens), ['// process']);
+    });
+
+    test('sealed class with pattern matching', () {
+      const source = '''
+sealed class Shape {}
+final class Circle extends Shape {
+  final double radius;
+  const Circle(this.radius);
+}
+''';
+      _expectLossless(source, dart);
+      final tokens = tokenize(source, dart);
+      expect(_textsOf<Keyword>(tokens), contains('sealed'));
+      expect(_textsOf<Keyword>(tokens), contains('extends'));
+      expect(_textsOf<TypeName>(tokens), contains('double'));
+    });
+  });
+
+  group('Scala integration', () {
+    test('case class and match', () {
+      const source = '''
+case class Point(x: Int, y: Int)
+
+val p = Point(1, 2)
+val desc = p match
+  case Point(0, 0) => "origin"
+  case Point(x, y) => s"(\$x, \$y)"
+''';
+      _expectLossless(source, scala);
+      final tokens = tokenize(source, scala);
+      expect(_textsOf<Keyword>(tokens), contains('case'));
+      expect(_textsOf<Keyword>(tokens), contains('class'));
+      expect(_textsOf<Keyword>(tokens), contains('val'));
+      expect(_textsOf<Keyword>(tokens), contains('match'));
+      expect(_textsOf<TypeName>(tokens), contains('Int'));
+    });
+  });
+
+  group('YAML integration', () {
+    test('nested structure', () {
+      const source = '''
+server:
+  host: "localhost"
+  port: 8080
+  debug: true
+  tags:
+    - web
+    - api
+''';
+      _expectLossless(source, yaml);
+      final tokens = tokenize(source, yaml);
+      expect(_textsOf<StringLit>(tokens), ['"localhost"']);
+      expect(_textsOf<NumberLit>(tokens), ['8080']);
+      expect(_textsOf<Keyword>(tokens), contains('true'));
+    });
+  });
+
+  group('JSON integration', () {
+    test('nested object', () {
+      const source = '{"a": 1, "b": [true, false, null], "c": "text"}';
+      _expectLossless(source, json);
+      final tokens = tokenize(source, json);
+      expect(_textsOf<Keyword>(tokens), ['true', 'false', 'null']);
+      expect(_textsOf<NumberLit>(tokens), ['1']);
+      expect(_textsOf<StringLit>(tokens), ['"a"', '"b"', '"c"', '"text"']);
+    });
+  });
+
+  group('shell integration', () {
+    test('script with conditionals and loops', () {
+      const source = '''
+if [ -d "build" ]; then
+  for f in build/*; do
+    echo "removing \$f"
+  done
+fi
+''';
+      _expectLossless(source, shell);
+      final tokens = tokenize(source, shell);
+      expect(_textsOf<Keyword>(tokens), contains('if'));
+      expect(_textsOf<Keyword>(tokens), contains('then'));
+      expect(_textsOf<Keyword>(tokens), contains('for'));
+      expect(_textsOf<Keyword>(tokens), contains('do'));
+      expect(_textsOf<Keyword>(tokens), contains('done'));
+      expect(_textsOf<Keyword>(tokens), contains('fi'));
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // Custom grammars
+  // ---------------------------------------------------------------------------
+
+  group('custom grammar', () {
+    test('minimal grammar', () {
+      const minimal = LangGrammar(name: 'minimal');
+      const source = 'hello 42 "world"';
+      _expectLossless(source, minimal);
+    });
+
+    test('custom keywords', () {
+      const custom = LangGrammar(
+        name: 'custom',
+        keywords: ['fn', 'let'],
+        types: ['u32'],
+      );
+      final tokens = tokenize('fn main() { let x: u32 = 1; }', custom);
+      expect(_textsOf<Keyword>(tokens), ['fn', 'let']);
+      expect(_textsOf<TypeName>(tokens), ['u32']);
+    });
+
+    test('custom comment syntax', () {
+      const custom = LangGrammar(
+        name: 'custom',
+        lineComment: '--',
+        blockComment: ('{-', '-}'),
+      );
+      final tokens = tokenize('x -- line comment\ny {- block -} z', custom);
+      expect(_textsOf<Comment>(tokens), ['-- line comment', '{- block -}']);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // grammarFor lookup
+  // ---------------------------------------------------------------------------
+
+  group('grammarFor', () {
+    test('dart', () {
+      expect(grammarFor('dart')?.name, 'dart');
+    });
+
+    test('scala', () {
+      expect(grammarFor('scala')?.name, 'scala');
+    });
+
+    test('yaml aliases', () {
+      expect(grammarFor('yaml')?.name, 'yaml');
+      expect(grammarFor('yml')?.name, 'yaml');
+    });
+
+    test('json', () {
+      expect(grammarFor('json')?.name, 'json');
+    });
+
+    test('shell aliases', () {
+      expect(grammarFor('sh')?.name, 'shell');
+      expect(grammarFor('bash')?.name, 'shell');
+      expect(grammarFor('shell')?.name, 'shell');
+      expect(grammarFor('zsh')?.name, 'shell');
+    });
+
+    test('unknown returns null', () {
+      expect(grammarFor('brainfuck'), isNull);
+      expect(grammarFor(''), isNull);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // Edge cases
+  // ---------------------------------------------------------------------------
+
+  group('edge cases', () {
+    test('empty input returns empty list', () {
+      expect(tokenize('', dart), isEmpty);
+    });
+
+    test('single keyword', () {
+      final tokens = tokenize('if', dart);
+      expect(tokens.length, 1);
+      expect(tokens.first, isA<Keyword>());
+    });
+
+    test('single number', () {
+      final tokens = tokenize('42', dart);
+      expect(tokens.length, 1);
+      expect(tokens.first, isA<NumberLit>());
+    });
+
+    test('single string', () {
+      final tokens = tokenize('"x"', dart);
+      expect(tokens.length, 1);
+      expect(tokens.first, isA<StringLit>());
+    });
+
+    test('single comment', () {
+      final tokens = tokenize('// comment', dart);
+      expect(tokens.length, 1);
+      expect(tokens.first, isA<Comment>());
+    });
+
+    test('dot between identifiers is Punctuation, not part of a float', () {
+      final tokens = tokenize('x.length', dart);
+      expect(_textsOf<Identifier>(tokens), ['x', 'length']);
+      expect(_textsOf<Punctuation>(tokens), ['.']);
+    });
+
+    test('annotation without following identifier is Plain', () {
+      // `@` is only valid as an annotation prefix in Dart. On its own
+      // it falls through to Plain.
+      final tokens = tokenize('@ ', dart);
+      expect(_ofType<Annotation>(tokens), isEmpty);
+      expect(_textsOf<Plain>(tokens), ['@']);
+    });
+
+    test('special characters merged into single Plain token', () {
+      final tokens = tokenize('\u00e9\u00e8\u00ea', dart);
+      expect(tokens.length, 1);
+      expect(tokens.first, isA<Plain>());
+      expect(tokens.first.text, '\u00e9\u00e8\u00ea');
+    });
+
+    test('very long input', () {
+      final source = 'var x = ${List.filled(1000, '42').join(' + ')};';
+      _expectLossless(source, dart);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // tokenizeSpans: byte offsets into source.
+  // ---------------------------------------------------------------------------
+
+  void expectSpanInvariants(String source, LangGrammar grammar) {
+    final spans = tokenizeSpans(source, grammar);
+    if (source.isEmpty) {
+      expect(spans, isEmpty);
+      return;
+    }
+    // Anchored.
+    expect(spans.first.start, 0, reason: 'first span starts at 0');
+    expect(
+      spans.last.end,
+      source.length,
+      reason: 'last span ends at source.length',
+    );
+    // Contiguous.
+ for (var i = 0; i + 1 < spans.length; i++) { + expect( + spans[i].end, + spans[i + 1].start, + reason: 'spans[$i].end == spans[${i + 1}].start', + ); + } + // Text matches substring. + for (final s in spans) { + expect( + source.substring(s.start, s.end), + s.token.text, + reason: 'substring matches token text at [${s.start}, ${s.end})', + ); + expect(s.length, s.end - s.start, reason: 'length == end - start'); + } + // Lossless (already covered by substring-matches + contiguous, but + // explicit for readability). + expect( + spans.map((s) => s.token.text).join(), + source, + reason: 'lossless join', + ); + } + + group('tokenizeSpans invariants', () { + test('empty source returns empty list', () { + expect(tokenizeSpans('', dart), isEmpty); + }); + + test('single-character source', () { + expectSpanInvariants('x', dart); + }); + + test('Dart snippet', () { + const source = ''' +void main() { + final x = 42; + // greeting + print("hello \$x"); +} +'''; + expectSpanInvariants(source, dart); + }); + + test('Scala snippet', () { + const source = ''' +object Main: + def run(args: List[String]): Unit = + val x: Int = 42 +'''; + expectSpanInvariants(source, scala); + }); + + test('YAML snippet', () { + const source = ''' +name: rumil_tokens +version: 0.6.0 +# comment +'''; + expectSpanInvariants(source, yaml); + }); + + test('JSON snippet', () { + expectSpanInvariants('{"a": 1, "b": true, "c": null}', json); + }); + + test('shell snippet', () { + const source = ''' +# deploy +for f in *.dart; do + echo "\$f" +done +'''; + expectSpanInvariants(source, shell); + }); + + test('unterminated string preserves end-of-source anchor', () { + expectSpanInvariants('var s = "unterminated', dart); + }); + + test('unterminated block comment preserves end-of-source anchor', () { + expectSpanInvariants('/* unterminated', dart); + }); + + test('only whitespace', () { + expectSpanInvariants(' \t\n ', dart); + }); + + test('only punctuation', () { + expectSpanInvariants('(){}[]<>;:,.', dart); + }); + }); + + group('tokenizeSpans parity with tokenize', () { + // Token has no == / hashCode so we compare by (runtimeType, text). 
+    (Type, String) key(Token t) => (t.runtimeType, t.text);
+    void expectParity(String source, LangGrammar grammar) {
+      final tokens = tokenize(source, grammar);
+      final spans = tokenizeSpans(source, grammar);
+      expect(
+        spans.map((s) => key(s.token)).toList(),
+        tokens.map(key).toList(),
+        reason: 'tokenizeSpans token sequence matches tokenize',
+      );
+    }
+
+    test('Dart', () {
+      expectParity('void f() { return 42; }', dart);
+    });
+
+    test('Scala', () {
+      expectParity('val x: Int = 42', scala);
+    });
+
+    test('YAML', () {
+      expectParity('key: "value" # c\n', yaml);
+    });
+
+    test('JSON', () {
+      expectParity('[1, 2, 3]', json);
+    });
+
+    test('shell', () {
+      expectParity('if [ -f "x" ]; then echo hi; fi', shell);
+    });
+  });
+
+  group('tokenizeSpans span boundaries', () {
+    test('keyword span covers exact characters', () {
+      final spans = tokenizeSpans('if x', dart);
+      expect(spans[0].token, isA<Keyword>());
+      expect(spans[0].start, 0);
+      expect(spans[0].end, 2);
+    });
+
+    test('whitespace span covers exact characters', () {
+      final spans = tokenizeSpans('a  b', dart);
+      expect(spans[1].token, isA<Whitespace>());
+      expect(spans[1].start, 1);
+      expect(spans[1].end, 3);
+    });
+
+    test('number span covers exact characters', () {
+      final spans = tokenizeSpans('x = 42', dart);
+      final number = spans.firstWhere((s) => s.token is NumberLit);
+      expect(number.start, 4);
+      expect(number.end, 6);
+    });
+
+    test('string span includes delimiters', () {
+      final spans = tokenizeSpans('"hi"', dart);
+      expect(spans.single.token, isA<StringLit>());
+      expect(spans.single.start, 0);
+      expect(spans.single.end, 4);
+    });
+
+    test('line comment span runs to end of line (exclusive of newline)', () {
+      final spans = tokenizeSpans('// note\nx', dart);
+      final comment = spans.firstWhere((s) => s.token is Comment);
+      expect(comment.start, 0);
+      expect(comment.end, 7);
+      // Newline is a separate Whitespace token.
+      expect(spans[1].token, isA<Whitespace>());
+      expect(spans[1].start, 7);
+    });
+
+    test('merged Plain spans cover the full run', () {
+      // Unicode chars the tokenizer doesn't classify → Plain tokens that
+      // get merged. The merged span must cover the whole run.
+      const source = 'éèê';
+      final spans = tokenizeSpans(source, dart);
+      expect(spans, hasLength(1));
+      expect(spans.single.token, isA<Plain>());
+      expect(spans.single.start, 0);
+      expect(spans.single.end, source.length);
+      expect(spans.single.token.text, source);
+    });
+
+    test('annotation span includes prefix', () {
+      final spans = tokenizeSpans('@override', dart);
+      expect(spans.single.token, isA<Annotation>());
+      expect(spans.single.start, 0);
+      expect(spans.single.end, 9);
+    });
+  });
+
+  group('Spanned generics', () {
+    test('Spanned exposes record fields through getters', () {
+      const s = Spanned.of(Keyword('if'), 3, 5);
+      expect(s.token.text, 'if');
+      expect(s.start, 3);
+      expect(s.end, 5);
+      expect(s.length, 2);
+    });
+
+    test('narrow type parameter upcasts to Spanned<Token>', () {
+      const kw = Spanned.of(Keyword('if'), 0, 2);
+      // Covariance through the record type parameter.
+      const Spanned<Token> wide = kw;
+      expect(wide.token, isA<Keyword>());
+      expect(wide.start, 0);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // Grammar correctness (0.6.0 Path C fixes)
+  // ---------------------------------------------------------------------------
+
+  group('Dart raw strings', () {
+    test("r'...' is one StringLit including the r prefix", () {
+      final tokens = tokenize("r'no\\escape'", dart);
+      expect(_textsOf<StringLit>(tokens), ["r'no\\escape'"]);
+      expect(_ofType<Identifier>(tokens), isEmpty);
+    });
+
+    test('r"..." is one StringLit including the r prefix', () {
+      final tokens = tokenize('r"no\\escape"', dart);
+      expect(_textsOf<StringLit>(tokens), ['r"no\\escape"']);
+    });
+
+    test("r'''...''' (triple-single) is one StringLit", () {
+      final tokens = tokenize("r'''no\\escape'''", dart);
+      expect(_textsOf<StringLit>(tokens), ["r'''no\\escape'''"]);
+    });
+
+    test('r"""...""" (triple-double) is one StringLit', () {
+      final tokens = tokenize('r"""no\\escape"""', dart);
+      expect(_textsOf<StringLit>(tokens), ['r"""no\\escape"""']);
+    });
+
+    test('non-raw identifier r followed by space is Identifier', () {
+      final tokens = tokenize("r 'x'", dart);
+      expect(_textsOf<Identifier>(tokens), ['r']);
+      expect(_textsOf<StringLit>(tokens), ["'x'"]);
+    });
+  });
+
+  group('Scala backtick identifiers', () {
+    test('`type` is one Identifier even though `type` is a keyword', () {
+      final tokens = tokenize('val `type` = 1', scala);
+      expect(_textsOf<Identifier>(tokens), ['`type`']);
+      // `type` inside backticks must NOT appear as a separate keyword.
+      expect(_textsOf<Keyword>(tokens), ['val']);
+    });
+
+    test('unterminated backtick identifier is tolerated', () {
+      final tokens = tokenize('val `noclose', scala);
+      expect(_textsOf<Identifier>(tokens), ['`noclose']);
+    });
+
+    test('backtick identifier with spaces', () {
+      final tokens = tokenize('val `hello world` = 1', scala);
+      expect(_textsOf<Identifier>(tokens), ['`hello world`']);
+    });
+  });
+
+  group('Scala string interpolator prefix', () {
+    test('s"..." is one StringLit including the s prefix', () {
+      final tokens = tokenize(r'val s = s"hi $name"', scala);
+      expect(_textsOf<StringLit>(tokens), ['s"hi \$name"']);
+      // Only `s` as in `val s = ...` should be an Identifier, not the
+      // interpolator prefix.
+      expect(_textsOf<Identifier>(tokens), ['s']);
+    });
+
+    test('f"..." is one StringLit including the f prefix', () {
+      final tokens = tokenize(r'val x = f"$v%.2f"', scala);
+      expect(_textsOf<StringLit>(tokens).last, 'f"\$v%.2f"');
+    });
+
+    test('arbitrary identifier prefix (my_interp"...")', () {
+      final tokens = tokenize('val x = my_interp"body"', scala);
+      expect(_textsOf<StringLit>(tokens), ['my_interp"body"']);
+    });
+
+    test('triple-quoted with prefix (raw"""...""")', () {
+      final tokens = tokenize('val x = raw"""body"""', scala);
+      expect(_textsOf<StringLit>(tokens), ['raw"""body"""']);
+    });
+
+    test('no prefix still works (plain "...")', () {
+      final tokens = tokenize('val x = "body"', scala);
+      expect(_textsOf<StringLit>(tokens), ['"body"']);
+    });
+  });
+
+  group('JSON negative numbers', () {
+    test('-1 is one NumberLit', () {
+      final tokens = tokenize('{"n": -1}', json);
+      expect(_textsOf<NumberLit>(tokens), ['-1']);
+      // No separate `-` token anywhere.
+      expect(_ofType<Operator>(tokens), isEmpty);
+      expect(_textsOf<Plain>(tokens), isEmpty);
+    });
+
+    test('-3.14 is one NumberLit', () {
+      final tokens = tokenize('{"n": -3.14}', json);
+      expect(_textsOf<NumberLit>(tokens), ['-3.14']);
+    });
+
+    test('-1e10 is one NumberLit', () {
+      final tokens = tokenize('{"n": -1e10}', json);
+      expect(_textsOf<NumberLit>(tokens), ['-1e10']);
+    });
+
+    test('positive numbers still work', () {
+      final tokens = tokenize('{"n": 42}', json);
+      expect(_textsOf<NumberLit>(tokens), ['42']);
+    });
+  });
+
+  group('YAML flow collections', () {
+    test('flow sequence [1, 2, 3] classifies as punctuation', () {
+      final tokens = tokenize('[1, 2, 3]', yaml);
+      expect(_textsOf<Punctuation>(tokens), ['[', ',', ',', ']']);
+      expect(_textsOf<NumberLit>(tokens), ['1', '2', '3']);
+    });
+
+    test('flow map {a: 1} classifies as punctuation', () {
+      final tokens = tokenize('{a: 1}', yaml);
+      expect(_textsOf<Punctuation>(tokens), ['{', ':', '}']);
+    });
+
+    test('YAML 1.1 keywords removed: yes/no/on/off are identifiers', () {
+      final tokens = tokenize('a: yes\nb: no\nc: on\nd: off', yaml);
+      // In YAML 1.2 these are strings (we treat as identifiers for highlighting).
+      expect(_textsOf<Keyword>(tokens), isEmpty);
+      expect(_textsOf<Identifier>(tokens), [
+        'a',
+        'yes',
+        'b',
+        'no',
+        'c',
+        'on',
+        'd',
+        'off',
+      ]);
+    });
+
+    test('YAML 1.2 booleans still classified as Keyword', () {
+      final tokens = tokenize('a: true\nb: false\nc: null', yaml);
+      expect(_textsOf<Keyword>(tokens), ['true', 'false', 'null']);
+    });
+  });
+
+  group('Operator vs Punctuation classification', () {
+    test('Dart: + and * are Operator, not Punctuation', () {
+      final tokens = tokenize('a + b * c', dart);
+      expect(_textsOf<Operator>(tokens), ['+', '*']);
+      expect(_ofType<Punctuation>(tokens), isEmpty);
+    });
+
+    test('Dart: parens and comma are Punctuation', () {
+      final tokens = tokenize('f(1, 2)', dart);
+      expect(_textsOf<Punctuation>(tokens), ['(', ',', ')']);
+      expect(_ofType<Operator>(tokens), isEmpty);
+    });
+
+    test('Dart: multi-char operators coalesce into one token', () {
+      final tokens = tokenize('a == b && c', dart);
+      expect(_textsOf<Operator>(tokens), ['==', '&&']);
+    });
+
+    test('Dart: arrow => is one Operator', () {
+      final tokens = tokenize('(x) => x', dart);
+      expect(_textsOf<Operator>(tokens), ['=>']);
+    });
+
+    test('Scala: <- is one Operator (for-comprehensions)', () {
+      final tokens = tokenize('for { x <- xs }', scala);
+      expect(_textsOf<Operator>(tokens), ['<-']);
+    });
+
+    test('JSON: no operators (no operator classification happens)', () {
+      final tokens = tokenize('{"n": 1}', json);
+      expect(_ofType<Operator>(tokens), isEmpty);
+    });
+  });
+
+  group('Shell variables', () {
+    test('bare \$NAME is one Variable', () {
+      final tokens = tokenize(r'echo $HOME', shell);
+      expect(_textsOf<Variable>(tokens), [r'$HOME']);
+      // $ must not leak out as a separate Plain token.
+      expect(_textsOf<Plain>(tokens), isEmpty);
+    });
+
+    test(r'${NAME} (braced) is one Variable', () {
+      final tokens = tokenize(r'echo ${HOME}', shell);
+      expect(_textsOf<Variable>(tokens), [r'${HOME}']);
+    });
+
+    test(r'${NAME:-default} captures full expansion', () {
+      final tokens = tokenize(r'echo ${X:-hi}', shell);
+      expect(_textsOf<Variable>(tokens), [r'${X:-hi}']);
+    });
+
+    test(r'${#NAME} (string length) captures full expansion', () {
+      final tokens = tokenize(r'echo ${#PATH}', shell);
+      expect(_textsOf<Variable>(tokens), [r'${#PATH}']);
+    });
+
+    test('special parameters: \$1, \$@, \$?, \$\$', () {
+      final tokens = tokenize(r'echo $1 $@ $? $$', shell);
+      expect(_textsOf<Variable>(tokens), [r'$1', r'$@', r'$?', r'$$']);
+    });
+
+    test(r'lone $ before ( (for $(...)) emits $ as Variable', () {
+      final tokens = tokenize(r'echo $(ls)', shell);
+      final vars = _textsOf<Variable>(tokens);
+      expect(vars, [r'$']);
+      expect(_textsOf<Punctuation>(tokens), ['(', ')']);
+      expect(_textsOf<Identifier>(tokens), ['echo', 'ls']);
+    });
+
+    test('unterminated \${ is tolerated', () {
+      final tokens = tokenize(r'echo ${foo', shell);
+      expect(_textsOf<Variable>(tokens), [r'${foo']);
+    });
+
+    test('Dart: \$ in identifier does NOT produce Variable', () {
+      final tokens = tokenize(r'var $x = 1;', dart);
+      // Dart allows $ in idents; no Variable classification.
+      expect(_ofType<Variable>(tokens), isEmpty);
+      expect(_textsOf<Identifier>(tokens), [r'$x']);
+    });
+  });
+
+  group('Shell backtick command substitution', () {
+    test('backticks classified as Punctuation', () {
+      final tokens = tokenize('echo `ls`', shell);
+      expect(_textsOf<Punctuation>(tokens), ['`', '`']);
+      expect(_textsOf<Identifier>(tokens), ['echo', 'ls']);
+    });
+
+    test('non-shell grammars do not recognize backticks as punctuation', () {
+      final tokens = tokenize('x `y`', json);
+      // backtick falls through to Plain in JSON.
+      expect(_textsOf<Plain>(tokens), contains('`'));
+    });
+  });
+
+  group('Shell heredocs', () {
+    test('<<EOF heredoc is one token', () {
+      const source = 'cat <<EOF\nbody\nEOF\n';
+      final tokens = tokenize(source, shell);
+      final heredoc = tokens.whereType<StringLit>().single;
+      expect(heredoc.text, startsWith('<<EOF'));
+      expect(heredoc.text, contains('body'));
+    });
+
+    test('<<-EOF heredoc keeps the dash in the token text', () {
+      const source = 'cat <<-EOF\n\tbody\nEOF\n';
+      final tokens = tokenize(source, shell);
+      final heredoc = tokens.whereType<StringLit>().single;
+      expect(heredoc.text, startsWith('<<-EOF'));
+      expect(heredoc.text, contains('\tbody'));
+    });
+
+    test("<<'EOF' single-quoted marker", () {
+      const source = "cat <<'EOF'\nbody\nEOF\n";
+      final tokens = tokenize(source, shell);
+      final heredoc = tokens.whereType<StringLit>().single;
+      expect(heredoc.text, startsWith("<<'EOF'"));
+      expect(heredoc.text, contains('body'));
+    });
+
+    test('unterminated heredoc consumes to end-of-source', () {
+      const source = 'cat <<EOF\nbody with no terminator';
+      final tokens = tokenize(source, shell);
+      final heredoc = tokens.whereType<StringLit>().single;
+      expect(heredoc.text, startsWith('<<EOF'));
+      expect(heredoc.text, endsWith('no terminator'));
+    });
+
+    test('line merely starting with the marker does not terminate', () {
+      const source = 'cat <<EOF\nEOFISH\nEOF\n';
+      final tokens = tokenize(source, shell);
+      final heredoc = tokens.whereType<StringLit>().single;
+      expect(heredoc.text, contains('EOFISH'));
+      expect(heredoc.text, endsWith('EOF\n'));
+    });
+  });
+
+  group('Multi-char operators and generics (0.6.0 polish)', () {
+    test('Dart: Map<String, int>: < and > are Punctuation (generics)', () {
+      final tokens = tokenize('Map<String, int>', dart);
+      expect(_textsOf<Punctuation>(tokens), ['<', ',', '>']);
+      expect(_ofType<Operator>(tokens), isEmpty);
+    });
+
+    test('Dart: a <= b is one Operator', () {
+      final tokens = tokenize('a <= b', dart);
+      expect(_textsOf<Operator>(tokens), ['<=']);
+      expect(_ofType<Punctuation>(tokens), isEmpty);
+    });
+
+    test('Dart: a >= b is one Operator', () {
+      final tokens = tokenize('a >= b', dart);
+      expect(_textsOf<Operator>(tokens), ['>=']);
+    });
+
+    test('Dart: a ?? b is one Operator', () {
+      final tokens = tokenize('a ?? b', dart);
+      expect(_textsOf<Operator>(tokens), ['??']);
+    });
+
+    test('Dart: a?.b is one Operator', () {
+      final tokens = tokenize('a?.b', dart);
+      expect(_textsOf<Operator>(tokens), ['?.']);
+    });
+
+    test('Dart: nullable type String?: ? is Punctuation', () {
+      final tokens = tokenize('String? x', dart);
+      expect(_textsOf<Punctuation>(tokens), ['?']);
+    });
+
+    test('Dart: arrow => is one Operator, not = plus >', () {
+      final tokens = tokenize('(x) => x', dart);
+      expect(_textsOf<Operator>(tokens), ['=>']);
+    });
+
+    test('Dart: compound assign += -= *= etc', () {
+      final tokens = tokenize('x += 1; y -= 2; z *= 3;', dart);
+      expect(_textsOf<Operator>(tokens), ['+=', '-=', '*=']);
+    });
+
+    test('Dart: ??= compound assign', () {
+      final tokens = tokenize('x ??= 1', dart);
+      expect(_textsOf<Operator>(tokens), ['??=']);
+    });
+
+    test('Dart: x=-1 keeps = and - as separate Operators', () {
+      final tokens = tokenize('x=-1', dart);
+      expect(_textsOf<Operator>(tokens), ['=', '-']);
+      expect(_textsOf<NumberLit>(tokens), ['1']);
+    });
+
+    test('Scala: <- is one Operator', () {
+      final tokens = tokenize('for { x <- xs }', scala);
+      expect(_textsOf<Operator>(tokens), ['<-']);
+    });
+
+    test('Scala: -> is one Operator', () {
+      final tokens = tokenize('val m = Map(1 -> "a")', scala);
+      expect(_textsOf<Operator>(tokens), contains('->'));
+    });
+
+    test('Scala: :: is one Operator', () {
+      final tokens = tokenize('1 :: Nil', scala);
+      expect(_textsOf<Operator>(tokens), ['::']);
+    });
+
+    test('Shell: && is one Operator', () {
+      final tokens = tokenize('a && b', shell);
+      expect(_textsOf<Operator>(tokens), ['&&']);
+    });
+
+    test('Shell: || is one Operator', () {
+      final tokens = tokenize('a || b', shell);
+      expect(_textsOf<Operator>(tokens), ['||']);
+    });
+  });
+
+  group('lossless roundtrip under new grammar rules', () {
+    // Every grammar-fix input must still round-trip losslessly.
+    test('Dart raw strings round-trip', () {
+      _expectLossless("r'no\\escape' + r\"also\" + r'''triple'''", dart);
+    });
+
+    test('Scala interpolators round-trip', () {
+      _expectLossless(r'val s = s"hi $name"; val f = f"$x%.2f"', scala);
+    });
+
+    test('Scala backtick idents round-trip', () {
+      _expectLossless('val `type` = 1; val `hello world` = 2', scala);
+    });
+
+    test('JSON negatives round-trip', () {
+      _expectLossless('{"a": -1, "b": -3.14, "c": -1e10}', json);
+    });
+
+    test('YAML flow collections round-trip', () {
+      _expectLossless('a: [1, 2, 3]\nb: {x: 1, y: 2}\n', yaml);
+    });
+
+    test('shell variables round-trip', () {
+      _expectLossless(r'echo $HOME ${PATH:-/bin} $(ls) `pwd`', shell);
+    });
+
+    test('shell heredocs round-trip', () {
+      _expectLossless('cat <<EOF\nbody\nEOF\n', shell);
+    });
+  });
+