diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000000..2fb833a5df
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,2 @@
+# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
+BasedOnStyle: Chromium
diff --git a/wgsl/Makefile b/wgsl/Makefile
index 5374b9d6cf..6bd8439878 100644
--- a/wgsl/Makefile
+++ b/wgsl/Makefile
@@ -1,14 +1,14 @@
 .PHONY: all clean nfkc validate lalr validate-examples
 all: index.html nfkc validate test diagrams
 
-validate: lalr validate-examples
-validate-examples: grammar/grammar.js
+validate: lalr unit_tests validate-examples
 
 clean:
-	rm -f index.html index.pre.html grammar/grammar.js wgsl.lalr.txt
+	rm -f index.html index.pre.html index.bs.pre grammar/grammar.js grammar/build wgsl.lalr.txt
+
 # Generate spec HTML from Bikeshed source.
-WGSL_SOURCES:=index.bs $(wildcard wgsl.*.bs.include)
+WGSL_SOURCES:=index.bs scanner.cc wgsl.recursive.bs.include wgsl.reserved.bs.include
 index.pre.html: $(WGSL_SOURCES)
 	DIE_ON=everything bash ../tools/invoke-bikeshed.sh $@ $(WGSL_SOURCES)
@@ -23,14 +23,28 @@ diagrams: $(MERMAID_OUTPUTS)
 img/%.mmd.svg: diagrams/%.mmd ../tools/invoke-mermaid.sh ../tools/mermaid.json
 	sh ../tools/invoke-mermaid.sh -i $< -o $@
 
-# Extract WGSL grammar from the spec, validate it with Treesitter,
-# and use Treesitter to parse many code examples in the spec.
-grammar/grammar.js: index.bs extract-grammar.py
-	python3 ./extract-grammar.py index.bs grammar/grammar.js
+TREESITTER_GRAMMAR_INPUT := grammar/grammar.js
+TREESITTER_PARSER := grammar/build/wgsl.so
+
+# Extract the WGSL grammar from the spec, and validate it by building a Treesitter parser from it.
+$(TREESITTER_GRAMMAR_INPUT) $(TREESITTER_PARSER): index.bs scanner.cc extract-grammar.py
+	python3 ./extract-grammar.py --spec index.bs --scanner scanner.cc --tree-sitter-dir grammar --flow xb
+
+.PHONY: validate-examples
+# Use Treesitter to parse many code examples in the spec.
+validate-examples: $(TREESITTER_PARSER)
+	python3 ./extract-grammar.py --flow e
+
+.PHONY: unit_tests
+# Use Treesitter to parse code samples
+unit_tests: $(TREESITTER_PARSER) wgsl_unit_tests.py
+	python3 wgsl_unit_tests.py --parser $(TREESITTER_PARSER)
 
 # The grammar in JSON form, emitted by Treesitter.
 WGSL_GRAMMAR=grammar/src/grammar.json
-$(WGSL_GRAMMAR) : grammar/grammar.js
+$(WGSL_GRAMMAR) : $(TREESITTER_GRAMMAR_INPUT)
+
+wgsl_unit_tests:
 
 .PHONY: nfkc
 nfkc:
diff --git a/wgsl/analyze/Grammar.py b/wgsl/analyze/Grammar.py
index d051856205..f51b7a99ee 100755
--- a/wgsl/analyze/Grammar.py
+++ b/wgsl/analyze/Grammar.py
@@ -44,6 +44,7 @@
 import json
 import functools
+import sys
 from ObjectRegistry import RegisterableObject, ObjectRegistry
 from collections import defaultdict
@@ -323,8 +324,25 @@ def with_meta(phrase,metachar,print_option):
         # Print ourselves
         if print_option.bikeshed:
             context = 'recursive descent syntax'
-            if print_option.grammar.rules[name].is_token():
+            g = print_option.grammar
+            if g.rules[name].is_token():
                 context = 'syntax'
+            if name in g.extra_externals:
+                context = 'syntax_sym'
+                if name == '_disambiguate_template':
+                    # This is an implementation detail, so make it invisible.
+                    return ''
+                else:
+                    without_underscore = ['_less_than',
+                                          '_less_than_equal',
+                                          '_greater_than',
+                                          '_greater_than_equal',
+                                          '_shift_left',
+                                          '_shift_left_assign',
+                                          '_shift_right',
+                                          '_shift_right_assign']
+                    if name in without_underscore:
+                        name = name[1:]
             return "[={}/{}=]".format(context,name)
         return name
     if isinstance(rule,Choice):
@@ -350,7 +368,7 @@ def with_meta(phrase,metachar,print_option):
             # If it's not canonical, then it can have nesting.
             return "(" + inside + nl + ")"
     if isinstance(rule,Seq):
-        return " ".join([i.pretty_str(print_option) for i in rule])
+        return " ".join(filter(lambda i: len(i)>0, [i.pretty_str(print_option) for i in rule]))
     if isinstance(rule,Repeat1):
         return "( " + "".join([i.pretty_str(print_option) for i in rule]) + " )+"
     raise RuntimeError("unexpected node: {}".format(str(rule)))
@@ -859,6 +877,21 @@ def is_accepting(self):
     def at_end(self):
         return self.position == len(self.items())
 
+def json_externals(json):
+    """
+    Returns the set of names of symbols in the "externals" section of the
+    Treesitter JSON grammar.
+
+    The data looks like this, for section "externals":
+      {
+        "externals": [
+          { "type": "SYMBOL", "name": "_block_comment" },
+          { "type": "SYMBOL", "name": "_error_sentinel" }
+        ]
+      }
+    """
+    return set([ x["name"] for x in json.get("externals",[]) ])
+
 
 def json_hook(grammar,memo,tokens_only,dct):
     """
@@ -1801,6 +1834,22 @@ def __init__(self, json_text, start_symbol, ignore='_reserved'):
 
         # First decode it without any interpretation.
         pass0 = json.loads(json_text)
+
+        # Get the external tokens; these are not necessarily represented in the rules.
+        external_tokens = json_externals(pass0)
+        #print(external_tokens,file=sys.stderr)
+        defined_rules = set(pass0["rules"].keys())
+        # The set of external tokens that don't have an ordinary definition in the grammar.
+        self.extra_externals = external_tokens - defined_rules
+        for e in self.extra_externals:
+            content = "\\u200B{}".format(e)
+            if e == '_disambiguate_template':
+                # This is a zero-width token used for Treesitter's benefit.
+                #content = ''
+                pass
+            # Create a placeholder definition
+            pass0["rules"][e] = {"type":"TOKEN","content":{"type":"PATTERN","value":content}}
+
         # Remove any rules that should be ignored
         # The WGSL grammar has _reserved, which includes 'attribute' but
         # that is also the name of a different grammar rule.
@@ -1922,6 +1971,7 @@ def pretty_str(self,print_option=PrintOption()):
 
         token_rules = set()
 
+        # Look for defined rules that look better when absorbed into their uses.
         for name, rule in self.rules.items():
             # Star-able is also optional-able, so starrable must come first.
             starred_phrase = rule.as_starred(name)
@@ -1938,6 +1988,8 @@ def pretty_str(self,print_option=PrintOption()):
             if len(phrase)==1 and phrase[0].is_token():
                 token_rules.add(name)
 
+        # A rule that was generated to satisfy canonicalization is better
+        # presented as absorbed into its original parent.
         for name, rule in self.rules.items():
             # We only care about rules generated during canonicalization
             if name.find('.') > 0 or name.find('/') > 0:
diff --git a/wgsl/extract-grammar.py b/wgsl/extract-grammar.py
index 938c9fdae1..9329fe08eb 100755
--- a/wgsl/extract-grammar.py
+++ b/wgsl/extract-grammar.py
@@ -3,28 +3,50 @@
 from datetime import date
 from string import Template
 
+import argparse
 import os
 import re
 import subprocess
 import sys
+import shutil
+import wgsl_unit_tests
+from distutils.ccompiler import new_compiler
+from distutils.unixccompiler import UnixCCompiler
 from tree_sitter import Language, Parser
 
-HEADER = """
-// Copyright (C) [$YEAR] World Wide Web Consortium,
-// (Massachusetts Institute of Technology, European Research Consortium for
-// Informatics and Mathematics, Keio University, Beihang).
-// All Rights Reserved.
-//
-// This work is distributed under the W3C (R) Software License [1] in the hope
-// that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
-// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-//
-// [1] http://www.w3.org/Consortium/Legal/copyright-software
-
-// **** This file is auto-generated. Do not edit. ****
-
-""".lstrip()
+class Options():
+    def __init__(self,bs_filename, tree_sitter_dir, scanner_cc_filename):
+        self.script = 'extract-grammar.py'
+        self.bs_filename = bs_filename
+        self.grammar_dir = tree_sitter_dir
+        self.scanner_cc_filename = scanner_cc_filename
+        self.wgsl_shared_lib = os.path.join(self.grammar_dir,"build","wgsl.so")
+        self.grammar_filename = os.path.join(self.grammar_dir,"grammar.js")
+        self.verbose = False
+
+    def __str__(self):
+        parts = []
+        parts.append("script = {}".format(self.script))
+        parts.append("bs_filename = {}".format(self.bs_filename))
+        parts.append("grammar_dir = {}".format(self.grammar_dir))
+        parts.append("grammar_filename = {}".format(self.grammar_filename))
+        parts.append("scanner_cc_filename = {}".format(self.scanner_cc_filename))
+        parts.append("wgsl_shared_lib = {}".format(self.wgsl_shared_lib))
+        return "Options({})".format(",".join(parts))
+
+def newer_than(first,second):
+    """
+    Returns true if file 'first' is newer than 'second',
+    or if 'second' does not exist.
+    """
+    if not os.path.exists(first):
+        raise Exception("Missing file {}".format(first))
+    if not os.path.exists(second):
+        return True
+    first_time = os.path.getmtime(first)
+    second_time = os.path.getmtime(second)
+    return first_time >= second_time
 
 
 def read_lines_from_file(filename, exclusions):
@@ -34,7 +56,8 @@ def read_lines_from_file(filename, exclusions):
     """
     file = open(filename, "r")
     # Break up the input into lines, and skip empty lines.
-    parts = [j for i in [i.split("\n") for i in file.readlines()] for j in i if len(j) > 0]
+    parts = [j for i in [i.split("\n") for i in file.readlines()]
+             for j in i if len(j) > 0]
     result = []
     include_re = re.compile('path:\s+(\S+)')
     for line in parts:
@@ -42,35 +65,17 @@ def read_lines_from_file(filename, exclusions):
         if m:
             included_file = m.group(1)
             if included_file not in exclusions:
-                print("including {}".format(included_file))
-                result.extend(read_lines_from_file(included_file,exclusions))
+                result.extend(read_lines_from_file(included_file, exclusions))
                 continue
         result.append(line)
     return result
 
-scanner_lines = read_lines_from_file(sys.argv[1], {'wgsl.recursive.bs.include'})
-
-# Skip lines like:
-# <pre class='def'>
-# </pre>
-scanner_lines = filter(lambda s: not s.startswith("<pre") and not s.startswith("</pre"), scanner_lines)
-
-# Replace comments in rule text
-scanner_lines = [re.sub('<!--.*-->', '', line) for line in scanner_lines]
-
-grammar_filename = sys.argv[2]
-grammar_path = os.path.dirname(grammar_filename)
-os.makedirs(grammar_path, exist_ok=True)
-grammar_file = open(grammar_filename, "w")
-
-# Global variable holding the current line text.
-line = ""
 
 """
 Scanner classes are used to parse contiguous sets of lines in the WGSL bikeshed
 source text.
 """
+
 class Scanner:
 
     @staticmethod
@@ -130,6 +135,9 @@ def parse(lines, i):
 
 
 class scanner_rule(Scanner):
+    """
+    A scanner that reads grammar rules from bikeshed source text.
+    """
     @staticmethod
     def name():
         return "rule"
@@ -188,7 +196,10 @@ def parse(lines, i):
         return (None, None, None)
 
 
-class scanner_example(Scanner):  # Not an example of a scanner, scanner of examples from specification
+class scanner_example(Scanner):
+    """
+    A scanner that reads WGSL program examples from bikeshed source text.
+    """
     @staticmethod
     def name():
         return "example"
@@ -231,134 +242,24 @@ def parse(lines, i):
         return (None, line, 0)
 
 
-scanner_spans = [scanner_rule,
-                 scanner_example]
-
-
-scanner_components = {i.name(): {} for i in scanner_spans}
-
-scanner_i = 0 # line number of the current line
-scanner_span = None
-scanner_record = False
-last_key = None   # The rule name, if the most recently parsed thing was a rule.
-last_value = None # The most recently parsed thing
-while scanner_i < len(scanner_lines):
-    # Try both the rule and the example scanners.
-    for j in scanner_spans:
-        scanner_begin = j.begin(scanner_lines, scanner_i)
-        if scanner_begin[0]:
-            # Use this scanner
-            scanner_span = None
-            scanner_record = False
-            last_key = None
-            last_value = None
-            scanner_span = j
-            if scanner_begin[1] != None:
-                last_key = scanner_begin[1]
-            scanner_i += scanner_begin[-1]
-        if scanner_span == j:
-            # Check if we should stop using this scanner.
-            scanner_end = j.end(scanner_lines, scanner_i)
-            if scanner_end[0]:
-                # Yes, stop using this scanner.
-                scanner_span = None
-                scanner_record = False
-                last_key = None
-                last_value = None
-                scanner_i += scanner_end[-1]
-    if scanner_span != None:
-        # We're are in the middle of scanning a span of lines.
-        if scanner_record:
-            scanner_skip = scanner_span.skip(scanner_lines, scanner_i)
-            if scanner_skip[0]:
-                # Stop recording
-                scanner_record = False
-                scanner_i += scanner_skip[-1]  # Advance past this line
-        else:
-            # Should we start recording?
-            scanner_record_value = scanner_span.record(
-                scanner_lines, scanner_i)
-            if scanner_record_value[0]:
-                # Start recording
-                scanner_record = True
-                if last_key != None and scanner_span.name() == "example":  # TODO Remove special case
-                    if last_key in scanner_components[scanner_span.name()]:
-                        raise RuntimeError("line " + str(scanner_i) + ": example with duplicate name: " + last_key)
-                    else:
-                        scanner_components[scanner_span.name()][last_key] = []
-                scanner_i += scanner_record_value[-1]
-        if scanner_record and scanner_span.valid(scanner_lines, scanner_i):
-            # Try parsing this line
-            scanner_parse = scanner_span.parse(scanner_lines, scanner_i)
-            if scanner_parse[2] < 0:
-                # This line continues the rule parsed on the immediately preceding lines.
-                if (scanner_parse[1] != None and
-                        last_key != None and
-                        last_value != None and
-                        last_key in scanner_components[scanner_span.name()] and
-                        len(scanner_components[scanner_span.name()][last_key]) > 0):
-                    scanner_components[scanner_span.name(
-                    )][last_key][-1] += scanner_parse[1]
-            else:
-                if scanner_parse[0] != None:
-                    # It's a rule, with name in the 0'th position.
-                    last_key = scanner_parse[0]
-                    if scanner_parse[1] != None:
-                        last_value = scanner_parse[1]
-                        if last_key not in scanner_components[scanner_span.name()]:
-                            # Create a new entry for this rule
-                            scanner_components[scanner_span.name()][last_key] = [
-                                last_value]
-                        else:
-                            # Append to the existing entry.
-                            scanner_components[scanner_span.name()][last_key].append(
-                                last_value)
-                    else:
-                        # Reset
-                        last_value = None
-                        scanner_components[scanner_span.name()][last_key] = []
-                else:
-                    # It's example text
-                    if scanner_parse[1] != None:
-                        last_value = scanner_parse[1]
-                        scanner_components[scanner_span.name()][last_key].append(
-                            last_value)
-                scanner_i += scanner_parse[-1] # Advance line index
-    scanner_i += 1
-
-
-grammar_source = ""
-
-grammar_source += r"""
-module.exports = grammar({
-    name: 'wgsl',
-
-    externals: $ => [
-        $._block_comment,
-    ],
-
-    extras: $ => [
-        $._comment,
-        $._block_comment,
-        $._blankspace,
-    ],
-
-    inline: $ => [
-        $.global_decl,
-        $._reserved,
-    ],
-
-    // WGSL has no parsing conflicts.
-    conflicts: $ => [],
-
-    word: $ => $.ident_pattern_token,
-
-    rules: {
-"""[1:-1]
-grammar_source += "\n"
-
+# These fixed tokens must be parsed by the custom scanner.
+# This is needed to support template disambiguation.
+custom_simple_tokens = {
+    '>': '_greater_than',
+    '>=': '_greater_than_equal',
+    '<': '_less_than',
+    '<=': '_less_than_equal',
+    '<<': '_shift_left',
+    '>>': '_shift_right',
+    '<<=': '_shift_left_assign',
+    '>>=': '_shift_right_assign'
+}
 
 def grammar_from_rule_item(rule_item):
+    """
+    Returns a string for the JavaScript expression for this rule.
+    """
+    global custom_simple_tokens
     result = ""
     item_choice = False
     items = []
@@ -369,16 +270,50 @@ def grammar_from_rule_item(rule_item):
         i_skip = 0
         i_item = ""
         if rule_item[i].startswith("[=syntax/"):
+            # From '[=syntax/foobar=]' pick out 'foobar'
             i_item = rule_item[i].split("[=syntax/")[1].split("=]")[0]
             i_item = f"$.{i_item}"
         elif rule_item[i].startswith("`/"):
+            # From "`/pattern/`" pick out '/pattern/'
             i_item = f"token({rule_item[i][1:-1]})"
         elif rule_item[i].startswith("`'"):
-            i_item = f"token({rule_item[i][1:-1]})"
+            # From "`'&&'`" pick out '&&'
+            content = rule_item[i][2:-2]
+            # If the name maps to a custom token, then use that, otherwise,
+            # use the content name itself.
+            if content in custom_simple_tokens:
+                i_item = custom_simple_tokens[content]
+            else:
+                i_item = f"token('{content}')"
+        elif rule_item[i].startswith("<a") and rule_item[i+1].endswith("</a>"):
+            # From ['<a', 'for=syntax_sym>_disambiguate_template</a>']
+            # pick out '_disambiguate_template'
+            match = re.fullmatch("[^>]*>(.*)</a>",rule_item[i+1])
+            token = match.group(1)
+            i_item = f"$.{token}"
+            i += 1
+        elif rule_item[i].startswith("<a"):
+            # From ['<a', 'for=syntax_kw', "lt=true>`'true'`</a>"]
+            # pick out "true"
+            match = re.fullmatch("[^>]*>`'(.*)'`</a>",rule_item[i+2])
+            if match:
+                token = match.group(1)
+            else:
+                # Now try it without `' '` surrounding the element content text.
+                # From ['<a', 'for=syntax_sym', 'lt=_disam>_disam</a>']
+                # pick out "_disam"
+                match = re.fullmatch("[^>]*>(.*)</a>",rule_item[i+2])
+                token = match.group(1)
+            if token in custom_simple_tokens:
+                token = custom_simple_tokens[token]
+                i_item = f"$.{token}"
+            elif token.startswith("_") and token != "_":
+                i_item = f"$.{token}"
+            else:
+                i_item = f"""token('{token}')"""
             i += 2
         elif rule_item[i] == "(":
+            # Extract a parenthesized rule
             j = i + 1
             j_span = 0
             rule_subitem = []
@@ -433,236 +368,557 @@ def grammar_from_rule(key, value):
     return result
 
 
-scanner_components[scanner_rule.name()]["_comment"] = [["`/\\/\\/.*/`"]]
+class ScanResult(dict):
+    """
+    A dictionary containing the results of scanning the WGSL spec.
+
+    self['raw']
+         A list of the Bikeshed source text lines, after include expansion,
+         but without further filtering.
+    self['rule']
+         A dictionary mapping a parsed grammar rule to its definition.
+    self['example']
+         A dictionary mapping the name of an example to the
+         WGSL source text for the example.
+         The name is taken from the "heading" attribute of the <div class='example'> element.
+    """
+    def __init__(self):
+        self['rule'] = dict()
+        self['example'] = dict()
+        self['raw'] = []
 
-# Following sections are to allow out-of-order per syntactic grammar appearance of rules
+def read_spec(options):
+    """
+    Returns a ScanResult from parsing the Bikeshed source of the WGSL spec.
+    """
+    result = ScanResult()
 
-rule_skip = set()
+    # Get the input bikeshed text.
+    scanner_lines = read_lines_from_file(
+        options.bs_filename, {'wgsl.recursive.bs.include'})
+    # Make a *copy* of the text input because we'll filter it later.
+    result['raw'] = [x for x in scanner_lines]
 
-for rule in ["translation_unit", "global_directive", "global_decl"]:
-    grammar_source += grammar_from_rule(
-        rule, scanner_components[scanner_rule.name()][rule]) + ",\n"
-    rule_skip.add(rule)
+    # Skip lines like:
+    # <pre class='def'>
+    # </pre>
+    scanner_lines = filter(lambda s: not s.startswith(
+        "<pre") and not s.startswith("</pre"), scanner_lines)
+
+    # Replace comments in rule text
+    scanner_lines = [re.sub('<!--.*-->', '', line) for line in scanner_lines]
 
+    os.makedirs(options.grammar_dir, exist_ok=True)
 
-for key, value in scanner_components[scanner_rule.name()].items():
-    if key.endswith("_literal") and key not in rule_skip:
-        grammar_source += grammar_from_rule(key, value) + ",\n"
-        rule_skip.add(key)
+    # Global variable holding the current line text.
+    line = ""
 
 
-# Extract constituents
+    scanner_spans = [scanner_rule,
+                     scanner_example]
 
+    scanner_i = 0  # line number of the current line
+    scanner_span = None
+    scanner_record = False
+    # The rule name, if the most recently parsed thing was a rule.
+    last_key = None
+    last_value = None  # The most recently parsed thing
+    while scanner_i < len(scanner_lines):
+        # Try both the rule and the example scanners.
+        for j in scanner_spans:
+            scanner_begin = j.begin(scanner_lines, scanner_i)
+            if scanner_begin[0]:
+                # Use this scanner
+                scanner_span = None
+                scanner_record = False
+                last_key = None
+                last_value = None
+                scanner_span = j
+                if scanner_begin[1] != None:
+                    last_key = scanner_begin[1]
+                scanner_i += scanner_begin[-1]
+            if scanner_span == j:
+                # Check if we should stop using this scanner.
+                scanner_end = j.end(scanner_lines, scanner_i)
+                if scanner_end[0]:
+                    # Yes, stop using this scanner.
+                    scanner_span = None
+                    scanner_record = False
+                    last_key = None
+                    last_value = None
+                    scanner_i += scanner_end[-1]
+        if scanner_span != None:
+            # We're in the middle of scanning a span of lines.
+            if scanner_record:
+                scanner_skip = scanner_span.skip(scanner_lines, scanner_i)
+                if scanner_skip[0]:
+                    # Stop recording
+                    scanner_record = False
+                    scanner_i += scanner_skip[-1]  # Advance past this line
+            else:
+                # Should we start recording?
+                scanner_record_value = scanner_span.record(
+                    scanner_lines, scanner_i)
+                if scanner_record_value[0]:
+                    # Start recording
+                    scanner_record = True
+                    if last_key != None and scanner_span.name() == "example":  # TODO Remove special case
+                        if last_key in result[scanner_span.name()]:
+                            raise RuntimeError(
+                                "line " + str(scanner_i) + ": example with duplicate name: " + last_key)
+                        else:
+                            result[scanner_span.name()][last_key] = []
+                    scanner_i += scanner_record_value[-1]
+            if scanner_record and scanner_span.valid(scanner_lines, scanner_i):
+                # Try parsing this line
+                scanner_parse = scanner_span.parse(scanner_lines, scanner_i)
+                if scanner_parse[2] < 0:
+                    # This line continues the rule parsed on the immediately preceding lines.
+                    if (scanner_parse[1] != None and
+                            last_key != None and
+                            last_value != None and
+                            last_key in result[scanner_span.name()] and
+                            len(result[scanner_span.name()][last_key]) > 0):
+                        result[scanner_span.name(
+                        )][last_key][-1] += scanner_parse[1]
+                else:
+                    if scanner_parse[0] != None:
+                        # It's a rule, with name in the 0'th position.
+                        last_key = scanner_parse[0]
+                        if scanner_parse[1] != None:
+                            last_value = scanner_parse[1]
+                            if last_key not in result[scanner_span.name()]:
+                                # Create a new entry for this rule
+                                result[scanner_span.name()][last_key] = [
+                                    last_value]
+                            else:
+                                # Append to the existing entry.
+                                result[scanner_span.name()][last_key].append(
+                                    last_value)
+                        else:
+                            # Reset
+                            last_value = None
+                            result[scanner_span.name()][last_key] = []
+                    else:
+                        # It's example text
+                        if scanner_parse[1] != None:
+                            last_value = scanner_parse[1]
+                            result[scanner_span.name()][last_key].append(
+                                last_value)
+                    scanner_i += scanner_parse[-1]  # Advance line index
+        scanner_i += 1
 
-def not_token_only(value):
-    result = False
-    for i in value:
-        result = result or len(
-            [j for j in i if not j.startswith("`/") and not j.startswith("`'")]) > 0
+    result[scanner_rule.name()]["_comment"] = [["`/\\/\\/.*/`"]]
     return result
 
 
-for key, value in scanner_components[scanner_rule.name()].items():
-    if not key.startswith("_") and key != "ident" and not_token_only(value) and key not in rule_skip:
-        grammar_source += grammar_from_rule(key, value) + ",\n"
-        rule_skip.add(key)
+def flow_extract(options, scan_result):
+    """
+    Write the tree-sitter grammar definition for WGSL
+
+    Args:
+        options: Options
+        scan_result: the ScanResult holding rules and examples extracted from the WGSL spec
+    """
+    print("{}: Extract...".format(options.script))
 
+    input_bs_is_fresh = True
+    previously_scanned_bs_file = options.bs_filename + ".pre"
+    if not os.path.exists(options.grammar_filename):
+        # Must regenerate the tree-sitter grammar file
+        pass
+    else:
+        # Check against previously scanned text
+        if os.path.exists(previously_scanned_bs_file):
+            with open(previously_scanned_bs_file,"r") as previous_file:
+                previous_lines = previous_file.readlines()
+                if previous_lines == scan_result['raw']:
+                    input_bs_is_fresh = False
 
-# Extract tokens
+    if input_bs_is_fresh:
+        rules = scan_result['rule']
 
+        grammar_source = ""
 
-for key, value in scanner_components[scanner_rule.name()].items():
-    if not key.startswith("_") and key != "ident" and key not in rule_skip:
-        grammar_source += grammar_from_rule(key, value) + ",\n"
-        rule_skip.add(key)
+        grammar_source += r"""
+        module.exports = grammar({
+            name: 'wgsl',
 
+            externals: $ => [
+                $._block_comment,
+                $._disambiguate_template,
+                $._template_args_start,
+                $._template_args_end,
+                $._less_than,
+                $._less_than_equal,
+                $._shift_left,
+                $._shift_left_assign,
+                $._greater_than,
+                $._greater_than_equal,
+                $._shift_right,
+                $._shift_right_assign,
+                $._error_sentinel,
+            ],
 
-# Extract underscore
+            extras: $ => [
+                $._comment,
+                $._block_comment,
+                $._blankspace,
+            ],
 
+            inline: $ => [
+                $.global_decl,
+                $._reserved,
+            ],
 
-for key, value in scanner_components[scanner_rule.name()].items():
-    if key.startswith("_") and key != "_comment" and key != "_blankspace" and key not in rule_skip:
-        grammar_source += grammar_from_rule(key, value) + ",\n"
-        rule_skip.add(key)
+            // WGSL has no parsing conflicts.
+            conflicts: $ => [],
 
+            word: $ => $.ident_pattern_token,
 
-# Extract ident
+            rules: {
+        """[1:-1]
+        grammar_source += "\n"
 
+        # The following sections allow rules to appear out of their order in the syntactic grammar.
 
-grammar_source += grammar_from_rule(
-    "ident", scanner_components[scanner_rule.name()]["ident"]) + ",\n"
-rule_skip.add("ident")
+        rule_skip = set()
 
+        for rule in ["translation_unit", "global_directive", "global_decl"]:
+            grammar_source += grammar_from_rule(
+                rule, rules[rule]) + ",\n"
+            rule_skip.add(rule)
 
-# Extract comment
 
+        # Extract literals
 
-grammar_source += grammar_from_rule(
-    "_comment", scanner_components[scanner_rule.name()]["_comment"]) + ",\n"
-rule_skip.add("_comment")
 
+        for key, value in rules.items():
+            if key.endswith("_literal") and key not in rule_skip:
+                grammar_source += grammar_from_rule(key, value) + ",\n"
+                rule_skip.add(key)
 
-# Extract space
 
+        # Extract constituents
 
-grammar_source += grammar_from_rule(
-    "_blankspace", scanner_components[scanner_rule.name()]["_blankspace"])
-rule_skip.add("_blankspace")
 
+        def not_token_only(value):
+            result = False
+            for i in value:
+                result = result or len(
+                    [j for j in i if not j.startswith("`/") and not j.startswith("`'")]) > 0
+            return result
 
-grammar_source += "\n"
-grammar_source += r"""
-    },
-});
-"""[1:-1]
 
-headerTemplate = Template(HEADER)
-grammar_file.write(headerTemplate.substitute(
-    YEAR=date.today().year) + grammar_source + "\n")
-grammar_file.close()
+        for key, value in rules.items():
+            if not key.startswith("_") and not_token_only(value) and key not in rule_skip:
+                grammar_source += grammar_from_rule(key, value) + ",\n"
+                rule_skip.add(key)
 
-with open(grammar_path + "/package.json", "w") as grammar_package:
-    grammar_package.write('{\n')
-    grammar_package.write('    "name": "tree-sitter-wgsl",\n')
-    grammar_package.write('    "dependencies": {\n')
-    grammar_package.write('        "nan": "^2.15.0"\n')
-    grammar_package.write('    },\n')
-    grammar_package.write('    "devDependencies": {\n')
-    grammar_package.write('        "tree-sitter-cli": "^0.20.0"\n')
-    grammar_package.write('    },\n')
-    grammar_package.write('    "main": "bindings/node"\n')
-    grammar_package.write('}\n')
 
-# External scanner for nested block comments
-# For the API, see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
-# See: https://github.com/tree-sitter/tree-sitter-rust/blob/master/src/scanner.c
+        # Extract tokens
 
-os.makedirs(os.path.join(grammar_path, "src"), exist_ok=True)
-with open(os.path.join(grammar_path, "src", "scanner.c"), "w") as external_scanner:
-    external_scanner.write(r"""
-#include 
-#include 
 
-enum TokenType {
-  BLOCK_COMMENT,
-};
+        for key, value in rules.items():
+            if not key.startswith("_") and key not in rule_skip:
+                grammar_source += grammar_from_rule(key, value) + ",\n"
+                rule_skip.add(key)
 
-void *tree_sitter_wgsl_external_scanner_create() { return NULL; }
-void tree_sitter_wgsl_external_scanner_destroy(void *p) {}
-unsigned tree_sitter_wgsl_external_scanner_serialize(void *p, char *buffer) { return 0; }
-void tree_sitter_wgsl_external_scanner_deserialize(void *p, const char *b, unsigned n) {}
 
-static void advance(TSLexer *lexer) {
-  lexer->advance(lexer, false);
-}
+        # Extract underscore
 
-bool tree_sitter_wgsl_external_scanner_scan(void *payload, TSLexer *lexer,
-                                            const bool *valid_symbols) {
-  while (iswspace(lexer->lookahead)) lexer->advance(lexer, true);
-
-  if (lexer->lookahead == '/') {
-    advance(lexer);
-    if (lexer->lookahead != '*') return false;
-    advance(lexer);
-
-    bool after_star = false;
-    unsigned nesting_depth = 1;
-    for (;;) {
-      switch (lexer->lookahead) {
-        case '\0':
-          /* This signals the end of input. Since nesting depth is
-           * greater than zero, the scanner is in the middle of
-           * a block comment. Block comments must be affirmatively
-           * terminated.
-           */
-          return false;
-        case '*':
-          advance(lexer);
-          after_star = true;
-          break;
-        case '/':
-          if (after_star) {
-            advance(lexer);
-            after_star = false;
-            nesting_depth--;
-            if (nesting_depth == 0) {
-              lexer->result_symbol = BLOCK_COMMENT;
-              return true;
-            }
-          } else {
-            advance(lexer);
-            after_star = false;
-            if (lexer->lookahead == '*') {
-              nesting_depth++;
-              advance(lexer);
-            }
-          }
-          break;
-        default:
-          advance(lexer);
-          after_star = false;
-          break;
-      }
-    }
-  }
-
-  return false;
-}
-"""[1:-1])
-
-
-# Use "npm install" to create the tree-sitter CLI that has WGSL
-# support.  But "npm install" fetches data over the network.
-# That can be flaky, so only invoke it when needed.
-if os.path.exists("grammar/node_modules/tree-sitter-cli") and os.path.exists("grammar/node_modules/nan"):
-    # "npm install" has been run already.
-    pass
-else:
-    subprocess.run(["npm", "install"], cwd=grammar_path, check=True)
-subprocess.run(["npx", "tree-sitter", "generate"],
-               cwd=grammar_path, check=True)
-# Following are commented for future reference to expose playground
-# Remove "--docker" if local environment matches with the container
-# subprocess.run(["npx", "tree-sitter", "build-wasm", "--docker"],
-#                cwd=grammar_path, check=True)
-
-Language.build_library(
-    grammar_path + "/build/wgsl.so",
-    [
-        grammar_path,
-    ]
-)
-
-WGSL_LANGUAGE = Language(grammar_path + "/build/wgsl.so", "wgsl")
-
-parser = Parser()
-parser.set_language(WGSL_LANGUAGE)
-
-error_list = []
-
-for key, value in scanner_components[scanner_example.name()].items():
-    if "expect-error" in key:
-        continue
-    value = value[:]
-    if "function-scope" in key:
-        value = ["fn function__scope____() {"] + value + ["}"]
-    if "type-scope" in key:
-        # Initiailize with zero-value expression.
-        value = ["const type_scope____: "] + value + ["="] + value + ["()"] + [";"]
-    program = "\n".join(value)
-    tree = parser.parse(bytes(program, "utf8"))
-    if tree.root_node.has_error:
-        error_list.append((program, tree))
-    # TODO Semantic CI
-
-if len(error_list) > 0:
-    for error in error_list:
-        print("Example:")
-        print(error[0])
-        print("Tree:")
-        print(error[1].root_node.sexp())
-    raise Exception("Grammar is not compatible with examples!")
+
+        for key, value in rules.items():
+            if key.startswith("_") and key != "_comment" and key != "_blankspace" and key not in rule_skip:
+                grammar_source += grammar_from_rule(key, value) + ",\n"
+                rule_skip.add(key)
+
+
+        # Extract ident
+
+
+        grammar_source += grammar_from_rule( "ident", rules["ident"]) + ",\n"
+        rule_skip.add("ident")
+
+
+        # Extract comment
+
+
+        grammar_source += grammar_from_rule(
+            "_comment", rules["_comment"]) + ",\n"
+        rule_skip.add("_comment")
+
+
+        # Extract space
+
+
+        grammar_source += grammar_from_rule(
+            "_blankspace", rules["_blankspace"])
+        rule_skip.add("_blankspace")
+
+
+        grammar_source += "\n"
+        grammar_source += r"""
+            },
+        });
+        """[1:-1]
+
+        HEADER = """
+        // Copyright (C) [$YEAR] World Wide Web Consortium,
+        // (Massachusetts Institute of Technology, European Research Consortium for
+        // Informatics and Mathematics, Keio University, Beihang).
+        // All Rights Reserved.
+        //
+        // This work is distributed under the W3C (R) Software License [1] in the hope
+        // that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
+        // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+        //
+        // [1] http://www.w3.org/Consortium/Legal/copyright-software
+
+        // **** This file is auto-generated. Do not edit. ****
+
+        """.lstrip()
+
+    if input_bs_is_fresh:
+        print("{}: ...Creating tree-sitter parser: {}".format(options.script,options.grammar_filename))
+        with open(options.grammar_filename, "w") as grammar_file:
+            headerTemplate = Template(HEADER)
+            grammar_file.write(headerTemplate.substitute(
+                YEAR=date.today().year) + grammar_source + "\n")
+            grammar_file.close()
+
+    if input_bs_is_fresh:
+        # Save scanned lines for next time.
+        with open(previously_scanned_bs_file,"w") as previous_file:
+            for line in scan_result['raw']:
+                previous_file.write(line)
+
+    with open(os.path.join(options.grammar_dir,"package.json"), "w") as grammar_package:
+        grammar_package.write('{\n')
+        grammar_package.write('    "name": "tree-sitter-wgsl",\n')
+        grammar_package.write('    "dependencies": {\n')
+        grammar_package.write('        "nan": "^2.15.0"\n')
+        grammar_package.write('    },\n')
+        grammar_package.write('    "devDependencies": {\n')
+        grammar_package.write('        "tree-sitter-cli": "^0.20.7"\n')
+        grammar_package.write('    },\n')
+        grammar_package.write('    "main": "bindings/node"\n')
+        grammar_package.write('}\n')
+
+    return True
+
+def flow_build(options):
+    """
+    Build the shared library for the custom tree-sitter scanner.
+    """
+
+    print("{}: Build...".format(options.script))
+    if not os.path.exists(options.grammar_filename):
+        print("missing grammar file: {}".format(options.grammar_filename))
+        return False
+
+    # External scanner for nested block comments
+    # For the API, see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
+    # See: https://github.com/tree-sitter/tree-sitter-rust/blob/master/src/scanner.c
+
+    os.makedirs(os.path.join(options.grammar_dir, "src"), exist_ok=True)
+
+    # Remove the old custom scanner, if it exists.
+    scanner_c_staging = os.path.join(options.grammar_dir, "src", "scanner.c")
+    if os.path.exists(scanner_c_staging):
+        os.remove(scanner_c_staging)
+    # Copy the new scanner into place, if newer
+    scanner_cc_staging = os.path.join(options.grammar_dir, "src", "scanner.cc")
+    if newer_than(options.scanner_cc_filename, scanner_cc_staging):
+        shutil.copyfile(options.scanner_cc_filename, scanner_cc_staging)
+
+
+    # Use "npm install" to create the tree-sitter CLI that has WGSL
+    # support.  But "npm install" fetches data over the network.
+    # That can be flaky, so only invoke it when needed.
+    if os.path.exists("grammar/node_modules/tree-sitter-cli") and os.path.exists("grammar/node_modules/nan"):
+        # "npm install" has been run already.
+        pass
+    else:
+        subprocess.run(["npm", "install"], cwd=options.grammar_dir, check=True)
+    subprocess.run(["npx", "tree-sitter", "generate"],
+                   cwd=options.grammar_dir, check=True)
+    # Following are commented for future reference to expose playground
+    # Remove "--docker" if local environment matches with the container
+    # subprocess.run(["npx", "tree-sitter", "build-wasm", "--docker"],
+    #                cwd=options.grammar_dir, check=True)
+
+
+    def build_library(output_file, input_files):
+        # The py-tree-sitter build_library method wasn't compiling with C++17 flags,
+        # so invoke the compile ourselves.
+        compiler = new_compiler()
+        clang_like = isinstance(compiler, UnixCCompiler)
+
+        # Compile .c and .cc files down to object files.
+        object_files = []
+        includes = [os.path.dirname(input_files[0])]
+        for src in input_files:
+            flags = []
+            if src.endswith(".cc"):
+                if clang_like:
+                    flags.extend(["-fPIC", "-std=c++17"])
+                else:
+                    flags.extend(["/std:c++17"])
+            objects = compiler.compile([src],
+                                       extra_preargs=flags,
+                                       include_dirs=includes)
+            object_files.extend(objects)
+
+        # Link object files to a single shared library.
+        link_flags = []
+        if clang_like:
+            link_flags = ["-lstdc++"]
+        compiler.link_shared_object(
+                object_files,
+                output_file,
+                target_lang="c++",
+                extra_postargs=link_flags)
+
+    if newer_than(scanner_cc_staging, options.wgsl_shared_lib) or newer_than(options.grammar_filename,options.wgsl_shared_lib):
+        print("{}: ...Building custom scanner: {}".format(options.script,options.wgsl_shared_lib))
+        build_library(options.wgsl_shared_lib,
+                      [scanner_cc_staging,
+                       os.path.join(options.grammar_dir,"src","parser.c")])
+    return True
+
+def flow_examples(options,scan_result):
+    """
+    Check the tree-sitter parser can parse the examples from the WGSL spec.
+
+    Args:
+        options: Options
+        scan_result: the ScanResult holding rules and examples extracted from the WGSL spec
+    """
+    print("{}: Examples...".format(options.script))
+
+    examples = scan_result['example']
+    WGSL_LANGUAGE = Language(options.wgsl_shared_lib, "wgsl")
+
+    parser = Parser()
+    parser.set_language(WGSL_LANGUAGE)
+
+    errors = 0
+    for key, value in examples.items():
+        print(".",flush=True,end='')
+        if "expect-error" in key:
+            continue
+        value = value[:]
+        if "function-scope" in key:
+            value = ["fn function__scope____() {"] + value + ["}"]
+        if "type-scope" in key:
+            # Initialize with zero-value expression.
+            value = ["const type_scope____: "] + \
+                value + ["="] + value + ["()"] + [";"]
+        program = "\n".join(value)
+        # print("**************** BEGIN ****************")
+        # print(program)
+        # print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
+        tree = parser.parse(bytes(program, "utf8"))
+        if tree.root_node.has_error:
+            print("Example:")
+            print(program)
+            print("Tree:")
+            print(tree.root_node.sexp())
+            errors = errors + 1
+        # print("***************** END *****************")
+        # print("")
+        # print("")
+
+        # TODO Semantic CI
+
+    if errors > 0:
+        raise Exception("Grammar is not compatible with examples!")
+    print("Ok",flush=True)
+    return True
+
+
+FLOW_HELP = """
+A complete flow has the following steps, in order
+    'x' (think 'extract'): Generate a tree-sitter grammar definition from the
+          bikeshed source for the WGSL specification.
+    'b' (think 'build'): Build the tree-sitter parser
+    'e' (think 'example'): Check the examples from the WGSL spec parse correctly.
+    't' (think 'test'): Run parser unit tests.
+
+You can be more selective by specifying the --flow option followed by a word
+containing the letters for the steps to run.
+
+For example, the following will extract the grammar, build the tree-sitter parser,
+and check that the examples from the spec parse correctly:
+
+    extract-grammar --flow xbe
+
+The order of the letters is not significant. The steps will always run in the
+same relative order as the default flow.
+"""
+DEFAULT_FLOW="xbet"
+
+def main():
+    argparser = argparse.ArgumentParser(
+            prog="extract-grammar.py",
+            description="Extract the grammar from the WGSL spec and run checks",
+            add_help=False # We want to print our own additional formatted help
+            )
+    argparser.add_argument("--help","-h",
+                           action='store_true',
+                           help="Show this help message, then exit")
+    argparser.add_argument("--verbose","-v",
+                           action='store_true',
+                           help="Be verbose")
+    argparser.add_argument("--flow",
+                           action='store',
+                           help="The flow steps to run. Default is the whole flow.",
+                           default=DEFAULT_FLOW)
+    argparser.add_argument("--tree-sitter-dir",
+                           help="Target directory for the tree-sitter parser",
+                           default="grammar")
+    argparser.add_argument("--spec",
+                           action='store',
+                           help="Bikeshed source file for the WGSL spec",
+                           default="index.bs")
+    argparser.add_argument("--scanner",
+                           action='store',
+                           help="source file for the tree-sitter custom scanner",
+                           default="scanner.cc")
+
+    args = argparser.parse_args()
+    if args.help:
+        print(argparser.format_help())
+        print(FLOW_HELP)
+        return 0
+
+    options = Options(args.spec,args.tree_sitter_dir,args.scanner)
+    options.verbose = args.verbose
+    if args.verbose:
+        print(options)
+
+    scan_result = None
+
+    if 'x' in args.flow:
+        scan_result = read_spec(options)
+        if not flow_extract(options,scan_result):
+            return 1
+    if 'b' in args.flow:
+        if not flow_build(options):
+            return 1
+    if 'e' in args.flow:
+        if scan_result is None:
+            scan_result = read_spec(options)
+        if not flow_examples(options,scan_result):
+            return 1
+    if 't' in args.flow:
+        test_options = wgsl_unit_tests.Options(options.wgsl_shared_lib)
+        if not wgsl_unit_tests.run_tests(test_options):
+            return 1
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/wgsl/index.bs b/wgsl/index.bs
index 4ce62da0ec..942e84fc22 100644
--- a/wgsl/index.bs
+++ b/wgsl/index.bs
@@ -110,6 +110,9 @@ div.syntax > p > a > code {
   font-style: normal;
   font-weight: bold;
 }
+.hidden {
+  display: none
+}
 table.data.builtin tbody{
   border-bottom: 0;
 }
@@ -532,16 +535,23 @@ The program text [=shader-creation error|must not=] include a null code point (`
 ## Parsing ## {#parsing}
 
 To parse a WGSL program:
+
 1. Remove [=comments=]:
     * Replace the first comment with a space code point (`U+0020`).
     * Repeat until no comments remain.
-2. Parse the whole text, attempting to match the [=syntax/translation_unit=] grammar rule.
+2. Find [=template lists=], using the [=template list discovery|algorithm=] in [[#template-lists-sec]].
+3. Parse the whole text, attempting to match the [=syntax/translation_unit=] grammar rule.
     Parsing uses a LALR(1) parser (one token of lookahead) [[!DeRemer1969]],
     with the following customization:
 
     * Tokenization is interleaved with parsing, and is context-aware.
       When the parser requests the next token:
         * Consume and ignore an initial sequence of [=blankspace=] code points.
-        * A token candidate is any WGSL [=token=] formed from the non-empty prefix of the remaining unconsumed code points.
-        * The token returned is the longest [=token candidate=] that is also a valid lookahead token for the current parser state. [[!VanWyk2007]]
+        * If the next code point is the start of a [=template list=], consume it and return [=syntax_sym/_template_args_start=].
+        * If the next code point is the end of a [=template list=], consume it and return [=syntax_sym/_template_args_end=].
+        * Otherwise:
+            * A token candidate is any WGSL [=token=] formed from the non-empty prefix of the remaining unconsumed code points.
+            * The token returned is the longest [=token candidate=] that is also a valid lookahead token for the current parser state. [[!VanWyk2007]]
+
 A [=shader-creation error=] results if:
 
 * the entire source text cannot be converted into a finite sequence of valid tokens, or
@@ -966,6 +976,149 @@ The spelling of the token may be the same as an [=identifier=], but the token do
 Section [[#context-dependent-name-tokens]] lists all such tokens.
 
+## Template Lists ## {#template-lists-sec}
+
+Template parameterization is a way to specify parameters that modify a general concept.
+To write a template parameterization, write the general concept, followed by a <dfn>template list</dfn>.
+
+Ignoring [=comments=] and [=blankspace=], a template list is:
+* An initial `'<'` (U+003C) code point, then
+* A [=syntax_sym/comma=]-separated list of one or more <dfn>template parameters</dfn>, then
+* An optional trailing [=syntax_sym/comma=], then
+* A terminating `'>'` (U+003E) code point.
+
+The form of a [=template parameter=] is implicitly defined by the [=template list discovery=] algorithm below.
+Generally, they are names, expressions, or types.
+
+Note: For example, the phrase `vec3<f32>` is a template parameterization, where `vec3` is the general concept being modified,
+and `<f32>` is a template list containing one parameter, the [=f32=] type.
+Together, `vec3<f32>` denotes a specific [=vector=] type.
+
+Note: For example, the phrase `var<storage,read_write>` modifies the general `var` concept with template parameters `storage` and `read_write`.
+
+<div class=example>
+Note: For example, the phrase `array<vec4<f32>>` has two template parameterizations:
+* `vec4<f32>` modifies the general `vec4` concept with template parameter `f32`.
+* `array<vec4<f32>>` modifies the general `array` concept with template parameter `vec4<f32>`.
+</div>
+
+The `'<'` (U+003C) and `'>'` (U+003E) code points that delimit a template list are also used when spelling:
+* A comparison operator in a [=syntax/relational_expression=].
+* A shift operator in a [=syntax/shift_expression=].
+* A [=syntax/compound_assignment_operator=] for performing a shift operation followed by an assignment.
+
+The syntactic ambiguity is resolved in favour of template lists:
+* Template lists are discovered in an early phase of parsing, before [=declarations=], [=expressions=], and [=statements=] are parsed.
+* During tokenization in a later phase,
+    the initial `'<'` (U+003C) of a template list is mapped to a [=syntax_sym/_template_args_start=] token, and
+    the terminating `'>'` (U+003E) of a template list is mapped to a [=syntax_sym/_template_args_end=] token.
+
+The <dfn>template list discovery</dfn> algorithm is as follows.
+<div class=algorithm>
+**Input:** The program source text.
+
+**Record types:**
+
+Let |UnclosedCandidate| be a record type containing:
+    * |position|, a location in the source text
+    * |depth|, an integer, the expression nesting depth at |position|
+
+Let |TemplateList| be a record type containing:
+    * |start_position|, the source location of the `'<'` (U+003C) code point that starts this template list.
+    * |end_position|, the source location of the `'>'` (U+003E) code point that ends this template list.
+
+**Output:** |DiscoveredTemplateLists|, a list of |TemplateList| records.
+
+**Algorithm:**
+* Initialize |DiscoveredTemplateLists| to an empty list.
+* Initialize a |Pending| variable to be an empty stack of |UnclosedCandidate| records.
+* Initialize a |CurrentPosition| integer variable to 0.
+    It encodes the position of the code point currently being examined, as a count of the number of code points after the start of the source text.
+    * This variable will advance forward in the text while executing the algorithm.
+        When the end of text is reached, terminate the algorithm immediately and have it return |DiscoveredTemplateLists|.
+* Initialize a |NestingDepth| integer variable to 0.
+* Repeat the following steps:
+    * Advance |CurrentPosition| past [=blankspace=], [=comments=], and [=literals=].
+    * If [=syntax/ident_pattern_token=] matches the text at |CurrentPosition|, then:
+        * Advance |CurrentPosition| past the [=syntax/ident_pattern_token=].
+        * Advance |CurrentPosition| past blankspace and comments, if present.
+        * If `'<'` (U+003C) appears at |CurrentPosition|, then:
+            * Note: This code point is a candidate for being the start of a template list.
+                Save enough state so it can be matched against a terminating `'>'` (U+003E) appearing later in the input.
+            * Push |UnclosedCandidate|(|position|=|CurrentPosition|,|depth|=|NestingDepth|) onto the |Pending| stack.
+            * Advance |CurrentPosition| to the next code point, and start the next iteration of the loop.
+    * If `'>'` (U+003E) appears at |CurrentPosition| then:
+        * Note: This code point is a candidate for being the end of a template list.
+        * If |Pending| is not empty, then let |T| be its top entry, and:
+            * If |T|.|depth| equals |NestingDepth| then:
+                * Note: This code point ends the current template list whose start is recorded in |T|.
+                * Add |TemplateList|(|start_position|=|T|.|position|, |end_position|=|CurrentPosition|) to |DiscoveredTemplateLists|.
+                * Pop |T| off the |Pending| stack.
+                * Advance |CurrentPosition| past this code point, and start the next iteration of the loop.
+    * If `'('` (U+0028) or `'['` (U+005B) appears at |CurrentPosition| then:
+        * Note: Enter a nested expression.
+        * Add 1 to |NestingDepth|.
+        * Advance |CurrentPosition| past this code point, and start the next iteration of the loop.
+    * If `')'` (U+0029) or `']'` (U+005D) appears at |CurrentPosition| then:
+        * Note: Exit a nested expression.
+        * Pop entries from the |Pending| stack until it is empty, or until its top entry has |depth| < |NestingDepth|.
+        * Set |NestingDepth| to 0 or |NestingDepth| − 1, whichever is larger.
+        * Advance |CurrentPosition| past this code point, and start the next iteration of the loop.
+    * If `';'` (U+003B) or `'{'` (U+007B) or `'='` (U+003D) or `':'` (U+003A) appears at |CurrentPosition| then:
+        * Note: An expression cannot contain these code points, so they cannot appear inside a template list.
+            Clear pending unclosed candidates.
+        * Set |NestingDepth| to 0.
+        * Remove all entries from the |Pending| stack.
+        * Advance |CurrentPosition| past this code point, and start the next iteration of the loop.
+    * If `'&&'` or `'||'` matches the text at |CurrentPosition| then:
+        * Note: These are operators that have lower precedence than comparisons.
+            Reject any pending unclosed candidates at the current expression level.
+        * Note: With this rule, no template list will be found in the program fragment `a<b || c>d`.
+            Instead it will be recognized as the short-circuiting disjunction of two comparisons.
+        * Pop entries from the |Pending| stack until it is empty, or until its top entry has |depth| < |NestingDepth|.
+        * Advance |CurrentPosition| past the two code points, and start the next iteration of the loop.
+    * Advance |CurrentPosition| past the current code point.
+
+</div>
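To make the control flow concrete, here is a minimal Python sketch of the discovery algorithm above. It is illustrative only and not part of this patch: it skips only blankspace (the spec algorithm also skips comments and literals), and it substitutes a simple ASCII pattern for `ident_pattern_token`.

```python
import re
from dataclasses import dataclass

@dataclass
class UnclosedCandidate:
    position: int  # location of a candidate '<'
    depth: int     # expression nesting depth at that location

@dataclass
class TemplateList:
    start_position: int  # location of the '<'
    end_position: int    # location of the matching '>'

# Simplified stand-in for ident_pattern_token.
IDENT = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')

def discover_template_lists(src):
    discovered = []
    pending = []  # stack of UnclosedCandidate
    pos = 0
    nesting_depth = 0
    while pos < len(src):
        if src[pos].isspace():
            pos += 1
            continue
        m = IDENT.match(src, pos)
        if m:
            pos = m.end()
            while pos < len(src) and src[pos].isspace():
                pos += 1
            if pos < len(src) and src[pos] == '<':
                # Candidate start of a template list.
                pending.append(UnclosedCandidate(pos, nesting_depth))
                pos += 1
            continue
        c = src[pos]
        if c == '>':
            # Candidate end: match it against the top pending '<'.
            if pending and pending[-1].depth == nesting_depth:
                t = pending.pop()
                discovered.append(TemplateList(t.position, pos))
            pos += 1
        elif c in '([':
            nesting_depth += 1
            pos += 1
        elif c in ')]':
            while pending and pending[-1].depth >= nesting_depth:
                pending.pop()
            nesting_depth = max(0, nesting_depth - 1)
            pos += 1
        elif c in ';{=:':
            # These never occur inside a template list.
            nesting_depth = 0
            pending.clear()
            pos += 1
        elif src[pos:pos+2] in ('&&', '||'):
            # Lower precedence than comparison: reject candidates at this level.
            while pending and pending[-1].depth >= nesting_depth:
                pending.pop()
            pos += 2
        else:
            pos += 1
    return discovered

# The two '>' at the end of 'array<vec4<f32>>' each close a template list:
assert [(t.start_position, t.end_position)
        for t in discover_template_lists("array<vec4<f32>>")] == [(10, 14), (5, 15)]
# But no template list is found in a short-circuiting disjunction:
assert discover_template_lists("a<b || c>d") == []
```

The note that follows describes how to extend this bookkeeping to record the source range of each template parameter.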
+
+<div class=note>
+Note: The algorithm can be modified to find the source ranges for [=template parameters=], as follows: + +* Modify |UnclosedCandidate| to add the following fields: + * |parameters|, a list of source ranges of template parameters. + * |parameter_start_position|, a source location. +* Modify |TemplateList| to add a field: + * |parameters|, a list of source ranges of template parameters. +* When pushing a new |UnclosedCandidate| onto the |Pending| stack: + * Set its |parameters| field to an empty list. + * Set |parameter_start_position| to one code point past |CurrentPosition|. +* When adding a |TemplateList|, |TL|, to DiscoveredTemplateLists: + * Let |T| be the top of the |Pending| stack, as in the original algorithm. + * Push the source range starting at |T|.|parameter_start_position| and ending at |CurrentPosition|−1 onto |T|.|parameters|. + * Prepare |TL| as in the original algorithm. + * Set |TL|.|parameters| to |T|.|parameters|. +* Insert a check at the end the loop, just before advancing past the current code point: + * If '`,`' (U+002C) appears at |CurrentPosition| and |Pending| is not empty, then: + * Let |T| be the top of the |Pending| stack. + * Push the source range starting at |T|.|parameter_start_position| and ending at |CurrentPosition|−1 onto |T|.|parameters|. + * Set |T|.|parameter_start_position| to |CurrentPosition|+1 + +
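A compact sketch of that modification, extending the Python sketch above (again illustrative only; a `(start, end)` tuple stands in for a source range, and `TemplateList` is assumed to gain a matching `parameters` field):

```python
from dataclasses import dataclass, field

@dataclass
class UnclosedCandidate:
    position: int
    depth: int
    parameters: list = field(default_factory=list)  # source ranges (start, end)
    parameter_start_position: int = 0

# When pushing a candidate after seeing '<' at pos:
#     pending.append(UnclosedCandidate(pos, nesting_depth,
#                                      parameter_start_position=pos + 1))
# When a '>' at pos closes the template list:
#     t = pending.pop()
#     t.parameters.append((t.parameter_start_position, pos - 1))
#     discovered.append(TemplateList(t.position, pos, t.parameters))
# And at the end of the loop body, just before advancing past the code point:
#     if src[pos] == ',' and pending:
#         t = pending[-1]
#         t.parameters.append((t.parameter_start_position, pos - 1))
#         t.parameter_start_position = pos + 1
```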
+
+Note: The algorithm explicitly skips past literals because some numeric literals end in a letter, for example `1.0f`.
+The terminating `f` should not be mistaken as the start of an [=syntax/ident_pattern_token=].
+
+Note: In the phrase `A ( B < C, D > ( E ) )`, the segment `< C, D >` is a [=template list=].
+
+Note: The algorithm respects expression nesting: The start and end of a particular template list cannot appear at different expression nesting levels.
+For example, in `array<i32,select(2,3,a>b)>`, the template list has two parameters, where the last one is `select(2,3,a>b)`.
+The `'>'` in `a>b` does not terminate the template list because it is enclosed in a parenthesized part of the expression calling the `select` function.
+
+Note: Both ends of a template list must appear within the same array indexing phrase. For example `a[b<d]>()` does not contain a valid template list.
+
+
 ## Attributes ## {#attributes}
 
 An attribute modifies an object.
@@ -2311,7 +2464,7 @@ This includes the [=store type=] of a workgroup variable.
 <div class='syntax' noexport='true'>
array_type_specifier : - | `'array'` `'<'` [=syntax/type_specifier=] ( `','` [=syntax/element_count_expression=] ) ? `'>'` + | `'array'` _template_args_start [=syntax/type_specifier=] ( `','` [=syntax/element_count_expression=] ) ? _template_args_end
element_count_expression : @@ -3993,11 +4146,11 @@ sampler_comparison | [=syntax/depth_texture_type=] - | [=syntax/sampled_texture_type=] `'<'` [=syntax/type_specifier=] `'>'` + | [=syntax/sampled_texture_type=] _template_args_start [=syntax/type_specifier=] _template_args_end - | [=syntax/multisampled_texture_type=] `'<'` [=syntax/type_specifier=] `'>'` + | [=syntax/multisampled_texture_type=] _template_args_start [=syntax/type_specifier=] _template_args_end - | [=syntax/storage_texture_type=] `'<'` [=syntax/texel_format=] `','` [=syntax/access_mode=] `'>'` + | [=syntax/storage_texture_type=] _template_args_start [=syntax/texel_format=] `','` [=syntax/access_mode=] _template_args_end
sampler_type : @@ -4102,15 +4255,15 @@ all properties of the members of *S*, including attributes, carry over to the me | `'u32'` - | [=syntax/vec_prefix=] `'<'` [=syntax/type_specifier=] `'>'` + | [=syntax/vec_prefix=] _template_args_start [=syntax/type_specifier=] _template_args_end - | [=syntax/mat_prefix=] `'<'` [=syntax/type_specifier=] `'>'` + | [=syntax/mat_prefix=] _template_args_start [=syntax/type_specifier=] _template_args_end - | `'ptr'` `'<'` [=syntax/address_space=] `','` [=syntax/type_specifier=] ( `','` [=syntax/access_mode=] ) ? `'>'` + | `'ptr'` _template_args_start [=syntax/address_space=] `','` [=syntax/type_specifier=] ( `','` [=syntax/access_mode=] ) ? _template_args_end | [=syntax/array_type_specifier=] - | `'atomic'` `'<'` [=syntax/type_specifier=] `'>'` + | `'atomic'` _template_args_start [=syntax/type_specifier=] _template_args_end | [=syntax/texture_and_sampler_types=]
@@ -4714,7 +4867,7 @@ such that the redundant loads are eliminated.
variable_decl : - | `'var'` [=syntax/variable_qualifier=] ? [=syntax/optionally_typed_ident=] + | `'var'` [=syntax/variable_qualifier=] ? [=syntax/optionally_typed_ident=]
optionally_typed_ident : @@ -4724,7 +4877,7 @@ such that the redundant loads are eliminated.
variable_qualifier : - | `'<'` [=syntax/address_space=] ( `','` [=syntax/access_mode=] ) ? `'>'` + | _template_args_start [=syntax/address_space=] ( `','` [=syntax/access_mode=] ) ? _template_args_end
@@ -6727,7 +6880,7 @@ When an identifier is used as a [=syntax/callable=] item, it is one of: | [=syntax/paren_expression=] - | `'bitcast'` `'<'` [=syntax/type_specifier=] `'>'` [=syntax/paren_expression=] + | `'bitcast'` _template_args_start [=syntax/type_specifier=] _template_args_end [=syntax/paren_expression=]
call_expression : @@ -6747,11 +6900,11 @@ Note: The [=syntax/call_expression=] rule exists to ensure [=type checking=] app | [=syntax/type_specifier_without_ident=] - | [=syntax/vec_prefix=] + | [=syntax/vec_prefix=] - | [=syntax/mat_prefix=] + | [=syntax/mat_prefix=] - | `'array'` + | `'array'`
paren_expression : @@ -7051,7 +7204,7 @@ to bind with this operator. This column is necessary for linearly listing operat # Statements # {#statements} -Statements are program fragments that control its execution. +A statement is a program fragment that controls execution. Statements are generally executed in sequential order; however, [[#control-flow|control flow statements]] may cause a program to execute in non-sequential order. @@ -7239,9 +7392,9 @@ An [=statement/assignment=] is a compound assignment when th | `'^='` - | `'>>='` + | `'>>='` - | `'<<='` + | `'<<='`
 The type requirements, semantics, and behavior of each statement is defined as if
@@ -11096,7 +11249,7 @@
 A syntactic token is a sequence of special code points, used:
 * to spell an expression operator, or
 * as punctuation: to group, sequence, or separate other grammar elements.
 
-List of [=syntactic tokens=]:
+The [=syntactic tokens=] are:
 
 * `'&'` (Code point: `U+0026`)
 * `'&&'` (Code points: `U+0026` `U+0026`)
@@ -11142,8 +11295,16 @@
 * `'&='` (Code points: `U+0026` `U+003D`)
 * `'|='` (Code points: `U+007C` `U+003D`)
 * `'^='` (Code points: `U+005E` `U+003D`)
-* `'>>='` (Code points: `U+003E` `U+003E` `U+003D`)
-* `'<<='` (Code points: `U+003C` `U+003C` `U+003D`)
+* `'>>='` (Code points: `U+003E` `U+003E` `U+003D`)
+* `'<<='` (Code points: `U+003C` `U+003C` `U+003D`)
+* `_template_args_end`
+    * Text: `'>'` (Code point: `U+003E`)
+    * This token is textually the same as the [=syntax_sym/greater_than=] syntactic token.
+    * It is generated by [=template list discovery=], and is used as the last token in a [=template list=].
+* `_template_args_start`
+    * Text: `'<'` (Code point: `U+003C`)
+    * This token is textually the same as the [=syntax_sym/less_than=] syntactic token.
+    * It is generated by [=template list discovery=], and is used as the first token in a [=template list=].
 
 ## Context-Dependent Name Tokens ## {#context-dependent-name-tokens}
diff --git a/wgsl/scanner.cc b/wgsl/scanner.cc
new file mode 100644
index 0000000000..59879988d4
--- /dev/null
+++ b/wgsl/scanner.cc
@@ -0,0 +1,992 @@
+
+#include <tree_sitter/parser.h>
+#include <algorithm>
+#include <bitset>
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <initializer_list>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#define ENABLE_LOGGING 0
+
+#if ENABLE_LOGGING
+#define LOG(msg, ...) printf(msg "\n", ##__VA_ARGS__)
+#else
+#define LOG(...)
+#endif
+
+namespace {
+
+/// The possible external tokens matched by this custom scanner.
+/// The order of the entries in this enumerator must match the 'externals' in
+/// the grammar.js.
+enum Token {
+  BLOCK_COMMENT,
+  DISAMBIGUATE_TEMPLATE,  // A zero-length token used to scan ahead
+  TEMPLATE_ARGS_START,
+  TEMPLATE_ARGS_END,
+  LESS_THAN,           // '<'
+  LESS_THAN_EQUAL,     // '<='
+  SHIFT_LEFT,          // '<<'
+  SHIFT_LEFT_ASSIGN,   // '<<='
+  GREATER_THAN,        // '>'
+  GREATER_THAN_EQUAL,  // '>='
+  SHIFT_RIGHT,         // '>>'
+  SHIFT_RIGHT_ASSIGN,  // '>>='
+
+  // A sentinel value used to signal an error has occurred already.
+  // https://tree-sitter.github.io/tree-sitter/creating-parsers#other-external-scanner-details
+  ERROR,
+};
+
+const char* str(Token tok, bool brief = false) {
+  switch (tok) {
+    case Token::BLOCK_COMMENT:
+      return "BLOCK_COMMENT";
+    case Token::DISAMBIGUATE_TEMPLATE:
+      return "DISAMBIGUATE_TEMPLATE";
+    case Token::TEMPLATE_ARGS_START:
+      return "TEMPLATE_ARGS_START";
+    case Token::TEMPLATE_ARGS_END:
+      return "TEMPLATE_ARGS_END";
+    case Token::LESS_THAN:
+      return brief ? "<" : "LESS_THAN";
+    case Token::LESS_THAN_EQUAL:
+      return brief ? "<=" : "LESS_THAN_EQUAL";
+    case Token::SHIFT_LEFT:
+      return brief ? "<<" : "SHIFT_LEFT";
+    case Token::SHIFT_LEFT_ASSIGN:
+      return brief ? "<<=" : "SHIFT_LEFT_ASSIGN";
+    case Token::GREATER_THAN:
+      return brief ? ">" : "GREATER_THAN";
+    case Token::GREATER_THAN_EQUAL:
+      return brief ? ">=" : "GREATER_THAN_EQUAL";
+    case Token::SHIFT_RIGHT:
+      return brief ? ">>" : "SHIFT_RIGHT";
+    case Token::SHIFT_RIGHT_ASSIGN:
+      return brief ?
">>=" : "SHIFT_RIGHT_ASSIGN"; + case Token::ERROR: + return "ERROR"; + default: + return ""; + } +} + +using CodePoint = uint32_t; + +static constexpr CodePoint kEOF = 0; + +struct CodePointRange { + CodePoint first; // First code point in the interval + CodePoint last; // Last code point in the interval (inclusive) +}; + +inline bool operator<(CodePoint code_point, CodePointRange range) { + return code_point < range.first; +} +inline bool operator<(CodePointRange range, CodePoint code_point) { + return range.last < code_point; +} + +// Interval ranges of all code points in the Unicode 14 XID_Start set +// This array needs to be in ascending order. +constexpr CodePointRange kXIDStartRanges[] = { + {0x00041, 0x0005a}, {0x00061, 0x0007a}, {0x000aa, 0x000aa}, + {0x000b5, 0x000b5}, {0x000ba, 0x000ba}, {0x000c0, 0x000d6}, + {0x000d8, 0x000f6}, {0x000f8, 0x002c1}, {0x002c6, 0x002d1}, + {0x002e0, 0x002e4}, {0x002ec, 0x002ec}, {0x002ee, 0x002ee}, + {0x00370, 0x00374}, {0x00376, 0x00377}, {0x0037b, 0x0037d}, + {0x0037f, 0x0037f}, {0x00386, 0x00386}, {0x00388, 0x0038a}, + {0x0038c, 0x0038c}, {0x0038e, 0x003a1}, {0x003a3, 0x003f5}, + {0x003f7, 0x00481}, {0x0048a, 0x0052f}, {0x00531, 0x00556}, + {0x00559, 0x00559}, {0x00560, 0x00588}, {0x005d0, 0x005ea}, + {0x005ef, 0x005f2}, {0x00620, 0x0064a}, {0x0066e, 0x0066f}, + {0x00671, 0x006d3}, {0x006d5, 0x006d5}, {0x006e5, 0x006e6}, + {0x006ee, 0x006ef}, {0x006fa, 0x006fc}, {0x006ff, 0x006ff}, + {0x00710, 0x00710}, {0x00712, 0x0072f}, {0x0074d, 0x007a5}, + {0x007b1, 0x007b1}, {0x007ca, 0x007ea}, {0x007f4, 0x007f5}, + {0x007fa, 0x007fa}, {0x00800, 0x00815}, {0x0081a, 0x0081a}, + {0x00824, 0x00824}, {0x00828, 0x00828}, {0x00840, 0x00858}, + {0x00860, 0x0086a}, {0x00870, 0x00887}, {0x00889, 0x0088e}, + {0x008a0, 0x008c9}, {0x00904, 0x00939}, {0x0093d, 0x0093d}, + {0x00950, 0x00950}, {0x00958, 0x00961}, {0x00971, 0x00980}, + {0x00985, 0x0098c}, {0x0098f, 0x00990}, {0x00993, 0x009a8}, + {0x009aa, 0x009b0}, {0x009b2, 0x009b2}, {0x009b6, 0x009b9}, + {0x009bd, 0x009bd}, {0x009ce, 0x009ce}, {0x009dc, 0x009dd}, + {0x009df, 0x009e1}, {0x009f0, 0x009f1}, {0x009fc, 0x009fc}, + {0x00a05, 0x00a0a}, {0x00a0f, 0x00a10}, {0x00a13, 0x00a28}, + {0x00a2a, 0x00a30}, {0x00a32, 0x00a33}, {0x00a35, 0x00a36}, + {0x00a38, 0x00a39}, {0x00a59, 0x00a5c}, {0x00a5e, 0x00a5e}, + {0x00a72, 0x00a74}, {0x00a85, 0x00a8d}, {0x00a8f, 0x00a91}, + {0x00a93, 0x00aa8}, {0x00aaa, 0x00ab0}, {0x00ab2, 0x00ab3}, + {0x00ab5, 0x00ab9}, {0x00abd, 0x00abd}, {0x00ad0, 0x00ad0}, + {0x00ae0, 0x00ae1}, {0x00af9, 0x00af9}, {0x00b05, 0x00b0c}, + {0x00b0f, 0x00b10}, {0x00b13, 0x00b28}, {0x00b2a, 0x00b30}, + {0x00b32, 0x00b33}, {0x00b35, 0x00b39}, {0x00b3d, 0x00b3d}, + {0x00b5c, 0x00b5d}, {0x00b5f, 0x00b61}, {0x00b71, 0x00b71}, + {0x00b83, 0x00b83}, {0x00b85, 0x00b8a}, {0x00b8e, 0x00b90}, + {0x00b92, 0x00b95}, {0x00b99, 0x00b9a}, {0x00b9c, 0x00b9c}, + {0x00b9e, 0x00b9f}, {0x00ba3, 0x00ba4}, {0x00ba8, 0x00baa}, + {0x00bae, 0x00bb9}, {0x00bd0, 0x00bd0}, {0x00c05, 0x00c0c}, + {0x00c0e, 0x00c10}, {0x00c12, 0x00c28}, {0x00c2a, 0x00c39}, + {0x00c3d, 0x00c3d}, {0x00c58, 0x00c5a}, {0x00c5d, 0x00c5d}, + {0x00c60, 0x00c61}, {0x00c80, 0x00c80}, {0x00c85, 0x00c8c}, + {0x00c8e, 0x00c90}, {0x00c92, 0x00ca8}, {0x00caa, 0x00cb3}, + {0x00cb5, 0x00cb9}, {0x00cbd, 0x00cbd}, {0x00cdd, 0x00cde}, + {0x00ce0, 0x00ce1}, {0x00cf1, 0x00cf2}, {0x00d04, 0x00d0c}, + {0x00d0e, 0x00d10}, {0x00d12, 0x00d3a}, {0x00d3d, 0x00d3d}, + {0x00d4e, 0x00d4e}, {0x00d54, 0x00d56}, {0x00d5f, 0x00d61}, + {0x00d7a, 0x00d7f}, {0x00d85, 0x00d96}, {0x00d9a, 
0x00db1}, + {0x00db3, 0x00dbb}, {0x00dbd, 0x00dbd}, {0x00dc0, 0x00dc6}, + {0x00e01, 0x00e30}, {0x00e32, 0x00e32}, {0x00e40, 0x00e46}, + {0x00e81, 0x00e82}, {0x00e84, 0x00e84}, {0x00e86, 0x00e8a}, + {0x00e8c, 0x00ea3}, {0x00ea5, 0x00ea5}, {0x00ea7, 0x00eb0}, + {0x00eb2, 0x00eb2}, {0x00ebd, 0x00ebd}, {0x00ec0, 0x00ec4}, + {0x00ec6, 0x00ec6}, {0x00edc, 0x00edf}, {0x00f00, 0x00f00}, + {0x00f40, 0x00f47}, {0x00f49, 0x00f6c}, {0x00f88, 0x00f8c}, + {0x01000, 0x0102a}, {0x0103f, 0x0103f}, {0x01050, 0x01055}, + {0x0105a, 0x0105d}, {0x01061, 0x01061}, {0x01065, 0x01066}, + {0x0106e, 0x01070}, {0x01075, 0x01081}, {0x0108e, 0x0108e}, + {0x010a0, 0x010c5}, {0x010c7, 0x010c7}, {0x010cd, 0x010cd}, + {0x010d0, 0x010fa}, {0x010fc, 0x01248}, {0x0124a, 0x0124d}, + {0x01250, 0x01256}, {0x01258, 0x01258}, {0x0125a, 0x0125d}, + {0x01260, 0x01288}, {0x0128a, 0x0128d}, {0x01290, 0x012b0}, + {0x012b2, 0x012b5}, {0x012b8, 0x012be}, {0x012c0, 0x012c0}, + {0x012c2, 0x012c5}, {0x012c8, 0x012d6}, {0x012d8, 0x01310}, + {0x01312, 0x01315}, {0x01318, 0x0135a}, {0x01380, 0x0138f}, + {0x013a0, 0x013f5}, {0x013f8, 0x013fd}, {0x01401, 0x0166c}, + {0x0166f, 0x0167f}, {0x01681, 0x0169a}, {0x016a0, 0x016ea}, + {0x016ee, 0x016f8}, {0x01700, 0x01711}, {0x0171f, 0x01731}, + {0x01740, 0x01751}, {0x01760, 0x0176c}, {0x0176e, 0x01770}, + {0x01780, 0x017b3}, {0x017d7, 0x017d7}, {0x017dc, 0x017dc}, + {0x01820, 0x01878}, {0x01880, 0x018a8}, {0x018aa, 0x018aa}, + {0x018b0, 0x018f5}, {0x01900, 0x0191e}, {0x01950, 0x0196d}, + {0x01970, 0x01974}, {0x01980, 0x019ab}, {0x019b0, 0x019c9}, + {0x01a00, 0x01a16}, {0x01a20, 0x01a54}, {0x01aa7, 0x01aa7}, + {0x01b05, 0x01b33}, {0x01b45, 0x01b4c}, {0x01b83, 0x01ba0}, + {0x01bae, 0x01baf}, {0x01bba, 0x01be5}, {0x01c00, 0x01c23}, + {0x01c4d, 0x01c4f}, {0x01c5a, 0x01c7d}, {0x01c80, 0x01c88}, + {0x01c90, 0x01cba}, {0x01cbd, 0x01cbf}, {0x01ce9, 0x01cec}, + {0x01cee, 0x01cf3}, {0x01cf5, 0x01cf6}, {0x01cfa, 0x01cfa}, + {0x01d00, 0x01dbf}, {0x01e00, 0x01f15}, {0x01f18, 0x01f1d}, + {0x01f20, 0x01f45}, {0x01f48, 0x01f4d}, {0x01f50, 0x01f57}, + {0x01f59, 0x01f59}, {0x01f5b, 0x01f5b}, {0x01f5d, 0x01f5d}, + {0x01f5f, 0x01f7d}, {0x01f80, 0x01fb4}, {0x01fb6, 0x01fbc}, + {0x01fbe, 0x01fbe}, {0x01fc2, 0x01fc4}, {0x01fc6, 0x01fcc}, + {0x01fd0, 0x01fd3}, {0x01fd6, 0x01fdb}, {0x01fe0, 0x01fec}, + {0x01ff2, 0x01ff4}, {0x01ff6, 0x01ffc}, {0x02071, 0x02071}, + {0x0207f, 0x0207f}, {0x02090, 0x0209c}, {0x02102, 0x02102}, + {0x02107, 0x02107}, {0x0210a, 0x02113}, {0x02115, 0x02115}, + {0x02118, 0x0211d}, {0x02124, 0x02124}, {0x02126, 0x02126}, + {0x02128, 0x02128}, {0x0212a, 0x02139}, {0x0213c, 0x0213f}, + {0x02145, 0x02149}, {0x0214e, 0x0214e}, {0x02160, 0x02188}, + {0x02c00, 0x02ce4}, {0x02ceb, 0x02cee}, {0x02cf2, 0x02cf3}, + {0x02d00, 0x02d25}, {0x02d27, 0x02d27}, {0x02d2d, 0x02d2d}, + {0x02d30, 0x02d67}, {0x02d6f, 0x02d6f}, {0x02d80, 0x02d96}, + {0x02da0, 0x02da6}, {0x02da8, 0x02dae}, {0x02db0, 0x02db6}, + {0x02db8, 0x02dbe}, {0x02dc0, 0x02dc6}, {0x02dc8, 0x02dce}, + {0x02dd0, 0x02dd6}, {0x02dd8, 0x02dde}, {0x03005, 0x03007}, + {0x03021, 0x03029}, {0x03031, 0x03035}, {0x03038, 0x0303c}, + {0x03041, 0x03096}, {0x0309d, 0x0309f}, {0x030a1, 0x030fa}, + {0x030fc, 0x030ff}, {0x03105, 0x0312f}, {0x03131, 0x0318e}, + {0x031a0, 0x031bf}, {0x031f0, 0x031ff}, {0x03400, 0x04dbf}, + {0x04e00, 0x0a48c}, {0x0a4d0, 0x0a4fd}, {0x0a500, 0x0a60c}, + {0x0a610, 0x0a61f}, {0x0a62a, 0x0a62b}, {0x0a640, 0x0a66e}, + {0x0a67f, 0x0a69d}, {0x0a6a0, 0x0a6ef}, {0x0a717, 0x0a71f}, + {0x0a722, 0x0a788}, {0x0a78b, 0x0a7ca}, {0x0a7d0, 0x0a7d1}, + 
{0x0a7d3, 0x0a7d3}, {0x0a7d5, 0x0a7d9}, {0x0a7f2, 0x0a801}, + {0x0a803, 0x0a805}, {0x0a807, 0x0a80a}, {0x0a80c, 0x0a822}, + {0x0a840, 0x0a873}, {0x0a882, 0x0a8b3}, {0x0a8f2, 0x0a8f7}, + {0x0a8fb, 0x0a8fb}, {0x0a8fd, 0x0a8fe}, {0x0a90a, 0x0a925}, + {0x0a930, 0x0a946}, {0x0a960, 0x0a97c}, {0x0a984, 0x0a9b2}, + {0x0a9cf, 0x0a9cf}, {0x0a9e0, 0x0a9e4}, {0x0a9e6, 0x0a9ef}, + {0x0a9fa, 0x0a9fe}, {0x0aa00, 0x0aa28}, {0x0aa40, 0x0aa42}, + {0x0aa44, 0x0aa4b}, {0x0aa60, 0x0aa76}, {0x0aa7a, 0x0aa7a}, + {0x0aa7e, 0x0aaaf}, {0x0aab1, 0x0aab1}, {0x0aab5, 0x0aab6}, + {0x0aab9, 0x0aabd}, {0x0aac0, 0x0aac0}, {0x0aac2, 0x0aac2}, + {0x0aadb, 0x0aadd}, {0x0aae0, 0x0aaea}, {0x0aaf2, 0x0aaf4}, + {0x0ab01, 0x0ab06}, {0x0ab09, 0x0ab0e}, {0x0ab11, 0x0ab16}, + {0x0ab20, 0x0ab26}, {0x0ab28, 0x0ab2e}, {0x0ab30, 0x0ab5a}, + {0x0ab5c, 0x0ab69}, {0x0ab70, 0x0abe2}, {0x0ac00, 0x0d7a3}, + {0x0d7b0, 0x0d7c6}, {0x0d7cb, 0x0d7fb}, {0x0f900, 0x0fa6d}, + {0x0fa70, 0x0fad9}, {0x0fb00, 0x0fb06}, {0x0fb13, 0x0fb17}, + {0x0fb1d, 0x0fb1d}, {0x0fb1f, 0x0fb28}, {0x0fb2a, 0x0fb36}, + {0x0fb38, 0x0fb3c}, {0x0fb3e, 0x0fb3e}, {0x0fb40, 0x0fb41}, + {0x0fb43, 0x0fb44}, {0x0fb46, 0x0fbb1}, {0x0fbd3, 0x0fc5d}, + {0x0fc64, 0x0fd3d}, {0x0fd50, 0x0fd8f}, {0x0fd92, 0x0fdc7}, + {0x0fdf0, 0x0fdf9}, {0x0fe71, 0x0fe71}, {0x0fe73, 0x0fe73}, + {0x0fe77, 0x0fe77}, {0x0fe79, 0x0fe79}, {0x0fe7b, 0x0fe7b}, + {0x0fe7d, 0x0fe7d}, {0x0fe7f, 0x0fefc}, {0x0ff21, 0x0ff3a}, + {0x0ff41, 0x0ff5a}, {0x0ff66, 0x0ff9d}, {0x0ffa0, 0x0ffbe}, + {0x0ffc2, 0x0ffc7}, {0x0ffca, 0x0ffcf}, {0x0ffd2, 0x0ffd7}, + {0x0ffda, 0x0ffdc}, {0x10000, 0x1000b}, {0x1000d, 0x10026}, + {0x10028, 0x1003a}, {0x1003c, 0x1003d}, {0x1003f, 0x1004d}, + {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10140, 0x10174}, + {0x10280, 0x1029c}, {0x102a0, 0x102d0}, {0x10300, 0x1031f}, + {0x1032d, 0x1034a}, {0x10350, 0x10375}, {0x10380, 0x1039d}, + {0x103a0, 0x103c3}, {0x103c8, 0x103cf}, {0x103d1, 0x103d5}, + {0x10400, 0x1049d}, {0x104b0, 0x104d3}, {0x104d8, 0x104fb}, + {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10570, 0x1057a}, + {0x1057c, 0x1058a}, {0x1058c, 0x10592}, {0x10594, 0x10595}, + {0x10597, 0x105a1}, {0x105a3, 0x105b1}, {0x105b3, 0x105b9}, + {0x105bb, 0x105bc}, {0x10600, 0x10736}, {0x10740, 0x10755}, + {0x10760, 0x10767}, {0x10780, 0x10785}, {0x10787, 0x107b0}, + {0x107b2, 0x107ba}, {0x10800, 0x10805}, {0x10808, 0x10808}, + {0x1080a, 0x10835}, {0x10837, 0x10838}, {0x1083c, 0x1083c}, + {0x1083f, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089e}, + {0x108e0, 0x108f2}, {0x108f4, 0x108f5}, {0x10900, 0x10915}, + {0x10920, 0x10939}, {0x10980, 0x109b7}, {0x109be, 0x109bf}, + {0x10a00, 0x10a00}, {0x10a10, 0x10a13}, {0x10a15, 0x10a17}, + {0x10a19, 0x10a35}, {0x10a60, 0x10a7c}, {0x10a80, 0x10a9c}, + {0x10ac0, 0x10ac7}, {0x10ac9, 0x10ae4}, {0x10b00, 0x10b35}, + {0x10b40, 0x10b55}, {0x10b60, 0x10b72}, {0x10b80, 0x10b91}, + {0x10c00, 0x10c48}, {0x10c80, 0x10cb2}, {0x10cc0, 0x10cf2}, + {0x10d00, 0x10d23}, {0x10e80, 0x10ea9}, {0x10eb0, 0x10eb1}, + {0x10f00, 0x10f1c}, {0x10f27, 0x10f27}, {0x10f30, 0x10f45}, + {0x10f70, 0x10f81}, {0x10fb0, 0x10fc4}, {0x10fe0, 0x10ff6}, + {0x11003, 0x11037}, {0x11071, 0x11072}, {0x11075, 0x11075}, + {0x11083, 0x110af}, {0x110d0, 0x110e8}, {0x11103, 0x11126}, + {0x11144, 0x11144}, {0x11147, 0x11147}, {0x11150, 0x11172}, + {0x11176, 0x11176}, {0x11183, 0x111b2}, {0x111c1, 0x111c4}, + {0x111da, 0x111da}, {0x111dc, 0x111dc}, {0x11200, 0x11211}, + {0x11213, 0x1122b}, {0x11280, 0x11286}, {0x11288, 0x11288}, + {0x1128a, 0x1128d}, {0x1128f, 0x1129d}, {0x1129f, 0x112a8}, + {0x112b0, 0x112de}, 
{0x11305, 0x1130c}, {0x1130f, 0x11310}, + {0x11313, 0x11328}, {0x1132a, 0x11330}, {0x11332, 0x11333}, + {0x11335, 0x11339}, {0x1133d, 0x1133d}, {0x11350, 0x11350}, + {0x1135d, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144a}, + {0x1145f, 0x11461}, {0x11480, 0x114af}, {0x114c4, 0x114c5}, + {0x114c7, 0x114c7}, {0x11580, 0x115ae}, {0x115d8, 0x115db}, + {0x11600, 0x1162f}, {0x11644, 0x11644}, {0x11680, 0x116aa}, + {0x116b8, 0x116b8}, {0x11700, 0x1171a}, {0x11740, 0x11746}, + {0x11800, 0x1182b}, {0x118a0, 0x118df}, {0x118ff, 0x11906}, + {0x11909, 0x11909}, {0x1190c, 0x11913}, {0x11915, 0x11916}, + {0x11918, 0x1192f}, {0x1193f, 0x1193f}, {0x11941, 0x11941}, + {0x119a0, 0x119a7}, {0x119aa, 0x119d0}, {0x119e1, 0x119e1}, + {0x119e3, 0x119e3}, {0x11a00, 0x11a00}, {0x11a0b, 0x11a32}, + {0x11a3a, 0x11a3a}, {0x11a50, 0x11a50}, {0x11a5c, 0x11a89}, + {0x11a9d, 0x11a9d}, {0x11ab0, 0x11af8}, {0x11c00, 0x11c08}, + {0x11c0a, 0x11c2e}, {0x11c40, 0x11c40}, {0x11c72, 0x11c8f}, + {0x11d00, 0x11d06}, {0x11d08, 0x11d09}, {0x11d0b, 0x11d30}, + {0x11d46, 0x11d46}, {0x11d60, 0x11d65}, {0x11d67, 0x11d68}, + {0x11d6a, 0x11d89}, {0x11d98, 0x11d98}, {0x11ee0, 0x11ef2}, + {0x11fb0, 0x11fb0}, {0x12000, 0x12399}, {0x12400, 0x1246e}, + {0x12480, 0x12543}, {0x12f90, 0x12ff0}, {0x13000, 0x1342e}, + {0x14400, 0x14646}, {0x16800, 0x16a38}, {0x16a40, 0x16a5e}, + {0x16a70, 0x16abe}, {0x16ad0, 0x16aed}, {0x16b00, 0x16b2f}, + {0x16b40, 0x16b43}, {0x16b63, 0x16b77}, {0x16b7d, 0x16b8f}, + {0x16e40, 0x16e7f}, {0x16f00, 0x16f4a}, {0x16f50, 0x16f50}, + {0x16f93, 0x16f9f}, {0x16fe0, 0x16fe1}, {0x16fe3, 0x16fe3}, + {0x17000, 0x187f7}, {0x18800, 0x18cd5}, {0x18d00, 0x18d08}, + {0x1aff0, 0x1aff3}, {0x1aff5, 0x1affb}, {0x1affd, 0x1affe}, + {0x1b000, 0x1b122}, {0x1b150, 0x1b152}, {0x1b164, 0x1b167}, + {0x1b170, 0x1b2fb}, {0x1bc00, 0x1bc6a}, {0x1bc70, 0x1bc7c}, + {0x1bc80, 0x1bc88}, {0x1bc90, 0x1bc99}, {0x1d400, 0x1d454}, + {0x1d456, 0x1d49c}, {0x1d49e, 0x1d49f}, {0x1d4a2, 0x1d4a2}, + {0x1d4a5, 0x1d4a6}, {0x1d4a9, 0x1d4ac}, {0x1d4ae, 0x1d4b9}, + {0x1d4bb, 0x1d4bb}, {0x1d4bd, 0x1d4c3}, {0x1d4c5, 0x1d505}, + {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514}, {0x1d516, 0x1d51c}, + {0x1d51e, 0x1d539}, {0x1d53b, 0x1d53e}, {0x1d540, 0x1d544}, + {0x1d546, 0x1d546}, {0x1d54a, 0x1d550}, {0x1d552, 0x1d6a5}, + {0x1d6a8, 0x1d6c0}, {0x1d6c2, 0x1d6da}, {0x1d6dc, 0x1d6fa}, + {0x1d6fc, 0x1d714}, {0x1d716, 0x1d734}, {0x1d736, 0x1d74e}, + {0x1d750, 0x1d76e}, {0x1d770, 0x1d788}, {0x1d78a, 0x1d7a8}, + {0x1d7aa, 0x1d7c2}, {0x1d7c4, 0x1d7cb}, {0x1df00, 0x1df1e}, + {0x1e100, 0x1e12c}, {0x1e137, 0x1e13d}, {0x1e14e, 0x1e14e}, + {0x1e290, 0x1e2ad}, {0x1e2c0, 0x1e2eb}, {0x1e7e0, 0x1e7e6}, + {0x1e7e8, 0x1e7eb}, {0x1e7ed, 0x1e7ee}, {0x1e7f0, 0x1e7fe}, + {0x1e800, 0x1e8c4}, {0x1e900, 0x1e943}, {0x1e94b, 0x1e94b}, + {0x1ee00, 0x1ee03}, {0x1ee05, 0x1ee1f}, {0x1ee21, 0x1ee22}, + {0x1ee24, 0x1ee24}, {0x1ee27, 0x1ee27}, {0x1ee29, 0x1ee32}, + {0x1ee34, 0x1ee37}, {0x1ee39, 0x1ee39}, {0x1ee3b, 0x1ee3b}, + {0x1ee42, 0x1ee42}, {0x1ee47, 0x1ee47}, {0x1ee49, 0x1ee49}, + {0x1ee4b, 0x1ee4b}, {0x1ee4d, 0x1ee4f}, {0x1ee51, 0x1ee52}, + {0x1ee54, 0x1ee54}, {0x1ee57, 0x1ee57}, {0x1ee59, 0x1ee59}, + {0x1ee5b, 0x1ee5b}, {0x1ee5d, 0x1ee5d}, {0x1ee5f, 0x1ee5f}, + {0x1ee61, 0x1ee62}, {0x1ee64, 0x1ee64}, {0x1ee67, 0x1ee6a}, + {0x1ee6c, 0x1ee72}, {0x1ee74, 0x1ee77}, {0x1ee79, 0x1ee7c}, + {0x1ee7e, 0x1ee7e}, {0x1ee80, 0x1ee89}, {0x1ee8b, 0x1ee9b}, + {0x1eea1, 0x1eea3}, {0x1eea5, 0x1eea9}, {0x1eeab, 0x1eebb}, + {0x20000, 0x2a6df}, {0x2a700, 0x2b738}, {0x2b740, 0x2b81d}, + {0x2b820, 0x2cea1}, {0x2ceb0, 0x2ebe0}, 
{0x2f800, 0x2fa1d}, + {0x30000, 0x3134a}, +}; + +// Number of ranges in kXIDStartRanges +constexpr size_t kNumXIDStartRanges = + sizeof(kXIDStartRanges) / sizeof(kXIDStartRanges[0]); + +// The additional code point interval ranges for the Unicode 14 XID_Continue +// set. This extends the values in kXIDStartRanges. +// This array needs to be in ascending order. +constexpr CodePointRange kXIDContinueRanges[] = { + {0x00030, 0x00039}, {0x0005f, 0x0005f}, {0x000b7, 0x000b7}, + {0x00300, 0x0036f}, {0x00387, 0x00387}, {0x00483, 0x00487}, + {0x00591, 0x005bd}, {0x005bf, 0x005bf}, {0x005c1, 0x005c2}, + {0x005c4, 0x005c5}, {0x005c7, 0x005c7}, {0x00610, 0x0061a}, + {0x0064b, 0x00669}, {0x00670, 0x00670}, {0x006d6, 0x006dc}, + {0x006df, 0x006e4}, {0x006e7, 0x006e8}, {0x006ea, 0x006ed}, + {0x006f0, 0x006f9}, {0x00711, 0x00711}, {0x00730, 0x0074a}, + {0x007a6, 0x007b0}, {0x007c0, 0x007c9}, {0x007eb, 0x007f3}, + {0x007fd, 0x007fd}, {0x00816, 0x00819}, {0x0081b, 0x00823}, + {0x00825, 0x00827}, {0x00829, 0x0082d}, {0x00859, 0x0085b}, + {0x00898, 0x0089f}, {0x008ca, 0x008e1}, {0x008e3, 0x00903}, + {0x0093a, 0x0093c}, {0x0093e, 0x0094f}, {0x00951, 0x00957}, + {0x00962, 0x00963}, {0x00966, 0x0096f}, {0x00981, 0x00983}, + {0x009bc, 0x009bc}, {0x009be, 0x009c4}, {0x009c7, 0x009c8}, + {0x009cb, 0x009cd}, {0x009d7, 0x009d7}, {0x009e2, 0x009e3}, + {0x009e6, 0x009ef}, {0x009fe, 0x009fe}, {0x00a01, 0x00a03}, + {0x00a3c, 0x00a3c}, {0x00a3e, 0x00a42}, {0x00a47, 0x00a48}, + {0x00a4b, 0x00a4d}, {0x00a51, 0x00a51}, {0x00a66, 0x00a71}, + {0x00a75, 0x00a75}, {0x00a81, 0x00a83}, {0x00abc, 0x00abc}, + {0x00abe, 0x00ac5}, {0x00ac7, 0x00ac9}, {0x00acb, 0x00acd}, + {0x00ae2, 0x00ae3}, {0x00ae6, 0x00aef}, {0x00afa, 0x00aff}, + {0x00b01, 0x00b03}, {0x00b3c, 0x00b3c}, {0x00b3e, 0x00b44}, + {0x00b47, 0x00b48}, {0x00b4b, 0x00b4d}, {0x00b55, 0x00b57}, + {0x00b62, 0x00b63}, {0x00b66, 0x00b6f}, {0x00b82, 0x00b82}, + {0x00bbe, 0x00bc2}, {0x00bc6, 0x00bc8}, {0x00bca, 0x00bcd}, + {0x00bd7, 0x00bd7}, {0x00be6, 0x00bef}, {0x00c00, 0x00c04}, + {0x00c3c, 0x00c3c}, {0x00c3e, 0x00c44}, {0x00c46, 0x00c48}, + {0x00c4a, 0x00c4d}, {0x00c55, 0x00c56}, {0x00c62, 0x00c63}, + {0x00c66, 0x00c6f}, {0x00c81, 0x00c83}, {0x00cbc, 0x00cbc}, + {0x00cbe, 0x00cc4}, {0x00cc6, 0x00cc8}, {0x00cca, 0x00ccd}, + {0x00cd5, 0x00cd6}, {0x00ce2, 0x00ce3}, {0x00ce6, 0x00cef}, + {0x00d00, 0x00d03}, {0x00d3b, 0x00d3c}, {0x00d3e, 0x00d44}, + {0x00d46, 0x00d48}, {0x00d4a, 0x00d4d}, {0x00d57, 0x00d57}, + {0x00d62, 0x00d63}, {0x00d66, 0x00d6f}, {0x00d81, 0x00d83}, + {0x00dca, 0x00dca}, {0x00dcf, 0x00dd4}, {0x00dd6, 0x00dd6}, + {0x00dd8, 0x00ddf}, {0x00de6, 0x00def}, {0x00df2, 0x00df3}, + {0x00e31, 0x00e31}, {0x00e33, 0x00e3a}, {0x00e47, 0x00e4e}, + {0x00e50, 0x00e59}, {0x00eb1, 0x00eb1}, {0x00eb3, 0x00ebc}, + {0x00ec8, 0x00ecd}, {0x00ed0, 0x00ed9}, {0x00f18, 0x00f19}, + {0x00f20, 0x00f29}, {0x00f35, 0x00f35}, {0x00f37, 0x00f37}, + {0x00f39, 0x00f39}, {0x00f3e, 0x00f3f}, {0x00f71, 0x00f84}, + {0x00f86, 0x00f87}, {0x00f8d, 0x00f97}, {0x00f99, 0x00fbc}, + {0x00fc6, 0x00fc6}, {0x0102b, 0x0103e}, {0x01040, 0x01049}, + {0x01056, 0x01059}, {0x0105e, 0x01060}, {0x01062, 0x01064}, + {0x01067, 0x0106d}, {0x01071, 0x01074}, {0x01082, 0x0108d}, + {0x0108f, 0x0109d}, {0x0135d, 0x0135f}, {0x01369, 0x01371}, + {0x01712, 0x01715}, {0x01732, 0x01734}, {0x01752, 0x01753}, + {0x01772, 0x01773}, {0x017b4, 0x017d3}, {0x017dd, 0x017dd}, + {0x017e0, 0x017e9}, {0x0180b, 0x0180d}, {0x0180f, 0x01819}, + {0x018a9, 0x018a9}, {0x01920, 0x0192b}, {0x01930, 0x0193b}, + {0x01946, 0x0194f}, {0x019d0, 
0x019da}, {0x01a17, 0x01a1b}, + {0x01a55, 0x01a5e}, {0x01a60, 0x01a7c}, {0x01a7f, 0x01a89}, + {0x01a90, 0x01a99}, {0x01ab0, 0x01abd}, {0x01abf, 0x01ace}, + {0x01b00, 0x01b04}, {0x01b34, 0x01b44}, {0x01b50, 0x01b59}, + {0x01b6b, 0x01b73}, {0x01b80, 0x01b82}, {0x01ba1, 0x01bad}, + {0x01bb0, 0x01bb9}, {0x01be6, 0x01bf3}, {0x01c24, 0x01c37}, + {0x01c40, 0x01c49}, {0x01c50, 0x01c59}, {0x01cd0, 0x01cd2}, + {0x01cd4, 0x01ce8}, {0x01ced, 0x01ced}, {0x01cf4, 0x01cf4}, + {0x01cf7, 0x01cf9}, {0x01dc0, 0x01dff}, {0x0203f, 0x02040}, + {0x02054, 0x02054}, {0x020d0, 0x020dc}, {0x020e1, 0x020e1}, + {0x020e5, 0x020f0}, {0x02cef, 0x02cf1}, {0x02d7f, 0x02d7f}, + {0x02de0, 0x02dff}, {0x0302a, 0x0302f}, {0x03099, 0x0309a}, + {0x0a620, 0x0a629}, {0x0a66f, 0x0a66f}, {0x0a674, 0x0a67d}, + {0x0a69e, 0x0a69f}, {0x0a6f0, 0x0a6f1}, {0x0a802, 0x0a802}, + {0x0a806, 0x0a806}, {0x0a80b, 0x0a80b}, {0x0a823, 0x0a827}, + {0x0a82c, 0x0a82c}, {0x0a880, 0x0a881}, {0x0a8b4, 0x0a8c5}, + {0x0a8d0, 0x0a8d9}, {0x0a8e0, 0x0a8f1}, {0x0a8ff, 0x0a909}, + {0x0a926, 0x0a92d}, {0x0a947, 0x0a953}, {0x0a980, 0x0a983}, + {0x0a9b3, 0x0a9c0}, {0x0a9d0, 0x0a9d9}, {0x0a9e5, 0x0a9e5}, + {0x0a9f0, 0x0a9f9}, {0x0aa29, 0x0aa36}, {0x0aa43, 0x0aa43}, + {0x0aa4c, 0x0aa4d}, {0x0aa50, 0x0aa59}, {0x0aa7b, 0x0aa7d}, + {0x0aab0, 0x0aab0}, {0x0aab2, 0x0aab4}, {0x0aab7, 0x0aab8}, + {0x0aabe, 0x0aabf}, {0x0aac1, 0x0aac1}, {0x0aaeb, 0x0aaef}, + {0x0aaf5, 0x0aaf6}, {0x0abe3, 0x0abea}, {0x0abec, 0x0abed}, + {0x0abf0, 0x0abf9}, {0x0fb1e, 0x0fb1e}, {0x0fe00, 0x0fe0f}, + {0x0fe20, 0x0fe2f}, {0x0fe33, 0x0fe34}, {0x0fe4d, 0x0fe4f}, + {0x0ff10, 0x0ff19}, {0x0ff3f, 0x0ff3f}, {0x0ff9e, 0x0ff9f}, + {0x101fd, 0x101fd}, {0x102e0, 0x102e0}, {0x10376, 0x1037a}, + {0x104a0, 0x104a9}, {0x10a01, 0x10a03}, {0x10a05, 0x10a06}, + {0x10a0c, 0x10a0f}, {0x10a38, 0x10a3a}, {0x10a3f, 0x10a3f}, + {0x10ae5, 0x10ae6}, {0x10d24, 0x10d27}, {0x10d30, 0x10d39}, + {0x10eab, 0x10eac}, {0x10f46, 0x10f50}, {0x10f82, 0x10f85}, + {0x11000, 0x11002}, {0x11038, 0x11046}, {0x11066, 0x11070}, + {0x11073, 0x11074}, {0x1107f, 0x11082}, {0x110b0, 0x110ba}, + {0x110c2, 0x110c2}, {0x110f0, 0x110f9}, {0x11100, 0x11102}, + {0x11127, 0x11134}, {0x11136, 0x1113f}, {0x11145, 0x11146}, + {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111b3, 0x111c0}, + {0x111c9, 0x111cc}, {0x111ce, 0x111d9}, {0x1122c, 0x11237}, + {0x1123e, 0x1123e}, {0x112df, 0x112ea}, {0x112f0, 0x112f9}, + {0x11300, 0x11303}, {0x1133b, 0x1133c}, {0x1133e, 0x11344}, + {0x11347, 0x11348}, {0x1134b, 0x1134d}, {0x11357, 0x11357}, + {0x11362, 0x11363}, {0x11366, 0x1136c}, {0x11370, 0x11374}, + {0x11435, 0x11446}, {0x11450, 0x11459}, {0x1145e, 0x1145e}, + {0x114b0, 0x114c3}, {0x114d0, 0x114d9}, {0x115af, 0x115b5}, + {0x115b8, 0x115c0}, {0x115dc, 0x115dd}, {0x11630, 0x11640}, + {0x11650, 0x11659}, {0x116ab, 0x116b7}, {0x116c0, 0x116c9}, + {0x1171d, 0x1172b}, {0x11730, 0x11739}, {0x1182c, 0x1183a}, + {0x118e0, 0x118e9}, {0x11930, 0x11935}, {0x11937, 0x11938}, + {0x1193b, 0x1193e}, {0x11940, 0x11940}, {0x11942, 0x11943}, + {0x11950, 0x11959}, {0x119d1, 0x119d7}, {0x119da, 0x119e0}, + {0x119e4, 0x119e4}, {0x11a01, 0x11a0a}, {0x11a33, 0x11a39}, + {0x11a3b, 0x11a3e}, {0x11a47, 0x11a47}, {0x11a51, 0x11a5b}, + {0x11a8a, 0x11a99}, {0x11c2f, 0x11c36}, {0x11c38, 0x11c3f}, + {0x11c50, 0x11c59}, {0x11c92, 0x11ca7}, {0x11ca9, 0x11cb6}, + {0x11d31, 0x11d36}, {0x11d3a, 0x11d3a}, {0x11d3c, 0x11d3d}, + {0x11d3f, 0x11d45}, {0x11d47, 0x11d47}, {0x11d50, 0x11d59}, + {0x11d8a, 0x11d8e}, {0x11d90, 0x11d91}, {0x11d93, 0x11d97}, + {0x11da0, 0x11da9}, {0x11ef3, 0x11ef6}, {0x16a60, 
0x16a69},
+    {0x16ac0, 0x16ac9}, {0x16af0, 0x16af4}, {0x16b30, 0x16b36},
+    {0x16b50, 0x16b59}, {0x16f4f, 0x16f4f}, {0x16f51, 0x16f87},
+    {0x16f8f, 0x16f92}, {0x16fe4, 0x16fe4}, {0x16ff0, 0x16ff1},
+    {0x1bc9d, 0x1bc9e}, {0x1cf00, 0x1cf2d}, {0x1cf30, 0x1cf46},
+    {0x1d165, 0x1d169}, {0x1d16d, 0x1d172}, {0x1d17b, 0x1d182},
+    {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, {0x1d242, 0x1d244},
+    {0x1d7ce, 0x1d7ff}, {0x1da00, 0x1da36}, {0x1da3b, 0x1da6c},
+    {0x1da75, 0x1da75}, {0x1da84, 0x1da84}, {0x1da9b, 0x1da9f},
+    {0x1daa1, 0x1daaf}, {0x1e000, 0x1e006}, {0x1e008, 0x1e018},
+    {0x1e01b, 0x1e021}, {0x1e023, 0x1e024}, {0x1e026, 0x1e02a},
+    {0x1e130, 0x1e136}, {0x1e140, 0x1e149}, {0x1e2ae, 0x1e2ae},
+    {0x1e2ec, 0x1e2f9}, {0x1e8d0, 0x1e8d6}, {0x1e944, 0x1e94a},
+    {0x1e950, 0x1e959}, {0x1fbf0, 0x1fbf9}, {0xe0100, 0xe01ef},
+};
+
+// Number of ranges in kXIDContinueRanges
+constexpr size_t kNumXIDContinueRanges =
+    sizeof(kXIDContinueRanges) / sizeof(kXIDContinueRanges[0]);
+
+/// @param code_point the input code_point
+/// @return true if the code_point is part of the XIDStart unicode set
+bool is_xid_start(CodePoint code_point) {
+  // Fast path for ASCII.
+  if ((code_point >= 'a' && code_point <= 'z') ||
+      (code_point >= 'A' && code_point <= 'Z')) {
+    return true;
+  }
+  // With [a-zA-Z] handled, nothing less than the next sequence start can be
+  // XIDStart, so filter them all out. This catches most of the common symbols
+  // that are used in ASCII.
+  if (code_point < 0x000aa) {
+    return false;
+  }
+  return std::binary_search(kXIDStartRanges,
+                            kXIDStartRanges + kNumXIDStartRanges, code_point);
+}
+
+/// @param code_point the input code_point
+/// @return true if the code_point is part of the XIDContinue unicode set
+bool is_xid_continue(CodePoint code_point) {
+  // Short circuit ASCII. The binary search will find these last, but most
+  // of our current source is ASCII, so handle them quicker.
+  if ((code_point >= '0' && code_point <= '9') || code_point == '_') {
+    return true;
+  }
+  return is_xid_start(code_point) ||
+         std::binary_search(kXIDContinueRanges,
+                            kXIDContinueRanges + kNumXIDContinueRanges,
+                            code_point);
+}
+
+/// @return true if @p code_point is considered a whitespace
+bool is_space(CodePoint code_point) {
+  switch (code_point) {
+    case 0x0020:
+    case 0x0009:
+    case 0x000a:
+    case 0x000b:
+    case 0x000c:
+    case 0x000d:
+    case 0x0085:
+    case 0x200e:
+    case 0x200f:
+    case 0x2028:
+    case 0x2029:
+      return true;
+    default:
+      return false;
+  }
+}
+
+/// A fixed-capacity, dynamically sized queue of bits (expressed as bools)
+template <size_t CAPACITY_IN_BITS>
+class BitQueue {
+ public:
+  /// @param index the index of the bit starting from the front
+  /// @return the bit value
+  auto operator[](size_t index) {
+    assert(index < count());  // TODO(dneto): this should error out.
+    return bits_[(index + read_offset_) % CAPACITY_IN_BITS];
+  }
+
+  /// Removes the bit at the front of the queue
+  /// @returns the value of the bit that was removed
+  bool pop_front() {
+    assert(count_ > 0);
+    bool value = (*this)[0];
+    count_--;
+    read_offset_++;
+    return value;
+  }
+
+  /// Appends a bit to the back of the queue
+  void push_back(bool value) {
+    assert(count_ < CAPACITY_IN_BITS);
+    count_++;
+    (*this)[count_ - 1] = value;
+  }
+
+  /// @returns true if the queue holds no bits.
+  bool empty() const { return count_ == 0; }
+
+  /// @returns the number of bits held by the queue.
+  size_t count() const { return count_; }
+
+ private:
+  std::bitset<CAPACITY_IN_BITS> bits_;
+  size_t count_ = 0;        // number of bits contained
+  size_t read_offset_ = 0;  // read offset in bits
+  //
+#if ENABLE_LOGGING
+ public:
+  void to_chars(std::string& str) {
+    std::stringstream ss;
+    ss << count_ << ":";
+    for (auto i = 0; i < count_; ++i) {
+      bool is_template = (*this)[i];
+      ss << (is_template ? "#" : ".");
+    }
+    str = ss.str();
+  }
+#endif
+};
+
+class Lexer {
+ public:
+  Lexer(TSLexer* l) : lexer_(l) {}
+
+  /// Advances the lexer by one code point.
+  void advance() { lexer_->advance(lexer_, /* whitespace */ false); }
+
+  /// Returns the next code point, advancing the lexer by one code point.
+  CodePoint next() {
+    // TODO(dneto): should assert !lexer_->eof(lexer_)
+    CodePoint lookahead = lexer_->lookahead;
+    advance();
+    return lookahead;
+  }
+
+  /// @return the next code point without advancing the lexer, or kEOF if there
+  /// are no more code points
+  CodePoint peek() { return lexer_->eof(lexer_) ? kEOF : lexer_->lookahead; }
+
+  /// @return true if the next code point is equal to @p code_point.
+  /// @note if the code point was found, then the lexer is advanced to that
+  /// code point.
+  bool match(CodePoint code_point) {
+    if (peek() == code_point) {
+      advance();
+      return true;
+    }
+    return false;
+  }
+
+  /// @return true if the next code point is found in @p code_points.
+  /// @note if the code point was found, then the lexer is advanced to that
+  /// code point.
+  bool match_anyof(std::initializer_list<CodePoint> code_points) {
+    for (CodePoint code_point : code_points) {
+      if (match(code_point)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /// Attempts to match an identifier pattern that starts with XIDStart
+  /// followed by any number of XIDContinue code points.
+  bool match_identifier() {
+    if (!is_xid_start(peek())) {
+      return false;
+    }
+
+    std::stringstream ss;
+    bool is_ascii = true;
+    if (CodePoint start = next(); start < 0x80) {
+      ss.put(char(start));
+    } else {
+      is_ascii = false;
+    }
+
+    while (true) {
+      if (!is_xid_continue(peek())) {
+        break;
+      }
+      if (CodePoint code_point = next(); code_point < 0x80) {
+        ss.put(char(code_point));
+      } else {
+        is_ascii = false;
+      }
+    }
+
+    if (is_ascii) {
+      LOG("ident: '%s'", ss.str().c_str());
+    } else {
+      LOG("ident");
+    }
+
+    return true;
+  }
+
+  /// Attempts to match a /* block comment */
+  bool match_block_comment() {
+    // TODO(dneto): Need to un-advance if matched '/' but not '*'
+    if (!match('/') || !match('*')) {
+      return false;
+    }
+
+    size_t nesting = 1;
+    while (nesting > 0 && !match(kEOF)) {
+      // TODO(dneto): If we match '/' but not '*' there is no way to un-advance
+      // back to make '/' the lookahead.
+      if (match('/') && match('*')) {
+        nesting++;
+        // TODO(dneto): Same here, need to be able to un-advance to before '*'
+      } else if (match('*') && match('/')) {
+        nesting--;
+      } else {
+        next();
+      }
+    }
+    return true;
+  }
+
+  /// Advances the lexer while the next code point is considered whitespace
+  void skip_whitespace() {
+    while (is_space(peek())) {
+      lexer_->advance(lexer_, /* whitespace */ true);
+    }
+  }
+
+ private:
+  TSLexer* lexer_;
+};
+
+struct Scanner {
+  struct State {
+    BitQueue<1024> lt_is_tmpl;  // Queue of disambiguated '<'
+    BitQueue<1024> gt_is_tmpl;  // Queue of disambiguated '>'
+    bool empty() const { return lt_is_tmpl.empty() && gt_is_tmpl.empty(); }
+  };
+  State state;
+  static_assert(sizeof(State) < TREE_SITTER_SERIALIZATION_BUFFER_SIZE);
+  // State is trivially copyable, so it can be serialized and deserialized
+  // with memcpy.
+  static_assert(std::is_trivially_copyable<State>::value);
+
+  /// Updates #state with the disambiguated '<' and '>' tokens.
+  /// The following assumptions are made on entry:
+  /// * lexer has just advanced to the end of an identifier
+  /// On exit, all '<' and '>' template tokens will be paired up to the closing
+  /// '>' for the first '<'.
+  void classify_template_args(Lexer& lexer) {
+    LOG("classify_template_args()");
+
+    if (!lexer.match('<')) {
+      LOG("  missing '<'");
+      return;
+    }
+
+    // The current expression nesting depth.
+    size_t expr_depth = 0;
+
+    // A stack of '<' tokens.
+    // Used to pair '<' and '>' tokens at the same expression depth.
+    struct StackEntry {
+      size_t index;       // Index of the opening '<' in lt_is_tmpl
+      size_t expr_depth;  // The value of 'expr_depth' for the opening '<'
+    };
+    std::vector<StackEntry> lt_stack;
+
+    LOG("classify_template_args() '<'");
+    lt_stack.push_back(StackEntry{state.lt_is_tmpl.count(), expr_depth});
+    state.lt_is_tmpl.push_back(false);  // Default to less-than
+
+    while (!lt_stack.empty() && !lexer.match(kEOF)) {
+      lexer.skip_whitespace();
+
+      // TODO: skip line-ending comments.
+      if (lexer.match_block_comment()) {
+        continue;
+      }
+
+      if (lexer.match_identifier()) {
+        lexer.skip_whitespace();  // TODO: Skip comments
+        if (lexer.match('<')) {
+          LOG("classify_template_args() '<'");
+          lt_stack.push_back(StackEntry{state.lt_is_tmpl.count(), expr_depth});
+          state.lt_is_tmpl.push_back(false);  // Default to less-than
+        }
+        continue;
+      }
+
+      if (lexer.match('>')) {
+        LOG("classify_template_args() '>'");
+        if (!lt_stack.empty() && lt_stack.back().expr_depth == expr_depth) {
+          LOG("  TEMPLATE MATCH");
+          state.gt_is_tmpl.push_back(true);
+          state.lt_is_tmpl[lt_stack.back().index] = true;
+          lt_stack.pop_back();
+        } else {
+          LOG("  non-template '>'");
+          state.gt_is_tmpl.push_back(false);
+        }
+        continue;
+      }
+
+      if (lexer.match_anyof({'(', '['})) {
+        LOG("  expr_depth++");
+        // Entering a nested expression
+        expr_depth++;
+        continue;
+      }
+
+      if (lexer.match_anyof({')', ']'})) {
+        LOG("  expr_depth--");
+        // Exiting a nested expression
+        // Pop the stack until we return to the current expression
+        // expr_depth
+        while (!lt_stack.empty() && lt_stack.back().expr_depth == expr_depth) {
+          lt_stack.pop_back();
+        }
+        if (expr_depth > 0) {
+          expr_depth--;
+        }
+        continue;
+      }
+
+      if (lexer.match_anyof({';', '{', '=', ':'})) {
+        LOG("  expression terminator");
+        // Expression terminating tokens. No opening template list can
+        // hold these tokens, so clear the stack and expression depth.
+        expr_depth = 0;
+        lt_stack.clear();
+        continue;
+      }
+
+      bool short_circuit = false;
+      if (lexer.match('&')) {
+        short_circuit = lexer.match('&');
+      } else if (lexer.match('|')) {
+        short_circuit = lexer.match('|');
+      }
+      if (short_circuit) {
+        LOG("  short-circuiting expression");
+        // Treat 'a < b || c > d' as a logical binary operator of two
+        // comparison operators instead of a single template argument
+        // 'b||c'. Use parentheses around 'b||c' to parse as a
+        // template argument list.
+        while (!lt_stack.empty() && lt_stack.back().expr_depth == expr_depth) {
+          lt_stack.pop_back();
+        }
+        continue;
+      }
+
+      LOG("  skip: '%c'", char(lexer.peek()));
+      lexer.next();
+    }
+  }
+
+  std::string valids(const bool* const valid_symbols) {
+    std::string result;
+    for (int i = 0; i < static_cast<int>(ERROR); i++) {
+      result += std::string(valid_symbols[i] ? "+" : "_");
+    }
+    for (int i = 0; i < static_cast<int>(ERROR); i++) {
+      if (valid_symbols[i]) {
+        result += std::string(" ") + str(static_cast<Token>(i), true);
+      }
+    }
+    return result;
+  }
+
+  /// The external token scanner function. Handles block comments and
+  /// template-argument-list vs less-than / greater-than disambiguation.
+  /// @return true if lexer->result_symbol was assigned a Token, or
+  /// false if the token should be taken from the regular WGSL tree-sitter
+  /// grammar.
+  bool scan(TSLexer* ts_lexer, const bool* const valid_symbols) {
+    Lexer lexer{ts_lexer};
+
+    LOG("scan: '%c' [%u] %s", char(lexer.peek()),
+        unsigned(ts_lexer->get_column(ts_lexer)),
+        valids(valid_symbols).c_str());
+
+    if (valid_symbols[Token::ERROR]) {
+      ts_lexer->result_symbol = Token::ERROR;
+      return true;
+    }
+
+    if (valid_symbols[Token::DISAMBIGUATE_TEMPLATE]) {
+      // The parser is telling us the _disambiguate_template token
+      // may appear at the current position.
+      // The next token may be the start of a template list, so
+      // scan forward and use the token-list disambiguation
+      // algorithm to mark template-list-start and template-list-end
+      // tokens. These are recorded in the lt and gt bit queues.
+
+      // Call mark_end so that we can "advance" past code points without
+      // automatically including them in the resulting token.
+      ts_lexer->mark_end(ts_lexer);
+      ts_lexer->result_symbol = Token::DISAMBIGUATE_TEMPLATE;
+
+      // TODO(dneto): should also skip comments, both line comments
+      // and block comments.
+      lexer.skip_whitespace();
+      if (lexer.peek() == '<') {
+        if (state.lt_is_tmpl.empty()) {
+          classify_template_args(lexer);
+        }
+      }
+
+      // This has to return true so that Treesitter will save
+      // the state generated by the disambiguation scan.
+      return true;
+    }
+
+    lexer.skip_whitespace();
+
+    auto match = [&](Token token) {
+      ts_lexer->mark_end(ts_lexer);
+      ts_lexer->result_symbol = token;
+      return true;
+    };
+
+    // TODO(dneto): checkpoint and rewind if failed.
+    if (lexer.match_block_comment()) {
+      return match(Token::BLOCK_COMMENT);
+    }
+
+    // TODO(dneto): Check the valid_symbols array first.
+    if (lexer.match('<')) {
+      if (!state.lt_is_tmpl.empty() && state.lt_is_tmpl.pop_front()) {
+        return match(Token::TEMPLATE_ARGS_START);
+      }
+      if (lexer.match('=')) {
+        return match(Token::LESS_THAN_EQUAL);
+      }
+      if (lexer.match('<')) {
+        if (lexer.match('=')) {
+          return match(Token::SHIFT_LEFT_ASSIGN);
+        }
+        return match(Token::SHIFT_LEFT);
+      }
+      return match(Token::LESS_THAN);
+    }
+
+    // TODO(dneto): Check the valid_symbols array first.
+    if (lexer.match('>')) {
+      if (!state.gt_is_tmpl.empty() && state.gt_is_tmpl.pop_front()) {
+        return match(Token::TEMPLATE_ARGS_END);
+      }
+      if (lexer.match('=')) {
+        return match(Token::GREATER_THAN_EQUAL);
+      }
+      if (lexer.match('>')) {
+        if (lexer.match('=')) {
+          return match(Token::SHIFT_RIGHT_ASSIGN);
+        }
+        return match(Token::SHIFT_RIGHT);
+      }
+      return match(Token::GREATER_THAN);
+    }
+
+    return false;  // Use regular parsing
+  }
+
+  /// Serializes the scanner state into @p buffer.
+  unsigned serialize(char* buffer) {
+    if (state.empty()) {
+      return 0;
+    }
+#if ENABLE_LOGGING
+    std::string lt_str;
+    state.lt_is_tmpl.to_chars(lt_str);
+    std::string gt_str;
+    state.gt_is_tmpl.to_chars(gt_str);
+    LOG("serialize(lt_is_tmpl: %s, gt_is_tmpl: %s)", lt_str.c_str(),
+        gt_str.c_str());
+#endif
+    size_t bytes_written = 0;
+    auto write = [&](const void* data, size_t num_bytes) {
+      assert(bytes_written + num_bytes <=
+             TREE_SITTER_SERIALIZATION_BUFFER_SIZE);
+      memcpy(buffer + bytes_written, data, num_bytes);
+      bytes_written += num_bytes;
+    };
+    write(&state.lt_is_tmpl, sizeof(state.lt_is_tmpl));
+    write(&state.gt_is_tmpl, sizeof(state.gt_is_tmpl));
+    // TODO(dneto): the implicit conversion to 'unsigned' may be narrowing.
+    return bytes_written;
+  }
+
+  /// Deserializes the scanner state from @p buffer.
+  void deserialize(const char* const buffer, unsigned length) {
+    if (length == 0) {
+      state = {};
+    } else {
+      size_t bytes_read = 0;
+      auto read = [&](void* data, size_t num_bytes) {
+        assert(bytes_read + num_bytes <= length);
+        memcpy(data, buffer + bytes_read, num_bytes);
+        bytes_read += num_bytes;
+      };
+      read(&state.lt_is_tmpl, sizeof(state.lt_is_tmpl));
+      read(&state.gt_is_tmpl, sizeof(state.gt_is_tmpl));
+#if ENABLE_LOGGING
+      std::string lt_str;
+      state.lt_is_tmpl.to_chars(lt_str);
+      std::string gt_str;
+      state.gt_is_tmpl.to_chars(gt_str);
+      LOG("deserialize(lt_is_tmpl: %s, gt_is_tmpl: %s)", lt_str.c_str(),
+          gt_str.c_str());
+#endif
+      assert(bytes_read == length);
+    }
+  }
+};
+
+}  // anonymous namespace
+
+extern "C" {
+// Called once when language is set on a parser.
+// Allocates memory for storing scanner state.
+void* tree_sitter_wgsl_external_scanner_create() {
+  return new Scanner();
+}
+
+// Called once parser is deleted or different language set.
+// Frees memory storing scanner state.
+void tree_sitter_wgsl_external_scanner_destroy(void* const payload) {
+  Scanner* const scanner = static_cast<Scanner*>(payload);
+  delete scanner;
+}
+
+// Called whenever this scanner recognizes a token.
+// Serializes scanner state into buffer.
+unsigned tree_sitter_wgsl_external_scanner_serialize(void* const payload,
+                                                     char* const buffer) {
+  Scanner* scanner = static_cast<Scanner*>(payload);
+  return scanner->serialize(buffer);
+}
+
+// Called when handling edits and ambiguities.
+// Deserializes scanner state from buffer.
+void tree_sitter_wgsl_external_scanner_deserialize(void* const payload,
+                                                   const char* const buffer,
+                                                   unsigned const length) {
+  Scanner* const scanner = static_cast<Scanner*>(payload);
+  scanner->deserialize(buffer, length);
+}
+
+// Scans for tokens.
+bool tree_sitter_wgsl_external_scanner_scan(void* const payload,
+                                            TSLexer* const lexer,
+                                            const bool* const valid_symbols) {
+  Scanner* const scanner = static_cast<Scanner*>(payload);
+  if (scanner->scan(lexer, valid_symbols)) {
+    LOG("scan returned: %s", str(static_cast<Token>(lexer->result_symbol)));
+    return true;
+  }
+  return false;
+}
+
+}  // extern "C"
diff --git a/wgsl/wgsl.recursive.bs.include b/wgsl/wgsl.recursive.bs.include
index 9f333d580b..f295215069 100644
--- a/wgsl/wgsl.recursive.bs.include
+++ b/wgsl/wgsl.recursive.bs.include
@@ -163,6 +163,10 @@
compound_assignment_operator: + | [=syntax_sym/shift_left_assign=] + + | [=syntax_sym/shift_right_assign=] + | `'%='` | `'&='` @@ -175,10 +179,6 @@ | `'/='` - | `'<<='` - - | `'>>='` - | `'^='` | `'|='` @@ -287,7 +287,7 @@ | [=recursive descent syntax/attribute=] * `'override'` [=recursive descent syntax/optionally_typed_ident=] ( `'='` [=recursive descent syntax/expression=] )? `';'` - | [=recursive descent syntax/attribute=] * `'var'` ( `'<'` [=recursive descent syntax/address_space=] ( `','` [=recursive descent syntax/access_mode=] )? `'>'` )? [=recursive descent syntax/optionally_typed_ident=] ( `'='` [=recursive descent syntax/expression=] )? `';'` + | [=recursive descent syntax/attribute=] * `'var'` ( [=syntax_sym/_template_args_start=] [=recursive descent syntax/address_space=] ( `','` [=recursive descent syntax/access_mode=] )? [=syntax_sym/_template_args_end=] )? [=recursive descent syntax/optionally_typed_ident=] ( `'='` [=recursive descent syntax/expression=] )? `';'` | `';'` @@ -423,7 +423,7 @@ | `'('` [=recursive descent syntax/expression=] `')'` - | `'bitcast'` `'<'` [=recursive descent syntax/type_specifier=] `'>'` `'('` [=recursive descent syntax/expression=] `')'` + | `'bitcast'` [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] `'('` [=recursive descent syntax/expression=] `')'`
@@ -431,17 +431,17 @@ | [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'!='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] [=syntax_sym/greater_than=] [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'<'` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] [=syntax_sym/greater_than_equal=] [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'<='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] [=syntax_sym/less_than=] [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'=='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] [=syntax_sym/less_than_equal=] [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'>'` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] `'!='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'>='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] `'=='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=]
@@ -473,9 +473,9 @@ | ( [=recursive descent syntax/multiplicative_operator=] [=recursive descent syntax/unary_expression=] )* ( [=recursive descent syntax/additive_operator=] [=recursive descent syntax/unary_expression=] ( [=recursive descent syntax/multiplicative_operator=] [=recursive descent syntax/unary_expression=] )* )* - | `'<<'` [=recursive descent syntax/unary_expression=] + | [=syntax_sym/shift_left=] [=recursive descent syntax/unary_expression=] - | `'>>'` [=recursive descent syntax/unary_expression=] + | [=syntax_sym/shift_right=] [=recursive descent syntax/unary_expression=]
@@ -595,13 +595,13 @@ | [=recursive descent syntax/depth_texture_type=] - | [=recursive descent syntax/sampled_texture_type=] `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | [=recursive descent syntax/sampled_texture_type=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] | [=recursive descent syntax/sampler_type=] - | [=recursive descent syntax/storage_texture_type=] `'<'` [=recursive descent syntax/texel_format=] `','` [=recursive descent syntax/access_mode=] `'>'` + | [=recursive descent syntax/storage_texture_type=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/texel_format=] `','` [=recursive descent syntax/access_mode=] [=syntax_sym/_template_args_end=] - | [=syntax/multisampled_texture_type=] `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | [=syntax/multisampled_texture_type=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=]
@@ -621,15 +621,15 @@
type_specifier_without_ident: - | [=recursive descent syntax/mat_prefix=] `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | [=recursive descent syntax/mat_prefix=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] | [=recursive descent syntax/texture_and_sampler_types=] - | [=recursive descent syntax/vec_prefix=] `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | [=recursive descent syntax/vec_prefix=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] - | `'array'` `'<'` [=recursive descent syntax/type_specifier=] ( `','` [=recursive descent syntax/element_count_expression=] )? `'>'` + | `'array'` [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] ( `','` [=recursive descent syntax/element_count_expression=] )? [=syntax_sym/_template_args_end=] - | `'atomic'` `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | `'atomic'` [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] | `'bool'` @@ -639,7 +639,7 @@ | `'i32'` - | `'ptr'` `'<'` [=recursive descent syntax/address_space=] `','` [=recursive descent syntax/type_specifier=] ( `','` [=recursive descent syntax/access_mode=] )? `'>'` + | `'ptr'` [=syntax_sym/_template_args_start=] [=recursive descent syntax/address_space=] `','` [=recursive descent syntax/type_specifier=] ( `','` [=recursive descent syntax/access_mode=] )? [=syntax_sym/_template_args_end=] | `'u32'`
@@ -663,7 +663,7 @@
variable_decl: - | `'var'` ( `'<'` [=recursive descent syntax/address_space=] ( `','` [=recursive descent syntax/access_mode=] )? `'>'` )? [=recursive descent syntax/optionally_typed_ident=] + | `'var'` ( [=syntax_sym/_template_args_start=] [=recursive descent syntax/address_space=] ( `','` [=recursive descent syntax/access_mode=] )? [=syntax_sym/_template_args_end=] )? [=recursive descent syntax/optionally_typed_ident=]
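The `_template_args_start` and `_template_args_end` placeholders threaded through the rules above are the parser-facing form of the classification the custom scanner records in its `lt_is_tmpl` and `gt_is_tmpl` bit queues. As a non-normative illustration, reusing the hypothetical `discover_template_lists` helper sketched earlier, each angle bracket can be labeled the same way; multi-code-point operators such as `<=` and `<<` are ignored here for brevity.

```python
def classify_angles(src):
    """Label each '<' and '>' as template punctuation or a plain operator."""
    lists = discover_template_lists(src)
    starts = {t.start_position for t in lists}
    ends = {t.end_position for t in lists}
    labels = []
    for i, c in enumerate(src):
        if c == '<':
            labels.append((i, '_template_args_start' if i in starts else 'less_than'))
        elif c == '>':
            labels.append((i, '_template_args_end' if i in ends else 'greater_than'))
    return labels

# Both brackets of the variable qualifier and of the vector type are
# template punctuation; the '<' in a comparison stays a less_than.
print(classify_angles('var<private> v : vec3<f32>;'))
print(classify_angles('let y = a < b;'))
```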
diff --git a/wgsl/wgsl_unit_tests.py b/wgsl/wgsl_unit_tests.py
new file mode 100644
index 0000000000..5fadef0a77
--- /dev/null
+++ b/wgsl/wgsl_unit_tests.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+#
+# Copyright 2023 Google LLC
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of works must retain the original copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the original
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#
+# 3. Neither the name of the W3C nor the names of its contributors
+# may be used to endorse or promote products derived from this work
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+import argparse
+import os
+import sys
+from tree_sitter import Language, Parser
+
+SCRIPT='wgsl_unit_tests.py'
+
+class Case:
+    """
+    A test case
+    """
+    def __init__(self,text,expect_pass=True):
+        self.text = text
+        self.expect_pass = (expect_pass == True)
+
+    def __str__(self):
+        expectation = "expect_pass" if self.expect_pass else "expect_fail"
+        return "Case:{}:\n---\n{}\n---".format(expectation,self.text)
+
+class XFail(Case):
+    def __init__(self,text):
+        super().__init__(text,expect_pass=False)
+
+cases = [
+    XFail("this fails"),
+    XFail("#version 450"),
+    Case("const pi = 3.14;"),
+    Case("const b = bitcast<i32>(1u);"),
+    Case("var s: sampler;"),
+    Case("@group(0) @binding(0) var s: sampler;"),
+    Case("var<workgroup> w: i32;"),
+    Case("fn foo() {var f: i32;}"),
+    Case("var<workgroup> w: array<vec3<f32>,1>;"),
+    XFail("var<workgroup> w: array<vec3<f32>,(vec<f32>(1).x)>;"),
+    Case("var<workgroup> w: array<vec3<f32>,(vec3<f32>(1).x)>;"),
+    XFail("const c = array<i32,a>b>;"),
+    Case("var c : array<i32,select(2,3,a>b)>;"),
+    Case("const a = array<i32,(select(2,3,a>b))>();"),
+    Case("const b = array<i32,select(2,3,a>b)>();"),
+    XFail("const d : array<i32,select(2,3,a>b)>();"),
+    Case("fn main(){i=1;}"),
+    Case("fn main(){var i:i32; i=1;}"),
+    Case("var<workgroup> w: array<i32>;"),
+    Case("var<workgroup> w: array<vec3<f32>,1>;"),
+    Case("var<workgroup> w: vec3<f32>;"),
+    Case("alias t = vec3<f32>;"),
+    Case("alias t = vec3<i32>;"),
+    Case("alias t = array<i32>;"),
+    Case("var c : array<i32,2>;"),
+    XFail("var c : array<(a>b)>;"), # Type specifier must start with identifier
+    Case("fn f(p: ptr<function,i32>) {}"),
+    Case("fn m(){x++;}"),
+    Case("fn m(){x--;}"),
+    Case("fn m(){x();}"),
+]
+
+class Options:
+    def __init__(self,shared_lib):
+        self.shared_lib = shared_lib
+        self.verbose = False
+
+def run_tests(options):
+    """
+    Returns True if all tests passed
+    """
+    global cases
+    if not os.path.exists(options.shared_lib):
+        raise RuntimeError("missing shared library {}".format(options.shared_lib))
+
+    language = Language(options.shared_lib, "wgsl")
+    parser = Parser()
+    parser.set_language(language)
+
+    print("{}: ".format(SCRIPT),flush=True,end='')
+
+    num_cases = 0
+    num_errors = 0
+    for case in cases:
+        num_cases += 1
+        print(".",flush=True,end='')
+        if options.verbose:
+            print(case)
+        tree = parser.parse(bytes(case.text,"utf8"))
+        if case.expect_pass == tree.root_node.has_error:
+            num_errors += 1
+            print("**Error**")
+            print(case)
+            print(tree.root_node.sexp())
+            print("---Case end\n",flush=True)
+
+    print("{} pass {} fail ".format(num_cases-num_errors,num_errors),flush=True)
+
+    return num_errors == 0
+
+def main():
+    argparser = argparse.ArgumentParser(
+            prog='wgsl_unit_tests.py',
+            description='Unit tests for the tree-sitter WGSL parser')
+    argparser.add_argument("--verbose","-v",
+                           action='store_true',
+                           help="be verbose")
+    argparser.add_argument("--parser",
+                           help="path to the shared library for the WGSL tree-sitter parser",
+                           default="grammar/build/wgsl.so")
+
+    args = argparser.parse_args()
+    options = Options(args.parser)
+    options.verbose = args.verbose
+
+    if not run_tests(options):
+        return 1
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
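Extending the harness is a matter of appending `Case` or `XFail` records to `cases`. A hypothetical example, not part of this patch, exercising template-list punctuation:

```python
# Hypothetical extra cases (illustrative only).
cases += [
    Case("alias t = ptr<function, i32>;"),
    XFail("alias t = ptr<function i32>;"),  # missing ',' between template arguments
]
```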