diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..2fb833a5df --- /dev/null +++ b/.clang-format @@ -0,0 +1,2 @@ +# http://clang.llvm.org/docs/ClangFormatStyleOptions.html +BasedOnStyle: Chromium diff --git a/wgsl/Makefile b/wgsl/Makefile index 5374b9d6cf..6bd8439878 100644 --- a/wgsl/Makefile +++ b/wgsl/Makefile @@ -1,14 +1,14 @@ .PHONY: all clean nfkc validate lalr validate-examples all: index.html nfkc validate test diagrams -validate: lalr validate-examples -validate-examples: grammar/grammar.js +validate: lalr unit_tests validate-examples clean: - rm -f index.html index.pre.html grammar/grammar.js wgsl.lalr.txt + rm -f index.html index.pre.html index.bs.pre grammar/grammar.js wgsl.lalr.txt + rm -rf grammar/build + # Generate spec HTML from Bikeshed source. -WGSL_SOURCES:=index.bs $(wildcard wgsl.*.bs.include) +WGSL_SOURCES:=index.bs scanner.cc wgsl.recursive.bs.include wgsl.reserved.bs.include index.pre.html: $(WGSL_SOURCES) DIE_ON=everything bash ../tools/invoke-bikeshed.sh $@ $(WGSL_SOURCES) @@ -23,14 +23,28 @@ diagrams: $(MERMAID_OUTPUTS) img/%.mmd.svg: diagrams/%.mmd ../tools/invoke-mermaid.sh ../tools/mermaid.json sh ../tools/invoke-mermaid.sh -i $< -o $@ -# Extract WGSL grammar from the spec, validate it with Treesitter, -# and use Treesitter to parse many code examples in the spec. -grammar/grammar.js: index.bs extract-grammar.py - python3 ./extract-grammar.py index.bs grammar/grammar.js +TREESITTER_GRAMMAR_INPUT := grammar/grammar.js +TREESITTER_PARSER := grammar/build/wgsl.so + +# Extract the WGSL grammar from the spec, and validate it by building a Treesitter parser from it. +$(TREESITTER_GRAMMAR_INPUT) $(TREESITTER_PARSER): index.bs scanner.cc extract-grammar.py + python3 ./extract-grammar.py --spec index.bs --scanner scanner.cc --tree-sitter-dir grammar --flow xb + +.PHONY: validate-examples +# Use Treesitter to parse many code examples in the spec. +validate-examples: $(TREESITTER_PARSER) + python3 ./extract-grammar.py --flow e + +.PHONY: unit_tests +# Use Treesitter to parse code samples in the unit tests. +unit_tests: $(TREESITTER_PARSER) wgsl_unit_tests.py + python3 wgsl_unit_tests.py --parser $(TREESITTER_PARSER) # The grammar in JSON form, emitted by Treesitter. WGSL_GRAMMAR=grammar/src/grammar.json -$(WGSL_GRAMMAR) : grammar/grammar.js +$(WGSL_GRAMMAR) : $(TREESITTER_GRAMMAR_INPUT) .PHONY: nfkc nfkc: diff --git a/wgsl/analyze/Grammar.py b/wgsl/analyze/Grammar.py index d051856205..f51b7a99ee 100755 --- a/wgsl/analyze/Grammar.py +++ b/wgsl/analyze/Grammar.py @@ -44,6 +44,7 @@ import json import functools +import sys from ObjectRegistry import RegisterableObject, ObjectRegistry from collections import defaultdict @@ -323,8 +324,25 @@ def with_meta(phrase,metachar,print_option): # Print ourselves if print_option.bikeshed: context = 'recursive descent syntax' - if print_option.grammar.rules[name].is_token(): + g = print_option.grammar + if g.rules[name].is_token(): context = 'syntax' + if name in g.extra_externals: + context = 'syntax_sym' + if name == '_disambiguate_template': + # This is an implementation detail, so make it invisible.
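+ # Returning '' works because Seq.pretty_str() below filters out empty strings.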
+ return '' + else: + without_underscore = ['_less_than', + '_less_than_equal', + '_greater_than', + '_greater_than_equal', + '_shift_left', + '_shift_left_assign', + '_shift_right', + '_shift_right_assign'] + if name in without_underscore: + name = name[1:] return "[={}/{}=]".format(context,name) return name if isinstance(rule,Choice): @@ -350,7 +368,7 @@ def with_meta(phrase,metachar,print_option): # If it's not canonical, then it can have nesting. return "(" + inside + nl + ")" if isinstance(rule,Seq): - return " ".join([i.pretty_str(print_option) for i in rule]) + return " ".join(filter(lambda i: len(i)>0, [i.pretty_str(print_option) for i in rule])) if isinstance(rule,Repeat1): return "( " + "".join([i.pretty_str(print_option) for i in rule]) + " )+" raise RuntimeError("unexpected node: {}".format(str(rule))) @@ -859,6 +877,21 @@ def is_accepting(self): def at_end(self): return self.position == len(self.items()) +def json_externals(json): + """ + Returns the set of names of symbols in the "externals" section of the + Treesitter JSON grammar. + + The data looks like this for the "externals" section: + { + "externals": [ + { "type": "SYMBOL", "name": "_block_comment" }, + { "type": "SYMBOL", "name": "_error_sentinel" } + ] + } + """ + return set([ x["name"] for x in json.get("externals",[]) ]) + def json_hook(grammar,memo,tokens_only,dct): """ @@ -1801,6 +1834,22 @@ def __init__(self, json_text, start_symbol, ignore='_reserved'): # First decode it without any interpretation. pass0 = json.loads(json_text) + + # Get the external tokens; these are not necessarily represented in the rules. + external_tokens = json_externals(pass0) + #print(external_tokens,file=sys.stderr) + defined_rules = set(pass0["rules"].keys()) + # The set of external tokens that don't have an ordinary definition in the grammar. + self.extra_externals = external_tokens - defined_rules + for e in self.extra_externals: + content = "\\u200B{}".format(e) + if e == '_disambiguate_template': + # This is a zero-width token used for Treesitter's benefit. + #content = '' + pass + # Create a placeholder definition + pass0["rules"][e] = {"type":"TOKEN","content":{"type":"PATTERN","value":content}} + # Remove any rules that should be ignored # The WGSL grammar has _reserved, which includes 'attribute' but # that is also the name of a different grammar rule. @@ -1922,6 +1971,7 @@ def pretty_str(self,print_option=PrintOption()): token_rules = set() + # Look for defined rules that look better as absorbed into their uses. for name, rule in self.rules.items(): # Star-able is also optional-able, so starrable must come first. starred_phrase = rule.as_starred(name) @@ -1938,6 +1988,8 @@ def pretty_str(self,print_option=PrintOption()): if len(phrase)==1 and phrase[0].is_token(): token_rules.add(name) + # A rule that was generated to satisfy canonicalization is better + # presented as absorbed in its original parent.
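+ # Such generated rules are recognizable by the '.' or '/' in their names.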
for name, rule in self.rules.items(): # We only care about rules generated during canonicalization if name.find('.') > 0 or name.find('/') > 0: diff --git a/wgsl/extract-grammar.py b/wgsl/extract-grammar.py index 938c9fdae1..9329fe08eb 100755 --- a/wgsl/extract-grammar.py +++ b/wgsl/extract-grammar.py @@ -3,28 +3,50 @@ from datetime import date from string import Template +import argparse import os import re import subprocess import sys +import shutil +import wgsl_unit_tests +from distutils.ccompiler import new_compiler +from distutils.unixccompiler import UnixCCompiler from tree_sitter import Language, Parser -HEADER = """ -// Copyright (C) [$YEAR] World Wide Web Consortium, -// (Massachusetts Institute of Technology, European Research Consortium for -// Informatics and Mathematics, Keio University, Beihang). -// All Rights Reserved. -// -// This work is distributed under the W3C (R) Software License [1] in the hope -// that it will be useful, but WITHOUT ANY WARRANTY; without even the implied -// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -// -// [1] http://www.w3.org/Consortium/Legal/copyright-software - -// **** This file is auto-generated. Do not edit. **** - -""".lstrip() +class Options(): + def __init__(self,bs_filename, tree_sitter_dir, scanner_cc_filename): + self.script = 'extract-grammar.py' + self.bs_filename = bs_filename + self.grammar_dir = tree_sitter_dir + self.scanner_cc_filename = scanner_cc_filename + self.wgsl_shared_lib = os.path.join(self.grammar_dir,"build","wgsl.so") + self.grammar_filename = os.path.join(self.grammar_dir,"grammar.js") + self.verbose = False + + def __str__(self): + parts = [] + parts.append("script = {}".format(self.script)) + parts.append("bs_filename = {}".format(self.bs_filename)) + parts.append("grammar_dir = {}".format(self.grammar_dir)) + parts.append("grammar_filename = {}".format(self.grammar_filename)) + parts.append("scanner_cc_filename = {}".format(self.scanner_cc_filename)) + parts.append("wgsl_shared_lib = {}".format(self.wgsl_shared_lib)) + return "Options({})".format(",".join(parts)) + +def newer_than(first,second): + """ + Returns true if file 'first' is newer than 'second', + or if 'second' does not exist + """ + if not os.path.exists(first): + raise Exception("Missing file {}".format(first)) + if not os.path.exists(second): + return True + first_time = os.path.getmtime(first) + second_time = os.path.getmtime(second) + return first_time >= second_time def read_lines_from_file(filename, exclusions): @@ -34,7 +56,8 @@ def read_lines_from_file(filename, exclusions): """ file = open(filename, "r") # Break up the input into lines, and skip empty lines. - parts = [j for i in [i.split("\n") for i in file.readlines()] for j in i if len(j) > 0] + parts = [j for i in [i.split("\n") for i in file.readlines()] + for j in i if len(j) > 0] result = [] include_re = re.compile('path:\s+(\S+)') for line in parts: @@ -42,35 +65,17 @@ def read_lines_from_file(filename, exclusions): if m: included_file = m.group(1) if included_file not in exclusions: - print("including {}".format(included_file)) - result.extend(read_lines_from_file(included_file,exclusions)) + result.extend(read_lines_from_file(included_file, exclusions)) continue result.append(line) return result -scanner_lines = read_lines_from_file(sys.argv[1], {'wgsl.recursive.bs.include'}) - -# Skip lines like: -#
<pre class=include> -# </pre> -scanner_lines = filter(lambda s: not s.startswith("<pre") and not s.startswith("</pre"), scanner_lines) - -# Remove comments of the form <!-- comment --> -scanner_lines = [re.sub('<!--.*-->
', '', line) for line in scanner_lines] - -grammar_filename = sys.argv[2] -grammar_path = os.path.dirname(grammar_filename) -os.makedirs(grammar_path, exist_ok=True) -grammar_file = open(grammar_filename, "w") - -# Global variable holding the current line text. -line = "" """ Scanner classes are used to parse contiguous sets of lines in the WGSL bikeshed source text. """ + class Scanner: @staticmethod @@ -130,6 +135,9 @@ def parse(lines, i): class scanner_rule(Scanner): + """ + A scanner that reads grammar rules from bikeshed source text. + """ @staticmethod def name(): return "rule" @@ -188,7 +196,10 @@ def parse(lines, i): return (None, None, None) -class scanner_example(Scanner): # Not an example of a scanner, scanner of examples from specification +class scanner_example(Scanner): + """ + A scanner that reads WGSL program examples from bikeshed source text. + """ @staticmethod def name(): return "example" @@ -231,134 +242,24 @@ def parse(lines, i): return (None, line, 0) -scanner_spans = [scanner_rule, - scanner_example] - - -scanner_components = {i.name(): {} for i in scanner_spans} - -scanner_i = 0 # line number of the current line -scanner_span = None -scanner_record = False -last_key = None # The rule name, if the most recently parsed thing was a rule. -last_value = None # The most recently parsed thing -while scanner_i < len(scanner_lines): - # Try both the rule and the example scanners. - for j in scanner_spans: - scanner_begin = j.begin(scanner_lines, scanner_i) - if scanner_begin[0]: - # Use this scanner - scanner_span = None - scanner_record = False - last_key = None - last_value = None - scanner_span = j - if scanner_begin[1] != None: - last_key = scanner_begin[1] - scanner_i += scanner_begin[-1] - if scanner_span == j: - # Check if we should stop using this scanner. - scanner_end = j.end(scanner_lines, scanner_i) - if scanner_end[0]: - # Yes, stop using this scanner. - scanner_span = None - scanner_record = False - last_key = None - last_value = None - scanner_i += scanner_end[-1] - if scanner_span != None: - # We're are in the middle of scanning a span of lines. - if scanner_record: - scanner_skip = scanner_span.skip(scanner_lines, scanner_i) - if scanner_skip[0]: - # Stop recording - scanner_record = False - scanner_i += scanner_skip[-1] # Advance past this line - else: - # Should we start recording? - scanner_record_value = scanner_span.record( - scanner_lines, scanner_i) - if scanner_record_value[0]: - # Start recording - scanner_record = True - if last_key != None and scanner_span.name() == "example": # TODO Remove special case - if last_key in scanner_components[scanner_span.name()]: - raise RuntimeError("line " + str(scanner_i) + ": example with duplicate name: " + last_key) - else: - scanner_components[scanner_span.name()][last_key] = [] - scanner_i += scanner_record_value[-1] - if scanner_record and scanner_span.valid(scanner_lines, scanner_i): - # Try parsing this line - scanner_parse = scanner_span.parse(scanner_lines, scanner_i) - if scanner_parse[2] < 0: - # This line continues the rule parsed on the immediately preceding lines. - if (scanner_parse[1] != None and - last_key != None and - last_value != None and - last_key in scanner_components[scanner_span.name()] and - len(scanner_components[scanner_span.name()][last_key]) > 0): - scanner_components[scanner_span.name( - )][last_key][-1] += scanner_parse[1] - else: - if scanner_parse[0] != None: - # It's a rule, with name in the 0'th position. 
- last_key = scanner_parse[0] - if scanner_parse[1] != None: - last_value = scanner_parse[1] - if last_key not in scanner_components[scanner_span.name()]: - # Create a new entry for this rule - scanner_components[scanner_span.name()][last_key] = [ - last_value] - else: - # Append to the existing entry. - scanner_components[scanner_span.name()][last_key].append( - last_value) - else: - # Reset - last_value = None - scanner_components[scanner_span.name()][last_key] = [] - else: - # It's example text - if scanner_parse[1] != None: - last_value = scanner_parse[1] - scanner_components[scanner_span.name()][last_key].append( - last_value) - scanner_i += scanner_parse[-1] # Advance line index - scanner_i += 1 - -grammar_source = "" - -grammar_source += r""" -module.exports = grammar({ - name: 'wgsl', - - externals: $ => [ - $._block_comment, - ], - - extras: $ => [ - $._comment, - $._block_comment, - $._blankspace, - ], - - inline: $ => [ - $.global_decl, - $._reserved, - ], - - // WGSL has no parsing conflicts. - conflicts: $ => [], - - word: $ => $.ident_pattern_token, - - rules: { -"""[1:-1] -grammar_source += "\n" - +# These fixed tokens must be parsed by the custom scanner. +# This is needed to support template disambiguation. +custom_simple_tokens = { + '>': '_greater_than', + '>=': '_greater_than_equal', + '<': '_less_than', + '<=': '_less_than_equal', + '<<': '_shift_left', + '>>': '_shift_right', + '<<=': '_shift_left_assign', + '>>=': '_shift_right_assign' +} def grammar_from_rule_item(rule_item): + """ + Returns a string for the JavaScript expression for this rule. + """ + global custom_simple_tokens result = "" item_choice = False items = [] @@ -369,16 +270,50 @@ i_skip = 0 i_item = "" if rule_item[i].startswith("[=syntax/"): + # From '[=syntax/foobar=]' pick out 'foobar' i_item = rule_item[i].split("[=syntax/")[1].split("=]")[0] i_item = f"$.{i_item}" elif rule_item[i].startswith("`/"): + # From "`/pattern/`" pick out '/pattern/' i_item = f"token({rule_item[i][1:-1]})" elif rule_item[i].startswith("`'"): - i_item = f"token({rule_item[i][1:-1]})" + # From "`'&&'`" pick out '&&' + content = rule_item[i][2:-2] + # If the name maps to a custom token, then use that, otherwise, + # use the content name itself. + if content in custom_simple_tokens: + i_item = custom_simple_tokens[content] + else: + i_item = f"token('{content}')" + elif rule_item[i].startswith("<a") and rule_item[i+1].endswith("</a>"): + # From ['<a for=syntax_sym', 'lt=_disambiguate_template>_disambiguate_template</a>'] + # pick out '_disambiguate_template' + match = re.fullmatch("[^>]*>(.*)</a>",rule_item[i+1]) + token = match.group(1) + i_item = f"$.{token}" + i += 1 + elif rule_item[i].startswith("<a"): + # From ['<a', 'for=syntax_kw', "lt=true>`'true'`</a>"] + # pick out "true" + match = re.fullmatch("[^>]*>`'(.*)'`</a>",rule_item[i+2]) + if match: + token = match.group(1) + else: + # Now try it without `' '` surrounding the element content text. + # From ['<a', 'for=syntax_sym', "lt=_disam>_disam</a>"] + # pick out "_disam" + match = re.fullmatch("[^>]*>(.*)</a>",rule_item[i+2]) + token = match.group(1) + if token in custom_simple_tokens: + token = custom_simple_tokens[token] + i_item = f"$.{token}" + elif token.startswith("_") and token != "_": + i_item = f"$.{token}" + else: + i_item = f"""token('{token}')""" + i += 2 elif rule_item[i] == "(": + # Extract a parenthesized rule j = i + 1 j_span = 0 rule_subitem = [] @@ -433,236 +368,557 @@ def grammar_from_rule(key, value): return result -scanner_components[scanner_rule.name()]["_comment"] = [["`/\\/\\/.*/`"]] +class ScanResult(dict): + """ + A dictionary containing the results of scanning the WGSL spec.
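+ It has three keys: 'raw', 'rule', and 'example'.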
+ + self['raw'] + A list of the Bikeshed source text lines, after include expansion but + without further filtering. + self['rule'] + A dictionary mapping a parsed grammar rule to its definition. + self['example'] + A dictionary mapping the name of an example to the + WGSL source text for the example. + The name is taken from the "heading" attribute of the <div class='example'> element. + """ + def __init__(self): + self['rule'] = dict() + self['example'] = dict() + self['raw'] = [] -# Following sections are to allow out-of-order per syntactic grammar appearance of rules +def read_spec(options): + """ + Returns a ScanResult from parsing the Bikeshed source of the WGSL spec. + """ + result = ScanResult() -rule_skip = set() + # Get the input bikeshed text. + scanner_lines = read_lines_from_file( + options.bs_filename, {'wgsl.recursive.bs.include'}) -for rule in ["translation_unit", "global_directive", "global_decl"]: - grammar_source += grammar_from_rule( - rule, scanner_components[scanner_rule.name()][rule]) + ",\n" - rule_skip.add(rule) + # Make a *copy* of the text input because we'll filter it later. + result['raw'] = [x for x in scanner_lines] + # Skip lines like: + # <pre class=include> + # </pre> + scanner_lines = filter(lambda s: not s.startswith( + "<pre") and not s.startswith("</pre"), scanner_lines) + # Remove comments of the form <!-- comment --> + scanner_lines = [re.sub('<!--.*-->', '', line) for line in scanner_lines] + os.makedirs(options.grammar_dir, exist_ok=True) + # Global variable holding the current line text. + line = "" + scanner_spans = [scanner_rule, + scanner_example] + scanner_i = 0 # line number of the current line + scanner_span = None + scanner_record = False + # The rule name, if the most recently parsed thing was a rule. + last_key = None + last_value = None # The most recently parsed thing + while scanner_i < len(scanner_lines): + # Try both the rule and the example scanners. + for j in scanner_spans: + scanner_begin = j.begin(scanner_lines, scanner_i) + if scanner_begin[0]: + # Use this scanner + scanner_span = None + scanner_record = False + last_key = None + last_value = None + scanner_span = j + if scanner_begin[1] != None: + last_key = scanner_begin[1] + scanner_i += scanner_begin[-1] + if scanner_span == j: + # Check if we should stop using this scanner. + scanner_end = j.end(scanner_lines, scanner_i) + if scanner_end[0]: + # Yes, stop using this scanner. + scanner_span = None + scanner_record = False + last_key = None + last_value = None + scanner_i += scanner_end[-1] + if scanner_span != None: + # We're in the middle of scanning a span of lines. + if scanner_record: + scanner_skip = scanner_span.skip(scanner_lines, scanner_i) + if scanner_skip[0]: + # Stop recording + scanner_record = False + scanner_i += scanner_skip[-1] # Advance past this line + else: + # Should we start recording?
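+ # record() returns a tuple whose first element is truthy when this line begins the span's payload.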
+ scanner_record_value = scanner_span.record( + scanner_lines, scanner_i) + if scanner_record_value[0]: + # Start recording + scanner_record = True + if last_key != None and scanner_span.name() == "example": # TODO Remove special case + if last_key in result[scanner_span.name()]: + raise RuntimeError( + "line " + str(scanner_i) + ": example with duplicate name: " + last_key) + else: + result[scanner_span.name()][last_key] = [] + scanner_i += scanner_record_value[-1] + if scanner_record and scanner_span.valid(scanner_lines, scanner_i): + # Try parsing this line + scanner_parse = scanner_span.parse(scanner_lines, scanner_i) + if scanner_parse[2] < 0: + # This line continues the rule parsed on the immediately preceding lines. + if (scanner_parse[1] != None and + last_key != None and + last_value != None and + last_key in result[scanner_span.name()] and + len(result[scanner_span.name()][last_key]) > 0): + result[scanner_span.name( + )][last_key][-1] += scanner_parse[1] + else: + if scanner_parse[0] != None: + # It's a rule, with name in the 0'th position. + last_key = scanner_parse[0] + if scanner_parse[1] != None: + last_value = scanner_parse[1] + if last_key not in result[scanner_span.name()]: + # Create a new entry for this rule + result[scanner_span.name()][last_key] = [ + last_value] + else: + # Append to the existing entry. + result[scanner_span.name()][last_key].append( + last_value) + else: + # Reset + last_value = None + result[scanner_span.name()][last_key] = [] + else: + # It's example text + if scanner_parse[1] != None: + last_value = scanner_parse[1] + result[scanner_span.name()][last_key].append( + last_value) + scanner_i += scanner_parse[-1] # Advance line index + scanner_i += 1 -def not_token_only(value): - result = False - for i in value: - result = result or len( - [j for j in i if not j.startswith("`/") and not j.startswith("`'")]) > 0 + result[scanner_rule.name()]["_comment"] = [["`/\\/\\/.*/`"]] return result -for key, value in scanner_components[scanner_rule.name()].items(): - if not key.startswith("_") and key != "ident" and not_token_only(value) and key not in rule_skip: - grammar_source += grammar_from_rule(key, value) + ",\n" - rule_skip.add(key) +def flow_extract(options, scan_result): + """ + Write the tree-sitter grammar definition for WGSL + + Args: + options: Options + scan_result: the ScanResult holding rules and examples extracted from the WGSL spec + """ + print("{}: Extract...".format(options.script)) + input_bs_is_fresh = True + previously_scanned_bs_file = options.bs_filename + ".pre" + if not os.path.exists(options.grammar_filename): + # Must regenerate the tree-sitter grammar file + pass + else: + # Check against previously scanned text + if os.path.exists(previously_scanned_bs_file): + with open(previously_scanned_bs_file,"r") as previous_file: + previous_lines = previous_file.readlines() + if previous_lines == scan_result['raw']: + input_bs_is_fresh = False -# Extract tokens + if input_bs_is_fresh: + rules = scan_result['rule'] + grammar_source = "" -for key, value in scanner_components[scanner_rule.name()].items(): - if not key.startswith("_") and key != "ident" and key not in rule_skip: - grammar_source += grammar_from_rule(key, value) + ",\n" - rule_skip.add(key) + grammar_source += r""" + module.exports = grammar({ + name: 'wgsl', + externals: $ => [ + $._block_comment, + $._disambiguate_template, + $._template_args_start, + $._template_args_end, + $._less_than, + $._less_than_equal, + $._shift_left, + $._shift_left_assign, + $._greater_than, + 
$._greater_than_equal, + $._shift_right, + $._shift_right_assign, + $._error_sentinel, + ], -# Extract underscore + extras: $ => [ + $._comment, + $._block_comment, + $._blankspace, + ], + inline: $ => [ + $.global_decl, + $._reserved, + ], -for key, value in scanner_components[scanner_rule.name()].items(): - if key.startswith("_") and key != "_comment" and key != "_blankspace" and key not in rule_skip: - grammar_source += grammar_from_rule(key, value) + ",\n" - rule_skip.add(key) + // WGSL has no parsing conflicts. + conflicts: $ => [], + word: $ => $.ident_pattern_token, -# Extract ident + rules: { + """[1:-1] + grammar_source += "\n" + # Following sections are to allow out-of-order per syntactic grammar appearance of rules -grammar_source += grammar_from_rule( - "ident", scanner_components[scanner_rule.name()]["ident"]) + ",\n" -rule_skip.add("ident") + rule_skip = set() + for rule in ["translation_unit", "global_directive", "global_decl"]: + grammar_source += grammar_from_rule( + rule, rules[rule]) + ",\n" + rule_skip.add(rule) -# Extract comment + # Extract literals -grammar_source += grammar_from_rule( - "_comment", scanner_components[scanner_rule.name()]["_comment"]) + ",\n" -rule_skip.add("_comment") + for key, value in rules.items(): + if key.endswith("_literal") and key not in rule_skip: + grammar_source += grammar_from_rule(key, value) + ",\n" + rule_skip.add(key) -# Extract space + # Extract constituents -grammar_source += grammar_from_rule( - "_blankspace", scanner_components[scanner_rule.name()]["_blankspace"]) -rule_skip.add("_blankspace") + def not_token_only(value): + result = False + for i in value: + result = result or len( + [j for j in i if not j.startswith("`/") and not j.startswith("`'")]) > 0 + return result -grammar_source += "\n" -grammar_source += r""" - }, -}); -"""[1:-1] -headerTemplate = Template(HEADER) -grammar_file.write(headerTemplate.substitute( - YEAR=date.today().year) + grammar_source + "\n") -grammar_file.close() + for key, value in rules.items(): + if not key.startswith("_") and not_token_only(value) and key not in rule_skip: + grammar_source += grammar_from_rule(key, value) + ",\n" + rule_skip.add(key) -with open(grammar_path + "/package.json", "w") as grammar_package: - grammar_package.write('{\n') - grammar_package.write(' "name": "tree-sitter-wgsl",\n') - grammar_package.write(' "dependencies": {\n') - grammar_package.write(' "nan": "^2.15.0"\n') - grammar_package.write(' },\n') - grammar_package.write(' "devDependencies": {\n') - grammar_package.write(' "tree-sitter-cli": "^0.20.0"\n') - grammar_package.write(' },\n') - grammar_package.write(' "main": "bindings/node"\n') - grammar_package.write('}\n') -# External scanner for nested block comments -# For the API, see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners -# See: https://github.com/tree-sitter/tree-sitter-rust/blob/master/src/scanner.c + # Extract tokens -os.makedirs(os.path.join(grammar_path, "src"), exist_ok=True) -with open(os.path.join(grammar_path, "src", "scanner.c"), "w") as external_scanner: - external_scanner.write(r""" -#include-#include -enum TokenType { - BLOCK_COMMENT, -}; + for key, value in rules.items(): + if not key.startswith("_") and key not in rule_skip: + grammar_source += grammar_from_rule(key, value) + ",\n" + rule_skip.add(key) -void *tree_sitter_wgsl_external_scanner_create() { return NULL; } -void tree_sitter_wgsl_external_scanner_destroy(void *p) {} -unsigned tree_sitter_wgsl_external_scanner_serialize(void *p, char *buffer) { 
return 0; } -void tree_sitter_wgsl_external_scanner_deserialize(void *p, const char *b, unsigned n) {} -static void advance(TSLexer *lexer) { - lexer->advance(lexer, false); -} + # Extract underscore -bool tree_sitter_wgsl_external_scanner_scan(void *payload, TSLexer *lexer, - const bool *valid_symbols) { - while (iswspace(lexer->lookahead)) lexer->advance(lexer, true); - - if (lexer->lookahead == '/') { - advance(lexer); - if (lexer->lookahead != '*') return false; - advance(lexer); - - bool after_star = false; - unsigned nesting_depth = 1; - for (;;) { - switch (lexer->lookahead) { - case '\0': - /* This signals the end of input. Since nesting depth is - * greater than zero, the scanner is in the middle of - * a block comment. Block comments must be affirmatively - * terminated. - */ - return false; - case '*': - advance(lexer); - after_star = true; - break; - case '/': - if (after_star) { - advance(lexer); - after_star = false; - nesting_depth--; - if (nesting_depth == 0) { - lexer->result_symbol = BLOCK_COMMENT; - return true; - } - } else { - advance(lexer); - after_star = false; - if (lexer->lookahead == '*') { - nesting_depth++; - advance(lexer); - } - } - break; - default: - advance(lexer); - after_star = false; - break; - } - } - } - - return false; -} -"""[1:-1]) - - -# Use "npm install" to create the tree-sitter CLI that has WGSL -# support. But "npm install" fetches data over the network. -# That can be flaky, so only invoke it when needed. -if os.path.exists("grammar/node_modules/tree-sitter-cli") and os.path.exists("grammar/node_modules/nan"): - # "npm install" has been run already. - pass -else: - subprocess.run(["npm", "install"], cwd=grammar_path, check=True) -subprocess.run(["npx", "tree-sitter", "generate"], - cwd=grammar_path, check=True) -# Following are commented for future reference to expose playground -# Remove "--docker" if local environment matches with the container -# subprocess.run(["npx", "tree-sitter", "build-wasm", "--docker"], -# cwd=grammar_path, check=True) - -Language.build_library( - grammar_path + "/build/wgsl.so", - [ - grammar_path, - ] -) - -WGSL_LANGUAGE = Language(grammar_path + "/build/wgsl.so", "wgsl") - -parser = Parser() -parser.set_language(WGSL_LANGUAGE) - -error_list = [] - -for key, value in scanner_components[scanner_example.name()].items(): - if "expect-error" in key: - continue - value = value[:] - if "function-scope" in key: - value = ["fn function__scope____() {"] + value + ["}"] - if "type-scope" in key: - # Initiailize with zero-value expression. 
- value = ["const type_scope____: "] + value + ["="] + value + ["()"] + [";"] - program = "\n".join(value) - tree = parser.parse(bytes(program, "utf8")) - if tree.root_node.has_error: - error_list.append((program, tree)) - # TODO Semantic CI - -if len(error_list) > 0: - for error in error_list: - print("Example:") - print(error[0]) - print("Tree:") - print(error[1].root_node.sexp()) - raise Exception("Grammar is not compatible with examples!") + + for key, value in rules.items(): + if key.startswith("_") and key != "_comment" and key != "_blankspace" and key not in rule_skip: + grammar_source += grammar_from_rule(key, value) + ",\n" + rule_skip.add(key) + + + # Extract ident + + + grammar_source += grammar_from_rule( "ident", rules["ident"]) + ",\n" + rule_skip.add("ident") + + + # Extract comment + + + grammar_source += grammar_from_rule( + "_comment", rules["_comment"]) + ",\n" + rule_skip.add("_comment") + + + # Extract space + + + grammar_source += grammar_from_rule( + "_blankspace", rules["_blankspace"]) + rule_skip.add("_blankspace") + + + grammar_source += "\n" + grammar_source += r""" + }, + }); + """[1:-1] + + HEADER = """ + // Copyright (C) [$YEAR] World Wide Web Consortium, + // (Massachusetts Institute of Technology, European Research Consortium for + // Informatics and Mathematics, Keio University, Beihang). + // All Rights Reserved. + // + // This work is distributed under the W3C (R) Software License [1] in the hope + // that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + // + // [1] http://www.w3.org/Consortium/Legal/copyright-software + + // **** This file is auto-generated. Do not edit. **** + + """.lstrip() + + if input_bs_is_fresh: + print("{}: ...Creating tree-sitter parser".format(options.script,options.grammar_filename)) + with open(options.grammar_filename, "w") as grammar_file: + headerTemplate = Template(HEADER) + grammar_file.write(headerTemplate.substitute( + YEAR=date.today().year) + grammar_source + "\n") + grammar_file.close() + + if input_bs_is_fresh: + # Save scanned lines for next time. + with open(previously_scanned_bs_file,"w") as previous_file: + for line in scan_result['raw']: + previous_file.write(line) + + with open(os.path.join(options.grammar_dir,"package.json"), "w") as grammar_package: + grammar_package.write('{\n') + grammar_package.write(' "name": "tree-sitter-wgsl",\n') + grammar_package.write(' "dependencies": {\n') + grammar_package.write(' "nan": "^2.15.0"\n') + grammar_package.write(' },\n') + grammar_package.write(' "devDependencies": {\n') + grammar_package.write(' "tree-sitter-cli": "^0.20.7"\n') + grammar_package.write(' },\n') + grammar_package.write(' "main": "bindings/node"\n') + grammar_package.write('}\n') + + return True + +def flow_build(options): + """ + Build the shared library for the custom tree-sitter scanner. + """ + + print("{}: Build...".format(options.script)) + if not os.path.exists(options.grammar_filename): + print("missing grammar file: {}") + return False + + # External scanner for nested block comments + # For the API, see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + # See: https://github.com/tree-sitter/tree-sitter-rust/blob/master/src/scanner.c + + os.makedirs(os.path.join(options.grammar_dir, "src"), exist_ok=True) + + # Remove the old custom scanner, if it exists. 
+ scanner_c_staging = os.path.join(options.grammar_dir, "src", "scanner.c") + if os.path.exists(scanner_c_staging): + os.remove(scanner_c_staging) + # Copy the new scanner into place, if newer + scanner_cc_staging = os.path.join(options.grammar_dir, "src", "scanner.cc") + if newer_than(options.scanner_cc_filename, scanner_cc_staging): + shutil.copyfile(options.scanner_cc_filename, scanner_cc_staging) + + + # Use "npm install" to create the tree-sitter CLI that has WGSL + # support. But "npm install" fetches data over the network. + # That can be flaky, so only invoke it when needed. + if os.path.exists("grammar/node_modules/tree-sitter-cli") and os.path.exists("grammar/node_modules/nan"): + # "npm install" has been run already. + pass + else: + subprocess.run(["npm", "install"], cwd=options.grammar_dir, check=True) + subprocess.run(["npx", "tree-sitter", "generate"], + cwd=options.grammar_dir, check=True) + # Following are commented for future reference to expose playground + # Remove "--docker" if local environment matches with the container + # subprocess.run(["npx", "tree-sitter", "build-wasm", "--docker"], + # cwd=options.grammar_dir, check=True) + + + def build_library(output_file, input_files): + # The py-tree-sitter build_library method wasn't compiling with C++17 flags, + # so invoke the compile ourselves. + compiler = new_compiler() + clang_like = isinstance(compiler, UnixCCompiler) + + # Compile .c and .cc files down to object files. + object_files = [] + includes = [os.path.dirname(input_files[0])] + for src in input_files: + flags = [] + if src.endswith(".cc"): + if clang_like: + flags.extend(["-fPIC", "-std=c++17"]) + else: + flags.extend(["/std:c++17"]) + objects = compiler.compile([src], + extra_preargs=flags, + include_dirs=includes) + object_files.extend(objects) + + # Link object files to a single shared library. + if clang_like: + link_flags = ["-lstdc++"] + compiler.link_shared_object( + object_files, + output_file, + target_lang="c++", + extra_postargs=link_flags) + + if newer_than(scanner_cc_staging, options.wgsl_shared_lib) or newer_than(options.grammar_filename,options.wgsl_shared_lib): + print("{}: ...Building custom scanner: {}".format(options.script,options.wgsl_shared_lib)) + build_library(options.wgsl_shared_lib, + [scanner_cc_staging, + os.path.join(options.grammar_dir,"src","parser.c")]) + return True + +def flow_examples(options,scan_result): + """ + Check the tree-sitter parser can parse the examples from the WGSL spec. + + Args: + options: Options + scan_result: the ScanResult holding rules and examples extracted from the WGSL spec + """ + print("{}: Examples...".format(options.script)) + + examples = scan_result['example'] + WGSL_LANGUAGE = Language(options.wgsl_shared_lib, "wgsl") + + parser = Parser() + parser.set_language(WGSL_LANGUAGE) + + errors = 0 + for key, value in examples.items(): + print(".",flush=True,end='') + if "expect-error" in key: + continue + value = value[:] + if "function-scope" in key: + value = ["fn function__scope____() {"] + value + ["}"] + if "type-scope" in key: + # Initialize with zero-value expression. 
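+ # A bare type T from the example becomes "const type_scope____: T = T();", a complete declaration.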
+ value = ["const type_scope____: "] + \ + value + ["="] + value + ["()"] + [";"] + program = "\n".join(value) + # print("**************** BEGIN ****************") + # print(program) + # print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + tree = parser.parse(bytes(program, "utf8")) + if tree.root_node.has_error: + print("Example:") + print(program) + print("Tree:") + print(tree.root_node.sexp()) + errors = errors + 1 + # print("***************** END *****************") + # print("") + # print("") + + # TODO Semantic CI + + if errors > 0: + raise Exception("Grammar is not compatible with examples!") + print("Ok",flush=True) + return True + + +FLOW_HELP = """ +A complete flow has the following steps, in order + 'x' (think 'extract'): Generate a tree-sitter grammar definition from the + bikeshed source for the WGSL specification. + 'b' (think 'build'): Build the tree-sitter parser + 'e' (think 'example'): Check the examples from the WGSL spec parse correctly. + 't' (think 'test'): Run parser unit tests. + +You can be more selective by specifying the --flow option followed by a word +containing the letters for the steps to run. + +For example, the following will extract the grammar, build the tree-sitter parse, +and check that the examples from the spec parse correctly: + + extract-grammar --flow xbe + +The order of the letters is not significant. The steps will always run in the +same relative order as the default flow. +""" +DEFAULT_FLOW="xbet" + +def main(): + argparser = argparse.ArgumentParser( + prog="extract-grammar.py", + description="Extract the grammar from the WGSL spec and run checks", + add_help=False # We want to print our own additional formatted help + ) + argparser.add_argument("--help","-h", + action='store_true', + help="Show this help message, then exit") + argparser.add_argument("--verbose","-v", + action='store_true', + help="Be verbose") + argparser.add_argument("--flow", + action='store', + help="The flow steps to run. 
Default is the whole flow.", + default=DEFAULT_FLOW) + argparser.add_argument("--tree-sitter-dir", + help="Target directory for the tree-sitter parser", + default="grammar") + argparser.add_argument("--spec", + action='store', + help="Bikeshed source file for the WGSL spec", + default="index.bs") + argparser.add_argument("--scanner", + action='store', + help="source file for the tree-sitter custom scanner", + default="scanner.cc") + + args = argparser.parse_args() + if args.help: + print(argparser.format_help()) + print(FLOW_HELP) + return 0 + + options = Options(args.spec,args.tree_sitter_dir,args.scanner) + options.verbose = args.verbose + if args.verbose: + print(options) + + scan_result = None + + if 'x' in args.flow: + scan_result = read_spec(options) + if not flow_extract(options,scan_result): + return 1 + if 'b' in args.flow: + if not flow_build(options): + return 1 + if 'e' in args.flow: + if scan_result is None: + scan_result = read_spec(options) + if not flow_examples(options,scan_result): + return 1 + if 't' in args.flow: + test_options = wgsl_unit_tests.Options(options.wgsl_shared_lib) + if not wgsl_unit_tests.run_tests(test_options): + return 1 + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/wgsl/index.bs b/wgsl/index.bs index 4ce62da0ec..942e84fc22 100644 --- a/wgsl/index.bs +++ b/wgsl/index.bs @@ -110,6 +110,9 @@ div.syntax > p > a > code { font-style: normal; font-weight: bold; } +.hidden { + display: none +} table.data.builtin tbody{ border-bottom: 0; } @@ -532,16 +535,23 @@ The program text [=shader-creation error|must not=] include a null code point (` ## Parsing ## {#parsing} To parse a WGSL program: + 1. Remove [=comments=]: * Replace the first comment with a space code point (`U+0020`). * Repeat until no comments remain. -2. Parse the whole text, attempting to match the [=syntax/translation_unit=] grammar rule. +2. Find [=template lists=], using the [=template list discovery|algorithm=] in [[#template-lists-sec]]. +3. Parse the whole text, attempting to match the [=syntax/translation_unit=] grammar rule. Parsing uses a LALR(1) parser (one token of lookahead) [[!DeRemer1969]], with the following customization: * Tokenization is interleaved with parsing, and is context-aware. When the parser requests the next token: * Consume and ignore an initial sequence of [=blankspace=] code points. - * A token candidate is any WGSL [=token=] formed from the non-empty prefix of the remaining unconsumed code points. - * The token returned is the longest [=token candidate=] that is also a valid lookahead token for the current parser state. [[!VanWyk2007]] + * If the next code point is the start of a [=template list=], consume it and return [=syntax_sym/_template_args_start=]. + * If the next code point is the end of a [=template list=], consume it and return [=syntax_sym/_template_args_end=]. + * Otherwise: + * A token candidate is any WGSL [=token=] formed from the non-empty prefix of the remaining unconsumed code points. + * The token returned is the longest [=token candidate=] that is also a valid lookahead token for the current parser state. [[!VanWyk2007]] + +A [=shader-creation error=] results if: * the entire source text cannot be converted into a finite sequence of valid tokens, or @@ -966,6 +976,149 @@ The spelling of the token may be the same as an [=identifier=], but the token do Section [[#context-dependent-name-tokens]] lists all such tokens. 
+## Template Lists ## {#template-lists-sec} + +Template parameterization is a way to specify parameters that modify a general concept. +To write a template parameterization, write the general concept, followed by a [=template list=]. + +Ignoring [=comments=] and [=blankspace=], a template list is: +* An initial `'<'` (U+003C) code point, then +* A [=syntax_sym/comma=]-separated list of one or more template parameters, then +* An optional trailing [=syntax_sym/comma=], then +* A terminating `'>'` (U+003E) code point. + +The form of a [=template parameter=] is implicitly defined by the [=template list discovery=] algorithm below. +Generally, they are names, expressions, or types. + +Note: For example, the phrase `vec3` is a template parameterization where `vec3` is the general concept being modified, +and ` ` is a template list containing one parameter, the [=f32=] type. +Together, `vec3 ` denotes a specific [=vector=] type. + +Note: For example, the phrase `var ` modifies the general `var` concept with template parameters `storage` and `read_write`. + + +Note: For example, the phrase `array+ +The `'<'` (U+003C) and `'>'` (U+003E) code points that delimit a template list are also used when spelling: +* A comparison operator in a [=syntax/relational_expression=]. +* A shift operator in a [=syntax/shift_expression=]. +* A [=syntax/compound_assignment_operator=] for performing a shift operation followed by an assignment. + +The syntactic ambiguity is resolved in favour of template lists: +* Template lists are discovered in an early phase of parsing, before [=declarations=], [=expressions=], [=statements=] are parsed. +* During tokenization in a later phase, + the intial `'<'` (U+003C) of a template list is mapped to a [=syntax_sym/_template_args_start=] token, and + the terminating `'>'` (U+003E) of a template list is mapped to a [=syntax_sym/_template_args_end=] token. + + +The template list discovery algorithm is as follows. +>` has two template parameterizations: +* `vec4 ` modifies the general `vec4` concept with template parameter `f32`. +* `array >` modifies the general `array` concept with template parameter `vec4 `. + + +**Input:** The program source text. + +**Record types:** + +Let |UnclosedCandidate| be a record type containing: + * |position|, a location in the source text + * |depth|, an integer, the expression nesting depth at |position| + +Let |TemplateList| be record type containing: + * |start_position|, the source location of the `'<'` (U+003C) code point that starts this template list. + * |end_position|, the source location of the `'>'` (U+003E) code point that ends this template list. + +**Output:** |DiscoveredTemplateLists|, a list of |TemplateList| records. + +**Algorithm:** +* Initialize |DiscoveredTemplateLists| to an empty list. +* Initialize a |Pending| variable to be an empty stack of |UnclosedCandidate| records. +* Initialize a |CurrentPosition| integer variable to 0. + It encodes the position of the code point currently being examined, as a count of the number of code points after the start of the source text. + * This variable will advance forward in the text while executing the algorithm. + When the end of text is reached, terminate the algorithm immediately and have it return |DiscoveredTemplateLists|. +* Initialize a |NestingDepth| integer variable to 0. +* Repeat the following steps: + * Advance |CurrentPosition| past [=blankspace=], [=comments=], and [=literals=]. 
+ * If [=syntax/ident_pattern_token=] matches the text at |CurrentPosition|, then: + * Advance |CurrentPosition| past the [=syntax/ident_pattern_token=]. + * Advance |CurrentPosition| past blankspace and comments, if present. + * If `'<'` (U+003C) appears at |CurrentPosition|, then: + * Note: This code point is a candidate for being the start of a template list. + Save enough state so it can be matched against a terminating `'>'` (U+003E) appearing later in the input. + * Push |UnclosedCandidate|(|position|=|CurrentPosition|,|depth|=|NestingDepth|) onto the |Pending| stack. + * Advance |CurrentPosition| to the next code point, and start the next iteration of the loop. + * If `'>'` (U+003E) appears at |CurrentPosition| then: + * Note: This code point is a candidate for being the end of a template list. + * If |Pending| is not empty, then let |T| be its top entry, and: + * If |T|.|depth| equals |NestingDepth| then: + * Note: This code point ends the current template list whose start is recorded in |T|. + * Add |TemplateList|(|start_position|=|T|.|position|, |end_position|=|CurrentPosition|) to |DiscoveredTemplateLists|. + * Pop |T| off the |Pending| stack. + * Advance |CurrentPosition| past this code point, and start the next iteration of the loop. + * If `'('` (U+0028) or `'['` (U+005B) appears at |CurrentPosition| then: + * Note: Enter a nested expression. + * Add 1 to |NestingDepth|. + * Advance |CurrentPosition| past this code point, and start the next iteration of the loop. + * If `')'` (U+0029) or `']'` (U+005D) appears at |CurrentPosition| then: + * Note: Exit a nested expression. + * Pop entries from the |Pending| stack until it is empty, or until its top entry has |depth| < |NestingDepth|. + * Set |NestingDepth| to 0 or |NestingDepth| − 1, whichever is larger. + * Advance |CurrentPosition| past this code point, and start the next iteration of the loop. + * If `';'` (U+003B) or `'{'` (U+007B) or `'='` (U+003D) or `':'` (U+003A) appears at |CurrentPosition| then: + * Note: These code points cannot appear in an expression, and therefore cannot appear in a template list. + Clear pending unclosed candidates. + * Set |NestingDepth| to 0. + * Remove all entries from the |Pending| stack. + * Advance |CurrentPosition| past this code point, and start the next iteration of the loop. + * If `'&&'` or `'||'` matches the text at |CurrentPosition| then: + * Note: These are operators that have lower precedence than comparisons. Reject any pending unclosed candidates at the current expression level. + * Note: With this rule, no template list will be found in the program fragment `a<b || c>d`. + Instead it will be recognized as the short-circuiting disjunction of two comparisons. + * Pop entries from the |Pending| stack until it is empty, or until its top entry has |depth| < |NestingDepth|. + * Advance |CurrentPosition| past the two code points, and start the next iteration of the loop. + * Advance |CurrentPosition| past the current code point. + + +Note: The algorithm can be modified to find the source ranges for [=template parameters=], as follows: + +* Modify |UnclosedCandidate| to add the following fields: + * |parameters|, a list of source ranges of template parameters. + * |parameter_start_position|, a source location. +* Modify |TemplateList| to add a field: + * |parameters|, a list of source ranges of template parameters. +* When pushing a new |UnclosedCandidate| onto the |Pending| stack:
+ * Set |parameter_start_position| to one code point past |CurrentPosition|. +* When adding a |TemplateList|, |TL|, to DiscoveredTemplateLists: + * Let |T| be the top of the |Pending| stack, as in the original algorithm. + * Push the source range starting at |T|.|parameter_start_position| and ending at |CurrentPosition|−1 onto |T|.|parameters|. + * Prepare |TL| as in the original algorithm. + * Set |TL|.|parameters| to |T|.|parameters|. +* Insert a check at the end the loop, just before advancing past the current code point: + * If '`,`' (U+002C) appears at |CurrentPosition| and |Pending| is not empty, then: + * Let |T| be the top of the |Pending| stack. + * Push the source range starting at |T|.|parameter_start_position| and ending at |CurrentPosition|−1 onto |T|.|parameters|. + * Set |T|.|parameter_start_position| to |CurrentPosition|+1 + ++ +Note: The algorithm explicitly skips past literals because some numeric literals end in a letter, for example `1.0f`. +The terminating `f` should not be mistaken as the start of an [=syntax/ident_pattern_token=]. + +Note: In the phrase `A ( B < C, D > ( E ) )`, the segment `< C, D >` is a [=template list=]. + +Note: The algorithm respects expression nesting: The start and end of a particular template list cannot appear at different expression nesting levels. +For example, in `arrayb)>`, the template list has three parameters, where the last one is `select(2,3,a>b)`. +The `'>'` in `a>b` does not terminate the template list because it is enclosed in a parenthesized part of the expression calling the `select` function. + +Note: Both ends of a template list must appear within the same array indexing phrase. For example `a[b ()` does not contain a valid template list. + + + ## Attributes ## {#attributes} An attribute modifies an object. @@ -2311,7 +2464,7 @@ This includes the [=store type=] of a workgroup variable. array_type_specifier : - | `'array'` `'<'` [=syntax/type_specifier=] ( `','` [=syntax/element_count_expression=] ) ? `'>'` + | `'array'` _disambiguate_template _template_args_start [=syntax/type_specifier=] ( `','` [=syntax/element_count_expression=] ) ? _template_args_endelement_count_expression : @@ -3993,11 +4146,11 @@ sampler_comparison | [=syntax/depth_texture_type=] - | [=syntax/sampled_texture_type=] `'<'` [=syntax/type_specifier=] `'>'` + | [=syntax/sampled_texture_type=] _disambiguate_template _template_args_start [=syntax/type_specifier=] _template_args_end - | [=syntax/multisampled_texture_type=] `'<'` [=syntax/type_specifier=] `'>'` + | [=syntax/multisampled_texture_type=] _disambiguate_template _template_args_start [=syntax/type_specifier=] _template_args_end - | [=syntax/storage_texture_type=] `'<'` [=syntax/texel_format=] `','` [=syntax/access_mode=] `'>'` + | [=syntax/storage_texture_type=] _disambiguate_template _template_args_start [=syntax/texel_format=] `','` [=syntax/access_mode=] _template_args_endsampler_type : @@ -4102,15 +4255,15 @@ all properties of the members of *S*, including attributes, carry over to the me | `'u32'` - | [=syntax/vec_prefix=] `'<'` [=syntax/type_specifier=] `'>'` + | [=syntax/vec_prefix=] _disambiguate_template _template_args_start [=syntax/type_specifier=] _template_args_end - | [=syntax/mat_prefix=] `'<'` [=syntax/type_specifier=] `'>'` + | [=syntax/mat_prefix=] _disambiguate_template _template_args_start [=syntax/type_specifier=] _template_args_end - | `'ptr'` `'<'` [=syntax/address_space=] `','` [=syntax/type_specifier=] ( `','` [=syntax/access_mode=] ) ? 
`'>'` + | `'ptr'` _disambiguate_template _template_args_start [=syntax/address_space=] `','` [=syntax/type_specifier=] ( `','` [=syntax/access_mode=] ) ? _template_args_end | [=syntax/array_type_specifier=] - | `'atomic'` `'<'` [=syntax/type_specifier=] `'>'` + | `'atomic'` _disambiguate_template _template_args_start [=syntax/type_specifier=] _template_args_end | [=syntax/texture_and_sampler_types=] @@ -4714,7 +4867,7 @@ such that the redundant loads are eliminated. variable_decl : - | `'var'` [=syntax/variable_qualifier=] ? [=syntax/optionally_typed_ident=] + | `'var'` _disambiguate_template [=syntax/variable_qualifier=] ? [=syntax/optionally_typed_ident=] optionally_typed_ident : @@ -4724,7 +4877,7 @@ such that the redundant loads are eliminated. variable_qualifier : - | `'<'` [=syntax/address_space=] ( `','` [=syntax/access_mode=] ) ? `'>'` + | _template_args_start [=syntax/address_space=] ( `','` [=syntax/access_mode=] ) ? _template_args_end @@ -6727,7 +6880,7 @@ When an identifier is used as a [=syntax/callable=] item, it is one of: | [=syntax/paren_expression=] - | `'bitcast'` `'<'` [=syntax/type_specifier=] `'>'` [=syntax/paren_expression=] + | `'bitcast'` _disambiguate_template _template_args_start [=syntax/type_specifier=] _template_args_end [=syntax/paren_expression=] call_expression : @@ -6747,11 +6900,11 @@ Note: The [=syntax/call_expression=] rule exists to ensure [=type checking=] app | [=syntax/type_specifier_without_ident=] - | [=syntax/vec_prefix=] + | [=syntax/vec_prefix=] _disambiguate_template - | [=syntax/mat_prefix=] + | [=syntax/mat_prefix=] _disambiguate_template - | `'array'` + | `'array'` _disambiguate_template paren_expression : @@ -7051,7 +7204,7 @@ to bind with this operator. This column is necessary for linearly listing operat # Statements # {#statements} -Statements are program fragments that control its execution. +A statement is a program fragment that controls execution. Statements are generally executed in sequential order; however, [[#control-flow|control flow statements]] may cause a program to execute in non-sequential order. @@ -7239,9 +7392,9 @@ An [=statement/assignment=] is a compound assignment when th | `'^='` - | `'>>='` + | `'>>='` - | `'<<='` + | `'<<='` The type requirements, semantics, and behavior of each statement is defined as if @@ -11096,7 +11249,7 @@ A syntactic token is a sequence of special code points, used: * to spell an expression operator, or * as punctuation: to group, sequence, or separate other grammar elements. -List of [=syntactic tokens=]: +The [=syntactic tokens=] are: * `'&'` (Code point: `U+0026`) * `'&&'` (Code points: `U+0026` `U+0026`) @@ -11142,8 +11295,16 @@ List of [=syntactic tokens=]: * `'&='` (Code points: `U+0026` `U+003D`) * `'|='` (Code points: `U+007C` `U+003D`) * `'^='` (Code points: `U+005E` `U+003D`) -* `'>>='` (Code points: `U+003E` `U+003E` `U+003D`) -* `'<<='` (Code points: `U+003C` `U+003C` `U+003D`) +* `'>>='` (Code points: `U+003E` `U+003E` `U+003D`) +* `'<<='` (Code points: `U+003C` `U+003C` `U+003D`) +* `_template_args_end` + * Text: `'>'` (Code point: `U+003E`) + * This token is textually the same as the [=syntax_sym/greater_than=] syntactic token. + * It is generated by [=template list discovery=], and is used as the last token in a [=template list=]. +* `_template_args_start` + * Text: `'<'` (Code point: `U+003C`) + * This token is textually the same as the [=syntax_sym/less_than=] syntactic token. + * It is generated by [=template list discovery=], and is used as the first token in a [=template list=].
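A short executable sketch can make the discovery pass concrete. The following Python is illustrative only and not part of this patch: the function name find_template_lists and the ASCII-only identifier pattern are assumptions, and the comment- and literal-skipping steps of the full algorithm are omitted for brevity.

import re

# Simplified ident_pattern_token; the spec uses Unicode XID classes.
IDENT = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')

def find_template_lists(src):
    """Return (start, end) offsets of '<' and '>' for each discovered template list."""
    discovered = []
    pending = []   # stack of (position of candidate '<', expression nesting depth)
    depth = 0      # current expression nesting depth
    i = 0
    while i < len(src):
        m = IDENT.match(src, i)
        if m:
            i = m.end()
            while i < len(src) and src[i].isspace():
                i += 1
            if i < len(src) and src[i] == '<':
                pending.append((i, depth))   # candidate template-list start
                i += 1
            continue
        c = src[i]
        if c == '>':
            # Close the candidate only if it was opened at this nesting depth.
            if pending and pending[-1][1] == depth:
                discovered.append((pending.pop()[0], i))
        elif c in '([':
            depth += 1
        elif c in ')]':
            # Candidates opened inside the nested expression can never close now.
            while pending and pending[-1][1] >= depth:
                pending.pop()
            depth = max(0, depth - 1)
        elif c in ';{=:':
            # These code points cannot appear in a template list.
            depth = 0
            pending.clear()
        elif src.startswith('&&', i) or src.startswith('||', i):
            # Low-precedence operators reject candidates at this level.
            while pending and pending[-1][1] >= depth:
                pending.pop()
            i += 2
            continue
        i += 1
    return discovered

For example, find_template_lists("vec3<f32>(1.0)") yields [(4, 8)], while no list is found in "a<b || c>d" because the '||' clears the pending candidate.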
## Context-Dependent Name Tokens ## {#context-dependent-name-tokens} diff --git a/wgsl/scanner.cc b/wgsl/scanner.cc new file mode 100644 index 0000000000..59879988d4 --- /dev/null +++ b/wgsl/scanner.cc @@ -0,0 +1,992 @@ + +#include <tree_sitter/parser.h> +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdio> +#include <cstring> +#include <iterator> +#include <string> +#include <type_traits> +#include <vector> + +#define ENABLE_LOGGING 0 + +#if ENABLE_LOGGING +#define LOG(msg, ...) printf(msg "\n", ##__VA_ARGS__) +#else +#define LOG(...) +#endif + +namespace { + +/// The possible external tokens matched by this custom scanner. +/// The order of the entries in this enumerator must match the 'externals' list +/// in grammar.js. +enum Token { + BLOCK_COMMENT, + DISAMBIGUATE_TEMPLATE, // A zero-length token used to scan ahead + TEMPLATE_ARGS_START, + TEMPLATE_ARGS_END, + LESS_THAN, // '<' + LESS_THAN_EQUAL, // '<=' + SHIFT_LEFT, // '<<' + SHIFT_LEFT_ASSIGN, // '<<=' + GREATER_THAN, // '>' + GREATER_THAN_EQUAL, // '>=' + SHIFT_RIGHT, // '>>' + SHIFT_RIGHT_ASSIGN, // '>>=' + + // A sentinel value used to signal that an error has already occurred. + // https://tree-sitter.github.io/tree-sitter/creating-parsers#other-external-scanner-details + ERROR, +}; + +const char* str(Token tok,bool brief=false) { + switch (tok) { + case Token::BLOCK_COMMENT: + return "BLOCK_COMMENT"; + case Token::DISAMBIGUATE_TEMPLATE: + return "DISAMBIGUATE_TEMPLATE"; + case Token::TEMPLATE_ARGS_START: + return "TEMPLATE_ARGS_START"; + case Token::TEMPLATE_ARGS_END: + return "TEMPLATE_ARGS_END"; + case Token::LESS_THAN: + return brief ? "<" : "LESS_THAN"; + case Token::LESS_THAN_EQUAL: + return brief ? "<=" : "LESS_THAN_EQUAL"; + case Token::SHIFT_LEFT: + return brief ? "<<" : "SHIFT_LEFT"; + case Token::SHIFT_LEFT_ASSIGN: + return brief ? "<<=" : "SHIFT_LEFT_ASSIGN"; + case Token::GREATER_THAN: + return brief ? ">" : "GREATER_THAN"; + case Token::GREATER_THAN_EQUAL: + return brief ? ">=" : "GREATER_THAN_EQUAL"; + case Token::SHIFT_RIGHT: + return brief ? ">>" : "SHIFT_RIGHT"; + case Token::SHIFT_RIGHT_ASSIGN: + return brief ? ">>=" : "SHIFT_RIGHT_ASSIGN"; + case Token::ERROR: + return "ERROR"; + default: + return "<unknown>"; + } +} + +using CodePoint = uint32_t; + +static constexpr CodePoint kEOF = 0; + +struct CodePointRange { + CodePoint first; // First code point in the interval + CodePoint last; // Last code point in the interval (inclusive) +}; + +inline bool operator<(CodePoint code_point, CodePointRange range) { + return code_point < range.first; +} +inline bool operator<(CodePointRange range, CodePoint code_point) { + return range.last < code_point; +} + +// Interval ranges of all code points in the Unicode 14 XID_Start set. +// This array needs to be in ascending order.
+constexpr CodePointRange kXIDStartRanges[] = { + {0x00041, 0x0005a}, {0x00061, 0x0007a}, {0x000aa, 0x000aa}, + {0x000b5, 0x000b5}, {0x000ba, 0x000ba}, {0x000c0, 0x000d6}, + {0x000d8, 0x000f6}, {0x000f8, 0x002c1}, {0x002c6, 0x002d1}, + {0x002e0, 0x002e4}, {0x002ec, 0x002ec}, {0x002ee, 0x002ee}, + {0x00370, 0x00374}, {0x00376, 0x00377}, {0x0037b, 0x0037d}, + {0x0037f, 0x0037f}, {0x00386, 0x00386}, {0x00388, 0x0038a}, + {0x0038c, 0x0038c}, {0x0038e, 0x003a1}, {0x003a3, 0x003f5}, + {0x003f7, 0x00481}, {0x0048a, 0x0052f}, {0x00531, 0x00556}, + {0x00559, 0x00559}, {0x00560, 0x00588}, {0x005d0, 0x005ea}, + {0x005ef, 0x005f2}, {0x00620, 0x0064a}, {0x0066e, 0x0066f}, + {0x00671, 0x006d3}, {0x006d5, 0x006d5}, {0x006e5, 0x006e6}, + {0x006ee, 0x006ef}, {0x006fa, 0x006fc}, {0x006ff, 0x006ff}, + {0x00710, 0x00710}, {0x00712, 0x0072f}, {0x0074d, 0x007a5}, + {0x007b1, 0x007b1}, {0x007ca, 0x007ea}, {0x007f4, 0x007f5}, + {0x007fa, 0x007fa}, {0x00800, 0x00815}, {0x0081a, 0x0081a}, + {0x00824, 0x00824}, {0x00828, 0x00828}, {0x00840, 0x00858}, + {0x00860, 0x0086a}, {0x00870, 0x00887}, {0x00889, 0x0088e}, + {0x008a0, 0x008c9}, {0x00904, 0x00939}, {0x0093d, 0x0093d}, + {0x00950, 0x00950}, {0x00958, 0x00961}, {0x00971, 0x00980}, + {0x00985, 0x0098c}, {0x0098f, 0x00990}, {0x00993, 0x009a8}, + {0x009aa, 0x009b0}, {0x009b2, 0x009b2}, {0x009b6, 0x009b9}, + {0x009bd, 0x009bd}, {0x009ce, 0x009ce}, {0x009dc, 0x009dd}, + {0x009df, 0x009e1}, {0x009f0, 0x009f1}, {0x009fc, 0x009fc}, + {0x00a05, 0x00a0a}, {0x00a0f, 0x00a10}, {0x00a13, 0x00a28}, + {0x00a2a, 0x00a30}, {0x00a32, 0x00a33}, {0x00a35, 0x00a36}, + {0x00a38, 0x00a39}, {0x00a59, 0x00a5c}, {0x00a5e, 0x00a5e}, + {0x00a72, 0x00a74}, {0x00a85, 0x00a8d}, {0x00a8f, 0x00a91}, + {0x00a93, 0x00aa8}, {0x00aaa, 0x00ab0}, {0x00ab2, 0x00ab3}, + {0x00ab5, 0x00ab9}, {0x00abd, 0x00abd}, {0x00ad0, 0x00ad0}, + {0x00ae0, 0x00ae1}, {0x00af9, 0x00af9}, {0x00b05, 0x00b0c}, + {0x00b0f, 0x00b10}, {0x00b13, 0x00b28}, {0x00b2a, 0x00b30}, + {0x00b32, 0x00b33}, {0x00b35, 0x00b39}, {0x00b3d, 0x00b3d}, + {0x00b5c, 0x00b5d}, {0x00b5f, 0x00b61}, {0x00b71, 0x00b71}, + {0x00b83, 0x00b83}, {0x00b85, 0x00b8a}, {0x00b8e, 0x00b90}, + {0x00b92, 0x00b95}, {0x00b99, 0x00b9a}, {0x00b9c, 0x00b9c}, + {0x00b9e, 0x00b9f}, {0x00ba3, 0x00ba4}, {0x00ba8, 0x00baa}, + {0x00bae, 0x00bb9}, {0x00bd0, 0x00bd0}, {0x00c05, 0x00c0c}, + {0x00c0e, 0x00c10}, {0x00c12, 0x00c28}, {0x00c2a, 0x00c39}, + {0x00c3d, 0x00c3d}, {0x00c58, 0x00c5a}, {0x00c5d, 0x00c5d}, + {0x00c60, 0x00c61}, {0x00c80, 0x00c80}, {0x00c85, 0x00c8c}, + {0x00c8e, 0x00c90}, {0x00c92, 0x00ca8}, {0x00caa, 0x00cb3}, + {0x00cb5, 0x00cb9}, {0x00cbd, 0x00cbd}, {0x00cdd, 0x00cde}, + {0x00ce0, 0x00ce1}, {0x00cf1, 0x00cf2}, {0x00d04, 0x00d0c}, + {0x00d0e, 0x00d10}, {0x00d12, 0x00d3a}, {0x00d3d, 0x00d3d}, + {0x00d4e, 0x00d4e}, {0x00d54, 0x00d56}, {0x00d5f, 0x00d61}, + {0x00d7a, 0x00d7f}, {0x00d85, 0x00d96}, {0x00d9a, 0x00db1}, + {0x00db3, 0x00dbb}, {0x00dbd, 0x00dbd}, {0x00dc0, 0x00dc6}, + {0x00e01, 0x00e30}, {0x00e32, 0x00e32}, {0x00e40, 0x00e46}, + {0x00e81, 0x00e82}, {0x00e84, 0x00e84}, {0x00e86, 0x00e8a}, + {0x00e8c, 0x00ea3}, {0x00ea5, 0x00ea5}, {0x00ea7, 0x00eb0}, + {0x00eb2, 0x00eb2}, {0x00ebd, 0x00ebd}, {0x00ec0, 0x00ec4}, + {0x00ec6, 0x00ec6}, {0x00edc, 0x00edf}, {0x00f00, 0x00f00}, + {0x00f40, 0x00f47}, {0x00f49, 0x00f6c}, {0x00f88, 0x00f8c}, + {0x01000, 0x0102a}, {0x0103f, 0x0103f}, {0x01050, 0x01055}, + {0x0105a, 0x0105d}, {0x01061, 0x01061}, {0x01065, 0x01066}, + {0x0106e, 0x01070}, {0x01075, 0x01081}, {0x0108e, 0x0108e}, + {0x010a0, 0x010c5}, {0x010c7, 
0x010c7}, {0x010cd, 0x010cd}, + {0x010d0, 0x010fa}, {0x010fc, 0x01248}, {0x0124a, 0x0124d}, + {0x01250, 0x01256}, {0x01258, 0x01258}, {0x0125a, 0x0125d}, + {0x01260, 0x01288}, {0x0128a, 0x0128d}, {0x01290, 0x012b0}, + {0x012b2, 0x012b5}, {0x012b8, 0x012be}, {0x012c0, 0x012c0}, + {0x012c2, 0x012c5}, {0x012c8, 0x012d6}, {0x012d8, 0x01310}, + {0x01312, 0x01315}, {0x01318, 0x0135a}, {0x01380, 0x0138f}, + {0x013a0, 0x013f5}, {0x013f8, 0x013fd}, {0x01401, 0x0166c}, + {0x0166f, 0x0167f}, {0x01681, 0x0169a}, {0x016a0, 0x016ea}, + {0x016ee, 0x016f8}, {0x01700, 0x01711}, {0x0171f, 0x01731}, + {0x01740, 0x01751}, {0x01760, 0x0176c}, {0x0176e, 0x01770}, + {0x01780, 0x017b3}, {0x017d7, 0x017d7}, {0x017dc, 0x017dc}, + {0x01820, 0x01878}, {0x01880, 0x018a8}, {0x018aa, 0x018aa}, + {0x018b0, 0x018f5}, {0x01900, 0x0191e}, {0x01950, 0x0196d}, + {0x01970, 0x01974}, {0x01980, 0x019ab}, {0x019b0, 0x019c9}, + {0x01a00, 0x01a16}, {0x01a20, 0x01a54}, {0x01aa7, 0x01aa7}, + {0x01b05, 0x01b33}, {0x01b45, 0x01b4c}, {0x01b83, 0x01ba0}, + {0x01bae, 0x01baf}, {0x01bba, 0x01be5}, {0x01c00, 0x01c23}, + {0x01c4d, 0x01c4f}, {0x01c5a, 0x01c7d}, {0x01c80, 0x01c88}, + {0x01c90, 0x01cba}, {0x01cbd, 0x01cbf}, {0x01ce9, 0x01cec}, + {0x01cee, 0x01cf3}, {0x01cf5, 0x01cf6}, {0x01cfa, 0x01cfa}, + {0x01d00, 0x01dbf}, {0x01e00, 0x01f15}, {0x01f18, 0x01f1d}, + {0x01f20, 0x01f45}, {0x01f48, 0x01f4d}, {0x01f50, 0x01f57}, + {0x01f59, 0x01f59}, {0x01f5b, 0x01f5b}, {0x01f5d, 0x01f5d}, + {0x01f5f, 0x01f7d}, {0x01f80, 0x01fb4}, {0x01fb6, 0x01fbc}, + {0x01fbe, 0x01fbe}, {0x01fc2, 0x01fc4}, {0x01fc6, 0x01fcc}, + {0x01fd0, 0x01fd3}, {0x01fd6, 0x01fdb}, {0x01fe0, 0x01fec}, + {0x01ff2, 0x01ff4}, {0x01ff6, 0x01ffc}, {0x02071, 0x02071}, + {0x0207f, 0x0207f}, {0x02090, 0x0209c}, {0x02102, 0x02102}, + {0x02107, 0x02107}, {0x0210a, 0x02113}, {0x02115, 0x02115}, + {0x02118, 0x0211d}, {0x02124, 0x02124}, {0x02126, 0x02126}, + {0x02128, 0x02128}, {0x0212a, 0x02139}, {0x0213c, 0x0213f}, + {0x02145, 0x02149}, {0x0214e, 0x0214e}, {0x02160, 0x02188}, + {0x02c00, 0x02ce4}, {0x02ceb, 0x02cee}, {0x02cf2, 0x02cf3}, + {0x02d00, 0x02d25}, {0x02d27, 0x02d27}, {0x02d2d, 0x02d2d}, + {0x02d30, 0x02d67}, {0x02d6f, 0x02d6f}, {0x02d80, 0x02d96}, + {0x02da0, 0x02da6}, {0x02da8, 0x02dae}, {0x02db0, 0x02db6}, + {0x02db8, 0x02dbe}, {0x02dc0, 0x02dc6}, {0x02dc8, 0x02dce}, + {0x02dd0, 0x02dd6}, {0x02dd8, 0x02dde}, {0x03005, 0x03007}, + {0x03021, 0x03029}, {0x03031, 0x03035}, {0x03038, 0x0303c}, + {0x03041, 0x03096}, {0x0309d, 0x0309f}, {0x030a1, 0x030fa}, + {0x030fc, 0x030ff}, {0x03105, 0x0312f}, {0x03131, 0x0318e}, + {0x031a0, 0x031bf}, {0x031f0, 0x031ff}, {0x03400, 0x04dbf}, + {0x04e00, 0x0a48c}, {0x0a4d0, 0x0a4fd}, {0x0a500, 0x0a60c}, + {0x0a610, 0x0a61f}, {0x0a62a, 0x0a62b}, {0x0a640, 0x0a66e}, + {0x0a67f, 0x0a69d}, {0x0a6a0, 0x0a6ef}, {0x0a717, 0x0a71f}, + {0x0a722, 0x0a788}, {0x0a78b, 0x0a7ca}, {0x0a7d0, 0x0a7d1}, + {0x0a7d3, 0x0a7d3}, {0x0a7d5, 0x0a7d9}, {0x0a7f2, 0x0a801}, + {0x0a803, 0x0a805}, {0x0a807, 0x0a80a}, {0x0a80c, 0x0a822}, + {0x0a840, 0x0a873}, {0x0a882, 0x0a8b3}, {0x0a8f2, 0x0a8f7}, + {0x0a8fb, 0x0a8fb}, {0x0a8fd, 0x0a8fe}, {0x0a90a, 0x0a925}, + {0x0a930, 0x0a946}, {0x0a960, 0x0a97c}, {0x0a984, 0x0a9b2}, + {0x0a9cf, 0x0a9cf}, {0x0a9e0, 0x0a9e4}, {0x0a9e6, 0x0a9ef}, + {0x0a9fa, 0x0a9fe}, {0x0aa00, 0x0aa28}, {0x0aa40, 0x0aa42}, + {0x0aa44, 0x0aa4b}, {0x0aa60, 0x0aa76}, {0x0aa7a, 0x0aa7a}, + {0x0aa7e, 0x0aaaf}, {0x0aab1, 0x0aab1}, {0x0aab5, 0x0aab6}, + {0x0aab9, 0x0aabd}, {0x0aac0, 0x0aac0}, {0x0aac2, 0x0aac2}, + {0x0aadb, 0x0aadd}, {0x0aae0, 0x0aaea}, {0x0aaf2, 
0x0aaf4}, + {0x0ab01, 0x0ab06}, {0x0ab09, 0x0ab0e}, {0x0ab11, 0x0ab16}, + {0x0ab20, 0x0ab26}, {0x0ab28, 0x0ab2e}, {0x0ab30, 0x0ab5a}, + {0x0ab5c, 0x0ab69}, {0x0ab70, 0x0abe2}, {0x0ac00, 0x0d7a3}, + {0x0d7b0, 0x0d7c6}, {0x0d7cb, 0x0d7fb}, {0x0f900, 0x0fa6d}, + {0x0fa70, 0x0fad9}, {0x0fb00, 0x0fb06}, {0x0fb13, 0x0fb17}, + {0x0fb1d, 0x0fb1d}, {0x0fb1f, 0x0fb28}, {0x0fb2a, 0x0fb36}, + {0x0fb38, 0x0fb3c}, {0x0fb3e, 0x0fb3e}, {0x0fb40, 0x0fb41}, + {0x0fb43, 0x0fb44}, {0x0fb46, 0x0fbb1}, {0x0fbd3, 0x0fc5d}, + {0x0fc64, 0x0fd3d}, {0x0fd50, 0x0fd8f}, {0x0fd92, 0x0fdc7}, + {0x0fdf0, 0x0fdf9}, {0x0fe71, 0x0fe71}, {0x0fe73, 0x0fe73}, + {0x0fe77, 0x0fe77}, {0x0fe79, 0x0fe79}, {0x0fe7b, 0x0fe7b}, + {0x0fe7d, 0x0fe7d}, {0x0fe7f, 0x0fefc}, {0x0ff21, 0x0ff3a}, + {0x0ff41, 0x0ff5a}, {0x0ff66, 0x0ff9d}, {0x0ffa0, 0x0ffbe}, + {0x0ffc2, 0x0ffc7}, {0x0ffca, 0x0ffcf}, {0x0ffd2, 0x0ffd7}, + {0x0ffda, 0x0ffdc}, {0x10000, 0x1000b}, {0x1000d, 0x10026}, + {0x10028, 0x1003a}, {0x1003c, 0x1003d}, {0x1003f, 0x1004d}, + {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10140, 0x10174}, + {0x10280, 0x1029c}, {0x102a0, 0x102d0}, {0x10300, 0x1031f}, + {0x1032d, 0x1034a}, {0x10350, 0x10375}, {0x10380, 0x1039d}, + {0x103a0, 0x103c3}, {0x103c8, 0x103cf}, {0x103d1, 0x103d5}, + {0x10400, 0x1049d}, {0x104b0, 0x104d3}, {0x104d8, 0x104fb}, + {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10570, 0x1057a}, + {0x1057c, 0x1058a}, {0x1058c, 0x10592}, {0x10594, 0x10595}, + {0x10597, 0x105a1}, {0x105a3, 0x105b1}, {0x105b3, 0x105b9}, + {0x105bb, 0x105bc}, {0x10600, 0x10736}, {0x10740, 0x10755}, + {0x10760, 0x10767}, {0x10780, 0x10785}, {0x10787, 0x107b0}, + {0x107b2, 0x107ba}, {0x10800, 0x10805}, {0x10808, 0x10808}, + {0x1080a, 0x10835}, {0x10837, 0x10838}, {0x1083c, 0x1083c}, + {0x1083f, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089e}, + {0x108e0, 0x108f2}, {0x108f4, 0x108f5}, {0x10900, 0x10915}, + {0x10920, 0x10939}, {0x10980, 0x109b7}, {0x109be, 0x109bf}, + {0x10a00, 0x10a00}, {0x10a10, 0x10a13}, {0x10a15, 0x10a17}, + {0x10a19, 0x10a35}, {0x10a60, 0x10a7c}, {0x10a80, 0x10a9c}, + {0x10ac0, 0x10ac7}, {0x10ac9, 0x10ae4}, {0x10b00, 0x10b35}, + {0x10b40, 0x10b55}, {0x10b60, 0x10b72}, {0x10b80, 0x10b91}, + {0x10c00, 0x10c48}, {0x10c80, 0x10cb2}, {0x10cc0, 0x10cf2}, + {0x10d00, 0x10d23}, {0x10e80, 0x10ea9}, {0x10eb0, 0x10eb1}, + {0x10f00, 0x10f1c}, {0x10f27, 0x10f27}, {0x10f30, 0x10f45}, + {0x10f70, 0x10f81}, {0x10fb0, 0x10fc4}, {0x10fe0, 0x10ff6}, + {0x11003, 0x11037}, {0x11071, 0x11072}, {0x11075, 0x11075}, + {0x11083, 0x110af}, {0x110d0, 0x110e8}, {0x11103, 0x11126}, + {0x11144, 0x11144}, {0x11147, 0x11147}, {0x11150, 0x11172}, + {0x11176, 0x11176}, {0x11183, 0x111b2}, {0x111c1, 0x111c4}, + {0x111da, 0x111da}, {0x111dc, 0x111dc}, {0x11200, 0x11211}, + {0x11213, 0x1122b}, {0x11280, 0x11286}, {0x11288, 0x11288}, + {0x1128a, 0x1128d}, {0x1128f, 0x1129d}, {0x1129f, 0x112a8}, + {0x112b0, 0x112de}, {0x11305, 0x1130c}, {0x1130f, 0x11310}, + {0x11313, 0x11328}, {0x1132a, 0x11330}, {0x11332, 0x11333}, + {0x11335, 0x11339}, {0x1133d, 0x1133d}, {0x11350, 0x11350}, + {0x1135d, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144a}, + {0x1145f, 0x11461}, {0x11480, 0x114af}, {0x114c4, 0x114c5}, + {0x114c7, 0x114c7}, {0x11580, 0x115ae}, {0x115d8, 0x115db}, + {0x11600, 0x1162f}, {0x11644, 0x11644}, {0x11680, 0x116aa}, + {0x116b8, 0x116b8}, {0x11700, 0x1171a}, {0x11740, 0x11746}, + {0x11800, 0x1182b}, {0x118a0, 0x118df}, {0x118ff, 0x11906}, + {0x11909, 0x11909}, {0x1190c, 0x11913}, {0x11915, 0x11916}, + {0x11918, 0x1192f}, {0x1193f, 0x1193f}, {0x11941, 0x11941}, + 
{0x119a0, 0x119a7}, {0x119aa, 0x119d0}, {0x119e1, 0x119e1}, + {0x119e3, 0x119e3}, {0x11a00, 0x11a00}, {0x11a0b, 0x11a32}, + {0x11a3a, 0x11a3a}, {0x11a50, 0x11a50}, {0x11a5c, 0x11a89}, + {0x11a9d, 0x11a9d}, {0x11ab0, 0x11af8}, {0x11c00, 0x11c08}, + {0x11c0a, 0x11c2e}, {0x11c40, 0x11c40}, {0x11c72, 0x11c8f}, + {0x11d00, 0x11d06}, {0x11d08, 0x11d09}, {0x11d0b, 0x11d30}, + {0x11d46, 0x11d46}, {0x11d60, 0x11d65}, {0x11d67, 0x11d68}, + {0x11d6a, 0x11d89}, {0x11d98, 0x11d98}, {0x11ee0, 0x11ef2}, + {0x11fb0, 0x11fb0}, {0x12000, 0x12399}, {0x12400, 0x1246e}, + {0x12480, 0x12543}, {0x12f90, 0x12ff0}, {0x13000, 0x1342e}, + {0x14400, 0x14646}, {0x16800, 0x16a38}, {0x16a40, 0x16a5e}, + {0x16a70, 0x16abe}, {0x16ad0, 0x16aed}, {0x16b00, 0x16b2f}, + {0x16b40, 0x16b43}, {0x16b63, 0x16b77}, {0x16b7d, 0x16b8f}, + {0x16e40, 0x16e7f}, {0x16f00, 0x16f4a}, {0x16f50, 0x16f50}, + {0x16f93, 0x16f9f}, {0x16fe0, 0x16fe1}, {0x16fe3, 0x16fe3}, + {0x17000, 0x187f7}, {0x18800, 0x18cd5}, {0x18d00, 0x18d08}, + {0x1aff0, 0x1aff3}, {0x1aff5, 0x1affb}, {0x1affd, 0x1affe}, + {0x1b000, 0x1b122}, {0x1b150, 0x1b152}, {0x1b164, 0x1b167}, + {0x1b170, 0x1b2fb}, {0x1bc00, 0x1bc6a}, {0x1bc70, 0x1bc7c}, + {0x1bc80, 0x1bc88}, {0x1bc90, 0x1bc99}, {0x1d400, 0x1d454}, + {0x1d456, 0x1d49c}, {0x1d49e, 0x1d49f}, {0x1d4a2, 0x1d4a2}, + {0x1d4a5, 0x1d4a6}, {0x1d4a9, 0x1d4ac}, {0x1d4ae, 0x1d4b9}, + {0x1d4bb, 0x1d4bb}, {0x1d4bd, 0x1d4c3}, {0x1d4c5, 0x1d505}, + {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514}, {0x1d516, 0x1d51c}, + {0x1d51e, 0x1d539}, {0x1d53b, 0x1d53e}, {0x1d540, 0x1d544}, + {0x1d546, 0x1d546}, {0x1d54a, 0x1d550}, {0x1d552, 0x1d6a5}, + {0x1d6a8, 0x1d6c0}, {0x1d6c2, 0x1d6da}, {0x1d6dc, 0x1d6fa}, + {0x1d6fc, 0x1d714}, {0x1d716, 0x1d734}, {0x1d736, 0x1d74e}, + {0x1d750, 0x1d76e}, {0x1d770, 0x1d788}, {0x1d78a, 0x1d7a8}, + {0x1d7aa, 0x1d7c2}, {0x1d7c4, 0x1d7cb}, {0x1df00, 0x1df1e}, + {0x1e100, 0x1e12c}, {0x1e137, 0x1e13d}, {0x1e14e, 0x1e14e}, + {0x1e290, 0x1e2ad}, {0x1e2c0, 0x1e2eb}, {0x1e7e0, 0x1e7e6}, + {0x1e7e8, 0x1e7eb}, {0x1e7ed, 0x1e7ee}, {0x1e7f0, 0x1e7fe}, + {0x1e800, 0x1e8c4}, {0x1e900, 0x1e943}, {0x1e94b, 0x1e94b}, + {0x1ee00, 0x1ee03}, {0x1ee05, 0x1ee1f}, {0x1ee21, 0x1ee22}, + {0x1ee24, 0x1ee24}, {0x1ee27, 0x1ee27}, {0x1ee29, 0x1ee32}, + {0x1ee34, 0x1ee37}, {0x1ee39, 0x1ee39}, {0x1ee3b, 0x1ee3b}, + {0x1ee42, 0x1ee42}, {0x1ee47, 0x1ee47}, {0x1ee49, 0x1ee49}, + {0x1ee4b, 0x1ee4b}, {0x1ee4d, 0x1ee4f}, {0x1ee51, 0x1ee52}, + {0x1ee54, 0x1ee54}, {0x1ee57, 0x1ee57}, {0x1ee59, 0x1ee59}, + {0x1ee5b, 0x1ee5b}, {0x1ee5d, 0x1ee5d}, {0x1ee5f, 0x1ee5f}, + {0x1ee61, 0x1ee62}, {0x1ee64, 0x1ee64}, {0x1ee67, 0x1ee6a}, + {0x1ee6c, 0x1ee72}, {0x1ee74, 0x1ee77}, {0x1ee79, 0x1ee7c}, + {0x1ee7e, 0x1ee7e}, {0x1ee80, 0x1ee89}, {0x1ee8b, 0x1ee9b}, + {0x1eea1, 0x1eea3}, {0x1eea5, 0x1eea9}, {0x1eeab, 0x1eebb}, + {0x20000, 0x2a6df}, {0x2a700, 0x2b738}, {0x2b740, 0x2b81d}, + {0x2b820, 0x2cea1}, {0x2ceb0, 0x2ebe0}, {0x2f800, 0x2fa1d}, + {0x30000, 0x3134a}, +}; + +// Number of ranges in kXIDStartRanges +constexpr size_t kNumXIDStartRanges = + sizeof(kXIDStartRanges) / sizeof(kXIDStartRanges[0]); + +// The additional code point interval ranges for the Unicode 14 XID_Continue +// set. This extends the values in kXIDStartRanges. +// This array needs to be in ascending order. 
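+// Note (informative, added for illustration): the ASCII digits
+// {0x00030, 0x00039} are listed here but not in kXIDStartRanges, so a
+// code point such as U+0032 ('2') may continue an identifier ('v2') but
+// cannot begin one.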
+constexpr CodePointRange kXIDContinueRanges[] = { + {0x00030, 0x00039}, {0x0005f, 0x0005f}, {0x000b7, 0x000b7}, + {0x00300, 0x0036f}, {0x00387, 0x00387}, {0x00483, 0x00487}, + {0x00591, 0x005bd}, {0x005bf, 0x005bf}, {0x005c1, 0x005c2}, + {0x005c4, 0x005c5}, {0x005c7, 0x005c7}, {0x00610, 0x0061a}, + {0x0064b, 0x00669}, {0x00670, 0x00670}, {0x006d6, 0x006dc}, + {0x006df, 0x006e4}, {0x006e7, 0x006e8}, {0x006ea, 0x006ed}, + {0x006f0, 0x006f9}, {0x00711, 0x00711}, {0x00730, 0x0074a}, + {0x007a6, 0x007b0}, {0x007c0, 0x007c9}, {0x007eb, 0x007f3}, + {0x007fd, 0x007fd}, {0x00816, 0x00819}, {0x0081b, 0x00823}, + {0x00825, 0x00827}, {0x00829, 0x0082d}, {0x00859, 0x0085b}, + {0x00898, 0x0089f}, {0x008ca, 0x008e1}, {0x008e3, 0x00903}, + {0x0093a, 0x0093c}, {0x0093e, 0x0094f}, {0x00951, 0x00957}, + {0x00962, 0x00963}, {0x00966, 0x0096f}, {0x00981, 0x00983}, + {0x009bc, 0x009bc}, {0x009be, 0x009c4}, {0x009c7, 0x009c8}, + {0x009cb, 0x009cd}, {0x009d7, 0x009d7}, {0x009e2, 0x009e3}, + {0x009e6, 0x009ef}, {0x009fe, 0x009fe}, {0x00a01, 0x00a03}, + {0x00a3c, 0x00a3c}, {0x00a3e, 0x00a42}, {0x00a47, 0x00a48}, + {0x00a4b, 0x00a4d}, {0x00a51, 0x00a51}, {0x00a66, 0x00a71}, + {0x00a75, 0x00a75}, {0x00a81, 0x00a83}, {0x00abc, 0x00abc}, + {0x00abe, 0x00ac5}, {0x00ac7, 0x00ac9}, {0x00acb, 0x00acd}, + {0x00ae2, 0x00ae3}, {0x00ae6, 0x00aef}, {0x00afa, 0x00aff}, + {0x00b01, 0x00b03}, {0x00b3c, 0x00b3c}, {0x00b3e, 0x00b44}, + {0x00b47, 0x00b48}, {0x00b4b, 0x00b4d}, {0x00b55, 0x00b57}, + {0x00b62, 0x00b63}, {0x00b66, 0x00b6f}, {0x00b82, 0x00b82}, + {0x00bbe, 0x00bc2}, {0x00bc6, 0x00bc8}, {0x00bca, 0x00bcd}, + {0x00bd7, 0x00bd7}, {0x00be6, 0x00bef}, {0x00c00, 0x00c04}, + {0x00c3c, 0x00c3c}, {0x00c3e, 0x00c44}, {0x00c46, 0x00c48}, + {0x00c4a, 0x00c4d}, {0x00c55, 0x00c56}, {0x00c62, 0x00c63}, + {0x00c66, 0x00c6f}, {0x00c81, 0x00c83}, {0x00cbc, 0x00cbc}, + {0x00cbe, 0x00cc4}, {0x00cc6, 0x00cc8}, {0x00cca, 0x00ccd}, + {0x00cd5, 0x00cd6}, {0x00ce2, 0x00ce3}, {0x00ce6, 0x00cef}, + {0x00d00, 0x00d03}, {0x00d3b, 0x00d3c}, {0x00d3e, 0x00d44}, + {0x00d46, 0x00d48}, {0x00d4a, 0x00d4d}, {0x00d57, 0x00d57}, + {0x00d62, 0x00d63}, {0x00d66, 0x00d6f}, {0x00d81, 0x00d83}, + {0x00dca, 0x00dca}, {0x00dcf, 0x00dd4}, {0x00dd6, 0x00dd6}, + {0x00dd8, 0x00ddf}, {0x00de6, 0x00def}, {0x00df2, 0x00df3}, + {0x00e31, 0x00e31}, {0x00e33, 0x00e3a}, {0x00e47, 0x00e4e}, + {0x00e50, 0x00e59}, {0x00eb1, 0x00eb1}, {0x00eb3, 0x00ebc}, + {0x00ec8, 0x00ecd}, {0x00ed0, 0x00ed9}, {0x00f18, 0x00f19}, + {0x00f20, 0x00f29}, {0x00f35, 0x00f35}, {0x00f37, 0x00f37}, + {0x00f39, 0x00f39}, {0x00f3e, 0x00f3f}, {0x00f71, 0x00f84}, + {0x00f86, 0x00f87}, {0x00f8d, 0x00f97}, {0x00f99, 0x00fbc}, + {0x00fc6, 0x00fc6}, {0x0102b, 0x0103e}, {0x01040, 0x01049}, + {0x01056, 0x01059}, {0x0105e, 0x01060}, {0x01062, 0x01064}, + {0x01067, 0x0106d}, {0x01071, 0x01074}, {0x01082, 0x0108d}, + {0x0108f, 0x0109d}, {0x0135d, 0x0135f}, {0x01369, 0x01371}, + {0x01712, 0x01715}, {0x01732, 0x01734}, {0x01752, 0x01753}, + {0x01772, 0x01773}, {0x017b4, 0x017d3}, {0x017dd, 0x017dd}, + {0x017e0, 0x017e9}, {0x0180b, 0x0180d}, {0x0180f, 0x01819}, + {0x018a9, 0x018a9}, {0x01920, 0x0192b}, {0x01930, 0x0193b}, + {0x01946, 0x0194f}, {0x019d0, 0x019da}, {0x01a17, 0x01a1b}, + {0x01a55, 0x01a5e}, {0x01a60, 0x01a7c}, {0x01a7f, 0x01a89}, + {0x01a90, 0x01a99}, {0x01ab0, 0x01abd}, {0x01abf, 0x01ace}, + {0x01b00, 0x01b04}, {0x01b34, 0x01b44}, {0x01b50, 0x01b59}, + {0x01b6b, 0x01b73}, {0x01b80, 0x01b82}, {0x01ba1, 0x01bad}, + {0x01bb0, 0x01bb9}, {0x01be6, 0x01bf3}, {0x01c24, 0x01c37}, + {0x01c40, 0x01c49}, {0x01c50, 
0x01c59}, {0x01cd0, 0x01cd2}, + {0x01cd4, 0x01ce8}, {0x01ced, 0x01ced}, {0x01cf4, 0x01cf4}, + {0x01cf7, 0x01cf9}, {0x01dc0, 0x01dff}, {0x0203f, 0x02040}, + {0x02054, 0x02054}, {0x020d0, 0x020dc}, {0x020e1, 0x020e1}, + {0x020e5, 0x020f0}, {0x02cef, 0x02cf1}, {0x02d7f, 0x02d7f}, + {0x02de0, 0x02dff}, {0x0302a, 0x0302f}, {0x03099, 0x0309a}, + {0x0a620, 0x0a629}, {0x0a66f, 0x0a66f}, {0x0a674, 0x0a67d}, + {0x0a69e, 0x0a69f}, {0x0a6f0, 0x0a6f1}, {0x0a802, 0x0a802}, + {0x0a806, 0x0a806}, {0x0a80b, 0x0a80b}, {0x0a823, 0x0a827}, + {0x0a82c, 0x0a82c}, {0x0a880, 0x0a881}, {0x0a8b4, 0x0a8c5}, + {0x0a8d0, 0x0a8d9}, {0x0a8e0, 0x0a8f1}, {0x0a8ff, 0x0a909}, + {0x0a926, 0x0a92d}, {0x0a947, 0x0a953}, {0x0a980, 0x0a983}, + {0x0a9b3, 0x0a9c0}, {0x0a9d0, 0x0a9d9}, {0x0a9e5, 0x0a9e5}, + {0x0a9f0, 0x0a9f9}, {0x0aa29, 0x0aa36}, {0x0aa43, 0x0aa43}, + {0x0aa4c, 0x0aa4d}, {0x0aa50, 0x0aa59}, {0x0aa7b, 0x0aa7d}, + {0x0aab0, 0x0aab0}, {0x0aab2, 0x0aab4}, {0x0aab7, 0x0aab8}, + {0x0aabe, 0x0aabf}, {0x0aac1, 0x0aac1}, {0x0aaeb, 0x0aaef}, + {0x0aaf5, 0x0aaf6}, {0x0abe3, 0x0abea}, {0x0abec, 0x0abed}, + {0x0abf0, 0x0abf9}, {0x0fb1e, 0x0fb1e}, {0x0fe00, 0x0fe0f}, + {0x0fe20, 0x0fe2f}, {0x0fe33, 0x0fe34}, {0x0fe4d, 0x0fe4f}, + {0x0ff10, 0x0ff19}, {0x0ff3f, 0x0ff3f}, {0x0ff9e, 0x0ff9f}, + {0x101fd, 0x101fd}, {0x102e0, 0x102e0}, {0x10376, 0x1037a}, + {0x104a0, 0x104a9}, {0x10a01, 0x10a03}, {0x10a05, 0x10a06}, + {0x10a0c, 0x10a0f}, {0x10a38, 0x10a3a}, {0x10a3f, 0x10a3f}, + {0x10ae5, 0x10ae6}, {0x10d24, 0x10d27}, {0x10d30, 0x10d39}, + {0x10eab, 0x10eac}, {0x10f46, 0x10f50}, {0x10f82, 0x10f85}, + {0x11000, 0x11002}, {0x11038, 0x11046}, {0x11066, 0x11070}, + {0x11073, 0x11074}, {0x1107f, 0x11082}, {0x110b0, 0x110ba}, + {0x110c2, 0x110c2}, {0x110f0, 0x110f9}, {0x11100, 0x11102}, + {0x11127, 0x11134}, {0x11136, 0x1113f}, {0x11145, 0x11146}, + {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111b3, 0x111c0}, + {0x111c9, 0x111cc}, {0x111ce, 0x111d9}, {0x1122c, 0x11237}, + {0x1123e, 0x1123e}, {0x112df, 0x112ea}, {0x112f0, 0x112f9}, + {0x11300, 0x11303}, {0x1133b, 0x1133c}, {0x1133e, 0x11344}, + {0x11347, 0x11348}, {0x1134b, 0x1134d}, {0x11357, 0x11357}, + {0x11362, 0x11363}, {0x11366, 0x1136c}, {0x11370, 0x11374}, + {0x11435, 0x11446}, {0x11450, 0x11459}, {0x1145e, 0x1145e}, + {0x114b0, 0x114c3}, {0x114d0, 0x114d9}, {0x115af, 0x115b5}, + {0x115b8, 0x115c0}, {0x115dc, 0x115dd}, {0x11630, 0x11640}, + {0x11650, 0x11659}, {0x116ab, 0x116b7}, {0x116c0, 0x116c9}, + {0x1171d, 0x1172b}, {0x11730, 0x11739}, {0x1182c, 0x1183a}, + {0x118e0, 0x118e9}, {0x11930, 0x11935}, {0x11937, 0x11938}, + {0x1193b, 0x1193e}, {0x11940, 0x11940}, {0x11942, 0x11943}, + {0x11950, 0x11959}, {0x119d1, 0x119d7}, {0x119da, 0x119e0}, + {0x119e4, 0x119e4}, {0x11a01, 0x11a0a}, {0x11a33, 0x11a39}, + {0x11a3b, 0x11a3e}, {0x11a47, 0x11a47}, {0x11a51, 0x11a5b}, + {0x11a8a, 0x11a99}, {0x11c2f, 0x11c36}, {0x11c38, 0x11c3f}, + {0x11c50, 0x11c59}, {0x11c92, 0x11ca7}, {0x11ca9, 0x11cb6}, + {0x11d31, 0x11d36}, {0x11d3a, 0x11d3a}, {0x11d3c, 0x11d3d}, + {0x11d3f, 0x11d45}, {0x11d47, 0x11d47}, {0x11d50, 0x11d59}, + {0x11d8a, 0x11d8e}, {0x11d90, 0x11d91}, {0x11d93, 0x11d97}, + {0x11da0, 0x11da9}, {0x11ef3, 0x11ef6}, {0x16a60, 0x16a69}, + {0x16ac0, 0x16ac9}, {0x16af0, 0x16af4}, {0x16b30, 0x16b36}, + {0x16b50, 0x16b59}, {0x16f4f, 0x16f4f}, {0x16f51, 0x16f87}, + {0x16f8f, 0x16f92}, {0x16fe4, 0x16fe4}, {0x16ff0, 0x16ff1}, + {0x1bc9d, 0x1bc9e}, {0x1cf00, 0x1cf2d}, {0x1cf30, 0x1cf46}, + {0x1d165, 0x1d169}, {0x1d16d, 0x1d172}, {0x1d17b, 0x1d182}, + {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, {0x1d242, 
0x1d244},
+    {0x1d7ce, 0x1d7ff}, {0x1da00, 0x1da36}, {0x1da3b, 0x1da6c},
+    {0x1da75, 0x1da75}, {0x1da84, 0x1da84}, {0x1da9b, 0x1da9f},
+    {0x1daa1, 0x1daaf}, {0x1e000, 0x1e006}, {0x1e008, 0x1e018},
+    {0x1e01b, 0x1e021}, {0x1e023, 0x1e024}, {0x1e026, 0x1e02a},
+    {0x1e130, 0x1e136}, {0x1e140, 0x1e149}, {0x1e2ae, 0x1e2ae},
+    {0x1e2ec, 0x1e2f9}, {0x1e8d0, 0x1e8d6}, {0x1e944, 0x1e94a},
+    {0x1e950, 0x1e959}, {0x1fbf0, 0x1fbf9}, {0xe0100, 0xe01ef},
+};
+
+// Number of ranges in kXIDContinueRanges
+constexpr size_t kNumXIDContinueRanges =
+    sizeof(kXIDContinueRanges) / sizeof(kXIDContinueRanges[0]);
+
+/// @param code_point the input code_point
+/// @return true if the code_point is part of the XIDStart unicode set
+bool is_xid_start(CodePoint code_point) {
+  // Fast path for ASCII.
+  if ((code_point >= 'a' && code_point <= 'z') ||
+      (code_point >= 'A' && code_point <= 'Z')) {
+    return true;
+  }
+  // With [a-zA-Z] handled, nothing less than the next sequence start can be
+  // XIDStart, so filter them all out. This catches most of the common symbols
+  // that are used in ASCII.
+  if (code_point < 0x000aa) {
+    return false;
+  }
+  return std::binary_search(kXIDStartRanges,
+                            kXIDStartRanges + kNumXIDStartRanges, code_point);
+}
+
+/// @param code_point the input code_point
+/// @return true if the code_point is part of the XIDContinue unicode set
+bool is_xid_continue(CodePoint code_point) {
+  // Short circuit ASCII. The binary search will find these last, but most
+  // of our current source is ASCII, so handle them quicker.
+  if ((code_point >= '0' && code_point <= '9') || code_point == '_') {
+    return true;
+  }
+  return is_xid_start(code_point) ||
+         std::binary_search(kXIDContinueRanges,
+                            kXIDContinueRanges + kNumXIDContinueRanges,
+                            code_point);
+}
+
+/// @return true if @p code_point is considered a whitespace
+bool is_space(CodePoint code_point) {
+  switch (code_point) {
+    case 0x0020:
+    case 0x0009:
+    case 0x000a:
+    case 0x000b:
+    case 0x000c:
+    case 0x000d:
+    case 0x0085:
+    case 0x200e:
+    case 0x200f:
+    case 0x2028:
+    case 0x2029:
+      return true;
+    default:
+      return false;
+  }
+}
+
+/// A fixed capacity, dynamic sized queue of bits (expressed as bools)
+template <size_t CAPACITY_IN_BITS>
+class BitQueue {
+ public:
+  /// @param index the index of the bit starting from the front
+  /// @return the bit value
+  auto operator[](size_t index) {
+    assert(index < count());  // TODO(dneto): this should error out.
+    return bits_[(index + read_offset_) % CAPACITY_IN_BITS];
+  }
+
+  /// Removes the bit at the front of the queue
+  /// @returns the value of the bit that was removed
+  bool pop_front() {
+    assert(count_ > 0);
+    bool value = (*this)[0];
+    count_--;
+    read_offset_++;
+    return value;
+  }
+
+  /// Appends a bit to the back of the queue
+  void push_back(bool value) {
+    assert(count_ < CAPACITY_IN_BITS);
+    count_++;
+    (*this)[count_ - 1] = value;
+  }
+
+  /// @returns true if the queue holds no bits.
+  bool empty() const { return count_ == 0; }
+
+  /// @returns the number of bits held by the queue.
+  size_t count() const { return count_; }
+
+ private:
+  std::bitset<CAPACITY_IN_BITS> bits_;
+  size_t count_ = 0;        // number of bits contained
+  size_t read_offset_ = 0;  // read offset in bits
+  //
+#if ENABLE_LOGGING
+ public:
+  void to_chars(std::string& str) {
+    std::stringstream ss;
+    ss << count_ << ":";
+    for (auto i = 0; i < count_; ++i) {
+      bool is_template = (*this)[i];
+      ss << (is_template ? "#" : ".");
+    }
+    str = ss.str();
+  }
+#endif
+};
+
+class Lexer {
+ public:
+  Lexer(TSLexer* l) : lexer_(l) {}
+
+  /// Advances the lexer by one code point.
+  void advance() { lexer_->advance(lexer_, /* whitespace */ false); }
+
+  /// Returns the next code point, advancing the lexer by one code point.
+  CodePoint next() {
+    // TODO(dneto): should assert !lexer_->eof(lexer_)
+    CodePoint lookahead = lexer_->lookahead;
+    advance();
+    return lookahead;
+  }
+
+  /// @return the next code point without advancing the lexer, or kEOF if there
+  /// are no more code points
+  CodePoint peek() { return lexer_->eof(lexer_) ? kEOF : lexer_->lookahead; }
+
+  /// @return true if the next code point is equal to @p code_point.
+  /// @note if the code point was found, then the lexer is advanced to that code
+  /// point.
+  bool match(CodePoint code_point) {
+    if (peek() == code_point) {
+      advance();
+      return true;
+    }
+    return false;
+  }
+
+  /// @return true if the next code point is found in @p code_points.
+  /// @note if the code point was found, then the lexer is advanced to that code
+  /// point.
+  bool match_anyof(std::initializer_list<CodePoint> code_points) {
+    for (CodePoint code_point : code_points) {
+      if (match(code_point)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /// Attempts to match an identifier pattern that starts with XIDStart
+  /// followed by any number of XIDContinue code points.
+  bool match_identifier() {
+    if (!is_xid_start(peek())) {
+      return false;
+    }
+
+    std::stringstream ss;
+    bool is_ascii = true;
+    if (CodePoint start = next(); start < 0x80) {
+      ss.put(char(start));
+    } else {
+      is_ascii = false;
+    }
+
+    while (true) {
+      if (!is_xid_continue(peek())) {
+        break;
+      }
+      if (CodePoint code_point = next(); code_point < 0x80) {
+        ss.put(char(code_point));
+      } else {
+        is_ascii = false;
+      }
+    }
+
+    if (is_ascii) {
+      LOG("ident: '%s'", ss.str().c_str());
+    } else {
+      LOG("ident");
+    }
+
+    return true;
+  }
+
+  /// Attempts to match a /* block comment */
+  bool match_block_comment() {
+    // TODO(dneto): Need to un-advance if matched '/' but not '*'
+    if (!match('/') || !match('*')) {
+      return false;
+    }
+
+    size_t nesting = 1;
+    while (nesting > 0 && !match(kEOF)) {
+      // TODO(dneto): If we match '/' but not '*' there is no way to un-advance
+      // back to make '/' the lookahead.
+      if (match('/') && match('*')) {
+        nesting++;
+        // TODO(dneto): Same here, need to be able to un-advance to before '*'
+      } else if (match('*') && match('/')) {
+        nesting--;
+      } else {
+        next();
+      }
+    }
+    return true;
+  }
+
+  /// Advances the lexer while the next code point is considered whitespace
+  void skip_whitespace() {
+    while (is_space(peek())) {
+      lexer_->advance(lexer_, /* whitespace */ true);
+    }
+  }
+
+ private:
+  TSLexer* lexer_;
+};
+
+struct Scanner {
+  struct State {
+    BitQueue<1024> lt_is_tmpl;  // Queue of disambiguated '<'
+    BitQueue<1024> gt_is_tmpl;  // Queue of disambiguated '>'
+    bool empty() const { return lt_is_tmpl.empty() && gt_is_tmpl.empty(); }
+  };
+  State state;
+  static_assert(sizeof(State) < TREE_SITTER_SERIALIZATION_BUFFER_SIZE);
+  // State is trivially copyable, so it can be serialized and deserialized
+  // with memcpy.
+  static_assert(std::is_trivially_copyable<State>::value);
+
+  /// Updates #state with the disambiguated '<' and '>' tokens.
+  /// The following assumptions are made on entry:
+  /// * lexer has just advanced to the end of an identifier
+  /// On exit, all '<' and '>' template tokens will be paired up to the closing
+  /// '>' for the first '<'.
+  void classify_template_args(Lexer& lexer) {
+    LOG("classify_template_args()");
+
+    if (!lexer.match('<')) {
+      LOG("  missing '<'");
+      return;
+    }
+
+    // The current expression nesting depth.
+    size_t expr_depth = 0;
+
+    // A stack of '<' tokens.
+    // Used to pair '<' and '>' tokens at the same expression depth.
+    struct StackEntry {
+      size_t index;       // Index of the opening '<' in lt_is_tmpl
+      size_t expr_depth;  // The value of 'expr_depth' for the opening '<'
+    };
+    std::vector<StackEntry> lt_stack;
+
+    LOG("classify_template_args() '<'");
+    lt_stack.push_back(StackEntry{state.lt_is_tmpl.count(), expr_depth});
+    state.lt_is_tmpl.push_back(false);  // Default to less-than
+
+    while (!lt_stack.empty() && !lexer.match(kEOF)) {
+      lexer.skip_whitespace();
+
+      // TODO: skip line-ending comments.
+      if (lexer.match_block_comment()) {
+        continue;
+      }
+
+      if (lexer.match_identifier()) {
+        lexer.skip_whitespace();  // TODO: Skip comments
+        if (lexer.match('<')) {
+          LOG("classify_template_args() '<'");
+          lt_stack.push_back(StackEntry{state.lt_is_tmpl.count(), expr_depth});
+          state.lt_is_tmpl.push_back(false);  // Default to less-than
+        }
+        continue;
+      }
+
+      if (lexer.match('>')) {
+        LOG("classify_template_args() '>'");
+        if (!lt_stack.empty() && lt_stack.back().expr_depth == expr_depth) {
+          LOG("  TEMPLATE MATCH");
+          state.gt_is_tmpl.push_back(true);
+          state.lt_is_tmpl[lt_stack.back().index] = true;
+          lt_stack.pop_back();
+        } else {
+          LOG("  non-template '>'");
+          state.gt_is_tmpl.push_back(false);
+        }
+        continue;
+      }
+
+      if (lexer.match_anyof({'(', '['})) {
+        LOG("  expr_depth++");
+        // Entering a nested expression
+        expr_depth++;
+        continue;
+      }
+
+      if (lexer.match_anyof({')', ']'})) {
+        LOG("  expr_depth--");
+        // Exiting a nested expression
+        // Pop the stack until we return to the current expression depth.
+        while (!lt_stack.empty() && lt_stack.back().expr_depth == expr_depth) {
+          lt_stack.pop_back();
+        }
+        if (expr_depth > 0) {
+          expr_depth--;
+        }
+        continue;
+      }
+
+      if (lexer.match_anyof({';', '{', '=', ':'})) {
+        LOG("  expression terminator");
+        // Expression terminating tokens. No opening template list can
+        // hold these tokens, so clear the stack and expression depth.
+        expr_depth = 0;
+        lt_stack.clear();
+        continue;
+      }
+
+      bool short_circuit = false;
+      if (lexer.match('&')) {
+        short_circuit = lexer.match('&');
+      } else if (lexer.match('|')) {
+        short_circuit = lexer.match('|');
+      }
+      if (short_circuit) {
+        LOG("  short-circuiting expression");
+        // Treat 'a < b || c > d' as a logical binary operator of two
+        // comparison operators instead of a single template argument
+        // 'b||c'. Use parentheses around 'b||c' to parse as a
+        // template argument list.
+        while (!lt_stack.empty() && lt_stack.back().expr_depth == expr_depth) {
+          lt_stack.pop_back();
+        }
+        continue;
+      }
+
+      LOG("  skip: '%c'", char(lexer.peek()));
+      lexer.next();
+    }
+  }
+
+  std::string valids(const bool* const valid_symbols) {
+    std::string result;
+    for (int i = 0; i < static_cast<int>(ERROR); i++) {
+      result += std::string(valid_symbols[i] ? "+" : "_");
+    }
+    for (int i = 0; i < static_cast<int>(ERROR); i++) {
+      if (valid_symbols[i]) {
+        result += std::string(" ") + str(static_cast<Token>(i), true);
+      }
+    }
+    return result;
+  }
+
+  /// The external token scanner function. Handles block comments and
+  /// template-argument-list vs less-than / greater-than disambiguation.
+  /// @return true if lexer->result_symbol was assigned a Token, or
+  /// false if the token should be taken from the regular WGSL tree-sitter
+  /// grammar.
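+  /// Example (informative, added for illustration): given the source
+  /// 'vec3<f32>(1.0)', the parser offers DISAMBIGUATE_TEMPLATE after
+  /// 'vec3'; classify_template_args() then pairs the '<' and '>' at
+  /// expression depth 0 and records them in the bit queues, so the next
+  /// two scans of '<' and '>' return TEMPLATE_ARGS_START and
+  /// TEMPLATE_ARGS_END. In 'a < b || c > d' the '||' cancels the pending
+  /// '<', so both code points scan as ordinary comparison operators.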
+  bool scan(TSLexer* ts_lexer, const bool* const valid_symbols) {
+    Lexer lexer{ts_lexer};
+
+    LOG("scan: '%c' [%u] %s", char(lexer.peek()),
+        unsigned(ts_lexer->get_column(ts_lexer)),
+        valids(valid_symbols).c_str());
+
+    if (valid_symbols[Token::ERROR]) {
+      ts_lexer->result_symbol = Token::ERROR;
+      return true;
+    }
+
+    if (valid_symbols[Token::DISAMBIGUATE_TEMPLATE]) {
+      // The parser is telling us the _disambiguate_template token
+      // may appear at the current position.
+      // The next token may be the start of a template list, so
+      // scan forward and use the token-list disambiguation
+      // algorithm to mark template-list-start and template-list-end
+      // tokens. These are recorded in the lt and gt bit queues.
+
+      // Call mark_end so that we can "advance" past codepoints without
+      // automatically including them in the resulting token.
+      ts_lexer->mark_end(ts_lexer);
+      ts_lexer->result_symbol = Token::DISAMBIGUATE_TEMPLATE;
+
+      // TODO(dneto): should also skip comments, both line comments
+      // and block comments.
+      lexer.skip_whitespace();
+      if (lexer.peek() == '<') {
+        if (state.lt_is_tmpl.empty()) {
+          classify_template_args(lexer);
+        }
+      }
+
+      // This has to return true so that Treesitter will save
+      // the state generated by the disambiguation scan.
+      return true;
+    }
+
+    lexer.skip_whitespace();
+
+    auto match = [&](Token token) {
+      ts_lexer->mark_end(ts_lexer);
+      ts_lexer->result_symbol = token;
+      return true;
+    };
+
+    // TODO(dneto): checkpoint and rewind if failed.
+    if (lexer.match_block_comment()) {
+      return match(Token::BLOCK_COMMENT);
+    }
+
+    // TODO(dneto): Check valid array first.
+    if (lexer.match('<')) {
+      if (!state.lt_is_tmpl.empty() && state.lt_is_tmpl.pop_front()) {
+        return match(Token::TEMPLATE_ARGS_START);
+      }
+      if (lexer.match('=')) {
+        return match(Token::LESS_THAN_EQUAL);
+      }
+      if (lexer.match('<')) {
+        if (lexer.match('=')) {
+          return match(Token::SHIFT_LEFT_ASSIGN);
+        }
+        return match(Token::SHIFT_LEFT);
+      }
+      return match(Token::LESS_THAN);
+    }
+
+    // TODO(dneto): check valid array first.
+    if (lexer.match('>')) {
+      if (!state.gt_is_tmpl.empty() && state.gt_is_tmpl.pop_front()) {
+        return match(Token::TEMPLATE_ARGS_END);
+      }
+      if (lexer.match('=')) {
+        return match(Token::GREATER_THAN_EQUAL);
+      }
+      if (lexer.match('>')) {
+        if (lexer.match('=')) {
+          return match(Token::SHIFT_RIGHT_ASSIGN);
+        }
+        return match(Token::SHIFT_RIGHT);
+      }
+      return match(Token::GREATER_THAN);
+    }
+
+    return false;  // Use regular parsing
+  }
+
+  /// Serializes the scanner state into @p buffer.
+  unsigned serialize(char* buffer) {
+    if (state.empty()) {
+      return 0;
+    }
+#if ENABLE_LOGGING
+    std::string lt_str; state.lt_is_tmpl.to_chars(lt_str);
+    std::string gt_str; state.gt_is_tmpl.to_chars(gt_str);
+    LOG("serialize(lt_is_tmpl: %s, gt_is_tmpl: %s)",
+        lt_str.c_str(), gt_str.c_str());
+#endif
+    size_t bytes_written = 0;
+    auto write = [&](const void* data, size_t num_bytes) {
+      assert(bytes_written + num_bytes <=
+             TREE_SITTER_SERIALIZATION_BUFFER_SIZE);
+      memcpy(buffer + bytes_written, data, num_bytes);
+      bytes_written += num_bytes;
+    };
+    write(&state.lt_is_tmpl, sizeof(state.lt_is_tmpl));
+    write(&state.gt_is_tmpl, sizeof(state.gt_is_tmpl));
+    // TODO(dneto): implicit conversion may be narrowing.
+    return bytes_written;
+  }
+
+  /// Deserializes the scanner state from @p buffer.
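+  /// The byte layout must mirror serialize() exactly: the raw bytes of
+  /// state.lt_is_tmpl followed by the raw bytes of state.gt_is_tmpl.
+  /// Copying with memcpy is valid because State is trivially copyable
+  /// (checked by the static_assert above).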
+  void deserialize(const char* const buffer, unsigned length) {
+    if (length == 0) {
+      state = {};
+    } else {
+      size_t bytes_read = 0;
+      auto read = [&](void* data, size_t num_bytes) {
+        assert(bytes_read + num_bytes <= length);
+        memcpy(data, buffer + bytes_read, num_bytes);
+        bytes_read += num_bytes;
+      };
+      read(&state.lt_is_tmpl, sizeof(state.lt_is_tmpl));
+      read(&state.gt_is_tmpl, sizeof(state.gt_is_tmpl));
+#if ENABLE_LOGGING
+      std::string lt_str; state.lt_is_tmpl.to_chars(lt_str);
+      std::string gt_str; state.gt_is_tmpl.to_chars(gt_str);
+      LOG("deserialize(lt_is_tmpl: %s, gt_is_tmpl: %s)",
+          lt_str.c_str(), gt_str.c_str());
+#endif
+      assert(bytes_read == length);
+    }
+  }
+};
+
+}  // anonymous namespace
+
+extern "C" {
+// Called once when language is set on a parser.
+// Allocates memory for storing scanner state.
+void* tree_sitter_wgsl_external_scanner_create() {
+  return new Scanner();
+}
+
+// Called once parser is deleted or different language set.
+// Frees memory storing scanner state.
+void tree_sitter_wgsl_external_scanner_destroy(void* const payload) {
+  Scanner* const scanner = static_cast<Scanner*>(payload);
+  delete scanner;
+}
+
+// Called whenever this scanner recognizes a token.
+// Serializes scanner state into buffer.
+unsigned tree_sitter_wgsl_external_scanner_serialize(void* const payload,
+                                                     char* const buffer) {
+  Scanner* scanner = static_cast<Scanner*>(payload);
+  return scanner->serialize(buffer);
+}
+
+// Called when handling edits and ambiguities.
+// Deserializes scanner state from buffer.
+void tree_sitter_wgsl_external_scanner_deserialize(void* const payload,
+                                                   const char* const buffer,
+                                                   unsigned const length) {
+  Scanner* const scanner = static_cast<Scanner*>(payload);
+  scanner->deserialize(buffer, length);
+}
+
+// Scans for tokens.
+bool tree_sitter_wgsl_external_scanner_scan(void* const payload,
+                                            TSLexer* const lexer,
+                                            const bool* const valid_symbols) {
+  Scanner* const scanner = static_cast<Scanner*>(payload);
+  if (scanner->scan(lexer, valid_symbols)) {
+    LOG("scan returned: %s", str(static_cast<Token>(lexer->result_symbol)));
+    return true;
+  }
+  return false;
+}
+
+}  // extern "C"
diff --git a/wgsl/wgsl.recursive.bs.include b/wgsl/wgsl.recursive.bs.include
index 9f333d580b..f295215069 100644
--- a/wgsl/wgsl.recursive.bs.include
+++ b/wgsl/wgsl.recursive.bs.include
@@ -163,6 +163,10 @@
 compound_assignment_operator:
+ | [=syntax_sym/shift_left_assign=]
+
+ | [=syntax_sym/shift_right_assign=]
+
 | `'%='`

 | `'&='`
@@ -175,10 +179,6 @@
 | `'/='`

- | `'<<='`
-
- | `'>>='`
-
 | `'^='`

 | `'|='`
@@ -287,7 +287,7 @@
 | [=recursive descent syntax/attribute=] * `'override'` [=recursive descent syntax/optionally_typed_ident=] ( `'='` [=recursive descent syntax/expression=] )? `';'`

- | [=recursive descent syntax/attribute=] * `'var'` ( `'<'` [=recursive descent syntax/address_space=] ( `','` [=recursive descent syntax/access_mode=] )? `'>'` )? [=recursive descent syntax/optionally_typed_ident=] ( `'='` [=recursive descent syntax/expression=] )? `';'`
+ | [=recursive descent syntax/attribute=] * `'var'` ( [=syntax_sym/_template_args_start=] [=recursive descent syntax/address_space=] ( `','` [=recursive descent syntax/access_mode=] )? [=syntax_sym/_template_args_end=] )? [=recursive descent syntax/optionally_typed_ident=] ( `'='` [=recursive descent syntax/expression=] )?
`';'` | `';'` @@ -423,7 +423,7 @@ | `'('` [=recursive descent syntax/expression=] `')'` - | `'bitcast'` `'<'` [=recursive descent syntax/type_specifier=] `'>'` `'('` [=recursive descent syntax/expression=] `')'` + | `'bitcast'` [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] `'('` [=recursive descent syntax/expression=] `')'`@@ -431,17 +431,17 @@ | [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'!='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] [=syntax_sym/greater_than=] [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'<'` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] [=syntax_sym/greater_than_equal=] [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'<='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] [=syntax_sym/less_than=] [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'=='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] [=syntax_sym/less_than_equal=] [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'>'` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] `'!='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] - | [=recursive descent syntax/shift_expression.post.unary_expression=] `'>='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=] + | [=recursive descent syntax/shift_expression.post.unary_expression=] `'=='` [=recursive descent syntax/unary_expression=] [=recursive descent syntax/shift_expression.post.unary_expression=]@@ -473,9 +473,9 @@ | ( [=recursive descent syntax/multiplicative_operator=] [=recursive descent syntax/unary_expression=] )* ( [=recursive descent syntax/additive_operator=] [=recursive descent syntax/unary_expression=] ( [=recursive descent syntax/multiplicative_operator=] [=recursive descent syntax/unary_expression=] )* )* - | `'<<'` [=recursive descent syntax/unary_expression=] + | [=syntax_sym/shift_left=] [=recursive descent syntax/unary_expression=] - | `'>>'` [=recursive descent syntax/unary_expression=] + | [=syntax_sym/shift_right=] [=recursive descent syntax/unary_expression=]@@ -595,13 +595,13 @@ | [=recursive descent syntax/depth_texture_type=] 
- | [=recursive descent syntax/sampled_texture_type=] `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | [=recursive descent syntax/sampled_texture_type=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] | [=recursive descent syntax/sampler_type=] - | [=recursive descent syntax/storage_texture_type=] `'<'` [=recursive descent syntax/texel_format=] `','` [=recursive descent syntax/access_mode=] `'>'` + | [=recursive descent syntax/storage_texture_type=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/texel_format=] `','` [=recursive descent syntax/access_mode=] [=syntax_sym/_template_args_end=] - | [=syntax/multisampled_texture_type=] `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | [=syntax/multisampled_texture_type=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=]@@ -621,15 +621,15 @@type_specifier_without_ident: - | [=recursive descent syntax/mat_prefix=] `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | [=recursive descent syntax/mat_prefix=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] | [=recursive descent syntax/texture_and_sampler_types=] - | [=recursive descent syntax/vec_prefix=] `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | [=recursive descent syntax/vec_prefix=] [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] - | `'array'` `'<'` [=recursive descent syntax/type_specifier=] ( `','` [=recursive descent syntax/element_count_expression=] )? `'>'` + | `'array'` [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] ( `','` [=recursive descent syntax/element_count_expression=] )? [=syntax_sym/_template_args_end=] - | `'atomic'` `'<'` [=recursive descent syntax/type_specifier=] `'>'` + | `'atomic'` [=syntax_sym/_template_args_start=] [=recursive descent syntax/type_specifier=] [=syntax_sym/_template_args_end=] | `'bool'` @@ -639,7 +639,7 @@ | `'i32'` - | `'ptr'` `'<'` [=recursive descent syntax/address_space=] `','` [=recursive descent syntax/type_specifier=] ( `','` [=recursive descent syntax/access_mode=] )? `'>'` + | `'ptr'` [=syntax_sym/_template_args_start=] [=recursive descent syntax/address_space=] `','` [=recursive descent syntax/type_specifier=] ( `','` [=recursive descent syntax/access_mode=] )? [=syntax_sym/_template_args_end=] | `'u32'`@@ -663,7 +663,7 @@variable_decl: - | `'var'` ( `'<'` [=recursive descent syntax/address_space=] ( `','` [=recursive descent syntax/access_mode=] )? `'>'` )? [=recursive descent syntax/optionally_typed_ident=] + | `'var'` ( [=syntax_sym/_template_args_start=] [=recursive descent syntax/address_space=] ( `','` [=recursive descent syntax/access_mode=] )? [=syntax_sym/_template_args_end=] )? [=recursive descent syntax/optionally_typed_ident=]diff --git a/wgsl/wgsl_unit_tests.py b/wgsl/wgsl_unit_tests.py new file mode 100644 index 0000000000..5fadef0a77 --- /dev/null +++ b/wgsl/wgsl_unit_tests.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# +# Copyright 2023 Google LLC +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of works must retain the original copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the original +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# +# 3. Neither the name of the W3C nor the names of its contributors +# may be used to endorse or promote products derived from this work +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +import argparse +import os +import sys +from tree_sitter import Language, Parser + +SCRIPT='wgsl_unit_tests.py' + +class Case: + """ + A test case + """ + def __init__(self,text,expect_pass=True): + self.text = text + self.expect_pass = (expect_pass == True) + + def __str__(self): + expectation = "expect_pass" if self.expect_pass else "expect_fail" + return "Case:{}:\n---\n{}\n---".format(expectation,self.text) + +class XFail(Case): + def __init__(self,text): + super().__init__(text,expect_pass=False) + +cases = [ + XFail("this fails"), + XFail("#version 450"), + Case("const pi = 3.14;"), + Case("const b = bitcast(1u);"), + Case("var s: sampler;"), + Case("@group(0) @binding(0) var s: sampler;"), + Case("var w: i32;"), + Case("fn foo() {var f: i32;}"), + Case("var w: array ,1>;"), + XFail("var w: array ,(vec (1).x)>;"), + Case( "var w: array ,(vec3 (1).x)>;"), + XFail("const c = arrayb>;"), + Case("var c : array b)>;"), + Case("const a = array b))>();"), + Case("const b = array b)>();"), + XFail("const d : array